HCE Project Python language Distributed Tasks Manager Application, Distributed Crawler Application and client API bindings.  2.0.0-chaika
Hierarchical Cluster Engine Python language binding
ftest_dc_EventObjects.py
Go to the documentation of this file.
1 '''
2 HCE project, Python bindings, Distributed Crawler application.
3 Event objects functional tests.
4 
5 @package: dc
6 @author bgv bgv.hce@gmail.com
7 @link: http://hierarchical-cluster-engine.com/
8 @copyright: Copyright © 2013-2014 IOIX Ukraine
9 @license: http://hierarchical-cluster-engine.com/license/
10 @since: 0.1
11 '''
12 
13 
14 import dc.EventObjects
15 import dtm.EventObjects
16 from app.Utils import SQLExpression
17 from datetime import datetime
18 #import MySQLdb
19 #import oursql
20 #import mysql
21 
22 
23 if __name__ == "__main__":  # Python 2 script (uses print statements); each section builds one event object and prints it
24  TEST_TITLE = "Test "  # leading part of every printed title
25  TEST_TITLE_OBJECT = " object:\n"  # trailing part of every printed title, placed after the class name
26 
27 
28  #Test Site object: built from a URL, given one SiteFilter with pattern "*", then vars() and toJSON() are printed
29  #site = dc.EventObjects.Site("http://127.0.0.1/")
30  site = dc.EventObjects.Site("http://127.0.0.1/_site_random_generator.php?wrong=url")
31  site.filters = [dc.EventObjects.SiteFilter(site.id, "*")]
32  print TEST_TITLE + site.__class__.__name__ + TEST_TITLE_OBJECT, vars(site)
33  print site.toJSON()
34 
35  #Test SiteUpdate object with overwrite action (UPDATE_TYPE_OVERWRITE); every field below is set explicitly before printing
36  su = dc.EventObjects.SiteUpdate("b85ab149a528bd0a024fa0f43e80b5fc",
37  dc.EventObjects.SiteUpdate.UPDATE_TYPE_OVERWRITE)
38  su.uDate = SQLExpression("NOW()")  # presumably forwarded as a raw SQL expression - confirm in app.Utils
39  su.tcDate = SQLExpression("NOW()")
40  #su.cDate = "2012-12-12 12:12:12"
41  su.cDate = datetime.now()  # a datetime object here; the append case below assigns a string instead
42  su.resources = 2
43  su.iterations = 20
44  su.description = "Test update"
45  su.urls = ["http://localhost/"]
46  su.filters = [dc.EventObjects.SiteFilter(su.id, "http://localhost/*")]
47  su.properties = {"PROCESS_CTYPES":"text/plain", "STORE_HTTP_REQUEST":"1", "STORE_HTTP_HEADERS":"1"}
48  su.state = dc.EventObjects.Site.STATE_DISABLED
49  su.priority = 200
50  su.maxURLs = 2000
51  su.maxResources = 2000
52  su.maxErrors = 20000
53  su.maxResourceSize = 20000000
54  su.requestDelay = 200000000
55  su.httpTimeout = 2000000000
56  su.errorMask = 20000000000  # larger than 2**32
57  su.errors = 200000000000  # larger than 2**32
58  su.urlType = 2
59  print TEST_TITLE + su.__class__.__name__ + TEST_TITLE_OBJECT, vars(su)
60  print su.toJSON()
61 
62  #Test SiteUpdate object with append action (UPDATE_TYPE_APPEND); same field values as above except the site id and cDate
63  su = dc.EventObjects.SiteUpdate("699fcf4591fc23e79b839d8819904293",
64  dc.EventObjects.SiteUpdate.UPDATE_TYPE_APPEND)
65  su.uDate = SQLExpression("NOW()")
66  su.tcDate = SQLExpression("NOW()")
67  su.cDate = "2012-12-12 12:12:12"  # plain string here, unlike the datetime used in the overwrite case
68  su.resources = 2
69  su.iterations = 20
70  su.description = "Test update"
71  su.urls = ["http://localhost/"]
72  su.filters = [dc.EventObjects.SiteFilter(su.id, "http://localhost/*")]
73  su.properties = {"PROCESS_CTYPES":"text/plain", "STORE_HTTP_REQUEST":"1", "STORE_HTTP_HEADERS":"1"}
74  su.state = dc.EventObjects.Site.STATE_DISABLED
75  su.priority = 200
76  su.maxURLs = 2000
77  su.maxResources = 2000
78  su.maxErrors = 20000
79  su.maxResourceSize = 20000000
80  su.requestDelay = 200000000
81  su.httpTimeout = 2000000000
82  su.errorMask = 20000000000
83  su.errors = 200000000000
84  su.urlType = 2
85  print TEST_TITLE + su.__class__.__name__ + TEST_TITLE_OBJECT, vars(su)
86  print su.toJSON()
87 
88 
89  #Test SiteStatus object: built from a single id string
90  siteStatus = dc.EventObjects.SiteStatus("699fcf4591fc23e79b839d8819904293")
91  print TEST_TITLE + siteStatus.__class__.__name__ + TEST_TITLE_OBJECT, vars(siteStatus)
92  print siteStatus.toJSON()
93 
94  #Test SiteDelete object: built from a single id string
95  sd = dc.EventObjects.SiteDelete("699fcf4591fc23e79b839d8819904293")
96  print TEST_TITLE + sd.__class__.__name__ + TEST_TITLE_OBJECT, vars(sd)
97  print sd.toJSON()
98 
99  #Test SiteCleanup object: built from a single id string
100  sc = dc.EventObjects.SiteCleanup("b85ab149a528bd0a024fa0f43e80b5fc")
101  print TEST_TITLE + sc.__class__.__name__ + TEST_TITLE_OBJECT, vars(sc)
102  print sc.toJSON()
103 
104  #Test SiteFilter object: only vars() is printed for it, toJSON() is not called here
105  sf = dc.EventObjects.SiteFilter("235325634634263", "*")
106  print TEST_TITLE + sf.__class__.__name__ + TEST_TITLE_OBJECT, vars(sf)
107 
108  #Test URL object simple: built from an id string and a URL string
109  #url = dc.EventObjects.URL("b85ab149a528bd0a024fa0f43e80b5fc", "http://127.0.0.1/")
110  url = dc.EventObjects.URL("b85ab149a528bd0a024fa0f43e80b5fc", "http://127.0.0.1/_site_random_generator.php?a=1")
111  print TEST_TITLE + url.__class__.__name__ + TEST_TITLE_OBJECT, vars(url)
112  print url.toJSON()
113 
114  #Test URLStatus object: second argument is a 32-hex-digit string and urlType is then set to URL_TYPE_MD5
115  us = dc.EventObjects.URLStatus("b85ab149a528bd0a024fa0f43e80b5fc", "701ccc5c1c589041d31d13dae8dce90d")
116  us.urlType = dc.EventObjects.URLStatus.URL_TYPE_MD5
117  print TEST_TITLE + us.__class__.__name__ + TEST_TITLE_OBJECT, vars(us)
118  print us.toJSON()
119 
120  #Test BatchItem object: two items share the same URL object; only bi1 is printed, bi2 is used by the Batch below
121  urlObj = dc.EventObjects.URL("235325634634263", "http://127.0.0.1/")
122  bi1 = dc.EventObjects.BatchItem("235325634634263", "235325634634234", urlObj)
123  print TEST_TITLE + bi1.__class__.__name__ + TEST_TITLE_OBJECT, vars(bi1)
124  bi2 = dc.EventObjects.BatchItem("335325634634264", "335325634634235", urlObj)
125  print bi1.toJSON()
126 
127  #Test Batch object: built from the two BatchItem objects created above
128  b = dc.EventObjects.Batch([bi1, bi2])
129  print TEST_TITLE + b.__class__.__name__ + TEST_TITLE_OBJECT, vars(b)
130  print b.toJSON()
131 
132  #Test URLFetch object. NOTE(review): listing line 133 is absent from this extract, so `uf` has no visible assignment before its first use below - restore it from the original file
134  print TEST_TITLE + uf.__class__.__name__ + TEST_TITLE_OBJECT, vars(uf)
135  print uf.toJSON()
136  uf = dc.EventObjects.URLFetch(["235325634634263", "235325634634234"])
137  print TEST_TITLE + uf.__class__.__name__ + TEST_TITLE_OBJECT, vars(uf)
138  print uf.toJSON()  # NOTE(review): listing line 139 is absent from this extract; the statement that preceded the criterion assignment is not visible
140  uf.urlsCriterions[dc.EventObjects.URLFetch.CRITERION_WHERE] = \
141  "Status=7 AND Crawled>0 AND Processed>0 AND CDate BETWEEN '2014-06-28 00:00:01' AND '2014-06-28 23:59:59'"
142  print TEST_TITLE + uf.__class__.__name__ + TEST_TITLE_OBJECT, vars(uf)
143  print uf.toJSON()
144 
145  #Test URLUpdate object: first with positional id and URL plus statusField, then with an MD5-typed urlString and both stateField and statusField
146  uu = dc.EventObjects.URLUpdate("525326523434525", "http://127.0.0.1/", statusField=dc.EventObjects.URL.STATUS_NEW)
147  print TEST_TITLE + uu.__class__.__name__ + TEST_TITLE_OBJECT, vars(uu)
148  print uu.toJSON()
149  uu = dc.EventObjects.URLUpdate("b85ab149a528bd0a024fa0f43e80b5fc", urlString="701ccc5c1c589041d31d13dae8dce90d",
150  urlType=dc.EventObjects.URLStatus.URL_TYPE_MD5,
151  stateField=dc.EventObjects.URL.STATE_ENABLED,
152  statusField=dc.EventObjects.URL.STATUS_NEW)
153  print TEST_TITLE + uu.__class__.__name__ + TEST_TITLE_OBJECT, vars(uu)
154  print uu.toJSON()
155 
156  #Test URLDelete object: first with a plain URL string, then with urlType=URL_TYPE_MD5
157  ud = dc.EventObjects.URLDelete("233243243242423", urlString="http://127.0.0.1/")
158  print TEST_TITLE + ud.__class__.__name__ + TEST_TITLE_OBJECT, vars(ud)
159  print ud.toJSON()
160  ud = dc.EventObjects.URLDelete("b85ab149a528bd0a024fa0f43e80b5fc", urlString="701ccc5c1c589041d31d13dae8dce90d",
161  urlType=dc.EventObjects.URLStatus.URL_TYPE_MD5)
162  print TEST_TITLE + ud.__class__.__name__ + TEST_TITLE_OBJECT, vars(ud)
163  print ud.toJSON()
164 
165  #Test URLCleanup object: first with a URL string and statusField, then with urlType=URL_TYPE_MD5 and stateField
166  uc = dc.EventObjects.URLCleanup("346436436436346", urlString="http://127.0.0.1/", statusField=dc.EventObjects.URL.STATUS_NEW)
167  print TEST_TITLE + uc.__class__.__name__ + TEST_TITLE_OBJECT, vars(uc)
168  print uc.toJSON()
169  uc = dc.EventObjects.URLCleanup("3463463463463463", urlString="235325634634263", urlType=dc.EventObjects.URLStatus.URL_TYPE_MD5,
170  stateField=dc.EventObjects.URL.STATE_DISABLED)
171  print TEST_TITLE + uc.__class__.__name__ + TEST_TITLE_OBJECT, vars(uc)
172  print uc.toJSON()
173 
174  #Test URLContentRequest object: first with two arguments, then with a third argument that adds two CONTENT_TYPE_* constants together, then with a URLFetch attached
175  ucr = dc.EventObjects.URLContentRequest("325632523424234234", "http://127.0.0.1/")
176  print TEST_TITLE + ucr.__class__.__name__ + TEST_TITLE_OBJECT, vars(ucr)
177  print ucr.toJSON()
178  ucr = dc.EventObjects.URLContentRequest("3464363464363634", "http://127.0.0.1/",
179  dc.EventObjects.URLContentRequest.CONTENT_TYPE_PROCESSED +
180  dc.EventObjects.URLContentRequest.CONTENT_TYPE_RAW_ALL)
181  print TEST_TITLE + ucr.__class__.__name__ + TEST_TITLE_OBJECT, vars(ucr)
182  print ucr.toJSON()  # NOTE(review): listing lines 183-184 are absent from this extract; whether `ucr`/`uf` were rebuilt before the assignments below is not visible
185  uf.urlsCriterions[dc.EventObjects.URLFetch.CRITERION_WHERE] = \
186  "Status=7 AND Crawled>0 AND Processed>0 AND CDate BETWEEN '2014-06-28 00:00:01' AND '2014-06-28 23:59:59'"
187  uf.sitesCriterions[dc.EventObjects.URLFetch.CRITERION_WHERE] = \
188  "State IN (1,2,3)"
189  uf.algorithm = dc.EventObjects.URLFetch.PROPORTIONAL_ALGORITHM
190  uf.maxURLs = 10
191  ucr.urlFetch = uf  # the configured URLFetch is attached to the request object
192  print TEST_TITLE + ucr.__class__.__name__ + TEST_TITLE_OBJECT, vars(ucr)
193  print ucr.toJSON()
194 
195 
196  #Test URLContentResponse object: `ucr` is rebound here to a response built from a URL and two Content objects
197  ucr = dc.EventObjects.URLContentResponse("http://127.0.0.1/",
198  dc.EventObjects.Content(["<html>test content 1</html>"], 124124354),
199  dc.EventObjects.Content(["test content 1"]))
200  print TEST_TITLE + ucr.__class__.__name__ + TEST_TITLE_OBJECT, vars(ucr)
201  print ucr.toJSON()
202 
203 
204  #Test ClientResponse object. NOTE(review): listing lines 205-207 are absent from this extract, so `cr` has no visible assignment - restore its construction from the original file
208  print TEST_TITLE + cr.__class__.__name__ + TEST_TITLE_OBJECT, vars(cr)
209  print cr.toJSON()
210 
GeneralResponse event object: represents a general state response for multipurpose use.