HCE Project Python language Distributed Tasks Manager Application, Distributed Crawler Application and client API bindings.
2.0.0-chaika
Hierarchical Cluster Engine Python language binding
Public Member Functions
def __init__(self, keyValueStorageDir, rawDataDir, dBDataTask, dcSiteTemplate, keyValueDefaultFile, dcStatTemplate, dcLogTemplate, mutexLockTTL=Constants.DEFAULT_LOCK_TTL)
def process(self, urlFetches, queryCallback)
def processProportial(self, urlFetch, uRLUpdateTask, queryCallback)
def fillCriterionLimits(self, urlFetch, offset)
def processSimple(self, urlFetch, uRLUpdateTask, queryCallback)
def updateUrl(self, urls, urlFetch, uRLUpdateTask, queryCallback)
def fillSiteListSQLGenerate(self, sitesCriterions, userId=None)
def fillSiteList(self, incomeSiteList, queryCallback, sitesCriterions, userId=None)
def getURLFromURLTable(self, urlFetch, globalLen, queryCallback)
def execAdditionalSQLs(self, sqls, siteId, queryCallback)
Public Member Functions inherited from dc_db.BaseTask.BaseTask
def isSiteExist(self, siteId, queryCallback, userId=None)
def generateCriterionSQL(self, criterions, additionWhere=None, siteId=None)
def fetchByCriterions(self, criterions, queryCallback)
def dbLock(self, mutexName, queryCallback, sleepTime=1, mutexLockTTL=Constants.DEFAULT_LOCK_TTL)
def dbUnlock(self, mutexName, queryCallback)
def createUrlsInsertQuery(self, siteId, localKeys, localValues)
def copyUrlsToDcUrls(self, siteId, queryCallback)
def statisticLogUpdate(self, localObj, urlMd5, siteId, status, queryCallback, isInsert=False)
def calculateMd5FormUrl(self, url, urlType, useNormilize=False)
Static Public Member Functions
def fillUrlObj(row)
Static Public Member Functions inherited from dc_db.BaseTask.BaseTask
def readValueFromSiteProp(siteId, propName, queryCallback, urlMd5=None)
Public Attributes
quantMaxUrls
localSiteList
mutexLockTTL
uRLUpdateTask
siteUpdateTask
Definition at line 24 of file URLFetchTask.py.
def dc_db.URLFetchTask.URLFetchTask.__init__(self, keyValueStorageDir, rawDataDir, dBDataTask, dcSiteTemplate, keyValueDefaultFile, dcStatTemplate, dcLogTemplate, mutexLockTTL=Constants.DEFAULT_LOCK_TTL)
Definition at line 30 of file URLFetchTask.py.
def dc_db.URLFetchTask.URLFetchTask.execAdditionalSQLs(self, sqls, siteId, queryCallback)
Definition at line 318 of file URLFetchTask.py.
def dc_db.URLFetchTask.URLFetchTask.fillCriterionLimits(self, urlFetch, offset)
def dc_db.URLFetchTask.URLFetchTask.fillSiteList(self, incomeSiteList, queryCallback, sitesCriterions, userId=None)
Definition at line 206 of file URLFetchTask.py.
def dc_db.URLFetchTask.URLFetchTask.fillSiteListSQLGenerate(self, sitesCriterions, userId=None)
Definition at line 174 of file URLFetchTask.py.
def dc_db.URLFetchTask.URLFetchTask.fillUrlObj(row) [static]
Definition at line 241 of file URLFetchTask.py.
def dc_db.URLFetchTask.URLFetchTask.getURLFromURLTable(self, urlFetch, globalLen, queryCallback)
Definition at line 259 of file URLFetchTask.py.
def dc_db.URLFetchTask.URLFetchTask.process(self, urlFetches, queryCallback)
Definition at line 46 of file URLFetchTask.py.
def dc_db.URLFetchTask.URLFetchTask.processProportial(self, urlFetch, uRLUpdateTask, queryCallback)
Definition at line 93 of file URLFetchTask.py.
def dc_db.URLFetchTask.URLFetchTask.processSimple(self, urlFetch, uRLUpdateTask, queryCallback)
Definition at line 138 of file URLFetchTask.py.
def dc_db.URLFetchTask.URLFetchTask.updateUrl(self, urls, urlFetch, uRLUpdateTask, queryCallback)
dc_db.URLFetchTask.URLFetchTask.localSiteList
Definition at line 33 of file URLFetchTask.py.
dc_db.URLFetchTask.URLFetchTask.mutexLockTTL
Definition at line 34 of file URLFetchTask.py.
dc_db.URLFetchTask.URLFetchTask.quantMaxUrls
Definition at line 32 of file URLFetchTask.py.
dc_db.URLFetchTask.URLFetchTask.siteUpdateTask
Definition at line 36 of file URLFetchTask.py.
dc_db.URLFetchTask.URLFetchTask.uRLUpdateTask
Definition at line 35 of file URLFetchTask.py.