2 HCE project, Python bindings, Distributed Crawler application. 3 SitesManager object and related classes definitions. 6 @author bgv bgv.hce@gmail.com 7 @link: http://hierarchical-cluster-engine.com/ 8 @copyright: Copyright © 2013-2016 IOIX Ukraine 9 @license: http://hierarchical-cluster-engine.com/license/ 25 import cPickle
as pickle
32 from dc
import EventObjects
39 from drce.DRCEManager import ConnectionTimeout, TransportInternalErr, CommandExecutorErr
50 lock = threading.Lock()
60 DRCE_REDUCER_TTL = 3000000
61 SITE_PROPERTIES_RECRAWL_WHERE_NAME =
"RECRAWL_WHERE" 62 SITE_PROPERTIES_RECRAWL_DELETE_WHERE_NAME =
"RECRAWL_DELETE_WHERE" 63 SITE_PROPERTIES_RECRAWL_DELETE_NAME =
"RECRAWL_DELETE" 64 SITE_PROPERTIES_RECRAWL_OPTIMIZE_NAME =
"RECRAWL_OPTIMIZE" 66 SITE_PROPERTIES_RECRAWL_PERIOD_MODE_NAME =
"RECRAWL_PERIOD_MODE" 67 SITE_PROPERTIES_RECRAWL_PERIOD_MIN_NAME =
"RECRAWL_PERIOD_MIN" 68 SITE_PROPERTIES_RECRAWL_PERIOD_MAX_NAME =
"RECRAWL_PERIOD_MAX" 69 SITE_PROPERTIES_RECRAWL_PERIOD_STEP_NAME =
"RECRAWL_PERIOD_STEP" 70 SITE_RECRAWL_THREAD_NAME_PREFIX =
'ReCrawl_' 73 CONFIG_SERVER =
"server" 74 CONFIG_DRCE_HOST =
"DRCEHost" 75 CONFIG_DRCE_PORT =
"DRCEPort" 76 CONFIG_DRCE_TIMEOUT =
"DRCETimeout" 77 CONFIG_DRCE_DB_APP_NAME =
"DRCEDBAppName" 78 CONFIG_RECRAWL_SITES_MAX =
"RecrawlSiteMax" 79 CONFIG_RECRAWL_SITES_ITER_PERIOD =
"RecrawlSiteIterationPeriod" 80 CONFIG_RECRAWL_SITES_PERIOD_MODE =
"RecrawlSitePeriodMode" 81 CONFIG_RECRAWL_SITES_PERIOD_MIN =
"RecrawlSitePeriodMin" 82 CONFIG_RECRAWL_SITES_PERIOD_MAX =
"RecrawlSitePeriodMax" 83 CONFIG_RECRAWL_SITES_PERIOD_STEP =
"RecrawlSitePeriodStep" 84 CONFIG_RECRAWL_SITES_RECRAWL_DATE_EXP =
"RecrawlSiteRecrawlDateExpression" 85 CONFIG_RECRAWL_SITES_SELECT_CRITERION =
"RecrawlSiteSelectCriterion" 86 CONFIG_RECRAWL_SITES_SELECT_ORDER =
"RecrawlSiteSelectOrder" 87 CONFIG_RECRAWL_SITES_MAX_THREADS =
"RecrawlSiteMaxThreads" 88 CONFIG_RECRAWL_SITES_LOCK_STATE =
"RecrawlSiteLockState" 89 CONFIG_RECRAWL_SITES_OPTIMIZE =
"RecrawlSiteOptimize" 90 CONFIG_RECRAWL_SITES_DRCE_TIMEOUT =
"RecrawlSiteDRCETimeout" 91 CONFIG_RECRAWL_SITES_MODE =
"RecrawlSiteMode" 92 CONFIG_RECRAWL_DELAY_BEFORE =
"RecrawlDelayBefore" 93 CONFIG_RECRAWL_DELAY_AFTER =
"RecrawlDelayAfter" 94 CONFIG_POLLING_TIMEOUT =
"PollingTimeout" 95 CONFIG_DEFAULT_RECRAWL_UPDATE_CRITERION =
"DefaultRecrawUpdatelCriterion" 96 CONFIG_DEFAULT_RECRAWL_DELETE_OLD =
"DefaultRecrawDeleteOld" 97 CONFIG_DEFAULT_RECRAWL_DELETE_OLD_CRITERION =
"DefaultRecrawDeleteOldCriterion" 98 CONFIG_DRCE_ROUTE =
"DRCERoute" 99 CONFIG_PURGE_METHOD =
"PurgeMethod" 100 CONFIG_DRCE_NODES =
"DRCENodes" 101 CONFIG_COMMON_COMMANDS_THREADING_MODE =
"CommonCommandsThreadingMode" 103 DRCE_CONNECTIONS_POOL =
"DRCEConnectionsPool" 104 COMMON_COMMANDS_THREADING_SIMPLE = 0
105 COMMON_COMMANDS_THREADING_MULTI = 1
106 COMMON_COMMANDS_THREAD_NAME_PREFIX =
'Common_' 114 def __init__(self, configParser, connectionBuilderLight=None):
115 super(SitesManager, self).
__init__()
127 if connectionBuilderLight
is None:
130 className = self.__class__.__name__
137 serverConnection = connectionBuilderLight.build(TRANSPORT_CONSTS.SERVER_CONNECT, self.
serverName)
143 self.
eventTypes = {EVENT_TYPES.SITE_NEW:EVENT_TYPES.SITE_NEW_RESPONSE,
144 EVENT_TYPES.SITE_UPDATE:EVENT_TYPES.SITE_UPDATE_RESPONSE,
145 EVENT_TYPES.SITE_STATUS:EVENT_TYPES.SITE_STATUS_RESPONSE,
146 EVENT_TYPES.SITE_DELETE:EVENT_TYPES.SITE_DELETE_RESPONSE,
147 EVENT_TYPES.SITE_CLEANUP:EVENT_TYPES.SITE_CLEANUP_RESPONSE,
148 EVENT_TYPES.SITE_FIND:EVENT_TYPES.SITE_FIND_RESPONSE,
149 EVENT_TYPES.URL_NEW:EVENT_TYPES.URL_NEW_RESPONSE,
150 EVENT_TYPES.URL_STATUS:EVENT_TYPES.URL_STATUS_RESPONSE,
151 EVENT_TYPES.URL_UPDATE:EVENT_TYPES.URL_UPDATE_RESPONSE,
152 EVENT_TYPES.URL_DELETE:EVENT_TYPES.URL_DELETE_RESPONSE,
153 EVENT_TYPES.URL_FETCH:EVENT_TYPES.URL_FETCH_RESPONSE,
154 EVENT_TYPES.URL_CLEANUP:EVENT_TYPES.URL_CLEANUP_RESPONSE,
155 EVENT_TYPES.URL_CONTENT:EVENT_TYPES.URL_CONTENT_RESPONSE,
156 EVENT_TYPES.SQL_CUSTOM:EVENT_TYPES.SQL_CUSTOM_RESPONSE,
157 EVENT_TYPES.URL_PUT:EVENT_TYPES.URL_PUT_RESPONSE,
158 EVENT_TYPES.URL_HISTORY:EVENT_TYPES.URL_HISTORY_RESPONSE,
159 EVENT_TYPES.URL_STATS:EVENT_TYPES.URL_STATS_RESPONSE,
160 EVENT_TYPES.PROXY_NEW:EVENT_TYPES.PROXY_NEW_RESPONSE,
161 EVENT_TYPES.PROXY_UPDATE:EVENT_TYPES.PROXY_UPDATE_RESPONSE,
162 EVENT_TYPES.PROXY_DELETE:EVENT_TYPES.PROXY_DELETE_RESPONSE,
163 EVENT_TYPES.PROXY_STATUS:EVENT_TYPES.PROXY_STATUS_RESPONSE,
164 EVENT_TYPES.PROXY_FIND:EVENT_TYPES.PROXY_FIND_RESPONSE,
165 EVENT_TYPES.ATTR_SET:EVENT_TYPES.ATTR_SET_RESPONSE,
166 EVENT_TYPES.ATTR_UPDATE:EVENT_TYPES.ATTR_UPDATE_RESPONSE,
167 EVENT_TYPES.ATTR_DELETE:EVENT_TYPES.ATTR_DELETE_RESPONSE,
168 EVENT_TYPES.ATTR_FETCH:EVENT_TYPES.ATTR_FETCH_RESPONSE}
178 except ConfigParser.NoOptionError:
261 except ConfigParser.NoOptionError:
263 except Exception
as err:
264 logger.error(
"Error de-serialize json of connection parameters for DRCE connections pool: %s", err)
271 except ConfigParser.NoOptionError:
280 logger.debug(
"Periodic iteration started.")
286 logger.debug(
"Now time to try to perform re-crawl, interval %s",
289 self.
statFields[DC_CONSTS.RECRAWL_THREADS_COUNTER_QUEUE_NAME]:
291 logger.info(
"Forking new recrawl thread")
295 str(self.
statFields[DC_CONSTS.RECRAWL_THREADS_CREATED_COUNTER_NAME]))
297 logger.info(
"New recrawl thread forked")
300 logger.debug(
"Max recrawl threads limit reached %s",
303 logger.debug(
"Re-crawl disabled!")
306 except Exception
as err:
307 Utils.ExceptionLog.handler(logger, err,
"Exception:")
309 logger.debug(
"Periodic iteration finished.")
319 logger.info(
"Common command in simple mode")
324 logger.info(
"Forking new common commands thread")
326 t2 = threading.Thread(target=self.
eventsHandlerTS, args=(event, logging,))
328 str(self.
statFields[DC_CONSTS.COMMON_THREADS_CREATED_COUNTER_NAME]))
330 logger.info(
"New common commands thread forked")
333 except Exception
as err:
334 Utils.ExceptionLog.handler(logger, err,
"Exception:")
348 logger = loggingObj.getLogger(DC_CONSTS.LOGGER_NAME)
355 logger.debug(
"Request event:\n" + Utils.varDump(event))
360 persistentConnection =
False 362 connectionParams =
None 364 persistentConnection =
True 368 clientResponseObj = self.
processDRCERequest(drceRequest, persistentConnection, timeout, connectionParams)
369 logger.debug(
"Response ClientResponseObj:\n" + Utils.varDump(clientResponseObj))
376 if event.eventType == EVENT_TYPES.URL_CONTENT:
377 if event.cookie
is None:
379 if isinstance(event.cookie, dict):
380 event.cookie[EventObjects.URLFetch.CRITERION_ORDER] = []
381 for urlContentRequestItem
in event.eventObj:
382 if urlContentRequestItem.urlFetch
is not None and\
383 EventObjects.URLFetch.CRITERION_ORDER
in urlContentRequestItem.urlFetch.urlsCriterions:
384 event.cookie[EventObjects.URLFetch.CRITERION_ORDER].append(
385 urlContentRequestItem.urlFetch.urlsCriterions[EventObjects.URLFetch.CRITERION_ORDER])
387 event.cookie[EventObjects.URLFetch.CRITERION_ORDER].append(
"")
388 if len(event.cookie) == 0:
391 if event.eventType == EVENT_TYPES.URL_FETCH:
392 if event.cookie
is None:
394 if isinstance(event.cookie, dict):
395 event.cookie[EventObjects.URLFetch.CRITERION_ORDER] = []
396 for urlFetchRequestItem
in event.eventObj:
397 if EventObjects.URLFetch.CRITERION_ORDER
in urlFetchRequestItem.urlsCriterions:
398 event.cookie[EventObjects.URLFetch.CRITERION_ORDER].append(
399 urlFetchRequestItem.urlsCriterions[EventObjects.URLFetch.CRITERION_ORDER])
401 event.cookie[EventObjects.URLFetch.CRITERION_ORDER].append(
"")
402 if len(event.cookie) == 0:
406 self.
reply(event, replyEvent)
407 logger.info(
"Reply sent")
428 parts = item.split(
':')
430 ret = ((parts[0], int(parts[1])), int(parts[2]))
431 logger.info(
"Connection options found for event %s: %s", str(eventType), str(ret))
434 logger.error(
"Wrong items number 'host:port:timeout' in DRCE connections pool key: %s", str(item))
435 except Exception
as err:
436 logger.error(
"Error get DRCE connection parameters, possible wrong ini value for DRCE connections pool: %s\n%s",
450 drceSyncTasksCoverObj = DC_CONSTS.DRCESyncTasksCover(eventType, eventObj)
454 taskExecuteStruct.input = pickle.dumps(drceSyncTasksCoverObj)
455 taskExecuteStruct.session =
Session(Session.TMODE_SYNC)
456 logger.debug(
"DRCE taskExecuteStruct:\n" + Utils.varDump(taskExecuteStruct))
458 return taskExecuteStruct
467 def processDRCERequest(self, taskExecuteStruct, persistentDCREConnection=True, timeout=-1, connectionParams=None):
471 taskId = ctypes.c_uint32(zlib.crc32(idGenerator.get_connection_uid(), int(time.time()))).value
476 taskExecuteRequest.data = taskExecuteStruct
479 logger.info(
"Sending sync task id:" + str(taskId) +
" to DRCE router!")
481 response = self.
sendToDRCERouter(taskExecuteRequest, persistentDCREConnection, timeout, connectionParams)
482 logger.info(
"Received response on sync task from DRCE router!")
483 logger.debug(
"Response: %s", Utils.varDump(response))
489 clientResponse.errorCode = EventObjects.ClientResponse.STATUS_ERROR_NONE
490 clientResponse.errorMessage =
"Response error, None returned from DRCE, possible timeout " + \
492 logger.error(clientResponse.errorMessage)
494 if len(response.items) == 0:
495 clientResponse.errorCode = EventObjects.ClientResponse.STATUS_ERROR_EMPTY_LIST
496 clientResponse.errorMessage =
"Response error, empty list returned from DRCE, possible no one node in cluster!" 497 logger.error(clientResponse.errorMessage)
499 for item
in response.items:
503 if item.error_code > 0
or item.exit_status > 0:
504 clientResponseItem.errorCode = clientResponseItem.STATUS_ERROR_DRCE
505 clientResponseItem.errorMessage =
"Response item error error_message=" + item.error_message + \
506 ", error_code=" + str(item.error_code) + \
507 ", exit_status=" + str(item.exit_status) + \
508 ", stderror=" + str(item.stderror)
509 logger.error(clientResponseItem.errorMessage)
513 drceSyncTasksCover = pickle.loads(item.stdout)
514 clientResponseItem.itemObject = drceSyncTasksCover.eventObject
515 except Exception
as e:
516 clientResponseItem.errorCode = EventObjects.ClientResponseItem.STATUS_ERROR_RESTORE_OBJECT
517 clientResponseItem.errorMessage = EventObjects.ClientResponseItem.MSG_ERROR_RESTORE_OBJECT +
"\n" + \
518 str(e.message) +
"\nstdout=" + str(item.stdout) + \
519 ", stderror=" + str(item.stderror)
520 logger.error(clientResponseItem.errorMessage)
522 clientResponseItem.id = item.id
523 clientResponseItem.host = item.host
524 clientResponseItem.port = item.port
525 clientResponseItem.node = item.node
526 clientResponseItem.time = item.time
528 clientResponse.itemsList.append(clientResponseItem)
530 return clientResponse
540 def sendToDRCERouter(self, request, persistentDCREConnection=True, timeout=-1, connectionParams=None):
545 if persistentDCREConnection:
546 logger.info(
"DRCE router sending via persistent connection with timeout=%s", str(timeout))
550 if connectionParams
is None:
552 logger.info(
"DRCE router sending via temporary connection with timeout=" + str(timeout) + \
553 ", and regular host:" + str(self.
drceHost) +
", port:" + str(self.
drcePort))
555 logger.info(
"DRCE router sending via temporary connection with timeout=" + str(timeout) + \
556 ", and DRCE connections pool host:" + str(connectionParams[0]) + \
557 ", port:" + str(connectionParams[1]))
558 drceManager.activate_host(
HostParams(connectionParams[0], int(connectionParams[1])))
564 except (ConnectionTimeout, TransportInternalErr, CommandExecutorErr)
as err:
566 logger.error(
"DRCE router transport send error : " + str(err.message))
567 except Exception
as err:
569 logger.error(
"DRCE router common error : " + str(err.message))
571 logger.info(
"DRCE router sent!")
573 if not persistentDCREConnection:
575 drceManager.clear_host()
590 logger = loggingObj.getLogger(DC_CONSTS.LOGGER_NAME)
594 logger.info(
"RECRAWL_THREAD_STARTED")
603 logger.debug(
"Send DRCE request SITE_FIND")
605 logger.debug(
"clientResponse:" + Utils.varDump(clientResponse))
613 for siteId
in sites.keys():
616 logger.debug(
"Site %s is already in progress of recrawl by some thread", str(siteId))
624 sitesQueue[str(siteId)] = {
"time":t1}
632 logger.debug(
"Site selected for recrawl, site[" + str(siteId) +
"]:\n" + Utils.varDump(sites[siteId]))
634 sitePrevState = sites[siteId].state
639 siteUpdate.uDate = siteUpdate.tcDate
642 siteUpdate.recrawlDate = \
651 logger.debug(
"Site is not locked due empty string value of configuration parameter %s",
653 logger.debug(
"Update site request including lock state if configured, id=%s", str(siteId))
657 logger.debug(
"Update site request done, id=%s", str(siteId))
671 urlUpdate.urlMd5 =
None 672 urlUpdate.status = EventObjects.URLUpdate.STATUS_NEW
674 if crit
is None or crit ==
"":
676 logger.debug(
"Default update criterion: " + str(crit))
678 logger.debug(
"Custom site update criterion: " + str(crit))
679 urlUpdate.criterions = {EventObjects.URLFetch.CRITERION_WHERE : crit}
680 urlUpdateList.append(urlUpdate)
684 siteDelOld = EventObjects.Site.getFromProperties(sites[siteId].properties,
686 if siteDelOld
is None or siteDelOld !=
"0":
687 siteSqlExpression = EventObjects.Site.getFromProperties(sites[siteId].properties,
689 logger.debug(
"Site expression: " + str(siteSqlExpression))
690 if siteSqlExpression
is not None and siteSqlExpression !=
"":
691 sqlExpression = siteSqlExpression
692 logger.debug(
"Custom expression set: " + str(sqlExpression))
694 logger.debug(
"Site delete old: " + str(siteDelOld))
696 {EventObjects.URLFetch.CRITERION_WHERE:sqlExpression},
697 reason=EventObjects.URLDelete.REASON_RECRAWL)
698 urlDelete.urlType =
None 700 logger.debug(
"Old URLs delete due re-crawl: " + Utils.varDump(urlDelete))
701 urlDeleteList.append(urlDelete)
702 logger.debug(
"URLDelete request, id=%s", str(siteId))
706 logger.debug(
"URLDelete request done, id=%s", str(siteId))
708 self.
updateStatField(DC_CONSTS.SITES_RECRAWL_DELETED_COUNTER_NAME, len(urlDeleteList),
715 optimize = EventObjects.Site.getFromProperties(sites[siteId].properties,
718 sqlQuery =
"OPTIMIZE TABLE `urls_" + str(siteId) +
"`" 719 logger.debug(
"CustomRequest query: %s", sqlQuery)
724 logger.debug(
"CustomRequest request done, id=%s, customResponse:\n%s", str(siteId),
725 Utils.varDump(customResponse))
729 if recrawlPeriod
is not None:
730 siteUpdate.recrawlPeriod = recrawlPeriod
736 self.
updateStatField(DC_CONSTS.SITES_RECRAWL_UPDATED_COUNTER_NAME, len(urlUpdateList),
750 siteUpdate.state = sitePrevState
751 siteUpdate.iterations =
None 753 siteUpdate.uDate =
None 754 siteUpdate.recrawlDate =
None 755 logger.debug(
"Unlock site request, id=%s", str(siteId))
759 logger.debug(
"Unlock site request done, id=%s", str(siteId))
765 logger.debug(
"Site is not unlocked due empty string value of configuration parameter %s",
773 except Exception
as err:
775 logger.error(
"Recrawl thread exception:" + str(err))
779 logger.error(
"Recrawl thread unknown exception!")
786 if siteId
not in sitesQueue:
796 logger.info(
"RECRAWL_THREAD_FINISHED")
805 batchItemsCounter = 0
806 batchItemsTotalCounter = 0
809 for item
in clientResponseItems:
810 if item.errorCode == EventObjects.ClientResponseItem.STATUS_OK:
811 if isinstance(item.itemObject, list):
812 for site
in item.itemObject:
813 batchItemsTotalCounter = batchItemsTotalCounter + 1
815 if str(site.id)
not in uniqueSitesDic:
816 uniqueSitesDic[str(site.id)] = site
817 batchItemsCounter = batchItemsCounter + 1
820 uniqueSitesDic[str(site.id)].newURLs = uniqueSitesDic[str(site.id)].newURLs + site.newURLs
821 uniqueSitesDic[str(site.id)].collectedURLs = uniqueSitesDic[str(site.id)].collectedURLs + \
823 uniqueSitesDic[str(site.id)].deletedURLs = uniqueSitesDic[str(site.id)].deletedURLs + site.deletedURLs
824 uniqueSitesDic[str(site.id)].contents = uniqueSitesDic[str(site.id)].contents + site.contents
825 uniqueSitesDic[str(site.id)].resources = uniqueSitesDic[str(site.id)].resources + site.resources
827 logger.error(
"Wrong object type in the itemObject.item: " + str(
type(site)) + \
828 " but 'Site' expected")
830 logger.error(
"Wrong object type in the ClientResponseItem.itemObject: " + str(
type(item.itemObject)) + \
831 " but 'list' expected")
833 logger.debug(
"ClientResponseItem error: " + str(item.errorCode) +
" : " + item.errorMessage)
835 logger.debug(
"Unique sites: " + str(batchItemsCounter) +
", total sites: " + str(batchItemsTotalCounter))
837 return uniqueSitesDic
869 logger.debug(
"RecrawlPeriod auto recalculate, siteId:%s, mode:%s, minv:%s, maxv:%s, step:%s", str(siteObj.id),
870 str(mode), str(minv), str(maxv), str(step))
874 logger.debug(
"RecrawlPeriod auto recalculate is ON, siteId:%s, current value:%s",
875 str(siteObj.id), str(siteObj.recrawlPeriod))
877 if siteObj.newURLs > 0
or siteObj.resources > 0:
879 if siteObj.recrawlPeriod < maxv:
881 recrawlPeriod = siteObj.recrawlPeriod + step
883 logger.debug(
"Max value of RecrawlPeriod reached:%s", str(maxv))
886 if siteObj.recrawlPeriod > minv:
888 recrawlPeriod = siteObj.recrawlPeriod - step
890 logger.debug(
"Min value of RecrawlPeriod reached:%s", str(minv))
891 logger.debug(
"New RecrawlPeriod value for site %s is:%s", str(siteObj.id), str(recrawlPeriod))
902 if clientResponse.errorCode > 0:
903 logger.error(
"clientResponse.errorCode:" + str(clientResponse.errorCode) +
":" + clientResponse.errorMessage)
904 for clientResponseItem
in clientResponse.itemsList:
906 if clientResponseItem.errorCode != EventObjects.ClientResponseItem.STATUS_OK:
907 logger.error(
"ClientResponseItem error: " + str(clientResponseItem.errorCode) +
" : " + \
908 clientResponseItem.errorMessage +
"\n" + Utils.varDump(clientResponseItem))
910 logger.error(
"Wrong type: " + str(
type(clientResponseItem)) +
", expected ClientResponseItem\n" + \
911 Utils.varDump(clientResponseItem))
913 logger.error(
"Wrong type: " + str(
type(clientResponse)) +
", expected ClientResponse\n" + \
914 Utils.varDump(clientResponse))
924 if (event.eventType == DC_CONSTS.EVENT_TYPES.SITE_NEW
and isinstance(event.eventObj,
EventObjects.Site))
or\
925 (event.eventType == DC_CONSTS.EVENT_TYPES.SITE_UPDATE
and isinstance(event.eventObj,
EventObjects.SiteUpdate)):
926 fieldsList = [
"maxURLs",
"maxResources",
"maxErrors"]
927 for fieldName
in fieldsList:
928 setattr(event.eventObj, fieldName, self.
fixField(getattr(event.eventObj, fieldName), nodes, fieldName))
930 logger.error(
"Error %s", str(e))
940 if value
is not None:
944 ret = int(int(v) / int(d))
946 if ret < 1
and v > 0:
952 logger.debug(
"Initial value of field `%s` from %s was fixed to %s, divider %s", comment, str(value), str(ret),
def processRecrawling(self, loggingObj)
string CONFIG_RECRAWL_SITES_DRCE_TIMEOUT
def reply(self, event, reply_event)
wrapper for sending event in reply for event
def fixFields(self, event, nodes)
def on_poll_timeout(self)
def __init__(self, configParser, connectionBuilderLight=None)
string DRCE_CONNECTIONS_POOL
int COMMON_COMMANDS_THREADING_MULTI
def prepareDRCERequest(self, eventType, eventObj)
int COMMON_COMMANDS_THREADING_SIMPLE
string CONFIG_RECRAWL_SITES_ITER_PERIOD
string CONFIG_DEFAULT_RECRAWL_DELETE_OLD_CRITERION
string CONFIG_DRCE_DB_APP_NAME
string CONFIG_POLLING_TIMEOUT
def updateStatField(self, field_name, value, operation=STAT_FIELDS_OPERATION_ADD)
Update the value of a stat field; by default the given value is summed (added).
def onEventsHandler(self, event)
string CONFIG_DEFAULT_RECRAWL_DELETE_OLD
wrapper for TaskExecuteStruct
string CONFIG_RECRAWL_SITES_OPTIMIZE
string SITE_PROPERTIES_RECRAWL_PERIOD_MODE_NAME
string POLL_TIMEOUT_CONFIG_VAR_NAME
string CONFIG_RECRAWL_SITES_PERIOD_MAX
string CONFIG_RECRAWL_SITES_MODE
string CONFIG_RECRAWL_SITES_MAX_THREADS
def setEventHandler(self, eventType, handler)
Set the event handler, overwriting the current handler for eventType.
def addConnection(self, name, connection)
string CONFIG_RECRAWL_DELAY_BEFORE
This is app base class for management server connection end-points and parallel transport messages pr...
int STAT_FIELDS_OPERATION_SET
def getSitesFromClientResponseItems(self, clientResponseItems)
string CONFIG_DEFAULT_RECRAWL_UPDATE_CRITERION
string CONFIG_RECRAWL_SITES_PERIOD_STEP
string SITE_PROPERTIES_RECRAWL_PERIOD_STEP_NAME
wrapper for Session fields array of execute task
def on_poll_timeout(self)
Function is called every time a ConnectionTimeout exception arrives.
def fixField(self, value, divider, comment)
def eventsHandlerTS(self, event, loggingObj)
int STAT_FIELDS_OPERATION_ADD
string SITE_PROPERTIES_RECRAWL_PERIOD_MIN_NAME
UIDGenerator is used to generate unique message id.
def sendToDRCERouter(self, request, persistentDCREConnection=True, timeout=-1, connectionParams=None)
Class hides the routines for building connection objects.
statFields
stat fields container
string CONFIG_RECRAWL_SITES_RECRAWL_DATE_EXP
def getDRCEConnectionParamsFromPool(self, eventType)
string SITE_PROPERTIES_RECRAWL_OPTIMIZE_NAME
string CONFIG_RECRAWL_DELAY_AFTER
string CONFIG_RECRAWL_SITES_MAX
string SITE_PROPERTIES_RECRAWL_DELETE_NAME
string SITE_PROPERTIES_RECRAWL_WHERE_NAME
string SITE_PROPERTIES_RECRAWL_DELETE_WHERE_NAME
string CONFIG_PURGE_METHOD
string CONFIG_DRCE_TIMEOUT
string SITE_RECRAWL_THREAD_NAME_PREFIX
string CONFIG_COMMON_COMMANDS_THREADING_MODE
string SITE_PROPERTIES_RECRAWL_PERIOD_MAX_NAME
def logGeneralResponseResults(self, clientResponse)
string CONFIG_RECRAWL_SITES_PERIOD_MIN
int STAT_FIELDS_OPERATION_SUB
string CONFIG_RECRAWL_SITES_SELECT_CRITERION
IDGenerator is used to generate unique id for connections.
def recalculateRecrawlPeriod(self, siteObj)
Converter used to convert Task*Request objects to JSON and TaskResponse objects from JSON.
string CONFIG_RECRAWL_SITES_PERIOD_MODE
string COMMON_COMMANDS_THREAD_NAME_PREFIX
string CONFIG_RECRAWL_SITES_SELECT_ORDER
string CONFIG_RECRAWL_SITES_LOCK_STATE
int STAT_FIELDS_OPERATION_INIT
def processDRCERequest(self, taskExecuteStruct, persistentDCREConnection=True, timeout=-1, connectionParams=None)