4 @link: http://hierarchical-cluster-engine.com/ 5 @copyright: Copyright © 2013-2014 IOIX Ukraine 6 @license: http://hierarchical-cluster-engine.com/license/ 28 CODE_ALREADY_EXIST = 2
32 def __init__(self, keyValueStorageDir, rawDataDir, dBDataTask, siteTask=None):
46 raise Exception(
">>> URLNew.siteTask object is None!")
57 SITE_EXTRACT_SQL_QUERY =
"SELECT `RequestDelay`, `HTTPTimeout`, `URLType` FROM `sites` WHERE id = '%s'" 58 res = queryCallback(SITE_EXTRACT_SQL_QUERY % urlObj.siteId, Constants.PRIMARY_DB_ID)
59 if hasattr(res,
'__iter__')
and len(res) > 0:
61 if urlObj.requestDelay
is None:
62 urlObj.requestDelay = res[0][0]
63 if urlObj.httpTimeout
is None:
64 urlObj.httpTimeout = res[0][1]
65 if urlObj.type
is None:
66 urlObj.type = res[0][2]
76 query = Constants.SELECT_SITE_ID_BY_URL % url
77 res = queryCallback(query, Constants.PRIMARY_DB_ID)
78 if hasattr(res,
'__iter__')
and len(res) > 0:
80 logger.debug(
">>> Site_Id By URL = %s", str(ret))
85 canonicUrl = Utils.UrlParser.generateDomainUrl(url)
86 if canonicUrl
is not None and len(canonicUrl) > 0
and canonicUrl[-1] !=
'/':
88 localSiteId = hashlib.md5(canonicUrl).hexdigest()
97 if urlObj.siteSelect == dc.EventObjects.URL.SITE_SELECT_TYPE_EXPLICIT:
98 if urlObj.siteId ==
"" or not self.
isSiteExist(urlObj.siteId, queryCallback):
100 elif urlObj.siteSelect == dc.EventObjects.URL.SITE_SELECT_TYPE_AUTO:
102 canonicUrl = Utils.UrlParser.generateDomainUrl(urlObj.url)
103 if canonicUrl
is not None and len(canonicUrl) > 0
and canonicUrl[-1] !=
'/':
106 logger.debug(
">>> S_NEW_ID=" + str(localSiteId))
108 urlObj.siteId = localSiteId
110 elif canonicUrl
is not None:
112 urlObj.siteId = localSiteId
114 raise Exception(
">>> canonicUrl is None !!!")
116 logger.debug(
">>> UrlParseException")
117 elif urlObj.siteSelect == dc.EventObjects.URL.SITE_SELECT_TYPE_QUALIFY_URL:
119 if not self.
isSiteExist(localSiteId, queryCallback):
121 elif urlObj.siteSelect == dc.EventObjects.URL.SITE_SELECT_TYPE_NONE:
123 if not self.
isSiteExist(localSiteId, queryCallback):
124 Exception(
">>> urlObj operation can't find siteId")
126 raise Exception(
">>> urlObj.siteSelect field has wrong value - %s" % str(urlObj.siteSelect))
136 status = URLNewTask.CODE_BAD_INSERT
137 isRelatedSite =
False 139 isRelatedSite =
False 140 if url.siteId
is None and url.siteSelect != dc.EventObjects.URL.SITE_SELECT_TYPE_EXPLICIT:
144 logger.debug(
">>> Url New main = " + url.url)
146 if not isRelatedSite:
147 logger.debug(
">>> Url New before = " + url.url)
149 logger.debug(
">>> Site_Id By URL = %s", str(url.url))
150 logger.debug(
">>> Url New after = " + url.url)
151 if url.siteId
is not None and url.siteId !=
"":
153 except Exception
as excp:
154 logger.debug(
">>> Url New operation exception = " + str(excp))
155 ret.statuses.append(status)
167 if not self.
selectURL(urlObj, queryCallback):
168 if self.
addURL(urlObj, queryCallback):
169 ret = URLNewTask.CODE_GOOD_INSERT
170 if urlObj.attributes
is not None and len(urlObj.attributes) > 0:
173 ret = URLNewTask.CODE_ALREADY_EXIST
174 if urlObj.urlUpdate
is not None:
175 logger.debug(
">>> Url New Start Internal urlUpdate")
178 if urlObj.attributes
is not None and len(urlObj.attributes) > 0:
181 self.
recalculator.commonRecalc(urlObj.siteId, queryCallback)
182 if "urlPut" in urlObj.__dict__
and urlObj.urlPut
is not None:
183 self.
urlUpdateTask.urlPutOperation(urlObj, urlObj.urlPut, queryCallback)
194 LOCAL_URL_CHECK_QUERY =
"SELECT COUNT(*) FROM `urls_%s` WHERE `URLMd5` = '%s'" 195 if urlObject.urlMd5
is not None:
196 self.
urlMd5 = urlObject.urlMd5
198 self.
urlMd5 = hashlib.md5(urlObject.url).hexdigest()
199 query = LOCAL_URL_CHECK_QUERY % (urlObject.siteId, self.
urlMd5)
200 res = queryCallback(query, Constants.SECONDARY_DB_ID)
201 if hasattr(res,
'__iter__')
and len(res) > 0
and len(res[0]) > 0
and res[0][0] > 0:
210 def addURL(self, urlObject, queryCallback):
213 fields, values = Constants.getFieldsValuesTuple(urlObject, Constants.URLTableDict)
214 fieldValueString = Constants.createFieldsValuesString(fields, values)
215 if fieldValueString
is not None and fieldValueString !=
"":
216 query = Constants.INSERT_COMMON_TEMPLATE % ((Constants.DC_URLS_TABLE_NAME_TEMPLATE % urlObject.siteId),
218 logger.debug(str(query))
219 queryCallback(query, Constants.SECONDARY_DB_ID, Constants.EXEC_NAME,
True)
230 logger.debug(
">>> Add Attributes (len) == " + str(len(attributes)))
232 res = attrSetTask.process(attributes, queryCallback)
233 logger.debug(
">>> Add Attributes (res) == " +
varDump(res))
def isSiteExist(self, siteId, queryCallback, userId=None)
def __init__(self, keyValueStorageDir, rawDataDir, dBDataTask, siteTask=None)
def calcSiteIdByUrl(self, url)
def newSiteCreate(self, initUrl, queryCallback)
GeneralResponse event object, represents general state response for multipurpose usage.
def urlInsertWithGoodSietId(self, urlObj, statusInit, queryCallback)
def siteTableOperation(self, urlObj, queryCallback)
def addURL(self, urlObject, queryCallback)
def statisticLogUpdate(self, localObj, urlMd5, siteId, status, queryCallback, isInsert=False)
def varDump(obj, stringify=True, strTypeMaxLen=256, strTypeCutSuffix='...', stringifyType=1, ignoreErrors=False, objectsHash=None, depth=0, indent=2, ensure_ascii=False, maxDepth=10)
def process(self, urls, queryCallback)
def fillSiteRelatedFields(self, urlObj, queryCallback)
def selectURL(self, urlObject, queryCallback)
def resolveSiteIdByURL(self, url, queryCallback)
def attributesSet(self, attributes, queryCallback)