HCE Project Python language Distributed Tasks Manager Application, Distributed Crawler Application and client API bindings.  2.0.0-chaika
Hierarchical Cluster Engine Python language binding
dc_crawler.UrlSchema.UrlSchema Class Reference
Inheritance diagram for dc_crawler.UrlSchema.UrlSchema:
Collaboration diagram for dc_crawler.UrlSchema.UrlSchema:

Public Member Functions

def __init__ (self, schema=None, siteId=None, urlSchemaDataDir=None)
 
def readJsonFile (self, fileName)
 
def schemaPredefined (self, inUrl, parametrs)
 
def schemaIncrementalInt (self, inUrl, parameters, maxItems)
 
def replaceSchemaIncrementalInt (self, inUrl, macroName, minPos, maxPos, step)
 
def schemaRandomInt (self, inUrl, parametrs)
 
def schemaRandomStr (self, inUrl, parametrs)
 
def saveJsonInFile (self, fileName)
 
def resolveParametersByHTTP (self, urls, defaultValue=None)
 
def resolveParametersByFormat (self, parameters, delimiter=' ', formatValue='json', defaultValue=None)
 
def urlEncodeToParameters (self, parameters, urlEncode)
 
def getMaxCountParameters (self, parameters)
 
def resolveParametersFromFile (self, fileName, defaultValue=None)
 
def generateUrlSchema (self, inUrl)
 

Public Attributes

 batchInsert
 
 externalError
 
 indexFileName
 
 indexStruct
 
 schema
 

Static Public Attributes

int SCHEMA_DISABLE = 0
 
int SCHEMA_PREDEFINED = 1
 
int SCHEMA_INCREMENTAL_INT = 2
 
int SCHEMA_RANDOM_INT = 3
 
int SCHEMA_RANDOM_STR = 4
 
int CHAR_ASCII_LATIN = 0
 
int CHAR_HEXADECIMAL = 1
 
int CHAR_LOWER = 0
 
int CHAR_UPPER = 1
 
int MODE_ONE_URL = 0
 
int MODE_LIST_URLS = 1
 
int BATCH_INSERT_NO_ONE_ITEMS = 0
 
int BATCH_INSERT_ALL_NEW_ITEMS = 1
 
int BATCH_INSERT_ONLY_FIRST_ITEM = 2
 
int BATCH_INSERT_DEFAULT = BATCH_INSERT_NO_ONE_ITEMS
 
int BATCH_INSERT_MIN_ALLOWED_VALUE = BATCH_INSERT_NO_ONE_ITEMS
 
int BATCH_INSERT_MAX_ALLOWED_VALUE = BATCH_INSERT_ONLY_FIRST_ITEM
 
string JSON_SUFF = ".json"
 
string URL_SCHEMA_DATA_FILE_NAME_PREFIX = "url_schema_data_"
 

Detailed Description

Definition at line 32 of file UrlSchema.py.

Constructor & Destructor Documentation

◆ __init__()

def dc_crawler.UrlSchema.UrlSchema.__init__ (   self,
  schema = None,
  siteId = None,
  urlSchemaDataDir = None 
)

Definition at line 62 of file UrlSchema.py.

def __init__(self, schema=None, siteId=None, urlSchemaDataDir=None):
    """Parse the JSON schema and, when a data directory is given, load the per-site index file.

    :param schema: JSON text of the URL schema. Anything json.loads() rejects (including the
                   default None) is reported through ExceptionLog and leaves self.schema as None;
                   in that case the data directory is not touched and indexFileName stays None.
    :param siteId: value stringified into the index file name.
    :param urlSchemaDataDir: directory holding the index files; creation is attempted when it
                             does not exist. Ignored unless it is a string.
    """
    self.batchInsert = self.BATCH_INSERT_DEFAULT
    self.externalError = APP_CONSTS.ERROR_OK
    self.indexFileName = None
    self.indexStruct = None
    try:
        self.schema = json.loads(schema)

        if isinstance(urlSchemaDataDir, basestring):
            dataDir = urlSchemaDataDir
            if not os.path.isdir(dataDir):
                logger.debug("Create urlSchemaDataDir: %s", str(dataDir))
                try:
                    os.makedirs(dataDir)
                except OSError as err:
                    # Best effort only: a failure here is logged, not raised.
                    logger.debug("Creation of %s return error: %s", str(dataDir), str(err))

            # The file name is appended by plain concatenation, so force a trailing slash.
            if dataDir[-1] != '/':
                dataDir = dataDir + '/'
            indexFile = dataDir + self.URL_SCHEMA_DATA_FILE_NAME_PREFIX + str(siteId) + self.JSON_SUFF
            self.indexFileName = indexFile

            if not os.path.isfile(indexFile):
                self.indexStruct = None
            else:
                self.indexStruct = self.readJsonFile(indexFile)
                logger.debug(">>> readJsonFile '" + str(indexFile) + "' - SUCCESS")

    except Exception as excp:
        ExceptionLog.handler(logger, excp, ">>> UrlSchema wrong json loads")
        self.schema = None
def __init__(self)
constructor
Definition: UIDGenerator.py:19

Member Function Documentation

◆ generateUrlSchema()

def dc_crawler.UrlSchema.UrlSchema.generateUrlSchema (   self,
  inUrl 
)

Definition at line 486 of file UrlSchema.py.

def generateUrlSchema(self, inUrl):
    """Expand inUrl into a list of URLs according to self.schema.

    Parameters are resolved in order from "urls" (HTTP), "file_path" (file), then post-processed
    by "format"/"delimiter" and "url_encode". "batch_insert" is accepted only inside the allowed
    range. In MODE_LIST_URLS up to "max_items" URLs are produced, otherwise a single pass is made.
    Any exception is reported through ExceptionLog and whatever was collected so far is returned.
    The index structure is saved to self.indexFileName afterwards.

    :param inUrl: URL template containing %name% macros.
    :return: list of generated URLs (possibly empty).
    """
    ret = []
    itemsLen = 1
    if self.schema is not None:
        try:
            schema = self.schema  # same dict object, resolved parameters are stored back into it

            if "urls" in schema:
                schema["parameters"] = self.resolveParametersByHTTP(schema["urls"])

            if "file_path" in schema:
                # The file may be the only source of parameters, so an absent "parameters"
                # key must fall back to None instead of raising KeyError.
                schema["parameters"] = self.resolveParametersFromFile(schema["file_path"],
                                                                      schema.get("parameters"))

            if "format" in schema:
                delimiter = schema.get("delimiter", ' ')
                schema["parameters"] = self.resolveParametersByFormat(schema["parameters"], delimiter,
                                                                      schema["format"],
                                                                      schema["parameters"])

            if "url_encode" in schema:
                schema["parameters"] = self.urlEncodeToParameters(schema["parameters"], schema["url_encode"])

            if "batch_insert" in schema:
                batchInsert = int(schema["batch_insert"])
                if self.BATCH_INSERT_MIN_ALLOWED_VALUE <= batchInsert <= self.BATCH_INSERT_MAX_ALLOWED_VALUE:
                    self.batchInsert = batchInsert

            # get max count parameters
            # NOTE(review): only list-valued parameters are counted, so for the random schema
            # types this is 0 and the loop below stops after the first URL - confirm intended.
            maxCountParameters = self.getMaxCountParameters(schema["parameters"])
            logger.debug('maxCountParameters: ' + str(maxCountParameters))

            if schema["mode"] == self.MODE_LIST_URLS:
                itemsLen = int(schema["max_items"])

            for _ in xrange(0, itemsLen):
                localRet = inUrl
                schemaType = schema["type"]
                if schemaType == self.SCHEMA_PREDEFINED:
                    localRet = self.schemaPredefined(inUrl, schema["parameters"])

                elif schemaType == self.SCHEMA_INCREMENTAL_INT:
                    # Returns the full URL list at once; localRet stays equal to inUrl,
                    # so the loop ends through the else branch below.
                    ret = self.schemaIncrementalInt(inUrl, schema["parameters"], itemsLen)

                elif schemaType == self.SCHEMA_RANDOM_INT:
                    localRet = self.schemaRandomInt(inUrl, schema["parameters"])

                elif schemaType == self.SCHEMA_RANDOM_STR:
                    localRet = self.schemaRandomStr(inUrl, schema["parameters"])

                if localRet != inUrl and localRet not in ret:
                    ret.append(localRet)
                    if len(ret) >= int(maxCountParameters):
                        logger.debug('>>> break len(ret) = ' + str(len(ret)))
                        break
                else:
                    # Nothing new was generated (no macro replaced or a duplicate): stop.
                    break
        except Exception as excp:
            ExceptionLog.handler(logger, excp, ">>> generateUrlSchema has some error")
        self.saveJsonInFile(self.indexFileName)
    logger.debug(">>> urlSchema len = " + str(len(ret)))
    return ret
Here is the call graph for this function:

◆ getMaxCountParameters()

def dc_crawler.UrlSchema.UrlSchema.getMaxCountParameters (   self,
  parameters 
)

Definition at line 435 of file UrlSchema.py.

def getMaxCountParameters(self, parameters):
    """Return the length of the longest list among the values of parameters.

    Values that are not lists are ignored; 0 is returned when there is no list value.

    :param parameters: mapping of parameter name to value.
    :return: int, maximal list length or 0.
    """
    longest = 0
    for candidate in parameters.values():
        if isinstance(candidate, list) and len(candidate) > longest:
            longest = len(candidate)
    return longest
Here is the caller graph for this function:

◆ readJsonFile()

def dc_crawler.UrlSchema.UrlSchema.readJsonFile (   self,
  fileName 
)

Definition at line 97 of file UrlSchema.py.

def readJsonFile(self, fileName):
    """Read fileName and return its decoded JSON content.

    Every error (missing file, unreadable file, invalid JSON) is logged at debug level
    and an empty dict is returned instead.

    :param fileName: path of the JSON file.
    :return: decoded JSON value, or {} on any failure.
    """
    ret = {}
    try:
        # The context manager closes the file on both the success and the error path.
        with open(fileName, "r") as fd:
            ret = json.loads(fd.read())
    except Exception as err:
        logger.debug(">>> readJsonFile error, file name = " + str(fileName) + " | " + str(err))
    return ret

◆ replaceSchemaIncrementalInt()

def dc_crawler.UrlSchema.UrlSchema.replaceSchemaIncrementalInt (   self,
  inUrl,
  macroName,
  minPos,
  maxPos,
  step 
)

Definition at line 256 of file UrlSchema.py.

def replaceSchemaIncrementalInt(self, inUrl, macroName, minPos, maxPos, step):
    """Build one URL per value of range(minPos, maxPos, step) by substituting macroName.

    URLs equal to inUrl (macro not present) and duplicates are left out.

    :param inUrl: URL template.
    :param macroName: macro text to replace, e.g. '%page%'.
    :param minPos: first value (inclusive).
    :param maxPos: end value (exclusive).
    :param step: increment between values.
    :return: list of generated URLs.
    """
    urls = []
    for value in range(minPos, maxPos, step):
        candidate = copy.copy(inUrl)
        logger.debug("Before replace inUrl = " + str(candidate))
        candidate = candidate.replace(macroName, str(value))
        logger.debug("After replace inUrl = " + str(candidate))

        if inUrl != candidate and candidate not in urls:
            urls.append(candidate)

    return urls
Here is the caller graph for this function:

◆ resolveParametersByFormat()

def dc_crawler.UrlSchema.UrlSchema.resolveParametersByFormat (   self,
  parameters,
  delimiter = ' ',
  formatValue = 'json',
  defaultValue = None 
)

Definition at line 366 of file UrlSchema.py.

def resolveParametersByFormat(self, parameters, delimiter=' ', formatValue='json', defaultValue=None):
    """Convert parameter values according to formatValue.

    'json' returns parameters untouched. 'plain-text' splits every string value in place into a
    list of non-empty unicode items: by lines when delimiter is "", otherwise by delimiter.
    Any other format is logged as an error and defaultValue is returned.

    :param parameters: mapping of parameter name to value (modified in place for 'plain-text').
    :param delimiter: separator for 'plain-text' values; "" means split by lines.
    :param formatValue: 'json' or 'plain-text'.
    :param defaultValue: result for an unsupported format.
    :return: the resolved parameters or defaultValue.
    """
    logger.debug('!!! parameters: ' + str(parameters))

    if formatValue == 'json':
        return parameters

    if formatValue != 'plain-text':
        logger.error("Unsupported format value: '" + str(formatValue) + "'")
        return defaultValue

    for paramName in parameters:
        logger.debug("paramName: '" + str(paramName) + "' type: " + str(type(paramName)))
        logger.debug("paramValue: '" + str(parameters[paramName]) + "' type: " + str(type(parameters[paramName])))

        if not isinstance(parameters[paramName], basestring):
            continue

        text = unicode(parameters[paramName])
        if delimiter == "":
            pieces = text.splitlines()
        else:
            pieces = text.split(delimiter)

        # Keep only non-empty items.
        parameters[paramName] = [piece for piece in pieces if piece]

    logger.debug('!!! ret: ' + str(parameters))
    return parameters
Here is the caller graph for this function:

◆ resolveParametersByHTTP()

def dc_crawler.UrlSchema.UrlSchema.resolveParametersByHTTP (   self,
  urls,
  defaultValue = None 
)

Definition at line 333 of file UrlSchema.py.

def resolveParametersByHTTP(self, urls, defaultValue=None):
    """Fetch parameters as JSON from the first URL in urls that answers usably.

    URLs are tried in order. A request error or undecodable body sets self.externalError to
    ERROR_URLS_SCHEMA_EXTERNAL and the next URL is tried; the first decoded non-None JSON value
    wins, resets self.externalError to ERROR_OK and ends the search.

    :param urls: iterable of URLs to query.
    :param defaultValue: result when no URL delivers parameters; None means {}.
    :return: decoded parameters, or the default.
    """
    ret = {} if defaultValue is None else defaultValue
    newParams = None
    for url in urls:
        try:
            result = requests.get(url)
        except Exception as excp:
            result = None
            self.externalError = APP_CONSTS.ERROR_URLS_SCHEMA_EXTERNAL
            logger.debug(">>> bad url request; url=" + url + ";err= " + str(excp))

        usable = result is not None and result.status_code == 200 and result.text is not None
        if usable:
            try:
                newParams = json.loads(result.text)
            except Exception as excp:
                self.externalError = APP_CONSTS.ERROR_URLS_SCHEMA_EXTERNAL
                logger.debug(">>> bad external parameters json" + str(excp))

        if newParams is not None:
            ret = newParams
            self.externalError = APP_CONSTS.ERROR_OK
            break
    return ret
Here is the caller graph for this function:

◆ resolveParametersFromFile()

def dc_crawler.UrlSchema.UrlSchema.resolveParametersFromFile (   self,
  fileName,
  defaultValue = None 
)

Definition at line 450 of file UrlSchema.py.

def resolveParametersFromFile(self, fileName, defaultValue=None):
    """Load parameters from a '<name>.json' file.

    Content starting with '{' is decoded as JSON; anything else is treated as plain text and
    returned as {"": <content>}. A file name without the JSON suffix, a read/decode error or an
    empty decoded result yields defaultValue.

    :param fileName: path of the parameters file; must end with self.JSON_SUFF.
    :param defaultValue: result when nothing could be loaded.
    :return: loaded parameters or defaultValue.
    """
    logger.debug(">>> resolveParametersFromFile enter fileName: " + str(fileName))
    # variable for result
    ret = defaultValue
    parameters = {}

    # endswith() is a real suffix test; comparing find() with the expected offset accepted
    # names shorter than the suffix (-1 == -1) and rejected names containing it twice.
    if fileName.endswith(self.JSON_SUFF):
        try:
            with open(fileName, "r") as fd:
                buff = fd.read()
            if len(buff) > 0 and buff[0] == '{':  # maybe 'json'
                parameters = json.loads(buff)
            else:
                parameters = {"": buff}  # maybe 'plain-text'
        except Exception as err:
            logger.debug(">>> resolveParametersFromFile error, file name = " + str(fileName) + " | " + str(err))

        if len(parameters) > 0:
            ret = parameters
    else:
        logger.debug("Wrong file name: '" + str(fileName) + "', expected '<file_name>.json'")

    return ret
Here is the caller graph for this function:

◆ saveJsonInFile()

def dc_crawler.UrlSchema.UrlSchema.saveJsonInFile (   self,
  fileName 
)

Definition at line 314 of file UrlSchema.py.

def saveJsonInFile(self, fileName):
    """Write self.indexStruct to fileName as UTF-8 encoded JSON.

    Nothing is written when the index is None or empty, or when fileName is None.
    Errors are reported through ExceptionLog and not raised.

    :param fileName: destination path.
    """
    if self.indexStruct is not None and len(self.indexStruct) > 0 and fileName is not None:
        try:
            # Serialize before opening: opening for writing truncates the file, so a failing
            # dump must not be able to destroy the previously saved index.
            payload = json.dumps(self.indexStruct, ensure_ascii=False)
            # ensure_ascii=False may yield a text (unicode) object; encode it explicitly so
            # non-ASCII values do not fail on the implicit ASCII conversion when written.
            if not isinstance(payload, bytes):
                payload = payload.encode("utf-8")
            with open(fileName, "wb") as fd:
                fd.write(payload)
        except Exception as err:
            ExceptionLog.handler(logger, err, ">>> saveJsonInFile error, file name = " + str(fileName))
Here is the caller graph for this function:

◆ schemaIncrementalInt()

def dc_crawler.UrlSchema.UrlSchema.schemaIncrementalInt (   self,
  inUrl,
  parameters,
  maxItems 
)

Definition at line 176 of file UrlSchema.py.

def schemaIncrementalInt(self, inUrl, parameters, maxItems):
    """Generate URLs by substituting an incrementing integer for each %name% macro found in inUrl.

    For a matching parameter with "min", "max" and "step":
    - when maxItems is greater than "max", the whole min..max range is generated;
    - otherwise a window of at most maxItems steps is generated, starting at the position stored
      in self.indexStruct (or at "min"), and the next start position is stored back; it wraps to
      0 once "max" is reached.
    The result of the last matching parameter replaces any earlier result.

    :param inUrl: URL template.
    :param parameters: mapping of parameter name to a dict with "min", "max", "step".
    :param maxItems: maximal number of URLs wanted.
    :return: list of generated URLs.
    """
    logger.debug("schemaIncrementalInt() enter ... parameters: " + str(parameters) + "\ninUrl: " + str(inUrl) + \
                 "\nmaxItems: " + str(maxItems))
    ret = []
    for paramKey in parameters:
        macroName = '%' + paramKey + '%'
        if inUrl.find(macroName) < 0:
            continue

        spec = parameters[paramKey]
        upper = int(spec["max"])

        if maxItems > upper:
            ret = self.replaceSchemaIncrementalInt(inUrl, macroName, int(spec["min"]), upper, int(spec["step"]))
            continue

        logger.debug("Start self.indexStruct: %s", varDump(self.indexStruct))
        if self.indexStruct is not None and paramKey in self.indexStruct:
            minPos = int(self.indexStruct[paramKey])
            logger.debug("minPos from structure = " + str(minPos))
        else:
            minPos = int(spec["min"])

        # End of the window, clamped to the configured maximum.
        nextPos = min(maxItems * int(spec["step"]) + minPos, upper)

        ret = self.replaceSchemaIncrementalInt(inUrl, macroName, minPos, nextPos, int(spec["step"]))

        if self.indexStruct is None:
            self.indexStruct = {}

        logger.debug("nextPos = " + str(nextPos))
        if nextPos >= upper:
            nextPos = 0

        logger.debug("nextIndex after truncate = " + str(nextPos))
        self.indexStruct[paramKey] = nextPos

        logger.debug("Finish self.indexStruct: %s", varDump(self.indexStruct))

    return ret
def varDump(obj, stringify=True, strTypeMaxLen=256, strTypeCutSuffix='...', stringifyType=1, ignoreErrors=False, objectsHash=None, depth=0, indent=2, ensure_ascii=False, maxDepth=10)
Definition: Utils.py:410
Here is the call graph for this function:
Here is the caller graph for this function:

◆ schemaPredefined()

def dc_crawler.UrlSchema.UrlSchema.schemaPredefined (   self,
  inUrl,
  parametrs 
)

Definition at line 117 of file UrlSchema.py.

def schemaPredefined(self, inUrl, parametrs):
    """Replace each %name% macro in inUrl with the least frequently used predefined value.

    Usage statistics ({"frequency", "time"} per value) are kept in self.indexStruct[name];
    values not seen before start at frequency 0. The chosen value gets its frequency
    incremented and its time set to the current timestamp.

    :param inUrl: URL template.
    :param parametrs: mapping of parameter name to an iterable of candidate values.
    :return: URL with the found macros replaced (unchanged when no macro matched).
    """
    for paramKey in parametrs:
        macroName = '%' + paramKey + '%'
        if inUrl.find(macroName) < 0:
            continue

        if self.indexStruct is None:
            self.indexStruct = {}
        # Add or extend only this parameter's entry. Replacing the whole index here would
        # throw away the statistics already collected for every other parameter.
        usage = self.indexStruct.setdefault(paramKey, {})
        for val in parametrs[paramKey]:
            if val not in usage:
                usage[val] = {"frequency": 0, "time": 0}

        paramList = []
        frequencyList = []
        for key, element in usage.items():
            # Entries without both counters are not candidates.
            if "frequency" in element and "time" in element:
                paramList.append(key)
                frequencyList.append(int(element["frequency"]))

        # min() raises ValueError when there is no candidate at all.
        mixIndex = frequencyList.index(min(frequencyList))
        chosen = paramList[mixIndex]
        logger.debug('mixIndex: ' + str(mixIndex))
        logger.debug('paramList[mixIndex]: ' + str(chosen))

        logger.debug('>>> inUrl 1: ' + str(inUrl))
        inUrl = unicode(inUrl.replace(macroName, chosen))
        logger.debug('>>> inUrl 2: ' + str(inUrl))

        usage[chosen] = {"frequency": frequencyList[mixIndex] + 1,
                         "time": int((datetime.now() - datetime.fromtimestamp(0)).total_seconds())}

    return inUrl
Here is the caller graph for this function:

◆ schemaRandomInt()

def dc_crawler.UrlSchema.UrlSchema.schemaRandomInt (   self,
  inUrl,
  parametrs 
)

Definition at line 277 of file UrlSchema.py.

def schemaRandomInt(self, inUrl, parametrs):
    """Replace each %name% macro found in inUrl with a random integer from [min, max].

    :param inUrl: URL template.
    :param parametrs: mapping of parameter name to a dict with "min" and "max" (both inclusive).
    :return: URL with the found macros replaced.
    """
    for paramKey in parametrs:
        placeholder = '%' + paramKey + '%'
        if placeholder in inUrl:
            limits = parametrs[paramKey]
            number = random.randint(limits["min"], limits["max"])
            inUrl = inUrl.replace(placeholder, str(number))
    return inUrl
Here is the caller graph for this function:

◆ schemaRandomStr()

def dc_crawler.UrlSchema.UrlSchema.schemaRandomStr (   self,
  inUrl,
  parametrs 
)

Definition at line 290 of file UrlSchema.py.

def schemaRandomStr(self, inUrl, parametrs):
    """Replace each %name% macro found in inUrl with a random string.

    The string length is drawn from [min, max]; characters come from lowercase ASCII letters
    (CHAR_ASCII_LATIN) or lowercase hexadecimal digits (CHAR_HEXADECIMAL); the result is
    lower- or upper-cased according to "case" (CHAR_LOWER / CHAR_UPPER).

    :param inUrl: URL template.
    :param parametrs: mapping of parameter name to a dict with "min", "max", "chars", "case".
    :return: URL with the found macros replaced.
    """
    letters = string.ascii_lowercase
    hexChars = ''.join(ch for ch in string.hexdigits if not ch.isupper())
    for paramKey in parametrs:
        macroName = '%' + paramKey + '%'
        if macroName not in inUrl:
            continue

        spec = parametrs[paramKey]
        picked = []
        for _ in range(random.randint(spec["min"], spec["max"])):
            charsKind = spec["chars"]
            if charsKind == self.CHAR_ASCII_LATIN:
                picked.append(letters[random.randint(0, len(letters) - 1)])
            elif charsKind == self.CHAR_HEXADECIMAL:
                picked.append(hexChars[random.randint(0, len(hexChars) - 1)])
        valueStr = ''.join(picked)

        caseKind = spec["case"]
        if caseKind == self.CHAR_LOWER:
            valueStr = valueStr.lower()
        elif caseKind == self.CHAR_UPPER:
            valueStr = valueStr.upper()

        inUrl = inUrl.replace(macroName, valueStr)
    return inUrl
Definition: join.py:1
Here is the caller graph for this function:

◆ urlEncodeToParameters()

def dc_crawler.UrlSchema.UrlSchema.urlEncodeToParameters (   self,
  parameters,
  urlEncode 
)

Definition at line 403 of file UrlSchema.py.

def urlEncodeToParameters(self, parameters, urlEncode):
    """URL-encode the string items of list-valued parameters when urlEncode is enabled.

    With urlEncode None or not greater than 0 the parameters are returned untouched. Otherwise
    every value that is a list (or a unicode string, which is iterated per character) is replaced
    in place by the list of its encoded string items; non-string items are dropped.

    :param parameters: mapping of parameter name to value (modified in place when enabled).
    :param urlEncode: flag, anything int() turns into a number greater than 0 enables encoding.
    :return: the parameters mapping.
    """
    # variable for result
    ret = parameters

    if urlEncode is not None and int(urlEncode) > 0:
        for paramName in parameters:
            values = parameters[paramName]
            if isinstance(values, (list, unicode)):
                paramsList = []
                for elem in values:
                    if isinstance(elem, basestring):
                        try:
                            # Encode unicode to UTF-8 bytes first: the implicit str() conversion
                            # fails on non-ASCII text, which left such values unencoded.
                            raw = elem.encode('utf-8') if isinstance(elem, unicode) else elem
                            paramsList.append(urllib.quote_plus(raw))
                        except Exception as err:
                            logger.debug("urlencode '" + str(elem) + "' has error: " + str(err))
                            paramsList.append(unicode(elem))

                parameters[paramName] = paramsList

        ret = parameters

    return ret
Here is the caller graph for this function:

Member Data Documentation

◆ BATCH_INSERT_ALL_NEW_ITEMS

int dc_crawler.UrlSchema.UrlSchema.BATCH_INSERT_ALL_NEW_ITEMS = 1
static

Definition at line 49 of file UrlSchema.py.

◆ BATCH_INSERT_DEFAULT

int dc_crawler.UrlSchema.UrlSchema.BATCH_INSERT_DEFAULT = BATCH_INSERT_NO_ONE_ITEMS
static

Definition at line 51 of file UrlSchema.py.

◆ BATCH_INSERT_MAX_ALLOWED_VALUE

int dc_crawler.UrlSchema.UrlSchema.BATCH_INSERT_MAX_ALLOWED_VALUE = BATCH_INSERT_ONLY_FIRST_ITEM
static

Definition at line 53 of file UrlSchema.py.

◆ BATCH_INSERT_MIN_ALLOWED_VALUE

int dc_crawler.UrlSchema.UrlSchema.BATCH_INSERT_MIN_ALLOWED_VALUE = BATCH_INSERT_NO_ONE_ITEMS
static

Definition at line 52 of file UrlSchema.py.

◆ BATCH_INSERT_NO_ONE_ITEMS

int dc_crawler.UrlSchema.UrlSchema.BATCH_INSERT_NO_ONE_ITEMS = 0
static

Definition at line 48 of file UrlSchema.py.

◆ BATCH_INSERT_ONLY_FIRST_ITEM

int dc_crawler.UrlSchema.UrlSchema.BATCH_INSERT_ONLY_FIRST_ITEM = 2
static

Definition at line 50 of file UrlSchema.py.

◆ batchInsert

dc_crawler.UrlSchema.UrlSchema.batchInsert

Definition at line 63 of file UrlSchema.py.

◆ CHAR_ASCII_LATIN

int dc_crawler.UrlSchema.UrlSchema.CHAR_ASCII_LATIN = 0
static

Definition at line 40 of file UrlSchema.py.

◆ CHAR_HEXADECIMAL

int dc_crawler.UrlSchema.UrlSchema.CHAR_HEXADECIMAL = 1
static

Definition at line 41 of file UrlSchema.py.

◆ CHAR_LOWER

int dc_crawler.UrlSchema.UrlSchema.CHAR_LOWER = 0
static

Definition at line 42 of file UrlSchema.py.

◆ CHAR_UPPER

int dc_crawler.UrlSchema.UrlSchema.CHAR_UPPER = 1
static

Definition at line 43 of file UrlSchema.py.

◆ externalError

dc_crawler.UrlSchema.UrlSchema.externalError

Definition at line 64 of file UrlSchema.py.

◆ indexFileName

dc_crawler.UrlSchema.UrlSchema.indexFileName

Definition at line 65 of file UrlSchema.py.

◆ indexStruct

dc_crawler.UrlSchema.UrlSchema.indexStruct

Definition at line 66 of file UrlSchema.py.

◆ JSON_SUFF

string dc_crawler.UrlSchema.UrlSchema.JSON_SUFF = ".json"
static

Definition at line 55 of file UrlSchema.py.

◆ MODE_LIST_URLS

int dc_crawler.UrlSchema.UrlSchema.MODE_LIST_URLS = 1
static

Definition at line 46 of file UrlSchema.py.

◆ MODE_ONE_URL

int dc_crawler.UrlSchema.UrlSchema.MODE_ONE_URL = 0
static

Definition at line 45 of file UrlSchema.py.

◆ schema

dc_crawler.UrlSchema.UrlSchema.schema

Definition at line 69 of file UrlSchema.py.

◆ SCHEMA_DISABLE

int dc_crawler.UrlSchema.UrlSchema.SCHEMA_DISABLE = 0
static

Definition at line 34 of file UrlSchema.py.

◆ SCHEMA_INCREMENTAL_INT

int dc_crawler.UrlSchema.UrlSchema.SCHEMA_INCREMENTAL_INT = 2
static

Definition at line 36 of file UrlSchema.py.

◆ SCHEMA_PREDEFINED

int dc_crawler.UrlSchema.UrlSchema.SCHEMA_PREDEFINED = 1
static

Definition at line 35 of file UrlSchema.py.

◆ SCHEMA_RANDOM_INT

int dc_crawler.UrlSchema.UrlSchema.SCHEMA_RANDOM_INT = 3
static

Definition at line 37 of file UrlSchema.py.

◆ SCHEMA_RANDOM_STR

int dc_crawler.UrlSchema.UrlSchema.SCHEMA_RANDOM_STR = 4
static

Definition at line 38 of file UrlSchema.py.

◆ URL_SCHEMA_DATA_FILE_NAME_PREFIX

string dc_crawler.UrlSchema.UrlSchema.URL_SCHEMA_DATA_FILE_NAME_PREFIX = "url_schema_data_"
static

Definition at line 56 of file UrlSchema.py.


The documentation for this class was generated from the following file: