HCE Project Python language Distributed Tasks Manager Application, Distributed Crawler Application and client API bindings.
2.0.0-chaika
Hierarchical Cluster Engine Python language binding
Public Member Functions
def __init__(self, schema=None, siteId=None, urlSchemaDataDir=None)
def readJsonFile(self, fileName)
def schemaPredefined(self, inUrl, parametrs)
def schemaIncrementalInt(self, inUrl, parameters, maxItems)
def replaceSchemaIncrementalInt(self, inUrl, macroName, minPos, maxPos, step)
def schemaRandomInt(self, inUrl, parametrs)
def schemaRandomStr(self, inUrl, parametrs)
def saveJsonInFile(self, fileName)
def resolveParametersByHTTP(self, urls, defaultValue=None)
def resolveParametersByFormat(self, parameters, delimiter=' ', formatValue='json', defaultValue=None)
def urlEncodeToParameters(self, parameters, urlEncode)
def getMaxCountParameters(self, parameters)
def resolveParametersFromFile(self, fileName, defaultValue=None)
def generateUrlSchema(self, inUrl)
Public Attributes
batchInsert
externalError
indexFileName
indexStruct
schema
Static Public Attributes
int SCHEMA_DISABLE = 0
int SCHEMA_PREDEFINED = 1
int SCHEMA_INCREMENTAL_INT = 2
int SCHEMA_RANDOM_INT = 3
int SCHEMA_RANDOM_STR = 4
int CHAR_ASCII_LATIN = 0
int CHAR_HEXADECIMAL = 1
int CHAR_LOWER = 0
int CHAR_UPPER = 1
int MODE_ONE_URL = 0
int MODE_LIST_URLS = 1
int BATCH_INSERT_NO_ONE_ITEMS = 0
int BATCH_INSERT_ALL_NEW_ITEMS = 1
int BATCH_INSERT_ONLY_FIRST_ITEM = 2
int BATCH_INSERT_DEFAULT = BATCH_INSERT_NO_ONE_ITEMS
int BATCH_INSERT_MIN_ALLOWED_VALUE = BATCH_INSERT_NO_ONE_ITEMS
int BATCH_INSERT_MAX_ALLOWED_VALUE = BATCH_INSERT_ONLY_FIRST_ITEM
string JSON_SUFF = ".json"
string URL_SCHEMA_DATA_FILE_NAME_PREFIX = "url_schema_data_"
Definition at line 32 of file UrlSchema.py.
def dc_crawler.UrlSchema.UrlSchema.__init__(self, schema=None, siteId=None, urlSchemaDataDir=None)
Definition at line 62 of file UrlSchema.py.
def dc_crawler.UrlSchema.UrlSchema.generateUrlSchema(self, inUrl)
Definition at line 486 of file UrlSchema.py.
def dc_crawler.UrlSchema.UrlSchema.getMaxCountParameters(self, parameters)
def dc_crawler.UrlSchema.UrlSchema.readJsonFile(self, fileName)
Definition at line 97 of file UrlSchema.py.
def dc_crawler.UrlSchema.UrlSchema.replaceSchemaIncrementalInt(self, inUrl, macroName, minPos, maxPos, step)
def dc_crawler.UrlSchema.UrlSchema.resolveParametersByFormat(self, parameters, delimiter=' ', formatValue='json', defaultValue=None)
Definition at line 366 of file UrlSchema.py.
def dc_crawler.UrlSchema.UrlSchema.resolveParametersByHTTP(self, urls, defaultValue=None)
def dc_crawler.UrlSchema.UrlSchema.resolveParametersFromFile(self, fileName, defaultValue=None)
Definition at line 450 of file UrlSchema.py.
def dc_crawler.UrlSchema.UrlSchema.saveJsonInFile(self, fileName)
def dc_crawler.UrlSchema.UrlSchema.schemaIncrementalInt(self, inUrl, parameters, maxItems)
Definition at line 176 of file UrlSchema.py.
def dc_crawler.UrlSchema.UrlSchema.schemaPredefined(self, inUrl, parametrs)
def dc_crawler.UrlSchema.UrlSchema.schemaRandomInt(self, inUrl, parametrs)
Definition at line 277 of file UrlSchema.py.
def dc_crawler.UrlSchema.UrlSchema.schemaRandomStr(self, inUrl, parametrs)
def dc_crawler.UrlSchema.UrlSchema.urlEncodeToParameters(self, parameters, urlEncode)
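int dc_crawler.UrlSchema.UrlSchema.BATCH_INSERT_ALL_NEW_ITEMS = 1 [static]
Definition at line 49 of file UrlSchema.py.
int dc_crawler.UrlSchema.UrlSchema.BATCH_INSERT_DEFAULT = BATCH_INSERT_NO_ONE_ITEMS [static]
Definition at line 51 of file UrlSchema.py.
int dc_crawler.UrlSchema.UrlSchema.BATCH_INSERT_MAX_ALLOWED_VALUE = BATCH_INSERT_ONLY_FIRST_ITEM [static]
Definition at line 53 of file UrlSchema.py.
int dc_crawler.UrlSchema.UrlSchema.BATCH_INSERT_MIN_ALLOWED_VALUE = BATCH_INSERT_NO_ONE_ITEMS [static]
Definition at line 52 of file UrlSchema.py.
int dc_crawler.UrlSchema.UrlSchema.BATCH_INSERT_NO_ONE_ITEMS = 0 [static]
Definition at line 48 of file UrlSchema.py.
int dc_crawler.UrlSchema.UrlSchema.BATCH_INSERT_ONLY_FIRST_ITEM = 2 [static]
Definition at line 50 of file UrlSchema.py.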
dc_crawler.UrlSchema.UrlSchema.batchInsert |
Definition at line 63 of file UrlSchema.py.
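int dc_crawler.UrlSchema.UrlSchema.CHAR_ASCII_LATIN = 0 [static]
Definition at line 40 of file UrlSchema.py.
int dc_crawler.UrlSchema.UrlSchema.CHAR_HEXADECIMAL = 1 [static]
Definition at line 41 of file UrlSchema.py.
int dc_crawler.UrlSchema.UrlSchema.CHAR_LOWER = 0 [static]
Definition at line 42 of file UrlSchema.py.
int dc_crawler.UrlSchema.UrlSchema.CHAR_UPPER = 1 [static]
Definition at line 43 of file UrlSchema.py.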
dc_crawler.UrlSchema.UrlSchema.externalError |
Definition at line 64 of file UrlSchema.py.
dc_crawler.UrlSchema.UrlSchema.indexFileName |
Definition at line 65 of file UrlSchema.py.
dc_crawler.UrlSchema.UrlSchema.indexStruct |
Definition at line 66 of file UrlSchema.py.
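string dc_crawler.UrlSchema.UrlSchema.JSON_SUFF = ".json" [static]
Definition at line 55 of file UrlSchema.py.
int dc_crawler.UrlSchema.UrlSchema.MODE_LIST_URLS = 1 [static]
Definition at line 46 of file UrlSchema.py.
int dc_crawler.UrlSchema.UrlSchema.MODE_ONE_URL = 0 [static]
Definition at line 45 of file UrlSchema.py.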
dc_crawler.UrlSchema.UrlSchema.schema |
Definition at line 69 of file UrlSchema.py.
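int dc_crawler.UrlSchema.UrlSchema.SCHEMA_DISABLE = 0 [static]
Definition at line 34 of file UrlSchema.py.
int dc_crawler.UrlSchema.UrlSchema.SCHEMA_INCREMENTAL_INT = 2 [static]
Definition at line 36 of file UrlSchema.py.
int dc_crawler.UrlSchema.UrlSchema.SCHEMA_PREDEFINED = 1 [static]
Definition at line 35 of file UrlSchema.py.
int dc_crawler.UrlSchema.UrlSchema.SCHEMA_RANDOM_INT = 3 [static]
Definition at line 37 of file UrlSchema.py.
int dc_crawler.UrlSchema.UrlSchema.SCHEMA_RANDOM_STR = 4 [static]
Definition at line 38 of file UrlSchema.py.
string dc_crawler.UrlSchema.UrlSchema.URL_SCHEMA_DATA_FILE_NAME_PREFIX = "url_schema_data_" [static]
Definition at line 56 of file UrlSchema.py.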