HCE Project: Python-language Distributed Tasks Manager Application, Distributed Crawler Application, and client API bindings.
2.0.0-chaika
Hierarchical Cluster Engine Python-language binding
|
Go to the source code of this file.
Classes | |
class | dc_crawler.OwnRobots._Ruleset |
class | dc_crawler.OwnRobots.RobotExclusionRulesParser |
class | dc_crawler.OwnRobots.RobotFileParserLookalike |
Namespaces | |
dc_crawler.OwnRobots | |
Functions | |
def | dc_crawler.OwnRobots._raise_error (error, message) |
def | dc_crawler.OwnRobots._unquote_path (path) |
def | dc_crawler.OwnRobots._scrub_data (s) |
def | dc_crawler.OwnRobots._parse_content_type_header (header) |
Variables | |
dc_crawler.OwnRobots.PY_MAJOR_VERSION = sys.version_info[0] | |
int | dc_crawler.OwnRobots.MK1996 = 1 |
int | dc_crawler.OwnRobots.GYM2008 = 2 |
dc_crawler.OwnRobots._end_of_line_regex = re.compile(r"(?:\r\n)|\r|\n") | |
dc_crawler.OwnRobots._directive_regex = re.compile("(allow|disallow|user[-]?agent|sitemap|crawl-delay):[ \t]*(.*)", re.IGNORECASE) | |
int | dc_crawler.OwnRobots.SEVEN_DAYS = 60 * 60 * 24 * 7 |
int | dc_crawler.OwnRobots.MAX_FILESIZE = 100 * 1024 |
dc_crawler.OwnRobots._control_characters_regex = re.compile() | |
dc_crawler.OwnRobots._charset_extraction_regex = re.compile() | |