HCE Project: Python-language Distributed Tasks Manager Application, Distributed Crawler Application, and client API bindings, version 2.0.0-chaika
Hierarchical Cluster Engine Python language binding
OwnRobots.py File Reference

Go to the source code of this file.

Classes

class  dc_crawler.OwnRobots._Ruleset
 
class  dc_crawler.OwnRobots.RobotExclusionRulesParser
 
class  dc_crawler.OwnRobots.RobotFileParserLookalike
 

Namespaces

 dc_crawler.OwnRobots
 

Functions

def dc_crawler.OwnRobots._raise_error (error, message)
 
def dc_crawler.OwnRobots._unquote_path (path)
 
def dc_crawler.OwnRobots._scrub_data (s)
 
def dc_crawler.OwnRobots._parse_content_type_header (header)
 

Variables

 dc_crawler.OwnRobots.PY_MAJOR_VERSION = sys.version_info[0]
 
int dc_crawler.OwnRobots.MK1996 = 1
 
int dc_crawler.OwnRobots.GYM2008 = 2
 
 dc_crawler.OwnRobots._end_of_line_regex = re.compile(r"(?:\r\n)|\r|\n")
 
 dc_crawler.OwnRobots._directive_regex = re.compile("(allow|disallow|user[-]?agent|sitemap|crawl-delay):[ \t]*(.*)", re.IGNORECASE)
 
int dc_crawler.OwnRobots.SEVEN_DAYS = 60 * 60 * 24 * 7
 
int dc_crawler.OwnRobots.MAX_FILESIZE = 100 * 1024
 
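 dc_crawler.OwnRobots._control_characters_regex = re.compile(...)  [pattern argument omitted from this generated listing; see the source file for the actual expression]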
 
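 dc_crawler.OwnRobots._charset_extraction_regex = re.compile(...)  [pattern argument omitted from this generated listing; see the source file for the actual expression]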