HCE Project Python language Distributed Tasks Manager Application, Distributed Crawler Application and client API bindings.  2.0.0-chaika
Hierarchical Cluster Engine Python language binding
batch_generator Namespace Reference

Variables

int EXIT_SUCCESS = 0
 
int EXIT_FAILURE = 1
 
string LOGGER_NAME = "batch_generator"
 
string MSG_ERROR_READ_BATCH = "ERROR READ BATCH FROM STDIN"
 
 filename
 
 filemode
 
 logger = logging.getLogger(LOGGER_NAME)
 
int error = EXIT_SUCCESS
 
 input_json = sys.stdin.read()
 
 batch_data = json.loads(input_json)
 
 site_id = batch_data["site_id"]
 
 urls = batch_data["urls"]
 
 tags = batch_data["tags"]
 
 id = batch_data["id"]
 
int item_no = 1
 
list batch_items = []
 
 item_url = item["url"]
 
 item_site_id = None
 
 sid = item_site_id or site_id
 
 uid = hashlib.md5(item_url).hexdigest()
 
 url_obj = dc_event.URL(sid, item_url)
 
 batch_item = dc_event.BatchItem(sid, uid, url_obj)
 
 batch_obj = dc_event.Batch(id, batch_items, dc_event.Batch.TYPE_REAL_TIME_CRAWLER)
 

Detailed Description

  HCE project, Python bindings, Distributed Tasks Manager application.
  Event object definitions.
  
  @package: dc
  @file batch_generator.py
  @author Oleksii <developers.hce@gmail.com>
  @link: http://hierarchical-cluster-engine.com/
  @copyright: Copyright © 2013-2014 IOIX Ukraine
  @license: http://hierarchical-cluster-engine.com/license/
  @since: 0.1

Variable Documentation

◆ batch_data

batch_generator.batch_data = json.loads(input_json)

Definition at line 46 of file batch_generator.py.

◆ batch_item

batch_generator.batch_item = dc_event.BatchItem(sid, uid, url_obj)

Definition at line 64 of file batch_generator.py.

◆ batch_items

list batch_generator.batch_items = []

Definition at line 53 of file batch_generator.py.

◆ batch_obj

batch_generator.batch_obj = dc_event.Batch(id, batch_items, dc_event.Batch.TYPE_REAL_TIME_CRAWLER)

Definition at line 66 of file batch_generator.py.

◆ error

int batch_generator.error = EXIT_SUCCESS

Definition at line 44 of file batch_generator.py.

◆ EXIT_FAILURE

int batch_generator.EXIT_FAILURE = 1

Definition at line 31 of file batch_generator.py.

◆ EXIT_SUCCESS

int batch_generator.EXIT_SUCCESS = 0

Definition at line 30 of file batch_generator.py.

◆ filemode

batch_generator.filemode

Definition at line 38 of file batch_generator.py.

◆ filename

batch_generator.filename

Definition at line 38 of file batch_generator.py.

◆ id

batch_generator.id = batch_data["id"]

Definition at line 50 of file batch_generator.py.

◆ input_json

batch_generator.input_json = sys.stdin.read()

Definition at line 45 of file batch_generator.py.

◆ item_no

int batch_generator.item_no = 1

Definition at line 52 of file batch_generator.py.

◆ item_site_id

batch_generator.item_site_id = None

Definition at line 56 of file batch_generator.py.

◆ item_url

batch_generator.item_url = item["url"]

Definition at line 55 of file batch_generator.py.

◆ logger

batch_generator.logger = logging.getLogger(LOGGER_NAME)

Definition at line 39 of file batch_generator.py.

◆ LOGGER_NAME

string batch_generator.LOGGER_NAME = "batch_generator"

Definition at line 33 of file batch_generator.py.

◆ MSG_ERROR_READ_BATCH

string batch_generator.MSG_ERROR_READ_BATCH = "ERROR READ BATCH FROM STDIN"

Definition at line 35 of file batch_generator.py.

◆ sid

batch_generator.sid = item_site_id or site_id

Definition at line 61 of file batch_generator.py.

◆ site_id

batch_generator.site_id = batch_data["site_id"]

Definition at line 47 of file batch_generator.py.

◆ tags

batch_generator.tags = batch_data["tags"]

Definition at line 49 of file batch_generator.py.

◆ uid

batch_generator.uid = hashlib.md5(item_url).hexdigest()

Definition at line 62 of file batch_generator.py.

◆ url_obj

batch_generator.url_obj = dc_event.URL(sid, item_url)

Definition at line 63 of file batch_generator.py.

◆ urls

batch_generator.urls = batch_data["urls"]

Definition at line 48 of file batch_generator.py.