The RTCPreprocessor class contains the main functionality of the preprocessor for real-time crawling; it inherits from foundation.CementApp.
More...
The RTCPreprocessor class contains the main functionality of the preprocessor for real-time crawling; it inherits from foundation.CementApp.
Definition at line 34 of file RTCPreprocessor.py.
◆ __init__()
def dc_crawler.RTCPreprocessor.RTCPreprocessor.__init__ |
( |
|
self | ) |
|
constructor
Definition at line 60 of file RTCPreprocessor.py.
62 foundation.CementApp.__init__(self)
66 self.exitCode = APP_CONSTS.EXIT_SUCCESS
67 self.pickled_object = None
68 self.envVars = {self.DRCE_NODES_TOTAL: 1,
69 self.DRCE_NODE_NUMBER: 1}
def __init__(self)
constructor
◆ __initApp()
def dc_crawler.RTCPreprocessor.RTCPreprocessor.__initApp |
( |
|
self | ) |
|
|
private |
initialize application from config files
- Parameters
-
- Returns
- - None
Definition at line 97 of file RTCPreprocessor.py.
99 self.__loadLogConfig(self.__loadAppConfig(self.pargs.config))
101 raise Exception(self.MSG_ERROR_LOAD_APP_CONFIG)
def __initApp(self, configName=None)
◆ __loadAppConfig()
def dc_crawler.RTCPreprocessor.RTCPreprocessor.__loadAppConfig |
( |
|
self, |
|
|
|
configName |
|
) |
| |
|
private |
load application config file
- Parameters
-
configName | - name of application config file |
- Returns
- - log config file name
Definition at line 108 of file RTCPreprocessor.py.
113 config = ConfigParser.ConfigParser()
114 config.optionxform = str
116 readOk = config.read(configName)
119 raise Exception(self.MSG_ERROR_WRONG_CONFIG_FILE_NAME + ": " + configName)
121 if config.has_section(APP_CONSTS.CONFIG_APPLICATION_SECTION_NAME):
122 confLogFileName = str(config.get(APP_CONSTS.CONFIG_APPLICATION_SECTION_NAME, self.PREPROCESSOR_OPTION_LOG))
124 except Exception, err:
125 raise Exception(self.MSG_ERROR_LOAD_APP_CONFIG + ' ' + str(err))
127 return confLogFileName
def __loadAppConfig(self, configName)
◆ __loadLogConfig()
def dc_crawler.RTCPreprocessor.RTCPreprocessor.__loadLogConfig |
( |
|
self, |
|
|
|
configName |
|
) |
| |
|
private |
load log config file
- Parameters
-
configName | - name of the log config file |
- Returns
- - None
Definition at line 134 of file RTCPreprocessor.py.
136 if isinstance(configName, str) and len(configName) == 0:
137 raise Exception(self.MSG_ERROR_EMPTY_CONFIG_FILE_NAME)
139 logging.config.fileConfig(configName)
142 self.logger = Utils.MPLogger().getLogger()
144 except Exception, err:
145 raise Exception(self.MSG_ERROR_READ_LOG_CONFIG + ' ' + str(err))
def __loadLogConfig(self, configName)
◆ cutBatch()
def dc_crawler.RTCPreprocessor.RTCPreprocessor.cutBatch |
( |
|
self | ) |
|
Definition at line 152 of file RTCPreprocessor.py.
153 self.batch = (pickle.loads(self.pickled_object))
154 self.logger.info(
"Before id:%s items: %s", str(self.batch.id), str(len(self.batch.items)))
155 self.logger.debug(
"self.batch: %s",
varDump(self.batch))
156 items = self.batch.items
158 splitted_items = self.split(self.batch.items, int(self.envVars[self.DRCE_NODES_TOTAL]))
159 self.logger.debug(
"Input items: %s", str(self.batch.items))
160 self.logger.debug(
"Splitted items: %s", str(splitted_items))
161 self.batch.items = splitted_items[int(self.envVars[self.DRCE_NODE_NUMBER]) - 1]
162 self.logger.debug(
"Output items: %s", str(self.batch.items))
163 self.logger.debug(
"Output batch: %s",
varDump(self.batch))
164 self.pickled_object = pickle.dumps(self.batch)
166 self.logger.info(
"After id:%s items: %s", str(self.batch.id), str(len(self.batch.items)))
def varDump(obj, stringify=True, strTypeMaxLen=256, strTypeCutSuffix='...', stringifyType=1, ignoreErrors=False, objectsHash=None, depth=0, indent=2, ensure_ascii=False, maxDepth=10)
◆ getBatchFromInput()
def dc_crawler.RTCPreprocessor.RTCPreprocessor.getBatchFromInput |
( |
|
self | ) |
|
Definition at line 148 of file RTCPreprocessor.py.
148 def getBatchFromInput(self):
149 self.pickled_object = sys.stdin.read()
◆ getEnvVars()
def dc_crawler.RTCPreprocessor.RTCPreprocessor.getEnvVars |
( |
|
self | ) |
|
Definition at line 177 of file RTCPreprocessor.py.
177 def getEnvVars(self):
178 for key in self.envVars.keys():
179 if key in os.environ and os.environ[key] != "":
180 self.envVars[key] = os.environ[key]
181 self.logger.debug(
"os.environ[%s]: set to <<%s>>" % (key, self.envVars[key]))
183 self.logger.debug(
"os.environ[%s]: not set. Use default value: <<%s>>" % (key, self.envVars[key]))
184 self.exitCode = self.ERROR_EMPTY_ENV_VARS
◆ process()
def dc_crawler.RTCPreprocessor.RTCPreprocessor.process |
( |
|
self | ) |
|
Definition at line 187 of file RTCPreprocessor.py.
189 self.getBatchFromInput()
191 if self.exitCode != self.ERROR_EMPTY_ENV_VARS:
192 self.logger.info(
"The batch possible will be reduced")
195 self.logger.info(
"The batch will not be reduced")
198 self.exitCode = APP_CONSTS.EXIT_FAILURE
◆ run()
def dc_crawler.RTCPreprocessor.RTCPreprocessor.run |
( |
|
self | ) |
|
run application
Definition at line 79 of file RTCPreprocessor.py.
81 foundation.CementApp.run(self)
90 self.logger.info(APP_CONSTS.LOGGER_DELIMITER_LINE)
◆ sendBatch()
def dc_crawler.RTCPreprocessor.RTCPreprocessor.sendBatch |
( |
|
self | ) |
|
◆ setup()
def dc_crawler.RTCPreprocessor.RTCPreprocessor.setup |
( |
|
self | ) |
|
setup application
Definition at line 73 of file RTCPreprocessor.py.
75 foundation.CementApp.setup(self)
◆ split()
def dc_crawler.RTCPreprocessor.RTCPreprocessor.split |
( |
|
self, |
|
|
|
arr, |
|
|
|
count |
|
) |
| |
Definition at line 168 of file RTCPreprocessor.py.
168 def split(self, arr, count):
169 return [arr[i::count] for i in range(count)]
◆ batch
dc_crawler.RTCPreprocessor.RTCPreprocessor.batch |
◆ DRCE_NODE_NUMBER
string dc_crawler.RTCPreprocessor.RTCPreprocessor.DRCE_NODE_NUMBER = "DRCE_NODE_NUMBER" |
|
static |
◆ DRCE_NODES_TOTAL
string dc_crawler.RTCPreprocessor.RTCPreprocessor.DRCE_NODES_TOTAL = "DRCE_NODES_TOTAL" |
|
static |
◆ envVars
dc_crawler.RTCPreprocessor.RTCPreprocessor.envVars |
◆ ERROR_EMPTY_ENV_VARS
int dc_crawler.RTCPreprocessor.RTCPreprocessor.ERROR_EMPTY_ENV_VARS = 2 |
|
static |
◆ exitCode
dc_crawler.RTCPreprocessor.RTCPreprocessor.exitCode |
◆ logger
dc_crawler.RTCPreprocessor.RTCPreprocessor.logger |
◆ MSG_ERROR_EMPTY_CONFIG_FILE_NAME
string dc_crawler.RTCPreprocessor.RTCPreprocessor.MSG_ERROR_EMPTY_CONFIG_FILE_NAME = "Config file name is empty." |
|
static |
◆ MSG_ERROR_LOAD_APP_CONFIG
string dc_crawler.RTCPreprocessor.RTCPreprocessor.MSG_ERROR_LOAD_APP_CONFIG = "Error loading application config file." |
|
static |
◆ MSG_ERROR_PARSE_CMD_PARAMS
string dc_crawler.RTCPreprocessor.RTCPreprocessor.MSG_ERROR_PARSE_CMD_PARAMS = "Error parse command line parameters." |
|
static |
◆ MSG_ERROR_READ_LOG_CONFIG
string dc_crawler.RTCPreprocessor.RTCPreprocessor.MSG_ERROR_READ_LOG_CONFIG = "Error read log config file." |
|
static |
◆ MSG_ERROR_WRONG_CONFIG_FILE_NAME
string dc_crawler.RTCPreprocessor.RTCPreprocessor.MSG_ERROR_WRONG_CONFIG_FILE_NAME = "Config file name is wrong" |
|
static |
◆ pickled_object
dc_crawler.RTCPreprocessor.RTCPreprocessor.pickled_object |
◆ PREPROCESSOR_OPTION_LOG
string dc_crawler.RTCPreprocessor.RTCPreprocessor.PREPROCESSOR_OPTION_LOG = "log" |
|
static |
The documentation for this class was generated from the following file: