HCE Project Python language Distributed Tasks Manager Application, Distributed Crawler Application and client API bindings.  2.0.0-chaika
Hierarchical Cluster Engine Python language binding
DateTimeType.py
Go to the documentation of this file.
1 # coding: utf-8
2 """
3 HCE project, Python bindings, Distributed Tasks Manager application.
4 The DateTimeType class contains the main functionality for extracting datetime values.
5 
6 @package: dc_processor
7 @file DateTimeType.py
8 @author Alexander Vybornyh <alexander.hce.cluster@gmail.com>
9 @link: http://hierarchical-cluster-engine.com/
10 @copyright: Copyright &copy; 2013-2015 IOIX Ukraine
11 @license: http://hierarchical-cluster-engine.com/license/
12 @since: 0.1
13 """
14 
15 import re
16 import time
17 import copy
18 from datetime import tzinfo
19 import datetime
20 
21 # from dateutil.parser import parserinfo
22 from dateutil.parser import parse
23 from dateutil.relativedelta import relativedelta
24 from dateutil.tz import gettz
25 from dateutil import parser
26 
27 from app.Utils import getTracebackInfo
28 
29 try:
30  from app.Utils import ExceptionLog # pylint: disable=E0401
31 except: # pylint: disable=W0702
32  ExceptionLog = None
33 
34 import DateTimeTimezones # pylint: disable=W0403
35 
36 
37 
38 # # Class OffsetTzInfo for calculate offset
class OffsetTzInfo(tzinfo):
  """Fixed-offset tzinfo described by a sign flag, hours and minutes; DST is always zero."""

  def __init__(self, isNegative=False, hours=0, minutes=0):
    """
    @param isNegative - True when the offset has to be negated
    @param hours - hours part of the offset magnitude
    @param minutes - minutes part of the offset magnitude
    """
    super(OffsetTzInfo, self).__init__()
    self.isNegative = isNegative
    self.hours = hours
    self.minutes = minutes

  def utcoffset(self, dt):  # pylint: disable=W0613
    """Return the signed offset as a timedelta; the dt argument is not used."""
    magnitude = datetime.timedelta(hours=self.hours, minutes=self.minutes)
    return -magnitude if self.isNegative else magnitude

  def dst(self, dt):  # pylint: disable=W0613
    """Return a zero timedelta; this zone never applies DST."""
    return datetime.timedelta(0)

  def tzname(self, dt):  # pylint: disable=W0613
    """Return the name as 'UTC', a sign character and the HH:MM magnitude."""
    # The negative sign is the Unicode minus sign, not the ASCII hyphen
    sign = '−' if self.isNegative else '+'
    clock = datetime.time(hour=self.hours, minute=self.minutes).strftime('%H:%M')
    return "UTC" + sign + clock
66 
67 
68 # # Class DateTimeType for extract data
69 #
class DateTimeType(object):
  """Holds a datetime value and provides static helpers that extract dates and times from text."""

  # Years must be strictly greater than this to pass the range check in extractDate()
  MIN_ALLOWED_YEAR = 2000
  # parse() tries 'dateutil' only for strings at least this long
  MIN_ALLOWED_LEN_FOR_DATEUTILS = 10
  # Separator between date and time used with isoformat()
  ISO_SEP = ' '
  # Each of these characters is replaced by a space in prepareString()
  BAD_SIMBOLS = '=(),|@`'
  # Names of '%name%' placeholders removed by prepareString()
  TAG_NAMES = ['pubdate', 'dc_date']

  # Language identifiers returned by getLang()
  LANG_ENG = "ENG"
  LANG_RUS = "RUS"
  LANG_UKR = "UKR"
  LANG_GER = "GERMAN"
  LANG_JAP = "JAPAN"

  # Marker words searched by getLang() to detect the language of a string
  wordsListEng = [u'Jan', u'Feb', u'Mar', u'Apr', u'May', u'Jun', u'Jul', u'Aug', u'Sep', u'Oct', u'Nov', u'Dec',
                  u'Year', u'Today', u'Yesterday', u'Day before yesterday', u'year', u'month', u'day', u'hour',
                  u'minute']
  wordsListRus = [u'Янв', u'Февр', u'Мар', u'Апр', u'Май', u'Июнь', u'Июль', u'Авг', u'Сент', u'Окт', u'Нояб',
                  u'Дек', u'Сегодня', u'Вчера', u'Поза вчера', u'Июня', u'Июля']
  wordsListUkr = [u'Сiч', u'Лют', u'Бер', u'Квiт', u'Трав', u'Черв', u'Лип', u'Серп', u'Вер', u'Жовт', u'Лист',
                  u'Груд', u'Рік', u'Сьогодні', u'Вчора', u'Позавчора']
  wordsListGer = [u'März', u'Mai', u'Juni', u'Juli', u'Sept', u'Okt', u'Dez', u'Jahr', u'Heute', u'Gestern',
                  u'Vorgestern', u'Uhr']
  wordsListJap = [u'一月', u'二月', u'三月', u'四月', u'五月', u'六月', u'七月', u'八月', u'九月', u'十月', u'十一月',
                  u'十二月', u'年', u'今日', u'イエスタデイ', u'おととい', u'月', u'日', u'時', u'分', u'付', u'更新']

  # Month names; getMonthNumber() uses the 1-based position in a list as the month number
  monthListEng = [u'Jan', u'Feb', u'Mar', u'Apr', u'May', u'Jun', u'Jul', u'Aug', u'Sep', u'Oct', u'Nov', u'Dec']
  monthListRus = [u'Янв', u'Февр', u'Март', u'Апр', u'Май', u'Июнь', u'Июль', u'Авг', u'Сент', u'Окт', u'Нояб', u'Дек']
  monthListUkr = [u'Сiч', u'Лют', u'Бер', u'Квiт', u'Трав', u'Черв', u'Лип', u'Серп', u'Вер', u'Жовт', u'Лист', u'Груд']
  monthListGer = [u'Jan', u'Feb', u'März', u'Apr', u'Mai', u'Juni', u'Juli', u'Aug', u'Sept', u'Okt', u'Nov', u'Dez']
  monthListJap = [u'一月', u'二月', u'三月', u'四月', u'五月', u'六月', u'七月', u'八月', u'九月', u'十月', u'十一月', u'十二月']
  monthListRusBad = [u'Янв', u'Февр', u'Март', u'Апр', u'Май', u'Июня', u'Июля', u'Авг', u'Сент', u'Окт', u'Нояб',
                     u'Дек']

  # Relative-day words per language
  dayStateEng = [u'Today', u'Yesterday', u'Day before yesterday']
  dayStateRus = [u'Сегодня', u'Вчера', u'Поза вчера']
  dayStateUkr = [u'Сьогодні', u'Вчора', u'Позавчора']
  dayStateGer = [u'Heute', u'Gestern', u'Vorgestern']
  dayStateJap = [u'今日', u'イエスタデイ', u'おととい']
  dayStateRusStr = ['Сегодня', 'Вчера', 'Поза вчера']
  dayStateUkrStr = ['Сьогодні', 'Вчора', 'Позавчора']

  LANG_DICT = {LANG_ENG: wordsListEng, LANG_RUS: wordsListRus, LANG_UKR: wordsListUkr, LANG_GER: wordsListGer,
               LANG_JAP: wordsListJap}

  # NOTE(review): LANG_RUS appears twice in this literal, so the later value (monthListRusBad)
  # silently replaces monthListRus. Left unchanged to keep lookups by key stable - confirm intent.
  MONTH_DICT = {LANG_ENG: monthListEng, LANG_RUS: monthListRus, LANG_UKR: monthListUkr, LANG_GER: monthListGer,
                LANG_JAP: monthListJap, LANG_RUS: monthListRusBad}

  # NOTE(review): LANG_RUS and LANG_UKR appear twice, so the later non-unicode lists
  # (dayStateRusStr, dayStateUkrStr) are the effective values. Left unchanged - confirm intent.
  DAY_STATE_DICT = {LANG_ENG: dayStateEng, LANG_RUS: dayStateRus, LANG_UKR: dayStateUkr, LANG_GER: dayStateGer,
                    LANG_JAP: dayStateJap, LANG_RUS: dayStateRusStr, LANG_UKR: dayStateUkrStr}

  # Date patterns tried in this order by extractDate(); named groups: mon, day, year, short_year
  patternListDate = [r'(?P<mon>[ 1][0-9]) (?P<day>[ 0123][0-9]) (?P<year>[0-9][0-9][0-9][0-9])',
                     r'(?P<mon>\d{1,2})/(?P<day>[ 0123][0-9])/(?P<year>[0-9][0-9][0-9][0-9])',
                     r'(?P<mon>\d{1,2})/(?P<day>[0-9])/(?P<year>[0-9][0-9][0-9][0-9])',
                     r'(?P<mon>\d{1,2})-(?P<day>[0-9][0-9])-(?P<year>[0-9][0-9][0-9][0-9])',
                     r'(?P<mon>\d{1,2})/(?P<day>[0123][0-9])/(?P<short_year>[0-9][0-9])',
                     r'(?P<mon>\d{1,2})/(?P<day>[0-9])/(?P<short_year>[0-9][0-9])',
                     r'(?P<mon>[A-Z][a-z][a-z]) (?P<day>[ 0123][0-9]) (?P<year>[0-9][0-9][0-9][0-9])',
                     r'(?P<mon>[A-Z][a-z][a-z]) (?P<day>[0-9]) (?P<year>[0-9][0-9][0-9][0-9])',
                     r'(?P<mon>\w+) (?P<day>[0-9]) (?P<year>[0-9][0-9][0-9][0-9])',
                     r'(?P<mon>\w+) (?P<day>[ 0123][0-9]) (?P<year>[0-9][0-9][0-9][0-9])',
                     r'(?P<mon>\w+) (?P<day>[0-9])(\w{2}) (?P<year>[0-9][0-9][0-9][0-9])',
                     r'(?P<mon>\w+) (?P<day>[ 0123][0-9])(\w{2}) (?P<year>[0-9][0-9][0-9][0-9])',
                     r'(?P<mon>\W+) (?P<day>\d{2})(\w{2}) (?P<year>[0-9][0-9][0-9][0-9])',
                     r'(?P<mon>\w+) (?P<day>\d{1,2})(\W+\d{1,2}) (?P<year>\d{4})',
                     r'(?P<mon>\w+). (?P<day>\d{1,2}) (?P<year>[0-9][0-9][0-9][0-9])',
                     r'(?P<day>[0-9][0-9]) (?P<mon>\w+) (?P<year>[0-9][0-9][0-9][0-9])',
                     r'(?P<day>[0-9][0-9]) (?P<mon>\W+) (?P<year>[0-9][0-9][0-9][0-9])',
                     r'(?P<year>[0-9][0-9][0-9][0-9]) (?P<mon>[0-1][0-9]) (?P<day>[0123][0-9])',
                     r'(?P<year>[0-9][0-9][0-9][0-9])-(?P<mon>[0-1][0-9])-(?P<day>[0123][0-9])',
                     r'(?P<year>[0-9][0-9][0-9][0-9])/(?P<mon>[0-1][0-9])/(?P<day>[0123][0-9])',
                     r'(?P<year>[0-9][0-9][0-9][0-9])/(?P<mon>[0-9])/(?P<day>[0123][0-9])',
                     r'(?P<year>[0-9][0-9][0-9][0-9])\.(?P<mon>[0-1][0-9])\.(?P<day>[0123][0-9])',
                     r'(?P<year>[0-9][0-9][0-9][0-9])(?P<mon>[0-1][0-9])(?P<day>[0123][0-9])',
                     r'(?P<day>[0-9][0-9])(\w{2}) (?P<mon>\w+) (?P<year>[0-9][0-9][0-9][0-9])',
                     r'(?P<day>[0-9])(\w{2}) (?P<mon>\w+) (?P<year>[0-9][0-9][0-9][0-9])',
                     r'(?P<day>[0-9]) (?P<mon>\w+) (?P<year>[0-9][0-9][0-9][0-9])',
                     r'(?P<day>[0-9]) (?P<mon>\W+) (?P<year>[0-9][0-9][0-9][0-9])',
                     r'(?P<day>[0-9][0-9])/(?P<mon>[0-9][0-9])/(?P<year>[0-9][0-9][0-9][0-9])',
                     r'(?P<day>[0-9][0-9]).(?P<mon>[0-9][0-9]).(?P<year>[0-9][0-9][0-9][0-9])',
                     r'(?P<day>\d{2})/(?P<mon>[0-9][0-9])/(?P<year>[0-9][0-9][0-9][0-9])',
                     # Fixed: the month group was '\d+}' - the stray '}' required a literal brace in the input
                     r'(?P<day>\d{2})/(?P<mon>\d{1,2})/(?P<year>[0-9][0-9][0-9][0-9])',
                     r'(?P<day>\d{1,2})\.(?P<mon>\d{1,2})\.(?P<year>[0-9][0-9][0-9][0-9])',
                     r'(?P<day>\d{1,2})\. (?P<mon>\w+) (?P<year>[0-9][0-9][0-9][0-9])',
                     r'(?P<day>\d{1,2})\. (?P<mon>\W+) (?P<year>[0-9][0-9][0-9][0-9])',
                     r'(?P<day>[0-9][0-9]) (?P<mon>\w+)',
                     r'(?P<day>[0-9][0-9]) (?P<mon>\W+)',
                     r'(?P<day>[0-9]) (?P<mon>\w+)',
                     r'(?P<day>[0-9]) (?P<mon>\W+)',
                     r'(?P<mon>\d{1,2})\.(?P<day>[0123][0-9])\.(?P<short_year>[0-9][0-9])',
                     r'(?P<mon>\d{1,2})\.(?P<day>[0-9])\.(?P<short_year>[0-9][0-9])',
                     r'(?P<day>[0123][0-9])\.(?P<mon>[01][0-9])\.(?P<short_year>[0-9][0-9])',
                     r'(?P<mon>\w+) (?P<year>[0-9][0-9][0-9][0-9])',
                     r'(?P<mon>\w+) (?P<day>\d{1,2})',
                     r'(?P<mon>\W+) (?P<day>\d{1,2})']

  # Time patterns tried in this order by extractTime(); named groups: hour, min, sec, tf
  # NOTE(review): several entries are textually identical; kept verbatim - verify against the upstream file.
  patternListTime = [r'(?P<hour>[ 0-9][0-9]):(?P<min>\d{2}):(?P<sec>\d{2}) (?P<tf>[PpAaMm]{2})',
                     r'(?P<hour>[ 0-9][0-9]):(?P<min>\d{2}):(?P<sec>\d{2}) (?P<tf>[PpAaMm]{2})',
                     r'(?P<hour>[ 0-9][0-9]):(?P<min>\d{2}):(?P<sec>\d{2})(?P<tf>[PpAaMm]{2})',
                     r'(?P<hour>[ 0-9][0-9]):(?P<min>\d{2}):(?P<sec>\d{2})(?P<tf>[PpAaMm]{2})',
                     r'(?P<hour>[ 0-9][0-9]):(?P<min>\d{2}) (?P<tf>[PpAaMm]{2})',
                     r'(?P<hour>[ 0-9][0-9]):(?P<min>\d{2}) (?P<tf>[PpAaMm]{2})',
                     r'(?P<hour>[ 0-9][0-9]):(?P<min>\d{2})(?P<tf>[PpAaMm]{2})',
                     r'(?P<hour>[ 0-9][0-9]):(?P<min>\d{2})(?P<tf>[PpAaMm]{2})',
                     r'(?P<hour>[ 0-9][0-9]):(?P<min>\d{2}):(?P<sec>[0-9][0-9])',
                     r'(?P<hour>[ 0-9][0-9])(?P<min>\d{2}) GMT',
                     r'(?P<hour>[ 0-9][0-9]):(?P<min>\d{2}) GMT',
                     r'(?P<hour>[ 0-9][0-9]):(?P<min>\d{1,2})']

  patternListTimezoneOffset = [r'(?P<offset>[+-][0-9][0-9]:[0-9][0-9])',
                               r'(?P<offset>[+-][0-9][0-9][0-9][0-9])',
                               r'(?P<offset>[+-]\d{1,2})',
                               r'(?P<offset>[−][0-9][0-9])']

  # Patterns used by applyUtcTimezone() to read hours/minutes out of a raw offset string
  patternListUtcTimezones = [r'(?P<hours>[0-9][0-9]):(?P<minutes>[0-9][0-9])',
                             r'(?P<hours>[0-9][0-9])']

  # Error messages
  ERROR_INPUT_PARAMS = 'Error initialization by input parameters.'
  ERROR_FORMAT_STRING_TYPE = 'Format string is not string.'
  ERROR_DATA_STRING_TYPE = 'Data string is not string.'
  ERROR_BAD_INPUT_DATA = 'Bad inputted data.'
192 
193  # #Constructor
194  #
195  # @param dataString - data string for extract datatime, can be as timestamp or string with data for extract
196  # @param formatString - format string for formatting data
197  def __init__(self, dataString=None, formatString=None):
198  self.datetime = None
199  self.isError = False
200  self.errorMsg = ''
201 
202  try:
203  self.datetime = self.__initDataTime(dataString, formatString)
204 
205  except Exception, err:
206  raise Exception(self.ERROR_INPUT_PARAMS + ' ' + str(err))
207 
208 
209  # # initialization of datatime
210  #
211  # @param dataString - data string for extract datatime, can be as timestamp or string with data for extract
212  # @param formatString - format string for formatting data
213  # @return datatime - extracted datatime or None
214  def __initDataTime(self, dataString=None, formatString=None):
215  # variable for result
216  ret = None
217  if dataString is not None and isinstance(dataString, int):
218  ret = datetime.datetime.fromtimestamp(dataString)
219  elif dataString is not None and formatString is not None:
220  # validate of input type of format string
221  if not isinstance(formatString, str):
222  raise Exception(self.ERROR_FORMAT_STRING_TYPE)
223  else:
224  pass
225  # validate of input type of data string
226  if not isinstance(dataString, str):
227  raise Exception(self.ERROR_DATA_STRING_TYPE)
228  else:
229  pass
230  # input types checked and can be used
231  ret = datetime.datetime.strptime(dataString, formatString)
232  elif dataString is None and formatString is None:
233  pass
234  else:
235  raise Exception(self.ERROR_BAD_INPUT_DATA)
236 
237  return ret
238 
239 
240  # # Return datatime as timestamp
241  #
242  # @param - None
243  # @return datatime as timestamp
244  def getInt(self):
245  # variable for result
246  ret = None
247  try:
248  ret = int((self.datetime - datetime.datetime.fromtimestamp(0)).total_seconds())
249  except Exception, err:
250  self.isError = True
251  self.errorMsg = str(err)
252  ret = None
253 
254  return ret
255 
256 
257  # # Return datatime as string
258  #
259  # @param - formatString - format string for formatting data
260  # @return datatime as timestamp
261  def getString(self, formatString=None):
262  # variable for result
263  ret = None
264 
265  try:
266  if formatString is None:
267  ret = self.datetime.isoformat(self.ISO_SEP)
268  else:
269  if not isinstance(formatString, str):
270  raise Exception(self.ERROR_FORMAT_STRING_TYPE)
271  else:
272  ret = self.datetime.strftime(formatString)
273  except Exception, err:
274  self.isError = True
275  self.errorMsg = str(err)
276  ret = None
277 
278  return ret
279 
280 
281 # # Identification of language
282  #
283  # @param inputStr - string for detection of lang
284  # @param logger - instance of logger for log if necessary
285  # @param isExtendLog - boolean flag for allowed extend logging if True or only error message otherwise
286  # @return constant of lang name
287  @staticmethod
288  def getLang(inputStr, logger=None, isExtendLog=False):
289  ret = None
290  found = False
291  langDict = DateTimeType.LANG_DICT
292  try:
293  dataString = inputStr
294  try:
295  dataString = unicode(inputStr, 'utf-8', 'ignore')
296  except Exception, err:
297  if logger and isExtendLog:
298  logger.debug("getLang: '" + str(err) + "'")
299  logger.info(getTracebackInfo())
300 
301  for key in langDict.keys():
302  for word in langDict[key]:
303  if dataString.lower().find(word) > -1 or dataString.lower().find(word.lower()) > -1:
304  ret = key
305  found = True
306  break
307  if found:
308  break
309 
310  except Exception, err:
311  if logger and isExtendLog:
312  logger.debug("getLang: '" + str(err) + "'")
313  logger.info(getTracebackInfo())
314  ret = DateTimeType.LANG_ENG
315 
316  return ret
317 
318 
319 # # Identification of month number
320  #
321  # @param inputStr - string for detection of lang
322  # @param logger - logger instance
323  # @param isExtendLog - boolean flag for allowed extend logging if True or only error message otherwise
324  # @return constant of month number if found
325  @staticmethod
326  def getMonthNumber(inputStr, logger, isExtendLog=False):
327  if logger and isExtendLog:
328  logger.debug("getMonthNumber inputStr: '" + inputStr + "' type: " + str(type(inputStr)))
329 
330  ret = None
331  inputStr = inputStr.strip()
332 
333  if len(inputStr) < 3 and inputStr.isdigit():
334  ret = int(inputStr)
335  else:
336  found = False
337  monthDict = DateTimeType.MONTH_DICT
338 
339  for key in monthDict.keys():
340  # if logger and isExtendLog:
341  # logger.debug("key: '" + str(key) + "'")
342 
343  monthNumber = 0
344  for months in monthDict[key]:
345  monthNumber = monthNumber + 1
346  month = months.lower()
347  inputMonth = inputStr
348  try:
349  inputMonth = inputStr.decode('utf-8')
350  except UnicodeError, err:
351  if logger is not None and isExtendLog:
352  logger.debug("Operation decode'utf-8' has error: " + str(err))
353 
354  # if logger and isExtendLog:
355  # logger.debug(inputMonth.lower() + ' <=> ' + month)
356 
357  if inputMonth.lower().find(month) > -1 or month.lower().find(inputMonth.lower()) > -1:
358  ret = monthNumber
359  found = True
360  break
361  if found:
362  break
363 
364  return ret
365 
366 
367  # # Exctract english date from string
368  #
369  # @param inputStr - input string for preparation
370  # @param useCurrentYear - flag of default usage current year if wasn't selected
371  # @param logger - instance of logger for log if necessary
372  # @param isExtendLog - boolean flag for allowed extend logging if True or only error message otherwise
373  # @return extracted datatime or None
374  @staticmethod
375  def extractDateEng(inputStr, useCurrentYear, logger=None, isExtendLog=False):
376  pubdate = DateTimeType.intelligentExtractor(inputStr, useCurrentYear, logger, isExtendLog, DateTimeType.LANG_ENG)
377  if pubdate is None:
378  pubdate = DateTimeType.extractDateCommon(inputStr, useCurrentYear, logger, isExtendLog)
379 
380  return pubdate
381 
382 
383  # # Exctract russian date from string
384  #
385  # @param inputStr - input string for preparation
386  # @param useCurrentYear - flag of default usage current year if wasn't selected
387  # @param logger - instance of logger for log if necessary
388  # @param isExtendLog - boolean flag for allowed extend logging if True or only error message otherwise
389  # @return extracted datatime or None
390  @staticmethod
391  def extractDateRus(inputStr, useCurrentYear, logger=None, isExtendLog=False):
392  pubdate = DateTimeType.intelligentExtractor(inputStr, useCurrentYear, logger, isExtendLog, DateTimeType.LANG_RUS)
393  if pubdate is None:
394  pubdate = DateTimeType.extractDateCommon(inputStr, useCurrentYear, logger, isExtendLog)
395 
396  return pubdate
397 
398  # # Exctract ukrainian date from string
399  #
400  # @param inputStr - input string for preparation
401  # @param useCurrentYear - flag of default usage current year if wasn't selected
402  # @param logger - instance of logger for log if necessary
403  # @param isExtendLog - boolean flag for allowed extend logging if True or only error message otherwise
404  # @return extracted datatime or None
405  @staticmethod
406  def extractDateUkr(inputStr, useCurrentYear, logger=None, isExtendLog=False):
407  pubdate = DateTimeType.intelligentExtractor(inputStr, useCurrentYear, logger, isExtendLog, DateTimeType.LANG_UKR)
408  if pubdate is None:
409  pubdate = DateTimeType.extractDateCommon(inputStr, useCurrentYear, logger, isExtendLog)
410 
411  return pubdate
412 
413 
414  # # Exctract german date from string
415  #
416  # @param inputStr - input string for preparation
417  # @param useCurrentYear - flag of default usage current year if wasn't selected
418  # @param logger - instance of logger for log if necessary
419  # @param isExtendLog - boolean flag for allowed extend logging if True or only error message otherwise
420  # @return extracted datatime or None
421  @staticmethod
422  def extractDateGerman(inputStr, useCurrentYear, logger=None, isExtendLog=False):
423  pubdate = DateTimeType.intelligentExtractor(inputStr, useCurrentYear, logger, isExtendLog, DateTimeType.LANG_GER)
424  if pubdate is None:
425  pubdate = DateTimeType.extractDateCommon(inputStr, useCurrentYear, logger, isExtendLog)
426 
427  return pubdate
428 
429 
430  # # Exctract japan date from string
431  #
432  # @param inputStr - input string for preparation
433  # @param useCurrentYear - flag of default usage current year if wasn't selected
434  # @param logger - instance of logger for log if necessary
435  # @param isExtendLog - boolean flag for allowed extend logging if True or only error message otherwise
436  # @return extracted datatime or None
437  @staticmethod
438  def extractDateJapan(inputStr, useCurrentYear, logger=None, isExtendLog=False):
439  # replace japanise simbols
440  inputStr = DateTimeType.replaceJapanSimbols(inputStr, logger, isExtendLog)
441 
442  # extract data
443  pubdate = DateTimeType.intelligentExtractor(inputStr, useCurrentYear, logger, isExtendLog, DateTimeType.LANG_JAP)
444 
445  if pubdate is None:
446  match = re.search(r'[0-9]', inputStr)
447  if match:
448  try:
449  pubdate = DateTimeType.convertPubDateToRFC2822(inputStr, logger, isExtendLog)
450  except Exception, err:
451  if logger and isExtendLog:
452  logger.debug('extractDateJapan: ' + str(err))
453 
454  if pubdate is None:
455  pubdate = DateTimeType.extractDateFromHeiseiPeriod(inputStr, logger, isExtendLog)
456 
457  if pubdate is None:
458  pubdate = DateTimeType.extractDateCommon(inputStr, useCurrentYear, logger, isExtendLog)
459 
460  return pubdate
461 
462 
463  # # static method for parse
464  #
465  # @param dataString - string for parse
466  # @param useCurrentYear - flag of default usage current year if wasn't selected
467  # @param logger - instance of logger for log if necessary
468  # @param isExtendLog - boolean flag for allowed extend logging if True or only error message otherwise
469  # @return extracted datetime or None
470  @staticmethod
471  def parse(dataString, useCurrentYear=True, logger=None, isExtendLog=False):
472  # variable for result
473  ret = None
474  if logger is not None and isExtendLog:
475  logger.debug("inputStr: '" + dataString + "'")
476 
477  if DateTimeType.isAllowedInputString(dataString, logger, isExtendLog):
478  if len(dataString) >= int(DateTimeType.MIN_ALLOWED_LEN_FOR_DATEUTILS):
479  try:
480  if not DateTimeType.isUtf8CodePage(dataString, logger, isExtendLog):
481  dataString = DateTimeType.changeCodePageToAscii(dataString, logger, isExtendLog)
482 
483  if logger is not None and isExtendLog:
484  logger.debug("try use 'dateutil'")
485 
486  ret = parser.parse(dataString)
487  if ret is not None:
488  # utc_zone = gettz('UTC')
489  # ret = ret.astimezone(utc_zone)
490  # ret = ret.replace(tzinfo=None)
491  # print ret.isoformat(' ')
492  if logger is not None and isExtendLog:
493  logger.debug("'dateutil' return: " + str(ret.isoformat(DateTimeType.ISO_SEP)))
494  ret = ret.replace(microsecond=0)
495  except Exception, err: # pylint: disable=W0702
496  if logger is not None and isExtendLog:
497  logger.debug("'dateutil' can not parse: " + str(err))
498  try:
499  normalizedString = DateTimeType.normalizeTimezone(dataString, logger, isExtendLog)
500  if dataString != normalizedString:
501  if logger is not None and isExtendLog:
502  logger.debug("retry parsing use 'dateutil'")
503  ret = parser.parse(normalizedString)
504  if ret is not None and logger is not None and isExtendLog:
505  logger.debug("'dateutil' return: " + str(ret.isoformat(DateTimeType.ISO_SEP)))
506  except Exception, err:
507  if logger is not None and isExtendLog:
508  logger.debug("'dateutil' can not parse: " + str(err))
509 
510  if ret is None:
511  # Intendification of lang
512  langType = DateTimeType.getLang(dataString, logger, isExtendLog)
513 
514  if logger is not None and isExtendLog:
515  logger.debug('lang type detected as: ' + str(langType))
516 
517  if langType == DateTimeType.LANG_ENG:
518  # extract english date
519  ret = DateTimeType.extractDateEng(dataString, useCurrentYear, logger, isExtendLog)
520  elif langType == DateTimeType.LANG_RUS:
521  # extract russian date
522  ret = DateTimeType.extractDateRus(dataString, useCurrentYear, logger, isExtendLog)
523  elif langType == DateTimeType.LANG_UKR:
524  # extract ukrainian date
525  ret = DateTimeType.extractDateUkr(dataString, useCurrentYear, logger, isExtendLog)
526  elif langType == DateTimeType.LANG_GER:
527  # extract germany date
528  ret = DateTimeType.extractDateGerman(dataString, useCurrentYear, logger, isExtendLog)
529  elif langType == DateTimeType.LANG_JAP:
530  # extract japan date
531  ret = DateTimeType.extractDateJapan(dataString, useCurrentYear, logger, isExtendLog)
532  else:
533  ret = DateTimeType.extractDateCommon(dataString, useCurrentYear, logger, isExtendLog)
534 
535  if ret is not None and ret.tzinfo is None:
536  timezoneName = DateTimeType.extractUtcTimezoneName(dataString, logger, isExtendLog)
537  utcZone = gettz(timezoneName)
538  # logger.debug("utcZone: " + str(utcZone))
539  # logger.debug("timezoneName: " + str(timezoneName))
540  if utcZone is not None:
541  ret = ret.replace(tzinfo=utcZone)
542  else:
543  ret = DateTimeType.applyUtcTimezone(ret, timezoneName, DateTimeTimezones.timezonesDict, logger, isExtendLog)
544 
545  if logger is not None and isExtendLog:
546  if ret is not None:
547  logger.debug('result pubdate: ' + str(ret.isoformat(DateTimeType.ISO_SEP)))
548  else:
549  logger.debug('result pubdate: NONE')
550 
551  return ret
552 
553 
554  # # Apply UTC timezone use tzInfo to datetime object
555  #
556  # @param dt - datetime instance
557  # @param tzName - name of timezone
558  # @param timezonesDict - dictionary with timezones
559  # @param logger - instance of logger for log if necessary
560  # @param isExtendLog - boolean flag for allowed extend logging if True or only error message otherwise
561  # @return datetime instance, already modified if success
562  @staticmethod
563  def applyUtcTimezone(dt, tzName, timezonesDict=DateTimeTimezones.timezonesDict, logger=None, isExtendLog=False): # pylint: disable=W0102
564  if logger is not None and isExtendLog:
565  logger.debug("applyUtcTimezone enter ...")
566 
567  if dt is not None and tzName in timezonesDict and len(timezonesDict[tzName]) > 1:
568  rawOffset = timezonesDict[tzName][1]
569 
570  isNegative = False
571  if '−' in rawOffset or '-' in rawOffset:
572  isNegative = True
573  if logger is not None and isExtendLog:
574  logger.debug("isNegative: " + str(isNegative))
575 
576  for pattern in DateTimeType.patternListUtcTimezones:
577  match = re.search(pattern, rawOffset)
578  if match:
579  hours = 0
580  if 'hours' in match.groupdict():
581  hours = int(match.groupdict()['hours'])
582 
583  minutes = 0
584  if 'minutes' in match.groupdict():
585  minutes = int(match.groupdict()['minutes'])
586 
587  if logger is not None and isExtendLog:
588  logger.debug("hours: " + str(hours) + " minutes: " + str(minutes))
589 
590  tzInfo = OffsetTzInfo(isNegative, hours, minutes)
591  dt = dt.replace(tzinfo=tzInfo)
592  if logger is not None and isExtendLog:
593  logger.debug("tzname: " + str(dt.tzname()))
594  break
595 
596  return dt
597 
598 
599  # #Split datetime and timezone string
600  #
601  # @param dt - datetime instance
602  # @return datetime instance without tzInfo and timezone string
603  @staticmethod
604  def split(dt):
605  timezone = ''
606  if dt is not None:
607  timezone = dt.strftime('%z')
608  dt = dt.replace(tzinfo=None)
609 
610  return dt, timezone
611 
612 
613  # #Get timezone string
614  #
615  # @param dt - datetime instance
616  # @return timezone string
617  @staticmethod
618  def getTimezone(dt):
619  timezone = ''
620  if dt is not None:
621  timezone = dt.strftime('%z')
622 
623  return timezone
624 
625 
626  # # Check is allowed input string for next procedure of parse
627  #
628  # @param dataString - string for parse
629  # @param logger - instance of logger for log if necessary
630  # @param isExtendLog - boolean flag for allowed extend logging if True or only error message otherwise
631  # @return True if allowed or False otherwise
632  @staticmethod
633  def isAllowedInputString(dataString, logger=None, isExtendLog=False):
634  # variable for result
635  ret = True
636  if dataString is None or not isinstance(dataString, basestring):
637  ret = False
638  elif dataString != "" and dataString.isupper() and dataString.isalnum() and \
639  not dataString.isalpha():
640  ret = False
641  if logger and isExtendLog:
642  logger.debug('input string has not allowed format')
643 
644  return ret
645 
646 
647  # # Prepare input string (remove all bad simbols)
648  #
649  # @param inputStr - input string for preparation
650  # @return already prepared string
651  @staticmethod
652  def prepareString(inputStr):
653  ret = inputStr
654 
655  for tagName in DateTimeType.TAG_NAMES:
656  if inputStr.lower().find('%' + tagName + '%') > -1:
657  ret = ret.replace('%' + tagName + '%', '')
658  else:
659  pass
660 
661  for bad in DateTimeType.BAD_SIMBOLS:
662  ret = ret.replace(bad, ' ')
663 
664  ret = ret.replace(' ', ' ')
665 
666  return ret
667 
668 
669  # # Extract year, month and day from string
670  #
671  # @param inputStr - input string for extract date
672  # @param useCurrentYear - flag of default usage current year if wasn't selected
673  # @param logger - instance of logger for log if necessary
674  # @param isExtendLog - boolean flag for allowed extend logging if True or only error message otherwise
675  # @return year, month, day - trancated input string
676  @staticmethod
677  def extractDate(inputStr, useCurrentYear=True, logger=None, isExtendLog=False):
678  # variables for results
679  month = 0
680  day = 0
681  year = 0
682 
683  try:
684  for pattern in DateTimeType.patternListDate:
685  match = re.search(pattern, inputStr)
686  if logger and isExtendLog:
687  logger.debug('match: ' + str(match) + ' pattern: ' + str(pattern))
688 
689  if match:
690  if logger and isExtendLog:
691  logger.debug('match.groupdict(): ' + str(match.groupdict()))
692 
693  if 'short_year' in match.groupdict():
694  year = int(match.groupdict()['short_year']) + int(datetime.date.today().year // 1000 * 1000)
695 
696  if 'year' in match.groupdict():
697  year = match.groupdict()['year']
698 
699  if 'mon' in match.groupdict():
700  month = match.groupdict()['mon']
701  logger.debug('month: ' + month)
702  if month.isdigit() and int(month) > 12:
703  if logger and isExtendLog:
704  logger.debug('Bad month (' + str(month) + ') scipped!!!')
705  continue
706 
707  if 'day' in match.groupdict():
708  day = match.groupdict()['day']
709 
710  if logger and isExtendLog:
711  logger.debug('month = ' + month)
712 
713  monthNumber = DateTimeType.getMonthNumber(month, logger, isExtendLog)
714 
715  if logger and isExtendLog:
716  logger.debug('monthNumber = ' + str(monthNumber))
717 
718  if monthNumber is not None:
719  month = monthNumber
720  else:
721  month = day = year = 0
722 
723  if logger and isExtendLog:
724  logger.debug('year: ' + str(year) + ' month: ' + str(month) + ' day: ' + str(day))
725 
726  if int(year) > DateTimeType.MIN_ALLOWED_YEAR and int(year) <= datetime.date.today().year and \
727  int(month) <= 12 and int(day) <= 31:
728  # if int(year) == 0 and int(month) <= 12 and int(day) <= 31:
729  break
730 
731  if logger is not None and isExtendLog:
732  logger.debug('Match is good !!!')
733  break
734 
735  if useCurrentYear:
736  d = datetime.date.today()
737 
738  if year == 0 and month and day:
739  year = d.year
740 
741  if year and month and day == 0:
742  day = d.day
743 
744  except Exception, err:
745  if logger and isExtendLog:
746  logger.debug("inputStr: '" + str(inputStr) + "'")
747  if logger and ExceptionLog is not None:
748  ExceptionLog.handler(logger, err, 'extractDate:', (), \
749  {ExceptionLog.LEVEL_NAME_ERROR:ExceptionLog.LEVEL_VALUE_DEBUG})
750  elif logger:
751  logger.debug('extractDate:' + str(err))
752 
753  return int(year), int(month), int(day)
754 
755 
756  # # Extract hour, minute and second from string
757  #
758  # @param inputStr - input string for extract time
759  # @param logger - instance of logger for log if necessary
760  # @param isExtendLog - boolean flag for allowed extend logging if True or only error message otherwise
761  # @return hour, minute, second and tf (time format)
762  @staticmethod
763  def extractTime(inputStr, logger=None, isExtendLog=False): # pylint: disable=W0613
764 
765  hour = 0
766  minute = 0
767  second = 0
768  tf = ''
769 
770  try:
771  for pattern in DateTimeType.patternListTime:
772  match = re.search(pattern, inputStr)
773  # if logger and isExtendLog:
774  # logger.debug('pattern: ' + str(pattern))
775  if match:
776  # if logger and isExtendLog:
777  # logger.debug('match.groupdict(): ' + str(match.groupdict()))
778 
779  if 'hour' in match.groupdict():
780  hour = match.groupdict()['hour']
781 
782  if 'min' in match.groupdict():
783  minute = match.groupdict()['min']
784 
785  if 'sec' in match.groupdict():
786  second = match.groupdict()['sec']
787 
788  if 'tf' in match.groupdict():
789  tf = match.groupdict()['tf']
790 
791  break
792 
793  except Exception, err:
794  if logger and isExtendLog:
795  logger.debug("inputStr: '" + str(inputStr) + "'")
796  if logger and ExceptionLog is not None:
797  ExceptionLog.handler(logger, err, 'extractTime:', (), \
798  {ExceptionLog.LEVEL_NAME_ERROR:ExceptionLog.LEVEL_VALUE_DEBUG})
799  elif logger:
800  logger.debug('extractTime:' + str(err))
801 
802  return int(hour), int(minute), int(second), tf
803 
804 
# # Extract common date from string
#
# A run of 10 digits is read as a Unix timestamp and corrected to GMT; any other
# input is split into date and time parts with DateTimeType.extractDate() and
# DateTimeType.extractTime().
#
# @param inputStr - input string for preparation
# @param useCurrentYear - flag of default usage current year if wasn't selected
# @param logger - instance of logger for log if necessary
# @param isExtendLog - boolean flag for allowed extend logging if True or only error message otherwise
# @return extracted datetime or None
@staticmethod
def extractDateCommon(inputStr, useCurrentYear=True, logger=None, isExtendLog=False):
    # variable for results
    ret = None
    try:
        localStr = DateTimeType.prepareString(inputStr)

        if localStr:
            match = re.search(r'\d{10}', localStr)
            if match:
                ret = datetime.datetime.fromtimestamp(int(match.group()))

                # Current local-to-UTC offset. It is taken from the whole time delta,
                # rounded to minutes: subtracting the .hour and .minute fields
                # separately is wrong whenever local time and UTC fall on different
                # calendar days (e.g. 01:00 local against 22:00 UTC).
                tmDelta = datetime.datetime.now() - datetime.datetime.utcnow()
                offsetMinutes = int(round(tmDelta.total_seconds() / 60.0))
                isNegative = bool(offsetMinutes < 0)
                hours, minutes = divmod(abs(offsetMinutes), 60)
                if logger is not None and isExtendLog:
                    logger.debug("isNegative: " + str(isNegative) + " hours: " + str(hours) + " minutes: " + str(minutes))

                # Correct datetime value to GMT
                if isNegative:
                    ret = ret + datetime.timedelta(hours=hours, minutes=minutes)
                else:
                    ret = ret - datetime.timedelta(hours=hours, minutes=minutes)

                # Apply tzInfo: the value has just been shifted to GMT, so a zero offset is attached
                tzInfo = OffsetTzInfo(False, 0, 0)
                ret = ret.replace(tzinfo=tzInfo)
                if logger is not None and isExtendLog:
                    logger.debug("tzname: " + str(ret.tzname()))
            else:
                year, month, day = DateTimeType.extractDate(localStr, useCurrentYear, logger, isExtendLog)
                if logger and isExtendLog:
                    logger.debug('year: ' + str(year) + '\tmonth: ' + str(month) + '\tday: ' + str(day))

                hour, minute, second, tf = DateTimeType.extractTime(localStr, logger, isExtendLog)
                if logger and isExtendLog:
                    logger.debug('hour: ' + str(hour) + '\tminute: ' + str(minute) + '\tsecond: ' + str(second) +
                                 '\ttf: ' + str(tf))

                hour, minute = DateTimeType.checkTimeFormat(hour, minute, tf, logger, isExtendLog)

                if logger is not None and isExtendLog:
                    logger.debug('hour: ' + str(hour) + '\tminute: ' + str(minute))

                if year and month and day:
                    if useCurrentYear:
                        now = datetime.datetime.now()
                        # Today's date with a 00:00 time gets the current clock time instead
                        if month == now.month and day == now.day:
                            if hour == 0 and minute == 0:
                                hour = int(now.hour)
                                minute = int(now.minute)
                                second = now.second

                    ret = datetime.datetime(year, month, day, hour, minute, second, tzinfo=None)
                elif useCurrentYear and (year + month + day) == 0 and hour > 0 and minute > 0:
                    # Only a time was found: assume today, or yesterday when the hour is
                    # still ahead of the current one.
                    d = datetime.datetime.today()
                    if int(d.hour) < int(hour):
                        # Step the whole date back, not just the day number, so that the
                        # month and year follow on the first day of a month.
                        d = d - datetime.timedelta(days=1)

                    ret = datetime.datetime(d.year, d.month, d.day, hour, minute, second)

    except Exception as err:
        if logger is not None and isExtendLog:
            logger.debug("inputStr: '" + str(inputStr) + "'")
        if logger and ExceptionLog is not None:
            ExceptionLog.handler(logger, err, 'extractDateCommon:', (inputStr),
                                 {ExceptionLog.LEVEL_NAME_ERROR:ExceptionLog.LEVEL_VALUE_DEBUG})
        elif logger is not None:
            logger.debug('extractDateCommon:' + str(err))

    return ret
892 
893 
894  # # Checking is exist offset from PM time format
895  #
896  # @param hour - hour value
897  # @param minute - minute value
898  # @param tf - time format (can be 'AM', 'PM' or other)
899  # @param logger - instance of logger for log if necessary
900  # @param isExtendLog - boolean flag for allowed extend logging if True or only error message otherwise
901  # @return day, hour - result variables after change if necessary
902  @staticmethod
903  def checkTimeFormat(hour, minute, tf, logger=None, isExtendLog=False):
904  if logger is not None and isExtendLog:
905  logger.debug("tf: '%s'", str(tf))
906  if tf is not None:
907  logger.debug("find = %s", str(tf.lower().find('p')))
908 
909  if tf and tf.lower().find('p') > -1: # found 'PM'
910  if int(hour) == 12 and int(minute) == 0: # # 12:00 PM -> 12:00
911  pass
912  elif int(hour) >= 0 and int(hour) < 12 and int(minute) >= 0: # # 00:01 PM -> 12:01
913  hour += 12
914 
915  else:
916  if int(hour) == 12 and int(minute) == 0: # # 12:00 AM -> 00:00
917  hour = 0
918  elif int(hour) >= 0 and int(hour) < 12 and int(minute) >= 0: # # 00:01 AM -> 00:01
919  pass
920 
921  return hour, minute
922 
923 
# # Extract date from string by a day-state phrase ('Today', 'Yesterday', 'Day before yesterday')
#
# The phrases are read from DateTimeType.DAY_STATE_DICT[langName]; the position of
# the first phrase found in the input is the number of days subtracted from today.
#
# @param inputStr - input string for preparation
# @param useCurrentYear - flag of default usage current datetime if wasn't selected
# @param langName - const name of used language
# @param logger - instance of logger for log if necessary
# @param isExtendLog - boolean flag for allowed extend logging if True or only error message otherwise
# @return extracted datetime or None
@staticmethod
def extractUseDayState(inputStr, useCurrentYear, langName, logger, isExtendLog):
    result = None
    for daysBack, dayState in enumerate(DateTimeType.DAY_STATE_DICT[langName]):
        if inputStr.lower().find(dayState) < 0:
            continue

        if logger is not None and isExtendLog:
            logger.debug("!!! dayState: " + str(dayState))

        # The phrase only produces a value when falling back to the current date is allowed
        if useCurrentYear:
            today = datetime.date.today()
            hour, minute, second, tf = DateTimeType.extractTime(inputStr, logger, isExtendLog)  # pylint: disable=W0612
            moment = datetime.datetime.combine(today, datetime.time(hour, minute, second))
            result = moment - datetime.timedelta(days=daysBack)
        break

    return result
953 
954 
# # Extract date from string used time name ('7 hours')
#
# Only English input is handled. The number in front of 'hours' is used as the hour
# of the current day, with minutes, seconds and microseconds reset to zero.
#
# @param inputStr - input string for preparation
# @param useCurrentYear - flag of default usage current datetime if wasn't selected
# @param langName - const name of used language
# @return extracted datetime or None
@staticmethod
def extractUseTimePeriodName(inputStr, useCurrentYear, langName):
    # variable for results
    ret = None
    if useCurrentYear and langName == DateTimeType.LANG_ENG:
        match = re.search(r'(?P<hour>\d{1,2}) hours', inputStr, re.UNICODE)
        if match:
            hour = int(match.group('hour'))
            # datetime.replace() accepts hours 0..23 only; 24 used to slip through the
            # range check and raise ValueError instead of yielding "no result".
            if 0 <= hour <= 23:
                ret = datetime.datetime.now().replace(hour=hour, minute=0, second=0, microsecond=0, tzinfo=None)

    return ret
979 
980 
# # Extract date from string used period name left ('4 days left')
#
# Only English input is handled. The periods are tried in the order years, months,
# days, hours, minutes; the first one found is added to the current time.
#
# @param inputStr - input string for preparation
# @param useCurrentYear - flag of default usage current datetime if wasn't selected
# @param langName - const name of used language
# @return extracted datetime or None
@staticmethod
def extractUseTimePeriodNameLeft(inputStr, useCurrentYear, langName):
    if not (useCurrentYear and langName == DateTimeType.LANG_ENG):
        return None

    for period in (u'years', u'months', u'days', u'hours', u'minutes'):
        found = re.search(r'(?P<value>\d{1,2}).? ' + period + '.?left', inputStr)
        if not found:
            continue

        amount = int(found.group('value'))
        # The period names are exactly the keyword names relativedelta expects
        shifted = datetime.datetime.now() + relativedelta(**{str(period): +amount})
        return shifted.replace(second=0, microsecond=0, tzinfo=None)

    return None
1015 
1016 
# # Extract date from string used period name ago ('1 Hour Ago')
#
# Japanese input: 'N日前' (days ago) and 'N時間前' (hours ago).
# English input: 'N year(s)/month(s)/day(s)/hour(s)/minute(s) ago', in any letter case.
# When several patterns match, the last one tried determines the result.
#
# @param inputStr - input string for preparation
# @param useCurrentYear - flag of default usage current datetime if wasn't selected
# @param langName - const name of used language
# @param logger - instance of logger for log if necessary
# @return extracted datetime or None
@staticmethod
def extractUseTimePeriodNameAgo(inputStr, useCurrentYear, langName, logger):
    # variable for results
    ret = None
    if useCurrentYear and langName == DateTimeType.LANG_JAP:
        japanesePatterns = ((r'(?P<value>\d{1,2})日前', 'days'),
                            (r'(?P<value>\d{1,2})時間前', 'hours'))
        for pattern, unit in japanesePatterns:
            match = re.search(pattern, inputStr, re.UNICODE)
            if match is not None:
                value = int(match.group('value'))
                ret = datetime.datetime.now() + relativedelta(**{unit: -value})
                ret = ret.replace(second=0, microsecond=0, tzinfo=None)

    if useCurrentYear and langName == DateTimeType.LANG_ENG:
        for period in ('year', 'month', 'day', 'hour', 'minute'):
            # The optional plural 's' and case-insensitive matching accept the singular
            # and mixed-case spellings ("1 hour ago", "1 Day Ago") as well as every
            # form the previous per-spelling pattern list recognised.
            pattern = r'(?P<value>\d{1,2}).? ' + period + r's?.?ago'
            match = re.search(pattern, inputStr, re.UNICODE | re.IGNORECASE)
            if match:
                value = int(match.group('value'))
                # relativedelta keywords are the plural period names
                ret = datetime.datetime.now() + relativedelta(**{period + 's': -value})
                ret = ret.replace(second=0, microsecond=0, tzinfo=None)
                if logger is not None:
                    logger.debug("ret: %s", str(ret))

    return ret
1071 
1072 
1073  # # Check code page of input string
1074  #
1075  # @param inputStr - input string for preparation
1076  # @param logger - instance of logger for log if necessary
1077  # @param isExtendLog - boolean flag for allowed extend logging if True or only error message otherwise
1078  # @return True if input string id UTF-8 code page or False otherwise
1079  @staticmethod
1080  def isUtf8CodePage(inputStr, logger, isExtendLog):
1081  # variable for result
1082  isUtf8 = False
1083  try:
1084  inputStr.decode('utf-8')
1085  isUtf8 = True
1086  except Exception, err:
1087  if logger is not None and isExtendLog:
1088  logger.debug('inputStr.decode: ' + str(err))
1089 
1090  return isUtf8
1091 
1092 
1093  # # Change code page of input string to ascii
1094  #
1095  # @param inputStr - input string for preparation
1096  # @param logger - instance of logger for log if necessary
1097  # @param isExtendLog - boolean flag for allowed extend logging if True or only error message otherwise
1098  # @return string with ascii code page
1099  @staticmethod
1100  def changeCodePageToAscii(inputStr, logger=None, isExtendLog=False):
1101  # variable for result
1102  ret = inputStr
1103  try:
1104  dataString = inputStr.decode('latin-1')
1105  ret = dataString.encode('ascii', errors='ignore')
1106  except Exception, err:
1107  if logger and isExtendLog:
1108  logger.debug("inputStr.decode('latin-1') : " + str(err))
1109 
1110  return ret
1111 
1112 
# # Extract date from string used intelligent algorithms
#
# The language is detected with DateTimeType.getLang() unless it is given. The
# '... ago' extractor runs for every language; for non-Japanese input the day-state,
# time-period and '... left' extractors follow until one of them yields a value.
#
# @param inputStr - input string for preparation
# @param useCurrentYear - flag of default usage current year if wasn't selected
# @param logger - instance of logger for log if necessary
# @param isExtendLog - boolean flag for allowed extend logging if True or only error message otherwise
# @param langName - const name of used language
# @return extracted datetime or None
@staticmethod
def intelligentExtractor(inputStr, useCurrentYear=True, logger=None, isExtendLog=False, langName=None):
    # variable for results
    ret = None
    dataString = DateTimeType.prepareString(copy.copy(inputStr))
    try:
        if langName is None:
            langName = DateTimeType.getLang(dataString)

        if langName is not None:
            ret = DateTimeType.extractUseTimePeriodNameAgo(dataString, useCurrentYear, langName, logger)

            if langName != DateTimeType.LANG_JAP:
                if ret is None:
                    ret = DateTimeType.extractUseDayState(dataString, useCurrentYear, langName, logger, isExtendLog)
                if ret is None:
                    ret = DateTimeType.extractUseTimePeriodName(dataString, useCurrentYear, langName)
                if ret is None:
                    ret = DateTimeType.extractUseTimePeriodNameLeft(dataString, useCurrentYear, langName)

        # #TODO here extended functional in future
    except Exception as err:
        if logger is not None and isExtendLog:
            logger.debug("inputStr: '" + inputStr + "'")
        if logger is not None and ExceptionLog is not None:
            ExceptionLog.handler(logger, err, 'intelligentExtractor:', (inputStr),
                                 {ExceptionLog.LEVEL_NAME_ERROR:ExceptionLog.LEVEL_VALUE_DEBUG})
        elif logger:
            logger.debug('intelligentExtractor:' + str(err))

    if logger is not None and isExtendLog:
        logger.debug('intelligentExtractor return: ' + str(ret))

    return ret
1158 
1159 
# Conversion of a japanese pubdate to a datetime
#
# The cleaned string is first handed to dateutil's parse(). When anything in that
# path raises, the string is rewritten by DateTimeType.adjustJapaneseDate() and read
# with time.strptime(); an error in this second path is not caught here.
#
# @param rawPubdate - candidate for extract data
# @param logger - instance of logger for log if necessary
# @param isExtendLog - boolean flag for allowed extend logging if True or only error message otherwise
# @return extracted datetime
@staticmethod
def convertPubDateToRFC2822(rawPubdate, logger=None, isExtendLog=False):
    try:
        import calendar
        import unicodedata

        # replace all unicode digits to decimal (e.g. u'\uff13' to '3')
        if isinstance(rawPubdate, unicode):
            for symbol in rawPubdate:
                if symbol.isdigit():
                    rawPubdate = re.sub(symbol, str(unicodedata.digit(symbol)), rawPubdate)

            rawPubdate = rawPubdate.encode("utf_8")

        # Only the text in front of the first comma is kept
        pieces = rawPubdate.split(",")
        if len(pieces) > 1:
            rawPubdate = pieces[0]
        for noise in ("posted at", "Updated:"):
            rawPubdate = rawPubdate.replace(noise, "")
        rawPubdate = re.sub(r"\(1/\d{1}ページ\)", "", rawPubdate)

        # Try extract 'Heisei' period
        if "平成" in rawPubdate:
            year = DateTimeType.extractYearFromHeiseiPeriod(rawPubdate)
            if year is not None:
                if logger and isExtendLog:
                    logger.debug("'Heisei' period before: " + str(rawPubdate))
                rawPubdate = re.sub(r"平成(\d{1,2})", str(year), rawPubdate)
                if logger and isExtendLog:
                    logger.debug("'Heisei' period after: " + str(rawPubdate))

        for pattern, replacement in ((r"\(木\)", ""), (r" 年 ", "年"), (r"@", "")):
            rawPubdate = re.sub(pattern, replacement, rawPubdate)
        timestamp = float(calendar.timegm(parse(rawPubdate).timetuple()))

        if logger and isExtendLog:
            logger.debug("pubdate in seconds: %s", str(timestamp))
    except Exception as err:
        if logger is not None and isExtendLog:
            logger.debug("try replace rawPubdate return: " + str(err))
        rawPubdate = DateTimeType.adjustJapaneseDate(rawPubdate, logger, isExtendLog)

        fallbackFormat = u"%Y\xe5\xb9\xb4%m\xe6\x9c\x88%d\xe6\x97\xa5 %H:%M"
        timestamp = time.mktime(time.strptime(rawPubdate, fallbackFormat))

    # set result value
    return datetime.datetime.fromtimestamp(timestamp)
1218 
1219 
1220  # Adjust japanise date
1221  #
1222  # @param rawPubdate - candidate for extract data
1223  # @param logger - instance of logger for log if necessary
1224  # @param isExtendLog - boolean flag for allowed extend logging if True or only error message otherwise
1225  # @return rawPubdate - rawPubdate as string
1226  @staticmethod
1227  def adjustJapaneseDate(rawPubdate, logger=None, isExtendLog=False):
1228  # convert rawPubdate in format like
1229  # 8月20日 20:01
1230  # 9月3日 11時41分
1231  # (8月20日 紙面から)
1232  # 2014年8月7日
1233  # 2014年8月20日 夕刊
1234  # 2014年8月21日 朝刊>
1235  # 2014/8/14付
1236  # (2014/08/16-14:46)
1237  # 2014/6/3 18:53 (2014/6/3 20:13更新)
1238  # 2014.08.20 Wed posted at 17:52 JST
1239  # 2014.08.01 Fri posted at 12:36 JST
1240  # 2014年 08月 20日 14:34 JST
1241  # 2014年08月20日 19時46分
1242  # 2014年8月29日16時30分
1243  # 8/20 21:00 更新
1244  # 11月23日
1245  # to
1246  # 2014年8月7日 20:01
1247  # check if 日 exist in date
1248  if logger and isExtendLog:
1249  logger.debug("pubdate has to be converted: <<%s>>", rawPubdate)
1250  logger.debug("pubdate type: <<%s>>", str(type(rawPubdate)))
1251  # self.logger.debug("pubdate charcode: <<%s>>", str(icu.CharsetDetector(rawPubdate).detect().getName()))
1252  # rawPubdate = rawPubdate.decode("utf_8")
1253  # self.logger.debug("pubdate has to be converted: <<%s>>", rawPubdate)
1254  # self.logger.debug("pubdate type: <<%s>>", str(type(rawPubdate)))
1255  rawPubdate = rawPubdate.strip(" \r\t\n)()\xe6\x9b\xb4\xe6\x96\xb0\xef" +
1256  "\xbc\x89\xe5\x88\x86\xe4\xbb\x98JST\xe7\xb4\x99\xe9\x9d\xa2\xe3\x81\x8b\xe3\x82" +
1257  " \xe5\xa4\x95\xe5\x88\x8a\xe6\x9c\x9d\xe5\x88\x8a\xe3\x80\x80\xe5\x88\x86")
1258  # 2014.08.20 Wed posted at 17:52 JST
1259  # if "Wed posted at" in rawPubdate:
1260  # date_items = rawPubdate.replace(" Wed posted at ", " ").replace(".", " ").split()
1261  # rawPubdate = date_items[0] + "\xe5\xb9\xb4" + date_items[1] + "\xe6\x9c\x88" + date_items[2] +\
1262  # "\xe6\x97\xa5 " + date_items[3]
1263  if "\xe6\x97\xa5" in rawPubdate:
1264  rawPubdate = rawPubdate.replace("\xe6\x97\xa5", "\xe6\x97\xa5 ")
1265  # check if date contain year
1266  # 2014年08月20日 19時46分
1267  if "\xe5\xb9\xb4" in rawPubdate and "\xe6\x99\x82" in rawPubdate:
1268  rawPubdate = rawPubdate.replace("\xe6\x99\x82", ":")
1269  elif "\xe5\xb9\xb4" in rawPubdate and not "\xe6\x99\x82" in rawPubdate:
1270  rawPubdate = rawPubdate + " 00:00"
1271  rawPubdate = rawPubdate.replace(' ', ' ')
1272  # 2014年08月19日 11:53
1273  elif "\xe5\xb9\xb4" in rawPubdate and ":" in rawPubdate and rawPubdate.count(" ") == 1:
1274  pass
1275  # 2014年 08月 20日 14:34 JST
1276  elif "\xe5\xb9\xb4" in rawPubdate and ":" in rawPubdate and rawPubdate.count(" ") > 1:
1277  pos = rawPubdate.find('\xe3\x80\x80')
1278  if pos > 0:
1279  rawPubdate = rawPubdate[:pos]
1280 
1281  date_items = rawPubdate.split()
1282  if isinstance(date_items, list) and len(date_items) == 2:
1283  rawPubdate = date_items[0] + " " + date_items[1]
1284  elif isinstance(date_items, list) and len(date_items) == 3:
1285  rawPubdate = date_items[0] + date_items[1]
1286  elif isinstance(date_items, list) and len(date_items) == 4:
1287  rawPubdate = date_items[0] + date_items[1] + date_items[2] + " " + date_items[3]
1288  else:
1289  pass
1290  elif "\xe5\xb9\xb4" in rawPubdate:
1291  rawPubdate = str(time.gmtime().tm_year) + "\xe5\xb9\xb4" + rawPubdate
1292  # 2014/8/14付
1293  if rawPubdate.count("/") == 2:
1294  # date_items = rawPubdate.split("/")
1295  # (2014/08/16-14:46)
1296  date_items = re.split("/|-", rawPubdate)
1297  if isinstance(date_items, list) and len(date_items) == 4:
1298  rawPubdate = date_items[0] + "\xe5\xb9\xb4" + date_items[1] + "\xe6\x9c\x88" + date_items[2] + \
1299  "\xe6\x97\xa5 " + date_items[3]
1300  else:
1301  rawPubdate = date_items[0] + "\xe5\xb9\xb4" + date_items[1] + "\xe6\x9c\x88" + date_items[2] + \
1302  "\xe6\x97\xa5 00:00"
1303  # 8月20日 紙面から
1304  if not "\xe5\xb9\xb4" in rawPubdate and not "/" in rawPubdate:
1305  date_items = rawPubdate.replace("\xe6\x9c\x88", " ").replace("\xe6\x97\xa5", " ").\
1306  replace("\xe6\x99\x82", " ").split()
1307  if isinstance(date_items, list) and len(date_items) == 4:
1308  rawPubdate = str(time.gmtime().tm_year) + "\xe5\xb9\xb4" + date_items[0] + "\xe6\x9c\x88" + date_items[1] + \
1309  "\xe6\x97\xa5 " + date_items[2] + ":" + date_items[3]
1310  else:
1311  rawPubdate = str(time.gmtime().tm_year) + "\xe5\xb9\xb4" + date_items[0] + "\xe6\x9c\x88" + date_items[1] + \
1312  "\xe6\x97\xa5 00:00"
1313  # 8/20 21:00 更新
1314  if not "\xe5\xb9\xb4" in rawPubdate and "/" in rawPubdate:
1315  date_items = rawPubdate.replace("/", " ").split()
1316  # 2014/6/3 18:53 (2014/6/3 20:13更新)
1317  if len(date_items) > 4:
1318  rawPubdate = date_items[0] + "\xe5\xb9\xb4" + date_items[1] + "\xe6\x9c\x88" + date_items[2] + \
1319  "\xe6\x97\xa5 " + date_items[3]
1320  else:
1321  rawPubdate = str(time.gmtime().tm_year) + "\xe5\xb9\xb4" + date_items[0] + "\xe6\x9c\x88" + date_items[1] + \
1322  "\xe6\x97\xa5 " + date_items[2]
1323 
1324  if logger is not None and isExtendLog:
1325  logger.debug("pubdate converted is: <<%s>>", rawPubdate)
1326 
1327  return rawPubdate
1328 
1329 
1330  # #Exctract year for japanise date from Heisei period
1331  #
1332  # @param rawPubdate - input raw string content pubdate
1333  # @return year value if success or otherwise None
1334  @staticmethod
1336  # variable for result
1337  ret = None
1338  if "平成" in rawPubdate:
1339  startPeriodYear = 1988
1340  match = re.search(r'平成(?P<year>\d{1,2})年', rawPubdate)
1341  if match:
1342  if 'year' in match.groupdict():
1343  year = int(match.groupdict()['year'])
1344  ret = startPeriodYear + year
1345  else:
1346  match = re.search(r'(?P<year>\d{1,4})年', rawPubdate)
1347  if match:
1348  if 'year' in match.groupdict():
1349  ret = int(match.groupdict()['year'])
1350 
1351  return ret
1352 
1353 
# # Extract japanese date from Heisei period
#
# The year comes from DateTimeType.extractYearFromHeiseiPeriod(); month and day are
# the texts between the 年/月 and 月/日 marks; the time, if any, is read with
# DateTimeType.extractTime(). Any failure is logged and gives None.
#
# @param rawPubdate - input raw string content pubdate
# @param logger - instance of logger for log if necessary
# @param isExtendLog - boolean flag for allowed extend logging if True or only error message otherwise
# @return datetime instance if success or otherwise None
@staticmethod
def extractDateFromHeiseiPeriod(rawPubdate, logger=None, isExtendLog=False):
    result = None
    try:
        year = DateTimeType.extractYearFromHeiseiPeriod(rawPubdate)
        if year is not None:
            if logger and isExtendLog:
                logger.debug('rawPubdate: ' + str(rawPubdate))
                logger.debug('year: ' + str(year))

            # month: text between the year mark and the month mark
            afterYear = rawPubdate.find('年') + len('年')
            monthMarkPos = rawPubdate.find('月')
            month = rawPubdate[afterYear:monthMarkPos]
            if logger and isExtendLog:
                logger.debug('month: ' + str(month))
            month = int(unicode(month))

            # day: text between the month mark and the day mark
            day = rawPubdate[monthMarkPos + len('月'):rawPubdate.find('日')]
            if logger and isExtendLog:
                logger.debug('day: ' + str(day))
            day = int(unicode(day))

            # time of day, if the string carries one
            hour, minute, second, tf = DateTimeType.extractTime(rawPubdate, logger, isExtendLog)
            if logger is not None and isExtendLog:
                logger.debug("hour: %s, minute: %s, second: %s, tf: %s", str(hour), str(minute), str(second), str(tf))

            result = datetime.datetime(year=year, month=month, day=day, hour=hour, minute=minute, second=second,
                                       tzinfo=None)
    except Exception as err:
        if logger is not None and isExtendLog:
            logger.debug("Extract 'Heisei' period has error: " + str(err))

    return result
1400 
1401 
1402  # #Replace japan simbols to unicode
1403  #
1404  # @param rawPubdate - input raw string content pubdate
1405  # @param logger - instance of logger for log if necessary
1406  # @param isExtendLog - boolean flag for allowed extend logging if True or only error message otherwise
1407  # @return rawPubdate - already modified string
1408  @staticmethod
1409  def replaceJapanSimbols(rawPubdate, logger=None, isExtendLog=False):
1410  simbolsDict = {'-':'-', '.':'.', ':':':', '/':'/', ',':',', '・':'.', 'ー':'-', \
1411  '0':'0', '1':'1', '2':'2', '3':'3', '4':'4', '5':'5', '6':'6', '7':'7', '8':'8', '9':'9'}
1412  # replace simbols
1413  for key, value in simbolsDict.items():
1414  try:
1415  rawPubdate = rawPubdate.replace(key, value)
1416  except Exception, err:
1417  if logger is not None and isExtendLog:
1418  logger.debug(str(err))
1419 
1420  return rawPubdate
1421 
1422 
# # Convert timezone name to utc offset
#
# The second element of the dictionary entry is used; when it contains 'UTC', the
# text after its first three characters is returned.
#
# @param tzName - name of timezone
# @param timezonesDict - dictionary with timezones
# @return utc offset as string, or None when the name or the 'UTC' text is missing
@staticmethod
def utcOffset(tzName, timezonesDict=DateTimeTimezones.timezonesDict):  # pylint: disable=W0102
    if tzName not in timezonesDict:
        return None

    entry = timezonesDict[tzName]
    if len(entry) > 1 and entry[1].find('UTC') > -1:
        return entry[1][len('UTC'):]

    return None
1439 
1440 
# # Extract utc offset from string
#
# The timezone names of timezonesDict are searched first; the first name contained
# in the input ends that search. If this gives no offset, the patterns of
# DateTimeType.patternListTimezoneOffset are matched against the start of the input.
#
# @param inputStr - input string for extract time
# @param logger - instance of logger for log if necessary
# @param isExtendLog - boolean flag for allowed extend logging if True or only error message otherwise
# @param timezonesDict - dictionary with timezones
# @return utc offset numeric as string, or None
@staticmethod
def extractUtcOffset(inputStr, logger=None, isExtendLog=False, timezonesDict=DateTimeTimezones.timezonesDict):  # pylint: disable=W0102
    verbose = logger is not None and isExtendLog
    offset = None

    if inputStr is not None:
        for tzName in timezonesDict.keys():
            if inputStr.find(tzName) > -1:
                offset = DateTimeType.utcOffset(tzName, timezonesDict)
                if verbose:
                    logger.debug('Timezone: ' + str(tzName) + ' offset: ' + str(offset))
                break

    if offset is None:
        try:
            for pattern in DateTimeType.patternListTimezoneOffset:
                found = re.match(pattern, inputStr)
                if verbose:
                    logger.debug('inputStr: ' + str(inputStr) + ' pattern: ' + str(pattern) + ' match: ' + str(found))
                if found:
                    named = found.groupdict()
                    if 'offset' in named:
                        offset = named['offset']
                    break

        except Exception as err:
            if verbose:
                logger.debug('extractUtcOffset error: ' + str(err))

    if verbose:
        logger.debug("!!! ret: %s", str(offset))
    return offset
1478 
1479 
# # Extract timezone name from string
#
# A name counts as found when it occurs in the input directly after a space.
#
# @param inputStr - input string for extract time
# @param logger - instance of logger for log if necessary
# @param isExtendLog - boolean flag for allowed extend logging if True or only error message otherwise
# @param timezonesDict - dictionary with timezones
# @return utc timezone name as string, in case of fail extraction return empty string
@staticmethod
def extractUtcTimezoneName(inputStr, logger=None, isExtendLog=False, timezonesDict=DateTimeTimezones.timezonesDict):  # pylint: disable=W0102
    # variable for result
    ret = ''
    if inputStr is not None:
        # Logged only after the None check: concatenating None raises TypeError
        if logger is not None and isExtendLog:
            logger.debug('inputStr: ' + inputStr)

        for key in timezonesDict.keys():
            pos = inputStr.find(key)
            # pos has to be > 0: at position 0 there is no preceding character, and
            # inputStr[pos - 1] would wrap around to the last character of the string.
            if pos > 0 and inputStr[pos - 1] == ' ':
                ret = key
                if logger is not None and isExtendLog:
                    logger.debug('Timezone name: ' + str(key))
                break

    return ret
1502 
1503 
1504  # # Extract timezone name from string
1505  # @param inputStr - input string for extract time
1506  # @param logger - instance of logger for log if necessary
1507  # @param isExtendLog - boolean flag for allowed extend logging if True or only error message otherwise
1508  # @return normalized string
1509  @staticmethod
1510  def normalizeTimezone(inputStr, logger=None, isExtendLog=False):
1511  # variable for result
1512  ret = inputStr
1513  if inputStr is not None:
1514  pos = inputStr.rfind('+')
1515  length = len('+')
1516  if pos == -1:
1517  pos = inputStr.rfind('-')
1518  length = len('-')
1519 
1520  if pos > -1:
1521  oldValue = inputStr[pos + length:].strip()
1522  newValue = ''
1523  if logger is not None and isExtendLog:
1524  logger.debug('oldValue: ' + str(oldValue))
1525  if oldValue.isdigit() and len(oldValue) > 1:
1526  newValue = '0' + oldValue[:1] + ':00'
1527  else:
1528  match = re.search(r'(?P<tzone>\d{1,4})Z', oldValue)
1529  if match:
1530  if 'tzone' in match.groupdict():
1531  newValue = match.groupdict()['tzone']
1532 
1533  if logger is not None and isExtendLog:
1534  logger.debug('newValue: ' + str(newValue))
1535  ret = inputStr.replace(oldValue, newValue)
1536 
1537  return ret
1538 
1539 
1540  # # Convert to UTC offset form
1541  #
1542  # @param dt - datetime instance
1543  # @return datetime instance without tzInfo and timezone string
1544  @staticmethod
1545  def toUTC(dt):
1546  # variable for result
1547  ret = dt
1548  if dt.utcoffset() is not None:
1549  ret = dt - dt.utcoffset()
1550  ret = ret.replace(tzinfo=None)
1551 
1552  return ret
def __init__(self, dataString=None, formatString=None)
def normalizeTimezone(inputStr, logger=None, isExtendLog=False)
def extractDateEng(inputStr, useCurrentYear, logger=None, isExtendLog=False)
def extractDateGerman(inputStr, useCurrentYear, logger=None, isExtendLog=False)
def utcOffset(tzName, timezonesDict=DateTimeTimezones.timezonesDict)
def extractUtcTimezoneName(inputStr, logger=None, isExtendLog=False, timezonesDict=DateTimeTimezones.timezonesDict)
def extractTime(inputStr, logger=None, isExtendLog=False)
def __init__(self, isNegative=False, hours=0, minutes=0)
Definition: DateTimeType.py:41
def extractDateFromHeiseiPeriod(rawPubdate, logger=None, isExtendLog=False)
def changeCodePageToAscii(inputStr, logger=None, isExtendLog=False)
def checkTimeFormat(hour, minute, tf, logger=None, isExtendLog=False)
def extractDateCommon(inputStr, useCurrentYear=True, logger=None, isExtendLog=False)
def parse(dataString, useCurrentYear=True, logger=None, isExtendLog=False)
def extractYearFromHeiseiPeriod(rawPubdate)
def extractDateUkr(inputStr, useCurrentYear, logger=None, isExtendLog=False)
def getString(self, formatString=None)
def extractUtcOffset(inputStr, logger=None, isExtendLog=False, timezonesDict=DateTimeTimezones.timezonesDict)
def extractUseTimePeriodName(inputStr, useCurrentYear, langName)
def applyUtcTimezone(dt, tzName, timezonesDict=DateTimeTimezones.timezonesDict, logger=None, isExtendLog=False)
def isAllowedInputString(dataString, logger=None, isExtendLog=False)
def intelligentExtractor(inputStr, useCurrentYear=True, logger=None, isExtendLog=False, langName=None)
def convertPubDateToRFC2822(rawPubdate, logger=None, isExtendLog=False)
def extractDateJapan(inputStr, useCurrentYear, logger=None, isExtendLog=False)
def isUtf8CodePage(inputStr, logger, isExtendLog)
def extractUseTimePeriodNameLeft(inputStr, useCurrentYear, langName)
def __initDataTime(self, dataString=None, formatString=None)
def extractUseDayState(inputStr, useCurrentYear, langName, logger, isExtendLog)
def adjustJapaneseDate(rawPubdate, logger=None, isExtendLog=False)
def getMonthNumber(inputStr, logger, isExtendLog=False)
def extractDateRus(inputStr, useCurrentYear, logger=None, isExtendLog=False)
def extractUseTimePeriodNameAgo(inputStr, useCurrentYear, langName, logger)
def extractDate(inputStr, useCurrentYear=True, logger=None, isExtendLog=False)
def getTracebackInfo(linesNumberMax=None)
Definition: Utils.py:218
def replaceJapanSimbols(rawPubdate, logger=None, isExtendLog=False)
def getLang(inputStr, logger=None, isExtendLog=False)