3 HCE project, Python bindings, Distributed Tasks Manager application. 4 DateTimeType Class content main functional extract of datetime. 8 @author Alexander Vybornyh <alexander.hce.cluster@gmail.com> 9 @link: http://hierarchical-cluster-engine.com/ 10 @copyright: Copyright © 2013-2015 IOIX Ukraine 11 @license: http://hierarchical-cluster-engine.com/license/ 18 from datetime
import tzinfo
23 from dateutil.relativedelta
import relativedelta
24 from dateutil.tz
import gettz
25 from dateutil
import parser
34 import DateTimeTimezones
41 def __init__(self, isNegative=False, hours=0, minutes=0):
49 ret = datetime.timedelta(hours=self.
hours, minutes=self.
minutes)
56 return datetime.timedelta(0)
72 MIN_ALLOWED_YEAR = 2000
73 MIN_ALLOWED_LEN_FOR_DATEUTILS = 10
75 BAD_SIMBOLS =
'=(),|@`' 76 TAG_NAMES = [
'pubdate',
'dc_date']
84 wordsListEng = [
u'Jan',
u'Feb',
u'Mar',
u'Apr',
u'May',
u'Jun',
u'Jul',
u'Aug',
u'Sep',
u'Oct',
u'Nov',
u'Dec', \
85 u'Year',
u'Today',
u'Yesterday',
u'Day before yesterday',
u'year',
u'month',
u'day',
u'hour', \
87 wordsListRus = [
u'Янв',
u'Февр',
u'Мар',
u'Апр',
u'Май',
u'Июнь',
u'Июль',
u'Авг',
u'Сент',
u'Окт',
u'Нояб', \
88 u'Дек',
u'Сегодня',
u'Вчера',
u'Поза вчера',
u'Июня',
u'Июля']
89 wordsListUkr = [
u'Сiч',
u'Лют',
u'Бер',
u'Квiт',
u'Трав',
u'Черв',
u'Лип',
u'Серп',
u'Вер',
u'Жовт',
u'Лист', \
90 u'Груд',
u'Рік',
u'Сьогодні',
u'Вчора',
u'Позавчора']
91 wordsListGer = [
u'März',
u'Mai',
u'Juni',
u'Juli',
u'Sept',
u'Okt',
u'Dez',
u'Jahr',
u'Heute',
u'Gestern', \
92 u'Vorgestern',
u'Uhr']
93 wordsListJap = [
u'一月',
u'二月',
u'三月',
u'四月',
u'五月',
u'六月',
u'七月',
u'八月',
u'九月',
u'十月',
u'十一月', \
94 u'十二月',
u'年',
u'今日',
u'イエスタデイ',
u'おととい',
u'月',
u'日',
u'時',
u'分',
u'付',
u'更新']
96 monthListEng = [
u'Jan',
u'Feb',
u'Mar',
u'Apr',
u'May',
u'Jun',
u'Jul',
u'Aug',
u'Sep',
u'Oct',
u'Nov',
u'Dec']
97 monthListRus = [
u'Янв',
u'Февр',
u'Март',
u'Апр',
u'Май',
u'Июнь',
u'Июль',
u'Авг',
u'Сент',
u'Окт',
u'Нояб',
u'Дек']
98 monthListUkr = [
u'Сiч',
u'Лют',
u'Бер',
u'Квiт',
u'Трав',
u'Черв',
u'Лип',
u'Серп',
u'Вер',
u'Жовт',
u'Лист',
u'Груд']
99 monthListGer = [
u'Jan',
u'Feb',
u'März',
u'Apr',
u'Mai',
u'Juni',
u'Juli',
u'Aug',
u'Sept',
u'Okt',
u'Nov',
u'Dez']
100 monthListJap = [
u'一月',
u'二月',
u'三月',
u'四月',
u'五月',
u'六月',
u'七月',
u'八月',
u'九月',
u'十月',
u'十一月',
u'十二月']
101 monthListRusBad = [
u'Янв',
u'Февр',
u'Март',
u'Апр',
u'Май',
u'Июня',
u'Июля',
u'Авг',
u'Сент',
u'Окт',
u'Нояб', \
104 dayStateEng = [
u'Today',
u'Yesterday',
u'Day before yesterday']
105 dayStateRus = [
u'Сегодня',
u'Вчера',
u'Поза вчера']
106 dayStateUkr = [
u'Сьогодні',
u'Вчора',
u'Позавчора']
107 dayStateGer = [
u'Heute',
u'Gestern',
u'Vorgestern']
108 dayStateJap = [
u'今日',
u'イエスタデイ',
u'おととい']
109 dayStateRusStr = [
'Сегодня',
'Вчера',
'Поза вчера']
110 dayStateUkrStr = [
'Сьогодні',
'Вчора',
'Позавчора']
112 LANG_DICT = {LANG_ENG: wordsListEng, LANG_RUS: wordsListRus, LANG_UKR: wordsListUkr, LANG_GER: wordsListGer, \
113 LANG_JAP: wordsListJap}
115 MONTH_DICT = {LANG_ENG: monthListEng, LANG_RUS: monthListRus, LANG_UKR: monthListUkr, LANG_GER: monthListGer, \
116 LANG_JAP: monthListJap, LANG_RUS: monthListRusBad}
118 DAY_STATE_DICT = {LANG_ENG: dayStateEng, LANG_RUS: dayStateRus, LANG_UKR: dayStateUkr, LANG_GER: dayStateGer, \
119 LANG_JAP: dayStateJap, LANG_RUS: dayStateRusStr, LANG_UKR: dayStateUkrStr}
121 patternListDate = [
r'(?P<mon>[ 1][0-9]) (?P<day>[ 0123][0-9]) (?P<year>[0-9][0-9][0-9][0-9])',
122 r'(?P<mon>\d{1,2})/(?P<day>[ 0123][0-9])/(?P<year>[0-9][0-9][0-9][0-9])',
123 r'(?P<mon>\d{1,2})/(?P<day>[0-9])/(?P<year>[0-9][0-9][0-9][0-9])',
124 r'(?P<mon>\d{1,2})-(?P<day>[0-9][0-9])-(?P<year>[0-9][0-9][0-9][0-9])',
125 r'(?P<mon>\d{1,2})/(?P<day>[0123][0-9])/(?P<short_year>[0-9][0-9])',
126 r'(?P<mon>\d{1,2})/(?P<day>[0-9])/(?P<short_year>[0-9][0-9])',
127 r'(?P<mon>[A-Z][a-z][a-z]) (?P<day>[ 0123][0-9]) (?P<year>[0-9][0-9][0-9][0-9])',
128 r'(?P<mon>[A-Z][a-z][a-z]) (?P<day>[0-9]) (?P<year>[0-9][0-9][0-9][0-9])',
129 r'(?P<mon>\w+) (?P<day>[0-9]) (?P<year>[0-9][0-9][0-9][0-9])',
130 r'(?P<mon>\w+) (?P<day>[ 0123][0-9]) (?P<year>[0-9][0-9][0-9][0-9])',
131 r'(?P<mon>\w+) (?P<day>[0-9])(\w{2}) (?P<year>[0-9][0-9][0-9][0-9])',
132 r'(?P<mon>\w+) (?P<day>[ 0123][0-9])(\w{2}) (?P<year>[0-9][0-9][0-9][0-9])',
133 r'(?P<mon>\W+) (?P<day>\d{2})(\w{2}) (?P<year>[0-9][0-9][0-9][0-9])',
134 r'(?P<mon>\w+) (?P<day>\d{1,2})(\W+\d{1,2}) (?P<year>\d{4})',
135 r'(?P<mon>\w+). (?P<day>\d{1,2}) (?P<year>[0-9][0-9][0-9][0-9])',
136 r'(?P<day>[0-9][0-9]) (?P<mon>\w+) (?P<year>[0-9][0-9][0-9][0-9])',
137 r'(?P<day>[0-9][0-9]) (?P<mon>\W+) (?P<year>[0-9][0-9][0-9][0-9])',
138 r'(?P<year>[0-9][0-9][0-9][0-9]) (?P<mon>[0-1][0-9]) (?P<day>[0123][0-9])',
139 r'(?P<year>[0-9][0-9][0-9][0-9])-(?P<mon>[0-1][0-9])-(?P<day>[0123][0-9])',
140 r'(?P<year>[0-9][0-9][0-9][0-9])/(?P<mon>[0-1][0-9])/(?P<day>[0123][0-9])',
141 r'(?P<year>[0-9][0-9][0-9][0-9])/(?P<mon>[0-9])/(?P<day>[0123][0-9])',
142 r'(?P<year>[0-9][0-9][0-9][0-9])\.(?P<mon>[0-1][0-9])\.(?P<day>[0123][0-9])',
143 r'(?P<year>[0-9][0-9][0-9][0-9])(?P<mon>[0-1][0-9])(?P<day>[0123][0-9])',
144 r'(?P<day>[0-9][0-9])(\w{2}) (?P<mon>\w+) (?P<year>[0-9][0-9][0-9][0-9])',
145 r'(?P<day>[0-9])(\w{2}) (?P<mon>\w+) (?P<year>[0-9][0-9][0-9][0-9])',
146 r'(?P<day>[0-9]) (?P<mon>\w+) (?P<year>[0-9][0-9][0-9][0-9])',
147 r'(?P<day>[0-9]) (?P<mon>\W+) (?P<year>[0-9][0-9][0-9][0-9])',
148 r'(?P<day>[0-9][0-9])/(?P<mon>[0-9][0-9])/(?P<year>[0-9][0-9][0-9][0-9])',
149 r'(?P<day>[0-9][0-9]).(?P<mon>[0-9][0-9]).(?P<year>[0-9][0-9][0-9][0-9])',
150 r'(?P<day>\d{2})/(?P<mon>[0-9][0-9])/(?P<year>[0-9][0-9][0-9][0-9])',
151 r'(?P<day>\d{2})/(?P<mon>\d+})/(?P<year>[0-9][0-9][0-9][0-9])',
152 r'(?P<day>\d{1,2})\.(?P<mon>\d{1,2})\.(?P<year>[0-9][0-9][0-9][0-9])',
153 r'(?P<day>\d{1,2})\. (?P<mon>\w+) (?P<year>[0-9][0-9][0-9][0-9])',
154 r'(?P<day>\d{1,2})\. (?P<mon>\W+) (?P<year>[0-9][0-9][0-9][0-9])',
155 r'(?P<day>[0-9][0-9]) (?P<mon>\w+)',
156 r'(?P<day>[0-9][0-9]) (?P<mon>\W+)',
157 r'(?P<day>[0-9]) (?P<mon>\w+)',
158 r'(?P<day>[0-9]) (?P<mon>\W+)',
159 r'(?P<mon>\d{1,2})\.(?P<day>[0123][0-9])\.(?P<short_year>[0-9][0-9])',
160 r'(?P<mon>\d{1,2})\.(?P<day>[0-9])\.(?P<short_year>[0-9][0-9])',
161 r'(?P<day>[0123][0-9])\.(?P<mon>[01][0-9])\.(?P<short_year>[0-9][0-9])',
162 r'(?P<mon>\w+) (?P<year>[0-9][0-9][0-9][0-9])',
163 r'(?P<mon>\w+) (?P<day>\d{1,2})',
164 r'(?P<mon>\W+) (?P<day>\d{1,2})']
166 patternListTime = [
r'(?P<hour>[ 0-9][0-9]):(?P<min>\d{2}):(?P<sec>\d{2}) (?P<tf>[PpAaMm]{2})',
167 r'(?P<hour>[ 0-9][0-9]):(?P<min>\d{2}):(?P<sec>\d{2}) (?P<tf>[PpAaMm]{2})',
168 r'(?P<hour>[ 0-9][0-9]):(?P<min>\d{2}):(?P<sec>\d{2})(?P<tf>[PpAaMm]{2})',
169 r'(?P<hour>[ 0-9][0-9]):(?P<min>\d{2}):(?P<sec>\d{2})(?P<tf>[PpAaMm]{2})',
170 r'(?P<hour>[ 0-9][0-9]):(?P<min>\d{2}) (?P<tf>[PpAaMm]{2})',
171 r'(?P<hour>[ 0-9][0-9]):(?P<min>\d{2}) (?P<tf>[PpAaMm]{2})',
172 r'(?P<hour>[ 0-9][0-9]):(?P<min>\d{2})(?P<tf>[PpAaMm]{2})',
173 r'(?P<hour>[ 0-9][0-9]):(?P<min>\d{2})(?P<tf>[PpAaMm]{2})',
174 r'(?P<hour>[ 0-9][0-9]):(?P<min>\d{2}):(?P<sec>[0-9][0-9])',
175 r'(?P<hour>[ 0-9][0-9])(?P<min>\d{2}) GMT',
176 r'(?P<hour>[ 0-9][0-9]):(?P<min>\d{2}) GMT',
177 r'(?P<hour>[ 0-9][0-9]):(?P<min>\d{1,2})']
179 patternListTimezoneOffset = [
r'(?P<offset>[+-][0-9][0-9]:[0-9][0-9])',
180 r'(?P<offset>[+-][0-9][0-9][0-9][0-9])',
181 r'(?P<offset>[+-]\d{1,2})',
182 r'(?P<offset>[−][0-9][0-9])']
184 patternListUtcTimezones = [
r'(?P<hours>[0-9][0-9]):(?P<minutes>[0-9][0-9])',
185 r'(?P<hours>[0-9][0-9])']
188 ERROR_INPUT_PARAMS =
'Error initialization by input parameters.' 189 ERROR_FORMAT_STRING_TYPE =
'Format string is not string.' 190 ERROR_DATA_STRING_TYPE =
'Data string is not string.' 191 ERROR_BAD_INPUT_DATA =
'Bad inputted data.' 197 def __init__(self, dataString=None, formatString=None):
205 except Exception, err:
217 if dataString
is not None and isinstance(dataString, int):
218 ret = datetime.datetime.fromtimestamp(dataString)
219 elif dataString
is not None and formatString
is not None:
221 if not isinstance(formatString, str):
226 if not isinstance(dataString, str):
231 ret = datetime.datetime.strptime(dataString, formatString)
232 elif dataString
is None and formatString
is None:
248 ret = int((self.
datetime - datetime.datetime.fromtimestamp(0)).total_seconds())
249 except Exception, err:
266 if formatString
is None:
269 if not isinstance(formatString, str):
272 ret = self.
datetime.strftime(formatString)
273 except Exception, err:
288 def getLang(inputStr, logger=None, isExtendLog=False):
291 langDict = DateTimeType.LANG_DICT
293 dataString = inputStr
295 dataString = unicode(inputStr,
'utf-8',
'ignore')
296 except Exception, err:
297 if logger
and isExtendLog:
298 logger.debug(
"getLang: '" + str(err) +
"'")
301 for key
in langDict.keys():
302 for word
in langDict[key]:
303 if dataString.lower().find(word) > -1
or dataString.lower().find(word.lower()) > -1:
310 except Exception, err:
311 if logger
and isExtendLog:
312 logger.debug(
"getLang: '" + str(err) +
"'")
314 ret = DateTimeType.LANG_ENG
327 if logger
and isExtendLog:
328 logger.debug(
"getMonthNumber inputStr: '" + inputStr +
"' type: " + str(
type(inputStr)))
331 inputStr = inputStr.strip()
333 if len(inputStr) < 3
and inputStr.isdigit():
337 monthDict = DateTimeType.MONTH_DICT
339 for key
in monthDict.keys():
344 for months
in monthDict[key]:
345 monthNumber = monthNumber + 1
346 month = months.lower()
347 inputMonth = inputStr
349 inputMonth = inputStr.decode(
'utf-8')
350 except UnicodeError, err:
351 if logger
is not None and isExtendLog:
352 logger.debug(
"Operation decode'utf-8' has error: " + str(err))
357 if inputMonth.lower().find(month) > -1
or month.lower().find(inputMonth.lower()) > -1:
376 pubdate = DateTimeType.intelligentExtractor(inputStr, useCurrentYear, logger, isExtendLog, DateTimeType.LANG_ENG)
378 pubdate = DateTimeType.extractDateCommon(inputStr, useCurrentYear, logger, isExtendLog)
392 pubdate = DateTimeType.intelligentExtractor(inputStr, useCurrentYear, logger, isExtendLog, DateTimeType.LANG_RUS)
394 pubdate = DateTimeType.extractDateCommon(inputStr, useCurrentYear, logger, isExtendLog)
407 pubdate = DateTimeType.intelligentExtractor(inputStr, useCurrentYear, logger, isExtendLog, DateTimeType.LANG_UKR)
409 pubdate = DateTimeType.extractDateCommon(inputStr, useCurrentYear, logger, isExtendLog)
423 pubdate = DateTimeType.intelligentExtractor(inputStr, useCurrentYear, logger, isExtendLog, DateTimeType.LANG_GER)
425 pubdate = DateTimeType.extractDateCommon(inputStr, useCurrentYear, logger, isExtendLog)
440 inputStr = DateTimeType.replaceJapanSimbols(inputStr, logger, isExtendLog)
443 pubdate = DateTimeType.intelligentExtractor(inputStr, useCurrentYear, logger, isExtendLog, DateTimeType.LANG_JAP)
446 match = re.search(
r'[0-9]', inputStr)
449 pubdate = DateTimeType.convertPubDateToRFC2822(inputStr, logger, isExtendLog)
450 except Exception, err:
451 if logger
and isExtendLog:
452 logger.debug(
'extractDateJapan: ' + str(err))
455 pubdate = DateTimeType.extractDateFromHeiseiPeriod(inputStr, logger, isExtendLog)
458 pubdate = DateTimeType.extractDateCommon(inputStr, useCurrentYear, logger, isExtendLog)
471 def parse(dataString, useCurrentYear=True, logger=None, isExtendLog=False):
474 if logger
is not None and isExtendLog:
475 logger.debug(
"inputStr: '" + dataString +
"'")
477 if DateTimeType.isAllowedInputString(dataString, logger, isExtendLog):
478 if len(dataString) >= int(DateTimeType.MIN_ALLOWED_LEN_FOR_DATEUTILS):
480 if not DateTimeType.isUtf8CodePage(dataString, logger, isExtendLog):
481 dataString = DateTimeType.changeCodePageToAscii(dataString, logger, isExtendLog)
483 if logger
is not None and isExtendLog:
484 logger.debug(
"try use 'dateutil'")
486 ret = parser.parse(dataString)
492 if logger
is not None and isExtendLog:
493 logger.debug(
"'dateutil' return: " + str(ret.isoformat(DateTimeType.ISO_SEP)))
494 ret = ret.replace(microsecond=0)
495 except Exception, err:
496 if logger
is not None and isExtendLog:
497 logger.debug(
"'dateutil' can not parse: " + str(err))
499 normalizedString = DateTimeType.normalizeTimezone(dataString, logger, isExtendLog)
500 if dataString != normalizedString:
501 if logger
is not None and isExtendLog:
502 logger.debug(
"retry parsing use 'dateutil'")
503 ret = parser.parse(normalizedString)
504 if ret
is not None and logger
is not None and isExtendLog:
505 logger.debug(
"'dateutil' return: " + str(ret.isoformat(DateTimeType.ISO_SEP)))
506 except Exception, err:
507 if logger
is not None and isExtendLog:
508 logger.debug(
"'dateutil' can not parse: " + str(err))
512 langType = DateTimeType.getLang(dataString, logger, isExtendLog)
514 if logger
is not None and isExtendLog:
515 logger.debug(
'lang type detected as: ' + str(langType))
517 if langType == DateTimeType.LANG_ENG:
519 ret = DateTimeType.extractDateEng(dataString, useCurrentYear, logger, isExtendLog)
520 elif langType == DateTimeType.LANG_RUS:
522 ret = DateTimeType.extractDateRus(dataString, useCurrentYear, logger, isExtendLog)
523 elif langType == DateTimeType.LANG_UKR:
525 ret = DateTimeType.extractDateUkr(dataString, useCurrentYear, logger, isExtendLog)
526 elif langType == DateTimeType.LANG_GER:
528 ret = DateTimeType.extractDateGerman(dataString, useCurrentYear, logger, isExtendLog)
529 elif langType == DateTimeType.LANG_JAP:
531 ret = DateTimeType.extractDateJapan(dataString, useCurrentYear, logger, isExtendLog)
533 ret = DateTimeType.extractDateCommon(dataString, useCurrentYear, logger, isExtendLog)
535 if ret
is not None and ret.tzinfo
is None:
536 timezoneName = DateTimeType.extractUtcTimezoneName(dataString, logger, isExtendLog)
537 utcZone = gettz(timezoneName)
540 if utcZone
is not None:
541 ret = ret.replace(tzinfo=utcZone)
543 ret = DateTimeType.applyUtcTimezone(ret, timezoneName, DateTimeTimezones.timezonesDict, logger, isExtendLog)
545 if logger
is not None and isExtendLog:
547 logger.debug(
'result pubdate: ' + str(ret.isoformat(DateTimeType.ISO_SEP)))
549 logger.debug(
'result pubdate: NONE')
563 def applyUtcTimezone(dt, tzName, timezonesDict=DateTimeTimezones.timezonesDict, logger=None, isExtendLog=False):
564 if logger
is not None and isExtendLog:
565 logger.debug(
"applyUtcTimezone enter ...")
567 if dt
is not None and tzName
in timezonesDict
and len(timezonesDict[tzName]) > 1:
568 rawOffset = timezonesDict[tzName][1]
571 if '−' in rawOffset
or '-' in rawOffset:
573 if logger
is not None and isExtendLog:
574 logger.debug(
"isNegative: " + str(isNegative))
576 for pattern
in DateTimeType.patternListUtcTimezones:
577 match = re.search(pattern, rawOffset)
580 if 'hours' in match.groupdict():
581 hours = int(match.groupdict()[
'hours'])
584 if 'minutes' in match.groupdict():
585 minutes = int(match.groupdict()[
'minutes'])
587 if logger
is not None and isExtendLog:
588 logger.debug(
"hours: " + str(hours) +
" minutes: " + str(minutes))
591 dt = dt.replace(tzinfo=tzInfo)
592 if logger
is not None and isExtendLog:
593 logger.debug(
"tzname: " + str(dt.tzname()))
607 timezone = dt.strftime(
'%z')
608 dt = dt.replace(tzinfo=
None)
621 timezone = dt.strftime(
'%z')
636 if dataString
is None or not isinstance(dataString, basestring):
638 elif dataString !=
"" and dataString.isupper()
and dataString.isalnum()
and \
639 not dataString.isalpha():
641 if logger
and isExtendLog:
642 logger.debug(
'input string has not allowed format')
655 for tagName
in DateTimeType.TAG_NAMES:
656 if inputStr.lower().find(
'%' + tagName +
'%') > -1:
657 ret = ret.replace(
'%' + tagName +
'%',
'')
661 for bad
in DateTimeType.BAD_SIMBOLS:
662 ret = ret.replace(bad,
' ')
664 ret = ret.replace(
' ',
' ')
677 def extractDate(inputStr, useCurrentYear=True, logger=None, isExtendLog=False):
684 for pattern
in DateTimeType.patternListDate:
685 match = re.search(pattern, inputStr)
686 if logger
and isExtendLog:
687 logger.debug(
'match: ' + str(match) +
' pattern: ' + str(pattern))
690 if logger
and isExtendLog:
691 logger.debug(
'match.groupdict(): ' + str(match.groupdict()))
693 if 'short_year' in match.groupdict():
694 year = int(match.groupdict()[
'short_year']) + int(datetime.date.today().year // 1000 * 1000)
696 if 'year' in match.groupdict():
697 year = match.groupdict()[
'year']
699 if 'mon' in match.groupdict():
700 month = match.groupdict()[
'mon']
701 logger.debug(
'month: ' + month)
702 if month.isdigit()
and int(month) > 12:
703 if logger
and isExtendLog:
704 logger.debug(
'Bad month (' + str(month) +
') scipped!!!')
707 if 'day' in match.groupdict():
708 day = match.groupdict()[
'day']
710 if logger
and isExtendLog:
711 logger.debug(
'month = ' + month)
713 monthNumber = DateTimeType.getMonthNumber(month, logger, isExtendLog)
715 if logger
and isExtendLog:
716 logger.debug(
'monthNumber = ' + str(monthNumber))
718 if monthNumber
is not None:
721 month = day = year = 0
723 if logger
and isExtendLog:
724 logger.debug(
'year: ' + str(year) +
' month: ' + str(month) +
' day: ' + str(day))
726 if int(year) > DateTimeType.MIN_ALLOWED_YEAR
and int(year) <= datetime.date.today().year
and \
727 int(month) <= 12
and int(day) <= 31:
731 if logger
is not None and isExtendLog:
732 logger.debug(
'Match is good !!!')
736 d = datetime.date.today()
738 if year == 0
and month
and day:
741 if year
and month
and day == 0:
744 except Exception, err:
745 if logger
and isExtendLog:
746 logger.debug(
"inputStr: '" + str(inputStr) +
"'")
747 if logger
and ExceptionLog
is not None:
748 ExceptionLog.handler(logger, err,
'extractDate:', (), \
749 {ExceptionLog.LEVEL_NAME_ERROR:ExceptionLog.LEVEL_VALUE_DEBUG})
751 logger.debug(
'extractDate:' + str(err))
753 return int(year), int(month), int(day)
771 for pattern
in DateTimeType.patternListTime:
772 match = re.search(pattern, inputStr)
779 if 'hour' in match.groupdict():
780 hour = match.groupdict()[
'hour']
782 if 'min' in match.groupdict():
783 minute = match.groupdict()[
'min']
785 if 'sec' in match.groupdict():
786 second = match.groupdict()[
'sec']
788 if 'tf' in match.groupdict():
789 tf = match.groupdict()[
'tf']
793 except Exception, err:
794 if logger
and isExtendLog:
795 logger.debug(
"inputStr: '" + str(inputStr) +
"'")
796 if logger
and ExceptionLog
is not None:
797 ExceptionLog.handler(logger, err,
'extractTime:', (), \
798 {ExceptionLog.LEVEL_NAME_ERROR:ExceptionLog.LEVEL_VALUE_DEBUG})
800 logger.debug(
'extractTime:' + str(err))
802 return int(hour), int(minute), int(second), tf
817 localStr = DateTimeType.prepareString(inputStr)
820 match = re.search(
r'\d{10}', localStr)
822 ret = datetime.datetime.fromtimestamp(int(match.group()))
824 locCurrentTime = datetime.datetime.now()
825 utcCurrentTime = datetime.datetime.utcnow()
826 tmDelta = locCurrentTime - utcCurrentTime
828 isNegative = bool(tmDelta.total_seconds() < 0)
829 hours = abs(locCurrentTime.hour - utcCurrentTime.hour)
830 minutes = abs(locCurrentTime.minute - utcCurrentTime.minute)
831 if logger
is not None and isExtendLog:
832 logger.debug(
"isNegative: " + str(isNegative) +
" hours: " + str(hours) +
" minutes: " + str(minutes))
836 ret = ret + datetime.timedelta(hours=hours, minutes=minutes)
838 ret = ret - datetime.timedelta(hours=hours, minutes=minutes)
843 ret = ret.replace(tzinfo=tzInfo)
844 if logger
is not None and isExtendLog:
845 logger.debug(
"tzname: " + str(ret.tzname()))
847 year, month, day = DateTimeType.extractDate(localStr, useCurrentYear, logger, isExtendLog)
848 if logger
and isExtendLog:
849 logger.debug(
'year: ' + str(year) +
'\tmonth: ' + str(month) +
'\tday: ' + str(day))
851 hour, minute, second, tf = DateTimeType.extractTime(localStr, logger, isExtendLog)
852 if logger
and isExtendLog:
853 logger.debug(
'hour: ' + str(hour) +
'\tminute: ' + str(minute) +
'\tsecond: ' + str(second) + \
856 hour, minute = DateTimeType.checkTimeFormat(hour, minute, tf, logger, isExtendLog)
858 if logger
is not None and isExtendLog:
859 logger.debug(
'hour: ' + str(hour) +
'\tminute: ' + str(minute))
861 if year
and month
and day:
863 now = datetime.datetime.now()
864 if month == now.month
and day == now.day:
865 if hour == 0
and minute == 0:
867 minute = int(now.minute)
870 ret = datetime.datetime(year, month, day, hour, minute, second, tzinfo=
None)
871 elif useCurrentYear
and (year + month + day) == 0
and hour > 0
and minute > 0:
872 d = datetime.datetime.today()
877 if int(d.hour) < int(hour):
878 day = (d - datetime.timedelta(days=1)).day
880 ret = datetime.datetime(year, month, day, hour, minute, second)
882 except Exception, err:
883 if logger
is not None and isExtendLog:
884 logger.debug(
"inputStr: '" + str(inputStr) +
"'")
885 if logger
and ExceptionLog
is not None:
886 ExceptionLog.handler(logger, err,
'extractDateCommon:', (inputStr), \
887 {ExceptionLog.LEVEL_NAME_ERROR:ExceptionLog.LEVEL_VALUE_DEBUG})
888 elif logger
is not None:
889 logger.debug(
'extractDateCommon:' + str(err))
904 if logger
is not None and isExtendLog:
905 logger.debug(
"tf: '%s'", str(tf))
907 logger.debug(
"find = %s", str(tf.lower().find(
'p')))
909 if tf
and tf.lower().find(
'p') > -1:
910 if int(hour) == 12
and int(minute) == 0:
912 elif int(hour) >= 0
and int(hour) < 12
and int(minute) >= 0:
916 if int(hour) == 12
and int(minute) == 0:
918 elif int(hour) >= 0
and int(hour) < 12
and int(minute) >= 0:
937 for dayState
in DateTimeType.DAY_STATE_DICT[langName]:
938 if not inputStr.lower().find(dayState) < 0:
939 if logger
is not None and isExtendLog:
940 logger.debug(
"!!! dayState: " + str(dayState))
943 d = datetime.date.today()
944 hour, minute, second, tf = DateTimeType.extractTime(inputStr, logger, isExtendLog)
945 t = datetime.time(hour, minute, second)
946 dt = datetime.datetime.combine(d, t)
947 ret = dt - datetime.timedelta(days=index)
966 if useCurrentYear
and langName == DateTimeType.LANG_ENG:
967 for pattern
in [
r'(?P<hour>\d{1,2}) hours']:
968 match = re.search(pattern, inputStr, re.UNICODE)
970 d = datetime.datetime.now()
971 if 'hour' in match.groupdict():
972 hour = match.groupdict()[
'hour']
973 if int(hour) >= 0
and int(hour) <= 24:
974 ret = d.replace(hour=int(hour), minute=0, second=0, microsecond=0, tzinfo=
None)
991 if useCurrentYear
and langName == DateTimeType.LANG_ENG:
992 for period
in [
u'years',
u'months',
u'days',
u'hours',
u'minutes']:
993 match = re.search(
r'(?P<value>\d{1,2}).? ' + period +
'.?left', inputStr)
996 if 'value' in match.groupdict():
997 value = int(match.groupdict()[
'value'])
999 dt = datetime.datetime.now()
1000 if period ==
u'years':
1001 ret = dt + relativedelta(years=+value)
1002 elif period ==
u'months':
1003 ret = dt + relativedelta(months=+value)
1004 elif period ==
u'days':
1005 ret = dt + relativedelta(days=+value)
1006 elif period ==
u'hours':
1007 ret = dt + relativedelta(hours=+value)
1008 elif period ==
u'minutes':
1009 ret = dt + relativedelta(minutes=+value)
1011 ret = ret.replace(second=0, microsecond=0, tzinfo=
None)
1028 if useCurrentYear
and langName == DateTimeType.LANG_JAP:
1029 for pattern
in [
r'(?P<value>\d{1,2})日前',
r'(?P<value>\d{1,2})時間前']:
1030 match = re.search(pattern, inputStr, re.UNICODE)
1031 if match
is not None:
1033 if 'value' in match.groupdict():
1034 value = int(match.groupdict()[
'value'])
1036 if pattern.find(
'日前') > 0:
1037 ret = datetime.datetime.now() + relativedelta(days=-value)
1038 elif pattern.find(
'時間前') > 0:
1039 ret = datetime.datetime.now() + relativedelta(hours=-value)
1041 ret = ret.replace(second=0, microsecond=0, tzinfo=
None)
1043 if useCurrentYear
and langName == DateTimeType.LANG_ENG:
1044 for period
in [
u'years',
u'months',
u'days',
u'hours',
u'minutes',
u'Hour']:
1045 for pattern
in [
r'(?P<value>\d{1,2}).? ' + period +
'.?Ago',
1046 r'(?P<value>\d{1,2}).? ' + period +
'.?ago']:
1047 match = re.search(pattern, inputStr, re.UNICODE)
1050 if 'value' in match.groupdict():
1051 value = int(match.groupdict()[
'value'])
1053 dt = datetime.datetime.now()
1054 if period ==
u'years':
1055 ret = dt + relativedelta(years=-value)
1056 elif period ==
u'months':
1057 ret = dt + relativedelta(months=-value)
1058 elif period ==
u'days':
1059 ret = dt + relativedelta(days=-value)
1060 elif period ==
u'hours' or period ==
u'Hour':
1061 ret = dt + relativedelta(hours=-value)
1062 elif period ==
u'minutes':
1063 ret = dt + relativedelta(minutes=-value)
1065 ret = ret.replace(second=0, microsecond=0, tzinfo=
None)
1066 if logger
is not None:
1067 logger.debug(
"ret: %s", str(ret))
1084 inputStr.decode(
'utf-8')
1086 except Exception, err:
1087 if logger
is not None and isExtendLog:
1088 logger.debug(
'inputStr.decode: ' + str(err))
1104 dataString = inputStr.decode(
'latin-1')
1105 ret = dataString.encode(
'ascii', errors=
'ignore')
1106 except Exception, err:
1107 if logger
and isExtendLog:
1108 logger.debug(
"inputStr.decode('latin-1') : " + str(err))
1125 dataString = copy.copy(inputStr)
1126 dataString = DateTimeType.prepareString(dataString)
1128 if langName
is None:
1129 langName = DateTimeType.getLang(dataString)
1133 if langName
is not None:
1134 ret = DateTimeType.extractUseTimePeriodNameAgo(dataString, useCurrentYear, langName, logger)
1136 if langName
is not None and langName != DateTimeType.LANG_JAP:
1138 ret = DateTimeType.extractUseDayState(dataString, useCurrentYear, langName, logger, isExtendLog)
1140 ret = DateTimeType.extractUseTimePeriodName(dataString, useCurrentYear, langName)
1142 ret = DateTimeType.extractUseTimePeriodNameLeft(dataString, useCurrentYear, langName)
1145 except Exception, err:
1146 if logger
is not None and isExtendLog:
1147 logger.debug(
"inputStr: '" + inputStr +
"'")
1148 if logger
is not None and ExceptionLog
is not None:
1149 ExceptionLog.handler(logger, err,
'intelligentExtractor:', (inputStr), \
1150 {ExceptionLog.LEVEL_NAME_ERROR:ExceptionLog.LEVEL_VALUE_DEBUG})
1152 logger.debug(
'intelligentExtractor:' + str(err))
1154 if logger
is not None and isExtendLog:
1155 logger.debug(
'intelligentExtractor return: ' + str(ret))
1172 if isinstance(rawPubdate, unicode):
1173 for i
in range(0, len(rawPubdate)):
1174 if rawPubdate[i].isdigit():
1175 rawPubdate = re.sub(rawPubdate[i], str(unicodedata.digit(rawPubdate[i])), rawPubdate)
1177 rawPubdate = rawPubdate.encode(
"utf_8")
1181 pubdate_parts = rawPubdate.split(
",")
1182 if len(pubdate_parts) > 1:
1183 rawPubdate = pubdate_parts[0]
1184 rawPubdate = rawPubdate.replace(
"posted at",
"")
1185 rawPubdate = rawPubdate.replace(
"Updated:",
"")
1186 rawPubdate = re.sub(
r"\(1/\d{1}ページ\)",
"", rawPubdate)
1189 if "平成" in rawPubdate:
1190 year = DateTimeType.extractYearFromHeiseiPeriod(rawPubdate)
1191 if year
is not None:
1192 if logger
and isExtendLog:
1193 logger.debug(
"'Heisei' period before: " + str(rawPubdate))
1194 rawPubdate = re.sub(
r"平成(\d{1,2})", str(year), rawPubdate)
1195 if logger
and isExtendLog:
1196 logger.debug(
"'Heisei' period after: " + str(rawPubdate))
1198 rawPubdate = re.sub(
r"\(木\)",
"", rawPubdate)
1199 rawPubdate = re.sub(
r" 年 ",
"年", rawPubdate)
1200 rawPubdate = re.sub(
r"@",
"", rawPubdate)
1201 parsed_time_candidate_str = float(calendar.timegm(
parse(rawPubdate).timetuple()))
1203 if logger
and isExtendLog:
1204 logger.debug(
"pubdate in seconds: %s", str(parsed_time_candidate_str))
1205 except Exception, err:
1206 if logger
is not None and isExtendLog:
1207 logger.debug(
"try replace rawPubdate return: " + str(err))
1208 rawPubdate = DateTimeType.adjustJapaneseDate(rawPubdate, logger, isExtendLog)
1211 t =
u"%Y\xe5\xb9\xb4%m\xe6\x9c\x88%d\xe6\x97\xa5 %H:%M" 1212 parsed_time_candidate_str = time.mktime(time.strptime(rawPubdate, t))
1215 ret = datetime.datetime.fromtimestamp(parsed_time_candidate_str)
1248 if logger
and isExtendLog:
1249 logger.debug(
"pubdate has to be converted: <<%s>>", rawPubdate)
1250 logger.debug(
"pubdate type: <<%s>>", str(
type(rawPubdate)))
1255 rawPubdate = rawPubdate.strip(
" \r\t\n)()\xe6\x9b\xb4\xe6\x96\xb0\xef" +
1256 "\xbc\x89\xe5\x88\x86\xe4\xbb\x98JST\xe7\xb4\x99\xe9\x9d\xa2\xe3\x81\x8b\xe3\x82" +
1257 " \xe5\xa4\x95\xe5\x88\x8a\xe6\x9c\x9d\xe5\x88\x8a\xe3\x80\x80\xe5\x88\x86")
1263 if "\xe6\x97\xa5" in rawPubdate:
1264 rawPubdate = rawPubdate.replace(
"\xe6\x97\xa5",
"\xe6\x97\xa5 ")
1267 if "\xe5\xb9\xb4" in rawPubdate
and "\xe6\x99\x82" in rawPubdate:
1268 rawPubdate = rawPubdate.replace(
"\xe6\x99\x82",
":")
1269 elif "\xe5\xb9\xb4" in rawPubdate
and not "\xe6\x99\x82" in rawPubdate:
1270 rawPubdate = rawPubdate +
" 00:00" 1271 rawPubdate = rawPubdate.replace(
' ',
' ')
1273 elif "\xe5\xb9\xb4" in rawPubdate
and ":" in rawPubdate
and rawPubdate.count(
" ") == 1:
1276 elif "\xe5\xb9\xb4" in rawPubdate
and ":" in rawPubdate
and rawPubdate.count(
" ") > 1:
1277 pos = rawPubdate.find(
'\xe3\x80\x80')
1279 rawPubdate = rawPubdate[:pos]
1281 date_items = rawPubdate.split()
1282 if isinstance(date_items, list)
and len(date_items) == 2:
1283 rawPubdate = date_items[0] +
" " + date_items[1]
1284 elif isinstance(date_items, list)
and len(date_items) == 3:
1285 rawPubdate = date_items[0] + date_items[1]
1286 elif isinstance(date_items, list)
and len(date_items) == 4:
1287 rawPubdate = date_items[0] + date_items[1] + date_items[2] +
" " + date_items[3]
1290 elif "\xe5\xb9\xb4" in rawPubdate:
1291 rawPubdate = str(time.gmtime().tm_year) +
"\xe5\xb9\xb4" + rawPubdate
1293 if rawPubdate.count(
"/") == 2:
1296 date_items = re.split(
"/|-", rawPubdate)
1297 if isinstance(date_items, list)
and len(date_items) == 4:
1298 rawPubdate = date_items[0] +
"\xe5\xb9\xb4" + date_items[1] +
"\xe6\x9c\x88" + date_items[2] + \
1299 "\xe6\x97\xa5 " + date_items[3]
1301 rawPubdate = date_items[0] +
"\xe5\xb9\xb4" + date_items[1] +
"\xe6\x9c\x88" + date_items[2] + \
1302 "\xe6\x97\xa5 00:00" 1304 if not "\xe5\xb9\xb4" in rawPubdate
and not "/" in rawPubdate:
1305 date_items = rawPubdate.replace(
"\xe6\x9c\x88",
" ").replace(
"\xe6\x97\xa5",
" ").\
1306 replace(
"\xe6\x99\x82",
" ").
split()
1307 if isinstance(date_items, list)
and len(date_items) == 4:
1308 rawPubdate = str(time.gmtime().tm_year) +
"\xe5\xb9\xb4" + date_items[0] +
"\xe6\x9c\x88" + date_items[1] + \
1309 "\xe6\x97\xa5 " + date_items[2] +
":" + date_items[3]
1311 rawPubdate = str(time.gmtime().tm_year) +
"\xe5\xb9\xb4" + date_items[0] +
"\xe6\x9c\x88" + date_items[1] + \
1312 "\xe6\x97\xa5 00:00" 1314 if not "\xe5\xb9\xb4" in rawPubdate
and "/" in rawPubdate:
1315 date_items = rawPubdate.replace(
"/",
" ").
split()
1317 if len(date_items) > 4:
1318 rawPubdate = date_items[0] +
"\xe5\xb9\xb4" + date_items[1] +
"\xe6\x9c\x88" + date_items[2] + \
1319 "\xe6\x97\xa5 " + date_items[3]
1321 rawPubdate = str(time.gmtime().tm_year) +
"\xe5\xb9\xb4" + date_items[0] +
"\xe6\x9c\x88" + date_items[1] + \
1322 "\xe6\x97\xa5 " + date_items[2]
1324 if logger
is not None and isExtendLog:
1325 logger.debug(
"pubdate converted is: <<%s>>", rawPubdate)
1338 if "平成" in rawPubdate:
1339 startPeriodYear = 1988
1340 match = re.search(
r'平成(?P<year>\d{1,2})年', rawPubdate)
1342 if 'year' in match.groupdict():
1343 year = int(match.groupdict()[
'year'])
1344 ret = startPeriodYear + year
1346 match = re.search(
r'(?P<year>\d{1,4})年', rawPubdate)
1348 if 'year' in match.groupdict():
1349 ret = int(match.groupdict()[
'year'])
1365 year = DateTimeType.extractYearFromHeiseiPeriod(rawPubdate)
1366 if year
is not None:
1367 if logger
and isExtendLog:
1368 logger.debug(
'rawPubdate: ' + str(rawPubdate))
1369 logger.debug(
'year: ' + str(year))
1372 beginPos = rawPubdate.find(
'年')
1373 endPos = rawPubdate.find(
'月')
1374 month = rawPubdate[beginPos + len(
'年'):endPos]
1375 if logger
and isExtendLog:
1376 logger.debug(
'month: ' + str(month))
1377 month = int(unicode(month))
1380 beginPos = rawPubdate.find(
'月')
1381 endPos = rawPubdate.find(
'日')
1382 day = rawPubdate[beginPos + len(
'月'):endPos]
1383 if logger
and isExtendLog:
1384 logger.debug(
'day: ' + str(day))
1385 day = int(unicode(day))
1388 hour, minute, second, tf = DateTimeType.extractTime(rawPubdate, logger, isExtendLog)
1389 if logger
is not None and isExtendLog:
1390 logger.debug(
"hour: %s, minute: %s, second: %s, tf: %s", str(hour), str(minute), str(second), str(tf))
1393 ret = datetime.datetime(year=year, month=month, day=day, hour=hour, minute=minute, second=second,
1395 except Exception, err:
1396 if logger
is not None and isExtendLog:
1397 logger.debug(
"Extract 'Heisei' period has error: " + str(err))
1410 simbolsDict = {
'-':
'-',
'.':
'.',
':':
':',
'/':
'/',
',':
',',
'・':
'.',
'ー':
'-', \
1411 '0':
'0',
'1':
'1',
'2':
'2',
'3':
'3',
'4':
'4',
'5':
'5',
'6':
'6',
'7':
'7',
'8':
'8',
'9':
'9'}
1413 for key, value
in simbolsDict.items():
1415 rawPubdate = rawPubdate.replace(key, value)
1416 except Exception, err:
1417 if logger
is not None and isExtendLog:
1418 logger.debug(str(err))
1429 def utcOffset(tzName, timezonesDict=DateTimeTimezones.timezonesDict):
1432 if tzName
in timezonesDict:
1433 elem = timezonesDict[tzName]
1435 if elem[1].find(
'UTC') > -1:
1436 ret = elem[1][len(
'UTC'):]
1449 def extractUtcOffset(inputStr, logger=None, isExtendLog=False, timezonesDict=DateTimeTimezones.timezonesDict):
1452 if inputStr
is not None:
1453 for key
in timezonesDict.keys():
1454 if (inputStr.find(key)) > -1:
1455 ret = DateTimeType.utcOffset(key, timezonesDict)
1456 if logger
is not None and isExtendLog:
1457 logger.debug(
'Timezone: ' + str(key) +
' offset: ' + str(ret))
1462 for pattern
in DateTimeType.patternListTimezoneOffset:
1463 match = re.match(pattern, inputStr)
1464 if logger
is not None and isExtendLog:
1465 logger.debug(
'inputStr: ' + str(inputStr) +
' pattern: ' + str(pattern) +
' match: ' + str(match))
1467 if 'offset' in match.groupdict():
1468 ret = match.groupdict()[
'offset']
1471 except Exception, err:
1472 if logger
is not None and isExtendLog:
1473 logger.debug(
'extractUtcOffset error: ' + str(err))
1475 if logger
is not None and isExtendLog:
1476 logger.debug(
"!!! ret: %s", str(ret))
1490 if logger
is not None and isExtendLog:
1491 logger.debug(
'inputStr: ' + inputStr)
1492 if inputStr
is not None:
1493 for key
in timezonesDict.keys():
1494 pos = inputStr.find(key)
1495 if (pos) > -1
and inputStr[pos - 1] ==
' ':
1497 if logger
is not None and isExtendLog:
1498 logger.debug(
'Timezone name: ' + str(key))
1513 if inputStr
is not None:
1514 pos = inputStr.rfind(
'+')
1517 pos = inputStr.rfind(
'-')
1521 oldValue = inputStr[pos + length:].strip()
1523 if logger
is not None and isExtendLog:
1524 logger.debug(
'oldValue: ' + str(oldValue))
1525 if oldValue.isdigit()
and len(oldValue) > 1:
1526 newValue =
'0' + oldValue[:1] +
':00' 1528 match = re.search(
r'(?P<tzone>\d{1,4})Z', oldValue)
1530 if 'tzone' in match.groupdict():
1531 newValue = match.groupdict()[
'tzone']
1533 if logger
is not None and isExtendLog:
1534 logger.debug(
'newValue: ' + str(newValue))
1535 ret = inputStr.replace(oldValue, newValue)
1548 if dt.utcoffset()
is not None:
1549 ret = dt - dt.utcoffset()
1550 ret = ret.replace(tzinfo=
None)
def __init__(self, dataString=None, formatString=None)
def normalizeTimezone(inputStr, logger=None, isExtendLog=False)
def extractDateEng(inputStr, useCurrentYear, logger=None, isExtendLog=False)
def prepareString(inputStr)
def extractDateGerman(inputStr, useCurrentYear, logger=None, isExtendLog=False)
def utcOffset(tzName, timezonesDict=DateTimeTimezones.timezonesDict)
def extractUtcTimezoneName(inputStr, logger=None, isExtendLog=False, timezonesDict=DateTimeTimezones.timezonesDict)
def extractTime(inputStr, logger=None, isExtendLog=False)
def __init__(self, isNegative=False, hours=0, minutes=0)
def extractDateFromHeiseiPeriod(rawPubdate, logger=None, isExtendLog=False)
def changeCodePageToAscii(inputStr, logger=None, isExtendLog=False)
def checkTimeFormat(hour, minute, tf, logger=None, isExtendLog=False)
def extractDateCommon(inputStr, useCurrentYear=True, logger=None, isExtendLog=False)
def parse(dataString, useCurrentYear=True, logger=None, isExtendLog=False)
def extractYearFromHeiseiPeriod(rawPubdate)
def extractDateUkr(inputStr, useCurrentYear, logger=None, isExtendLog=False)
def getString(self, formatString=None)
string ERROR_FORMAT_STRING_TYPE
def extractUtcOffset(inputStr, logger=None, isExtendLog=False, timezonesDict=DateTimeTimezones.timezonesDict)
def extractUseTimePeriodName(inputStr, useCurrentYear, langName)
def applyUtcTimezone(dt, tzName, timezonesDict=DateTimeTimezones.timezonesDict, logger=None, isExtendLog=False)
string ERROR_DATA_STRING_TYPE
def isAllowedInputString(dataString, logger=None, isExtendLog=False)
def intelligentExtractor(inputStr, useCurrentYear=True, logger=None, isExtendLog=False, langName=None)
def convertPubDateToRFC2822(rawPubdate, logger=None, isExtendLog=False)
def extractDateJapan(inputStr, useCurrentYear, logger=None, isExtendLog=False)
def isUtf8CodePage(inputStr, logger, isExtendLog)
def extractUseTimePeriodNameLeft(inputStr, useCurrentYear, langName)
def __initDataTime(self, dataString=None, formatString=None)
def extractUseDayState(inputStr, useCurrentYear, langName, logger, isExtendLog)
string ERROR_BAD_INPUT_DATA
def adjustJapaneseDate(rawPubdate, logger=None, isExtendLog=False)
string ERROR_INPUT_PARAMS
def getMonthNumber(inputStr, logger, isExtendLog=False)
def extractDateRus(inputStr, useCurrentYear, logger=None, isExtendLog=False)
def extractUseTimePeriodNameAgo(inputStr, useCurrentYear, langName, logger)
def extractDate(inputStr, useCurrentYear=True, logger=None, isExtendLog=False)
def getTracebackInfo(linesNumberMax=None)
def replaceJapanSimbols(rawPubdate, logger=None, isExtendLog=False)
def getLang(inputStr, logger=None, isExtendLog=False)