HCE Project Python language Distributed Tasks Manager Application, Distributed Crawler Application and client API bindings.  2.0.0-chaika
Hierarchical Cluster Engine Python language binding
app.DateTimeType.DateTimeType Class Reference
Inheritance diagram for app.DateTimeType.DateTimeType:
Collaboration diagram for app.DateTimeType.DateTimeType:

Public Member Functions

def __init__ (self, dataString=None, formatString=None)
 
def getInt (self)
 
def getString (self, formatString=None)
 

Static Public Member Functions

def getLang (inputStr, logger=None, isExtendLog=False)
 
def getMonthNumber (inputStr, logger, isExtendLog=False)
 
def extractDateEng (inputStr, useCurrentYear, logger=None, isExtendLog=False)
 
def extractDateRus (inputStr, useCurrentYear, logger=None, isExtendLog=False)
 
def extractDateUkr (inputStr, useCurrentYear, logger=None, isExtendLog=False)
 
def extractDateGerman (inputStr, useCurrentYear, logger=None, isExtendLog=False)
 
def extractDateJapan (inputStr, useCurrentYear, logger=None, isExtendLog=False)
 
def parse (dataString, useCurrentYear=True, logger=None, isExtendLog=False)
 
def applyUtcTimezone (dt, tzName, timezonesDict=DateTimeTimezones.timezonesDict, logger=None, isExtendLog=False)
 
def split (dt)
 
def getTimezone (dt)
 
def isAllowedInputString (dataString, logger=None, isExtendLog=False)
 
def prepareString (inputStr)
 
def extractDate (inputStr, useCurrentYear=True, logger=None, isExtendLog=False)
 
def extractTime (inputStr, logger=None, isExtendLog=False)
 
def extractDateCommon (inputStr, useCurrentYear=True, logger=None, isExtendLog=False)
 
def checkTimeFormat (hour, minute, tf, logger=None, isExtendLog=False)
 
def extractUseDayState (inputStr, useCurrentYear, langName, logger, isExtendLog)
 
def extractUseTimePeriodName (inputStr, useCurrentYear, langName)
 
def extractUseTimePeriodNameLeft (inputStr, useCurrentYear, langName)
 
def extractUseTimePeriodNameAgo (inputStr, useCurrentYear, langName, logger)
 
def isUtf8CodePage (inputStr, logger, isExtendLog)
 
def changeCodePageToAscii (inputStr, logger=None, isExtendLog=False)
 
def intelligentExtractor (inputStr, useCurrentYear=True, logger=None, isExtendLog=False, langName=None)
 
def convertPubDateToRFC2822 (rawPubdate, logger=None, isExtendLog=False)
 
def adjustJapaneseDate (rawPubdate, logger=None, isExtendLog=False)
 
def extractYearFromHeiseiPeriod (rawPubdate)
 
def extractDateFromHeiseiPeriod (rawPubdate, logger=None, isExtendLog=False)
 
def replaceJapanSimbols (rawPubdate, logger=None, isExtendLog=False)
 
def utcOffset (tzName, timezonesDict=DateTimeTimezones.timezonesDict)
 
def extractUtcOffset (inputStr, logger=None, isExtendLog=False, timezonesDict=DateTimeTimezones.timezonesDict)
 
def extractUtcTimezoneName (inputStr, logger=None, isExtendLog=False, timezonesDict=DateTimeTimezones.timezonesDict)
 
def normalizeTimezone (inputStr, logger=None, isExtendLog=False)
 
def toUTC (dt)
 

Public Attributes

 datetime
 
 isError
 
 errorMsg
 

Static Public Attributes

int MIN_ALLOWED_YEAR = 2000
 
int MIN_ALLOWED_LEN_FOR_DATEUTILS = 10
 
string ISO_SEP = ' '
 
string BAD_SIMBOLS = '=(),|@`'
 
list TAG_NAMES = ['pubdate', 'dc_date']
 
string LANG_ENG = "ENG"
 
string LANG_RUS = "RUS"
 
string LANG_UKR = "UKR"
 
string LANG_GER = "GERMAN"
 
string LANG_JAP = "JAPAN"
 
list wordsListEng
 
list wordsListRus
 
list wordsListUkr
 
list wordsListGer
 
list wordsListJap
 
list monthListEng = [u'Jan', u'Feb', u'Mar', u'Apr', u'May', u'Jun', u'Jul', u'Aug', u'Sep', u'Oct', u'Nov', u'Dec']
 
list monthListRus = [u'Янв', u'Февр', u'Март', u'Апр', u'Май', u'Июнь', u'Июль', u'Авг', u'Сент', u'Окт', u'Нояб', u'Дек']
 
list monthListUkr = [u'Сiч', u'Лют', u'Бер', u'Квiт', u'Трав', u'Черв', u'Лип', u'Серп', u'Вер', u'Жовт', u'Лист', u'Груд']
 
list monthListGer = [u'Jan', u'Feb', u'März', u'Apr', u'Mai', u'Juni', u'Juli', u'Aug', u'Sept', u'Okt', u'Nov', u'Dez']
 
list monthListJap = [u'一月', u'二月', u'三月', u'四月', u'五月', u'六月', u'七月', u'八月', u'九月', u'十月', u'十一月', u'十二月']
 
list monthListRusBad
 
list dayStateEng = [u'Today', u'Yesterday', u'Day before yesterday']
 
list dayStateRus = [u'Сегодня', u'Вчера', u'Поза вчера']
 
list dayStateUkr = [u'Сьогодні', u'Вчора', u'Позавчора']
 
list dayStateGer = [u'Heute', u'Gestern', u'Vorgestern']
 
list dayStateJap = [u'今日', u'イエスタデイ', u'おととい']
 
list dayStateRusStr = ['Сегодня', 'Вчера', 'Поза вчера']
 
list dayStateUkrStr = ['Сьогодні', 'Вчора', 'Позавчора']
 
dictionary LANG_DICT
 
dictionary MONTH_DICT
 
dictionary DAY_STATE_DICT
 
list patternListDate
 
list patternListTime
 
list patternListTimezoneOffset
 
list patternListUtcTimezones
 
string ERROR_INPUT_PARAMS = 'Error initialization by input parameters.'
 
string ERROR_FORMAT_STRING_TYPE = 'Format string is not string.'
 
string ERROR_DATA_STRING_TYPE = 'Data string is not string.'
 
string ERROR_BAD_INPUT_DATA = 'Bad inputted data.'
 

Private Member Functions

def __initDataTime (self, dataString=None, formatString=None)
 

Detailed Description

Definition at line 70 of file DateTimeType.py.

Constructor & Destructor Documentation

◆ __init__()

def app.DateTimeType.DateTimeType.__init__ (   self,
  dataString = None,
  formatString = None 
)

Definition at line 197 of file DateTimeType.py.

def __init__(self, dataString=None, formatString=None):
    """Create the object and initialise `self.datetime` from the arguments.

    Both arguments are handed unchanged to `__initDataTime`; whatever it
    returns is stored in `self.datetime`.  `isError` and `errorMsg` are
    only reset here.

    Raises:
        Exception: any error raised during initialisation, re-raised with
            ERROR_INPUT_PARAMS prepended to the original message.
    """
    self.isError = False
    self.errorMsg = ''
    self.datetime = None

    try:
        initialValue = self.__initDataTime(dataString, formatString)
    except Exception as err:
        raise Exception(self.ERROR_INPUT_PARAMS + ' ' + str(err))

    self.datetime = initialValue
207 
208 
def __init__(self)
constructor
Definition: UIDGenerator.py:19

Member Function Documentation

◆ __initDataTime()

def app.DateTimeType.DateTimeType.__initDataTime (   self,
  dataString = None,
  formatString = None 
)
private

Definition at line 214 of file DateTimeType.py.

def __initDataTime(self, dataString=None, formatString=None):
    """Turn the constructor arguments into a `datetime.datetime` (or None).

    Accepted combinations:
      * `dataString` is an int: converted with
        `datetime.datetime.fromtimestamp`; `formatString` is not used;
      * both arguments are `str`: parsed with `datetime.datetime.strptime`;
      * both arguments are None: nothing to parse, None is returned.

    Raises:
        Exception: ERROR_FORMAT_STRING_TYPE or ERROR_DATA_STRING_TYPE when
            both values are supplied but one of them is not a `str`
            (the format string is checked first); ERROR_BAD_INPUT_DATA
            when exactly one of the two values is missing.
    """
    # An int is always handled as a timestamp, whatever formatString is.
    if isinstance(dataString, int):
        return datetime.datetime.fromtimestamp(dataString)

    dataMissing = dataString is None
    formatMissing = formatString is None

    if dataMissing and formatMissing:
        return None
    if dataMissing or formatMissing:
        raise Exception(self.ERROR_BAD_INPUT_DATA)

    # Both values are present: validate their types before parsing.
    if not isinstance(formatString, str):
        raise Exception(self.ERROR_FORMAT_STRING_TYPE)
    if not isinstance(dataString, str):
        raise Exception(self.ERROR_DATA_STRING_TYPE)

    return datetime.datetime.strptime(dataString, formatString)
238 
239 

◆ adjustJapaneseDate()

def app.DateTimeType.DateTimeType.adjustJapaneseDate (   rawPubdate,
  logger = None,
  isExtendLog = False 
)
static

Definition at line 1227 of file DateTimeType.py.

1227  def adjustJapaneseDate(rawPubdate, logger=None, isExtendLog=False):
      # Purpose: rewrite a Japanese publication-date string towards the layout
      # "<year>年<month>月<day>日 <HH:MM>" (see the examples below) and return the
      # rewritten string.
      #
      # The byte escapes used throughout are the UTF-8 encodings of:
      #   \xe5\xb9\xb4 = 年 (year)   \xe6\x9c\x88 = 月 (month)   \xe6\x97\xa5 = 日 (day)
      #   \xe6\x99\x82 = 時 (hour)   \xe3\x80\x80 = ideographic space
      # so the function presumably expects a UTF-8 encoded byte string -- TODO confirm
      # against the callers.
      #
      # Whenever the input carries no year, str(time.gmtime().tm_year) is used.
      # Debug output is written only when `logger` is set and `isExtendLog` is true.
1228  # convert rawPubdate in format like
1229  # 8月20日 20:01
1230  # 9月3日 11時41分
1231  # (8月20日 紙面から)
1232  # 2014年8月7日
1233  # 2014年8月20日 夕刊
1234  # 2014年8月21日 朝刊>
1235  # 2014/8/14付
1236  # (2014/08/16-14:46)
1237  # 2014/6/3 18:53 (2014/6/3 20:13更新)
1238  # 2014.08.20 Wed posted at 17:52 JST
1239  # 2014.08.01 Fri posted at 12:36 JST
1240  # 2014年 08月 20日 14:34 JST
1241  # 2014年08月20日 19時46分
1242  # 2014年8月29日16時30分
1243  # 8/20 21:00 更新
1244  # 11月23日
1245  # to
1246  # 2014年8月7日 20:01
1247  # check if 日 exist in date
1248  if logger and isExtendLog:
1249  logger.debug("pubdate has to be converted: <<%s>>", rawPubdate)
1250  logger.debug("pubdate type: <<%s>>", str(type(rawPubdate)))
1251  # self.logger.debug("pubdate charcode: <<%s>>", str(icu.CharsetDetector(rawPubdate).detect().getName()))
1252  # rawPubdate = rawPubdate.decode("utf_8")
1253  # self.logger.debug("pubdate has to be converted: <<%s>>", rawPubdate)
1254  # self.logger.debug("pubdate type: <<%s>>", str(type(rawPubdate)))
      # str.strip() treats its argument as a SET of characters, not as a prefix or
      # suffix: any leading/trailing character that occurs anywhere in the literal
      # below (whitespace, parentheses, 'J', 'S', 'T' and every listed byte) is removed.
1255  rawPubdate = rawPubdate.strip(" \r\t\n)()\xe6\x9b\xb4\xe6\x96\xb0\xef" +
1256  "\xbc\x89\xe5\x88\x86\xe4\xbb\x98JST\xe7\xb4\x99\xe9\x9d\xa2\xe3\x81\x8b\xe3\x82" +
1257  " \xe5\xa4\x95\xe5\x88\x8a\xe6\x9c\x9d\xe5\x88\x8a\xe3\x80\x80\xe5\x88\x86")
1258  # 2014.08.20 Wed posted at 17:52 JST
1259  # if "Wed posted at" in rawPubdate:
1260  # date_items = rawPubdate.replace(" Wed posted at ", " ").replace(".", " ").split()
1261  # rawPubdate = date_items[0] + "\xe5\xb9\xb4" + date_items[1] + "\xe6\x9c\x88" + date_items[2] +\
1262  # "\xe6\x97\xa5 " + date_items[3]
      # Insert a space after every 日 so the date part and the time part can be split.
1263  if "\xe6\x97\xa5" in rawPubdate:
1264  rawPubdate = rawPubdate.replace("\xe6\x97\xa5", "\xe6\x97\xa5 ")
1265  # check if date contain year
1266  # 2014年08月20日 19時46分
      # Year and 時 both present: turn the hour marker into ':'.
1267  if "\xe5\xb9\xb4" in rawPubdate and "\xe6\x99\x82" in rawPubdate:
1268  rawPubdate = rawPubdate.replace("\xe6\x99\x82", ":")
      # Year present but no 時: append a midnight time.
      # NOTE(review): this branch and the one above together match every string
      # that contains 年, so the three `elif` branches further down that also
      # require 年 can never be selected -- confirm the intended conditions
      # (the last one probably meant "年 not in rawPubdate").
1269  elif "\xe5\xb9\xb4" in rawPubdate and not "\xe6\x99\x82" in rawPubdate:
1270  rawPubdate = rawPubdate + " 00:00"
      # NOTE(review): as rendered here this replaces one space with one space
      # (a no-op); the listing may have collapsed a double space -- confirm
      # against DateTimeType.py.
1271  rawPubdate = rawPubdate.replace(' ', ' ')
1272  # 2014年08月19日 11:53
1273  elif "\xe5\xb9\xb4" in rawPubdate and ":" in rawPubdate and rawPubdate.count(" ") == 1:
1274  pass
1275  # 2014年 08月 20日 14:34 JST
1276  elif "\xe5\xb9\xb4" in rawPubdate and ":" in rawPubdate and rawPubdate.count(" ") > 1:
      # Cut everything from the first ideographic space onwards.
1277  pos = rawPubdate.find('\xe3\x80\x80')
1278  if pos > 0:
1279  rawPubdate = rawPubdate[:pos]
1280 
      # Re-join the whitespace-separated pieces depending on how many there are.
1281  date_items = rawPubdate.split()
1282  if isinstance(date_items, list) and len(date_items) == 2:
1283  rawPubdate = date_items[0] + " " + date_items[1]
1284  elif isinstance(date_items, list) and len(date_items) == 3:
1285  rawPubdate = date_items[0] + date_items[1]
1286  elif isinstance(date_items, list) and len(date_items) == 4:
1287  rawPubdate = date_items[0] + date_items[1] + date_items[2] + " " + date_items[3]
1288  else:
1289  pass
1290  elif "\xe5\xb9\xb4" in rawPubdate:
1291  rawPubdate = str(time.gmtime().tm_year) + "\xe5\xb9\xb4" + rawPubdate
1292  # 2014/8/14付
      # Exactly two slashes: treat the input as year/month/day, optionally followed
      # by "-<time>" (four items after splitting on '/' and '-').
1293  if rawPubdate.count("/") == 2:
1294  # date_items = rawPubdate.split("/")
1295  # (2014/08/16-14:46)
1296  date_items = re.split("/|-", rawPubdate)
1297  if isinstance(date_items, list) and len(date_items) == 4:
1298  rawPubdate = date_items[0] + "\xe5\xb9\xb4" + date_items[1] + "\xe6\x9c\x88" + date_items[2] + \
1299  "\xe6\x97\xa5 " + date_items[3]
1300  else:
1301  rawPubdate = date_items[0] + "\xe5\xb9\xb4" + date_items[1] + "\xe6\x9c\x88" + date_items[2] + \
1302  "\xe6\x97\xa5 00:00"
1303  # 8月20日 紙面から
      # No year and no slash: month/day (and optionally hour/minute) separated by
      # 月 / 日 / 時; the current year is prepended.
      # date_items[0] and date_items[1] are indexed without a length check, so an
      # input yielding fewer than two items raises IndexError here.
1304  if not "\xe5\xb9\xb4" in rawPubdate and not "/" in rawPubdate:
1305  date_items = rawPubdate.replace("\xe6\x9c\x88", " ").replace("\xe6\x97\xa5", " ").\
1306  replace("\xe6\x99\x82", " ").split()
1307  if isinstance(date_items, list) and len(date_items) == 4:
1308  rawPubdate = str(time.gmtime().tm_year) + "\xe5\xb9\xb4" + date_items[0] + "\xe6\x9c\x88" + date_items[1] + \
1309  "\xe6\x97\xa5 " + date_items[2] + ":" + date_items[3]
1310  else:
1311  rawPubdate = str(time.gmtime().tm_year) + "\xe5\xb9\xb4" + date_items[0] + "\xe6\x9c\x88" + date_items[1] + \
1312  "\xe6\x97\xa5 00:00"
1313  # 8/20 21:00 更新
      # No year but slashes remain: either "month/day time" (current year is
      # prepended) or, with more than four items, "year/month/day time ...".
1314  if not "\xe5\xb9\xb4" in rawPubdate and "/" in rawPubdate:
1315  date_items = rawPubdate.replace("/", " ").split()
1316  # 2014/6/3 18:53 (2014/6/3 20:13更新)
1317  if len(date_items) > 4:
1318  rawPubdate = date_items[0] + "\xe5\xb9\xb4" + date_items[1] + "\xe6\x9c\x88" + date_items[2] + \
1319  "\xe6\x97\xa5 " + date_items[3]
1320  else:
1321  rawPubdate = str(time.gmtime().tm_year) + "\xe5\xb9\xb4" + date_items[0] + "\xe6\x9c\x88" + date_items[1] + \
1322  "\xe6\x97\xa5 " + date_items[2]
1323 
1324  if logger is not None and isExtendLog:
1325  logger.debug("pubdate converted is: <<%s>>", rawPubdate)
1326 
1327  return rawPubdate
1328 
1329 
Here is the call graph for this function:

◆ applyUtcTimezone()

def app.DateTimeType.DateTimeType.applyUtcTimezone (   dt,
  tzName,
  timezonesDict = DateTimeTimezones.timezonesDict,
  logger = None,
  isExtendLog = False 
)
static

Definition at line 563 of file DateTimeType.py.

def applyUtcTimezone(dt, tzName, timezonesDict=DateTimeTimezones.timezonesDict, logger=None, isExtendLog=False):  # pylint: disable=W0102
    """Attach the UTC offset registered for `tzName` to `dt`.

    The raw offset text is taken from `timezonesDict[tzName][1]` and matched
    against `DateTimeType.patternListUtcTimezones`; the first matching
    pattern supplies the `hours` / `minutes` groups (0 when a group is not
    defined by the pattern).  An `OffsetTzInfo` built from them replaces
    the tzinfo of `dt`.

    Returns:
        `dt` with the new tzinfo, or `dt` unchanged when it is None, the
        timezone name is unknown, its entry has fewer than two items, or no
        pattern matches.
    """
    verbose = logger is not None and isExtendLog
    if verbose:
        logger.debug("applyUtcTimezone enter ...")

    # Nothing to apply without a datetime and a usable dictionary entry.
    if dt is None or tzName not in timezonesDict or len(timezonesDict[tzName]) <= 1:
        return dt

    rawOffset = timezonesDict[tzName][1]

    # Both the typographic minus and the ASCII hyphen mark a negative offset.
    isNegative = '−' in rawOffset or '-' in rawOffset
    if verbose:
        logger.debug("isNegative: " + str(isNegative))

    for pattern in DateTimeType.patternListUtcTimezones:
        found = re.search(pattern, rawOffset)
        if not found:
            continue

        groups = found.groupdict()
        hours = int(groups['hours']) if 'hours' in groups else 0
        minutes = int(groups['minutes']) if 'minutes' in groups else 0
        if verbose:
            logger.debug("hours: " + str(hours) + " minutes: " + str(minutes))

        dt = dt.replace(tzinfo=OffsetTzInfo(isNegative, hours, minutes))
        if verbose:
            logger.debug("tzname: " + str(dt.tzname()))
        break  # only the first matching pattern is used

    return dt
597 
598 

◆ changeCodePageToAscii()

def app.DateTimeType.DateTimeType.changeCodePageToAscii (   inputStr,
  logger = None,
  isExtendLog = False 
)
static

Definition at line 1100 of file DateTimeType.py.

def changeCodePageToAscii(inputStr, logger=None, isExtendLog=False):
    """Drop every non-ASCII character from `inputStr`.

    The value is decoded as latin-1 and re-encoded as ASCII with
    `errors='ignore'`, so characters outside ASCII are silently removed.

    Returns:
        The ASCII-only result, or `inputStr` itself when the conversion
        raises (the error is written to `logger` only when `isExtendLog`
        is true).
    """
    try:
        return inputStr.decode('latin-1').encode('ascii', errors='ignore')
    except Exception as err:
        if logger and isExtendLog:
            logger.debug("inputStr.decode('latin-1') : " + str(err))

    # Conversion failed: hand the input back untouched.
    return inputStr
1111 
1112 

◆ checkTimeFormat()

def app.DateTimeType.DateTimeType.checkTimeFormat (   hour,
  minute,
  tf,
  logger = None,
  isExtendLog = False 
)
static

Definition at line 903 of file DateTimeType.py.

def checkTimeFormat(hour, minute, tf, logger=None, isExtendLog=False):
    """Convert a 12-hour clock reading to its 24-hour equivalent.

    Args:
        hour: hour value (int or numeric string).
        minute: minute value (int or numeric string); returned unchanged.
        tf: AM/PM marker text.  A marker containing the letter 'p' (any
            case) is treated as PM, any other non-empty marker as AM.
            None or '' means the time carries no marker.
        logger: optional logger for debug output.
        isExtendLog: debug output is written only when this is true.

    Returns:
        Tuple `(hour, minute)`.  `hour` is returned as passed in unless it
        had to be converted, in which case it is an int.

    Fixes compared with the previous implementation:
      * a time without any marker is no longer pushed through the AM
        branch, so a 24-hour "12:00" stays 12:00 instead of becoming 00:00;
      * 12:xx AM becomes 00:xx for every minute value, not only for 12:00;
      * a string `hour` no longer raises TypeError in the PM branch.
    """
    if logger is not None and isExtendLog:
        logger.debug("tf: '%s'", str(tf))
        if tf is not None:
            logger.debug("find = %s", str(tf.lower().find('p')))

    if not tf:
        # No AM/PM marker: the value is already on the 24-hour clock.
        return hour, minute

    hourValue = int(hour)
    if 'p' in tf.lower():
        # PM: 12:xx stays as it is, 00:xx .. 11:xx move to the afternoon.
        if 0 <= hourValue < 12 and int(minute) >= 0:
            hour = hourValue + 12
    elif hourValue == 12:
        # AM: 12:xx is the first hour of the day.
        hour = 0

    return hour, minute
922 
923 

◆ convertPubDateToRFC2822()

def app.DateTimeType.DateTimeType.convertPubDateToRFC2822 (   rawPubdate,
  logger = None,
  isExtendLog = False 
)
static

Definition at line 1167 of file DateTimeType.py.

1167  def convertPubDateToRFC2822(rawPubdate, logger=None, isExtendLog=False):
      # Purpose: parse a publication-date string and return a datetime.datetime.
      # Despite the name, no RFC 2822 text is produced: the value returned is the
      # result of datetime.datetime.fromtimestamp(...) at the end of the function.
      #
      # Two strategies are tried:
      #   1. clean the string up and hand it to `parse(...)`;
      #   2. if anything in step 1 raises, rewrite the string with
      #      DateTimeType.adjustJapaneseDate() and parse it with time.strptime()
      #      using a fixed year/month/day "%H:%M" format.
      # Errors raised by the fallback itself are not caught here and reach the caller.
      # Debug output is written only when `logger` is set and `isExtendLog` is true.
1168  try:
1169  # replace all unicode digits to decimal (e.g. u'\uff13' to '3')
1170  import unicodedata
1171 
      # For every position holding a digit character, all occurrences of that
      # character are replaced by its decimal value (the character itself is used
      # as the regular-expression pattern).
1172  if isinstance(rawPubdate, unicode):
1173  for i in range(0, len(rawPubdate)):
1174  if rawPubdate[i].isdigit():
1175  rawPubdate = re.sub(rawPubdate[i], str(unicodedata.digit(rawPubdate[i])), rawPubdate)
1176 
1177  rawPubdate = rawPubdate.encode("utf_8")
1178 
1179  import calendar
1180 
      # When the text contains a comma, only the part before the first comma is kept.
1181  pubdate_parts = rawPubdate.split(",")
1182  if len(pubdate_parts) > 1:
1183  rawPubdate = pubdate_parts[0]
      # Remove known decoration that is not part of the date.
1184  rawPubdate = rawPubdate.replace("posted at", "")
1185  rawPubdate = rawPubdate.replace("Updated:", "")
1186  rawPubdate = re.sub(r"\(1/\d{1}ページ\)", "", rawPubdate)
1187 
1188  # Try extract 'Heisei' period
      # "平成<n>" is replaced by the year returned from extractYearFromHeiseiPeriod().
1189  if "平成" in rawPubdate:
1190  year = DateTimeType.extractYearFromHeiseiPeriod(rawPubdate)
1191  if year is not None:
1192  if logger and isExtendLog:
1193  logger.debug("'Heisei' period before: " + str(rawPubdate))
1194  rawPubdate = re.sub(r"平成(\d{1,2})", str(year), rawPubdate)
1195  if logger and isExtendLog:
1196  logger.debug("'Heisei' period after: " + str(rawPubdate))
1197 
1198  rawPubdate = re.sub(r"\(木\)", "", rawPubdate)
1199  rawPubdate = re.sub(r" 年 ", "年", rawPubdate)
1200  rawPubdate = re.sub(r"@", "", rawPubdate)
      # `parse` is not defined in this function; it is resolved from module scope
      # (presumably a date-parsing helper imported at the top of the file -- TODO
      # confirm which one).  Its result is converted to seconds with calendar.timegm().
1201  parsed_time_candidate_str = float(calendar.timegm(parse(rawPubdate).timetuple()))
1202 
1203  if logger and isExtendLog:
1204  logger.debug("pubdate in seconds: %s", str(parsed_time_candidate_str))
1205  except Exception, err:
1206  if logger is not None and isExtendLog:
1207  logger.debug("try replace rawPubdate return: " + str(err))
      # Fallback path: normalise the Japanese notation, then parse with a fixed format.
1208  rawPubdate = DateTimeType.adjustJapaneseDate(rawPubdate, logger, isExtendLog)
1209  # #rawPubdate = rawPubdate.decode('latin-1')
1210 
      # NOTE(review): this is a unicode literal built from \x escapes, i.e. it holds
      # the code points U+00E5, U+00B9, ... rather than the characters 年/月/日;
      # it appears to rely on rawPubdate being a UTF-8 byte string -- confirm
      # before changing either side.
1211  t = u"%Y\xe5\xb9\xb4%m\xe6\x9c\x88%d\xe6\x97\xa5 %H:%M"
      # time.mktime() interprets the parsed struct_time as local time.
1212  parsed_time_candidate_str = time.mktime(time.strptime(rawPubdate, t))
1213 
1214  # set result value
      # Despite its name, parsed_time_candidate_str holds a number of seconds.
1215  ret = datetime.datetime.fromtimestamp(parsed_time_candidate_str)
1216 
1217  return ret
1218 
1219 
Here is the call graph for this function:

◆ extractDate()

def app.DateTimeType.DateTimeType.extractDate (   inputStr,
  useCurrentYear = True,
  logger = None,
  isExtendLog = False 
)
static

Definition at line 677 of file DateTimeType.py.

def extractDate(inputStr, useCurrentYear=True, logger=None, isExtendLog=False):
    """Extract year, month and day from `inputStr`.

    Every pattern of `DateTimeType.patternListDate` is tried in order.  The
    named groups `short_year`, `year`, `mon` and `day` of the first usable
    match provide the values; a numeric month above 12 makes the scan move
    on to the next pattern.  The month text is translated with
    `DateTimeType.getMonthNumber`; when that yields None all three values
    are reset to 0.

    Args:
        inputStr: text to search.
        useCurrentYear: when true, a missing year (with month and day
            present) or a missing day (with year and month present) is
            filled in from today's date.
        logger: optional logger.
        isExtendLog: enables the detailed debug messages.

    Returns:
        Tuple `(year, month, day)` of ints; 0 marks a value that was not
        found.

    Fixes compared with the previous implementation:
      * the 'month: ...' debug call is now guarded like every other one;
        with the default `logger=None` it raised on each pattern that has
        a `mon` group, the scan was aborted and the final `int(month)`
        could raise ValueError outside the `try`;
      * the 'month = ...' debug message no longer concatenates str and int
        when no `mon` group was matched.
    """
    year = 0
    month = 0
    day = 0
    verbose = bool(logger and isExtendLog)

    try:
        for pattern in DateTimeType.patternListDate:
            match = re.search(pattern, inputStr)
            if verbose:
                logger.debug('match: ' + str(match) + ' pattern: ' + str(pattern))
            if not match:
                continue

            groups = match.groupdict()
            if verbose:
                logger.debug('match.groupdict(): ' + str(groups))

            if 'short_year' in groups:
                # Two-digit year: add the current millennium (e.g. 2000).
                year = int(groups['short_year']) + int(datetime.date.today().year // 1000 * 1000)

            if 'year' in groups:
                year = groups['year']

            if 'mon' in groups:
                month = groups['mon']
                if verbose:
                    logger.debug('month: ' + str(month))
                if month.isdigit() and int(month) > 12:
                    if verbose:
                        logger.debug('Bad month (' + str(month) + ') scipped!!!')
                    continue

            if 'day' in groups:
                day = groups['day']

            if verbose:
                logger.debug('month = ' + str(month))

            monthNumber = DateTimeType.getMonthNumber(month, logger, isExtendLog)

            if verbose:
                logger.debug('monthNumber = ' + str(monthNumber))

            if monthNumber is not None:
                month = monthNumber
            else:
                # Unknown month: discard the whole match.
                month = day = year = 0

            if verbose:
                logger.debug('year: ' + str(year) + ' month: ' + str(month) + ' day: ' + str(day))

            if int(year) > DateTimeType.MIN_ALLOWED_YEAR and int(year) <= datetime.date.today().year and \
               int(month) <= 12 and int(day) <= 31:
                break

            if logger is not None and isExtendLog:
                logger.debug('Match is good !!!')
            break

        if useCurrentYear:
            today = datetime.date.today()

            if year == 0 and month and day:
                year = today.year

            if year and month and day == 0:
                day = today.day

    except Exception as err:
        if logger and isExtendLog:
            logger.debug("inputStr: '" + str(inputStr) + "'")
        if logger and ExceptionLog is not None:
            ExceptionLog.handler(logger, err, 'extractDate:', (), \
                                 {ExceptionLog.LEVEL_NAME_ERROR:ExceptionLog.LEVEL_VALUE_DEBUG})
        elif logger:
            logger.debug('extractDate:' + str(err))

    return int(year), int(month), int(day)
754 
755 

◆ extractDateCommon()

def app.DateTimeType.DateTimeType.extractDateCommon (   inputStr,
  useCurrentYear = True,
  logger = None,
  isExtendLog = False 
)
static

Definition at line 813 of file DateTimeType.py.

def extractDateCommon(inputStr, useCurrentYear=True, logger=None, isExtendLog=False):
    """Language-independent extraction of a datetime from `inputStr`.

    The string is first normalised with `DateTimeType.prepareString`.  Then:
      * if it contains ten consecutive digits, they are read as a Unix
        timestamp and returned as a UTC datetime carrying
        `OffsetTzInfo(False, 0, 0)`;
      * otherwise the date and time are taken from
        `DateTimeType.extractDate` / `extractTime` / `checkTimeFormat`.
        With a complete date a naive datetime is built; when
        `useCurrentYear` is true and the date is today's with a 00:00
        time, the current time of day is used.  With no date at all but a
        time whose hour and minute are both above 0 (and `useCurrentYear`
        true), today's date is used, or yesterday's when the extracted
        hour is later than the current hour.

    Returns:
        `datetime.datetime` or None when nothing could be extracted or an
        error occurred (errors are logged, never raised).

    Fixes compared with the previous implementation:
      * the timestamp was converted to local time and then shifted by
        `abs(local.hour - utc.hour)` hours, which is wrong whenever local
        time and UTC fall on different calendar days (e.g. 01:00 at UTC+2
        gave a 22 hour shift); `utcfromtimestamp` is used instead;
      * the "yesterday" fallback changed only the day number and kept the
        current month and year, which gives a wrong or invalid date on the
        first day of a month; the full date of the previous day is used.
    """
    ret = None
    verbose = bool(logger and isExtendLog)
    try:
        localStr = DateTimeType.prepareString(inputStr)

        if localStr:
            match = re.search(r'\d{10}', localStr)
            if match:
                # Unix timestamp: convert straight to UTC.
                ret = datetime.datetime.utcfromtimestamp(int(match.group()))

                # Apply tzInfo
                tzInfo = OffsetTzInfo(False, 0, 0)
                ret = ret.replace(tzinfo=tzInfo)
                if logger is not None and isExtendLog:
                    logger.debug("tzname: " + str(ret.tzname()))
            else:
                year, month, day = DateTimeType.extractDate(localStr, useCurrentYear, logger, isExtendLog)
                if verbose:
                    logger.debug('year: ' + str(year) + '\tmonth: ' + str(month) + '\tday: ' + str(day))

                hour, minute, second, tf = DateTimeType.extractTime(localStr, logger, isExtendLog)
                if verbose:
                    logger.debug('hour: ' + str(hour) + '\tminute: ' + str(minute) + '\tsecond: ' + str(second) + \
                                 '\ttf: ' + str(tf))

                hour, minute = DateTimeType.checkTimeFormat(hour, minute, tf, logger, isExtendLog)

                if logger is not None and isExtendLog:
                    logger.debug('hour: ' + str(hour) + '\tminute: ' + str(minute))

                if year and month and day:
                    if useCurrentYear:
                        now = datetime.datetime.now()
                        # Today's date with no time of day: assume "now".
                        if month == now.month and day == now.day and hour == 0 and minute == 0:
                            hour = int(now.hour)
                            minute = int(now.minute)
                            second = now.second

                    ret = datetime.datetime(year, month, day, hour, minute, second, tzinfo=None)
                elif useCurrentYear and (year + month + day) == 0 and hour > 0 and minute > 0:
                    baseDay = datetime.datetime.today()

                    # A time later than the current hour cannot be today's.
                    if int(baseDay.hour) < int(hour):
                        baseDay = baseDay - datetime.timedelta(days=1)

                    ret = datetime.datetime(baseDay.year, baseDay.month, baseDay.day, hour, minute, second)

    except Exception as err:
        if logger is not None and isExtendLog:
            logger.debug("inputStr: '" + str(inputStr) + "'")
        if logger and ExceptionLog is not None:
            # NOTE(review): `(inputStr)` is the bare value, not a one-element
            # tuple; kept as is because the contract of ExceptionLog.handler
            # is not visible here.
            ExceptionLog.handler(logger, err, 'extractDateCommon:', (inputStr), \
                                 {ExceptionLog.LEVEL_NAME_ERROR:ExceptionLog.LEVEL_VALUE_DEBUG})
        elif logger is not None:
            logger.debug('extractDateCommon:' + str(err))

    return ret
892 
893 

◆ extractDateEng()

def app.DateTimeType.DateTimeType.extractDateEng (   inputStr,
  useCurrentYear,
  logger = None,
  isExtendLog = False 
)
static

Definition at line 375 of file DateTimeType.py.

def extractDateEng(inputStr, useCurrentYear, logger=None, isExtendLog=False):
    """Extract a date from `inputStr` using the LANG_ENG rules.

    `DateTimeType.intelligentExtractor` is tried first; when it returns
    None the result of `DateTimeType.extractDateCommon` is returned.
    """
    found = DateTimeType.intelligentExtractor(inputStr, useCurrentYear, logger, isExtendLog,
                                              DateTimeType.LANG_ENG)
    if found is not None:
        return found

    return DateTimeType.extractDateCommon(inputStr, useCurrentYear, logger, isExtendLog)
381 
382 

◆ extractDateFromHeiseiPeriod()

def app.DateTimeType.DateTimeType.extractDateFromHeiseiPeriod (   rawPubdate,
  logger = None,
  isExtendLog = False 
)
static

Definition at line 1361 of file DateTimeType.py.

def extractDateFromHeiseiPeriod(rawPubdate, logger=None, isExtendLog=False):
    """Build a datetime from a date written with a 'Heisei' period year.

    The year comes from `DateTimeType.extractYearFromHeiseiPeriod`.  The
    month is the text between '年' and '月', the day the text between '月'
    and '日'; the time of day is taken from `DateTimeType.extractTime`.

    Returns:
        A naive `datetime.datetime`, or None when no year was found or any
        step raised (the error is only logged).
    """
    result = None
    verbose = logger and isExtendLog
    try:
        year = DateTimeType.extractYearFromHeiseiPeriod(rawPubdate)
        if year is not None:
            if verbose:
                logger.debug('rawPubdate: ' + str(rawPubdate))
                logger.debug('year: ' + str(year))

            # Position just behind the year marker and of the month / day markers.
            monthStart = rawPubdate.find('年') + len('年')
            monthEnd = rawPubdate.find('月')
            monthText = rawPubdate[monthStart:monthEnd]
            if verbose:
                logger.debug('month: ' + str(monthText))
            month = int(unicode(monthText))

            dayStart = rawPubdate.find('月') + len('月')
            dayEnd = rawPubdate.find('日')
            dayText = rawPubdate[dayStart:dayEnd]
            if verbose:
                logger.debug('day: ' + str(dayText))
            day = int(unicode(dayText))

            # The time of day is optional in the input.
            hour, minute, second, tf = DateTimeType.extractTime(rawPubdate, logger, isExtendLog)
            if logger is not None and isExtendLog:
                logger.debug("hour: %s, minute: %s, second: %s, tf: %s", str(hour), str(minute), str(second), str(tf))

            result = datetime.datetime(year=year, month=month, day=day, hour=hour, minute=minute,
                                       second=second, tzinfo=None)
    except Exception as err:
        if logger is not None and isExtendLog:
            logger.debug("Extract 'Heisei' period has error: " + str(err))

    return result
1400 
1401 

◆ extractDateGerman()

def app.DateTimeType.DateTimeType.extractDateGerman (   inputStr,
  useCurrentYear,
  logger = None,
  isExtendLog = False 
)
static

Definition at line 422 of file DateTimeType.py.

def extractDateGerman(inputStr, useCurrentYear, logger=None, isExtendLog=False):
    """Extract a date from `inputStr` using the LANG_GER rules.

    `DateTimeType.intelligentExtractor` is tried first; when it returns
    None the result of `DateTimeType.extractDateCommon` is returned.
    """
    found = DateTimeType.intelligentExtractor(inputStr, useCurrentYear, logger, isExtendLog,
                                              DateTimeType.LANG_GER)
    if found is not None:
        return found

    return DateTimeType.extractDateCommon(inputStr, useCurrentYear, logger, isExtendLog)
428 
429 

◆ extractDateJapan()

def app.DateTimeType.DateTimeType.extractDateJapan (   inputStr,
  useCurrentYear,
  logger = None,
  isExtendLog = False 
)
static

Definition at line 438 of file DateTimeType.py.

def extractDateJapan(inputStr, useCurrentYear, logger=None, isExtendLog=False):
    """Extract a date from `inputStr` using the LANG_JAP rules.

    The input is first passed through `DateTimeType.replaceJapanSimbols`.
    The extractors are then tried in this order, stopping at the first one
    that yields a value:
      1. `intelligentExtractor` with LANG_JAP;
      2. `convertPubDateToRFC2822` (only when the text contains an ASCII
         digit; its exceptions are logged and ignored);
      3. `extractDateFromHeiseiPeriod`;
      4. `extractDateCommon`.
    """
    # replace japanese symbols
    inputStr = DateTimeType.replaceJapanSimbols(inputStr, logger, isExtendLog)

    pubdate = DateTimeType.intelligentExtractor(inputStr, useCurrentYear, logger, isExtendLog,
                                                DateTimeType.LANG_JAP)
    if pubdate is not None:
        return pubdate

    if re.search(r'[0-9]', inputStr):
        try:
            pubdate = DateTimeType.convertPubDateToRFC2822(inputStr, logger, isExtendLog)
        except Exception as err:
            if logger and isExtendLog:
                logger.debug('extractDateJapan: ' + str(err))

    if pubdate is None:
        pubdate = DateTimeType.extractDateFromHeiseiPeriod(inputStr, logger, isExtendLog)

    if pubdate is None:
        pubdate = DateTimeType.extractDateCommon(inputStr, useCurrentYear, logger, isExtendLog)

    return pubdate
461 
462 

◆ extractDateRus()

def app.DateTimeType.DateTimeType.extractDateRus (   inputStr,
  useCurrentYear,
  logger = None,
  isExtendLog = False 
)
static

Definition at line 391 of file DateTimeType.py.

def extractDateRus(inputStr, useCurrentYear, logger=None, isExtendLog=False):
    """Extract a date from `inputStr` using the LANG_RUS rules.

    `DateTimeType.intelligentExtractor` is tried first; when it returns
    None the result of `DateTimeType.extractDateCommon` is returned.
    """
    found = DateTimeType.intelligentExtractor(inputStr, useCurrentYear, logger, isExtendLog,
                                              DateTimeType.LANG_RUS)
    if found is not None:
        return found

    return DateTimeType.extractDateCommon(inputStr, useCurrentYear, logger, isExtendLog)
397 

◆ extractDateUkr()

def app.DateTimeType.DateTimeType.extractDateUkr (   inputStr,
  useCurrentYear,
  logger = None,
  isExtendLog = False 
)
static

Definition at line 406 of file DateTimeType.py.

def extractDateUkr(inputStr, useCurrentYear, logger=None, isExtendLog=False):
    """Extract a date from `inputStr` using the LANG_UKR rules.

    `DateTimeType.intelligentExtractor` is tried first; when it returns
    None the result of `DateTimeType.extractDateCommon` is returned.
    """
    found = DateTimeType.intelligentExtractor(inputStr, useCurrentYear, logger, isExtendLog,
                                              DateTimeType.LANG_UKR)
    if found is not None:
        return found

    return DateTimeType.extractDateCommon(inputStr, useCurrentYear, logger, isExtendLog)
412 
413 

◆ extractTime()

def app.DateTimeType.DateTimeType.extractTime (   inputStr,
  logger = None,
  isExtendLog = False 
)
static

Definition at line 763 of file DateTimeType.py.

def extractTime(inputStr, logger=None, isExtendLog=False):  # pylint: disable=W0613
    """Extract the time of day from `inputStr`.

    The patterns of `DateTimeType.patternListTime` are tried in order and
    the first one that matches supplies the named groups `hour`, `min`,
    `sec` and `tf` (the AM/PM marker).

    Returns:
        Tuple `(hour, minute, second, tf)`: three ints and the marker
        text.  Values that were not found are 0 and '' respectively.
        Errors during matching are logged and the defaults are returned.

    Fix compared with the previous implementation: a named group that is
    defined by the pattern but did not take part in the match is present
    in `groupdict()` with the value None.  Such a value used to replace
    the default and made the final `int(...)` conversion raise TypeError
    outside the `try` block; None values are now ignored.
    """
    hour = 0
    minute = 0
    second = 0
    tf = ''

    try:
        for pattern in DateTimeType.patternListTime:
            match = re.search(pattern, inputStr)
            if not match:
                continue

            groups = match.groupdict()
            if groups.get('hour') is not None:
                hour = groups['hour']
            if groups.get('min') is not None:
                minute = groups['min']
            if groups.get('sec') is not None:
                second = groups['sec']
            if groups.get('tf') is not None:
                tf = groups['tf']
            break  # only the first matching pattern is used

    except Exception as err:
        if logger and isExtendLog:
            logger.debug("inputStr: '" + str(inputStr) + "'")
        if logger and ExceptionLog is not None:
            ExceptionLog.handler(logger, err, 'extractTime:', (), \
                                 {ExceptionLog.LEVEL_NAME_ERROR:ExceptionLog.LEVEL_VALUE_DEBUG})
        elif logger:
            logger.debug('extractTime:' + str(err))

    return int(hour), int(minute), int(second), tf
803 
804 

◆ extractUseDayState()

def app.DateTimeType.DateTimeType.extractUseDayState (   inputStr,
  useCurrentYear,
  langName,
  logger,
  isExtendLog 
)
static

Definition at line 933 of file DateTimeType.py.

def extractUseDayState(inputStr, useCurrentYear, langName, logger, isExtendLog):
    """Resolve a relative day word found in `inputStr` to a datetime.

    The entries of `DateTimeType.DAY_STATE_DICT[langName]` are searched in
    the lower-cased input in list order.  The position of the first entry
    found is the number of days subtracted from today's date; the time of
    day comes from `DateTimeType.extractTime`.

    Returns:
        The resulting `datetime.datetime`, or None when no entry is found
        or `useCurrentYear` is false.
    """
    result = None
    loweredInput = inputStr.lower()

    for daysBack, dayState in enumerate(DateTimeType.DAY_STATE_DICT[langName]):
        if loweredInput.find(dayState) < 0:
            continue

        if logger is not None and isExtendLog:
            logger.debug("!!! dayState: " + str(dayState))

        if useCurrentYear:
            today = datetime.date.today()
            hour, minute, second, tf = DateTimeType.extractTime(inputStr, logger, isExtendLog)  # pylint: disable=W0612
            moment = datetime.datetime.combine(today, datetime.time(hour, minute, second))
            result = moment - datetime.timedelta(days=daysBack)
        break  # the first entry found decides

    return result
953 
954 

◆ extractUseTimePeriodName()

def app.DateTimeType.DateTimeType.extractUseTimePeriodName (   inputStr,
  useCurrentYear,
  langName 
)
static

Definition at line 962 of file DateTimeType.py.

def extractUseTimePeriodName(inputStr, useCurrentYear, langName):
    """Read an '<N> hours' expression and map it onto today's date.

    Only handled when `useCurrentYear` is true and `langName` is
    `DateTimeType.LANG_ENG`.  The number found in front of ' hours' is
    used as the hour of the current day; minutes, seconds and
    microseconds are set to 0 and the result is naive.

    Returns:
        `datetime.datetime`, or None when the language is not handled,
        nothing matches, or the number is not a valid hour (above 23).

    Fix compared with the previous implementation: the accepted range was
    0..24, but `datetime.replace(hour=24)` raises ValueError, so an input
    such as '24 hours' crashed instead of yielding None.
    """
    if not useCurrentYear or langName != DateTimeType.LANG_ENG:
        return None

    match = re.search(r'(?P<hour>\d{1,2}) hours', inputStr, re.UNICODE)
    if match is None:
        return None

    hour = int(match.group('hour'))
    if hour > 23:
        # datetime only accepts hours 0..23.
        return None

    return datetime.datetime.now().replace(hour=hour, minute=0, second=0, microsecond=0, tzinfo=None)
979 
980 

◆ extractUseTimePeriodNameAgo()

def app.DateTimeType.DateTimeType.extractUseTimePeriodNameAgo (   inputStr,
  useCurrentYear,
  langName,
  logger 
)
static

Definition at line 1025 of file DateTimeType.py.

def extractUseTimePeriodNameAgo(inputStr, useCurrentYear, langName, logger):
    """Turn an "<N> <period> ago" phrase into a datetime relative to now.

    Japanese input handles "<N>日前" (days) and "<N>時間前" (hours). English
    input handles years, months, days, hours and minutes, singular or plural,
    in any letter case. When several phrases match, the one checked last wins.
    Seconds and microseconds of the result are zeroed.

    @param inputStr - raw date string to inspect
    @param useCurrentYear - when false nothing is computed
    @param langName - ``DateTimeType.LANG_JAP`` or ``DateTimeType.LANG_ENG``
    @param logger - logger instance or None
    @return datetime.datetime or None when no phrase was recognised
    """
    # variable for results
    ret = None
    if useCurrentYear and langName == DateTimeType.LANG_JAP:
        for pattern, unit in ((r'(?P<value>\d{1,2})日前', 'days'),
                              (r'(?P<value>\d{1,2})時間前', 'hours')):
            match = re.search(pattern, inputStr, re.UNICODE)
            if match is not None:
                value = int(match.group('value'))
                ret = datetime.datetime.now() + relativedelta(**{unit: -value})
                ret = ret.replace(second=0, microsecond=0, tzinfo=None)

    if useCurrentYear and langName == DateTimeType.LANG_ENG:
        for unit in ('year', 'month', 'day', 'hour', 'minute'):
            # Optional plural "s" and IGNORECASE cover "1 hour ago", "3 Days Ago"
            # as well as the "N hours ago" form.
            pattern = r'(?P<value>\d{1,2}).? ' + unit + r's?.?ago'
            match = re.search(pattern, inputStr, re.UNICODE | re.IGNORECASE)
            if match:
                value = int(match.group('value'))
                # relativedelta takes the plural keyword for relative shifts.
                ret = datetime.datetime.now() + relativedelta(**{unit + 's': -value})
                ret = ret.replace(second=0, microsecond=0, tzinfo=None)
                if logger is not None:
                    logger.debug("ret: %s", str(ret))

    return ret
1071 
1072 

◆ extractUseTimePeriodNameLeft()

def app.DateTimeType.DateTimeType.extractUseTimePeriodNameLeft (   inputStr,
  useCurrentYear,
  langName 
)
static

Definition at line 988 of file DateTimeType.py.

def extractUseTimePeriodNameLeft(inputStr, useCurrentYear, langName):
    """Turn an "<N> <period> left" phrase into a datetime after now.

    Only English input is handled. Periods are tried in the order years,
    months, days, hours, minutes and the first one found is used. Seconds
    and microseconds of the result are zeroed.

    @param inputStr - raw date string to inspect
    @param useCurrentYear - when false nothing is computed
    @param langName - only ``DateTimeType.LANG_ENG`` is handled
    @return datetime.datetime or None when no phrase was recognised
    """
    result = None
    if not (useCurrentYear and langName == DateTimeType.LANG_ENG):
        return result

    for period in [u'years', u'months', u'days', u'hours', u'minutes']:
        found = re.search(r'(?P<value>\d{1,2}).? ' + period + '.?left', inputStr)
        if not found:
            continue

        captured = found.groupdict()
        amount = int(captured['value']) if 'value' in captured else 0

        # The period name doubles as the relativedelta keyword.
        shifted = datetime.datetime.now() + relativedelta(**{str(period): +amount})
        result = shifted.replace(second=0, microsecond=0, tzinfo=None)
        break

    return result
1015 
1016 

◆ extractUtcOffset()

def app.DateTimeType.DateTimeType.extractUtcOffset (   inputStr,
  logger = None,
  isExtendLog = False,
  timezonesDict = DateTimeTimezones.timezonesDict 
)
static

Definition at line 1449 of file DateTimeType.py.

def extractUtcOffset(inputStr, logger=None, isExtendLog=False, timezonesDict=DateTimeTimezones.timezonesDict):  # pylint: disable=W0102
    """Find a UTC offset for ``inputStr``.

    First the keys of ``timezonesDict`` are searched as substrings of the
    input; the first key found is resolved through ``DateTimeType.utcOffset``.
    If that gives nothing, the patterns of
    ``DateTimeType.patternListTimezoneOffset`` are matched at the start of the
    input. Errors raised while matching are logged and swallowed.

    @param inputStr - string to inspect, may be None
    @param logger - logger instance or None
    @param isExtendLog - enables debug output
    @param timezonesDict - timezone table, read only
    @return offset string or None
    """
    verbose = logger is not None and isExtendLog
    offset = None

    if inputStr is not None:
        for tzName in timezonesDict.keys():
            if inputStr.find(tzName) > -1:
                offset = DateTimeType.utcOffset(tzName, timezonesDict)
                if verbose:
                    logger.debug('Timezone: ' + str(tzName) + ' offset: ' + str(offset))
                break

    if offset is None:
        try:
            for pattern in DateTimeType.patternListTimezoneOffset:
                found = re.match(pattern, inputStr)
                if verbose:
                    logger.debug('inputStr: ' + str(inputStr) + ' pattern: ' + str(pattern) + ' match: ' + str(found))
                if found and 'offset' in found.groupdict():
                    offset = found.groupdict()['offset']
                    break
        except Exception as err:
            if verbose:
                logger.debug('extractUtcOffset error: ' + str(err))

    if verbose:
        logger.debug("!!! ret: %s", str(offset))
    return offset
1478 
1479 

◆ extractUtcTimezoneName()

def app.DateTimeType.DateTimeType.extractUtcTimezoneName (   inputStr,
  logger = None,
  isExtendLog = False,
  timezonesDict = DateTimeTimezones.timezonesDict 
)
static

Definition at line 1487 of file DateTimeType.py.

def extractUtcTimezoneName(inputStr, logger=None, isExtendLog=False, timezonesDict=DateTimeTimezones.timezonesDict):  # pylint: disable=W0102
    """Return the first key of ``timezonesDict`` that occurs in ``inputStr``
    directly after a space.

    @param inputStr - string to inspect, may be None
    @param logger - logger instance or None
    @param isExtendLog - enables debug output
    @param timezonesDict - timezone table, only its keys are read
    @return the matching key or '' when nothing was found
    """
    # variable for result
    ret = ''
    if inputStr is None:
        # Checked before logging: concatenating None into the message would raise.
        return ret

    if logger is not None and isExtendLog:
        logger.debug('inputStr: %s', inputStr)

    for key in timezonesDict.keys():
        # Searching for ' ' + key requires a real preceding space. Indexing
        # inputStr[pos - 1] wrapped around to the last character when the name
        # started the string, and only looked at the first occurrence.
        if key and inputStr.find(' ' + key) > -1:
            ret = key
            if logger is not None and isExtendLog:
                logger.debug('Timezone name: ' + str(key))
            break

    return ret
1502 
1503 

◆ extractYearFromHeiseiPeriod()

def app.DateTimeType.DateTimeType.extractYearFromHeiseiPeriod (   rawPubdate)
static

Definition at line 1335 of file DateTimeType.py.

def extractYearFromHeiseiPeriod(rawPubdate):
    """Extract a Gregorian year from a Japanese date string.

    When the string contains the era marker "平成", the era year in
    "平成<N>年" is converted as 1988 + N; the first era year may also be
    written as "平成元年". Otherwise the 1-4 digits in front of the first
    "年" are returned as the year.

    @param rawPubdate - raw date string
    @return year as int, or None when no year was found
    """
    # variable for result
    ret = None
    if "平成" in rawPubdate:
        startPeriodYear = 1988
        match = re.search(r'平成(?P<year>\d{1,2}|元)年', rawPubdate)
        if match:
            yearText = match.group('year')
            # "元" denotes the first year of the era.
            eraYear = 1 if yearText == '元' else int(yearText)
            ret = startPeriodYear + eraYear
    else:
        match = re.search(r'(?P<year>\d{1,4})年', rawPubdate)
        if match:
            ret = int(match.group('year'))

    return ret
1352 
1353 

◆ getInt()

def app.DateTimeType.DateTimeType.getInt (   self)

Definition at line 244 of file DateTimeType.py.

def getInt(self):
    """Return ``self.datetime`` as whole seconds elapsed since
    ``datetime.datetime.fromtimestamp(0)``.

    On any error ``self.isError`` is set, ``self.errorMsg`` receives the
    error text and None is returned.

    @return int number of seconds or None
    """
    try:
        elapsed = self.datetime - datetime.datetime.fromtimestamp(0)
        return int(elapsed.total_seconds())
    except Exception as err:
        self.isError = True
        self.errorMsg = str(err)
        return None
255 
256 

◆ getLang()

def app.DateTimeType.DateTimeType.getLang (   inputStr,
  logger = None,
  isExtendLog = False 
)
static

Definition at line 288 of file DateTimeType.py.

def getLang(inputStr, logger=None, isExtendLog=False):
    """Detect the language of a date string from the word lists in
    ``DateTimeType.LANG_DICT``.

    The input is converted to unicode when possible (a failed conversion is
    logged and the raw input is used). The first language that has a word
    occurring in the lower-cased input is returned. If the search itself
    raises, ``DateTimeType.LANG_ENG`` is returned.

    @param inputStr - raw date string
    @param logger - logger instance or None
    @param isExtendLog - enables logging of caught errors
    @return language key, or None when no word matched
    """
    detected = None
    vocabulary = DateTimeType.LANG_DICT
    try:
        text = inputStr
        try:
            text = unicode(inputStr, 'utf-8', 'ignore')
        except Exception as convErr:
            if logger and isExtendLog:
                logger.debug("getLang: '" + str(convErr) + "'")
                logger.info(getTracebackInfo())

        for langName in vocabulary.keys():
            # A word counts as present either as listed or lower-cased.
            if any(text.lower().find(word) > -1 or text.lower().find(word.lower()) > -1
                   for word in vocabulary[langName]):
                detected = langName
                break

    except Exception as err:
        if logger and isExtendLog:
            logger.debug("getLang: '" + str(err) + "'")
            logger.info(getTracebackInfo())
        detected = DateTimeType.LANG_ENG

    return detected
317 
318 
def getTracebackInfo(linesNumberMax=None)
Definition: Utils.py:218
Here is the call graph for this function:

◆ getMonthNumber()

def app.DateTimeType.DateTimeType.getMonthNumber (   inputStr,
  logger,
  isExtendLog = False 
)
static

Definition at line 326 of file DateTimeType.py.

def getMonthNumber(inputStr, logger, isExtendLog=False):
    """Convert a month given as digits or as a (partial) month name to its number.

    A stripped input of one or two digits is returned as int without range
    checking. Otherwise the month lists in ``DateTimeType.MONTH_DICT`` are
    scanned; a month matches when its lower-cased name contains the input or
    the input contains the name, and its 1-based position is returned.

    @param inputStr - month text
    @param logger - logger instance or None
    @param isExtendLog - enables debug output
    @return month number as int, or None when nothing matched
    """
    if logger and isExtendLog:
        logger.debug("getMonthNumber inputStr: '" + inputStr + "' type: " + str(type(inputStr)))

    ret = None
    inputStr = inputStr.strip()

    # An empty string is a substring of every month name, so it used to
    # "match" the first month of the first list.
    if not inputStr:
        return ret

    if len(inputStr) < 3 and inputStr.isdigit():
        return int(inputStr)

    # Decode once; the result does not depend on the month being compared.
    inputMonth = inputStr
    if isinstance(inputStr, bytes):
        try:
            inputMonth = inputStr.decode('utf-8')
        except UnicodeError as err:
            if logger is not None and isExtendLog:
                logger.debug("Operation decode'utf-8' has error: " + str(err))
    loweredInput = inputMonth.lower()

    monthDict = DateTimeType.MONTH_DICT
    for key in monthDict.keys():
        for monthNumber, monthName in enumerate(monthDict[key], 1):
            month = monthName.lower()
            if loweredInput.find(month) > -1 or month.find(loweredInput) > -1:
                return monthNumber

    return ret
365 
366 

◆ getString()

def app.DateTimeType.DateTimeType.getString (   self,
  formatString = None 
)

Definition at line 261 of file DateTimeType.py.

def getString(self, formatString=None):
    """Format ``self.datetime`` as text.

    Without ``formatString`` the ISO representation with ``self.ISO_SEP`` as
    date/time separator is produced; otherwise ``strftime(formatString)``.
    A ``formatString`` that is not a ``str``, or any other failure, sets
    ``self.isError`` / ``self.errorMsg`` and yields None.

    @param formatString - strftime format or None
    @return formatted string or None on error
    """
    try:
        if formatString is None:
            return self.datetime.isoformat(self.ISO_SEP)
        if not isinstance(formatString, str):
            raise Exception(self.ERROR_FORMAT_STRING_TYPE)
        return self.datetime.strftime(formatString)
    except Exception as err:
        self.isError = True
        self.errorMsg = str(err)

    return None
279 
280 

◆ getTimezone()

def app.DateTimeType.DateTimeType.getTimezone (   dt)
static

Definition at line 618 of file DateTimeType.py.

def getTimezone(dt):
    """Return the ``%z`` rendering of ``dt``, or '' when ``dt`` is None.

    @param dt - datetime instance or None
    @return timezone offset string
    """
    return dt.strftime('%z') if dt is not None else ''
624 
625 

◆ intelligentExtractor()

def app.DateTimeType.DateTimeType.intelligentExtractor (   inputStr,
  useCurrentYear = True,
  logger = None,
  isExtendLog = False,
  langName = None 
)
static

Definition at line 1122 of file DateTimeType.py.

def intelligentExtractor(inputStr, useCurrentYear=True, logger=None, isExtendLog=False, langName=None):
    """Extract a datetime from relative phrases in ``inputStr``.

    The input is cleaned with ``DateTimeType.prepareString``; the language is
    detected with ``DateTimeType.getLang`` unless ``langName`` is given. The
    "ago" extractor runs first; for every language except
    ``DateTimeType.LANG_JAP`` the day-state, time-period and "left" extractors
    follow, in that order, until one returns a value. Exceptions raised by the
    extractors are logged and swallowed.

    @param inputStr - raw date string
    @param useCurrentYear - forwarded to the extractors
    @param logger - logger instance or None
    @param isExtendLog - enables debug output
    @param langName - language key or None for auto detection
    @return datetime.datetime or None
    """
    result = None
    prepared = DateTimeType.prepareString(copy.copy(inputStr))
    try:
        if langName is None:
            langName = DateTimeType.getLang(prepared)

        if langName is not None:
            result = DateTimeType.extractUseTimePeriodNameAgo(prepared, useCurrentYear, langName, logger)

            if langName != DateTimeType.LANG_JAP:
                # Evaluated lazily: a fallback runs only while nothing was found.
                fallbacks = (
                    lambda: DateTimeType.extractUseDayState(prepared, useCurrentYear, langName, logger, isExtendLog),
                    lambda: DateTimeType.extractUseTimePeriodName(prepared, useCurrentYear, langName),
                    lambda: DateTimeType.extractUseTimePeriodNameLeft(prepared, useCurrentYear, langName),
                )
                for attempt in fallbacks:
                    if result is not None:
                        break
                    result = attempt()

        # #TODO here extended functional in future
    except Exception as err:
        if logger is not None and isExtendLog:
            logger.debug("inputStr: '" + inputStr + "'")
        if logger is not None and ExceptionLog is not None:
            ExceptionLog.handler(logger, err, 'intelligentExtractor:', (inputStr),
                                 {ExceptionLog.LEVEL_NAME_ERROR: ExceptionLog.LEVEL_VALUE_DEBUG})
        elif logger:
            logger.debug('intelligentExtractor:' + str(err))

    if logger is not None and isExtendLog:
        logger.debug('intelligentExtractor return: ' + str(result))

    return result
1158 
1159 

◆ isAllowedInputString()

def app.DateTimeType.DateTimeType.isAllowedInputString (   dataString,
  logger = None,
  isExtendLog = False 
)
static

Definition at line 633 of file DateTimeType.py.

def isAllowedInputString(dataString, logger=None, isExtendLog=False):
    """Tell whether ``dataString`` may be passed on to the date parsers.

    Rejected are None, non-string values, and non-empty strings that are
    upper-case and alphanumeric but not purely alphabetic.

    @param dataString - candidate input
    @param logger - logger instance or None
    @param isExtendLog - enables the debug message for rejected strings
    @return True when the input is allowed, False otherwise
    """
    if dataString is None or not isinstance(dataString, basestring):
        return False

    rejected = (dataString != "" and dataString.isupper() and dataString.isalnum()
                and not dataString.isalpha())
    if rejected:
        if logger and isExtendLog:
            logger.debug('input string has not allowed format')
        return False

    return True
645 
646 

◆ isUtf8CodePage()

def app.DateTimeType.DateTimeType.isUtf8CodePage (   inputStr,
  logger,
  isExtendLog 
)
static

Definition at line 1080 of file DateTimeType.py.

def isUtf8CodePage(inputStr, logger, isExtendLog):
    """Report whether ``inputStr.decode('utf-8')`` succeeds.

    @param inputStr - value to probe
    @param logger - logger instance or None
    @param isExtendLog - enables the debug message on failure
    @return True when decoding worked, False on any exception
    """
    try:
        inputStr.decode('utf-8')
    except Exception as err:
        if logger is not None and isExtendLog:
            logger.debug('inputStr.decode: ' + str(err))
        return False

    return True
1091 
1092 

◆ normalizeTimezone()

def app.DateTimeType.DateTimeType.normalizeTimezone (   inputStr,
  logger = None,
  isExtendLog = False 
)
static

Definition at line 1510 of file DateTimeType.py.

def normalizeTimezone(inputStr, logger=None, isExtendLog=False):
    """Rewrite a trailing numeric timezone offset into ``HH:MM`` form.

    The text after the last '+' (or, when there is no '+', after the last '-')
    is examined:
      * 1-2 digits are treated as hours      ("+9"    -> "+09:00")
      * 3-4 digits are treated as [H]HMM     ("+930"  -> "+09:30")
      * "<digits>Z" is reduced to the digits ("+0900Z" -> "+0900")
    Anything else leaves the input unchanged. Only the text after the sign is
    rewritten; the rest of the string is never touched.

    @param inputStr - date string, may be None
    @param logger - logger instance or None
    @param isExtendLog - enables debug output
    @return normalized string, or the input itself when nothing applies
    """
    # variable for result
    ret = inputStr
    if inputStr is None:
        return ret

    pos = inputStr.rfind('+')
    if pos == -1:
        pos = inputStr.rfind('-')
    if pos == -1:
        return ret

    tail = inputStr[pos + 1:]
    oldValue = tail.strip()
    newValue = None
    if logger is not None and isExtendLog:
        logger.debug('oldValue: ' + str(oldValue))

    if oldValue.isdigit():
        if len(oldValue) <= 2:
            newValue = oldValue.zfill(2) + ':00'
        elif len(oldValue) <= 4:
            padded = oldValue.zfill(4)
            newValue = padded[:2] + ':' + padded[2:]
    else:
        match = re.search(r'(?P<tzone>\d{1,4})Z', oldValue)
        if match:
            newValue = match.group('tzone')

    if logger is not None and isExtendLog:
        logger.debug('newValue: ' + str(newValue))

    # Without a recognised offset the string is returned as is; replacing the
    # tail with '' would silently delete part of the date.
    if newValue is not None and oldValue:
        ret = inputStr[:pos + 1] + tail.replace(oldValue, newValue, 1)

    return ret
1538 
1539 

◆ parse()

def app.DateTimeType.DateTimeType.parse (   dataString,
  useCurrentYear = True,
  logger = None,
  isExtendLog = False 
)
static

Definition at line 471 of file DateTimeType.py.

def parse(dataString, useCurrentYear=True, logger=None, isExtendLog=False):
    """Parse a date string into a datetime.

    Steps, all skipped when ``DateTimeType.isAllowedInputString`` rejects the
    input:
      1. For inputs of at least ``MIN_ALLOWED_LEN_FOR_DATEUTILS`` characters
         try ``parser.parse``; on failure retry once with the string returned
         by ``DateTimeType.normalizeTimezone`` if it differs.
      2. If that gave nothing, detect the language and call the matching
         ``extractDate*`` method (``extractDateCommon`` as fallback).
      3. If the result has no tzinfo, attach the timezone named in the string.

    @param dataString - raw date string
    @param useCurrentYear - forwarded to the language specific extractors
    @param logger - logger instance or None
    @param isExtendLog - enables debug output
    @return datetime.datetime or None
    """
    # variable for result
    ret = None
    verbose = logger is not None and isExtendLog
    if verbose:
        # Logged through %-arguments: the value has not been validated yet and
        # string concatenation would raise for None or non-string input.
        logger.debug("inputStr: '%s'", dataString)

    if DateTimeType.isAllowedInputString(dataString, logger, isExtendLog):
        if len(dataString) >= int(DateTimeType.MIN_ALLOWED_LEN_FOR_DATEUTILS):
            try:
                if not DateTimeType.isUtf8CodePage(dataString, logger, isExtendLog):
                    dataString = DateTimeType.changeCodePageToAscii(dataString, logger, isExtendLog)

                if verbose:
                    logger.debug("try use 'dateutil'")

                ret = parser.parse(dataString)
                if ret is not None:
                    if verbose:
                        logger.debug("'dateutil' return: " + str(ret.isoformat(DateTimeType.ISO_SEP)))
                    ret = ret.replace(microsecond=0)
            except Exception as err:  # pylint: disable=W0702
                if verbose:
                    logger.debug("'dateutil' can not parse: " + str(err))
                try:
                    normalizedString = DateTimeType.normalizeTimezone(dataString, logger, isExtendLog)
                    if dataString != normalizedString:
                        if verbose:
                            logger.debug("retry parsing use 'dateutil'")
                        ret = parser.parse(normalizedString)
                        if ret is not None and verbose:
                            logger.debug("'dateutil' return: " + str(ret.isoformat(DateTimeType.ISO_SEP)))
                except Exception as retryErr:
                    if verbose:
                        logger.debug("'dateutil' can not parse: " + str(retryErr))

        if ret is None:
            # Identification of the language
            langType = DateTimeType.getLang(dataString, logger, isExtendLog)

            if verbose:
                logger.debug('lang type detected as: ' + str(langType))

            if langType == DateTimeType.LANG_ENG:
                # extract english date
                ret = DateTimeType.extractDateEng(dataString, useCurrentYear, logger, isExtendLog)
            elif langType == DateTimeType.LANG_RUS:
                # extract russian date
                ret = DateTimeType.extractDateRus(dataString, useCurrentYear, logger, isExtendLog)
            elif langType == DateTimeType.LANG_UKR:
                # extract ukrainian date
                ret = DateTimeType.extractDateUkr(dataString, useCurrentYear, logger, isExtendLog)
            elif langType == DateTimeType.LANG_GER:
                # extract german date
                ret = DateTimeType.extractDateGerman(dataString, useCurrentYear, logger, isExtendLog)
            elif langType == DateTimeType.LANG_JAP:
                # extract japanese date
                ret = DateTimeType.extractDateJapan(dataString, useCurrentYear, logger, isExtendLog)
            else:
                ret = DateTimeType.extractDateCommon(dataString, useCurrentYear, logger, isExtendLog)

        if ret is not None and ret.tzinfo is None:
            timezoneName = DateTimeType.extractUtcTimezoneName(dataString, logger, isExtendLog)
            # NOTE(review): timezoneName is '' when the string names no timezone;
            # confirm what gettz('') returns and that it is the intended default.
            utcZone = gettz(timezoneName)
            if utcZone is not None:
                ret = ret.replace(tzinfo=utcZone)
            else:
                ret = DateTimeType.applyUtcTimezone(ret, timezoneName, DateTimeTimezones.timezonesDict, logger, isExtendLog)

    if verbose:
        if ret is not None:
            logger.debug('result pubdate: ' + str(ret.isoformat(DateTimeType.ISO_SEP)))
        else:
            logger.debug('result pubdate: NONE')

    return ret
552 
553 
Here is the caller graph for this function:

◆ prepareString()

def app.DateTimeType.DateTimeType.prepareString (   inputStr)
static

Definition at line 652 of file DateTimeType.py.

def prepareString(inputStr):
    """Clean a raw date string before extraction.

    Removes the ``%<tag>%`` placeholders for every name in
    ``DateTimeType.TAG_NAMES`` (in any letter case), replaces each character of
    ``DateTimeType.BAD_SIMBOLS`` with a space and squeezes double spaces.

    @param inputStr - raw date string
    @return cleaned string
    """
    ret = inputStr

    for tagName in DateTimeType.TAG_NAMES:
        # The placeholder used to be detected case-insensitively but removed
        # case-sensitively, so e.g. an upper-case placeholder stayed in the text.
        placeholder = re.compile(re.escape('%' + tagName + '%'), re.IGNORECASE)
        ret = placeholder.sub('', ret)

    for bad in DateTimeType.BAD_SIMBOLS:
        ret = ret.replace(bad, ' ')

    # NOTE(review): replacing a single space with a single space did nothing;
    # presumably the intent is to squeeze the double spaces left by the
    # substitutions above - confirm.
    ret = ret.replace('  ', ' ')

    return ret
667 
668 

◆ replaceJapanSimbols()

def app.DateTimeType.DateTimeType.replaceJapanSimbols (   rawPubdate,
  logger = None,
  isExtendLog = False 
)
static

Definition at line 1409 of file DateTimeType.py.

def replaceJapanSimbols(rawPubdate, logger=None, isExtendLog=False):
    """Replace full-width punctuation and digits with their ASCII counterparts.

    Also maps '・' to '.' and 'ー' to '-'. A failing replacement for one
    character is logged (when extended logging is on) and skipped.

    @param rawPubdate - raw date string
    @param logger - logger instance or None
    @param isExtendLog - enables debug output for failed replacements
    @return string with the characters replaced
    """
    # Keys are the full-width forms; mapping an ASCII character to itself
    # would leave the text untouched.
    simbolsDict = {'-': '-', '.': '.', ':': ':', '/': '/', ',': ',', '・': '.', 'ー': '-',
                   '0': '0', '1': '1', '2': '2', '3': '3', '4': '4',
                   '5': '5', '6': '6', '7': '7', '8': '8', '9': '9'}
    # replace simbols
    for key, value in simbolsDict.items():
        try:
            rawPubdate = rawPubdate.replace(key, value)
        except Exception as err:
            if logger is not None and isExtendLog:
                logger.debug(str(err))

    return rawPubdate
1421 
1422 

◆ split()

def app.DateTimeType.DateTimeType.split (   dt)
static

Definition at line 604 of file DateTimeType.py.

def split(dt):
    """Separate a datetime into its naive value and its ``%z`` offset text.

    @param dt - datetime instance or None
    @return tuple (dt without tzinfo, offset string); (None, '') for None
    """
    if dt is None:
        return dt, ''

    offsetText = dt.strftime('%z')
    return dt.replace(tzinfo=None), offsetText
611 
612 
Here is the caller graph for this function:

◆ toUTC()

def app.DateTimeType.DateTimeType.toUTC (   dt)
static

Definition at line 1545 of file DateTimeType.py.

def toUTC(dt):
    """Subtract the UTC offset from ``dt`` and drop its tzinfo.

    A datetime whose ``utcoffset()`` is None is returned unchanged.

    @param dt - datetime instance
    @return datetime without tzinfo, or ``dt`` itself when it has no offset
    """
    offset = dt.utcoffset()
    if offset is None:
        return dt

    return (dt - offset).replace(tzinfo=None)
1553 

◆ utcOffset()

def app.DateTimeType.DateTimeType.utcOffset (   tzName,
  timezonesDict = DateTimeTimezones.timezonesDict 
)
static

Definition at line 1429 of file DateTimeType.py.

def utcOffset(tzName, timezonesDict=DateTimeTimezones.timezonesDict):  # pylint: disable=W0102
    """Look up the offset text stored for ``tzName`` in ``timezonesDict``.

    The second element of the entry is used when it contains 'UTC'; the
    returned value is that element with its first three characters removed.

    @param tzName - timezone name used as dictionary key
    @param timezonesDict - timezone table, read only
    @return offset string or None when the name or the 'UTC' marker is missing
    """
    if tzName not in timezonesDict:
        return None

    entry = timezonesDict[tzName]
    if len(entry) > 1 and entry[1].find('UTC') > -1:
        return entry[1][len('UTC'):]

    return None
1439 
1440 

Member Data Documentation

◆ BAD_SIMBOLS

string app.DateTimeType.DateTimeType.BAD_SIMBOLS = '=(),|@`'
static

Definition at line 75 of file DateTimeType.py.

◆ datetime

app.DateTimeType.DateTimeType.datetime

Definition at line 198 of file DateTimeType.py.

◆ DAY_STATE_DICT

dictionary app.DateTimeType.DateTimeType.DAY_STATE_DICT
static
Initial value:
= {LANG_ENG: dayStateEng, LANG_RUS: dayStateRus, LANG_UKR: dayStateUkr, LANG_GER: dayStateGer, \
LANG_JAP: dayStateJap, LANG_RUS: dayStateRusStr, LANG_UKR: dayStateUkrStr}

Definition at line 118 of file DateTimeType.py.

◆ dayStateEng

list app.DateTimeType.DateTimeType.dayStateEng = [u'Today', u'Yesterday', u'Day before yesterday']
static

Definition at line 104 of file DateTimeType.py.

◆ dayStateGer

list app.DateTimeType.DateTimeType.dayStateGer = [u'Heute', u'Gestern', u'Vorgestern']
static

Definition at line 107 of file DateTimeType.py.

◆ dayStateJap

list app.DateTimeType.DateTimeType.dayStateJap = [u'今日', u'イエスタデイ', u'おととい']
static

Definition at line 108 of file DateTimeType.py.

◆ dayStateRus

list app.DateTimeType.DateTimeType.dayStateRus = [u'Сегодня', u'Вчера', u'Поза вчера']
static

Definition at line 105 of file DateTimeType.py.

◆ dayStateRusStr

list app.DateTimeType.DateTimeType.dayStateRusStr = ['Сегодня', 'Вчера', 'Поза вчера']
static

Definition at line 109 of file DateTimeType.py.

◆ dayStateUkr

list app.DateTimeType.DateTimeType.dayStateUkr = [u'Сьогодні', u'Вчора', u'Позавчора']
static

Definition at line 106 of file DateTimeType.py.

◆ dayStateUkrStr

list app.DateTimeType.DateTimeType.dayStateUkrStr = ['Сьогодні', 'Вчора', 'Позавчора']
static

Definition at line 110 of file DateTimeType.py.

◆ ERROR_BAD_INPUT_DATA

string app.DateTimeType.DateTimeType.ERROR_BAD_INPUT_DATA = 'Bad inputted data.'
static

Definition at line 191 of file DateTimeType.py.

◆ ERROR_DATA_STRING_TYPE

string app.DateTimeType.DateTimeType.ERROR_DATA_STRING_TYPE = 'Data string is not string.'
static

Definition at line 190 of file DateTimeType.py.

◆ ERROR_FORMAT_STRING_TYPE

string app.DateTimeType.DateTimeType.ERROR_FORMAT_STRING_TYPE = 'Format string is not string.'
static

Definition at line 189 of file DateTimeType.py.

◆ ERROR_INPUT_PARAMS

string app.DateTimeType.DateTimeType.ERROR_INPUT_PARAMS = 'Error initialization by input parameters.'
static

Definition at line 188 of file DateTimeType.py.

◆ errorMsg

app.DateTimeType.DateTimeType.errorMsg

Definition at line 200 of file DateTimeType.py.

◆ isError

app.DateTimeType.DateTimeType.isError

Definition at line 199 of file DateTimeType.py.

◆ ISO_SEP

string app.DateTimeType.DateTimeType.ISO_SEP = ' '
static

Definition at line 74 of file DateTimeType.py.

◆ LANG_DICT

dictionary app.DateTimeType.DateTimeType.LANG_DICT
static
Initial value:
= {LANG_ENG: wordsListEng, LANG_RUS: wordsListRus, LANG_UKR: wordsListUkr, LANG_GER: wordsListGer, \
LANG_JAP: wordsListJap}

Definition at line 112 of file DateTimeType.py.

◆ LANG_ENG

string app.DateTimeType.DateTimeType.LANG_ENG = "ENG"
static

Definition at line 78 of file DateTimeType.py.

◆ LANG_GER

string app.DateTimeType.DateTimeType.LANG_GER = "GERMAN"
static

Definition at line 81 of file DateTimeType.py.

◆ LANG_JAP

string app.DateTimeType.DateTimeType.LANG_JAP = "JAPAN"
static

Definition at line 82 of file DateTimeType.py.

◆ LANG_RUS

string app.DateTimeType.DateTimeType.LANG_RUS = "RUS"
static

Definition at line 79 of file DateTimeType.py.

◆ LANG_UKR

string app.DateTimeType.DateTimeType.LANG_UKR = "UKR"
static

Definition at line 80 of file DateTimeType.py.

◆ MIN_ALLOWED_LEN_FOR_DATEUTILS

int app.DateTimeType.DateTimeType.MIN_ALLOWED_LEN_FOR_DATEUTILS = 10
static

Definition at line 73 of file DateTimeType.py.

◆ MIN_ALLOWED_YEAR

int app.DateTimeType.DateTimeType.MIN_ALLOWED_YEAR = 2000
static

Definition at line 72 of file DateTimeType.py.

◆ MONTH_DICT

dictionary app.DateTimeType.DateTimeType.MONTH_DICT
static
Initial value:
= {LANG_ENG: monthListEng, LANG_RUS: monthListRus, LANG_UKR: monthListUkr, LANG_GER: monthListGer, \
LANG_JAP: monthListJap, LANG_RUS: monthListRusBad}

Definition at line 115 of file DateTimeType.py.

◆ monthListEng

list app.DateTimeType.DateTimeType.monthListEng = [u'Jan', u'Feb', u'Mar', u'Apr', u'May', u'Jun', u'Jul', u'Aug', u'Sep', u'Oct', u'Nov', u'Dec']
static

Definition at line 96 of file DateTimeType.py.

◆ monthListGer

list app.DateTimeType.DateTimeType.monthListGer = [u'Jan', u'Feb', u'März', u'Apr', u'Mai', u'Juni', u'Juli', u'Aug', u'Sept', u'Okt', u'Nov', u'Dez']
static

Definition at line 99 of file DateTimeType.py.

◆ monthListJap

list app.DateTimeType.DateTimeType.monthListJap = [u'一月', u'二月', u'三月', u'四月', u'五月', u'六月', u'七月', u'八月', u'九月', u'十月', u'十一月', u'十二月']
static

Definition at line 100 of file DateTimeType.py.

◆ monthListRus

list app.DateTimeType.DateTimeType.monthListRus = [u'Янв', u'Февр', u'Март', u'Апр', u'Май', u'Июнь', u'Июль', u'Авг', u'Сент', u'Окт', u'Нояб', u'Дек']
static

Definition at line 97 of file DateTimeType.py.

◆ monthListRusBad

list app.DateTimeType.DateTimeType.monthListRusBad
static
Initial value:
= [u'Янв', u'Февр', u'Март', u'Апр', u'Май', u'Июня', u'Июля', u'Авг', u'Сент', u'Окт', u'Нояб', \
u'Дек']

Definition at line 101 of file DateTimeType.py.

◆ monthListUkr

list app.DateTimeType.DateTimeType.monthListUkr = [u'Сiч', u'Лют', u'Бер', u'Квiт', u'Трав', u'Черв', u'Лип', u'Серп', u'Вер', u'Жовт', u'Лист', u'Груд']
static

Definition at line 98 of file DateTimeType.py.

◆ patternListDate

list app.DateTimeType.DateTimeType.patternListDate
static

Definition at line 121 of file DateTimeType.py.

◆ patternListTime

list app.DateTimeType.DateTimeType.patternListTime
static
Initial value:
= [r'(?P<hour>[ 0-9][0-9]):(?P<min>\d{2}):(?P<sec>\d{2}) (?P<tf>[PpAaMm]{2})',
r'(?P<hour>[ 0-9][0-9]):(?P<min>\d{2}):(?P<sec>\d{2}) (?P<tf>[PpAaMm]{2})',
r'(?P<hour>[ 0-9][0-9]):(?P<min>\d{2}):(?P<sec>\d{2})(?P<tf>[PpAaMm]{2})',
r'(?P<hour>[ 0-9][0-9]):(?P<min>\d{2}):(?P<sec>\d{2})(?P<tf>[PpAaMm]{2})',
r'(?P<hour>[ 0-9][0-9]):(?P<min>\d{2}) (?P<tf>[PpAaMm]{2})',
r'(?P<hour>[ 0-9][0-9]):(?P<min>\d{2}) (?P<tf>[PpAaMm]{2})',
r'(?P<hour>[ 0-9][0-9]):(?P<min>\d{2})(?P<tf>[PpAaMm]{2})',
r'(?P<hour>[ 0-9][0-9]):(?P<min>\d{2})(?P<tf>[PpAaMm]{2})',
r'(?P<hour>[ 0-9][0-9]):(?P<min>\d{2}):(?P<sec>[0-9][0-9])',
r'(?P<hour>[ 0-9][0-9])(?P<min>\d{2}) GMT',
r'(?P<hour>[ 0-9][0-9]):(?P<min>\d{2}) GMT',
r'(?P<hour>[ 0-9][0-9]):(?P<min>\d{1,2})']

Definition at line 166 of file DateTimeType.py.

◆ patternListTimezoneOffset

list app.DateTimeType.DateTimeType.patternListTimezoneOffset
static
Initial value:
= [r'(?P<offset>[+-][0-9][0-9]:[0-9][0-9])',
r'(?P<offset>[+-][0-9][0-9][0-9][0-9])',
r'(?P<offset>[+-]\d{1,2})',
r'(?P<offset>[−][0-9][0-9])']

Definition at line 179 of file DateTimeType.py.

◆ patternListUtcTimezones

list app.DateTimeType.DateTimeType.patternListUtcTimezones
static
Initial value:
= [r'(?P<hours>[0-9][0-9]):(?P<minutes>[0-9][0-9])',
r'(?P<hours>[0-9][0-9])']

Definition at line 184 of file DateTimeType.py.

◆ TAG_NAMES

list app.DateTimeType.DateTimeType.TAG_NAMES = ['pubdate', 'dc_date']
static

Definition at line 76 of file DateTimeType.py.

◆ wordsListEng

list app.DateTimeType.DateTimeType.wordsListEng
static
Initial value:
= [u'Jan', u'Feb', u'Mar', u'Apr', u'May', u'Jun', u'Jul', u'Aug', u'Sep', u'Oct', u'Nov', u'Dec', \
u'Year', u'Today', u'Yesterday', u'Day before yesterday', u'year', u'month', u'day', u'hour', \
u'minute']

Definition at line 84 of file DateTimeType.py.

◆ wordsListGer

list app.DateTimeType.DateTimeType.wordsListGer
static
Initial value:
= [u'März', u'Mai', u'Juni', u'Juli', u'Sept', u'Okt', u'Dez', u'Jahr', u'Heute', u'Gestern', \
u'Vorgestern', u'Uhr']

Definition at line 91 of file DateTimeType.py.

◆ wordsListJap

list app.DateTimeType.DateTimeType.wordsListJap
static
Initial value:
= [u'一月', u'二月', u'三月', u'四月', u'五月', u'六月', u'七月', u'八月', u'九月', u'十月', u'十一月', \
u'十二月', u'年', u'今日', u'イエスタデイ', u'おととい', u'月', u'日', u'時', u'分', u'付', u'更新']

Definition at line 93 of file DateTimeType.py.

◆ wordsListRus

list app.DateTimeType.DateTimeType.wordsListRus
static
Initial value:
= [u'Янв', u'Февр', u'Мар', u'Апр', u'Май', u'Июнь', u'Июль', u'Авг', u'Сент', u'Окт', u'Нояб', \
u'Дек', u'Сегодня', u'Вчера', u'Поза вчера', u'Июня', u'Июля']

Definition at line 87 of file DateTimeType.py.

◆ wordsListUkr

list app.DateTimeType.DateTimeType.wordsListUkr
static
Initial value:
= [u'Сiч', u'Лют', u'Бер', u'Квiт', u'Трав', u'Черв', u'Лип', u'Серп', u'Вер', u'Жовт', u'Лист', \
u'Груд', u'Рік', u'Сьогодні', u'Вчора', u'Позавчора']

Definition at line 89 of file DateTimeType.py.


The documentation for this class was generated from the following file: