HCE Project Python language Distributed Tasks Manager Application, Distributed Crawler Application and client API bindings.  2.0.0-chaika
Hierarchical Cluster Engine Python language binding
ftest_DateTimeType.py
Go to the documentation of this file.
1 #!/usr/bin/python
2 # coding: utf-8
3 
4 import os
5 import sys
6 import logging
7 
8 from datetime import tzinfo # pylint: disable=W0611
9 from cement.core import foundation
10 from datetime import datetime
11 import datetime
12 import time
13 from sys import stdout
14 from sys import stderr
15 
16 from dc_processor.PDateTimezonesHandler import PDateTimezonesHandler
17 from app.DateTimeType import DateTimeType
18 
19 import re
20 import time
21 
22 import json
23 from time import sleep
24 import random
25 
26 
def getLogger():
    """Return the logger named 'test', set up for DEBUG output on the console.

    logging.getLogger() hands back the same Logger object for the same name,
    so this function may be called repeatedly; the console handler is attached
    only once to avoid every message being printed once per call.

    @return - the configured logging.Logger instance
    """
    logger = logging.getLogger('test')
    logger.setLevel(logging.DEBUG)

    # Attach the console handler only when none is present yet; adding one on
    # every call would duplicate each log record.
    if not logger.handlers:
        ch = logging.StreamHandler()
        ch.setLevel(logging.DEBUG)
        ch.setFormatter(logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s'))
        logger.addHandler(ch)

    return logger
46 
47 
48 # # Test executon functional
49 #
50 # @param fileName - input file name for test
51 # @param ext - allowed extention of file
52 # @return - None
53 def testExecution(fileName, ext='.list'):
54 
55  if fileName.rfind(ext) > 0:
56  # stdout.write('\nOpen file: ' + str(fileName))
57  f = open(fileName, 'r')
58  lineList = f.readlines()
59  f.close()
60 
61  for rawPubdate in lineList:
62  if rawPubdate.find('%pubdate%') < 0:
63  s = rawPubdate
64  d = DateTimeType.parse(s)
65  if d is None:
66  print('fail: ' + str(s))
67  else:
68  pass
69 
70  else:
71  pass
72 
73 
# NOTE(review): listing line 74 is absent from this extract, so the scope that
# encloses the statements below cannot be determined from here - TODO confirm
# against the original file before moving or re-indenting them.
#
# For each period name, builds the strings '22 <period> left' and
# '23 <period> left', passes each to DateTimeType.parse() together with the
# logger, and prints the ISO-formatted result, or 'parse: NONE' when parse()
# returns None.
75  logger = getLogger()
76  for period in ['years', 'months', 'days', 'hours', 'minutes']:
77  for testStr in ['22 ' + period + ' left', '23 ' + period + ' left']:
78  print('=======\ninput: ' + str(testStr))
79  d = DateTimeType.parse(testStr, True, logger, True)
80  if d is not None:
81  print('parse: ' + str(d.isoformat(' ')))
82  else:
83  print('parse: NONE')
84 
85 
# Script entry point.
#
# Usage:
#   -d/--dir  <input_dir>  - run testExecution() on every entry of the directory
#   -f/--file <input_file> - run testExecution() on a single file
#   (no option)            - parse the single hard-coded sample string below
if __name__ == '__main__':

    app = foundation.CementApp('DateTimeType')
    app.setup()
    app.add_arg('-f', '--file', action='store', metavar='input_file', help='input file with date list')
    app.add_arg('-d', '--dir', action='store', metavar='input_dir', help='input directory name with files dates')
    app.run()

    fileName = app.pargs.file
    dirName = app.pargs.dir
    app.close()

    if dirName is not None:
        # os.listdir() returns bare entry names, so each one has to be joined
        # with the directory before it can be opened from another working dir.
        for inputFile in os.listdir(dirName):
            testExecution(os.path.join(dirName, inputFile))

    elif fileName is not None:
        testExecution(fileName)

    else:
        # handle test
        # Catalogue of sample inputs; uncomment one (and comment out the active
        # assignment at the end of the list) to try it.
        # s = 'pubdate=Published On: Nov 03 2015 02:34:13 PM CST'
        # s = 'pubdate=| October 29, 2015 12:14pm'
        # s = 'pubdate=Posted: 11/03/2015 08:02 PM EST'
        # s = 'pubdate=Updated 2255 GMT (0655 HKT) November 3, 2015'
        # s = 'pubdate=11/3/2015 03:05 PM Connect Directly'
        # s = 'pubdate=04 листопада 2015'
        # s = 'pubdate=3 листопада'
        # s = 'pubdate=17 вересня, 2015'
        # s = 'pubdate=1446606656'
        # s = '31 August 2015'
        # s = 'pubdate=Wed, 30 Sep 2015 22:25:27 -0000'
        # s = 'pubdate=| October 29, 2015 12:14pm'
        # s = 'pubdate=Last updated at 20:00 GMT'
        # s = 'pubdate=By: , November 4th, 2015 08:43 AM'
        # s = 'pubdate=| October 15, 2015 8:54am |'
        # s = 'pubdate=2015-11-04 09:48:00'
        # s = 'pubdate=November 3, 2015 @ 5:08 pm'
        # s = 'pubdate=- Associated Press - Tuesday, November 3, 2015'
        # s = 'pubdate=%pubdate%'
        # s = 'pubdate=25/08/2014'
        # s = 'Сьогодні 12:14'
        # s = 'Вчора 12:14'
        # s = 'Позавчора 12:14'
        # s = 'pubdate=17:25'
        # s = 'pubdate=2015-11-05T16:22:00+09:00'
        # s = 'pubdate=2015年11月5日20時10分'
        # s = '2015年11月5日20時10分'
        # s = 'pubdate=2015年10月24日'
        # s = 'pubdate=11月5日 18時05分'
        # s = '11月5日 18時05分'
        # s = 'pubdate=16/09/2015'
        # s = 'pubdate=3rd November 2015, 17:15'
        # s = '2015-11-06 10:33:08'
        # s = 'November 6, 2015'
        # s = '2015/10/27'
        # s = '20150416'
        # s = '31 August 2015'
        # s = '2015-08-31T14:24:05+01:00'
        # s = '2015-05-09T16:20:15Z 2015-05-11T07:54:39Z'
        # s = '20151120T1212+0200Z'
        # s = '01.12.2015 12:01 Uhr'
        # s = 'B81D5A241AD4BAFB0252A0D687615E0E'
        # s = '2015-08-31T14:24:05+01:00'
        # s = '2015年04月22日 10:41 '
        # s = '2015年04月22日 10:41  発信地:リッセ/オランダ'
        # s = 'JANUARY 6, 2016 09:00AM EST'
        # s = '2015年 04月 22日 10:41 JST'
        # s = 'POSTED: 01/21/16, 4:54 PM PST '
        # s = '15:53 GMT, 27 January 2016'
        # s = 'Вторник, 09.02.2016' ##
        # s = '06:5509.02.2016'
        # s = '09.02.2016 - 14:42'
        # s = 'Лютий 10th, 2016'
        # s = 'Wed Jan 6, 2016 2:48pm EST'
        # s = '2/13/2016 10:05 AM Connect Directly'
        # s = '2/12/2016 09:06 AM Connect Directly'
        # s = 'February 9, 2016'
        # s = 'Updated Feb. 26, 2015 10:53 a.m. ET'
        # s = '17. February 2016 15:54'
        # s = '17.2.2016, 15:57 Uhr'
        # s = 'February 17, 2016 @ 12:52 pm'
        # s = '17. February 2016 20:50'
        # s = 'Last updated at 22:19 GMT'
        # s = 'February 18 at 5:15 PM'
        # s = '2.18.16 | 11:26PM'
        # s = '2015-05-07 02:03:14'
        # s = 'Fri Feb 19, 2016 11:14am EST'
        # s = ' 18 лютого'
        # s = 'B81D5A241AD4BAFB0252A0D687615E0E'
        # s = '2016-01-06T19:48:47+0100'
        # s = '<time datetime="06 Jan 2016 19:45 GMT">06 Jan 2016 19:45 GMT</time>'
        # s = '2016年3月22日'
        # s = 'Вчера, 18:59'
        # s = '1545'
        # s = '2016-03-01T09:32:02+00:00'
        # s = 'April 14–17, 2016'
        # s = '5B44585918D69318CA2120B5FA20D85C'
        # s = 'January 5'
        # s = '12:09 a.m. EST March 2, 2016'
        # s = 'March 4 at 9:36 AM'
        # s = 'This was first published in March 2016'
        # s = '2 days left'
        # s = '7 hours'
        # s = '08 Мар 2016'
        # s = '2 days\xa0left'
        # s = '2016\xe5\xb9\xb420\xe6\x9c\x8841\xe6\x97\xa5 00:00'
        # s = '2016年20月41日 00:00'
        # s = 'Published on 13th May 2016 by Gareth Halfacree'

        # s = '20160324T0410+0200Z'
        # s = u'\u0432 \u041c\u0430\u0440\u0442 23, 2016'
        # s = '5日前' # 5 дней назад'
        # s = '1時間前' # час назад
        # s = '1 Hour Ago'
        # s = '2016-05-10 2016-05-16 2016-05-16 2016-05-16 2016-05-16 2016-05-17 2016-05-17 2016-05-17 2016-05-16 2016-05-10'
        # s = '2016-05-16 12:56:00'
        # s = 'May 16, 2016, 12:56 pm EDT'

        # s = ' Пʼятниця, 19 лютого 2016, 04:59'
        # s = 'Mon Mar 21, 2016 3:20am EDT'
        # s = 'Wed Jan 6, 2016 2:48pm EST'
        # s = '2016-05-02T17:21:59+09:00'

        # s = '2016年5月24日(火)'

        # s = 'FEBRUARY 22, 2016 | 12:00 PM'
        # s = '2016.05.29 Sun posted at 18:58 JST'
        # s = '1464373928'
        # s = '2016/4/22付'
        # s = '平成28年5月25日'
        # s = '平成28年5月25日'
        # s = 'May 21, 2016 — Ron Chusid'
        # s = '2016-05-26T06:14-500'
        # s = '<div class="post-item__info">Корреспондент.biz, Сегодня, 01:12</div>'
        # s = '`Корреспондент.biz, Сегодня, 01:1'
        # s = 'Дек 18'

        # s = ' в Май 23, 2016'
        # s = ' 21 июня 2016, вторник, 23:58'
        # s = '24.12.15'
        # s = '2013/6/26'
        # s = '2016-07-05T12:11:13:00.000Z'
        # s = '2016年 05月 9日 10:15 JST'
        # s = 'Wed, 20 Jul 2016 09:08:25 +0000'
        # s = '2016年7月26日23時28分'
        # s = '2016年07月21日 10:36 ' # 2016-07-21 strange time ignore http://www.afpbb.com/articles/-/3094693
        # #s = '02.07.2016 16:28' # month and date are not placed right http://zovzakona.org/gugl-v-pomoshh-politsii
        # s = '1464373928' # WRONG TZ should be GMT, http://www.bbc.com/news/uk-politics-36381328
        # s = '2016\xe5\xb9\xb408\xe6\x9c\x8809\xe6\x97\xa5 16:33\xe3\x80\x80'
        # s = u'23 \u0430\u0432\u0433\u0443\u0441\u0442\u0430 2016, 09:00'
        # s = '10:20:11'
        # s = '2016-10-22T12:00:34.295Z'
        # s = 'Nov 6th 2016 8:40PM' # http://www.aol.com/article/2016/11/06/trump-reacts-to-fbi-clearing-clinton-in-email-probe-she-is-bei/21599923/
        # s = '11/06/16, 3:22 PM PST'
        # s = 'Posted 11-28-2016'
        # s = '15 hours ago'
        # s = 'Tue, 13 Dec 2016 07:14:00 +1300'
        # s = '2017/3/12 19:42 (2017/3/12 23:31更新)'
        # s = u'2017/3/12 19:42 (2017/3/12 23:31\u66f4\u65b0)'
        s = 'Oct 4, 2017 \u2022\n \t 4:00 pm |\n \t\t (0) \n \t\t by Barry Schwartz \n \t\n\t\t\t\t\t\t\t \n\t\t\t \n\t\t \t\t \n\t\t\t \n\t\t \t\t\t\t\t\t\t\n \t| Filed Under Search Forum Recap'

        logger = getLogger()
        # print('getLang = ' + str(DateTimeType.getLang(s, logger, True)))
        # sys.exit()

        d = DateTimeType.parse(s, True, logger, True)
        if d is not None:
            print('parse: ' + str(d.isoformat(' ')))
            # print('strftime: ' + str(d.strftime("%Y-%m-%d %H:%M")))

            # split() returns a (datetime, timezone) pair; both are printed.
            d, timezone = DateTimeType.split(d)
            print('datetime: ' + str(d.isoformat(' ')) + ' timezone: ' + str(timezone))
        else:
            print('parse: NONE')
261 
262 
263  # utcOffset = DateTimeType.extractUtcOffset(s, logger, True)
264  # logger.debug('utcOffset: ' + str(utcOffset))
265 # try:
266 # s = s.lower()
267 # print type(s)
268 # print s.find(u'Сегодня')
269 # print s
270 # except Exception, err:
271 # sys.stderr.write(str(err))
def testExecution(fileName, ext='.list')