HCE Project Python language Distributed Tasks Manager Application, Distributed Crawler Application and client API bindings.  2.0.0-chaika
Hierarchical Cluster Engine Python language binding
batch_from_json.py
Go to the documentation of this file.
1 #!/usr/bin/python
2 
3 """
4 HCE project, Python bindings, DC service utility
5 Batch from json preparation utility.
6 
7 @package: dc
8 @file batch_from_json.py
9 @author bgv <developers.hce@gmail.com>
10 @link: http://hierarchical-cluster-engine.com/
11 @copyright: Copyright &copy; 2015 IOIX Ukraine
12 @license: http://hierarchical-cluster-engine.com/license/
13 @since: 0.1
14 """
15 
16 
17 import ppath
18 from ppath import sys
19 
20 # For profiling
21 import app.Profiler as Profiler
22 
# Switch the profiler on here, ahead of the remaining imports, but only
# when a profiler object exists and reports a positive status.
pr = Profiler.Profiler()
if pr:
    if pr.status > 0:
        pr.start()
27 
28 import os
29 import sys
30 import app.Utils as Utils
31 import app.Consts as APP_CONSTS
32 from cement.core import foundation
33 import copy
34 import hashlib
35 import json
36 
37 
# Process exit status that the script hands to os._exit() at the very end;
# it is initialised to the failure code.
exit_code = APP_CONSTS.EXIT_FAILURE
39 
def urlMd5(url):
    """Return the hexadecimal MD5 digest of *url*.

    hashlib only accepts bytes: a byte string is hashed unchanged, a text
    string is encoded as UTF-8 first. On Python 2 the lines read from the
    URL file are byte strings already, so the digest is the same as that of
    a plain ``hashlib.md5(url)`` call.
    """
    data = url if isinstance(url, bytes) else url.encode('utf-8')
    return hashlib.md5(data).hexdigest()


def buildBatchItems(batchDict, urlsList):
    """Build one batch item per non-blank URL from the template batch.

    The first element of ``batchDict['items']`` serves as the template. It
    is deep-copied for every URL, so neither the template nor the produced
    items share nested objects.

    @param batchDict: the parsed Batch template; must hold a non-empty
                      ``items`` list whenever at least one URL is non-blank
    @param urlsList: iterable of URL strings; surrounding whitespace is
                     stripped and blank entries are skipped
    @return: list of new items with ``urlObj.url``, ``urlObj.urlMd5`` and
             ``urlId`` filled in (``urlId`` equals the MD5 digest)
    @raise KeyError, IndexError: when a URL has to be processed but the
                                 template has no ``items[0]``
    """
    items = []
    for url in urlsList:
        url = url.strip()
        if not url:
            continue
        # The template is looked up lazily so that an input without any
        # usable URL does not require the template to contain items.
        item = copy.deepcopy(batchDict['items'][0])
        digest = urlMd5(url)
        item['urlObj']['url'] = url
        item['urlObj']['urlMd5'] = digest
        item['urlId'] = digest
        items.append(item)
    return items


if __name__ == "__main__":
    # Bound before the try block so the finally clause can safely test it
    # even when constructing the application itself raises.
    app = None
    try:
        # Create the application and declare its command line options
        app = foundation.CementApp('myapp')
        app.setup()
        app.args.add_argument('-t', '--txt', action='store', dest='txt', help='the text file one URL per line')
        app.args.add_argument('-j', '--json', action='store', dest='json', help='the json file used as the Batch template, if omitted - read stdin')
        app.args.add_argument('-o', '--out', action='store', dest='out', help='the output json file, if omitted write stdout')
        app.run()

        if app.pargs.txt:
            with open(app.pargs.txt, 'r') as f:
                urlsList = f.read().splitlines()

            # Batch template: from the file given with -j, otherwise stdin
            if app.pargs.json:
                with open(app.pargs.json, 'r') as f:
                    batchDict = json.loads(f.read())
            else:
                batchDict = json.loads(sys.stdin.read())

            batchDict['items'] = buildBatchItems(batchDict, urlsList)
            serialized = json.dumps(batchDict)

            if app.pargs.out:
                # The target path comes from the parsed -o option
                with open(app.pargs.out, 'w') as f:
                    f.write(serialized)
            else:
                # Same output as the print statement (text plus newline),
                # but valid syntax on both Python 2 and Python 3
                sys.stdout.write(serialized + '\n')

            # Only a fully written result counts as success. The fallback
            # of 0 is the conventional success status.
            # NOTE(review): confirm app.Consts defines EXIT_SUCCESS.
            exit_code = getattr(APP_CONSTS, 'EXIT_SUCCESS', 0)
        else:
            sys.stderr.write('Required text file with URLs not provided, use with -h to see required arguments.\n')
    except Exception as err:
        sys.stderr.write(str(err) + '\n')
        exit_code = APP_CONSTS.EXIT_FAILURE
    except:
        # Deliberately broad: anything that is not an Exception subclass
        # (e.g. SystemExit, KeyboardInterrupt) still ends in the controlled
        # shutdown below with the failure status.
        exit_code = APP_CONSTS.EXIT_FAILURE
    finally:
        # Close the application exactly once, on every path
        if app:
            app.close()
        # stop profiling
        if pr:
            pr.stop()
        # os._exit() does not flush stdio buffers, so flush explicitly
        sys.stdout.flush()
        os._exit(exit_code)
96