HCE Project Python language Distributed Tasks Manager Application, Distributed Crawler Application and client API bindings.  2.0.0-chaika
Hierarchical Cluster Engine Python language binding
get_statistic_variable.py
Go to the documentation of this file.
1 #!/usr/bin/python
2 
3 """
4 HCE project, tool that sums a statistic variable over the JSON files found in a directory tree.
5 
6 @package: bin
7 @file get_statistic_variable.py
8 @author Alexander Vybornyh <alexander.hce.cluster@gmail.com>
9 @link: http://hierarchical-cluster-engine.com/
10 @copyright: Copyright &copy; 2013-2017 IOIX Ukraine
11 @license: http://hierarchical-cluster-engine.com/license/
12 @since: 0.1
13 """
14 
15 # Sample of usage: ./get_statistic_variable.py -d=/var/www/archives/grasp/demo/2017/10/09 -p=.*statlog.json -n=itemsDetected
16 
17 import re
18 import os
19 import sys
20 import json
21 from cement.core import foundation
22 
23 
# # Read the command line options
#
# @param - None
# @return tuple (dirName, pattern, paramName, outputFile) taken from the command line;
#         outputFile is None when the -o/--output option is absent or empty
def getArgs():
    cli = foundation.CementApp('get_statistic_variable')
    cli.setup()

    # the first three options are mandatory, the output file is optional
    cli.add_arg('-d', '--dir', action='store', metavar='input_directory_name',
                help='input directory name', required=True)
    cli.add_arg('-p', '--pattern', action='store', metavar='pattern_file_name',
                help='pattern file name for search in dirrectory', required=True)
    cli.add_arg('-n', '--name', action='store', metavar='parameter_name',
                help='parameter name in found files', required=True)
    cli.add_arg('-o', '--output', action='store', metavar='output_file_name',
                help='output file name. If not set output in stdout')
    cli.run()

    # collect the parsed values before the application object is closed
    parsed = cli.pargs
    result = (parsed.dir, parsed.pattern, parsed.name, parsed.output or None)
    cli.close()

    return result
47 
48 
# # Get files list
#
# Walks dirName recursively and collects every non-directory entry whose full
# name matches the pattern (unicode-aware, case-insensitive search).
#
# @param dirName - directory name to search files in
# @param pattern - re pattern applied to the full file name
# @return list of full names of the matched files; empty list if dirName or
#         pattern is None. On an error (unreadable directory, invalid pattern)
#         the problem is reported to stderr and the files collected so far
#         are returned.
def getFilesList(dirName, pattern):
    # variable for result
    filesList = []
    if dirName is None or pattern is None:
        return filesList

    try:
        for fileName in os.listdir(dirName):
            fullName = os.path.join(dirName, fileName)
            if os.path.isdir(fullName):
                # descend into the sub-directory
                filesList += getFilesList(fullName, pattern)
            elif re.search(pattern, fullName, re.U | re.I) is not None:
                filesList.append(fullName)
    except Exception as err:
        # stay best-effort, but do not hide the reason of an incomplete result
        sys.stderr.write("Get files list from %s failed. Error: %s\n" % (dirName, str(err)))

    return filesList
72 
73 
# # Get value from a nested dict/list object
#
# @param path - path to the parameter: dict keys and/or list indexes joined by the delimiter
# @param itemObject - object to look the value up in
# @param delimiter - delimiter used to split the path
# @return value found at the path, or 0 if the path cannot be fully resolved
def getValueFromDict(path, itemObject, delimiter=':'):
    # value returned when the path does not lead to an existing element
    defaultValue = 0

    # the walk has to start from the passed object, otherwise nothing is ever found
    ret = itemObject
    for fieldName in path.split(delimiter):
        if isinstance(ret, dict) and fieldName in ret:
            ret = ret[fieldName]
        elif isinstance(ret, list) and fieldName.isdigit():
            try:
                ret = ret[int(fieldName)]
            except (ValueError, IndexError):
                # index is out of range or is not a plain decimal number
                return defaultValue
        else:
            return defaultValue

    return ret
91 
92 
# # Extract data from file
#
# @param fileName - name of the json file to read
# @param paramName - parameter name (path) passed to getValueFromDict()
# @return integer value extracted from the file, or 0 if the file cannot be
#         read or parsed, or the value cannot be converted to int (the reason
#         is reported to stderr)
def extractData(fileName, paramName):
    # variable for result
    ret = 0
    try:
        with open(fileName) as f:
            dataDict = json.load(f)
        ret = int(getValueFromDict(paramName, dataDict))

    except Exception as err:
        # '%s' for the file name: the former '% f' was parsed as a float
        # conversion and made this handler raise TypeError itself
        sys.stderr.write("Extract data from %s failed. Error: %s\n" % (fileName, str(err)))

    return ret
110 
111 
# # Output data
#
# Serializes the dictionary to json and writes it to the output file, or to
# stdout if no file name is given. Failures are reported to stderr and are
# not raised to the caller.
#
# @param jsonDict - output dictionary
# @param outputFile - output file name, None means stdout
# @return - None
def outputData(jsonDict, outputFile):
    if not isinstance(jsonDict, dict):
        # report the real reason instead of failing later on an unset variable
        sys.stderr.write("Output data failed. Error: expected dict, got %s\n" % type(jsonDict).__name__)
        return

    try:
        jsonData = json.dumps(jsonDict)

        if outputFile is None:
            sys.stdout.write(jsonData)
        else:
            with open(outputFile, 'w') as f:
                f.write(jsonData)

    except Exception as err:
        sys.stderr.write("Output data failed. Error: %s\n" % str(err))
130 
131 
# # Main processing
#
# Reads the command line, extracts the parameter value from every matched file
# and outputs a json object holding the total under the parameter name and the
# per-file values under the 'files' key.
if __name__ == '__main__':
    # Constants used in json
    FILES_FIELD_NAME = 'files'

    try:
        dirName, pattern, paramName, outputFile = getArgs()

        totalCount = 0
        filesDict = {}
        for fileName in getFilesList(dirName, pattern):
            # extractData() already returns an int (0 for a file it cannot process)
            value = extractData(fileName, paramName)
            filesDict[fileName] = value
            totalCount += value

        # make output dictionary
        jsonDict = {}
        jsonDict[paramName] = totalCount
        jsonDict[FILES_FIELD_NAME] = filesDict

        outputData(jsonDict, outputFile)

    except Exception as err:
        # 'as' form of the clause is valid for both Python 2.6+ and Python 3
        sys.stderr.write("Error: %s\n" % str(err))
157 
def outputData(jsonDict, outputFile)
def getValueFromDict(path, itemObject, delimiter=':')
def getFilesList(dirName, pattern)
def extractData(fileName, paramName)