HCE Project Python language Distributed Tasks Manager Application, Distributed Crawler Application and client API bindings.  2.0.0-chaika
Hierarchical Cluster Engine Python language binding
ftest_UrlSchema.py
Go to the documentation of this file.
1 '''
2 Created on Nov 23, 2015
3 
4 @author: scorp
5 '''
6 import os
7 import unittest
8 import logging
9 from dc_crawler.UrlSchema import UrlSchema
10 import app.Consts as APP_CONSTS
11 
12 
class Test(unittest.TestCase):
    '''
    Functional checks of UrlSchema.generateUrlSchema() placeholder substitution.

    Fixtures live in self.testUrls and are keyed per case: "url<N>" is the URL
    template, "schema<N>" is the JSON schema text handed to UrlSchema and, for
    the cases whose exact output is asserted, "result<N>" is the expected list
    of generated URLs.

    NOTE(review): the "def" lines of the five test methods were absent from the
    reviewed listing (its embedded numbering skips lines 43, 48, 53, 61 and 69).
    The testSchema<N> names used here are reconstructed -- confirm them against
    the original file before merging.
    '''

    def __init__(self, methodName='runTest'):
        '''
        Build the fixtures shared by all test methods.

        methodName -- name of the test method to run; forwarded unchanged to
                      unittest.TestCase.__init__().
        '''
        unittest.TestCase.__init__(self, methodName)
        self.testUrls = {}
        # Initialised empty; nothing in this class fills or reads it.
        self.testSchemas = {}

        # Every case uses the same template. No schema below defines "param3",
        # and the expected results keep the "%param3%" placeholder verbatim.
        urlTemplate = "http://domen1.com/file?%param1%&from%param2%and%param3%_end"

        # Case 1: type 1, mode 0, parameters given as lists of values.
        self.testUrls["url1"] = urlTemplate
        self.testUrls["schema1"] = (
            '{"type": 1, "mode": 0, "parameters" : {"param1" : ["z1", "z2"], '
            '"param2" : ["ffA1", "ffA2", "ffA"]}, "max_items": 33}')
        self.testUrls["result1"] = ["http://domen1.com/file?z1&fromffA1and%param3%_end"]

        # Case 2: type 2, mode 0, parameters given as min/max/step ranges.
        self.testUrls["url2"] = urlTemplate
        self.testUrls["schema2"] = (
            '{"type": 2, "mode": 0, "parameters" : {"param1" : {"min": 10, '
            '"max": 22, "step": 3}, "param2" : {"min": -22, '
            '"max": 0, "step": 1}}, "max_items": 33}')
        self.testUrls["result2"] = ["http://domen1.com/file?10&from-22and%param3%_end"]

        # Case 3: type 3, mode 0, min/max/step parameters; no fixed expected URL.
        self.testUrls["url3"] = urlTemplate
        self.testUrls["schema3"] = (
            '{"type": 3, "mode": 0, "parameters" : {"param1" : {"min": 10, '
            '"max": 22, "step": 3}, "param2" : {"min": -22, '
            '"max": 0, "step": 1}}, "max_items": 33}')

        # Case 4: type 3, mode 0, min/max/chars/case parameters; no fixed expected URL.
        self.testUrls["url4"] = urlTemplate
        self.testUrls["schema4"] = (
            '{"type": 3, "mode": 0, "parameters" : {"param1" : {"min": 10, '
            '"max": 22, "chars": 0, "case": 0}, "param2" : {"min": -22, '
            '"max": 0, "chars": 1, "case": 1}}, "max_items": 33}')

        # Case 5: same lists as case 1 but mode 1 and max_items 3.
        self.testUrls["url5"] = urlTemplate
        self.testUrls["schema5"] = (
            '{"type": 1, "mode": 1, "parameters" : {"param1" : ["z1", "z2"], '
            '"param2" : ["ffA1", "ffA2", "ffA"]}, "max_items": 3}')
        self.testUrls["result5"] = ["http://domen1.com/file?z1&fromffA1and%param3%_end"]

    def testSchema1(self):
        '''Schema 1 must produce exactly the URL list stored in "result1".'''
        schema = UrlSchema(self.testUrls["schema1"])
        self.assertEqual(schema.generateUrlSchema(self.testUrls["url1"]), self.testUrls["result1"])

    def testSchema2(self):
        '''Schema 2 must produce exactly the URL list stored in "result2".'''
        schema = UrlSchema(self.testUrls["schema2"])
        self.assertEqual(schema.generateUrlSchema(self.testUrls["url2"]), self.testUrls["result2"])

    def testSchema3(self):
        '''Schema 3 must yield one URL with both param1 and param2 replaced.'''
        schema = UrlSchema(self.testUrls["schema3"])
        retList = schema.generateUrlSchema(self.testUrls["url3"])
        self.assertEqual(len(retList), 1)
        # The generated values are not pinned; only the placeholders must be gone.
        self.assertNotIn("param1", retList[0])
        self.assertNotIn("param2", retList[0])

    def testSchema4(self):
        '''Schema 4 must yield one URL with both param1 and param2 replaced.'''
        schema = UrlSchema(self.testUrls["schema4"])
        retList = schema.generateUrlSchema(self.testUrls["url4"])
        self.assertEqual(len(retList), 1)
        # The generated values are not pinned; only the placeholders must be gone.
        self.assertNotIn("param1", retList[0])
        self.assertNotIn("param2", retList[0])

    def testSchema5(self):
        '''Schema 5 must yield three URLs, the first equal to "result5"[0].'''
        schema = UrlSchema(self.testUrls["schema5"])
        retList = schema.generateUrlSchema(self.testUrls["url5"])
        self.assertEqual(len(retList), 3)
        self.assertEqual(retList[0], self.testUrls["result5"][0])
74 
75 
def getLogger():
    '''
    Return the logger named APP_CONSTS.LOGGER_NAME, set up for console output.

    The logger level is set to DEBUG and a DEBUG-level logging.StreamHandler
    with a "time - name - level - message" format is attached to it.

    logging.getLogger() hands back the same logger object for the same name, so
    the console handler is attached only when the logger does not already carry
    a plain StreamHandler; calling this function repeatedly therefore no longer
    duplicates every log line.
    '''
    logger = logging.getLogger(APP_CONSTS.LOGGER_NAME)
    logger.setLevel(logging.DEBUG)

    # Exact type match on purpose: FileHandler subclasses StreamHandler and must
    # not be mistaken for an existing console handler.
    hasConsoleHandler = any(type(handler) is logging.StreamHandler for handler in logger.handlers)
    if not hasConsoleHandler:
        consoleHandler = logging.StreamHandler()
        consoleHandler.setLevel(logging.DEBUG)
        consoleHandler.setFormatter(
            logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s'))
        logger.addHandler(consoleHandler)

    return logger
95 
96 
def test():
    '''
    Manual scenario: generate URLs from a schema whose values come from a file.

    Writes the sample words to /tmp/urlSchemaTestFile.json, builds a schema
    that points at that file through its "file_path" field, runs
    UrlSchema.generateUrlSchema() on the test URL and prints the schema text
    followed by every resulting URL.

    The data file is left in place after the run so it can be inspected.
    '''
    # Called only for its side effect of configuring the application logger.
    getLogger()

    testDataList = ['111', '222', '333', '1 2 3', '4 5']

    testUrl = 'http://127.0.0.1/some_test_url_%%'
    testFilePath = '/tmp/urlSchemaTestFile.json'
    urlSchemaData = "{\"type\":1, \"parameters\":{}, \"file_path\": \"%s\", \"mode\":1, \"max_items\":200, \"delimiter\":\"%s\",\"format\":\"plain-text\", \"url_encode\":1, \"batch_insert\":2}"
    urlSchemaDelimiter = ''
    urlSchemaParameter = urlSchemaData % (testFilePath, urlSchemaDelimiter)

    print(urlSchemaParameter)

    # Fill the data file: one word per line when no delimiter is configured,
    # otherwise each word followed by the delimiter. The "with" block closes
    # the file even if a write fails.
    separator = '\n' if urlSchemaDelimiter == "" else urlSchemaDelimiter
    with open(testFilePath, 'w') as dataFile:
        dataFile.writelines(word + separator for word in testDataList)

    schema = UrlSchema(schema=urlSchemaParameter)
    resUrls = schema.generateUrlSchema(testUrl)

    print("Result:")
    for url in resUrls:
        print(url)
134 
if __name__ == '__main__':
    # Running this file as a script executes only the manual file-based
    # scenario. The unittest entry point is kept here, disabled:
    #   import sys; sys.argv = ['', 'Test.testName']
    #   unittest.main()
    test()
def __init__(self, methodName='runTest')