HCE Project PHP language client API bindings  1.5.1
Hierarchical Cluster Engine PHP Client Interface API
 All Classes Namespaces Files Functions Variables Pages
formats-standards-comparator.php
Go to the documentation of this file.
1 #!/usr/bin/php
2 <?php
3 
// This script is a command-line tool: stop right away unless PHP runs under
// the CLI SAPI and the STDIN constant is defined.
if (!(php_sapi_name() === 'cli' && defined('STDIN'))) {
    echo "Only cli execution mode supported\n";
    exit(1);
}
8 
10 
// Read the command-line options out of $args, falling back to a default
// whenever an option was not supplied.

// Paths of the two files to compare (no default).
$originalFile = null;
if (isset($args['original'])) {
    $originalFile = $args['original'];
}
$compareFile = null;
if (isset($args['compare'])) {
    $compareFile = $args['compare'];
}

// Format of the compared content.
$format = 'json';
if (isset($args['format'])) {
    $format = $args['format'];
}

// Kind of input data.
$type = 'news';
if (isset($args['input_type'])) {
    $type = $args['input_type'];
}

// Comparison strategy used by check().
$compareType = 'text';
if (isset($args['type'])) {
    $compareType = $args['type'];
}

// Optional custom field path.
$fieldPath = null;
if (isset($args['path'])) {
    $fieldPath = $args['path'];
}

// Flattened keys that receive special treatment in the comparison loops.
$ignoredPaths = array(
    '0:crawler_time',
    'crawler_time',
    '0:scraper_time',
    'scraper_time',
    '0:pubdate',
    'pubdate',
    'pubdate:',
    '0:dc_date',
    'dc_date:',
    'dc_date',
    'dbFields:ProcessingTime'
);
20 
// Print the usage text and stop when help was requested or one of the two
// mandatory file options is missing.
if (isset($args['h']) || isset($args['help']) || !$originalFile || !$compareFile) {
    echo "Usage: ".$argv[0]." --original=<json_file_1> --compare=<json_file_2> [--path=<custom field_path for check>] [--format=<answer format (json, csv, xml, html, sql, text), default=json>] [--type=<compare type (text, md5 or crc32), default=text>] [--input_type=<type of input format (news or rss), default=news>]\n";
    exit(1);
}

// Both input files must exist. A missing file is a failure, so the script
// leaves with a non-zero status (a bare die() would report success).
foreach (array($originalFile, $compareFile) as $inputFile) {
    if (!file_exists($inputFile)) {
        echo "File ".$inputFile." not found. Exit.\n";
        exit(1);
    }
}
34 
/**
 * Error callback meant to be registered with set_error_handler().
 *
 * Warnings and notices abort the script through exitScript() with a generic
 * "wrong input" message; every other error level is only printed.
 *
 * @param int    $errNo   Level of the raised error (E_* constant).
 * @param string $errStr  Error message.
 * @param string $errFile File in which the error was raised.
 * @param int    $errLine Line on which the error was raised.
 * @return bool Always true, so PHP's built-in handler does not run afterwards.
 */
function errHandle($errNo, $errStr, $errFile, $errLine) {
    // E_NOTICE is the error level with value 8. UPLOAD_ERR_EXTENSION has the
    // same numeric value but is a file-upload status code, not an error level.
    if ($errNo == E_WARNING || $errNo == E_NOTICE) {
        exitScript("Wrong format or wrong validation of input data\n");
    } else {
        echo "Error $errNo: $errStr in $errFile on line $errLine";
    }
    return true;
}
43 
44 set_error_handler('errHandle');
45 
/**
 * Compares two values through the CRC32 checksum of their serialized form.
 *
 * @param mixed $first  First value.
 * @param mixed $second Second value.
 * @return bool True when both checksums are identical.
 */
function checkCrc($first, $second) {
    $firstSum = crc32(serialize($first));
    $secondSum = crc32(serialize($second));
    return $firstSum === $secondSum;
}
50 
/**
 * Compares two values through the MD5 digest of their serialized form.
 *
 * @param mixed $first  First value.
 * @param mixed $second Second value.
 * @return bool True when both digests are identical.
 */
function checkMd5($first, $second) {
    $firstDigest = md5(serialize($first));
    $secondDigest = md5(serialize($second));
    return $firstDigest === $secondDigest;
}
55 
/**
 * Compares two values directly with strict identity (same type and value).
 *
 * @param mixed $first  First value.
 * @param mixed $second Second value.
 * @return bool True when the values are identical.
 */
function checkText($first, $second) {
    return $first === $second;
}
60 
/**
 * Compares two values with the strategy named by the global $compareType.
 *
 * 'md5' and 'crc32' select the checksum comparisons; 'text' and any
 * unrecognised name fall back to the direct comparison.
 *
 * @param mixed $first  First value.
 * @param mixed $second Second value.
 * @return bool Result of the selected comparison.
 */
function check($first, $second){
    global $compareType;

    // Loose comparisons in this order mirror the case order of a switch.
    if ($compareType == 'text') {
        return checkText($first, $second);
    }
    if ($compareType == 'md5') {
        return checkMd5($first, $second);
    }
    if ($compareType == 'crc32') {
        return checkCrc($first, $second);
    }
    return checkText($first, $second);
}
78 
/**
 * Flattens a nested array into a one-level map of "key:subkey:..." => leaf.
 *
 * @param mixed $array Nested array to flatten.
 * @return array|false Flattened map, or false when the argument is not an array.
 */
function getPathValue ($array) {
    if (!is_array($array)) {
        return false;
    }

    $walker = new RecursiveIteratorIterator(new RecursiveArrayIterator($array));
    $flat = array();
    foreach ($walker as $leaf) {
        // Collect the key at every nesting level leading to the current leaf.
        $keys = array();
        $deepest = $walker->getDepth();
        for ($level = 0; $level <= $deepest; $level++) {
            $keys[] = $walker->getSubIterator($level)->key();
        }
        $flat[implode(':', $keys)] = $leaf;
    }
    return $flat;
}
92 
/**
 * Walks a colon-separated path through a decoded structure.
 *
 * Every path segment is URL-decoded before the lookup. A segment that is not
 * present at the current level is skipped and the walk continues from the
 * same level.
 *
 * @param mixed  $input_json Decoded structure to descend into.
 * @param string $field_path Colon-separated list of keys.
 * @return mixed Value reached by the walk, or null when $input_json is not an array.
 */
function getData4Path ($input_json, $field_path){
    if (!is_array($input_json)) {
        return null;
    }

    $node = $input_json;
    foreach (explode(':', $field_path) as $segment) {
        $key = rawurldecode($segment);
        if (isset($node[$key])) {
            $node = $node[$key];
        }
    }
    return $node;
}
106 
/**
 * Turns a raw argument vector into an option map.
 *
 * Recognised forms:
 *  - "--name=value" sets name => value;
 *  - "--name"       sets name => 'true';
 *  - "-x..."        sets x => 'true' (first character after the dash).
 *
 * All patterns are anchored to the start of the argument, so values, file
 * names or the script path that merely contain a dash are not mistaken for
 * flags.
 *
 * @param array $argv Argument vector, e.g. the global $argv.
 * @return array Map of option name => string value.
 */
function cli_parse_arguments($argv) {
    $_ARG = array();
    foreach ($argv as $arg) {
        if (preg_match('/^--([^=]+)=(.*)$/s', $arg, $reg)) {
            $_ARG[$reg[1]] = $reg[2];
        } elseif (preg_match('/^--([^=]+)$/', $arg, $reg)) {
            // Bare long flag such as --help.
            $_ARG[$reg[1]] = 'true';
        } elseif (preg_match('/^-([a-zA-Z0-9])/', $arg, $reg)) {
            $_ARG[$reg[1]] = 'true';
        }
    }
    return $_ARG;
}
118 
/**
 * Classifies a decoded value and unwraps base64 / JSON payloads.
 *
 * Result shape: array('type' => ..., 'data' => ...), where
 *  - an array input is returned as type 'array';
 *  - a string that survives a base64 round trip is decoded; if the decoded
 *    text is (truthy) JSON the type is 'array', otherwise 'data';
 *  - any other value is tried as JSON directly: type 'json' on success,
 *    type 'data' with the untouched input otherwise.
 *
 * @param mixed $data Value to inspect.
 * @return array Map with the keys 'type' and 'data'.
 */
function base64Check($data) {
    if (is_array($data)) {
        return array('type'=> 'array', 'data' => $data);
    }

    $decoded = base64_decode($data);
    if (base64_encode($decoded) !== $data) {
        // Not base64: the raw value itself may still be a JSON document.
        $parsed = json_decode($data, true);
        if ($parsed) {
            return array('type'=> 'json', 'data' => $parsed);
        }
        return array('type'=> 'data', 'data' => $data);
    }

    // Base64 payload: it may wrap a JSON document.
    $parsed = json_decode($decoded, true);
    if ($parsed) {
        return array('type'=> 'array', 'data' => $parsed);
    }
    return array('type'=> 'data', 'data' => $decoded);
}
129 
/**
 * Reads a CSV file into a list of rows keyed by the header line.
 *
 * The first non-blank line supplies the column names; each following
 * non-blank line becomes one associative row. Blank lines are skipped: they
 * come back from fgetcsv() as array(null), which can neither serve as a header
 * nor be combined with a multi-column one.
 *
 * A non-blank row whose column count differs from the header is still passed
 * to array_combine(), which reports the mismatch itself (ValueError on PHP 8).
 *
 * @param string $filename  Path of the CSV file.
 * @param string $delimiter Field delimiter.
 * @return array|false List of associative rows, or false when the file is
 *                     missing, unreadable or holds no data rows.
 */
function csv_to_array($filename='', $delimiter=',') {
    if (!file_exists($filename) || !is_readable($filename)) {
        return false;
    }

    $header = null;
    $data = array();
    if (($handle = fopen($filename, 'r')) !== false) {
        // Enclosure and escape are spelled out with their historical defaults.
        while (($row = fgetcsv($handle, 0, $delimiter, '"', '\\')) !== false) {
            if ($row === array(null)) {
                continue; // blank line
            }
            if ($header === null) {
                $header = $row;
                continue;
            }
            $data[] = array_combine($header, $row);
        }
        fclose($handle);
    }

    return !empty($data) ? $data : false;
}
147 
/**
 * Reads a CSV file into a list of rows, each row being a list of fields.
 *
 * @param string $filename  Path of the CSV file.
 * @param string $delimiter Field delimiter.
 * @return array|false List of rows, or false when the file is missing,
 *                     unreadable or yields no rows.
 */
function csv_to_array_simple($filename='', $delimiter=',') {
    if (!(file_exists($filename) && is_readable($filename))) {
        return false;
    }

    $rows = array();
    $stream = fopen($filename, 'r');
    if ($stream !== FALSE) {
        for ($line = fgetcsv($stream, 0, $delimiter); $line !== FALSE; $line = fgetcsv($stream, 0, $delimiter)) {
            $rows[] = $line;
        }
        fclose($stream);
    }

    if (empty($rows)) {
        return false;
    }
    return $rows;
}
160 
/**
 * Reads a CSV file into one flat list holding every field of every row,
 * in reading order.
 *
 * @param string $filename  Path of the CSV file.
 * @param string $delimiter Field delimiter.
 * @return array|false Flat list of fields, or false when the file is missing,
 *                     unreadable or yields no fields.
 */
function csv_to_array_rss($filename='', $delimiter=',') {
    if (!(file_exists($filename) && is_readable($filename))) {
        return false;
    }

    $cells = array();
    $stream = fopen($filename, 'r');
    if ($stream !== FALSE) {
        while (true) {
            $line = fgetcsv($stream, 0, $delimiter);
            if ($line === FALSE) {
                break;
            }
            // Append the fields of this row to the flat result.
            foreach ($line as $cell) {
                $cells[] = $cell;
            }
        }
        fclose($stream);
    }

    if (empty($cells)) {
        return false;
    }
    return $cells;
}
176 
/**
 * Extracts a key => value map from the first <table> of an HTML fragment.
 *
 * For every <tr> of the first table, the text of the first <td> becomes the
 * key and the text of the second <td> the value. Rows with fewer than two
 * <td> cells (for example header rows built from <th>) are skipped.
 *
 * @param string $html HTML markup to parse.
 * @return array|false Map of first-cell text => second-cell text, or false
 *                     when the input is empty, cannot be loaded, or contains
 *                     no table / no table rows.
 */
function htmlParse ($html){
    // loadHTML() rejects empty input, and a non-string cannot hold a table.
    if (!is_string($html) || $html === '') {
        return false;
    }

    $dom = new DOMDocument;
    $dom->recover = true;
    $dom->strictErrorChecking = false;

    // Collect libxml diagnostics internally instead of raising PHP warnings:
    // "@" would hide them from the output but a custom error handler would
    // still be invoked for every markup problem.
    $previous = libxml_use_internal_errors(true);
    $loaded = $dom->loadHTML($html);
    libxml_clear_errors();
    libxml_use_internal_errors($previous);
    if ($loaded === false) {
        return false;
    }

    $tables = $dom->getElementsByTagName('table');
    if ($tables->length == 0) {
        return false;
    }
    $rows = $tables->item(0)->getElementsByTagName('tr');
    if ($rows->length == 0) {
        return false;
    }

    $array = array();
    foreach ($rows as $row) {
        $cols = $row->getElementsByTagName('td');
        if ($cols->length < 2) {
            continue; // no key/value pair in this row
        }
        $array[$cols->item(0)->textContent] = $cols->item(1)->textContent;
    }
    return $array;
}
198 
/**
 * Prints a final message and terminates the script with exit status 1.
 *
 * If the global $tempfile names a file that still exists, it is removed
 * first. The existence check avoids an unlink() warning when the file was
 * already deleted, which would otherwise be routed to the error handler
 * in the middle of the shutdown.
 *
 * @param string $msg Message to print before exiting.
 * @return void This function does not return.
 */
function exitScript($msg) {
    global $tempfile;
    if ($tempfile && is_file($tempfile)) {
        unlink($tempfile);
    }
    echo $msg;
    exit(1);
}
205 
206 function prepare($file, $fieldPath){
207  $json = json_decode(file_get_contents($file), true);
208 
209  if ($fieldPath) {
211  }
212 
213  return base64Check($json);
214 }
215 
217  global $ignoredPaths;
218  $originalJson = prepare($originalFile, $fieldPath);
219  $compareJson = prepare($compareFile, $fieldPath);
220 
221  if ($originalJson['type'] == 'array' and $compareJson['type'] == 'array') {
222  $originalNewArray = getPathValue($originalJson['data']);
223  $compareNewArray = getPathValue($compareJson['data']);
224 
225  if ($originalNewArray == false or $compareNewArray == false) {
226  exitScript("Wrong json\n");
227  }
228 
229  if (count($originalNewArray) != count($compareNewArray)) {
230  exitScript("Number of elements of array not compare\n");
231  }
232  foreach ($originalNewArray as $key => $value) {
233  if (array_key_exists($key, $compareNewArray)) {
234  if (in_array($key, $ignoredPaths)){
235  if (stripos('%', trim($value)) or stripos('%', trim($compareNewArray[$key]))){
236  $check = check(trim($value), trim($compareNewArray[$key]));
237  if ($check == false) print "Error in path '".$key."'. Original value: '".$value."', compare value: '".$compareNewArray[$key]."'\n\n";
238  }
239  } else {
240  $check = check(trim($value), trim($compareNewArray[$key]));
241  if ($check == false) print "Error in path '".$key."'. Original value: '".$value."', compare value: '".$compareNewArray[$key]."'\n\n";
242  }
243  } else {
244  print "Error. Key ".$key." not exist in compare file\n";
245  }
246  }
247  } elseif ($originalJson['type'] == 'data' and $compareJson['type'] == 'data') {
248  exitScript("Wrong json\n");
249  } else {
250  exitScript("Wrong json\n");
251  }
252 }
253 
255  global $ignoredPaths;
256  global $type;
257  $originalCsv = prepare($originalFile, $fieldPath);
258  $compareCsv = prepare($compareFile, $fieldPath);
259 
260  if(empty($compareCsv['data'])) {
261  exitScript("Csv, compare content is null\n");
262  };
263 
264  if ($originalCsv['type'] == 'array' or $compareCsv['type'] == 'array') {
265  exitScript("Csv is wrong\n");
266  }
267 
268  $tempfile = tempnam("/tmp", pathinfo(__FILE__,PATHINFO_FILENAME));
269 
270  if ($type == 'rss') {
271  file_put_contents($tempfile, str_replace(array("\"\r\n,\"", "\"\r,\"", "\"\n,\""), '","', $originalCsv['data']));
272  $originalNewArray = getPathValue(csv_to_array_rss($tempfile));
273 
274  file_put_contents($tempfile, str_replace(array("\"\r\n,\"", "\"\r,\"", "\"\n,\""), '","', $compareCsv['data']));
275  $compareNewArray = getPathValue(csv_to_array_rss($tempfile));
276  } else {
277  file_put_contents($tempfile, str_replace(array("\"\r\n,\"", "\"\r,\"", "\"\n,\""), '","', $originalCsv['data']));
278  #$originalNewArray = getPathValue(csv_to_array($tempfile));
279  $originalNewArray = getPathValue(csv_to_array_simple($tempfile));
280 
281  file_put_contents($tempfile, str_replace(array("\"\r\n,\"", "\"\r,\"", "\"\n,\""), '","', $compareCsv['data']));
282  #$compareNewArray = getPathValue(csv_to_array($tempfile));
283  $compareNewArray = getPathValue(csv_to_array_simple($tempfile));
284  }
285 
286  unlink($tempfile);
287 
288  if ($originalNewArray == false or $compareNewArray == false) {
289  exitScript("Csv is wrong\n");
290  }
291 
292  if (count($originalNewArray) != count($compareNewArray)) {
293  exitScript("Number of elements of csv not compare");
294  }
295 
296  foreach ($originalNewArray as $key => $value) {
297  if (array_key_exists($key, $compareNewArray)) {
298  if (array_key_exists($key, $compareNewArray)) {
299  if (in_array($key, $ignoredPaths)){
300  if (stripos('%', trim($value)) or stripos('%', trim($compareNewArray[$key]))){
301  $check = check(trim($value), trim($compareNewArray[$key]));
302  if ($check == false) print "Error in path '".$key."'. Original value: '".$value."', compare value: '".$compareNewArray[$key]."'\n\n";
303  }
304  } else {
305  $check = check(trim($value), trim($compareNewArray[$key]));
306  if ($check == false) print "Error in path '".$key."'. Original value: '".$value."', compare value: '".$compareNewArray[$key]."'\n\n";
307  }
308  } else {
309  print "Error. Key ".$key." not exist in compare file\n";
310  }
311  } else {
312  print "Error. Key ".$key." not exist in compare file\n";
313  }
314  }
315 }
316 
318  global $ignoredPaths;
319  $originalCsv = prepare($originalFile, $fieldPath);
320  $compareCsv = prepare($compareFile, $fieldPath);
321 
322  if ($originalCsv['type'] == 'array' or $compareCsv['type'] == 'array' or stripos($originalCsv['data'], '<?xml version=') === false or stripos($compareCsv['data'], '<?xml version=') === false) {
323  exitScript("XML is wrong\n");
324  }
325 
326  $xmlOriginal = simplexml_load_string($originalCsv['data'], 'SimpleXMLElement', LIBXML_NOCDATA);
327  $originalArray = (array)$xmlOriginal->item;
328  $xmlCompare = simplexml_load_string($compareCsv['data'], 'SimpleXMLElement', LIBXML_NOCDATA);
329  $compareArray = (array)$xmlCompare->item;
330 
331  if ($xmlOriginal === false or $xmlCompare === false) {
332  exitScript("XML is wrong\n");
333  } else {
334  if (count($originalArray) != count($compareArray)) {
335  exitScript("Number of elements of array not compare\n");
336  }
337  foreach ($originalArray as $key => $value) {
338  if (array_key_exists($key, $compareArray)) {
339  if (in_array($key, $ignoredPaths)){
340  if (stripos('%', trim($value)) or stripos('%', trim($compareArray[$key]))){
341  $check = check(trim($value), trim($compareArray[$key]));
342  if ($check == false) print "Error in path '".$key."'. Original value: '".$value."', compare value: '".$compareArray[$key]."'\n\n";
343  }
344  } else {
345  $check = check(trim($value), trim($compareArray[$key]));
346  if ($check == false) print "Error in path '".$key."'. Original value: '".$value."', compare value: '".$compareArray[$key]."'\n\n";
347  }
348  } else {
349  print "Error. Key ".$key." not exist in compare file\n";
350  }
351  }
352  }
353 }
354 
356  global $ignoredPaths;
357  $originalHtml = prepare($originalFile, $fieldPath);
358  $compareHtml = prepare($compareFile, $fieldPath);
359 
360  if ($originalHtml['type'] == 'array' or $compareHtml['type'] == 'array') {
361  exitScript("HTML is wrong\n");
362  }
363 
364  #$originalArray = htmlParse(str_replace(array("\r\n", "\r", "\n"), '', $originalHtml['data']));
365  #$compareArray = htmlParse(str_replace(array("\r\n", "\r", "\n"), '', $compareHtml['data']));
366  $originalArray = htmlParse($originalHtml['data']);
367  $compareArray = htmlParse($compareHtml['data']);
368 
369  if ($originalArray === false or $compareArray === false) {
370  exitScript("HTML is wrong\n");
371  } else {
372  if (count($originalArray) != count($compareArray)) {
373  exitScript("Number of elements of array not compare\n");
374  }
375  foreach ($originalArray as $key => $value) {
376  if (array_key_exists($key, $compareArray)) {
377  if (in_array($key, $ignoredPaths)){
378  if (stripos('%', trim($value)) or stripos('%', trim($compareArray[$key]))){
379  $check = check(trim($value), trim($compareArray[$key]));
380  if ($check == false) print "Error in path '".$key."'. Original value: '".$value."', compare value: '".$compareArray[$key]."'\n\n";
381  }
382  } else {
383  $check = check(trim($value), trim($compareArray[$key]));
384  if ($check == false) print "Error in path '".$key."'. Original value: '".$value."', compare value: '".$compareArray[$key]."'\n\n";
385  }
386  } else {
387  print "Error. Key ".$key." not exist in compare file\n";
388  }
389  }
390  }
391 }
392 
394  $originalSql = prepare($originalFile, $fieldPath);
395  $compareSql = prepare($compareFile, $fieldPath);
396 
397  if ($originalSql['type'] == 'array' or $compareSql['type'] == 'array') {
398  exitScript("SQL query is wrong\n");
399  }
400  if (stripos($originalSql['data'], 'INSERT INTO') === false or stripos($compareSql['data'], 'INSERT INTO') === false) {
401  exitScript("SQL query is wrong\n");
402  }
403 
404  preg_match('/INSERT INTO.*\((.*)\).*VALUES \n(.*).*/', $originalSql['data'], $originalOut);
405  preg_match('/INSERT INTO.*\((.*)\).*VALUES \n\((.*)\).*/', $compareSql['data'], $compareOut);
406 
407  $originalOutHeader = $originalOut[1];
408  $originalCsvHeader = explode(",", $originalOutHeader);
409  $originalOut = ltrim($originalOut[2], '(');
410  $originalOut = rtrim($originalOut, ');');
411  $originalCsvVal = explode('), (', $originalOut);
412 
413  $compareOutHeader = $compareOut[1];
414  $compareCsvHeader = explode(",", $compareOutHeader);
415  $compareOut = ltrim($compareOut[2], '(');
416  $compareOut = rtrim($compareOut, ');');
417  $compareCsvVal = explode('), (', $compareOut);
418 
419  if (count($originalCsvHeader) != count($compareCsvHeader)) {
420  exitScript("Number of elements in INSERT not compare\n");
421  }
422 
423  if (count($originalCsvVal) != count($compareCsvVal)) {
424  exitScript("Number of elements in VALUE not compare\n");
425  }
426 
427  $check = check($originalCsvHeader, $compareCsvHeader);
428  if ($check == false) exitScript("Not compare INSERT");
429 
430  $check = check($originalCsvVal, $compareCsvVal);
431  if ($check == false) exitScript("Not compare VALUE");
432 }
433 
435  $originalText = prepare($originalFile, $fieldPath);
436  $compareText = prepare($compareFile, $fieldPath);
437 
438  if ($originalText['type'] == 'array' or $compareText['type'] == 'array') {
439  exitScript("Text is wrong\n");
440  }
441  $check = check($originalText, $compareText);
442  if ($check == false) exitScript("Not compare");
443 }
444 
445 switch($format) {
446  case 'json':
448  break;
449  case 'csv':
451  break;
452  case 'xml':
454  break;
455  case 'html':
457  break;
458  case 'sql':
460  break;
461  case 'text':
463  break;
464  default:
465  print "Wrong format\n";
466  exit(1);
467 }
468