// fetch(): builds a URL-fetch request for the given user. Only fragments of
// the body are visible in this excerpt, so the notes below describe just the
// statements that are shown.
//   $userId - used below as the prefix of the request file name.
//   $params - used both as a scalar (urlMd5 lookup) and as an array
//             (limit / siteId / pN); presumably which one depends on $one - TODO confirm.
//   $one    - optional flag, defaults to false; its use is not visible here.
7 public function fetch($userId, $params, $one =
false)
// Error reporting structure: flags the error and stores its body.
16 $data[
'manError'][
'isError'] =
true;
17 $data[
'manError'][
'errorBody'] = $error;
// Request variant that looks a URL up by its MD5.
33 'sitesCriterions' => array(
34 'ORDER BY' =>
'CDate DESC'
36 'sitesList' => array(),
38 'urlsCriterions' => array(
// NOTE(review): $params is interpolated directly into the criterion string.
// If it can come from request input this is an injection risk - verify that
// it is validated/escaped before reaching this point.
39 'WHERE' =>
"`urlMd5`='$params'",
41 'ORDER BY' =>
'CDate ASC'
// Request variant that lists URLs of one site using prebuilt conditions.
51 'maxURLs' => $params[
'limit'],
52 'sitesCriterions' => array(
// NOTE(review): siteId is likewise interpolated unescaped - confirm it is sanitised upstream.
53 'WHERE' =>
"`Id`='{$params['siteId']}'",
55 'sitesList' => array(),
57 'urlsCriterions' => array(
58 'WHERE' =>
"$conditions",
59 'ORDER BY' => $orderConditions
// Pagination: only applied when a limit was supplied.
63 if (isset($params[
'limit'])) {
// Offset of the first row: pN * limit - limit, i.e. (pN - 1) * limit,
// which presumes page numbers start at 1 - TODO confirm.
64 $from = $params[
'pN']*$params[
'limit']-$params[
'limit'];
65 $json[0][
'maxURLs'] = $params[
'limit'];
66 $json[0][
'urlsCriterions'][
'LIMIT'] =
"$from, {$params['limit']}";
// NOTE(review): "Opertion" in the log message looks like a typo for
// "Operation"; it is runtime text and is left unchanged here.
72 Logger::log(
"Opertion ->" . $this->operation,
false);
// Opens <basePath>/json_temp/<userId>_request.json for writing (truncating).
// NOTE(review): the fopen() result is not checked in the visible lines.
74 $path = Yii::app()->getBasePath() .
'/json_temp/';
75 $file = fopen($path . $userId .
'_request.json',
'w');
// Builds an "ORDER BY" fragment of the form "<column> <direction>" from
// $params['sortBy'] and $params['sortDirection'] and returns it as a string.
// The else-branches between the visible lines are not shown in this excerpt;
// the comments below assume each pair of appends is an if/else - TODO confirm.
81 $orderByConditions =
'';
// Sort column: falls back to 'UDate' when no column was requested.
82 if ($params[
'sortBy'] ==
'') {
83 $orderByConditions .=
'UDate';
// NOTE(review): sortBy is appended verbatim; if it originates from request
// input it should be checked against a list of allowed column names.
85 $orderByConditions .= $params[
'sortBy'];
// Sort direction: falls back to ' DESC' when none was requested.
87 if ($params[
'sortDirection'] ==
'') {
88 $orderByConditions .=
' DESC';
// NOTE(review): sortDirection is appended verbatim as well - verify it is
// restricted to ASC/DESC by the caller.
90 $orderByConditions .=
' ' . $params[
'sortDirection'];
92 return $orderByConditions;
// Builds the WHERE-clause string ($conditions) from the filter form ($form).
// The lines selecting which form field maps to which column (presumably a
// switch on $name) are not visible in this excerpt.
// NOTE(review): every value below is concatenated into the SQL fragment after
// at most a trim(); nothing visible here escapes or type-checks it. If $form
// comes from request input this is an SQL injection risk - confirm whether the
// consumer of this string sanitises it.
// Default status filter when the form leaves 'status' empty.
98 if (
$form[
'status'] ==
'') {
99 $conditions .=
"`Status`>=0";
// Walk every form field and append a condition for each non-blank value.
101 foreach (
$form as $name => $value) {
102 if (trim($value) !=
'') {
// NOTE(review): unlike the other branches this one has no leading " AND " and
// uses $value untrimmed and unquoted - presumably it is the first condition
// emitted; verify.
105 $conditions .=
"`Status`=$value";
// Prefix match on the URL (value followed by the % wildcard).
108 $conditions .=
" AND `URL` LIKE '" . trim($value) .
"%'";
111 $conditions .=
" AND `URLMd5`='" . trim($value) .
"'";
114 $conditions .=
" AND `ContentType`='" . trim($value) .
"'";
117 $conditions .=
" AND `State`='" . trim($value) .
"'";
120 $conditions .=
" AND `Type`='" . trim($value) .
"'";
// The parent is matched by the MD5 of the trimmed value rather than the value itself.
123 $conditions .=
" AND `ParentMd5`='" . md5(trim($value)) .
"'";
126 $conditions .=
" AND `ErrorMask`='" . trim($value) .
"'";
// Bitwise test: rows where TagsMask has any bit in common with the value.
129 $conditions .=
" AND `TagsMask`&" . trim($value);
132 $conditions .=
" AND `TagsCount`='" . trim($value) .
"'";
135 $conditions .=
" AND `HttpCode`='" . trim($value) .
"'";
// NOTE(review): second `ErrorMask` equality condition (same SQL as above);
// presumably reached from a different form field - check for duplication.
138 $conditions .=
" AND `ErrorMask`='" . trim($value) .
"'";
// Date range filters: each bound is "<date> <time>" taken from two form fields.
141 $conditions .=
" AND `TcDate`>='" .
$form[
'tcDateFrom'] .
' ' .
$form[
'tcTimeFrom'].
"'";
144 $conditions .=
" AND `TcDate` <='" .
$form[
'tcDateTo'] .
' ' .
$form[
'tcTimeTo'] .
"'";
147 $conditions .=
" AND `CDate`>='" .
$form[
'cDateFrom'] .
' ' .
$form[
'cTimeFrom'].
"'";
150 $conditions .=
" AND `CDate` <='" .
$form[
'cDateTo'] .
' ' .
$form[
'cTimeTo'] .
"'";
153 $conditions .=
" AND `PDate`>='" .
$form[
'pDateFrom'] .
' ' .
$form[
'pTimeFrom'].
"'";
156 $conditions .=
" AND `PDate` <='" .
$form[
'pDateTo'] .
' ' .
$form[
'pTimeTo'] .
"'";
// Depth range filters (bounds are compared as quoted strings in the SQL).
159 $conditions .=
" AND `Depth`>='" .
$form[
'depthFrom'] .
"'";
162 $conditions .=
" AND `Depth`<='" .
$form[
'depthTo'] .
"'";
// Restricts the result to rows with an empty parent MD5.
165 $conditions .=
" AND `parentMd5` = ''";
// Runs shell/url_fetch.sh for this user and stores whatever it prints in
// json_temp/<userId>_response.json.
// 'api' is read from the application parameters and passed to the script as
// its first argument.
178 $api = Yii::app()->params[
'api'];
179 $path = Yii::app()->getBasePath() .
'/shell/';
180 $pathJson = Yii::app()->getBasePath() .
'/json_temp/';
// NOTE(review): $api, $pathJson and $userId are placed on the command line
// without escapeshellarg(); if any of them can contain spaces or shell
// metacharacters the command breaks or becomes injectable - verify.
181 $cmd =
"sh " . $path .
"url_fetch.sh $api $pathJson $userId";
// shell_exec() returns the command's output, or null/false on failure; the
// result is written to the response file without being checked here.
182 $json = shell_exec($cmd);
// NOTE(review): the fopen() result is not checked, and no fclose() appears in
// the visible lines - confirm the handle is closed afterwards.
183 $file = fopen($pathJson . $userId .
'_response.json',
'w');
184 fwrite($file,
$json);
190 $path = Yii::app()->getBasePath() .
'/json_temp/';
191 return file_get_contents($path . $userId .
'_response.json');
198 $dataProvider = CJSON::decode(
$json);
199 $data = $dataProvider[
"itemsList"][0][
"itemObject"];
201 $id = Yii::app()->getRequest()->getParam(
'urlId');
202 throw new CHttpException(
'404',
"Resource with ID<br>$id<br>was not found...");
204 $toReturn[
'manError'][
'isError'] =
false;
206 foreach (
$data[0] as
$i => $value) {
208 $data[0][
$i] =
'<div class="none" title=""' .
$i
209 .
'": "","><span title="">none</span></div>';
211 if ($value === null) {
212 $data[0][
$i] =
'<div class="none" title=""' .
$i
213 .
'": null,"><span title="">null</span></div>';
219 $data[0][
'url_act'] =
'<a target = "_blank" href="'.$data[0][
'url'].
'">'.
$data[0][
'url'].
'</a>';
// Aggregates per-node statistics for one URL: every counter starts at zero,
// then each matching item in the decoded response is added in.
227 $counters[
'crawled'] = 0;
228 $counters[
'processed'] = 0;
229 $counters[
'linksI'] = 0;
230 $counters[
'linksE'] = 0;
231 $counters[
'tagsCount'] = 0;
232 $counters[
'freq'] = 0;
233 $counters[
'crawlingTime'] = 0;
234 $counters[
'processingTime'] = 0;
235 $counters[
'totalTime'] = 0;
236 $counters[
'size'] = 0;
237 $counters[
'mRate'] = 0;
// Number of matching items; used below as the divisor for the mRate average.
238 $counters[
'mRateCNT'] = 0;
// Outer loop: entries of 'itemsList'; inner loop: the objects each entry carries.
240 foreach (
$json[
'itemsList'] as $item) {
241 foreach ($item[
'itemObject'] as $siteItem) {
// Only items whose urlMd5 equals $urlId contribute (loose == comparison).
242 if ($siteItem[
'urlMd5'] == $urlId) {
243 $counters[
'crawled'] += $siteItem[
'crawled'];
244 $counters[
'processed'] += $siteItem[
'processed'];
245 $counters[
'linksI'] += $siteItem[
'linksI'];
246 $counters[
'linksE'] += $siteItem[
'linksE'];
247 $counters[
'tagsCount'] += $siteItem[
'tagsCount'];
248 $counters[
'freq'] += $siteItem[
'freq'];
249 $counters[
'crawlingTime'] += $siteItem[
'crawlingTime'];
250 $counters[
'processingTime'] += $siteItem[
'processingTime'];
251 $counters[
'totalTime'] += $siteItem[
'totalTime'];
252 $counters[
'size'] += $siteItem[
'size'];
253 $counters[
'mRate'] += $siteItem[
'mRate'];
254 $counters[
'mRateCNT']++;
// Turns the summed mRate into an average over the matching items.
// NOTE(review): if no item matched, mRateCNT is still 0 and this divides by
// zero (DivisionByZeroError on PHP 8, warning on PHP 7) unless a guard exists
// in the lines not shown in this excerpt - verify.
258 $counters[
'mRate'] = $counters[
'mRate'] / $counters[
'mRateCNT'];
267 $state =
$data[
'itemsList'][0][
'itemObject'][$n][
'state'];
268 $status =
$data[
'itemsList'][0][
'itemObject'][$n][
'status'];
271 $ret[
'state'] =
'Enabled';
274 $ret[
'state'] =
'Disabled';
277 $ret[
'state'] =
'Error';
282 $ret[
'status'] =
'Undefined';
285 $ret[
'status'] =
'New';
288 $ret[
'status'] =
'Selected for crawling';
291 $ret[
'status'] =
'Crawling';
294 $ret[
'status'] =
'Crawled';
297 $ret[
'status'] =
'Selected to process';
300 $ret[
'status'] =
'Processing';
303 $ret[
'status'] =
'Processed';
306 $ret[
'status'] =
'Selected for crawling (incremental)';
315 $dataProvider = CJSON::decode(
$json);
316 $items = $dataProvider[
"itemsList"][0][
"itemObject"];
317 foreach ($items as
$i => $urlItem) {
324 $data[
'manError'][
'isError'] =
false;
330 $limitsProvider = array();
333 $limitsProvider[] = array(
334 'limit_name' =>
'Priority',
335 'limit_value' =>
$data[
'itemsList'][0][
'itemObject'][0][
'priority'],
336 'limit_name_f' =>
'priority'
338 $limitsProvider[] = array(
339 'limit_name' =>
'Max URLs from page',
340 'limit_value' =>
$data[
'itemsList'][0][
'itemObject'][0][
'maxURLsFromPage'],
341 'limit_name_f' =>
'maxURLsFromPage'
343 $limitsProvider[] = array(
344 'limit_name' =>
'Processing delay, ms',
345 'limit_value' =>
$data[
'itemsList'][0][
'itemObject'][0][
'processingDelay'],
346 'limit_name_f' =>
'processingDelay'
348 $limitsProvider[] = array(
349 'limit_name' =>
'Request delay, ms',
350 'limit_value' =>
$data[
'itemsList'][0][
'itemObject'][0][
'requestDelay'],
351 'limit_name_f' =>
'requestDelay'
353 $limitsProvider[] = array(
354 'limit_name' =>
'HTTP Timeout, ms',
355 'limit_value' =>
$data[
'itemsList'][0][
'itemObject'][0][
'httpTimeout'],
356 'limit_name_f' =>
'httpTimeout'
358 return new CArrayDataProvider($limitsProvider, array(
359 'keyField' =>
'limit_name',
360 'pagination' =>
false
// Expands the URL's error bit-mask into a list of individual errors and wraps
// it in a non-paginated CArrayDataProvider keyed by 'errorType'.
// The mask is read from the first object of the first 'itemsList' entry.
369 $mask =
$data[
'itemsList'][0][
'itemObject'][0][
'errorMask'];
// Tests each bit position 0..64 inclusive (65 iterations).
// NOTE(review): pow(2, 63) and pow(2, 64) do not fit a signed 64-bit integer,
// so pow() returns a float for them and the bitwise & then relies on an
// out-of-range float-to-int conversion - confirm the upper bound is intended
// (62 or 63 may be what was meant).
370 for ($power = 0; $power <= 64; $power++) {
371 $error = $mask & pow(2, $power);
// How $errors is filled from $error is not visible in this excerpt.
377 return new CArrayDataProvider($errors, array(
378 'keyField' =>
'errorType',
379 'pagination' =>
false
// Expands the URL's tags bit-mask into a list of individual tags and wraps it
// in a non-paginated CArrayDataProvider.
// The mask is read from the first object of the first 'itemsList' entry.
388 $mask =
$data[
'itemsList'][0][
'itemObject'][0][
'tagsMask'];
// Tests each bit position 0..64 inclusive (65 iterations).
// NOTE(review): pow(2, 63) and pow(2, 64) do not fit a signed 64-bit integer,
// so pow() returns a float for them and the bitwise & then relies on an
// out-of-range float-to-int conversion - confirm the upper bound is intended.
390 for ($power = 0; $power <= 64; $power++) {
391 $tag = $mask & pow(2, $power);
// How $tags is filled from $tag is not visible in this excerpt.
397 return new CArrayDataProvider($tags, array(
399 'pagination' =>
false
406 'siteId' =>
$data[
'siteId'],
407 'url' =>
$data[
'url'],
408 'urlMd5' =>
$data[
'urlMd5'],
414 $request = Yii::app()->request;
416 'contentTypeMask' => 4095,
417 'siteId' => $request->getParam(
"siteId"),
418 'url' => $request->getParam(
"url"),
420 'urlMd5' => $request->getParam(
"urlMd5"),
// Sends the content request: the request array is serialised to
// json_temp/<userId>_request_content.json, shell/url_content.sh is run on that
// file, and its output is saved to and re-read from
// json_temp/<userId>_response_content.json.
422 $json = CJSON::encode($json);
423 $path = Yii::app()->getBasePath() .
'/json_temp/';
// NOTE(review): the fopen() result is not checked and no fclose() appears in
// the visible lines before the script reads the file, so the write may not be
// flushed yet - verify.
424 $file = fopen($path . $userId .
'_request_content.json',
'w');
425 fwrite($file, $json);
426 $api = Yii::app()->params[
'api'];
// $path is reused: it now points at the shell script directory.
427 $path = Yii::app()->getBasePath() .
'/shell/';
428 $pathFile = Yii::app()->getBasePath() .
'/json_temp/' . $userId .
'_request_content.json';
// NOTE(review): $api and $pathFile (which embeds $userId) reach the command
// line without escapeshellarg() - confirm neither can carry shell
// metacharacters.
429 $cmd =
"sh " . $path .
"url_content.sh $api $pathFile";
// From here on $json holds the script's raw output instead of the request.
430 $json = shell_exec($cmd);
// $path is switched back to the json_temp directory for the response file.
431 $path = Yii::app()->getBasePath() .
'/json_temp/';
432 $file = fopen($path . $userId .
'_response_content.json',
'w+');
433 fwrite($file, $json);
// Reads the response back from disk; presumably equal to $json just written
// (whether the handle is closed/flushed first is not visible here).
435 $response = file_get_contents($path . $userId .
'_response_content.json');
437 $respDecoded = CJSON::decode($response);
438 if ($request->getParam(
"t") == 0) {
439 if(!isset($respDecoded[
'itemsList'][0][
'itemObject'][0][
'rawContents'][0])) {
440 $rawContent =
'No contents found...';
442 $rawContent = base64_decode($respDecoded[
'itemsList'][0][
'itemObject'][0][
'rawContents'][0][
'buffer']);
444 $contentItem[
'type'] =
'raw';
445 $contentItem[
'contents'] =
'<textarea id = "url-content">' . CHtml::encode($rawContent) .
'</textarea>';
448 if(!isset($respDecoded[
'itemsList'][0][
'itemObject'][0][
'processedContents'][0])) {
449 $proContent =
'{"Sorry...": "...no contents found"}';
450 $contentItem[
'type'] =
'processed';
451 $contentItem[
'contents'] = $proContent;
453 $proContent = base64_decode($respDecoded[
'itemsList'][0][
'itemObject'][0][
'processedContents'][0][
'buffer']);
454 $contentDecoded = CJSON::decode($proContent);
455 $contentItem[
'type'] =
'processed';
456 $contentItem[
'contents'] = $contentDecoded;
457 $contentItem[
'contents'] = CJSON::encode($contentItem[
'contents']);
496 "itemObject"=> array(
504 "siteId": "b85ab149a528bd0a024fa0f43e80b5fc",
505 "url": "701ccc5c1c589041d31d13dae8dce90d",
508 "2015-06-26 17:46:44",
509 "2015-06-26 17:46:44",
518 "siteId": "b85ab149a528bd0a024fa0f43e80b5fc",
519 "url": "701ccc5c1c589041d31d13dae8dce90d",
522 "2015-06-26 17:46:44",
523 "2015-06-26 17:46:44",
532 "siteId": "b85ab149a528bd0a024fa0f43e80b5fc",
533 "url": "701ccc5c1c589041d31d13dae8dce90d",
536 "2015-06-26 17:46:44",
537 "2015-06-26 17:46:44",
546 "siteId": "b85ab149a528bd0a024fa0f43e80b5fc",
547 "url": "701ccc5c1c589041d31d13dae8dce90d",
550 "2015-06-26 17:46:44",
551 "2015-06-26 17:46:44",
560 "siteId": "b85ab149a528bd0a024fa0f43e80b5fc",
561 "url": "701ccc5c1c589041d31d13dae8dce90d",
564 "2015-06-26 17:46:44",
565 "2015-06-26 17:46:44",
574 "siteId": "b85ab149a528bd0a024fa0f43e80b5fc",
575 "url": "701ccc5c1c589041d31d13dae8dce90d",
578 "2015-06-26 17:46:44",
579 "2015-06-26 17:46:44",
591 "itemObject"=> array(
599 "siteId": "b85ab149a528bd0a024fa0f43e80b5fc",
600 "url": "701ccc5c1c589041d31d13dae8dce90d",
603 "2015-06-26 17:46:44",
604 "2015-06-26 17:46:44",
613 "siteId": "b85ab149a528bd0a024fa0f43e80b5fc",
614 "url": "701ccc5c1c589041d31d13dae8dce90d",
617 "2015-06-26 17:46:44",
618 "2015-06-26 17:46:44",
627 "siteId": "b85ab149a528bd0a024fa0f43e80b5fc",
628 "url": "701ccc5c1c589041d31d13dae8dce90d",
631 "2015-06-26 17:46:44",
632 "2015-06-26 17:46:44",
640 foreach($history as $node) {
642 foreach ($node[
'itemObject'][
$i] as $k => $v) {
643 $node[
'itemObject'][
$i][
"0".$k] = $v;
644 unset($node[
'itemObject'][
$i][$k]);
648 if (isset($Object[
'reason']) &&
$operation[1] == 21) {
651 $node[
'itemObject'][
$i][
'Reason'] =
'';
654 $nodes[$node[
'node']] =
new CArrayDataProvider($node[
'itemObject'], array(
656 'pagination' =>
false
681 return "URL: Insert";
684 return "URL: Delete";
687 return "URL: Update URL";
690 return "URL: Cleanup URL";
696 return "URL: Content";
699 return "Status: New";
702 return "Status: Selected to crawl";
705 return "Status: Crawling";
708 return "Status: Crawled";
711 return "Status: Selected to process";
714 return "Status: Processing";
717 return "Status: Processed";
738 return "REASON_USER_REQUEST ";
741 return "REASON_AGING";
744 return "REASON_SITE_LIMITS";
747 return "REASON_SELECT_TO_CRAWL_TTL";
750 return "REASON_SELECT_TO_PROCESS_TTL";
753 return "REASON_RECRAWL";
756 return "REASON_CRAWLER_AUTOREMOVE";
759 return "REASON_SITE_UPDATE_ROOT_URLS";
762 return "REASON_RT_FINALIZER";
765 return "REASON_PROCESSOR_DUPLICATE";