-
Notifications
You must be signed in to change notification settings - Fork 1.2k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
修复redis lsize过期bug;修复内容分页attached_url处理的bug
- Loading branch information
Showing
5 changed files
with
59 additions
and
49 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -318,7 +318,7 @@ class phpspider | |
public $on_download_page = null; | ||
|
||
/** | ||
* 在一个attached_url对应的网页下载完成之后调用. 主要用来对下载的网页进行处理 | ||
* 在一个attached_url对应的网页下载完成之后调用. 主要用来对分页网页进行处理 | ||
* | ||
* @var mixed | ||
* @access public | ||
|
@@ -1532,7 +1532,7 @@ public function request_url($url, $link = array()) | |
requests::set_header($k, $v); | ||
} | ||
} | ||
//限制 http 请求模式为 get 或 post | ||
// 限制 http 请求模式为 get 或 post | ||
$method = trim(strtolower($link['method'])); | ||
$method = ($method == 'post') ? 'post' : 'get'; | ||
$params = empty($link['params']) ? array() : $link['params']; | ||
|
@@ -1545,7 +1545,7 @@ public function request_url($url, $link = array()) | |
|
||
$http_code = requests::$status_code; | ||
|
||
//请求完成 host 的并发计数减 1 2018-5 BY KEN <[email protected]> | ||
// 请求完成 host 的并发计数减 1 2018-5 BY KEN <[email protected]> | ||
if (self::$configs['max_task_per_host'] > 0) | ||
{ | ||
$this->incr_task_per_host($url, 'decr'); | ||
|
@@ -1593,14 +1593,8 @@ public function request_url($url, $link = array()) | |
} | ||
else | ||
{ | ||
if ( ! empty(self::$configs['max_try']) and $http_code == 407) | ||
{ | ||
// 扔到队列头部去, 继续采集 | ||
$this->queue_rpush($link); | ||
log::error("Failed to download page {$url}"); | ||
self::$collect_fail++; | ||
} | ||
elseif ( ! empty(self::$configs['max_try']) and in_array($http_code, array('0', '502', '503', '429'))) | ||
// 407 为代理服务器出错,其他是服务器出错 | ||
if ( ! empty(self::$configs['max_try']) and in_array($http_code, ['407', '0', '502', '503', '429']) ) | ||
{ | ||
// 采集次数加一 | ||
$link['try_num']++; | ||
|
@@ -2109,7 +2103,10 @@ public function get_fields($confs, $html, $url, $page) | |
$link['url'] = $collect_url; | ||
$link = $this->link_uncompress($link); | ||
requests::$input_encoding = null; | ||
$html = $this->request_url($collect_url, $link); | ||
$method = empty($link['method']) ? 'get' : strtolower($link['method']); | ||
$params = empty($link['params']) ? array() : $link['params']; | ||
$html = requests::$method($url, $params); | ||
//$html = $this->request_url($collect_url, $link); | ||
// 在一个attached_url对应的网页下载完成之后调用. 主要用来对下载的网页进行处理. | ||
if ($this->on_download_attached_page) | ||
{ | ||
|
@@ -2715,38 +2712,6 @@ public function queue_rpush($link = array(), $allowed_repeat = false) | |
return $status; | ||
} | ||
|
||
/** | ||
* Take one link off the collect queue ("left" side, per the original author). | ||
* Described by the original author as last-in-first-out. | ||
* Stated purpose: avoid incompletely assembled data when a paginated | ||
* content page fails to be collected, and allow list pages to be | ||
* collected in order. | ||
* | ||
* Redis mode: pops from 'collect_queue' via queue::spop() when | ||
* queue_order is 'rand', otherwise via queue::lpop(), then JSON-decodes | ||
* the entry into an associative array. | ||
* In-memory mode: removes and returns the LAST element of | ||
* self::$collect_queue (array_pop). | ||
* | ||
* @return mixed the link taken from the queue (decoded JSON in Redis | ||
*               mode, the popped array element otherwise) | ||
* @author seatle <[email protected]> | ||
* @created time :2016-09-23 17:13 | ||
*/ | ||
public function queue_lpop() | ||
{ | ||
if (self::$use_redis) | ||
{ | ||
// Pick the Redis structure according to the crawl order setting: | ||
// random order pops from a set (spop), sequential order from a list (lpop). | ||
if (self::$configs['queue_order'] == 'rand') | ||
{ | ||
$link = queue::spop('collect_queue'); | ||
} | ||
else | ||
{ | ||
$link = queue::lpop('collect_queue'); | ||
} | ||
// Queue entries are JSON strings; decode to an associative array. | ||
$link = json_decode($link, true); | ||
} | ||
else | ||
{ | ||
// NOTE(review): array_pop takes from the END of the array, while the | ||
// method name says "lpop" — confirm which end queue_lpush/rpush write to. | ||
$link = array_pop(self::$collect_queue); | ||
} | ||
return $link; | ||
} | ||
|
||
/** | ||
* 从队列右边取出 | ||
* | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,7 +1,26 @@ | ||
<?php | ||
// Scratch/test script. As written it only: | ||
//   1. filters an array with array_filter() (no callback) and prints it, | ||
//   2. prints the string length of a SHA-256 hash of 'bc', | ||
//   3. exits. | ||
// Everything after `exit;` is unreachable. | ||
|
||
$arr = array('fff', 'ggg', '', ''); | ||
$arr = array_filter($arr); | ||
print_r($arr); | ||
$str = hash('sha256', 'bc'); | ||
echo strlen($str); | ||
// Execution stops here; the webhook request below never runs. | ||
exit; | ||
// $data is only referenced by the commented-out "http" options below, | ||
// so it is currently unused. | ||
$data = array("url" => 'http://www.test.com'); | ||
$data = http_build_query($data); | ||
// Build the stream context options | ||
$opts = [ | ||
//"http" => [ | ||
//"method" => "POST", | ||
//"header" => "Content-Type: multipart/form-data\r\n", | ||
//"content" => $data, | ||
//], | ||
// NOTE(review): TLS peer and peer-name verification are both disabled here. | ||
"ssl" => array( | ||
"verify_peer"=>false, | ||
"verify_peer_name"=>false, | ||
), | ||
]; | ||
|
||
$context = stream_context_create($opts); | ||
|
||
// Fetch the URL using the context built above. Only the "ssl" options are | ||
// active (the "http" options are commented out), so no custom method, | ||
// headers or body are sent. | ||
// NOTE(review): the URL path appears to embed an API credential — confirm, | ||
// and if so move it out of source control and rotate it. | ||
$file = file_get_contents('https://api.potato.im:8443/10100386:Z0dT3Oalvu5IGC71OrvGs3hT/setWebhook', false, $context); | ||
|
||
var_dump($file); |