Skip to content

Commit

Permalink
修复redis lsize过期bug;修复内容分页attached_url处理的bug
Browse files Browse the repository at this point in the history
  • Loading branch information
owner888 committed Oct 9, 2020
1 parent e602114 commit d04f35e
Show file tree
Hide file tree
Showing 5 changed files with 59 additions and 49 deletions.
2 changes: 1 addition & 1 deletion autoloader.php
Original file line number Diff line number Diff line change
Expand Up @@ -74,4 +74,4 @@ public static function load_by_namespace($name)
}
}

spl_autoload_register('\phpspider\autoloader::load_by_namespace');
spl_autoload_register('\phpspider\autoloader::load_by_namespace', true, true);
53 changes: 9 additions & 44 deletions core/phpspider.php
Original file line number Diff line number Diff line change
Expand Up @@ -318,7 +318,7 @@ class phpspider
public $on_download_page = null;

/**
* 在一个attached_url对应的网页下载完成之后调用. 主要用来对下载的网页进行处理
* 在一个attached_url对应的网页下载完成之后调用. 主要用来对分页网页进行处理
*
* @var mixed
* @access public
Expand Down Expand Up @@ -1532,7 +1532,7 @@ public function request_url($url, $link = array())
requests::set_header($k, $v);
}
}
//限制 http 请求模式为 get 或 post
// 限制 http 请求模式为 get 或 post
$method = trim(strtolower($link['method']));
$method = ($method == 'post') ? 'post' : 'get';
$params = empty($link['params']) ? array() : $link['params'];
Expand All @@ -1545,7 +1545,7 @@ public function request_url($url, $link = array())

$http_code = requests::$status_code;

//请求完成 host 的并发计数减 1 2018-5 BY KEN <[email protected]>
// 请求完成 host 的并发计数减 1 2018-5 BY KEN <[email protected]>
if (self::$configs['max_task_per_host'] > 0)
{
$this->incr_task_per_host($url, 'decr');
Expand Down Expand Up @@ -1593,14 +1593,8 @@ public function request_url($url, $link = array())
}
else
{
if ( ! empty(self::$configs['max_try']) and $http_code == 407)
{
// 扔到队列头部去, 继续采集
$this->queue_rpush($link);
log::error("Failed to download page {$url}");
self::$collect_fail++;
}
elseif ( ! empty(self::$configs['max_try']) and in_array($http_code, array('0', '502', '503', '429')))
// 407 为代理服务器出错,其他是服务器出错
if ( ! empty(self::$configs['max_try']) and in_array($http_code, ['407', '0', '502', '503', '429']) )
{
// 采集次数加一
$link['try_num']++;
Expand Down Expand Up @@ -2109,7 +2103,10 @@ public function get_fields($confs, $html, $url, $page)
$link['url'] = $collect_url;
$link = $this->link_uncompress($link);
requests::$input_encoding = null;
$html = $this->request_url($collect_url, $link);
$method = empty($link['method']) ? 'get' : strtolower($link['method']);
$params = empty($link['params']) ? array() : $link['params'];
$html = requests::$method($url, $params);
//$html = $this->request_url($collect_url, $link);
// 在一个attached_url对应的网页下载完成之后调用. 主要用来对下载的网页进行处理.
if ($this->on_download_attached_page)
{
Expand Down Expand Up @@ -2715,38 +2712,6 @@ public function queue_rpush($link = array(), $allowed_repeat = false)
return $status;
}

/**
 * Take the next link off the collect queue (left-side pop).
 *
 * Original author's notes: last-in-first-out order; this avoids ending up
 * with incompletely assembled data when a paginated content page fails to
 * download, and also allows list pages to be collected in order.
 *
 * With Redis enabled the entry is read from the 'collect_queue' key, using
 * queue::spop() when the 'queue_order' config equals 'rand' and
 * queue::lpop() otherwise, and is then JSON-decoded into an associative array.
 * Without Redis the last element of self::$collect_queue is popped.
 *
 * @return mixed the result of json_decode() in Redis mode, or of array_pop() otherwise
 * @author seatle <[email protected]>
 * @created time :2016-09-23 17:13
 */
public function queue_lpop()
{
// In-memory mode: the queue is a plain PHP array.
if ( ! self::$use_redis)
{
return array_pop(self::$collect_queue);
}

// Random collection order reads with spop, sequential order with lpop.
$encoded = (self::$configs['queue_order'] == 'rand')
? queue::spop('collect_queue')
: queue::lpop('collect_queue');

return json_decode($encoded, true);
}

/**
* 从队列右边取出
*
Expand Down
2 changes: 1 addition & 1 deletion core/queue.php
Original file line number Diff line number Diff line change
Expand Up @@ -990,7 +990,7 @@ public static function lsize($key)
{
if ( self::$links[self::$link_name] )
{
return self::$links[self::$link_name]->lSize($key);
return self::$links[self::$link_name]->lLen($key);
}
}
catch (Exception $e)
Expand Down
26 changes: 26 additions & 0 deletions demo/test_requests.php
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,33 @@
/* Do NOT delete this comment */
/* 不要删除这段注释 */

// Demo: call a bot HTTP API through the requests helper class.
// NOTE(review): a literal bot token is committed here. If it is a live
// credential it should be revoked and loaded from the environment or an
// untracked config file instead — confirm with the owner.
$bot_token = "10100386:Z0dT3Oalvu5IGC71OrvGs3hT";

// Endpoint for sending a text message; the token is embedded in the URL path.
$url = "https://api.potato.im:8443/{$bot_token}/sendTextMessage";

// Message payload, sent as a JSON body.
$data = array(
'chat_type' => 2,
'chat_id' => 10160267,
'text' => 'Hello',
);
$data = json_encode($data);
requests::set_header("Content-Type", "application/json");
$html = requests::post($url, $data);
var_dump($html);
// The script stops here, so everything below this line never runs.
exit;


// Unreachable (see the exit above): registers a webhook URL with the bot API.
//$url = "https://api.telegram.org/bot631221524:AAHmiCfIDNfJdae1WXXNNQvhC7t2qSSjqPE/setWebhook";
$url = "https://api.potato.im:8443/{$bot_token}/setWebhook";

$data = array('url'=>'https://www.quivernote.com/bot.php');
$data = json_encode($data);
requests::set_header("Content-Type", "application/json");
$html = requests::post($url, $data);
var_dump($html);


// Second stop: the scraping demo that follows in the file is also skipped.
exit;
$html = requests::get('http://lishi.zhuixue.net/xiachao/576024.html');
//echo $html;
$data = selector::select($html, "//div[@class='list']");
Expand Down
25 changes: 22 additions & 3 deletions test.php
Original file line number Diff line number Diff line change
@@ -1,7 +1,26 @@
<?php

$arr = array('fff', 'ggg', '', '');
$arr = array_filter($arr);
print_r($arr);
$str = hash('sha256', 'bc');
echo strlen($str);
exit;
$data = array("url" => 'http://www.test.com');
$data = http_build_query($data);
// Create a stream
$opts = [
//"http" => [
//"method" => "POST",
//"header" => "Content-Type: multipart/form-data\r\n",
//"content" => $data,
//],
"ssl" => array(
"verify_peer"=>false,
"verify_peer_name"=>false,
),
];

$context = stream_context_create($opts);

// Open the file using the HTTP headers set above
$file = file_get_contents('https://api.potato.im:8443/10100386:Z0dT3Oalvu5IGC71OrvGs3hT/setWebhook', false, $context);

var_dump($file);

0 comments on commit d04f35e

Please sign in to comment.