介绍
本次为大家带来 PHP 采集框架 QueryList 的深度用法。
采集内容有
- 文章标题
- 文章链接
- 发布作者
- 发布时间
- 下载链接(自动获取下载链接通道及名字)
- 文章ID(用来做是否重复判断)
- 自动将图片转换为 Markdown 语法
实现代码
use QL\QueryList;
// Scrape the article list of www.aeink.com: for every excerpt on the index page,
// follow the article link and collect title, URL, ID, date, author, plain-text
// body (images rewritten as Markdown) and the list of download mirrors.
$client = new GuzzleHttp\Client();
$res = $client->request('GET', 'http://www.aeink.com');
$html = (string)$res->getBody();

// $client is captured with `use` rather than `global`, so the snippet also works
// when it is pasted inside a function or method.
$title = QueryList::html($html)->find('.excerpt')->map(function ($Row) use ($client) {
    // The heading link carries both the article URL and the article title.
    $link = $Row->find("header>h2>a");
    $href = $link->attr("href");
    $title = $link->text();

    // Numeric article ID taken from the URL (used by callers for de-duplication).
    // Stays null when the URL does not have the expected shape.
    $id = preg_match('/www\.aeink\.com\/(\d+)/', (string)$href, $matches) ? $matches[1] : null;

    // Download and parse the article page once; every lookup below reuses it.
    $page = QueryList::html((string)$client->request('GET', $href)->getBody());

    $date = str_replace("日期:", "", $page->find(".article-meta>span:first")->text());
    $author = $page->find(".article-meta span:eq(1)")->text();

    $article_content = $page->find(".article-content");
    // Read the download-page link BEFORE stripping the pay/download widgets below.
    $down = $article_content->find("#down-tipid>strong a")->attr("href");
    $article_content->find('.paydown,.post-copyright')->remove();
    $content = (string)$article_content->html();

    // Rewrite every <img> as a Markdown image, using the article title as alt text.
    $details = preg_replace_callback('/<img.*?src="(.*?)".*?>/is', function ($text) use ($title) {
        return "\n" . '![' . $title . '](' . $text[1] . ')' . "\n";
    }, $content);
    // Drop inline <style> blocks so their CSS does not leak into the text.
    $details = preg_replace('/<style>(.*?)<\/style>/is', '', (string)$details);

    // NOTE(review): the empty selector is kept from the original; it appears to be
    // used to take the text of the whole re-parsed fragment - confirm against QueryList.
    $text = QueryList::html($details)->find("")->text();

    // Collect the download mirrors (name + URL). Articles without a download
    // link get an empty list instead of triggering a request with a null URI.
    $dw = [];
    if ($down !== null && $down !== '') {
        $downloadPage = QueryList::html((string)$client->request('GET', $down)->getBody());
        $dw = $downloadPage->find(".panel-body a")->map(function ($R) {
            return [
                'name' => $R->text(),
                'href' => $R->attr('href'),
            ];
        })->all();
    }

    return [
        'thumb' => $Row->find(".focus img")->attr('src'),
        'title' => $title,
        'href' => $href,
        'id' => $id,
        'date' => $date,
        'author' => $author,
        'text' => $text,
        'dw' => $dw,
    ];
});

print_r($title->all());
说明
AE 博客安装有 WAF(Web 应用防火墙),请求过于频繁可能会被拦截,因此建议每三个小时执行一次采集,并可使用 Redis 缓存采集结果。