介绍
本次为大家带来 PHP 框架 QueryList 的深度用法。
采集内容有
- 文章标题
- 文章链接
- 发布作者
- 发布时间
- 下载链接(自动获取下载链接通道及名字)
- 文章ID(用来做是否重复判断)
图片自动转换为 Markdown 语法
实现代码
use QL\QueryList;

// Scrapes the article list on the aeink.com front page. For every excerpt it
// follows the article link, extracts metadata and body text, then follows the
// download page (when present) to collect the download channels.
// HTTP failures surface as Guzzle exceptions and abort the run.

$client = new GuzzleHttp\Client();

$res = $client->request('GET', 'http://www.aeink.com');
$html = (string)$res->getBody();

$articles = QueryList::html($html)->find('.excerpt')->map(function ($Row) use ($client) {
    // The client is captured explicitly so the closure does not depend on
    // being executed at global scope.
    $link = $Row->find("header>h2>a");
    $href = $link->attr("href");
    if (empty($href)) {
        // No article link to follow; the entry is dropped before printing.
        return null;
    }

    // Numeric article ID taken from the URL (used for duplicate detection).
    // Stays null when the URL does not have the expected shape.
    $id = preg_match('/www\.aeink\.com\/(\d+)/', $href, $matches) === 1 ? $matches[1] : null;
    $title = $link->text();

    // Parse the detail page once and reuse the document for every lookup.
    $res = $client->request('GET', $href);
    $page = QueryList::html((string)$res->getBody());

    $date = str_replace("日期:", "", $page->find(".article-meta>span:first")->text());
    $author = $page->find(".article-meta span:eq(1)")->text();

    $article_content = $page->find(".article-content");
    // Read the download-page link before the pay/copyright blocks are removed.
    $down = $article_content->find("#down-tipid>strong a")->attr("href");
    $article_content->find('.paydown,.post-copyright')->remove();
    $content = (string)$article_content->html();

    // Convert <img> tags to Markdown image syntax, using the article title as
    // the alt text and the captured src attribute as the target.
    $details = preg_replace_callback('/<img.*?src="(.*?)".*?>/is', function ($img) use ($title) {
        return "\n" . '![' . $title . '](' . $img[1] . ')' . "\n";
    }, $content);
    if ($details === null) {
        // PCRE error: fall back to the unconverted markup.
        $details = $content;
    }

    // Drop inline stylesheets (with or without attributes) so their CSS does
    // not end up in the extracted text.
    $stripped = preg_replace('/<style\b[^>]*>.*?<\/style>/is', '', $details);
    if ($stripped !== null) {
        $details = $stripped;
    }

    // NOTE(review): the empty selector is kept from the original; presumably it
    // selects the whole parsed fragment so text() yields all of its text — confirm.
    $text = QueryList::html($details)->find("")->text();

    // Download channels: one entry (name + href) per link on the download page.
    $dw = [];
    if (!empty($down)) {
        $res = $client->request('GET', $down);
        $dw = QueryList::html((string)$res->getBody())->find(".panel-body a")->map(function ($R) {
            return [
                'name' => $R->text(),
                'href' => $R->attr('href'),
            ];
        })->all();
    }

    return [
        'thumb' => $Row->find(".focus img")->attr('src'),
        'title' => $title,
        'href' => $href,
        'id' => $id,
        'date' => $date,
        'author' => $author,
        'text' => $text,
        'dw' => $dw,
    ];
});

// Entries skipped above are null; remove them and reindex before printing.
print_r(array_values(array_filter($articles->all())));
说明
AE 博客装有 WAF(防火墙),建议每三个小时执行一次采集,并可使用 Redis 做缓存。