火车头内容采集范例:支持下载远程图片、提取正文中的第一张图片作为缩略图,并支持将内容加入推荐位。
官方教程《火车头内容采集范例》 https://www.xunruicms.com/doc/1084.html
把官方教程中的 dayrui/My/Api/Caiji.php 文件内容整体替换为以下代码:
<?php
// Bind this request to the target module; 'news' is the module directory name.
$this->_module_init('news');
// Category listing mode (?action=category): prints "name<=>id" pairs, one <h1> per category,
// so the collector can map its own categories onto category ids.
// The isset() guard avoids an "undefined index" warning when no action parameter is sent.
if (isset($_GET['action']) && $_GET['action'] == 'category') {
    $categories = $this->module['category'];
    if (!$categories) {
        // Nothing to list; report it and skip the loop instead of iterating over an empty/null value.
        echo '模块【' . $this->module['dirname'] . '】没有创建栏目';
    } else {
        foreach ($categories as $t) {
            // Only entries with child == 0 and tid == 1 are listed.
            // NOTE(review): presumably "leaf category that belongs to a module" — confirm against the category schema.
            if ($t['child'] == 0 && $t['tid'] == 1) {
                echo '<h1>' . $t['name'] . '<=>' . $t['id'] . '</h1>' . PHP_EOL;
            }
        }
    }
}
else {
// Import mode: build the record to store from the request parameters.
// NOTE(review): nothing in this script authenticates the caller; confirm that the surrounding
// API layer restricts access before exposing this endpoint.
$data = $_REQUEST;
// Publisher user id (fixed to 1).
$data['uid'] = 1;
// Publisher account name (fixed to 'admin').
$data['author'] = 'admin';
// Main-table columns. The (array) cast keeps array_merge()/array_unique() from failing
// when the table cache has not been built yet.
$fields[1] = (array) $this->get_cache('table-' . SITE_ID, $this->content_model->dbprefix(SITE_ID . '_' . MOD_DIR));
$cache = $this->get_cache('table-' . SITE_ID, $this->content_model->dbprefix(SITE_ID . '_' . MOD_DIR . '_category_data'));
if ($cache && is_array($cache)) {
    $fields[1] = array_merge($fields[1], $cache);
}
// Secondary-table columns.
$fields[0] = (array) $this->get_cache('table-' . SITE_ID, $this->content_model->dbprefix(SITE_ID . '_' . MOD_DIR . '_data_0'));
// Drop duplicate column names.
$fields[0] = array_unique($fields[0]);
$fields[1] = array_unique($fields[1]);
// Fields that are not plain text (multi-file uploads, checkboxes, linkage fields, ...)
// have to be converted to their storage format here, according to what the collector sends.
// Sort the submitted values into main-table ($save[1]) and secondary-table ($save[0]) columns.
$save = [];
foreach ($fields as $ismain => $field) {
    foreach ($field as $name) {
        if (isset($data[$name])) {
            $save[$ismain][$name] = $data[$name];
        }
    }
}
// A category id is mandatory; empty() also covers a missing parameter without raising a warning.
if (empty($data['catid'])) {
    exit('栏目为空');
}
$save[1]['uid'] = $save[0]['uid'] = $data['uid'];
$save[1]['catid'] = $save[0]['catid'] = $data['catid'];
$save[1]['url'] = '';
$save[1]['status'] = 9; // 9 = published normally, 1 = waiting for review
$save[1]['hits'] = 0;
$save[1]['displayorder'] = 0;
$save[1]['link_id'] = 0;
// The publish time is the current time plus a random offset of up to two hours.
$save[1]['inputtime'] = $save[1]['updatetime'] = SYS_TIME + rand(0, 7200);
$save[1]['inputip'] = '127.0.0.1';
//$save[1]['keywords'] = dr_get_keywords( $save[1]['title']); // extract keywords through the plugin
//$save[1]['description'] = dr_get_description( $save[0]['content'], 100); // use the first 100 characters of the body as the description
// Reject titles that already exist. The lookup is skipped when no title was submitted,
// which avoids reading an undefined array key.
if (isset($save[1]['title']) && $this->content_model->table(SITE_ID . '_' . MOD_DIR)->where('title', $save[1]['title'])->counts()) {
    echo '重复';
    exit;
}
// Article body as posted by the collector; treated as empty when it is missing or not a string.
$value = (isset($data['content']) && is_string($data['content'])) ? $data['content'] : '';
// 1 = use the first image found in the body as the thumbnail.
$slt = 1;
// 1 = download remote images and rewrite their URLs in the body.
$yct = 1;
// Offset of the first inline base64 payload, or false when the body has none.
$base64 = strpos($value, ';base64,');
// Marker string used to tag every attachment archived during this request.
$rid = md5(FC_NOW_URL.\Phpcmf\Service::L('input')->get_user_agent().\Phpcmf\Service::L('input')->ip_address().\Phpcmf\Service::C()->uid);
// First usable image of the body (attachment id or URL); stays '' when there is none.
$thumb = '';
if ($yct || $slt || $base64) {
    // Strip <pre>/<code> sections first so markup shown as sample code is not treated as real images.
    $temp = preg_replace('/<pre(.*)<\/pre>/siU', '', $value);
    $temp = preg_replace('/<code(.*)<\/code>/siU', '', $temp);
    if (preg_match_all("/(src)=([\"|']?)([^ \"'>]+)\\2/i", $temp, $imgs)) {
        // Upload options shared by every download below. The original expressions read
        // $field['setting']['option'][...], but $field in this script is only a list of column
        // names, so those per-field options were always empty; the effective values are spelled
        // out here instead: site-level watermark switch, attachment store 0, no image reduction.
        // NOTE(review): confirm that get_attach_info(0, 0) selects the default storage.
        $watermark = \Phpcmf\Service::C()->get_cache('site', SITE_ID, 'watermark', 'ueditor') ? 1 : 0;
        $attachment = \Phpcmf\Service::M('Attachment')->get_attach_info(0, 0);
        foreach ($imgs[3] as $img) {
            if ($base64 && preg_match('/^(data:\s*image\/(\w+);base64,)/i', $img, $result)) {
                // Inline base64 image: decode it and store it as a regular attachment.
                $ext = strtolower($result[2]);
                if (!in_array($ext, ['png', 'jpg', 'jpeg', 'gif', 'webp'])) {
                    continue;
                }
                $content = base64_decode(str_replace($result[1], '', $img));
                // Skip undecodable payloads and anything above 30,000,000 bytes.
                if ($content === false || strlen($content) > 30000000) {
                    continue;
                }
                $rt = \Phpcmf\Service::L('upload')->base64_image([
                    'ext' => $ext,
                    'content' => $content,
                    'watermark' => $watermark,
                    'attachment' => $attachment,
                ]);
                if (!$rt['code']) {
                    continue;
                }
                $att = \Phpcmf\Service::M('Attachment')->save_data($rt['data'], 'ueditor:'.$rid);
                if ($att['code']) {
                    // Archived: point the body at the stored file and remember the attachment id.
                    $value = str_replace($img, $rt['data']['url'], $value);
                    $img = $att['code'];
                    // Tag the attachment with this request's marker.
                    \Phpcmf\Service::M('Attachment')->save_ueditor_aid($rid, $att['code']);
                }
            } else {
                // Ordinary src attribute: anything without a recognised image extension is skipped.
                $ext = _get_image_ext($img);
                if (!$ext) {
                    continue;
                }
                // Download only absolute http(s) URLs, and only while the "mfile" app (if installed)
                // does not report the user's storage quota as exhausted.
                if ($yct && strpos($img, 'http') === 0
                    && !(dr_is_app('mfile') && \Phpcmf\Service::M('mfile', 'mfile')->check_upload(\Phpcmf\Service::C()->uid))) {
                    $arr = parse_url($img);
                    $domain = (is_array($arr) && isset($arr['host'])) ? $arr['host'] : '';
                    if ($domain) {
                        // Images already served by this installation are left alone: site domains,
                        // the upload URL, and any configured remote attachment store.
                        $sites = \Phpcmf\Service::R(WRITEPATH.'config/domain_site.php');
                        $is_own = isset($sites[$domain]) || strpos(SYS_UPLOAD_URL, $domain) !== false;
                        if (!$is_own) {
                            $remote = \Phpcmf\Service::C()->get_cache('attachment');
                            if ($remote) {
                                foreach ($remote as $store) {
                                    if (strpos($store['url'], $domain) !== false) {
                                        $is_own = true;
                                        break;
                                    }
                                }
                            }
                        }
                        if (!$is_own) {
                            // Foreign host: fetch the file and archive it.
                            $rt = \Phpcmf\Service::L('upload')->down_file([
                                'url' => $img,
                                'timeout' => 5,
                                'watermark' => $watermark,
                                'attachment' => $attachment,
                                'file_ext' => $ext,
                            ]);
                            if ($rt['code']) {
                                $att = \Phpcmf\Service::M('Attachment')->save_data($rt['data'], 'ueditor:'.$rid);
                                if ($att['code']) {
                                    // Archived: point the body at the stored file and remember the attachment id.
                                    $value = str_replace($img, $rt['data']['url'], $value);
                                    $img = $att['code'];
                                    // Tag the attachment with this request's marker.
                                    \Phpcmf\Service::M('Attachment')->save_ueditor_aid($rid, $att['code']);
                                }
                            }
                        }
                    }
                }
            }
            // Form-level thumbnail handling; it only runs when the form library exposes a
            // File-type "thumb" field that has no value yet.
            // NOTE(review): whether that library is populated in this API context is not visible here.
            if ($img && $slt) {
                $_field = \Phpcmf\Service::L('form')->fields;
                if (isset($_field['thumb']) && $_field['thumb']['fieldtype'] == 'File' && !\Phpcmf\Service::L('Field')->data[$_field['thumb']['ismain']]['thumb']) {
                    if (!is_numeric($img)) {
                        // Still a URL, so the image was not archived above: try to fetch it for the thumbnail.
                        $arr = parse_url($img);
                        $domain = (is_array($arr) && isset($arr['host'])) ? $arr['host'] : '';
                        if ($domain) {
                            $file = dr_catcher_data($img, 8);
                            if (!$file) {
                                CI_DEBUG && log_message('debug', '服务器无法下载图片:'.$img);
                            } else {
                                // Reuse an existing attachment with identical content when there is one.
                                $att = \Phpcmf\Service::M()->table('attachment')->like('related', 'ueditor')->where('filemd5', md5($file))->getRow();
                                if ($att) {
                                    $img = $att['id'];
                                } else {
                                    // Otherwise archive the bytes that were just fetched.
                                    $rt = \Phpcmf\Service::L('upload')->down_file([
                                        'url' => $img,
                                        'timeout' => 5,
                                        'watermark' => $watermark,
                                        'attachment' => $attachment,
                                        'file_ext' => $ext,
                                        'file_content' => $file,
                                    ]);
                                    if ($rt['code']) {
                                        $att = \Phpcmf\Service::M('Attachment')->save_data($rt['data'], 'ueditor:'.$rid);
                                        if ($att['code']) {
                                            // Archived: point the body at the stored file and remember the attachment id.
                                            $value = str_replace($img, $rt['data']['url'], $value);
                                            $img = $att['code'];
                                            // Tag the attachment with this request's marker.
                                            \Phpcmf\Service::M('Attachment')->save_ueditor_aid($rid, $att['code']);
                                        }
                                    }
                                }
                            }
                        }
                    }
                    \Phpcmf\Service::L('Field')->data[$_field['thumb']['ismain']]['thumb'] = $_POST['data']['thumb'] = $img;
                }
            }
            // Remember the FIRST usable image only. The script previously stored whatever $img
            // held after the loop, i.e. the last src seen (even a non-image one that was skipped).
            // Inline data: URIs that could not be archived are never used as a thumbnail.
            if ($slt && $thumb === '' && $img && stripos((string) $img, 'data:') !== 0) {
                $thumb = $img;
            }
        }
    }
}
// Final body, with archived images rewritten to their stored URLs.
$save[0]['content'] = $value;
// Final thumbnail: the first image of the body when one was found; otherwise a thumb value
// submitted with the request is kept, or '' when there is none.
if ($thumb !== '') {
    $save[1]['thumb'] = $thumb;
} elseif (!isset($save[1]['thumb'])) {
    $save[1]['thumb'] = '';
}
// Store the record. The first argument is the content id.
// NOTE(review): 0 presumably means "create a new record" — confirm against save_content().
$rt = $this->content_model->save_content(0, $save);
if ($rt['code']) {
    /*
    // Optional: generate the static page right after a successful import.
    //dr_html_auth($_SERVER['SERVER_ADDR']);
    //dr_catcher_data(SITE_URL.'index.php?s='.MOD_DIR.'&c=html&m=showfile&id='.$rt['id']);
    $atcode = 'chtml_'.SITE_ID.'_'.MOD_DIR.'_'.$rt['code'];
    \Phpcmf\Service::L('cache')->set_auth_data($atcode, $rt['code'], SITE_ID);
    dr_catcher_data(SITE_URL.'index.php?s='.MOD_DIR.'&c=html&m=showfile&id='.$rt['code'].'&atcode='.$atcode);
    */
    // Recommendation flags: "flag" is a comma-separated list of flag ids (an array is accepted too).
    if (!empty($data['flag'])) {
        $myflag = is_array($data['flag']) ? $data['flag'] : explode(',', $data['flag']);
        foreach ($myflag as $i) {
            if (!is_scalar($i)) {
                continue;
            }
            // Tolerate spaces around the ids ("1, 2").
            $i = trim((string) $i);
            // Only flags configured in the module settings are applied.
            if ($i !== '' && isset($this->module['setting']['flag'][$i])) {
                $this->content_model->insert_flag((int)$i, $rt['code'], $rt['data'][1]['uid'], $rt['data'][1]['catid']);
            }
        }
    }
    exit('成功');
} else {
    exit('失败');
}
}
/**
 * Work out the image extension of a (usually remote) image URL.
 *
 * Resolution order:
 *  1. the extension of the URL path, ignoring query string and fragment;
 *  2. the text after the last dot of the whole URL;
 *  3. the first whitelisted extension found anywhere in the URL;
 *  4. the MIME type reported by getimagesize(), which may have to fetch the URL.
 *
 * This function must stay an unconditional top-level declaration: the script calls it
 * before this line is reached, which only works for declarations PHP registers at compile time.
 *
 * @param string $url Image URL or path; anything longer than 300 bytes is rejected.
 * @return string One of gif, jpg, jpeg, png, webp, or '' when no image extension can be determined.
 */
function _get_image_ext($url) {
    if (strlen($url) > 300) {
        return '';
    }
    $arr = ['gif', 'jpg', 'jpeg', 'png', 'webp'];
    // Look at the path component first, so "a.png?from=gif" or "a.png#x.jpg" resolve to "png"
    // instead of being misread from the query string or fragment.
    $path = parse_url($url, PHP_URL_PATH);
    if (is_string($path) && $path !== '') {
        $path_ext = strtolower(pathinfo($path, PATHINFO_EXTENSION));
        if ($path_ext !== '' && in_array($path_ext, $arr, true)) {
            return $path_ext;
        }
    }
    // Fallback heuristics, unchanged: text after the last dot of the raw URL.
    $tail = strrchr($url, '.');
    $ext = $tail === false ? '' : str_replace('.', '', trim(strtolower($tail), '.'));
    if ($ext && in_array($ext, $arr, true)) {
        return $ext; // whitelisted extension
    } elseif ($ext && strlen($ext) < 4) {
        //CI_DEBUG && log_message('error', '此路径不是远程图片:'.$url);
        return ''; // a short, non-image extension (php, js, ...)
    }
    // No clean extension: accept the first whitelisted extension that appears anywhere in the URL.
    foreach ($arr as $t) {
        if (stripos($url, $t) !== false) {
            return $t;
        }
    }
    // Last resort: inspect the file itself and map its MIME type onto the whitelist.
    $rt = getimagesize($url);
    if ($rt && $rt['mime']) {
        foreach ($arr as $t) {
            if (stripos($rt['mime'], $t) !== false) {
                return $t;
            }
        }
    }
    CI_DEBUG && log_message('debug', '服务器无法获取远程图片的扩展名:'.dr_safe_replace($url));
    return '';
}
// End the request here. Only the category listing reaches this line; every path of the import branch calls exit itself.
exit;