【原创教程,高手勿喷】利用 PHP 抓取本吧首页的帖子标题
无语。之前随便回了一帖,被认为**,还被删帖,其实只是实话实说而已。这个采集功能如果用 Python 写,只用几行代码就够了。我个人也写过采集软件,以下是其中 PHP 部分的采集代码:它动态生成采集规则,已经实现过对任何网站中信息内容的采集。如果再被删除,我就不回帖了。年轻人太浮躁。
class ControllerlankiibeginCaiji extends Controller {
public function index() {
$this->load->model('lankii/caijiSetting');
$caijiRule=$this->model_lankii_caijiSetting->getCaijiRule($this->request->get['ruleId']);
if(is_array($caijiRule[0])){
$this->data['caijiRule']=$caijiRule[0];
$this->load->model('lankii/caijiHttpClass');
$myhttp=$this->model_lankii_caijiHttpClass->doGet($caijiRule[0]['targetUrl']);
$lankiiHTML=mb_convert_encoding($myhttp, "UTF-8", $caijiRule[0]['targetSiteCharset']);
$urlGetAll="/<a(.*)href=\"?([^\"]*)\"?/i";
preg_match_all($urlGetAll,$lankiiHTML,$allUrl);
$lankiiHTML="";
//去除重复网址
if($caijiRule[0]['urlReplaceSame']){$allUrl[2]=array_flip(array_flip($allUrl[2])); }
$this->data['allUrl']=str_ireplace('../','',$allUrl[2]);
$urlCateReg=$this->replace($caijiRule[0]['urlCategoryReg']);
$urlCateReg='/'.$urlCateReg.'/i';