一、file_get_contents()获取网页
获取网页 http://news.sina.com.cn/c/nd/2016-10-23/doc-ifxwztru6951143.shtml 的全部内容
<?php
// Page whose complete HTML source is fetched and displayed.
$url = "http://news.sina.com.cn/c/nd/2016-10-23/doc-ifxwztru6951143.shtml";

$html = file_get_contents($url);
if ($html === false) {
    // file_get_contents() returns false on failure; stop instead of
    // rendering an empty textarea.
    exit("Failed to fetch {$url}\n");
}

// If the Chinese text comes out garbled, convert the page to UTF-8 first:
// $html = iconv("gb2312", "utf-8", $html);

// Escape the markup so the page source is shown verbatim and a "</textarea>"
// inside the fetched page cannot terminate the textarea early.
echo "<textarea style='width:800px;height:600px;'>"
    . htmlspecialchars($html, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8')
    . "</textarea>";
?>
二、采集网页部分内容
$need=getNeedBetween($html, 'id="artibody"' , '<p class="article-editor">' );
从内容中的字符串 id="artibody" 处开始截取
截取到内容中的字符串 <p class="article-editor"> 处结束
<?php
// Extract the article body: everything after the opening marker and before
// the editor paragraph. $html is expected to hold the fetched page source.
$need = getNeedBetween($html, 'id="artibody">', '<p class="article-editor">');

/**
 * Return the text located between two marker strings.
 *
 * Both markers are matched case-insensitively. The result starts right after
 * the first occurrence of $mark1 and ends right before the first occurrence
 * of $mark2 that follows it; neither marker is included.
 *
 * @param string $kw1   Text to search in.
 * @param string $mark1 Opening marker.
 * @param string $mark2 Closing marker.
 * @return string|int The text between the markers, or 0 when a marker is
 *                    empty or the markers are not found in that order.
 */
function getNeedBetween($kw1, $mark1, $mark2)
{
    $haystack = (string) $kw1;
    $mark1 = (string) $mark1;
    $mark2 = (string) $mark2;

    // An empty marker cannot delimit anything.
    if ($mark1 === '' || $mark2 === '') {
        return 0;
    }

    // Strict comparison: a marker at offset 0 is a valid match, not a miss.
    $openPos = stripos($haystack, $mark1);
    if ($openPos === false) {
        return 0;
    }

    // Skip the whole opening marker so none of it leaks into the result.
    $contentStart = $openPos + strlen($mark1);

    // Look for the closing marker only after the opening one.
    $closePos = stripos($haystack, $mark2, $contentStart);
    if ($closePos === false) {
        return 0;
    }

    return substr($haystack, $contentStart, $closePos - $contentStart);
}

echo $need;
?>
三、整个php文件
<?php
// Page whose article body is extracted below.
$url = "http://news.sina.com.cn/c/nd/2016-10-23/doc-ifxwztru6951143.shtml";

$html = file_get_contents($url);
if ($html === false) {
    // file_get_contents() returns false on failure; there is nothing to
    // extract from, so stop with a readable message.
    exit("Failed to fetch {$url}\n");
}

// If the Chinese text comes out garbled, convert the page to UTF-8 first:
// $html = iconv("gb2312", "utf-8", $html);

// Cut out the part of the page between the article container's opening
// marker and the editor paragraph.
$need = getNeedBetween($html, 'id="artibody">', '<p class="article-editor">');
/**
 * Return the text located between two marker strings.
 *
 * Both markers are matched case-insensitively. The result starts right after
 * the first occurrence of $mark1 and ends right before the first occurrence
 * of $mark2 that follows it; neither marker is included.
 *
 * @param string $kw1   Text to search in.
 * @param string $mark1 Opening marker.
 * @param string $mark2 Closing marker.
 * @return string|int The text between the markers, or 0 when a marker is
 *                    empty or the markers are not found in that order.
 */
function getNeedBetween($kw1, $mark1, $mark2)
{
    $haystack = (string) $kw1;
    $mark1 = (string) $mark1;
    $mark2 = (string) $mark2;

    // An empty marker cannot delimit anything.
    if ($mark1 === '' || $mark2 === '') {
        return 0;
    }

    // Strict comparison: a marker at offset 0 is a valid match, not a miss.
    $openPos = stripos($haystack, $mark1);
    if ($openPos === false) {
        return 0;
    }

    // Skip the whole opening marker so none of it leaks into the result.
    $contentStart = $openPos + strlen($mark1);

    // Look for the closing marker only after the opening one.
    $closePos = stripos($haystack, $mark2, $contentStart);
    if ($closePos === false) {
        return 0;
    }

    return substr($haystack, $contentStart, $closePos - $contentStart);
}
// Send the extracted fragment to the output.
print $need;
?>