如何采集下面链接的内容?http://my.mmosite.com/umge/Blog/Item/525f4dc178610c4f07321cc0925cda19.html 这是一篇文章,
假设我们只需采集它的文章标题和文章内容,请写出实现代码。
PHP代码
<?php
/*
 * Task: scrape the article published at
 * http://my.mmosite.com/umge/Blog/Item/525f4dc178610c4f07321cc0925cda19.html
 * and output only its title and its body text.
 */

// The markers below are literal fragments of the page's HTML. They must be
// delimited with plain ASCII single quotes: typographic quotes are not string
// delimiters in PHP and make the script fail to parse.
// mmosite::go() removes "\r\n" sequences from both the markers and the page
// before matching, which is why a marker may span several source lines.

// $startStr1 / $endStr1: HTML immediately before / after the article title.
$startStr1 = '<td width="93%" class="blogtitle"> ';
$endStr1 = '</td>
</tr>
<tr>
<td class="subtitle" style="background-position:0px 32px">';

// $startStr2 / $endStr2: HTML immediately before / after the article body.
$startStr2 = '<div class="textMain">';
$endStr2 = '</div>
<br></td>
</tr>
<tr>
<td colspan="2" class="padding text4" style="word-break : normal; overflow:hidden; padding:25px 15px 10px 15px" id="textMain"><div align="center"><a href="#comment">';

// Page to scrape.
$filename = "http://my.mmosite.com/umge/Blog/Item/525f4dc178610c4f07321cc0925cda19.html";

$mmosite = new mmosite($filename);

// Print the title first, then the body; both are emitted as raw HTML.
echo $mmosite->show($startStr1, $endStr1);
echo $mmosite->show($startStr2, $endStr2);
/**
 * Scraping helper: loads a page and extracts the text found between a
 * literal start marker and a literal end marker.
 */
class mmosite
{
    /** @var string Path or URL passed to fopen() when the page is loaded. */
    public $filename;

    /** @var string|null Raw page contents; null until open() has succeeded. */
    public $contents;

    /**
     * @param string $filename Path or URL of the page to scrape.
     */
    public function __construct($filename)
    {
        $this->filename = $filename;
    }

    /**
     * Returns everything located between $startStr and $endStr in $contents.
     *
     * Line breaks are removed from all three arguments before matching, so a
     * marker copied out of the page may span several lines and still match
     * regardless of whether the page or the source file uses CRLF, LF or CR.
     * Matching is case-insensitive and non-greedy; when the marker pair occurs
     * several times, all captured fragments are concatenated in page order.
     *
     * @param string $contents Text to search.
     * @param string $startStr Literal text that precedes the wanted fragment.
     * @param string $endStr   Literal text that follows the wanted fragment.
     *
     * @return string The captured text, or '' when nothing matches.
     */
    public function go($contents, $startStr, $endStr)
    {
        $startStr = $this->stripLineBreaks($startStr);
        $endStr = $this->stripLineBreaks($endStr);
        $contents = $this->stripLineBreaks($contents);

        // The delimiter must be passed to preg_quote(); otherwise an "@" inside
        // a marker terminates the pattern early and the regex fails to compile.
        $pattern = '@' . preg_quote($startStr, '@') . '(.*?)' . preg_quote($endStr, '@') . '@is';

        // preg_match_all() yields 0 for "no match" and false on a regex error;
        // neither case leaves anything to return.
        if (!preg_match_all($pattern, $contents, $matches)) {
            return '';
        }

        return implode('', $matches[1]);
    }

    /**
     * Reads the whole page at $this->filename into $this->contents.
     *
     * Every call reads the source again, so it can also be used to refresh
     * contents that were loaded earlier.
     *
     * @throws RuntimeException When the source cannot be opened or read.
     */
    public function open()
    {
        $handle = fopen($this->filename, 'r');
        if ($handle === false) {
            throw new RuntimeException('Unable to open ' . $this->filename);
        }

        $contents = stream_get_contents($handle);
        fclose($handle);

        if ($contents === false) {
            throw new RuntimeException('Unable to read ' . $this->filename);
        }

        $this->contents = $contents;
    }

    /**
     * Loads the page if it has not been loaded yet and extracts the text
     * between the two markers.
     *
     * The page is fetched only once per instance; call open() explicitly to
     * force a fresh read.
     *
     * @param string $startStr Literal text that precedes the wanted fragment.
     * @param string $endStr   Literal text that follows the wanted fragment.
     *
     * @return string The captured text, or '' when nothing matches.
     *
     * @throws RuntimeException When the page has to be loaded and that fails.
     */
    public function show($startStr, $endStr)
    {
        if ($this->contents === null) {
            $this->open();
        }

        return $this->go($this->contents, $startStr, $endStr);
    }

    /**
     * Removes every carriage return and line feed from $text.
     *
     * @param string $text
     *
     * @return string
     */
    private function stripLineBreaks($text)
    {
        return str_replace(["\r", "\n"], '', $text);
    }
}