Perl模块

推荐一个很好的 Perl 模块网站:CPAN(https://www.cpan.org)

标准爬虫
# Standalone script: given a URL, download that page's HTML and print it
# to STDOUT re-encoded as GB2312.
use strict;
use warnings;
use Encode;
use HTTP::Request;
use LWP::UserAgent;

# URL of the page to fetch.
my $website = 'http://kjs.mep.gov.cn/hjbhbz/bzwb/hxxhj/xgjcffbz/index_1.htm';

my $ua       = LWP::UserAgent->new();
my $request  = HTTP::Request->new('GET', $website);
my $response = $ua->request($request);

# Stop on HTTP or transport errors instead of printing an error page.
die "GET $website failed: " . $response->status_line . "\n"
    unless $response->is_success;

# decoded_content() turns the raw bytes into Perl characters using the
# charset declared for the response. This replaces Encode::_utf8_on(), an
# internal function that only flips the UTF-8 flag without validating the
# bytes. It returns undef when the charset or content encoding is unknown.
my $html = $response->decoded_content;
die "Cannot decode the body of $website\n" unless defined $html;

print encode("gb2312", $html);


# Standalone script: download one PDF file and save it under a relative
# file name (i.e. in the current working directory).
use strict;
use warnings;
use HTTP::Request::Common;
use LWP::UserAgent;

my $ua = LWP::UserAgent->new();
$ua->timeout(100);    # LWP::UserAgent timeouts are expressed in seconds

my $url      = "http://kjs.mep.gov.cn/hjbhbz/bzwb/dqhjbh/jcgfffbz/201203/W020120410332725219541.pdf";
my $filespec = "W020120410332725219541.pdf";

# Passing a file name as the second argument makes LWP write the response
# body to that file instead of keeping it in the response object.
my $response = $ua->request(GET($url), $filespec);

# Report a failed download; the original discarded the result silently.
die "Download of $url failed: " . $response->status_line . "\n"
    unless $response->is_success;


Related Articles

Quote Of The Day