[Home]Simon J Kissane/yrget perl script

HomePage | Simon J Kissane | Recent Changes | Preferences

#!/usr/local/bin/perl -w
use strict;
use LWP::UserAgent;

# Build the single HTTP client object that every download below goes through.
my $ua = LWP::UserAgent->new();

# Configure the client: pick up proxy settings via env_proxy, set the
# request timeout to 30, and put this script's own label in front of the
# library's default User-Agent string.
$ua->env_proxy;
$ua->timeout(30);
my $default_agent = $ua->agent;
$ua->agent("SJK downloading Year in Review: " . $default_agent);

# Fetch the newest revision of every article named in the ENTRIES file.
#
# ENTRIES is read one line at a time; each non-blank line is used as an
# article id.  For every id the loop
#   1. downloads the article's history page and takes the highest number
#      found in a "Revision N: <a href=" marker (0 if none is found),
#   2. downloads the edit form for that revision, and
#   3. writes the contents of the form's <textarea>, preceded by a
#      "#YEAR [[id]] REV=N" header line, to data/<id>.
# An HTTP error response or a file error aborts the whole run.

# Download $url through $agent and return the response body.
# Prints a progress line first and dies with the HTTP status line when the
# response is an error.
sub fetch_or_die {
    my ($agent, $url) = @_;

    print "Getting $url...\n";
    my $response = $agent->request(HTTP::Request->new('GET', $url));
    die $response->status_line . " \n" if $response->is_error;

    return $response->content;
}

open my $entries_fh, '<', 'ENTRIES'
  or die "can't open ENTRIES file: $!";

ENTRY:
while (my $entry = <$entries_fh>) {
    chomp $entry;

    # A blank line would request an empty id and then try to open the
    # directory "data/" for writing, so skip it.
    next ENTRY unless $entry =~ /\S/;

    # NOTE(review): the id is placed in the URLs as-is; ids containing
    # characters such as '&' or '#' would presumably need escaping - confirm
    # against the contents of ENTRIES.
    my $history = fetch_or_die($ua,
        "http://www.wikipedia.com/wiki.cgi?action=history&id=$entry");

    # Scan the history page line by line for the highest revision number.
    # [0-9]+ (rather than *) guarantees the capture is a real number, so the
    # numeric comparison never sees an empty string.
    my $maxrev = 0;
    for my $line (split /\n/, $history) {
        if ($line =~ /Revision ([0-9]+): <a href=/) {
            $maxrev = $1 if $1 > $maxrev;
        }
    }

    my $edit_page = fetch_or_die($ua,
        "http://www.wikipedia.com/wiki.cgi?action=edit&revision=$maxrev&id=$entry");

    # Three-argument open with a lexical handle: the id can no longer be
    # interpreted as part of the open mode, and the built-in DATA handle is
    # left alone.  The file is created even when no textarea is found, as
    # before.
    open my $out_fh, '>', "data/$entry"
      or die "Can't open data/$entry: $!";

    # Non-greedy match so the capture stops at the first closing tag.
    if ($edit_page =~ /<textarea[^>]*>(.*?)<\/textarea>/s) {
        my $out = "#YEAR [[$entry]] REV=$maxrev\n" . $1;

        # NOTE(review): as written this replaces an apostrophe with an
        # apostrophe, i.e. it changes nothing.  Presumably the pattern was
        # originally an HTML entity that was decoded when the script was
        # published - confirm before relying on it.
        $out =~ s/'/\'/g;

        print {$out_fh} $out;
    }
    else {
        warn "No textarea found for $entry; data/$entry left empty\n";
    }

    # Buffered write errors only surface at close, so check it.
    close $out_fh
      or die "Can't close data/$entry: $!";
}

close $entries_fh;

HomePage | Simon J Kissane | Recent Changes | Preferences
This page is read-only | View other revisions
Last edited November 9, 2001 6:35 pm by Simon J Kissane (diff)
Search: