# NOTE: This script is not ready for the big time yet!
# Comments are welcome. Please do not edit the program without testing it first!
# ParseWiki.pl - Parses the Wikipedia RecentChanges page.
# Copyright (C) 2001 Dave McKee
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
# This should not even be considered an alpha version. More a proof-of-theory.
use strict;
use warnings;
use LWP::Simple;

# Sample entry (one "<li>"-delimited chunk of the RecentChanges page):
# <a href="/wiki.cgi?action=browse&diff=1&id=Frank">(diff)</a> <a href="/wiki/Frank">Frank</a> 4:49 pm (2 <a href="/wiki.cgi?action=history&id=Frank">changes</a>) . . . . . <a href="/wiki.cgi?MichaelTinkler" title="ID 4676 from 24.169.85.xxx">MichaelTinkler</a>

# parse_rc_entry($text)
#
# Parses one "<li>"-delimited chunk of the RecentChanges HTML.
#
# Returns the empty list when the chunk is not a change entry, i.e. when it
# does not start with the "(diff)" browse link.  Otherwise returns ten
# fields, in this order:
#
#   page link, page title, time, number of changes, is-edit flag,
#   edit summary, user ID, IP address, user link, user name
#
# A field that is absent from the entry is returned as 'n/a', except the
# number of changes (0) and the is-edit flag ('' when not a minor edit).
# For an anonymous edit only the IP address is known, so the user link,
# user name and user ID are 'n/a' while the page link and title are kept.
sub parse_rc_entry {
    my ($text) = @_;
    my $na = 'n/a';

    return if !defined $text;
    return if $text !~ m!\A<a href="/wiki\.cgi\?action=browse&diff=1&id=!;

    # Page link target, page title and the time that follows the link.
    # The negated class / non-greedy captures stop at the first closing
    # delimiter instead of running on into later tags.
    my ($page_link, $page_title, $time) = ($na) x 3;
    if ($text =~ m!<a href="/wiki/([^"]*)">(.*?)</a> (.{0,7}m)!) {
        ($page_link, $page_title, $time) = ($1, $2, $3);
    }

    # "(N changes)" is only present when the page changed more than once.
    my $changes = 0;
    if ($text =~ m! [ap]m \((\d+) <a href="/wiki\.cgi\?action=history!) {
        $changes = $1;
    }

    # 1 when the entry is flagged "(edit)", '' otherwise.
    my $is_edit = $text =~ m!<em>\(edit\)</em>! ? 1 : '';

    # Optional edit summary, shown as <strong>[summary]</strong>.
    my $summary = $na;
    if ($text =~ m!<strong>\[(.*?)\]</strong>! && length $1) {
        $summary = $1;
    }

    # Author: either a bare masked IP at the end of the entry (anonymous),
    # or a user link whose title carries the user ID and masked IP.
    my ($user_link, $user_name, $user_id, $ip) = ($na) x 4;
    if ($text =~ m! \. \. ([0-9.]+\.xxx)\s*\z!) {
        $ip = $1;
    }
    elsif ($text =~ m!\. <a href="/wiki\.cgi\?([^"]*)" title="ID (\d+) from ([^"]*\.xxx)">(.*?)</a>!) {
        ($user_link, $user_id, $ip, $user_name) = ($1, $2, $3, $4);
    }

    return (
        $page_link, $page_title, $time,    $changes,   $is_edit,
        $summary,   $user_id,    $ip,      $user_link, $user_name,
    );
}

# run()
#
# Fetches the RecentChanges page, parses every "<li>" chunk and prints the
# collected fields.  Prints 'nowiki' for each chunk that is not a change
# entry.  Dies if the page cannot be fetched.
sub run {
    my $html = get('http://www.wikipedia.com/wiki.cgi?action=rc&days=1');

    # LWP::Simple::get returns undef on any failure.
    die "Could not fetch the RecentChanges page\n" unless defined $html;

    my @g;
    for my $chunk (split /<li>/, $html) {
        my @fields = parse_rc_entry($chunk);
        if (@fields) {
            push @g, @fields;
        }
        else {
            print 'nowiki';
        }
    }
    print @g;
    return;
}

# Only fetch and print when executed as a script, so that the parser can be
# loaded (e.g. by a test) without touching the network.
run() unless caller;