User:Dantheox/pmlinks.pl

From Wikipedia, the free encyclopedia
#!/usr/local/bin/perl -w
use strict;
use LWP;

if (@ARGV < 2) {
    print "Usage:\n";
    print "$0 <wiki source file> <url>\n";
    exit(1);
}

my $ua = LWP::UserAgent->new(
        agent => ''
);

# First get the wiki source
my $srcfile = shift;

# Then get the rendered page to establish whether a link is blue or red
my $url = shift;
my $resp = $ua->get($url) or die "Can't get page '$url'";
my $html = $resp->content;

# Extract the relevant links, taking title and text
my %links = ();

while ($html =~ m!<a href="[^"]+" (class="new" )?title="([^"]+)">(.*?)</a>!gsi) { #"
    unless ($1) { $links{$2}= $3 }
}

# Now check the source against the text
# Only pull out the first link on a line beginning with ":"
# Any subsequent lines which appear "under" this line are included with it
# This means everything up to either a blank line or a line beginning with a lone '#'

my $good = "";
my $bad = "";
my $lst = 0;

open IN, "$srcfile" or die "Can't open $srcfile";
while (<IN>) {
    if (/^#[^:#*;]/) {
	my ($lt) = /\[\[(.*?)\]\]/;
	unless ($lt) { $lst = 0; next }
	
	my ($link, $text);
	if ($lt =~ /\|/) { ($link, $text) = $lt =~ /([^|]+)\|(.+)/ }
        else             {  $link = $text = $lt }

	$lst = $links{$link} ? \$good : \$bad;

	$$lst.=$_;
    } elsif (/^\s*$/) {
	$lst = 0;
    } elsif ($lst) {
	$$lst.=$_;
    }
}
close IN;

print "Red links:\n";
print "$bad\n";
print "Blue links:\n";
print "$good\n";