User:ImplSearchBot/Code
<lang perl>#!/usr/bin/perl -w use strict; use MediaWiki::Bot; use JSON qw/to_json from_json/;
# Handles interaction with the wiki.
# Note that I had to modify HTTP::Message to make it work:
# HTTP::Message silently failed when presented by MediaWiki
# with an encoding type of "application/json" or some such.
# Command line: (username) (password) [posttosite].
# The first two arguments are mandatory; the optional third one, when present,
# makes postpage() send pages to the wiki instead of writing local files.
my $usage = "Usage: $0 (username) (password) [posttosite]";

# Consume the credentials from @ARGV; a missing argument leaves its variable undef.
my ($username, $password) = splice @ARGV, 0, 2;
die $usage unless defined $username;
die $usage unless defined $password;

# my $json = new JSON;
# $json->allow_blessed(1);
# $json->convert_blessed(1);

my $posttosite = shift @ARGV;

# Run statistics, reported at the end of the script.
my $starttime   = time;
my $pagesedited = 0;

print "Creating editor\n";
my $editor = MediaWiki::Bot->new('ImpleSearchBot');
$editor->{debug} = 1;
# Turn a wiki page name into a string usable as a file name.
#
# Takes the page name as its only argument and returns a copy in which
# every ':' and every '/' has been replaced by '_'.
sub sanitizenamefs {
    my ($pagename) = @_;

    (my $fsname = $pagename) =~ tr{:/}{__};

    return $fsname;
}
# Publish one page, either to the wiki or to a local file.
#
# Arguments (positional):
#   $pagename  - wiki page title
#   $pagedata  - complete page text
#   $remark    - edit summary; "ImplSearchBot:" is prepended when posting
#   $minoredit - handed to $editor->edit() as its fourth argument
#
# When $posttosite is undefined, the page is written to
# "<sanitized page name>.wikitxt" instead of being sent to the wiki.
# Failures are reported with warn(); the sub never dies on an I/O error.
# $pagesedited is incremented on every call, including calls that fail.
sub postpage {
    my ($pagename, $pagedata, $remark, $minoredit) = @_;

    ++$pagesedited;

    if (defined $posttosite) {
        print "Posting $pagename\n";
        $editor->edit($pagename, $pagedata, "ImplSearchBot:$remark", $minoredit)
            or warn "Failed to post page: " . $editor->{'errstr'};
    }
    else {
        my $filename = sanitizenamefs($pagename) . ".wikitxt";
        print "Saving: $filename\n";

        # open() autovivifies the handle variable even when it fails, so
        # "defined $outfile" cannot detect the failure; test open() itself.
        my $outfile;
        unless (open $outfile, '>', $filename) {
            warn "Failed to open $filename: $!";
            return;
        }

        print {$outfile} $pagedata;

        # Buffered write errors only surface when the handle is closed.
        close $outfile
            or warn "Failed to write $filename: $!";
    }
}
# Load a data set previously stored by cachedata().
#
# Takes the data set name; the file read is the sanitized form of
# "cache_<name>.json".  Returns the structure decoded by from_json(), or
# undef (after a warning) when the file cannot be opened or is empty.
# from_json() itself may still die on malformed content.
sub getcacheddata {
    my ($dataname) = @_;
    my $filename = sanitizenamefs("cache_" . $dataname . ".json");

    # open() autovivifies the handle variable even when it fails, so
    # "defined $infile" cannot detect the failure; test open() itself.
    my $infile;
    unless (open $infile, '<', $filename) {
        warn "Failed to load cached data $filename: $!";
        return undef;
    }

    # Slurp the whole file in one read.
    my $jsondata = do { local $/; <$infile> };
    close $infile;

    # An empty file would otherwise hand undef or "" to from_json(), which dies.
    unless (defined $jsondata and length $jsondata) {
        warn "Failed to load cached data $filename: file is empty";
        return undef;
    }

    return from_json($jsondata);
}
# Not doing anything with the cached data yet. It's intended to allow us to compare site state between now and when we
# last ran, so that we can learn to avoid doing unnecessary work. (Server resources and bloated edit statistics...)
# Also, I plan on publishing the cache files in a version control system like SVN or Git, to allow multiple bots
# to share the data and to provide history. (It's the public target that necessitates using JSON instead of Data::Dumper.)
# SVN has the advantage that I already know how to use it. Git has the advantage that I can use GitHub and not tax
# my Slice with Git traffic, and I don't have to punch a hole in the firewall to access more services.
# Store a data structure on disk as JSON.
#
# Arguments (positional):
#   $dataname - name of the data set; the file written is the sanitized
#               form of "cache_<name>.json"
#   $data     - the value handed to to_json()
#
# Failures to open or write the file are reported with warn() and the sub
# returns without dying.
sub cachedata {
    my ($dataname, $data) = @_;
    my $filename = sanitizenamefs("cache_" . $dataname . ".json");

    # open() autovivifies the handle variable even when it fails, so
    # "defined $outfile" cannot detect the failure; test open() itself.
    my $outfile;
    unless (open $outfile, '>', $filename) {
        warn "Failed to cache $filename: $!";
        return;
    }

    print "Caching $filename:$data\n";
    print {$outfile} to_json($data);

    # Buffered write errors only surface when the handle is closed.
    close $outfile
        or warn "Failed to cache $filename: $!";
}
# Fetch the members of a wiki category.
#
# Takes the category name, asks $editor for the pages in that category,
# stores the resulting list through cachedata() under the category name,
# and returns the list.
sub getcategory {
    my ($categoryname) = @_;

    my @members = $editor->get_pages_in_category($categoryname);
    cachedata($categoryname, \@members);

    return @members;
}
# Tell the editor to edit Rosetta Code.  I'm sure Wikipedia didn't like
# my initial attempts from before I added this line.
print "Trying to set wiki.\n";
$editor->set_wiki('rosettacode.org', 'w');

# Attempt to log in.
print "Trying to log in.\n";

# The status is compared as a string: with the numeric "==" both sides of
# the comparison evaluate to 0 for any non-numeric status, so a failed login
# could never be detected.
# NOTE(review): assumes login() returns the string "Success" on success, as
# the original comparison implies - confirm against the installed MediaWiki::Bot.
my $loginresult = $editor->login($username, $password);
unless (defined $loginresult and $loginresult eq "Success") {
    # No, it's not the "(expr) or die" syntax.  This will be clearer
    # for most folks who read the code.
    my $reason = defined $editor->{errstr} ? $editor->{errstr} : 'unknown error';
    die "Unable to login: $reason";
}

# Get a complete listing of the tasks.
print "Getting tasks\n";
my @alltasks = getcategory('Category:Programming Tasks');

# Get a complete listing of the languages.
print "Getting the languages.\n";
my @alllanguages = getcategory('Category:Programming Languages');

# We want the language name, not the fully-qualified wiki name.
s/^Category:// foreach @alllanguages;

# Get a list of the languages for which we've already provided bodies for the related omit categories.
# Store it as a hash, so the lookup will be faster.
my %createdomitcategories = map { $_ => 1 } getcategory('Category:Maintenance/OmitCategoriesCreated');
# For every language: work out which tasks are unimplemented or omitted,
# post the two list-body templates, and create the body of the language's
# Omit category page if it has not been created before.
print "Identifying implemented and omitted languages\n";
foreach my $language (@alllanguages) {
    my %implemented = map { $_ => 1 } getcategory("Category:$language");
    my %omitted     = map { $_ => 1 } getcategory("Category:$language/Omit");
    my $omitcount   = scalar keys %omitted;

    my $pagename = "Tasks not implemented in $language";
    print "Preparing data for:$pagename\n";

    # Language metadata
    my $taskcount   = scalar @alltasks;
    my $unimpcount  = $taskcount - scalar keys %implemented;
    my $targetcount = $taskcount - $omitcount;

    # Language-specific page data.
    my $unimplisting = "";
    my $omitlisting  = "";
    my $pagedata;    # Not assembled until the end.

    foreach my $taskname (@alltasks) {
        # We want the task name, not the fully-qualified wiki name.
        my $baretaskname = $taskname;
        $baretaskname =~ s/^Category://;

        # Add the task to the unimplemented list, if it's unimplemented.
        $unimplisting .= "* $baretaskname\n" unless exists $implemented{$taskname};

        # Add the task to the omission list, if it's omitted.
        $omitlisting .= "* $baretaskname\n" if exists $omitted{$taskname};
    }

    # Prepare template fields
    my $langfield  = "|$language";
    my $unimpfield = "|$unimpcount";
    my $tcfield    = "|$targetcount";

    my $impperccalc = 0;
    $impperccalc = (($targetcount - $unimpcount) / $targetcount) * 100
        unless $targetcount == 0;

    # The numerator is (implemented - omitted), which can be negative;
    # "%u" would print a negative value as a huge unsigned integer.
    $impperccalc = 0 if $impperccalc < 0;
    my $imppercfield = sprintf "|%u", $impperccalc;

    my $unimpltemplatename = "unimp_body_$language";
    my $omittemplatename   = "unimp_omit_body_$language";

    # Prepare the listing page format.
    # NOTE(review): $pagedata is only consumed by the commented-out postpage()
    # call below.  These literals look as if template-transclusion markup was
    # lost from them - confirm the intended wikitext before re-enabling it.
    $pagedata  = 'Template:Unimpl header' . $langfield . $unimpfield . $tcfield . $imppercfield . '';
    $pagedata .= "Template:$unimpltemplatename" . "$unimpfield";
    $pagedata .= "Template:Omit header" . "$langfield";
    $pagedata .= "Template:$omittemplatename";
    $pagedata .= "Template:Unimpl footer$langfield";

    # Post the template containing the listing of unimplemented tasks.
    # postpage() prepends "ImplSearchBot:" to the remark itself, so the
    # remarks passed here carry no prefix of their own.
    postpage("Template:$unimpltemplatename", $unimplisting, "Updating list body of unimplemented tasks.", 1);

    # Post the template containing the listing of omitted tasks.
    postpage("Template:$omittemplatename", $omitlisting, "Updating list body of omitted tasks.", 1);

    # I'll uncomment this if the layout has to change significantly again. In the mean time, that's over a hundred pages we don't have to edit...
    # # Update the layout of the listing page, because it's changed.
    # &postpage($pagename, $pagedata, "ImplSearchBot:Updating layout of listing page.",1);

    my $omitcategoryname = "Category:$language/Omit";

    # The inner double quotes are escaped; unescaped they end the string early.
    my $omitcatpagedata =
          "Tasks listed here have been marked as \"un-implementable\" in $language. "
        . "Solutions may be impossible to do, too complex to be of any valuable instruction in $language, or prohibited by the task definition.\n"
        . "But hey; if you think you can prove us wrong, go for it. :-) ";

    # NOTE(review): the guard relies on the page being listed in
    # Category:Maintenance/OmitCategoriesCreated, but nothing in the body
    # above visibly adds the page to that category - confirm.
    postpage($omitcategoryname, $omitcatpagedata, "Updating Omit category body", 0)
        unless exists $createdomitcategories{$omitcategoryname};
}
# To add here: Post stats on activities:
# 1. Pages edited
# 2. Time last run started
# 3. Time to completion from start to just prior to stat post point.
my $runtime   = time() - $starttime;
my $statsdata = "Pages edited last run: $pagesedited\nTime to post all per-language updates: $runtime seconds\n";
print "Updating stats page. Runtime ($runtime), Pages edited ($pagesedited)\n";

# postpage() takes (page, text, remark, minor-edit flag).  The remark is
# passed explicitly so the flag no longer lands in the remark position.
postpage("User:ImplSearchBot/Stats", $statsdata, "Updating run statistics", 0);

# Publish this script's own source on the bot's code page.
print "Updating bot code page\n";
open my $sourcefile, '<', $0
    or die "Finished without updating bot source page: $!";
my $botsource = do { local $/; <$sourcefile> };
close $sourcefile;

# The tag name is interpolated rather than written out, presumably so that
# this script's text never contains a literal closing tag that would end the
# code block early when the source is shown on the wiki.
my $tag = "lang";
postpage("User:ImplSearchBot/Code", "<$tag perl>$botsource</$tag>", "Updating bot source code", 0);
print "Done\n"; </lang>