Anonymous user
User:ImplSearchBot/Code: Difference between revisions
m
ImplSearchBot:0
m (ImplSearchBot:0) |
m (ImplSearchBot:0) |
||
Line 1:
<lang perl>#!/usr/bin/perl -w
use strict;
use Getopt::Long;
use MediaWiki::Bot;
use JSON qw/to_json from_json/;
my $usage = "Usage: $0 --username=(username) --password=(password) [--posttosite=yes]";
my %options;
#I don't care to pollute my global scope.
{
my $username;
my $password;
my $verbosity = 2; # verbosity level. 0 is silent. 1 is error only. 2 is updates. 3 is process, more is noisy.
my $post; # Is this an actual run?
my $result = GetOptions(
"wiki=s" => \$wiki,
"username=s" => \$username,
"password=s" => \$password,
"verbosity=s" => \$verbosity,
"post" => \$post);
$options{'wiki'} = $wiki;
die $usage
unless defined $username;
$options{'username'} = $username;
die $usage
unless defined $password;
$options{'password'} = $password;
$options{'post'} = $post
if defined $post;
$options{'verbosity'} = $verbosity;
}
#Statistic tracking.
my $starttime = time;
# Handles interaction with the wiki.
# Note that I had to modify HTTP::Message to make it work
# HTTP::Message silently failed when presented by MW
# with an encoding type of "application/json" or some such.
&out("Creating editor\n", 3);
my $editor = MediaWiki::Bot->new('ImpleSearchBot');
$editor->{debug} = 1;
# Tell the editor to edit Rosetta Code. I'm sure Wikipedia didn't like
# my initial attempts from before I added this line.
$editor->set_wiki('rosettacode.org','w');
# Attempt to log in.
die "Unable to login: " . $loginres
unless $loginres == "Success";
# Get a complete listing of the tasks.
my @alltasks = &getcategory('Category:Programming Tasks');
# Get a complete listing of the languages.
my @alllanguages = &getcategory('Category:Programming Languages');
Line 148 ⟶ 81:
my %createdomitcategories = map {$_, 1} &getcategory('Category:Maintenance/OmitCategoriesCreated');
my %impldiff;
foreach my $lang (@alllanguages)
{
my $val = &getlangwork($lang);
next unless defined $val;
$impldiff{$lang} = $val;
}
foreach my $language (keys %impldiff)
{
my $pagename = "Tasks not implemented in $language";
&out("Preparing data for:$pagename\n", 3);
my $hashref = $impldiff{$language}->{'impl'};
my %implemented = %$hashref;
$hashref = $impldiff{$language}->{'omit'};
my %omitted = %$hashref;
my $omitcount = scalar keys %omitted;
my $unimpcount = $taskcount - scalar keys %implemented;
my $targetcount = ($taskcount - $omitcount);
# Language-specific page data.
my $unimplisting = "";
my $omitlisting = "";
my $pagedata; # Not assembled until the end.
foreach my $taskname (@alltasks)
{
# We want the task name, not the fully-qualified wiki name.
my $baretaskname = $taskname;
$baretaskname =~ s/^Category://;
# Add the task to the unimplemented list, if it's unimplemented.
$unimplisting .= "* [[$baretaskname]]\n"
unless(exists $implemented{$taskname});
my $impperccalc = 0;
$impperccalc = (($targetcount - $unimpcount) / $targetcount) * 100
unless ($targetcount == 0);
my $imppercfield = sprintf "|%u", $impperccalc;
# I'll uncomment this if the layout has to change significantly again. In the mean time, that's over a hundred pages we don't have to edit...
# # Update the layout of the listing page, because it's changed.
# &postpage($pagename, $pagedata, "ImplSearchBot:Updating layout of listing page.",1);
}
Line 228 ⟶ 173:
my $runtime = time() - $starttime;
my $statsdata = "Pages edited last run: $pagesedited\nTime to post all per-language updates: $runtime seconds\n";
&postpage("User:ImplSearchBot/Stats", $statsdata, 0);
open my $sourcefile, '<', $0
my $botsource;
Line 247 ⟶ 192:
&postpage("User:ImplSearchBot/Code", "<$tag perl>$botsource</$tag>", 0);
exit(0);
sub out
{
my $string = shift;
my $loglevel = shift;
chomp $string;
if($options{'verbosity'} >= $loglevel)
{
if($loglevel == 1)
{
warn $string; # use stderr.
}
else
{
print "$string\n";
}
}
}
sub sanitizenamefs
{
my $pagename = shift;
$pagename =~ tr/:\//__/;
return $pagename;
}
sub diffcat_simple
{
my $first = shift;
my $second = shift;
my %firsthash = map { $_, 1 } @$first;
my @new = ();
foreach my $secondelement (@$second)
{
push @new, $secondelement
unless exists $firsthash{$secondelement};
}
return @new;
}
sub diffcat
{
my $first = shift;
my $second = shift;
my @newinfirst = &diffcat_simple($second, $first);
my @newinsecond = &diffcat_simple($first, $second);
return (\@newinfirst, \@newinsecond);
}
sub postpage
{
my $pagename = shift;
my $pagedata = shift;
my $remark = shift;
my $minoredit = shift;
++$pagesedited;
unless( exists $options{'post'} )
{
$pagename = &sanitizenamefs($pagename);
$pagename .= ".wikitxt";
&out("Saving: $pagename\n", 3);
open my $outfile, '>', $pagename
or &log("Failed to open $pagename: $!", 1);
return unless defined $outfile;
print $outfile $pagedata;
close $outfile;
}
else
{
&out("Posting $pagename\n", 3);
$editor->edit($pagename, $pagedata, "ImplSearchBot:$remark", $minoredit)
or &out("Failed to post page: " . $editor->{'errstr'}, 1);
}
}
# This allows us to compare site state between now and when we
# last ran, so that we can learn to avoid doing unnecessary work.
# (Saves on server resources and bloated edit statistics...)
# Also, I plan on publishing the cache files in a version control
# system like SVN or Git, to allow multiple bots
# to share the data and to provide history. We're using JSON
# Instead of Perl's native Data::Dumper as JSON has broader
# cross-language support, making it easier for others to use the data.
# SVN has the advantage that I already know how to use it.
# Git has the advantage in that I can use GitHub and not tax
# my Slice with Git traffic, and I don't have to punch a hole
# in the firewall to access more services.
sub cachedata
{
my $dataname = shift;
my $data = shift;
&out("Caching $dataname...", 3);
my $filename = &sanitizenamefs("cache_" . $dataname . ".json");
my $outfile;
unless(open $outfile, '>', $filename)
{
&out("Failed to open $filename: $!\n", 1);
return;
}
print $outfile to_json($data);
close $outfile;
&out(scalar $data . " members cached.\n", 4);
}
sub getcacheddata
{
my $dataname = shift;
&out("Getting cached data for $dataname...", 3);
my $filename = &sanitizenamefs("cache_" . $dataname . ".json");
my $infile;
unless (open $infile, '<', $filename)
{
&out("Failed to load cached data $filename: $!\n", 1);
return [];
}
my $jsondata;
$jsondata .= $_ while <$infile>;
close $infile;
my $cacheddata = from_json($jsondata);
&out(scalar @$cacheddata . " members retrieved\n", 4);
return $cacheddata;
}
sub getcategory
{
my $categoryname = shift;
&out("Getting category contents for $categoryname...", 3);
my @categorycontents = $editor->get_pages_in_category($categoryname);
&out(scalar @categorycontents . " members retrieved\n", 3);
my ($removed, $added) = &diffcat(&getcacheddata($categoryname), \@categorycontents);
&out(scalar @$removed . " removed, " . @$added . " added to $categoryname\n", (scalar @$removed + scalar @$added) > 0 ? 2 : 3);
&cachedata("$categoryname", \@categorycontents);
return @categorycontents;
}
sub getwork
{
my $categoryname = shift;
&out("Getting work for $categoryname.\n",3);
my $olddata = &getcacheddata($categoryname);
my @newdata = &getcategory($categoryname);
my ($removed, $added) = &diffcat($olddata, \@newdata);
my @work = ();
@work = @newdata
if((scalar @$removed + scalar @$added) > 0);
&out(scalar @work . " items to process for $categoryname.\n", 3);
return @work;
}
sub getlangwork
{
my $language = shift;
&out("Getting lang work for $language\n", 3);
my %impl;
foreach my $workitem (&getwork("Category:$language"))
{
$impl{$workitem} = 1;
}
my %omit;
foreach my $workitem (&getwork("Category:$language/Omit"))
{
$omit{$workitem} = 1;
}
# No work to do?
return undef
if( ( 0 == scalar keys %omit) && (0 == scalar keys %impl));
# work to do.
return {'impl' => \%impl,
'omit' => \%omit };
}
</lang>
|