User:ImplSearchBot/Code: Difference between revisions

m
ImplSearchBot:0
m (ImplSearchBot:0)
m (ImplSearchBot:0)
Line 1:
<lang perl>#!/usr/bin/perl -w
use strict;
use Getopt::Long;
use MediaWiki::Bot;
use JSON qw/to_json from_json/;
 
# Handles interaction with the wiki.
# Note that I had to modify HTTP::Message to make it work
# HTTP::Message silently failed when presented by MW
# with an encoding type of "application/json" or some such.
 
my $usage = "Usage: $0 --username=(username) --password=(password) [--posttosite=yes]";
 
my %options;
my $username = shift @ARGV;
my $password = shift @ARGV;
 
defined $username
or die $usage;
 
defined $password
or die $usage;
 
#my $json = new JSON;
#$json->allow_blessed(1);
#$json->convert_blessed(1);
 
my $posttosite = shift @ARGV;
my $starttime = time;
 
my $pagesedited = 0;
 
#I don't care to pollute my global scope.
print "Creating editor\n";
my $editor = MediaWiki::Bot->new('ImpleSearchBot');
$editor->{debug} = 1;
 
sub sanitizenamefs
{
my $pagenamewiki = shift'rosettacode.org';
my $username;
$pagename =~ tr/:\//__/;
my $password;
return $pagename;
my $verbosity = 2; # verbosity level. 0 is silent. 1 is error only. 2 is updates. 3 is process, more is noisy.
}
my $post; # Is this an actual run?
my $result = GetOptions(
"wiki=s" => \$wiki,
"username=s" => \$username,
"password=s" => \$password,
"verbosity=s" => \$verbosity,
"post" => \$post);
$options{'wiki'} = $wiki;
 
die $usage
sub postpage
unless defined $username;
{
my $pagename = shift;
my $pagedata = shift;
my $remark = shift;
my $minoredit = shift;
 
$options{'username'} = $username;
++$pagesedited;
 
die $usage
unless( defined $posttosite )
unless defined $password;
{
$pagename = &sanitizenamefs($pagename);
$pagename .= ".wikitxt";
 
$options{'password'} = $password;
print "Saving: $pagename\n";
 
$options{'post'} = $post
open my $outfile, '>', $pagename
if defined $post;
or warn "Failed to open $pagename: $!";
 
$options{'verbosity'} = $verbosity;
return unless defined $outfile;
 
print $outfile $pagedata;
close $outfile;
}
else
{
print "Posting $pagename\n";
$editor->edit($pagename, $pagedata, "ImplSearchBot:$remark", $minoredit)
or warn "Failed to post page: " . $editor->{'errstr'};
}
}
 
sub getcacheddata
{
my $dataname = shift;
my $filename = &sanitizenamefs("cache_" . $dataname . ".json");
open my $infile, '<', $filename
or warn "Failed to load cached data $filename: $!";
 
#Statistic tracking.
return undef unless defined $infile;
my $starttime = time;
 
my $jsondatapagesedited = 0;
$jsondata .= $_ while <$infile>;
close $infile;
return from_json($jsondata);
}
 
# Not doing anything with this yet. It's intended to allow us to compare site state between now and when we
# last ran, so that we can learn to avoid doing unnecessary work. (Server resources and bloated edit statistics...)
# Also, I plan on publishing the cache files in a version control system like SVN or Git, to allow multiple bots
# to share the data and to provide history. (It's the public target that necessitates using JSON instead of Data::Dumper)
# SVN has the advantage that I already know how to use it. Git has the advantage in that I can use GitHub and not tax
# my Slice with Git traffic, and I don't have to punch a hole in the firewall to access more services.
sub cachedata
{
my $dataname = shift;
my $data = shift;
my $filename = &sanitizenamefs("cache_" . $dataname . ".json");
open my $outfile, '>', $filename
or warn "Failed to cache $filename: $!";
return unless defined $outfile;
 
# Handles interaction with the wiki.
 
# Note that I had to modify HTTP::Message to make it work
print "Caching $filename:" . scalar $data . "\n";
# HTTP::Message silently failed when presented by MW
print $outfile to_json($data);
# with an encoding type of "application/json" or some such.
close $outfile;
&out("Creating editor\n", 3);
}
my $editor = MediaWiki::Bot->new('ImpleSearchBot');
 
$editor->{debug} = 1;
sub getcategory
{
my $categoryname = shift;
my @categorycontents = $editor->get_pages_in_category($categoryname);
 
&cachedata($categoryname, \@categorycontents);
return @categorycontents;
}
 
# Tell the editor to edit Rosetta Code. I'm sure Wikipedia didn't like
# my initial attempts from before I added this line.
print &out("Trying to set wiki.\n",3);
$editor->set_wiki('rosettacode.org','w');
 
# Attempt to log in.
print &out("Trying to log in.\n", 3);
unless("Success"my $loginres == $editor->login($options{'username'}, $options{'password'}));
 
{
die "Unable to login: " . $loginres
# No, it's not the "(expr) or die" syntax. This will be clearer
unless $loginres == "Success";
# for most folks who read the code.
die "Unable to login: " . from_json($editor);
}
 
# Get a complete listing of the tasks.
print &out("Getting tasks\n", 3);
my @alltasks = &getcategory('Category:Programming Tasks');
 
# Get a complete listing of the languages.
print &out("Getting the languages.\n", 3);
my @alllanguages = &getcategory('Category:Programming Languages');
 
Line 148 ⟶ 81:
my %createdomitcategories = map {$_, 1} &getcategory('Category:Maintenance/OmitCategoriesCreated');
 
print &out("Identifying implementedwork andto omitted languagesdo\n", 3);
 
foreach my $language (@alllanguages)
my %impldiff;
 
foreach my $lang (@alllanguages)
{
my $val = &getlangwork($lang);
my %implemented = map {$_, 1} &getcategory("Category:$language");
next unless defined $val;
my %omitted = map {$_, 1} &getcategory("Category:$language/Omit");
$impldiff{$lang} = $val;
my $omitcount = scalar keys %omitted;
}
 
foreach my $language (keys %impldiff)
my $pagename = "Tasks not implemented in $language";
{
print "Preparing data for:$pagename\n";
my $pagename = "Tasks not implemented in $language";
&out("Preparing data for:$pagename\n", 3);
my $hashref = $impldiff{$language}->{'impl'};
my %implemented = %$hashref;
$hashref = $impldiff{$language}->{'omit'};
my %omitted = %$hashref;
my $omitcount = scalar keys %omitted;
 
# Language metadata
my $taskcount = scalar @alltasks;
my $unimpcount = $taskcount - scalar keys %implemented;
my $targetcount = ($taskcount - $omitcount);
 
# Language-specific page data.metadata
my $unimplistingtaskcount = ""scalar @alltasks;
my $unimpcount = $taskcount - scalar keys %implemented;
my $omitlisting = "";
my $targetcount = ($taskcount - $omitcount);
my $pagedata; # Not assembled until the end.
 
# Language-specific page data.
foreach my $taskname (@alltasks)
my $unimplisting = "";
{
my $omitlisting = "";
# We want the task name, not the fully-qualified wiki name.
my $pagedata; # Not assembled until the end.
my $baretaskname = $taskname;
$baretaskname =~ s/^Category://;
 
foreach my $taskname (@alltasks)
# Add the task to the unimplemented list, if it's unimplemented.
{
$unimplisting .= "* [[$baretaskname]]\n"
# We want the task name, not the fully-qualified wiki name.
unless(exists $implemented{$taskname});
my $baretaskname = $taskname;
$baretaskname =~ s/^Category://;
# Add the task to the unimplemented list, if it's unimplemented.
$unimplisting .= "* [[$baretaskname]]\n"
unless(exists $implemented{$taskname});
 
# Add the task to the omission list, if it's omitted.
$omitlisting .= "* [[$baretaskname]]\n"
if(exists $omitted{$taskname})
}
 
# Prepare template fields
my $langfield = "|$language";
my $unimpfield = "|$unimpcount";
my $tcfield = "|$targetcount";
my $impperccalc = 0;
$impperccalc = (($targetcount - $unimpcount) / $targetcount) * 100
unless ($targetcount == 0);
 
my $imppercfield = sprintf "|%u", $impperccalc;
my $impperccalc = 0;
$impperccalc = (($targetcount - $unimpcount) / $targetcount) * 100
unless ($targetcount == 0);
my $imppercfield = sprintf "|%u", $impperccalc;
 
my $unimpltemplatename = "unimp_body_$language";
my $omittemplatename = "unimp_omit_body_$language";
 
# Prepare the listing page format.
$pagedata = '{{unimpl_header' . $langfield . $unimpfield . $tcfield . $imppercfield . '}}';
$pagedata .= "{{$unimpltemplatename" . "$unimpfield}}";
$pagedata .= "{{omit_header" . "$langfield}}";
$pagedata .= "{{$omittemplatename}}";
$pagedata .= "{{unimpl_footer$langfield}}";
 
# Post the template containing the listing of unimplemented tasks.
&postpage("Template:$unimpltemplatename", "<includeonly>$unimplisting</includeonly>", "ImplSearchBot:Updating list body of unimplemented tasks.", 1);
 
# Post the template containing the listing of omitted tasks.
&postpage("Template:$omittemplatename", "<includeonly>$omitlisting</includeonly>", "ImplSearchBot:Updating list body of unimplemented tasks.", 1);
 
# I'll uncomment this if the layout has to change significantly again. In the mean time, that's over a hundred pages we don't have to edit...
# # Update the layout of the listing page, because it's changed.
# &postpage($pagename, $pagedata, "ImplSearchBot:Updating layout of listing page.",1);
 
my $omitcategoryname = "Category:$language/Omit";
my $omitcatpagedata = "{{omit_cat|$language}}";
&postpage($omitcategoryname, $omitcatpagedata, "ImplSearchBot:Updating Omit category body", 0)
unless ( exists $createdomitcategories{$omitcategoryname} );
 
}
Line 228 ⟶ 173:
my $runtime = time() - $starttime;
my $statsdata = "Pages edited last run: $pagesedited\nTime to post all per-language updates: $runtime seconds\n";
print &out("Updating stats page. Runtime ($runtime), Pages edited ($pagesedited)\n", 2);
 
&postpage("User:ImplSearchBot/Stats", $statsdata, 0);
 
 
print &out("Updating bot code page\n", 4);
 
open my $sourcefile, '<', $0
or die "Finished without updating bot source page";
 
my $botsource;
Line 247 ⟶ 192:
&postpage("User:ImplSearchBot/Code", "<$tag perl>$botsource</$tag>", 0);
 
print &out("Done\n", 2);
 
exit(0);
 
sub out
{
my $string = shift;
my $loglevel = shift;
 
chomp $string;
 
if($options{'verbosity'} >= $loglevel)
{
if($loglevel == 1)
{
warn $string; # use stderr.
}
else
{
print "$string\n";
}
}
}
 
sub sanitizenamefs
{
my $pagename = shift;
$pagename =~ tr/:\//__/;
return $pagename;
}
 
sub diffcat_simple
{
my $first = shift;
my $second = shift;
my %firsthash = map { $_, 1 } @$first;
 
my @new = ();
 
foreach my $secondelement (@$second)
{
push @new, $secondelement
unless exists $firsthash{$secondelement};
}
 
return @new;
}
 
sub diffcat
{
my $first = shift;
my $second = shift;
my @newinfirst = &diffcat_simple($second, $first);
my @newinsecond = &diffcat_simple($first, $second);
return (\@newinfirst, \@newinsecond);
}
 
sub postpage
{
my $pagename = shift;
my $pagedata = shift;
my $remark = shift;
my $minoredit = shift;
 
++$pagesedited;
 
unless( exists $options{'post'} )
{
$pagename = &sanitizenamefs($pagename);
$pagename .= ".wikitxt";
 
&out("Saving: $pagename\n", 3);
open my $outfile, '>', $pagename
or &log("Failed to open $pagename: $!", 1);
 
return unless defined $outfile;
 
print $outfile $pagedata;
close $outfile;
}
else
{
&out("Posting $pagename\n", 3);
 
$editor->edit($pagename, $pagedata, "ImplSearchBot:$remark", $minoredit)
or &out("Failed to post page: " . $editor->{'errstr'}, 1);
}
}
 
# This allows us to compare site state between now and when we
# last ran, so that we can learn to avoid doing unnecessary work.
# (Saves on server resources and bloated edit statistics...)
# Also, I plan on publishing the cache files in a version control
# system like SVN or Git, to allow multiple bots
# to share the data and to provide history. We're using JSON
# Instead of Perl's native Data::Dumper as JSON has broader
# cross-language support, making it easier for others to use the data.
# SVN has the advantage that I already know how to use it.
# Git has the advantage in that I can use GitHub and not tax
# my Slice with Git traffic, and I don't have to punch a hole
# in the firewall to access more services.
sub cachedata
{
my $dataname = shift;
my $data = shift;
&out("Caching $dataname...", 3);
my $filename = &sanitizenamefs("cache_" . $dataname . ".json");
my $outfile;
unless(open $outfile, '>', $filename)
{
&out("Failed to open $filename: $!\n", 1);
return;
}
 
print $outfile to_json($data);
close $outfile;
&out(scalar $data . " members cached.\n", 4);
}
 
sub getcacheddata
{
my $dataname = shift;
&out("Getting cached data for $dataname...", 3);
my $filename = &sanitizenamefs("cache_" . $dataname . ".json");
my $infile;
unless (open $infile, '<', $filename)
{
&out("Failed to load cached data $filename: $!\n", 1);
return [];
}
 
my $jsondata;
$jsondata .= $_ while <$infile>;
close $infile;
 
my $cacheddata = from_json($jsondata);
&out(scalar @$cacheddata . " members retrieved\n", 4);
 
return $cacheddata;
}
 
sub getcategory
{
my $categoryname = shift;
&out("Getting category contents for $categoryname...", 3);
my @categorycontents = $editor->get_pages_in_category($categoryname);
&out(scalar @categorycontents . " members retrieved\n", 3);
 
my ($removed, $added) = &diffcat(&getcacheddata($categoryname), \@categorycontents);
 
&out(scalar @$removed . " removed, " . @$added . " added to $categoryname\n", (scalar @$removed + scalar @$added) > 0 ? 2 : 3);
 
&cachedata("$categoryname", \@categorycontents);
 
return @categorycontents;
}
 
sub getwork
{
my $categoryname = shift;
&out("Getting work for $categoryname.\n",3);
my $olddata = &getcacheddata($categoryname);
my @newdata = &getcategory($categoryname);
my ($removed, $added) = &diffcat($olddata, \@newdata);
my @work = ();
@work = @newdata
if((scalar @$removed + scalar @$added) > 0);
&out(scalar @work . " items to process for $categoryname.\n", 3);
 
return @work;
}
 
sub getlangwork
{
my $language = shift;
&out("Getting lang work for $language\n", 3);
my %impl;
foreach my $workitem (&getwork("Category:$language"))
{
$impl{$workitem} = 1;
}
my %omit;
foreach my $workitem (&getwork("Category:$language/Omit"))
{
$omit{$workitem} = 1;
}
 
# No work to do?
return undef
if( ( 0 == scalar keys %omit) && (0 == scalar keys %impl));
 
# work to do.
return {'impl' => \%impl,
'omit' => \%omit };
}
</lang>