User:ImplSearchBot/Code: Difference between revisions
m (ImplSearchBot:0) |
m (ImplSearchBot:0) |
||
Line 1: | Line 1: | ||
<lang perl>#!/usr/bin/perl -w |
<lang perl>#!/usr/bin/perl -w |
||
use strict; |
use strict; |
||
use Getopt::Long; |
|||
use MediaWiki::Bot; |
use MediaWiki::Bot; |
||
use JSON qw/to_json from_json/; |
use JSON qw/to_json from_json/; |
||
# Handles interaction with the wiki. |
|||
# Note that I had to modify HTTP::Message to make it work |
|||
# HTTP::Message silently failed when presented by MW |
|||
# with an encoding type of "application/json" or some such. |
|||
my $usage = "Usage: $0 (username) (password) [posttosite]"; |
my $usage = "Usage: $0 --username=(username) --password=(password) [--posttosite=yes]"; |
||
my %options; |
|||
my $username = shift @ARGV; |
|||
my $password = shift @ARGV; |
|||
defined $username |
|||
or die $usage; |
|||
defined $password |
|||
or die $usage; |
|||
#my $json = new JSON; |
|||
#$json->allow_blessed(1); |
|||
#$json->convert_blessed(1); |
|||
my $posttosite = shift @ARGV; |
|||
my $starttime = time; |
|||
my $pagesedited = 0; |
|||
#I don't care to pollute my global scope. |
|||
print "Creating editor\n"; |
|||
my $editor = MediaWiki::Bot->new('ImpleSearchBot'); |
|||
$editor->{debug} = 1; |
|||
sub sanitizenamefs |
|||
{ |
{ |
||
my $wiki = 'rosettacode.org'; |
|||
my $username; |
|||
$pagename =~ tr/:\//__/; |
|||
my $password; |
|||
return $pagename; |
|||
my $verbosity = 2; # verbosity level. 0 is silent. 1 is error only. 2 is updates. 3 is process, more is noisy. |
|||
} |
|||
my $post; # Is this an actual run? |
|||
my $result = GetOptions( |
|||
"wiki=s" => \$wiki, |
|||
"username=s" => \$username, |
|||
"password=s" => \$password, |
|||
"verbosity=s" => \$verbosity, |
|||
"post" => \$post); |
|||
$options{'wiki'} = $wiki; |
|||
die $usage |
|||
sub postpage |
|||
unless defined $username; |
|||
{ |
|||
my $pagename = shift; |
|||
my $pagedata = shift; |
|||
my $remark = shift; |
|||
my $minoredit = shift; |
|||
$options{'username'} = $username; |
|||
++$pagesedited; |
|||
die $usage |
|||
unless( defined $posttosite ) |
|||
unless defined $password; |
|||
{ |
|||
$pagename = &sanitizenamefs($pagename); |
|||
$pagename .= ".wikitxt"; |
|||
$options{'password'} = $password; |
|||
print "Saving: $pagename\n"; |
|||
$options{'post'} = $post |
|||
open my $outfile, '>', $pagename |
|||
if defined $post; |
|||
or warn "Failed to open $pagename: $!"; |
|||
$options{'verbosity'} = $verbosity; |
|||
return unless defined $outfile; |
|||
print $outfile $pagedata; |
|||
close $outfile; |
|||
} |
|||
else |
|||
{ |
|||
print "Posting $pagename\n"; |
|||
$editor->edit($pagename, $pagedata, "ImplSearchBot:$remark", $minoredit) |
|||
or warn "Failed to post page: " . $editor->{'errstr'}; |
|||
} |
|||
} |
} |
||
sub getcacheddata |
|||
{ |
|||
my $dataname = shift; |
|||
my $filename = &sanitizenamefs("cache_" . $dataname . ".json"); |
|||
open my $infile, '<', $filename |
|||
or warn "Failed to load cached data $filename: $!"; |
|||
#Statistic tracking. |
|||
return undef unless defined $infile; |
|||
my $starttime = time; |
|||
my $pagesedited = 0; |
|||
$jsondata .= $_ while <$infile>; |
|||
close $infile; |
|||
return from_json($jsondata); |
|||
} |
|||
# Not doing anything with this yet. It's intended to allow us to compare site state between now and when we |
|||
# last ran, so that we can learn to avoid doing unnecessary work. (Server resources and bloated edit statistics...) |
|||
# Also, I plan on publishing the cache files in a version control system like SVN or Git, to allow multiple bots |
|||
# to share the data and to provide history. (It's the public target that necessitates using JSON instead of Data::Dumper) |
|||
# SVN has the advantage that I already know how to use it. Git has the advantage in that I can use GitHub and not tax |
|||
# my Slice with Git traffic, and I don't have to punch a hole in the firewall to access more services. |
|||
sub cachedata |
|||
{ |
|||
my $dataname = shift; |
|||
my $data = shift; |
|||
my $filename = &sanitizenamefs("cache_" . $dataname . ".json"); |
|||
open my $outfile, '>', $filename |
|||
or warn "Failed to cache $filename: $!"; |
|||
return unless defined $outfile; |
|||
# Handles interaction with the wiki. |
|||
# Note that I had to modify HTTP::Message to make it work |
|||
print "Caching $filename:" . scalar $data . "\n"; |
|||
# HTTP::Message silently failed when presented by MW |
|||
print $outfile to_json($data); |
|||
# with an encoding type of "application/json" or some such. |
|||
close $outfile; |
|||
&out("Creating editor\n", 3); |
|||
} |
|||
my $editor = MediaWiki::Bot->new('ImpleSearchBot'); |
|||
$editor->{debug} = 1; |
|||
sub getcategory |
|||
{ |
|||
my $categoryname = shift; |
|||
my @categorycontents = $editor->get_pages_in_category($categoryname); |
|||
&cachedata($categoryname, \@categorycontents); |
|||
return @categorycontents; |
|||
} |
|||
# Tell the editor to edit Rosetta Code. I'm sure Wikipedia didn't like |
# Tell the editor to edit Rosetta Code. I'm sure Wikipedia didn't like |
||
# my initial attempts from before I added this line. |
# my initial attempts from before I added this line. |
||
&out("Trying to set wiki.\n",3); |
|||
$editor->set_wiki('rosettacode.org','w'); |
$editor->set_wiki('rosettacode.org','w'); |
||
# Attempt to log in. |
# Attempt to log in. |
||
&out("Trying to log in.\n", 3); |
|||
my $loginres = $editor->login($options{'username'}, $options{'password'}); |
|||
{ |
|||
die "Unable to login: " . $loginres |
|||
# No, it's not the "(expr) or die" syntax. This will be clearer |
|||
unless $loginres == "Success"; |
|||
# for most folks who read the code. |
|||
die "Unable to login: " . from_json($editor); |
|||
} |
|||
# Get a complete listing of the tasks. |
# Get a complete listing of the tasks. |
||
&out("Getting tasks\n", 3); |
|||
my @alltasks = &getcategory('Category:Programming Tasks'); |
my @alltasks = &getcategory('Category:Programming Tasks'); |
||
# Get a complete listing of the languages. |
# Get a complete listing of the languages. |
||
&out("Getting the languages.\n", 3); |
|||
my @alllanguages = &getcategory('Category:Programming Languages'); |
my @alllanguages = &getcategory('Category:Programming Languages'); |
||
Line 148: | Line 81: | ||
my %createdomitcategories = map {$_, 1} &getcategory('Category:Maintenance/OmitCategoriesCreated'); |
my %createdomitcategories = map {$_, 1} &getcategory('Category:Maintenance/OmitCategoriesCreated'); |
||
&out("Identifying work to do\n", 3); |
|||
foreach my $language (@alllanguages) |
|||
my %impldiff; |
|||
foreach my $lang (@alllanguages) |
|||
{ |
{ |
||
my $val = &getlangwork($lang); |
|||
my %implemented = map {$_, 1} &getcategory("Category:$language"); |
|||
next unless defined $val; |
|||
my %omitted = map {$_, 1} &getcategory("Category:$language/Omit"); |
|||
$impldiff{$lang} = $val; |
|||
my $omitcount = scalar keys %omitted; |
|||
} |
|||
foreach my $language (keys %impldiff) |
|||
my $pagename = "Tasks not implemented in $language"; |
|||
{ |
|||
print "Preparing data for:$pagename\n"; |
|||
my $pagename = "Tasks not implemented in $language"; |
|||
&out("Preparing data for:$pagename\n", 3); |
|||
my $hashref = $impldiff{$language}->{'impl'}; |
|||
my %implemented = %$hashref; |
|||
$hashref = $impldiff{$language}->{'omit'}; |
|||
my %omitted = %$hashref; |
|||
my $omitcount = scalar keys %omitted; |
|||
# Language metadata |
|||
my $taskcount = scalar @alltasks; |
|||
my $unimpcount = $taskcount - scalar keys %implemented; |
|||
my $targetcount = ($taskcount - $omitcount); |
|||
# Language metadata |
|||
my $taskcount = scalar @alltasks; |
|||
my $unimpcount = $taskcount - scalar keys %implemented; |
|||
my $omitlisting = ""; |
|||
my $targetcount = ($taskcount - $omitcount); |
|||
my $pagedata; # Not assembled until the end. |
|||
# Language-specific page data. |
|||
foreach my $taskname (@alltasks) |
|||
my $unimplisting = ""; |
|||
{ |
|||
my $omitlisting = ""; |
|||
# We want the task name, not the fully-qualified wiki name. |
|||
my $pagedata; # Not assembled until the end. |
|||
my $baretaskname = $taskname; |
|||
$baretaskname =~ s/^Category://; |
|||
foreach my $taskname (@alltasks) |
|||
# Add the task to the unimplemented list, if it's unimplemented. |
|||
{ |
|||
$unimplisting .= "* [[$baretaskname]]\n" |
|||
# We want the task name, not the fully-qualified wiki name. |
|||
unless(exists $implemented{$taskname}); |
|||
my $baretaskname = $taskname; |
|||
$baretaskname =~ s/^Category://; |
|||
# Add the task to the unimplemented list, if it's unimplemented. |
|||
$unimplisting .= "* [[$baretaskname]]\n" |
|||
unless(exists $implemented{$taskname}); |
|||
# Add the task to the omission list, if it's omitted. |
|||
$omitlisting .= "* [[$baretaskname]]\n" |
|||
if(exists $omitted{$taskname}) |
|||
} |
|||
# Prepare template fields |
|||
my $langfield = "|$language"; |
|||
my $unimpfield = "|$unimpcount"; |
|||
my $tcfield = "|$targetcount"; |
|||
my $impperccalc = 0; |
|||
$impperccalc = (($targetcount - $unimpcount) / $targetcount) * 100 |
|||
unless ($targetcount == 0); |
|||
my $imppercfield = sprintf "|%u", $impperccalc; |
|||
my $impperccalc = 0; |
|||
$impperccalc = (($targetcount - $unimpcount) / $targetcount) * 100 |
|||
unless ($targetcount == 0); |
|||
my $imppercfield = sprintf "|%u", $impperccalc; |
|||
my $unimpltemplatename = "unimp_body_$language"; |
|||
my $omittemplatename = "unimp_omit_body_$language"; |
|||
# Prepare the listing page format. |
|||
$pagedata = '{{unimpl_header' . $langfield . $unimpfield . $tcfield . $imppercfield . '}}'; |
|||
$pagedata .= "{{$unimpltemplatename" . "$unimpfield}}"; |
|||
$pagedata .= "{{omit_header" . "$langfield}}"; |
|||
$pagedata .= "{{$omittemplatename}}"; |
|||
$pagedata .= "{{unimpl_footer$langfield}}"; |
|||
# Post the template containing the listing of unimplemented tasks. |
|||
&postpage("Template:$unimpltemplatename", "<includeonly>$unimplisting</includeonly>", "ImplSearchBot:Updating list body of unimplemented tasks.", 1); |
|||
# Post the template containing the listing of omitted tasks. |
|||
&postpage("Template:$omittemplatename", "<includeonly>$omitlisting</includeonly>", "ImplSearchBot:Updating list body of unimplemented tasks.", 1); |
|||
# I'll uncomment this if the layout has to change significantly again. In the mean time, that's over a hundred pages we don't have to edit... |
# I'll uncomment this if the layout has to change significantly again. In the mean time, that's over a hundred pages we don't have to edit... |
||
# # Update the layout of the listing page, because it's changed. |
# # Update the layout of the listing page, because it's changed. |
||
# &postpage($pagename, $pagedata, "ImplSearchBot:Updating layout of listing page.",1); |
# &postpage($pagename, $pagedata, "ImplSearchBot:Updating layout of listing page.",1); |
||
my $omitcategoryname = "Category:$language/Omit"; |
|||
my $omitcatpagedata = "{{omit_cat|$language}}"; |
|||
&postpage($omitcategoryname, $omitcatpagedata, "ImplSearchBot:Updating Omit category body", 0) |
|||
unless ( exists $createdomitcategories{$omitcategoryname} ); |
|||
} |
} |
||
Line 228: | Line 173: | ||
my $runtime = time() - $starttime; |
my $runtime = time() - $starttime; |
||
my $statsdata = "Pages edited last run: $pagesedited\nTime to post all per-language updates: $runtime seconds\n"; |
my $statsdata = "Pages edited last run: $pagesedited\nTime to post all per-language updates: $runtime seconds\n"; |
||
&out("Updating stats page. Runtime ($runtime), Pages edited ($pagesedited)\n", 2); |
|||
&postpage("User:ImplSearchBot/Stats", $statsdata, 0); |
&postpage("User:ImplSearchBot/Stats", $statsdata, 0); |
||
&out("Updating bot code page\n", 4); |
|||
open my $sourcefile, '<', $0 |
open my $sourcefile, '<', $0 |
||
or die "Finished without updating bot source page"; |
|||
my $botsource; |
my $botsource; |
||
Line 247: | Line 192: | ||
&postpage("User:ImplSearchBot/Code", "<$tag perl>$botsource</$tag>", 0); |
&postpage("User:ImplSearchBot/Code", "<$tag perl>$botsource</$tag>", 0); |
||
&out("Done\n", 2); |
|||
exit(0); |
|||
sub out |
|||
{ |
|||
my $string = shift; |
|||
my $loglevel = shift; |
|||
chomp $string; |
|||
if($options{'verbosity'} >= $loglevel) |
|||
{ |
|||
if($loglevel == 1) |
|||
{ |
|||
warn $string; # use stderr. |
|||
} |
|||
else |
|||
{ |
|||
print "$string\n"; |
|||
} |
|||
} |
|||
} |
|||
sub sanitizenamefs |
|||
{ |
|||
my $pagename = shift; |
|||
$pagename =~ tr/:\//__/; |
|||
return $pagename; |
|||
} |
|||
sub diffcat_simple |
|||
{ |
|||
my $first = shift; |
|||
my $second = shift; |
|||
my %firsthash = map { $_, 1 } @$first; |
|||
my @new = (); |
|||
foreach my $secondelement (@$second) |
|||
{ |
|||
push @new, $secondelement |
|||
unless exists $firsthash{$secondelement}; |
|||
} |
|||
return @new; |
|||
} |
|||
sub diffcat |
|||
{ |
|||
my $first = shift; |
|||
my $second = shift; |
|||
my @newinfirst = &diffcat_simple($second, $first); |
|||
my @newinsecond = &diffcat_simple($first, $second); |
|||
return (\@newinfirst, \@newinsecond); |
|||
} |
|||
sub postpage |
|||
{ |
|||
my $pagename = shift; |
|||
my $pagedata = shift; |
|||
my $remark = shift; |
|||
my $minoredit = shift; |
|||
++$pagesedited; |
|||
unless( exists $options{'post'} ) |
|||
{ |
|||
$pagename = &sanitizenamefs($pagename); |
|||
$pagename .= ".wikitxt"; |
|||
&out("Saving: $pagename\n", 3); |
|||
open my $outfile, '>', $pagename |
|||
or &log("Failed to open $pagename: $!", 1); |
|||
return unless defined $outfile; |
|||
print $outfile $pagedata; |
|||
close $outfile; |
|||
} |
|||
else |
|||
{ |
|||
&out("Posting $pagename\n", 3); |
|||
$editor->edit($pagename, $pagedata, "ImplSearchBot:$remark", $minoredit) |
|||
or &out("Failed to post page: " . $editor->{'errstr'}, 1); |
|||
} |
|||
} |
|||
# This allows us to compare site state between now and when we |
|||
# last ran, so that we can learn to avoid doing unnecessary work. |
|||
# (Saves on server resources and bloated edit statistics...) |
|||
# Also, I plan on publishing the cache files in a version control |
|||
# system like SVN or Git, to allow multiple bots |
|||
# to share the data and to provide history. We're using JSON |
|||
# Instead of Perl's native Data::Dumper as JSON has broader |
|||
# cross-language support, making it easier for others to use the data. |
|||
# SVN has the advantage that I already know how to use it. |
|||
# Git has the advantage in that I can use GitHub and not tax |
|||
# my Slice with Git traffic, and I don't have to punch a hole |
|||
# in the firewall to access more services. |
|||
sub cachedata |
|||
{ |
|||
my $dataname = shift; |
|||
my $data = shift; |
|||
&out("Caching $dataname...", 3); |
|||
my $filename = &sanitizenamefs("cache_" . $dataname . ".json"); |
|||
my $outfile; |
|||
unless(open $outfile, '>', $filename) |
|||
{ |
|||
&out("Failed to open $filename: $!\n", 1); |
|||
return; |
|||
} |
|||
print $outfile to_json($data); |
|||
close $outfile; |
|||
&out(scalar $data . " members cached.\n", 4); |
|||
} |
|||
sub getcacheddata |
|||
{ |
|||
my $dataname = shift; |
|||
&out("Getting cached data for $dataname...", 3); |
|||
my $filename = &sanitizenamefs("cache_" . $dataname . ".json"); |
|||
my $infile; |
|||
unless (open $infile, '<', $filename) |
|||
{ |
|||
&out("Failed to load cached data $filename: $!\n", 1); |
|||
return []; |
|||
} |
|||
my $jsondata; |
|||
$jsondata .= $_ while <$infile>; |
|||
close $infile; |
|||
my $cacheddata = from_json($jsondata); |
|||
&out(scalar @$cacheddata . " members retrieved\n", 4); |
|||
return $cacheddata; |
|||
} |
|||
sub getcategory |
|||
{ |
|||
my $categoryname = shift; |
|||
&out("Getting category contents for $categoryname...", 3); |
|||
my @categorycontents = $editor->get_pages_in_category($categoryname); |
|||
&out(scalar @categorycontents . " members retrieved\n", 3); |
|||
my ($removed, $added) = &diffcat(&getcacheddata($categoryname), \@categorycontents); |
|||
&out(scalar @$removed . " removed, " . @$added . " added to $categoryname\n", (scalar @$removed + scalar @$added) > 0 ? 2 : 3); |
|||
&cachedata("$categoryname", \@categorycontents); |
|||
return @categorycontents; |
|||
} |
|||
sub getwork |
|||
{ |
|||
my $categoryname = shift; |
|||
&out("Getting work for $categoryname.\n",3); |
|||
my $olddata = &getcacheddata($categoryname); |
|||
my @newdata = &getcategory($categoryname); |
|||
my ($removed, $added) = &diffcat($olddata, \@newdata); |
|||
my @work = (); |
|||
@work = @newdata |
|||
if((scalar @$removed + scalar @$added) > 0); |
|||
&out(scalar @work . " items to process for $categoryname.\n", 3); |
|||
return @work; |
|||
} |
|||
sub getlangwork |
|||
{ |
|||
my $language = shift; |
|||
&out("Getting lang work for $language\n", 3); |
|||
my %impl; |
|||
foreach my $workitem (&getwork("Category:$language")) |
|||
{ |
|||
$impl{$workitem} = 1; |
|||
} |
|||
my %omit; |
|||
foreach my $workitem (&getwork("Category:$language/Omit")) |
|||
{ |
|||
$omit{$workitem} = 1; |
|||
} |
|||
# No work to do? |
|||
return undef |
|||
if( ( 0 == scalar keys %omit) && (0 == scalar keys %impl)); |
|||
# work to do. |
|||
return {'impl' => \%impl, |
|||
'omit' => \%omit }; |
|||
} |
|||
</lang> |
</lang> |
Revision as of 07:10, 12 March 2009
<lang perl>#!/usr/bin/perl -w use strict; use Getopt::Long; use MediaWiki::Bot; use JSON qw/to_json from_json/;
my $usage = "Usage: $0 --username=(username) --password=(password) [--posttosite=yes]";
my %options;
- I don't care to pollute my global scope.
{ my $wiki = 'rosettacode.org'; my $username; my $password; my $verbosity = 2; # verbosity level. 0 is silent. 1 is error only. 2 is updates. 3 is process, more is noisy. my $post; # Is this an actual run? my $result = GetOptions( "wiki=s" => \$wiki, "username=s" => \$username, "password=s" => \$password, "verbosity=s" => \$verbosity, "post" => \$post); $options{'wiki'} = $wiki;
die $usage unless defined $username;
$options{'username'} = $username;
die $usage unless defined $password;
$options{'password'} = $password;
$options{'post'} = $post if defined $post;
$options{'verbosity'} = $verbosity; }
- Statistic tracking.
my $starttime = time; my $pagesedited = 0;
- Handles interaction with the wiki.
- Note that I had to modify HTTP::Message to make it work
- HTTP::Message silently failed when presented by MW
- with an encoding type of "application/json" or some such.
&out("Creating editor\n", 3); my $editor = MediaWiki::Bot->new('ImpleSearchBot'); $editor->{debug} = 1;
- Tell the editor to edit Rosetta Code. I'm sure Wikipedia didn't like
- my initial attempts from before I added this line.
&out("Trying to set wiki.\n",3); $editor->set_wiki('rosettacode.org','w');
- Attempt to log in.
&out("Trying to log in.\n", 3); my $loginres = $editor->login($options{'username'}, $options{'password'});
die "Unable to login: " . $loginres unless $loginres == "Success";
- Get a complete listing of the tasks.
&out("Getting tasks\n", 3); my @alltasks = &getcategory('Category:Programming Tasks');
- Get a complete listing of the languages.
&out("Getting the languages.\n", 3); my @alllanguages = &getcategory('Category:Programming Languages');
- We want the language name, not the fully-qualified wiki name.
$_ =~ s/^Category:// foreach (@alllanguages);
- Get a list of the languages for which we've already provided bodies for the related omit categories.
- Store it as a hash, so the lookup will be faster.
my %createdomitcategories = map {$_, 1} &getcategory('Category:Maintenance/OmitCategoriesCreated');
&out("Identifying work to do\n", 3);
my %impldiff;
foreach my $lang (@alllanguages) { my $val = &getlangwork($lang); next unless defined $val; $impldiff{$lang} = $val; }
foreach my $language (keys %impldiff) { my $pagename = "Tasks not implemented in $language"; &out("Preparing data for:$pagename\n", 3); my $hashref = $impldiff{$language}->{'impl'}; my %implemented = %$hashref; $hashref = $impldiff{$language}->{'omit'}; my %omitted = %$hashref; my $omitcount = scalar keys %omitted;
# Language metadata
my $taskcount = scalar @alltasks;
my $unimpcount = $taskcount - scalar keys %implemented;
my $targetcount = ($taskcount - $omitcount);
# Language-specific page data. my $unimplisting = ""; my $omitlisting = ""; my $pagedata; # Not assembled until the end.
foreach my $taskname (@alltasks) { # We want the task name, not the fully-qualified wiki name. my $baretaskname = $taskname; $baretaskname =~ s/^Category://;
# Add the task to the unimplemented list, if it's unimplemented. $unimplisting .= "* $baretaskname\n" unless(exists $implemented{$taskname});
# Add the task to the omission list, if it's omitted. $omitlisting .= "* $baretaskname\n" if(exists $omitted{$taskname}) }
# Prepare template fields my $langfield = "|$language"; my $unimpfield = "|$unimpcount"; my $tcfield = "|$targetcount";
my $impperccalc = 0; $impperccalc = (($targetcount - $unimpcount) / $targetcount) * 100 unless ($targetcount == 0);
my $imppercfield = sprintf "|%u", $impperccalc;
my $unimpltemplatename = "unimp_body_$language"; my $omittemplatename = "unimp_omit_body_$language";
# Prepare the listing page format. $pagedata = 'Template:Unimpl header' . $langfield . $unimpfield . $tcfield . $imppercfield . ''; $pagedata .= "Template:$unimpltemplatename" . "$unimpfield"; $pagedata .= "Template:Omit header" . "$langfield"; $pagedata .= "Template:$omittemplatename"; $pagedata .= "Template:Unimpl footer$langfield";
# Post the template containing the listing of unimplemented tasks. &postpage("Template:$unimpltemplatename", "", "ImplSearchBot:Updating list body of unimplemented tasks.", 1);
# Post the template containing the listing of omitted tasks. &postpage("Template:$omittemplatename", "", "ImplSearchBot:Updating list body of unimplemented tasks.", 1);
# I'll uncomment this if the layout has to change significantly again. In the mean time, that's over a hundred pages we don't have to edit... # # Update the layout of the listing page, because it's changed. # &postpage($pagename, $pagedata, "ImplSearchBot:Updating layout of listing page.",1);
my $omitcategoryname = "Category:$language/Omit"; my $omitcatpagedata = "Tasks listed here have been marked as "un-implementable" in $language. Solutions may be impossible to do, too complex to be of any valuable instruction in $language, or prohibited by the task definition.
But hey; if you think you can prove us wrong, go for it. :-) "; &postpage($omitcategoryname, $omitcatpagedata, "ImplSearchBot:Updating Omit category body", 0) unless ( exists $createdomitcategories{$omitcategoryname} );
}
- To add here: Post stats on activities:
- 1. Pages edited
- 2. Time last run started
- 3. Time to completion from start to just prior to stat post point.
my $runtime = time() - $starttime; my $statsdata = "Pages edited last run: $pagesedited\nTime to post all per-language updates: $runtime seconds\n"; &out("Updating stats page. Runtime ($runtime), Pages edited ($pagesedited)\n", 2);
&postpage("User:ImplSearchBot/Stats", $statsdata, 0);
&out("Updating bot code page\n", 4);
open my $sourcefile, '<', $0 or die "Finished without updating bot source page";
my $botsource; $botsource .= $_ while <$sourcefile>;
close $sourcefile;
my $tag = "lang";
&postpage("User:ImplSearchBot/Code", "<$tag perl>$botsource</$tag>", 0);
&out("Done\n", 2);
exit(0);
sub out { my $string = shift; my $loglevel = shift;
chomp $string;
if($options{'verbosity'} >= $loglevel) { if($loglevel == 1) { warn $string; # use stderr. } else { print "$string\n"; } } }
sub sanitizenamefs { my $pagename = shift; $pagename =~ tr/:\//__/; return $pagename; }
sub diffcat_simple { my $first = shift; my $second = shift; my %firsthash = map { $_, 1 } @$first;
my @new = ();
foreach my $secondelement (@$second) { push @new, $secondelement unless exists $firsthash{$secondelement}; }
return @new; }
sub diffcat { my $first = shift; my $second = shift; my @newinfirst = &diffcat_simple($second, $first); my @newinsecond = &diffcat_simple($first, $second); return (\@newinfirst, \@newinsecond); }
sub postpage { my $pagename = shift; my $pagedata = shift; my $remark = shift; my $minoredit = shift;
++$pagesedited;
unless( exists $options{'post'} ) { $pagename = &sanitizenamefs($pagename); $pagename .= ".wikitxt";
&out("Saving: $pagename\n", 3);
open my $outfile, '>', $pagename or &log("Failed to open $pagename: $!", 1);
return unless defined $outfile;
print $outfile $pagedata; close $outfile; } else { &out("Posting $pagename\n", 3);
$editor->edit($pagename, $pagedata, "ImplSearchBot:$remark", $minoredit) or &out("Failed to post page: " . $editor->{'errstr'}, 1); } }
- This allows us to compare site state between now and when we
- last ran, so that we can learn to avoid doing unnecessary work.
- (Saves on server resources and bloated edit statistics...)
- Also, I plan on publishing the cache files in a version control
- system like SVN or Git, to allow multiple bots
- to share the data and to provide history. We're using JSON
- Instead of Perl's native Data::Dumper as JSON has broader
- cross-language support, making it easier for others to use the data.
- SVN has the advantage that I already know how to use it.
- Git has the advantage in that I can use GitHub and not tax
- my Slice with Git traffic, and I don't have to punch a hole
- in the firewall to access more services.
sub cachedata { my $dataname = shift; my $data = shift; &out("Caching $dataname...", 3); my $filename = &sanitizenamefs("cache_" . $dataname . ".json"); my $outfile; unless(open $outfile, '>', $filename) { &out("Failed to open $filename: $!\n", 1); return; }
print $outfile to_json($data); close $outfile; &out(scalar $data . " members cached.\n", 4); }
sub getcacheddata { my $dataname = shift; &out("Getting cached data for $dataname...", 3); my $filename = &sanitizenamefs("cache_" . $dataname . ".json"); my $infile; unless (open $infile, '<', $filename) { &out("Failed to load cached data $filename: $!\n", 1); return []; }
my $jsondata; $jsondata .= $_ while <$infile>; close $infile;
my $cacheddata = from_json($jsondata); &out(scalar @$cacheddata . " members retrieved\n", 4);
return $cacheddata; }
sub getcategory { my $categoryname = shift; &out("Getting category contents for $categoryname...", 3); my @categorycontents = $editor->get_pages_in_category($categoryname); &out(scalar @categorycontents . " members retrieved\n", 3);
my ($removed, $added) = &diffcat(&getcacheddata($categoryname), \@categorycontents);
&out(scalar @$removed . " removed, " . @$added . " added to $categoryname\n", (scalar @$removed + scalar @$added) > 0 ? 2 : 3);
&cachedata("$categoryname", \@categorycontents);
return @categorycontents; }
sub getwork { my $categoryname = shift; &out("Getting work for $categoryname.\n",3); my $olddata = &getcacheddata($categoryname); my @newdata = &getcategory($categoryname); my ($removed, $added) = &diffcat($olddata, \@newdata); my @work = (); @work = @newdata if((scalar @$removed + scalar @$added) > 0);
&out(scalar @work . " items to process for $categoryname.\n", 3);
return @work; }
sub getlangwork { my $language = shift; &out("Getting lang work for $language\n", 3); my %impl; foreach my $workitem (&getwork("Category:$language")) { $impl{$workitem} = 1; } my %omit; foreach my $workitem (&getwork("Category:$language/Omit")) { $omit{$workitem} = 1; }
# No work to do? return undef if( ( 0 == scalar keys %omit) && (0 == scalar keys %impl));
# work to do. return {'impl' => \%impl, 'omit' => \%omit }; } </lang>