User:ImplSearchBot/Code: Difference between revisions
Content added Content deleted
m (ImplSearchBot:0) |
m (ImplSearchBot:0) |
||
Line 1: | Line 1: | ||
<lang perl>#!/usr/bin/perl -w |
<lang perl>#!/usr/bin/perl -w |
||
use strict; |
use strict; |
||
use MediaWiki::Bot; |
use MediaWiki::Bot; |
||
use JSON qw/to_json from_json/; |
|||
use Data::Dumper; |
|||
# Handles interaction with the wiki. |
# Handles interaction with the wiki. |
||
Line 20: | Line 19: | ||
defined $password |
defined $password |
||
or die $usage; |
or die $usage; |
||
#my $json = new JSON; |
|||
#$json->allow_blessed(1); |
|||
#$json->convert_blessed(1); |
|||
my $posttosite = shift @ARGV; |
my $posttosite = shift @ARGV; |
||
Line 29: | Line 32: | ||
my $editor = MediaWiki::Bot->new('ImpleSearchBot'); |
my $editor = MediaWiki::Bot->new('ImpleSearchBot'); |
||
$editor->{debug} = 1; |
$editor->{debug} = 1; |
||
# Make a wiki page name usable as a file name.
#
# Takes the page name as its only argument and returns a copy in which
# every ':' and every '/' has been replaced by an underscore.
sub sanitizenamefs
{
    # Copy the argument and transliterate the copy in one step.  The
    # one-character replacement list is applied to both search characters.
    (my $fsname = shift) =~ tr{:/}{_};

    return $fsname;
}
|||
sub postpage |
sub postpage |
||
Line 41: | Line 51: | ||
unless( defined $posttosite ) |
unless( defined $posttosite ) |
||
{ |
{ |
||
$pagename = |
$pagename = &sanitizenamefs($pagename); |
||
$pagename .= ".wikitxt"; |
$pagename .= ".wikitxt"; |
||
Line 60: | Line 70: | ||
or warn "Failed to post page: " . $editor->{'errstr'}; |
or warn "Failed to post page: " . $editor->{'errstr'}; |
||
} |
} |
||
} |
|||
# Load a previously cached data structure from disk.
#
# Takes the name of the cache entry (for example a category name), maps it
# to the file "cache_<name>.json" (passed through sanitizenamefs so it is a
# usable file name) and returns the structure decoded from that file's JSON.
#
# Returns undef, after emitting a warning, when the file cannot be opened,
# is empty, or does not contain valid JSON.  A missing or damaged cache is
# treated as a cache miss rather than a fatal error.
sub getcacheddata
{
    my $dataname = shift;

    my $filename = sanitizenamefs("cache_" . $dataname . ".json");

    # Test the return value of open itself.  "open my $fh, ..." autovivifies
    # the handle, so the variable is defined even when the open failed and
    # a "defined" check on it never detects the failure.
    my $infile;
    unless( open $infile, '<', $filename )
    {
        warn "Failed to load cached data $filename: $!";
        # Explicit undef keeps the one-value return callers may rely on.
        return undef;
    }

    # Slurp the whole file in a single read.
    my $jsondata = do { local $/; <$infile> };

    close $infile;

    unless( defined $jsondata and length $jsondata )
    {
        warn "Cached data $filename is empty";
        return undef;
    }

    # from_json dies on malformed input; trap that so a corrupt cache file
    # does not take the whole bot down.
    my $data;
    unless( eval { $data = from_json($jsondata); 1 } )
    {
        warn "Failed to parse cached data $filename: $@";
        return undef;
    }

    return $data;
}
|||
# Not doing anything with this yet. It's intended to allow us to compare site state between now and when we |
|||
# last ran, so that we can learn to avoid doing unnecessary work. (Server resources and bloated edit statistics...) |
|||
# Also, I plan on publishing the cache files in a version control system like SVN or Git, to allow multiple bots |
|||
# to share the data and to provide history. (It's the public target that necessitates using JSON instead of Data::Dumper) |
|||
# SVN has the advantage that I already know how to use it. Git has the advantage in that I can use GitHub and not tax |
|||
# my Slice with Git traffic, and I don't have to punch a hole in the firewall to access more services. |
|||
# Write a data structure to the on-disk cache as JSON.
#
# Arguments:
#   $dataname - name of the cache entry; the data is stored in the file
#               "cache_<name>.json" after the name has been passed through
#               sanitizenamefs.
#   $data     - reference to the structure to serialize with to_json.
#
# Returns a true value when the file was written and closed successfully.
# On any failure (open, write or close) a warning is emitted and the
# function returns nothing; caching is best-effort and never fatal here.
sub cachedata
{
    my $dataname = shift;

    my $data = shift;

    my $filename = sanitizenamefs("cache_" . $dataname . ".json");

    # Test the return value of open itself.  "open my $fh, ..." autovivifies
    # the handle, so a "defined" check on the variable never sees a failure.
    my $outfile;
    unless( open $outfile, '>', $filename )
    {
        warn "Failed to cache $filename: $!";
        return;
    }

    # Progress message; $data is printed in its stringified form.
    print "Caching ${filename}:${data}\n";

    unless( print {$outfile} to_json($data) )
    {
        warn "Failed to write $filename: $!";
        close $outfile;
        return;
    }

    # Buffered write errors only surface at close, so check it as well.
    unless( close $outfile )
    {
        warn "Failed to close $filename: $!";
        return;
    }

    return 1;
}
|||
# Fetch the pages belonging to a wiki category.
#
# Takes the category name, asks $editor for the pages in that category,
# hands the result to cachedata under the category's name, and returns
# the list of pages to the caller.
sub getcategory
{
    my ($category) = @_;

    my @members = $editor->get_pages_in_category($category);

    # Record what the wiki returned before handing it back.
    cachedata($category, \@members);

    return @members;
}
||
Line 73: | Line 130: | ||
# No, it's not the "(expr) or die" syntax. This will be clearer |
# No, it's not the "(expr) or die" syntax. This will be clearer |
||
# for most folks who read the code. |
# for most folks who read the code. |
||
die "Unable to login: " . |
die "Unable to login: " . from_json($editor); |
||
} |
} |
||
# Get a complete listing of the tasks. |
# Get a complete listing of the tasks. |
||
print "Getting tasks\n"; |
print "Getting tasks\n"; |
||
my @alltasks = |
my @alltasks = &getcategory('Category:Programming Tasks'); |
||
# Get a complete listing of the languages. |
# Get a complete listing of the languages. |
||
print "Getting the languages.\n"; |
print "Getting the languages.\n"; |
||
my @alllanguages = |
my @alllanguages = &getcategory('Category:Programming Languages'); |
||
# We want the language name, not the fully-qualified wiki name. |
# We want the language name, not the fully-qualified wiki name. |
||
Line 89: | Line 146: | ||
# Get a list of the languages for which we've already provided bodies for the related omit categories. |
# Get a list of the languages for which we've already provided bodies for the related omit categories. |
||
# Store it as a hash, so the lookup will be faster. |
# Store it as a hash, so the lookup will be faster. |
||
my %createdomitcategories = map {$_, 1} |
my %createdomitcategories = map {$_, 1} &getcategory('Category:Maintenance/OmitCategoriesCreated'); |
||
print "Identifying implemented and omitted languages\n"; |
print "Identifying implemented and omitted languages\n"; |
||
foreach my $language (@alllanguages) |
foreach my $language (@alllanguages) |
||
{ |
{ |
||
my %implemented = map {$_, 1} |
my %implemented = map {$_, 1} &getcategory("Category:$language"); |
||
my %omitted = map {$_, 1} |
my %omitted = map {$_, 1} &getcategory("Category:$language/Omit"); |
||
my $omitcount = scalar keys %omitted; |
my $omitcount = scalar keys %omitted; |
||