Anonymous user
User:ImplSearchBot/Code: Difference between revisions
m
ImplSearchBot:0
m (ImplSearchBot:0) |
m (ImplSearchBot:0) |
||
Line 1:
<lang perl>#!/usr/bin/perl -w
use strict;
use MediaWiki::Bot;
use JSON qw/to_json from_json/;
# Handles interaction with the wiki.
Line 20 ⟶ 19:
defined $password
or die $usage;
#my $json = new JSON;
#$json->allow_blessed(1);
#$json->convert_blessed(1);
my $posttosite = shift @ARGV;
Line 29 ⟶ 32:
my $editor = MediaWiki::Bot->new('ImpleSearchBot');
$editor->{debug} = 1;
# Turn a wiki page title into a string that is safe to use as a
# filename: ':' and '/' (namespace and subpage separators) both
# become '_'.  Returns the sanitized copy; the caller's string is
# left untouched.
sub sanitizenamefs
{
    my ($name) = @_;
    $name =~ s{[:/]}{_}g;
    return $name;
}
sub postpage
Line 41 ⟶ 51:
unless( defined $posttosite )
{
$pagename =
$pagename .= ".wikitxt";
Line 60 ⟶ 70:
or warn "Failed to post page: " . $editor->{'errstr'};
}
}
# Load previously cached data for $dataname from its JSON cache file
# (named "cache_<dataname>.json" after filename sanitizing).
# Returns the decoded Perl structure, or nothing (undef in scalar
# context) when the cache file is missing or empty.  A missing cache
# is expected on first run, so it only warns, never dies.
sub getcacheddata
{
    my ($dataname) = @_;
    my $filename = sanitizenamefs("cache_" . $dataname . ".json");

    open my $infile, '<', $filename
        or warn "Failed to load cached data $filename: $!";
    return unless defined $infile;

    # Slurp the whole file at once instead of concatenating line by
    # line; local-izing $/ confines the change to this read.
    my $jsondata = do { local $/; <$infile> };
    close $infile;

    # Guard from_json(): an empty or unreadable cache file would make
    # it die on undef/empty input.  Bare return (not "return undef")
    # so list-context callers get an empty list, not (undef).
    return unless defined $jsondata and length $jsondata;
    return from_json($jsondata);
}
# Not doing anything with this yet. It's intended to allow us to compare site state between now and when we
# last ran, so that we can learn to avoid doing unnecessary work. (Server resources and bloated edit statistics...)
# Also, I plan on publishing the cache files in a version control system like SVN or Git, to allow multiple bots
# to share the data and to provide history. (It's the public target that necessitates using JSON instead of Data::Dumper)
# SVN has the advantage that I already know how to use it. Git has the advantage in that I can use GitHub and not tax
# my Slice with Git traffic, and I don't have to punch a hole in the firewall to access more services.
# Serialize $data (a reference) to JSON and write it to the cache
# file for $dataname ("cache_<dataname>.json" after sanitizing).
# Caching is best-effort: open/close failures are reported with a
# warning but do not abort the run.
sub cachedata
{
    my ($dataname, $data) = @_;
    my $filename = sanitizenamefs("cache_" . $dataname . ".json");

    open my $outfile, '>', $filename
        or warn "Failed to cache $filename: $!";
    return unless defined $outfile;

    # Debug trace: "scalar $data" stringifies the reference
    # (e.g. ARRAY(0x...)) so we can see what is being cached.
    print "Caching $filename:" . scalar $data . "\n";
    print $outfile to_json($data);

    # Buffered write errors only surface at close time, so check it.
    close $outfile
        or warn "Failed to close $filename: $!";
}
# Fetch every page in the named category from the wiki via the
# file-level $editor bot object, write the listing to the on-disk
# cache, and return the list of member pages to the caller.
sub getcategory
{
    my ($categoryname) = @_;
    my @members = $editor->get_pages_in_category($categoryname);
    cachedata($categoryname, \@members);
    return @members;
}
Line 73 ⟶ 130:
# No, it's not the "(expr) or die" syntax. This will be clearer
# for most folks who read the code.
die "Unable to login: " .
}
# Get a complete listing of the tasks.
print "Getting tasks\n";
my @alltasks =
# Get a complete listing of the languages.
print "Getting the languages.\n";
my @alllanguages =
# We want the language name, not the fully-qualified wiki name.
Line 89 ⟶ 146:
# Get a list of the languages for which we've already provided bodies for the related omit categories.
# Store it as a hash, so the lookup will be faster.
my %createdomitcategories = map {$_, 1}
print "Identifying implemented and omitted languages\n";
foreach my $language (@alllanguages)
{
my %implemented = map {$_, 1}
my %omitted = map {$_, 1}
my $omitcount = scalar keys %omitted;
|