User:ImplSearchBot/Code: Difference between revisions

Content added Content deleted
m (ImplSearchBot:0)
m (ImplSearchBot:0)
Line 1: Line 1:
<lang perl>#!/usr/bin/perl -w
<lang perl>#!/usr/bin/perl -w

use strict;
use strict;
use MediaWiki::Bot;
use MediaWiki::Bot;
use JSON qw/to_json from_json/;
use Data::Dumper;


# Handles interaction with the wiki.
# Handles interaction with the wiki.
Line 20: Line 19:
defined $password
defined $password
or die $usage;
or die $usage;

#my $json = new JSON;
#$json->allow_blessed(1);
#$json->convert_blessed(1);


my $posttosite = shift @ARGV;
my $posttosite = shift @ARGV;
Line 29: Line 32:
my $editor = MediaWiki::Bot->new('ImpleSearchBot');
my $editor = MediaWiki::Bot->new('ImpleSearchBot');
$editor->{debug} = 1;
$editor->{debug} = 1;

# Make a name safe to use as a file name.
#
# Parameters:
#   the name to clean up (for example a wiki page title).
# Returns a copy of the name with every ':' and '/' replaced by '_'.
sub sanitizenamefs
{
    my ($rawname) = @_;

    # Work on a copy; tr maps both ':' and '/' onto '_'.
    (my $fsname = $rawname) =~ tr{:/}{__};

    return $fsname;
}


sub postpage
sub postpage
Line 41: Line 51:
unless( defined $posttosite )
unless( defined $posttosite )
{
{
$pagename =~ tr/:\//_-/;
$pagename = &sanitizenamefs($pagename);
$pagename .= ".wikitxt";
$pagename .= ".wikitxt";


Line 60: Line 70:
or warn "Failed to post page: " . $editor->{'errstr'};
or warn "Failed to post page: " . $editor->{'errstr'};
}
}
}

# Load previously cached data from its JSON cache file.
#
# Parameters:
#   $dataname - the name the data was cached under; the file name is
#               sanitizenamefs("cache_<dataname>.json").
# Returns the structure decoded by from_json(), or undef (after a warning)
# when the cache file cannot be opened, or undef when the file is empty.
# from_json() may still die on malformed JSON; that is left to the caller.
sub getcacheddata
{
    my $dataname = shift;
    my $filename = &sanitizenamefs("cache_" . $dataname . ".json");

    # Test open()'s return value, not the handle: "open my $fh" autovivifies
    # the handle even when the open fails, so a defined() check on it never
    # detects the failure.
    my $infile;
    unless( open $infile, '<', $filename )
    {
        warn "Failed to load cached data $filename: $!";
        return undef;
    }

    # Slurp the whole file in one read.
    my $jsondata = do { local $/; <$infile> };
    close $infile;

    # An empty cache file holds no JSON document; report it as "no data"
    # instead of handing from_json() nothing to parse.
    return undef unless defined $jsondata and length $jsondata;

    return from_json($jsondata);
}

# Not doing anything with this yet. It's intended to allow us to compare site state between now and when we
# last ran, so that we can learn to avoid doing unnecessary work. (Server resources and bloated edit statistics...)
# Also, I plan on publishing the cache files in a version control system like SVN or Git, to allow multiple bots
# to share the data and to provide history. (It's the public target that necessitates using JSON instead of Data::Dumper)
# SVN has the advantage that I already know how to use it. Git has the advantage in that I can use GitHub and not tax
# my Slice with Git traffic, and I don't have to punch a hole in the firewall to access more services.
# Write a data structure to its JSON cache file.
#
# Parameters:
#   $dataname - the name to cache the data under; the file name is
#               sanitizenamefs("cache_<dataname>.json").
#   $data     - the structure to serialise with to_json().
# Returns true when the file was written and closed successfully, false
# otherwise. Failures are reported with warn and are not fatal.
sub cachedata
{
    my $dataname = shift;
    my $data = shift;
    my $filename = &sanitizenamefs("cache_" . $dataname . ".json");

    # Test open()'s return value, not the handle: "open my $fh" autovivifies
    # the handle even when the open fails, so a defined() check on it never
    # detects the failure.
    my $outfile;
    unless( open $outfile, '>', $filename )
    {
        warn "Failed to cache $filename: $!";
        return;
    }

    print "Caching $filename:" . scalar $data . "\n";

    my $written = print {$outfile} to_json($data);
    warn "Failed to write $filename: $!" unless $written;

    # Buffered write errors only surface at close, so check it as well.
    my $closed = close $outfile;
    warn "Failed to close $filename: $!" unless $closed;

    return $written && $closed;
}

# Fetch the contents of a wiki category through $editor and cache them.
#
# Parameters:
#   the category name, passed straight to get_pages_in_category().
# Returns whatever get_pages_in_category() returned, as a list. The same
# list is handed to cachedata() under the category name before returning.
sub getcategory
{
    my ($categoryname) = @_;

    my @members = $editor->get_pages_in_category($categoryname);

    # Record what we saw so later runs can compare against it.
    cachedata($categoryname, \@members);

    return @members;
}
}


Line 73: Line 130:
# No, it's not the "(expr) or die" syntax. This will be clearer
# No, it's not the "(expr) or die" syntax. This will be clearer
# for most folks who read the code.
# for most folks who read the code.
die "Unable to login: " . Dumper($editor);
die "Unable to login: " . from_json($editor);
}
}


# Get a complete listing of the tasks.
# Get a complete listing of the tasks.
print "Getting tasks\n";
print "Getting tasks\n";
my @alltasks = $editor->get_pages_in_category('Category:Programming Tasks');
my @alltasks = &getcategory('Category:Programming Tasks');


# Get a complete listing of the languages.
# Get a complete listing of the languages.
print "Getting the languages.\n";
print "Getting the languages.\n";
my @alllanguages = $editor->get_pages_in_category('Category:Programming Languages');
my @alllanguages = &getcategory('Category:Programming Languages');


# We want the language name, not the fully-qualified wiki name.
# We want the language name, not the fully-qualified wiki name.
Line 89: Line 146:
# Get a list of the languages for which we've already provided bodies for the related omit categories.
# Get a list of the languages for which we've already provided bodies for the related omit categories.
# Store it as a hash, so the lookup will be faster.
# Store it as a hash, so the lookup will be faster.
my %createdomitcategories = map {$_, 1} $editor->get_pages_in_category('Category:Maintenance/OmitCategoriesCreated');
my %createdomitcategories = map {$_, 1} &getcategory('Category:Maintenance/OmitCategoriesCreated');


print "Identifying implemented and omitted languages\n";
print "Identifying implemented and omitted languages\n";
foreach my $language (@alllanguages)
foreach my $language (@alllanguages)
{
{
my %implemented = map {$_, 1} $editor->get_pages_in_category("Category:$language");
my %implemented = map {$_, 1} &getcategory("Category:$language");
my %omitted = map {$_, 1} $editor->get_pages_in_category("Category:$language/Omit");
my %omitted = map {$_, 1} &getcategory("Category:$language/Omit");
my $omitcount = scalar keys %omitted;
my $omitcount = scalar keys %omitted;