User:ImplSearchBot/Code: Difference between revisions

Content added Content deleted
m (ImplSearchBot:0)
m (ImplSearchBot:0)
Line 17: Line 17:
my $post; # Is this an actual run?
my $post; # Is this an actual run?
my $cachepath = "cache/";
my $cachepath = "cache/";
my $cacheonly; # Don't query the wiki for data. Just pull from cache.
my $nosvn;
my $result = GetOptions(
my $result = GetOptions(
"wiki=s" => \$wiki,
"wiki=s" => \$wiki,
Line 23: Line 25:
"verbosity=s" => \$verbosity,
"verbosity=s" => \$verbosity,
"post" => \$post,
"post" => \$post,
"cacheonly" => \$cacheonly,
"nosvn" => \$nosvn,
"cachepath=s" => \$cachepath);
"cachepath=s" => \$cachepath);
$options{'wiki'} = $wiki;
$options{'wiki'} = $wiki;


$options{'nosvn'} = $nosvn
die $usage
unless defined $username;
if defined $nosvn;


$options{'username'} = $username;
$options{'username'} = $username
if defined $username;


$options{'password'} = $password
die $usage
unless defined $password;
if defined $password;

$options{'password'} = $password;


$options{'post'} = $post
$options{'post'} = $post
Line 45: Line 48:
$options{'cachepath'} = $cachepath;
$options{'cachepath'} = $cachepath;

$options{'cacheonly'} = $cacheonly
if defined $cacheonly;


# This could be reversed as a qualified statement, but I don't know
# if it'd be any less ugly.
unless( exists $options{'cacheonly'})
{
if( exists $options{'post'} )
{
unless(exists $options{'username'} and exists $options{'password'})
{
use Data::Dumper;
print Dumper(%options);
die $usage;
}
}
}
}
}


Line 66: Line 88:
&out(scalar @initialcache . " categories initially cached\n", 4);
&out(scalar @initialcache . " categories initially cached\n", 4);


# Get our editor
# Handles interaction with the wiki.
my $editor = &geteditor();
# Note that I had to modify HTTP::Message to make it work
# HTTP::Message silently failed when presented by MW
# with an encoding type of "application/json" or some such.
&out("Creating editor\n", 3);
my $editor = MediaWiki::Bot->new('ImpleSearchBot');
$editor->{debug} = 1;

# Tell the editor to edit Rosetta Code. I'm sure Wikipedia didn't like
# my initial attempts from before I added this line.
&out("Trying to set wiki.\n",3);
$editor->set_wiki('rosettacode.org','w');

# Attempt to log in.
&out("Trying to log in.\n", 3);
my $loginres = $editor->login($options{'username'}, $options{'password'});

die "Unable to login: " . $loginres
unless $loginres == "Success";


# Get a complete listing of the tasks.
# Get a complete listing of the tasks.
&out("Getting tasks\n", 3);
&out("Getting tasks\n", 3);
my @alltasks = &getcategory('Category:Programming Tasks');
my $alltasks = &getcategory('Category:Programming Tasks');


# Get a complete listing of the languages.
# Get a complete listing of the languages.
&out("Getting the languages.\n", 3);
&out("Getting the languages.\n", 3);
my @alllanguages = &getcategory('Category:Programming Languages');
my $alllanguages = &getcategory('Category:Programming Languages');


# We want the language name, not the fully-qualified wiki name.
# We want the language name, not the fully-qualified wiki name.
$_ =~ s/^Category:// foreach (@alllanguages);
$_ =~ s/^Category:// foreach (@$alllanguages);


# Get a list of the languages for which we've already provided bodies for the related omit categories.
# Get a list of the languages for which we've already provided bodies for the related omit categories.
# Store it as a hash, so the lookup will be faster.
# Store it as a hash, so the lookup will be faster.
my %createdomitcategories = map {$_, 1} &getcategory('Category:Maintenance/OmitCategoriesCreated');
my $omitcatcontents = &getcategory('Category:Maintenance/OmitCategoriesCreated');
my %createdomitcategories = map {$_, 1} @$omitcatcontents;


&out("Identifying work to do\n", 3);
&out("Identifying work to do\n", 3);
Line 105: Line 111:
my %impldiff;
my %impldiff;


foreach my $lang (@alllanguages)
foreach my $lang (@$alllanguages)
{
{
my $val = &getlangwork($lang);
my $val = &getlangwork($lang);
Line 124: Line 130:


# Language metadata
# Language metadata
my $taskcount = scalar @alltasks;
my $taskcount = scalar @$alltasks;
my $unimpcount = $taskcount - scalar keys %implemented;
my $unimpcount = $taskcount - scalar keys %implemented;
my $targetcount = ($taskcount - $omitcount);
my $targetcount = ($taskcount - $omitcount);
Line 133: Line 139:
my $pagedata; # Not assembled until the end.
my $pagedata; # Not assembled until the end.


foreach my $taskname (@alltasks)
foreach my $taskname (@$alltasks)
{
{
# We want the task name, not the fully-qualified wiki name.
# We want the task name, not the fully-qualified wiki name.
Line 218: Line 224:


exit(0);
exit(0);

sub geteditor
{
    # Build the MediaWiki::Bot object used for all interaction with the wiki.
    #
    # Reads the file-level %options hash ('cacheonly', 'post', 'username',
    # 'password').
    #
    # Returns undef when running cache-only without posting (no wiki access
    # is needed), otherwise the configured editor object.
    # Dies if a login is attempted and does not succeed.

    # If we're not posting, and we're only drawing from cache,
    # we don't actually need to talk to the wiki at all.
    if( exists $options{'cacheonly'} and not exists $options{'post'} )
    {
        return undef;
    }

    # Handles interaction with the wiki.
    # Note that I had to modify HTTP::Message to make it work
    # HTTP::Message silently failed when presented by MW
    # with an encoding type of "application/json" or some such.
    &out("Creating editor\n", 3);
    my $editor = MediaWiki::Bot->new('ImpleSearchBot');
    $editor->{debug} = 1;

    # Tell the editor to edit Rosetta Code. I'm sure Wikipedia didn't like
    # my initial attempts from before I added this line.
    &out("Trying to set wiki.\n",3);
    $editor->set_wiki('rosettacode.org','w');

    # If we're not posting and no credentials were supplied, there is
    # nothing to log in with; hand back the editor as-is.
    # (Previously the login was attempted anyway and its failure went
    # unnoticed because of the numeric comparison below.)
    my $havecredentials = (exists $options{'username'}
                           and exists $options{'password'});
    return $editor
        unless exists $options{'post'} or $havecredentials;

    # Otherwise, attempt to log in.
    &out("Trying to log in.\n", 3);
    my $loginres = $editor->login($options{'username'}, $options{'password'});

    # String comparison: with '==' every non-numeric result (including a
    # failure message) numifies to 0 and compares equal to "Success".
    # NOTE(review): assumes login() returns the string "Success" on success,
    # as the original comparison intended -- confirm against the installed
    # MediaWiki::Bot version.
    die "Unable to login: " . (defined $loginres ? $loginres : 'undef')
        unless defined $loginres and $loginres eq "Success";

    return $editor;
}


sub out
sub out
Line 366: Line 406:
{
{
my $categoryname = shift;
my $categoryname = shift;

# Return the cache data if we're not supposed to query the database.
return &getcacheddata($categoryname)
if( exists $options{'cacheonly'} );

&out("Getting category contents for $categoryname...", 4);
&out("Getting category contents for $categoryname...", 4);
my @categorycontents = $editor->get_pages_in_category($categoryname);
my @categorycontents = $editor->get_pages_in_category($categoryname);
Line 376: Line 421:
&cachedata("$categoryname", \@categorycontents);
&cachedata("$categoryname", \@categorycontents);


return @categorycontents;
return \@categorycontents;
}
}


Line 383: Line 428:
my $categoryname = shift;
my $categoryname = shift;
&out("Getting work for $categoryname.\n",4);
&out("Getting work for $categoryname.\n",4);
my $olddata = &getcacheddata($categoryname);
my $cacheddata = &getcacheddata($categoryname);

my @newdata = &getcategory($categoryname);
# If we're on a cache-only basis, we'll just say we have no old data,
my ($removed, $added) = &diffcat($olddata, \@newdata);
# and that our cached data is our new data.
my @work = ();
my $olddata;
@work = @newdata
my $newdata;
if (exists $options{'cacheonly'})
{
$olddata = [];
$newdata = $cacheddata;
}
else
{
$olddata = $cacheddata;
$newdata = &getcategory($categoryname);
}

my ($removed, $added) = &diffcat($olddata, $newdata);
my $work = [];
$work = $newdata
if((scalar @$removed + scalar @$added) > 0);
if((scalar @$removed + scalar @$added) > 0);
&out(scalar @work . " items to process for $categoryname.\n", 3);
&out(scalar @$work . " items to process for $categoryname.\n", 3);


return @work;
return @$work;
}
}


Line 463: Line 523:
sub svn
sub svn
{
{
return
if(exists $options{'nosvn'});
my @args = @_;
my @args = @_;