User:ImplSearchBot/Code: Difference between revisions
Content added Content deleted
m (ImplSearchBot:0) |
m (ImplSearchBot:0) |
||
Line 4: | Line 4: | ||
use MediaWiki::Bot; |
use MediaWiki::Bot; |
||
use JSON qw/to_json from_json/; |
use JSON qw/to_json from_json/; |
||
my $usage = "Usage: $0 --username=(username) --password=(password) [--posttosite=yes]"; |
|||
my %options; |
my %options; |
||
Line 19: | Line 17: | ||
my $cacheonly; # Don't query the wiki for data. Just pull from cache. |
my $cacheonly; # Don't query the wiki for data. Just pull from cache. |
||
my $nosvn; |
my $nosvn; |
||
my $result = GetOptions( |
|||
my $opt_matrix = { |
|||
"wiki=s" => \$wiki, |
"wiki=s" => \$wiki, |
||
"username=s" => \$username, |
"username=s" => \$username, |
||
Line 27: | Line 26: | ||
"cacheonly" => \$cacheonly, |
"cacheonly" => \$cacheonly, |
||
"nosvn" => \$nosvn, |
"nosvn" => \$nosvn, |
||
"cachepath=s" => \$cachepath |
"cachepath=s" => \$cachepath }; |
||
my $result = GetOptions( %$opt_matrix ); |
|||
$options{'wiki'} = $wiki; |
$options{'wiki'} = $wiki; |
||
Line 52: | Line 53: | ||
if defined $cacheonly; |
if defined $cacheonly; |
||
my $usage = "Usage: $0 (options)\n The available options are:\n"; |
|||
$usage .= "\t--$_\n" |
|||
foreach (keys %$opt_matrix); |
|||
$usage .= "Username and password are required if you need to pull data from the wiki. Wiki defaults to Rosetta Code.\n"; |
|||
# This could be reversed as a qualified statement, but I don't know |
|||
# if it'd be any less ugly. |
|||
unless( exists $options{'cacheonly'}) |
|||
my $wikineeded; |
|||
$wikineeded = "yes" |
|||
unless ( exists $options{'cacheonly'} ); |
|||
$wikineeded = "yes" |
|||
if ( exists $options{'post'} ); |
|||
if("yes" eq $wikineeded) |
|||
{ |
{ |
||
unless(exists $options{'username'} and exists $options{'password'}) |
|||
{ |
{ |
||
die $usage; |
|||
unless(exists $options{'username'} and exists $options{'password'}) |
|||
{ |
|||
use Data::Dumper; |
|||
print Dumper(%options); |
|||
die $usage; |
|||
} |
|||
} |
} |
||
} |
} |
||
} |
} |
||
Line 96: | Line 106: | ||
# Get a complete listing of the languages. |
# Get a complete listing of the languages. |
||
&out("Getting the languages.\n", 3); |
&out("Getting the languages.\n", 3); |
||
my $alllanguages |
my $alllanguages= &getcategory('Category:Programming Languages'); |
||
# We want the language name, not the fully-qualified wiki name. |
# We want the language name, not the fully-qualified wiki name. |
||
Line 119: | Line 129: | ||
&processimplediff(\%impldiff); |
&processimplediff(\%impldiff); |
||
# To add here: Post stats on activities: |
|||
# 2. Time last run started |
|||
my $runtime = time() - $starttime; |
my $runtime = time() - $starttime; |
||
Line 149: | Line 156: | ||
&out("Done\n", 3); |
&out("Done\n", 3); |
||
# END OF PROGRAM EXECUTION |
|||
exit(0); |
exit(0); |
||
#--------------------- |
|||
# These are all the functions that break up our work |
|||
# into logical chunks. |
|||
#--------------------- |
|||
# Builds a simple hash ref associating a page name with body. |
|||
# Used to help us prepare our postings in one pass, then commit them |
|||
# in a second pass. |
|||
sub build_posting |
sub build_posting |
||
{ |
{ |
||
Line 159: | Line 176: | ||
} |
} |
||
# Prepares the template body for the unimplemented data. |
|||
sub prep_unimp_posting |
sub prep_unimp_posting |
||
{ |
{ |
||
Line 167: | Line 186: | ||
foreach my $taskname (@$alltasks) |
foreach my $taskname (@$alltasks) |
||
{ |
{ |
||
# |
# If it's a category task, the task name will be slightly different. |
||
my $baretaskname = $taskname; |
my $baretaskname = $taskname; |
||
$baretaskname =~ s/^Category://; |
$baretaskname =~ s/^Category://; |
||
my $implpage = $taskname; |
|||
$implpage =~ s/^Category:(.*)/$1\/$language/; |
|||
my $link; |
|||
if($taskname eq $baretaskname) |
|||
{ |
|||
$link = "[[$taskname]]"; |
|||
} |
|||
else |
|||
{ |
|||
$link = "[[:$taskname|$baretaskname]]"; |
|||
} |
|||
# Add the task to the unimplemented list, if it's unimplemented, and if it's not in the omit list. |
# Add the task to the unimplemented list, if it's unimplemented, and if it's not in the omit list. |
||
$unimplisting .= "* [[$baretaskname]]\n" |
|||
unless(exists $impldiff->{$language}->{'impl'}->{$implpage} or exists $impldiff->{$language}->{'omit'}->{$implpage}) |
|||
{ |
|||
$unimplisting .= "* $link\n"; |
|||
} |
|||
} |
} |
||
Line 181: | Line 215: | ||
} |
} |
||
# Prepares the template body for the listings of omitted data for a page. |
|||
sub prep_omit_posting |
sub prep_omit_posting |
||
{ |
{ |
||
Line 205: | Line 240: | ||
} |
} |
||
# Prepares the page body that folks look at to find out what changed for a language. |
|||
sub prep_listing_posting |
sub prep_listing_posting |
||
{ |
{ |
||
Line 239: | Line 275: | ||
} |
} |
||
# Prepare the body of the omit category. |
|||
sub prep_omit_cat_posting |
sub prep_omit_cat_posting |
||
{ |
{ |
||
Line 246: | Line 283: | ||
} |
} |
||
# Prepare the page title for the page folks look at to see what changed. |
|||
sub get_listing_name |
sub get_listing_name |
||
{ |
{ |
||
Line 252: | Line 290: | ||
} |
} |
||
# Prepare the template name for the omit listing body |
|||
sub get_omit_template_name |
sub get_omit_template_name |
||
{ |
{ |
||
Line 258: | Line 297: | ||
} |
} |
||
# Prepare the template name for the unimplemented listing body |
|||
sub get_unimpl_template_name |
sub get_unimpl_template_name |
||
{ |
{ |
||
Line 264: | Line 304: | ||
} |
} |
||
# Return the name of the category to find the omitted pages for a particular language. |
|||
sub get_omit_cat_name |
sub get_omit_cat_name |
||
{ |
{ |
||
Line 270: | Line 311: | ||
} |
} |
||
# Prepare all the pages associated with a specific language. |
|||
sub process_language |
sub process_language |
||
{ |
{ |
||
Line 292: | Line 334: | ||
} |
} |
||
# Prepare and post all of the core pages for this run. |
|||
sub processimplediff |
sub processimplediff |
||
{ |
{ |
||
my $implediff = shift; |
my $implediff = shift; |
||
my %work; |
my %work; |
||
# Prepare all our work. |
|||
foreach my $language (keys %impldiff) |
foreach my $language (keys %impldiff) |
||
{ |
{ |
||
Line 303: | Line 348: | ||
} |
} |
||
# Now that we've prepared all our work, commit it. |
|||
foreach my $pagename (keys %work) |
foreach my $pagename (keys %work) |
||
{ |
{ |
||
Line 309: | Line 355: | ||
} |
} |
||
# Return the MediaWiki editor object. |
|||
sub geteditor |
sub geteditor |
||
{ |
{ |
||
Line 343: | Line 390: | ||
} |
} |
||
# Simple logging infrastructure. Currently sends to STDERR or STDOUT, which cron |
|||
# emails to the user, which gets forwarded to Short Circuit. (Anyone want a copy?) |
|||
sub out |
sub out |
||
{ |
{ |
||
Line 363: | Line 412: | ||
} |
} |
||
# Many popular filesystems can't handle : and \ in filenames. |
|||
# Since I plan to open the SVN repo we save to the rest of the world at some point, |
|||
# I'm trying to make sure the files are representable. |
|||
sub sanitizenamefs |
sub sanitizenamefs |
||
{ |
{ |
||
Line 370: | Line 422: | ||
} |
} |
||
# Find all the entries that are in the second list ref, but not the first. |
|||
sub diffcat_simple |
sub diffcat_simple |
||
{ |
{ |
||
Line 387: | Line 440: | ||
} |
} |
||
# Find all the entries that are in one listref, but not the other. |
|||
sub diffcat |
sub diffcat |
||
{ |
{ |
||
Line 396: | Line 450: | ||
} |
} |
||
# Post a page (or save it to disk, if we're testing.) |
|||
sub postpage |
sub postpage |
||
{ |
{ |
||
Line 433: | Line 488: | ||
# last ran, so that we can learn to avoid doing unnecessary work. |
# last ran, so that we can learn to avoid doing unnecessary work. |
||
# (Saves on server resources and bloated edit statistics...) |
# (Saves on server resources and bloated edit statistics...) |
||
# Also, |
# Also, the data gets saved to an SVN repo, so that multiple bots can |
||
# use the history. We're using JSON, as JSON has broader |
|||
# system like SVN or Git, to allow multiple bots |
|||
# cross-language support than Data::Dumper, making it easier for others |
|||
# to share the data and to provide history. We're using JSON |
|||
# to use the data. |
|||
# Instead of Perl's native Data::Dumper as JSON has broader |
|||
# cross-language support, making it easier for others to use the data. |
|||
# SVN has the advantage that I already know how to use it. |
|||
# Git has the advantage in that I can use GitHub and not tax |
|||
# my Slice with Git traffic, and I don't have to punch a hole |
|||
# in the firewall to access more services. |
|||
sub cachedata |
sub cachedata |
||
{ |
{ |
||
Line 465: | Line 515: | ||
} |
} |
||
# Return data we cached previously. |
|||
sub getcacheddata |
sub getcacheddata |
||
{ |
{ |
||
Line 473: | Line 524: | ||
unless (open $infile, '<', $filename) |
unless (open $infile, '<', $filename) |
||
{ |
{ |
||
&out("Failed to load cached data $filename: $!\n", |
&out("Failed to load cached data $filename: $!\n", 5); |
||
return []; |
return []; |
||
} |
} |
||
Line 488: | Line 539: | ||
} |
} |
||
# Report the changes between two categories. |
|||
sub getcategory |
|||
# More interesting than "x added, y removed" |
|||
sub reportcatchanges |
|||
{ |
{ |
||
my $ |
my $category = shift; |
||
my $old = shift; |
|||
my $new = shift; |
|||
my ($removed, $added) = &diffcat($old, $new); |
|||
# Return the cache data if we're not supposed to query the database. |
|||
return &getcacheddata($categoryname) |
|||
if( exists $options{'cacheonly'} ); |
|||
my $out = "Removed from $category:\n"; |
|||
my $page; |
|||
foreach $page (@$removed) |
|||
{ |
|||
$out .= "$page\n"; |
|||
} |
|||
$out .= "Added to $category:\n"; |
|||
foreach $page (@$added) |
|||
{ |
|||
$out .= "$page\n"; |
|||
} |
|||
&out($out, 2); |
|||
} |
|||
# Pull the category data, or cached data if we're not pulling from the wiki. |
|||
sub getcategory |
|||
{ |
|||
my $categoryname = shift; |
|||
&out("Getting category contents for $categoryname...", 4); |
&out("Getting category contents for $categoryname...", 4); |
||
my @categorycontents = $editor->get_pages_in_category($categoryname); |
|||
&out(scalar @categorycontents . " members retrieved for $categoryname\n", 5); |
|||
my $old; |
|||
my ($removed, $added) = &diffcat(&getcacheddata($categoryname), \@categorycontents); |
|||
my $new; |
|||
if( exists $options{'cacheonly'} ) |
|||
&out(scalar @$removed . " removed, " . @$added . " added to $categoryname\n", &getloglevelfromdiff(2, $removed, $added)); |
|||
{ |
|||
# Return the cache data if we're not supposed to query the database. |
|||
$old = []; |
|||
$new = &getcacheddata($categoryname); |
|||
} |
|||
else |
|||
{ |
|||
$old = &getcacheddata($categoryname); |
|||
$new = [$editor->get_pages_in_category($categoryname)]; |
|||
++$categorypulls; |
|||
} |
|||
&reportcatchanges($categoryname, $old, $new); |
|||
++$categorypulls; |
|||
&cachedata("$categoryname", |
&cachedata("$categoryname", $new); |
||
&out(scalar @$new . " members returned for $categoryname\n", 5); |
|||
return \@categorycontents; |
|||
return $new; |
|||
} |
} |
||
# Find if this category changed, report its contents if it has. |
|||
sub getwork |
sub getwork |
||
{ |
{ |
||
Line 542: | Line 629: | ||
} |
} |
||
# If changes occurred, the info is more important than if they didn't. |
|||
sub getloglevelfromdiff |
sub getloglevelfromdiff |
||
{ |
{ |
||
Line 552: | Line 640: | ||
} |
} |
||
# Find all the work items for a given language. |
|||
sub getlangwork |
sub getlangwork |
||
{ |
{ |
||
Line 583: | Line 672: | ||
} |
} |
||
# Commit the cache. |
|||
sub commitcache |
sub commitcache |
||
{ |
{ |
||
Line 615: | Line 705: | ||
} |
} |
||
# Wrap svn commands so we can log them. |
|||
sub svn |
sub svn |
||
{ |
{ |