User:ImplSearchBot/Code: Difference between revisions

Content added Content deleted
m (ImplSearchBot:0)
m (ImplSearchBot:0)
Line 4: Line 4:
use MediaWiki::Bot;
use MediaWiki::Bot;
use JSON qw/to_json from_json/;
use JSON qw/to_json from_json/;

my $usage = "Usage: $0 --username=(username) --password=(password) [--posttosite=yes]";


my %options;
my %options;
Line 19: Line 17:
my $cacheonly; # Don't query the wiki for data. Just pull from cache.
my $cacheonly; # Don't query the wiki for data. Just pull from cache.
my $nosvn;
my $nosvn;

my $result = GetOptions(
my $opt_matrix = {
"wiki=s" => \$wiki,
"wiki=s" => \$wiki,
"username=s" => \$username,
"username=s" => \$username,
Line 27: Line 26:
"cacheonly" => \$cacheonly,
"cacheonly" => \$cacheonly,
"nosvn" => \$nosvn,
"nosvn" => \$nosvn,
"cachepath=s" => \$cachepath);
"cachepath=s" => \$cachepath };

my $result = GetOptions( %$opt_matrix );
$options{'wiki'} = $wiki;
$options{'wiki'} = $wiki;


Line 52: Line 53:
if defined $cacheonly;
if defined $cacheonly;


my $usage = "Usage: $0 (options)\n The available options are:\n";
$usage .= "\t--$_\n"
foreach (keys %$opt_matrix);


$usage .= "Username and password are required if you need to pull data from the wiki. Wiki defaults to Rosetta Code.\n";
# This could be reversed as a qualified statement, but I don't know

# if it'd be any less ugly.

unless( exists $options{'cacheonly'})
my $wikineeded;

$wikineeded = "yes"
unless ( exists $options{'cacheonly'} );

$wikineeded = "yes"
if ( exists $options{'post'} );

if("yes" eq $wikineeded)
{
{
if( exists $options{'post'} )
unless(exists $options{'username'} and exists $options{'password'})
{
{
die $usage;
unless(exists $options{'username'} and exists $options{'password'})
{
use Data::Dumper;
print Dumper(%options);
die $usage;
}
}
}
}
}
}
}





Line 96: Line 106:
# Get a complete listing of the languages.
# Get a complete listing of the languages.
&out("Getting the languages.\n", 3);
&out("Getting the languages.\n", 3);
my $alllanguages = &getcategory('Category:Programming Languages');
my $alllanguages= &getcategory('Category:Programming Languages');


# We want the language name, not the fully-qualified wiki name.
# We want the language name, not the fully-qualified wiki name.
Line 119: Line 129:
&processimplediff(\%impldiff);
&processimplediff(\%impldiff);



# To add here: Post stats on activities:
# 2. Time last run started


my $runtime = time() - $starttime;
my $runtime = time() - $starttime;
Line 149: Line 156:
&out("Done\n", 3);
&out("Done\n", 3);



# END OF PROGRAM EXECUTION
exit(0);
exit(0);


#---------------------
# These are all the functions that break up our work
# into logical chunks.
#---------------------

# Builds a simple hash ref associating a page name with body.
# Used to help us prepare our postings in one pass, then commit them
# in a second pass.
sub build_posting
sub build_posting
{
{
Line 159: Line 176:
}
}



# Prepares the template body for the unimplemented data.
sub prep_unimp_posting
sub prep_unimp_posting
{
{
Line 167: Line 186:
foreach my $taskname (@$alltasks)
foreach my $taskname (@$alltasks)
{
{
# We want the task name, not the fully-qualified wiki name.
# If it's a category task, the task name will be slightly different.
my $baretaskname = $taskname;
my $baretaskname = $taskname;
$baretaskname =~ s/^Category://;
$baretaskname =~ s/^Category://;
my $implpage = $taskname;
$implpage =~ s/^Category:(.*)/$1\/$language/;

my $link;
if($taskname eq $baretaskname)
{
$link = "[[$taskname]]";
}
else
{
$link = "[[:$taskname|$baretaskname]]";
}
# Add the task to the unimplemented list, if it's unimplemented, and if it's not in the omit list.
# Add the task to the unimplemented list, if it's unimplemented, and if it's not in the omit list.
$unimplisting .= "* [[$baretaskname]]\n"
unless(exists $impldiff->{$language}->{$taskname} or exists $impldiff->{$language}->{'omit'}->{$taskname});
unless(exists $impldiff->{$language}->{'impl'}->{$implpage} or exists $impldiff->{$language}->{'omit'}->{$implpage})
{
$unimplisting .= "* $link\n";
}


}
}
Line 181: Line 215:
}
}


# Prepares the template body for the listings of omitted data for a page.
sub prep_omit_posting
sub prep_omit_posting
{
{
Line 205: Line 240:
}
}


# Prepares the page body that folks look at to find out what changed for a language.
sub prep_listing_posting
sub prep_listing_posting
{
{
Line 239: Line 275:
}
}


# Prepare the body of the omit category.
sub prep_omit_cat_posting
sub prep_omit_cat_posting
{
{
Line 246: Line 283:
}
}


# Prepare the page title for the page folks look at to see what changed.
sub get_listing_name
sub get_listing_name
{
{
Line 252: Line 290:
}
}


# Prepare the template name for the omit listing body
sub get_omit_template_name
sub get_omit_template_name
{
{
Line 258: Line 297:
}
}


# Prepare the template name for the unimplemented listing body
sub get_unimpl_template_name
sub get_unimpl_template_name
{
{
Line 264: Line 304:
}
}


# Return the name of the category to find the omitted pages for a particular language.
sub get_omit_cat_name
sub get_omit_cat_name
{
{
Line 270: Line 311:
}
}


# Prepare all the pages associated with a specific language.
sub process_language
sub process_language
{
{
Line 292: Line 334:
}
}


# Prepare and post all of the core pages for this run.
sub processimplediff
sub processimplediff
{
{
my $implediff = shift;
my $implediff = shift;
my %work;
my %work;

# Prepare all our work.
foreach my $language (keys %impldiff)
foreach my $language (keys %impldiff)
{
{
Line 303: Line 348:
}
}


# Now that we've prepared all our work, commit it.
foreach my $pagename (keys %work)
foreach my $pagename (keys %work)
{
{
Line 309: Line 355:
}
}


# Return the MediaWiki editor object.
sub geteditor
sub geteditor
{
{
Line 343: Line 390:
}
}


# Simple logging infrastructure. Currently sends to STDERR or STDOUT, which cron
# emails to the user, which gets forwarded to Short Circuit. (Anyone want a copy?)
sub out
sub out
{
{
Line 363: Line 412:
}
}


# Many popular filesystems can't handle : and \ in filenames.
# Since I plan to open the SVN repo we save to the rest of the world at some point,
# I'm trying to make sure the files are representable.
sub sanitizenamefs
sub sanitizenamefs
{
{
Line 370: Line 422:
}
}


# Find all the entries that are in the second list ref, but not the first.
sub diffcat_simple
sub diffcat_simple
{
{
Line 387: Line 440:
}
}


# Find all the entries that are in one listref, but not the other.
sub diffcat
sub diffcat
{
{
Line 396: Line 450:
}
}


# Post a page (or save it to disk, if we're testing.)
sub postpage
sub postpage
{
{
Line 433: Line 488:
# last ran, so that we can learn to avoid doing unnecessary work.
# last ran, so that we can learn to avoid doing unnecessary work.
# (Saves on server resources and bloated edit statistics...)
# (Saves on server resources and bloated edit statistics...)
# Also, I plan on publishing the cache files in a version control
# Also, the data gets saved to an SVN repo, so that multiple bots can
# use the history. We're using JSON, as JSON has broader
# system like SVN or Git, to allow multiple bots
# cross-language support than Data::Dumper, making it easier for others
# to share the data and to provide history. We're using JSON
# to use the data.
# Instead of Perl's native Data::Dumper as JSON has broader
# cross-language support, making it easier for others to use the data.
# SVN has the advantage that I already know how to use it.
# Git has the advantage in that I can use GitHub and not tax
# my Slice with Git traffic, and I don't have to punch a hole
# in the firewall to access more services.
sub cachedata
sub cachedata
{
{
Line 465: Line 515:
}
}


# Return data we cached previously.
sub getcacheddata
sub getcacheddata
{
{
Line 473: Line 524:
unless (open $infile, '<', $filename)
unless (open $infile, '<', $filename)
{
{
&out("Failed to load cached data $filename: $!\n", 1);
&out("Failed to load cached data $filename: $!\n", 5);
return [];
return [];
}
}
Line 488: Line 539:
}
}


# Report the changes between two categories.
sub getcategory
# More interesting than "x added, y removed"
sub reportcatchanges
{
{
my $categoryname = shift;
my $category = shift;
my $old = shift;
my $new = shift;


my ($removed, $added) = &diffcat($old, $new);
# Return the cache data if we're not supposed to query the database.
return &getcacheddata($categoryname)
if( exists $options{'cacheonly'} );


my $out = "Removed from $category:\n";

my $page;

foreach $page (@$removed)
{
$out .= "$page\n";
}

$out .= "Added to $category:\n";
foreach $page (@$added)
{
$out .= "$page\n";
}

&out($out, 2);
}

# Pull the category data, or cached data if we're not pulling from the wiki.
sub getcategory
{
my $categoryname = shift;
&out("Getting category contents for $categoryname...", 4);
&out("Getting category contents for $categoryname...", 4);
my @categorycontents = $editor->get_pages_in_category($categoryname);
&out(scalar @categorycontents . " members retrieved for $categoryname\n", 5);


my $old;
my ($removed, $added) = &diffcat(&getcacheddata($categoryname), \@categorycontents);
my $new;


if( exists $options{'cacheonly'} )
&out(scalar @$removed . " removed, " . @$added . " added to $categoryname\n", &getloglevelfromdiff(2, $removed, $added));
{
# Return the cache data if we're not supposed to query the database.
$old = [];
$new = &getcacheddata($categoryname);
}
else
{
$old = &getcacheddata($categoryname);
$new = [$editor->get_pages_in_category($categoryname)];
++$categorypulls;
}


&reportcatchanges($categoryname, $old, $new);
++$categorypulls;


&cachedata("$categoryname", \@categorycontents);
&cachedata("$categoryname", $new);


&out(scalar @$new . " members returned for $categoryname\n", 5);
return \@categorycontents;
return $new;
}
}


# Find if this category changed, report its contents if it has.
sub getwork
sub getwork
{
{
Line 542: Line 629:
}
}


# If changes occurred, the info is more important than if they didn't.
sub getloglevelfromdiff
sub getloglevelfromdiff
{
{
Line 552: Line 640:
}
}


# Find all the work items for a given language.
sub getlangwork
sub getlangwork
{
{
Line 583: Line 672:
}
}


# Commit the cache.
sub commitcache
sub commitcache
{
{
Line 615: Line 705:
}
}



# Wrap svn commands so we can log them.
sub svn
sub svn
{
{