User:ImplSearchBot/Code: Difference between revisions

Content added Content deleted
m (ImplSearchBot:0)
m (ImplSearchBot:0)
Line 4: Line 4:
use MediaWiki::Bot;
use MediaWiki::Bot;
use JSON qw/to_json from_json/;
use JSON qw/to_json from_json/;

our $editor;
our $starttime = time;
our $pagesedited = 0;
our $categorypulls = 0;
our $cachehits = 0;
our $task_count_change;
our $alltasks;
our %impldiff;
our %createdomitcategories;
our @initialcache;
our $alllanguages;


my %options;
my %options;

#I don't care to pollute my global scope.
{
my $wiki = 'rosettacode.org';
my $username;
my $password;
my $verbosity = 2; # verbosity level. 0 is silent. 1 is error only. 2 is updates. 3 is process, more is noisy.
my $post; # Is this an actual run?
my $cachepath = "cache/";
my $cacheonly; # Don't query the wiki for data. Just pull from cache.
my $nosvn;
my $rebuild_all;

my $opt_matrix = {
"wiki=s" => \$wiki,
"username=s" => \$username,
"password=s" => \$password,
"verbosity=s" => \$verbosity,
"post" => \$post,
"cacheonly" => \$cacheonly,
"nosvn" => \$nosvn,
"cachepath=s" => \$cachepath,
"rebuildall" => \$rebuild_all };

my $result = GetOptions( %$opt_matrix );
$options{'wiki'} = $wiki;

$options{'nosvn'} = $nosvn
if defined $nosvn;

$options{'username'} = $username
if defined $username;

$options{'password'} = $password
if defined $password;

$options{'post'} = $post
if defined $post;

$options{'verbosity'} = $verbosity;

$cachepath .= '/'
if('/' ne substr($cachepath, -1, 1));
$options{'cachepath'} = $cachepath;

$options{'cacheonly'} = $cacheonly
if defined $cacheonly;

$options{'rebuild_all'} = $rebuild_all
if defined $rebuild_all;

my $usage = "Usage: $0 (options)\n The available options are:\n";
$usage .= "\t--$_\n"
foreach (keys %$opt_matrix);

$usage .= "Username and password are required if you need to pull data from the wiki. Wiki defaults to Rosetta Code.\n";


my $wikineeded;

$wikineeded = "yes"
unless ( exists $options{'cacheonly'} );

$wikineeded = "yes"
if ( exists $options{'post'} );

if(defined $wikineeded)
{
unless(exists $options{'username'} and exists $options{'password'})
{
die $usage;
}
}
}



#Statistic tracking.
my $starttime = time;
my $pagesedited = 0;
my $categorypulls = 0;
my $cachehits = 0;

# Tracking for svn checkin at end.
# We *should* be the only ones writing to the cache path for now.
# Anyone else should have their own checkout of the data,
# and test runs are done with --cacheonly and --nosvn.
opendir(CACHEDIR, $options{'cachepath'})
or die "Unable to open cache directory";
my @initialcache = readdir(CACHEDIR);
closedir(CACHEDIR);

&out(scalar @initialcache . " categories initially cached\n", 4);

# Get our editor
my $editor = &geteditor();

# Get a complete listing of the tasks.
&out("Getting tasks\n", 3);
my $alltasks = &getcategory('Category:Programming Tasks');

# Get a complete listing of the languages.
&out("Getting the languages.\n", 3);
my $alllanguages= &getcategory('Category:Programming Languages');

# Quick check. Did we add or lose any tasks? If so, we've got to recalc *all*
# of the pages. :-/
my $task_count_change;
unless(exists $options{'rebuild_all'})
{
my $cacheddata = &getcacheddata('Category:Programming Tasks');
my ($added, $removed) = &diffcat($alltasks, $cacheddata);
$task_count_change = scalar @$added + scalar @$removed;
}
else
{
$task_count_change = 1; # It just needs to be nonzero...
}

# We want the language name, not the fully-qualified wiki name.
$_ =~ s/^Category:// foreach (@$alllanguages);

# Get a list of the languages for which we've already provided bodies for the related omit categories.
# Store it as a hash, so the lookup will be faster.
my $omitcatcontents = &getcategory('Category:Maintenance/OmitCategoriesCreated');
my %createdomitcategories = map {$_, 1} @$omitcatcontents;

&out("Identifying work to do\n", 3);

my %impldiff;

foreach my $lang (@$alllanguages)
{
my $val = &getlangwork($lang);
next unless defined $val;
$impldiff{$lang} = $val;
}

&processimplediff(\%impldiff);


my $runtime = time() - $starttime;
my $statsdata = "Pages edited last run: $pagesedited<br/>Time to post all per-language updates: $runtime seconds<br/>Category pulls: $categorypulls<br/>Cache hits: $cachehits<br/>";
&out("Updating stats page. Runtime ($runtime), Pages edited ($pagesedited)\n", 2);

&postpage("User:ImplSearchBot/Stats", $statsdata, "Updating stats data", 0);


&out("Updating bot code page\n", 4);

open my $sourcefile, '<', $0
or die "Finished without updating bot source page";

my $botsource;
$botsource .= $_ while <$sourcefile>;

close $sourcefile;

my $tag = "lang";

&postpage("User:ImplSearchBot/Code", "<$tag perl>$botsource</$tag>", 0);

&out("Updating cache\n", 4);
&commitcache();

&out("Done\n", 3);


# END OF PROGRAM EXECUTION
exit(0);


#---------------------
#---------------------
Line 189: Line 29:
sub build_posting
sub build_posting
{
{
my $name = shift;
my $name = shift;
my $body = shift;
my $body = shift;

return {$name => $body};
return {$name => $body};
}
}



# Prepares the template body for the unimplemented data.
# Prepares the template body for the unimplemented data.
sub prep_unimp_posting
sub prep_unimp_posting
{
{
my $impldiff = shift;
my $impldiff = shift;
my $language = shift;
my $language = shift;
my $unimplisting = "";
my $unimplisting = "";
foreach my $taskname (@$alltasks)
foreach my $taskname (@$alltasks)
{
{
# If it's a category task, the task name will be slightly different.
# If it's a category task, the task name will be slightly different.
my $baretaskname = $taskname;
my $baretaskname = $taskname;
$baretaskname =~ s/^Category://;
$baretaskname =~ s/^Category://;
my $implpage = $taskname;
my $implpage = $taskname;
$implpage =~ s/^Category:(.*)/$1\/$language/;
$implpage =~ s/^Category:(.*)/$1\/$language/;

my $link;
if($taskname eq $baretaskname)
{
$link = "[[$taskname]]";
}
else
{
$link = "[[:$taskname|$baretaskname]]";
}
# Add the task to the unimplemented list, if it's unimplemented, and if it's not in the omit list.
unless(exists $impldiff->{$language}->{'impl'}->{$implpage} or exists $impldiff->{$language}->{'omit'}->{$implpage})
{
$unimplisting .= "* $link\n";
}

}


my $escapedImplPage = $implpage;


$escapedImplPage =~ s/ /_/g;
return &build_posting("Template:" . &get_unimpl_template_name($language), $unimplisting);
my $link;
# if($taskname eq $baretaskname)
# {
$link = "[http://rosettacode.org/mw/index.php?action=edit&title=$escapedImplPage $implpage]";
# }
# else
# {
# $link = "[[:$taskname|$baretaskname]]";
# }
# Add the task to the unimplemented list, if it's unimplemented, and if it's not in the omit list.
unless(exists $impldiff->{$language}->{'impl'}->{$implpage} or exists $impldiff->{$language}->{'omit'}->{$implpage})
{
$unimplisting .= "* $link\n";
}
}
return &build_posting("Template:" . &get_unimpl_template_name($language), $unimplisting);
}
}


Line 237: Line 80:
sub prep_omit_posting
sub prep_omit_posting
{
{
my $implediff = shift;
my $implediff = shift;
my $language = shift;
my $language = shift;
my $omitlisting = "";
my $omitlisting = "";
my $omittemplatename = &get_omit_template_name($language);
my $omittemplatename = &get_omit_template_name($language);

foreach my $taskname (@$alltasks)
foreach my $taskname (@$alltasks)
{
{
# We want the task name, not the fully-qualified wiki name.
# We want the task name, not the fully-qualified wiki name.
my $baretaskname = $taskname;
my $baretaskname = $taskname;
$baretaskname =~ s/^Category://;
$baretaskname =~ s/^Category://;
# Add the task to the omission list, if it's omitted.
# Add the task to the omission list, if it's omitted.
$omitlisting .= "* [[$baretaskname]]\n"
my $escapedTaskName = $baretaskname;
$escapedTaskName =~ s/ /_/g;
if(exists $impldiff{$language}->{'omit'}->{$taskname})
$omitlisting .= "* [http://rosettacode.org/mw/index.php?action=edit&title=$escapedTaskName $baretaskname]\n"
}
if(exists $impldiff{$language}->{'omit'}->{$taskname})

}
# Note that there's no data in the template.
$implediff->{$language}->{'omit'} = 0
# Note that there's no data in the template.
if( "" eq $omitlisting );
$implediff->{$language}->{'omit'} = 0

return &build_posting("Template:$omittemplatename", $omitlisting);
if( "" eq $omitlisting );
return &build_posting("Template:$omittemplatename", $omitlisting);
}
}


Line 262: Line 107:
sub prep_listing_posting
sub prep_listing_posting
{
{
my $impldiff = shift;
my $impldiff = shift;
my $language = shift;
my $language = shift;

my $targetcount = $impldiff->{$language}->{'target_count'};
my $targetcount = $impldiff->{$language}->{'target_count'};
my $unimpcount = $impldiff->{$language}->{'unimp_count'};
my $unimpcount = $impldiff->{$language}->{'unimp_count'};

# Prepare template fields
# Prepare template fields
my $langfield = "|$language";
my $langfield = "|$language";
my $unimpfield = "|$unimpcount";
my $unimpfield = "|$unimpcount";
my $tcfield = "|$targetcount";
my $tcfield = "|$targetcount";
my $impperccalc = 0;
my $impperccalc = 0;
$impperccalc = (($targetcount - $unimpcount) / $targetcount) * 100
$impperccalc = (($targetcount - $unimpcount) / $targetcount) * 100
unless ($targetcount == 0);
unless ($targetcount == 0);

my $imppercfield = sprintf "|%u", $impperccalc;
my $imppercfield = sprintf "|%u", $impperccalc;
# Prepare the listing page format.
# Prepare the listing page format.
my $pagedata = '{{unimpl_header' . $langfield . $unimpfield . $tcfield . $imppercfield . '}}';
my $pagedata = '{{unimpl_header' . $langfield . $unimpfield . $tcfield . $imppercfield . '}}';
$pagedata .= "{{" . &get_unimpl_template_name($language) . "$unimpfield}}";
$pagedata .= "{{" . &get_unimpl_template_name($language) . "$unimpfield}}";

if (0 != $impldiff->{$language}->{'omit'})
if (0 != $impldiff->{$language}->{'omit'})
{
{
$pagedata .= "{{omit_header" . "$langfield}}";
$pagedata .= "{{omit_header" . "$langfield}}";
$pagedata .= "{{" . &get_omit_template_name($language) ."}}";
$pagedata .= "{{" . &get_omit_template_name($language) ."}}";
}
}

$pagedata .= "{{unimpl_footer$langfield}}";
$pagedata .= "{{unimpl_footer$langfield}}";

return &build_posting(&get_listing_name($language), $pagedata);
return &build_posting(&get_listing_name($language), $pagedata);
}
}


Line 297: Line 142:
sub prep_omit_cat_posting
sub prep_omit_cat_posting
{
{
my $implediff = shift;
my $implediff = shift;
my $language = shift;
my $language = shift;
return &build_posting(&get_omit_cat_name($language), "{{omit_cat|$language}}");
return &build_posting(&get_omit_cat_name($language), "{{omit_cat|$language}}");
}
}


Line 305: Line 150:
sub get_listing_name
sub get_listing_name
{
{
my $language = shift;
my $language = shift;
return "Tasks not implemented in $language";
return "Tasks not implemented in $language";
}
}


Line 312: Line 157:
sub get_omit_template_name
sub get_omit_template_name
{
{
my $language = shift;
my $language = shift;
return "unimp_omit_body_$language";
return "unimp_omit_body_$language";
}
}


Line 319: Line 164:
sub get_unimpl_template_name
sub get_unimpl_template_name
{
{
my $language = shift;
my $language = shift;
return "unimp_body_$language";
return "unimp_body_$language";
}
}


Line 326: Line 171:
sub get_omit_cat_name
sub get_omit_cat_name
{
{
my $language = shift;
my $language = shift;
return "Category:$language/Omit";
return "Category:$language/Omit";
}
}


Line 333: Line 178:
sub process_language
sub process_language
{
{
my $implediff = shift;
my $implediff = shift;
my $language = shift;
my $language = shift;


my $unimpl = &prep_unimp_posting($implediff, $language);
my $unimpl = &prep_unimp_posting($implediff, $language);
my $omit = &prep_omit_posting($implediff, $language);
my $omit = &prep_omit_posting($implediff, $language);
my $listing = &prep_listing_posting($implediff, $language);
my $listing = &prep_listing_posting($implediff, $language);

my %langpostings = ( %$unimpl, %$omit, %$listing );
my %langpostings = ( %$unimpl, %$omit, %$listing );


unless ( exists $createdomitcategories{&get_omit_cat_name($language)} )
unless ( exists $createdomitcategories{&get_omit_cat_name($language)} )
{
{
my $omit_cat = &prep_omit_cat_posting($implediff, $language);
my $omit_cat = &prep_omit_cat_posting($implediff, $language);
%langpostings = ( %langpostings, %$omit_cat );
%langpostings = ( %langpostings, %$omit_cat );
}
}
return \%langpostings;
}


sub wikitxt_pathname {
return \%langpostings;
my $page_name = $_[0];
"test/" . sanitizenamefs($page_name) . ".wikitxt";
}
}


Line 356: Line 206:
sub processimplediff
sub processimplediff
{
{
my $implediff = shift;
my $implediff = shift;
my %work;
my %work;

# Prepare all our work.
# Prepare all our work.
foreach my $language (keys %impldiff)
foreach my $language (keys %$implediff)
{
{
&out("Preparing data for:$language\n", 4);
# &out("Preparing data for:$language\n", 4);
my $workitem = &process_language($implediff, $language);
my $workitem = &process_language($implediff, $language);
%work = ( %work, %$workitem );
%work = ( %work, %$workitem );
}
}
# Now that we've prepared all our work, commit it.

foreach my $pagename (keys %work)
# Now that we've prepared all our work, commit it.
{
foreach my $pagename (keys %work)
&postpage($pagename, $work{$pagename}, "Updating $pagename", 0);
{
}
&postpage($pagename, $work{$pagename}, "Updating $pagename", 0);
}
}
}


# Return the Mediawiki editor obect.
# Return the Mediawiki editor object.
sub geteditor
sub geteditor
{
{
# If we're not posting, and we're only drawing from cache
# If we're not posting, and we're only drawing from cache
# We don't actually need to pull from the wiki.
# We don't actually need to pull from the wiki.
if( exists $options{'cacheonly'} )
if( exists $options{'cacheonly'} )
{
{
return undef
return undef
unless exists $options{'post'};
unless exists $options{'post'};
}
}

# Handles interaction with the wiki.
# Handles interaction with the wiki.
# Note that I had to modify HTTP::Message to make it work
# Note that I had to modify HTTP::Message to make it work
# HTTP::Message silently failed when presented by MW
# HTTP::Message silently failed when presented by MW
# with an encoding type of "application/json" or some such.
# with an encoding type of "application/json" or some such.
&out("Creating editor\n", 3);
&out("Creating editor\n", 3);
my $editor = MediaWiki::Bot->new('ImpleSearchBot');
my $editor = MediaWiki::Bot->new('ImpleSearchBot');
$editor->{debug} = 1;
$editor->{debug} = 1;

# Tell the editor to edit Rosetta Code. I'm sure Wikipedia didn't like
# Tell the editor to edit Rosetta Code. I'm sure Wikipedia didn't like
# my initial attempts from before I added this line.
# my initial attempts from before I added this line.
&out("Trying to set wiki.\n",3);
&out("Trying to set wiki.\n",3);
$editor->set_wiki('rosettacode.org','mw');
$editor->set_wiki('rosettacode.org','mw');

# If we're not posting, we don't need to log in if we're pulling from cache.
# If we're not posting, we don't need to log in if we're pulling from cache.
# Otherwise, attempt to log in.
# Otherwise, attempt to log in.
&out("Trying to log in.\n", 3);
&out("Trying to log in.\n", 3);
my $loginres = $editor->login($options{'username'}, $options{'password'});
my $loginres = $editor->login($options{'username'}, $options{'password'});

die "Unable to login: " . $loginres
die "Unable to login: " . $loginres
unless $loginres == 0;
unless $loginres == 0;
return $editor;
return $editor;
}
}


Line 413: Line 262:
sub out
sub out
{
{
my $string = shift;
my $string = shift;
my $loglevel = shift;
my $loglevel = shift;

chomp $string;
chomp $string;

if($options{'verbosity'} >= $loglevel)
if($options{'verbosity'} >= $loglevel)
{
{
if($loglevel == 1)
if($loglevel == 1)
{
{
warn $string; # use stderr.
warn $string; # use stderr.
}
}
else
else
{
{
print "$string\n";
print "$string\n";
}
}
}
}
}
}


Line 436: Line 285:
sub sanitizenamefs
sub sanitizenamefs
{
{
my $pagename = shift;
my $pagename = shift;
$pagename =~ tr/:\//__/;
$pagename =~ tr/:\//__/;
return $pagename;
return $pagename;
}
}


Line 444: Line 293:
sub diffcat_simple
sub diffcat_simple
{
{
my $first = shift;
my $first = shift;
my $second = shift;
my $second = shift;
my %firsthash = map { $_, 1 } @$first;
my %firsthash = map { $_, 1 } @$first;

my @new = ();
my @new = ();

foreach my $secondelement (@$second)
foreach my $secondelement (@$second)
{
{
push @new, $secondelement
push @new, $secondelement
unless exists $firsthash{$secondelement};
unless exists $firsthash{$secondelement};
}
}

return @new;
return @new;
}
}


Line 462: Line 311:
sub diffcat
sub diffcat
{
{
my $first = shift;
my $first = shift;
my $second = shift;
my $second = shift;
my @onlyinfirst = &diffcat_simple($second, $first);
my @onlyinfirst = &diffcat_simple($second, $first);
my @onlyinsecond = &diffcat_simple($first, $second);
my @onlyinsecond = &diffcat_simple($first, $second);
return (\@onlyinfirst, \@onlyinsecond);
return (\@onlyinfirst, \@onlyinsecond);
}

sub slurp_file {
my $pathname = $_[0];
open(my $stream, "<", $pathname) or do {warn"Can't open '$pathname': $!\n";
return};
my $ret;
while (1) {
my $len = read($stream, my $buf, 1024);
if (!defined($len)) { die "I/O error while reading '$pathname': $!"; }
elsif ($len == 0) { last }
else { $ret .= $buf }
}
$ret;
}
}


Line 472: Line 335:
sub postpage
sub postpage
{
{
my $pagename = shift;
my $pagename = shift;
my $pagedata = shift;
my $pagedata = shift;
my $remark = shift;
my $remark = shift;
my $minoredit = shift;
my $minoredit = shift;


# MediaWiki won't let us create blank, empty pages.
# MediaWiki won't let us create blank, empty pages.
# But since we don't want to query (or cache) to see if
# But since we don't want to query (or cache) to see if
# the page already exists, we'll just add an HTML
# the page already exists, we'll just add an HTML
# non-breaking-space entity if the page is truly empty.
# non-breaking-space entity if the page is truly empty.
$pagedata = "&nbsp;"
$pagedata = "&nbsp;"
if(0 == length $pagedata);
if(0 == length $pagedata);

++$pagesedited;
++$pagesedited;

unless( exists $options{'post'} )
unless( exists $options{'post'} )
{
{
$pagename = wikitxt_pathname($pagename);
# save it to disk, and out of the way.
# save it to disk, and out of the way.
$pagename = "test/" . &sanitizenamefs($pagename);
$pagename .= ".wikitxt";
&out("Saving: $pagename\n", 2);

&out("Saving: $pagename\n", 2);
open my $outfile, '>', $pagename
or &out("Failed to open $pagename: $!", 1);
return unless defined $outfile;
print $outfile $pagedata;
close $outfile;
}
else {
open my $outfile, '>', $pagename
&out("Posting $pagename\n", 2);
or &log("Failed to open $pagename: $!", 1);
$editor->edit($pagename, $pagedata, "ImplSearchBot:$remark", $minoredit)

or &out("Failed to post page: " . $editor->{'errstr'}, 1);
return unless defined $outfile;
}

print $outfile $pagedata;
close $outfile;
}
else
{
sleep 1;
&out("Posting $pagename\n", 2);

$editor->edit($pagename, $pagedata, "ImplSearchBot:$remark", $minoredit)
or &out("Failed to post page: " . $editor->{'errstr'}, 1);
}
}
}

# This allows us to compare site state between now and when we
# This allows us to compare site state between now and when we
# last ran, so that we can learn to avoid doing unnecessary work.
# last ran, so that we can learn to avoid doing unnecessary work.
Line 537: Line 398:
return;
return;
}
}

print $outfile to_json($data);
print $outfile to_json($data);
close $outfile;
close $outfile;
&out(scalar $data . " members cached to $filename.\n", 5);
&out(scalar $data . " members cached to $filename.\n", 5);
}
}

# Return data we cached previously.
# Return data we cached previously.
sub getcacheddata
sub getcacheddata
Line 555: Line 416:
return [];
return [];
}
}

my $jsondata;
my $jsondata;
$jsondata .= $_ while <$infile>;
$jsondata .= $_ while <$infile>;
close $infile;
close $infile;

my $cacheddata = from_json($jsondata);
my $cacheddata = from_json($jsondata);
&out(scalar @$cacheddata . " cache members retrieved from $filename\n", 5);
&out(scalar @$cacheddata . " cache members retrieved from $filename\n", 5);

++$cachehits;
++$cachehits;
return $cacheddata;
return $cacheddata;
}
}

# Report the changes between two categories.
# Report the changes between two categories.
# More interesting than "x added, y removed"
# More interesting than "x added, y removed"
Line 574: Line 435:
my $old = shift;
my $old = shift;
my $new = shift;
my $new = shift;

my ($removed, $added) = &diffcat($old, $new);
my ($removed, $added) = &diffcat($old, $new);

my $out = "";
my $out = "";

$out .= "Removed from $category:\n"
$out .= "Removed from $category:\n"
if( scalar @$removed > 0 );
if( scalar @$removed > 0 );
$out .= "$_\n"
$out .= "$_\n"
foreach (@$removed);
foreach (@$removed);

$out .= "Added to $category:\n"
$out .= "Added to $category:\n"
if( scalar @$added > 0 );
if( scalar @$added > 0 );
$out .= "$_\n"
$out .= "$_\n"
foreach (@$added);
foreach (@$added);

&out($out, 2)
&out($out, 2)
if("" ne $out);
if("" ne $out);
}
}

# Pull the category data, or cached data if we're not pulling from the wiki.
# Pull the category data, or cached data if we're not pulling from the wiki.
sub getcategory
sub getcategory
Line 598: Line 459:
my $categoryname = shift;
my $categoryname = shift;
&out("Getting category contents for $categoryname...", 4);
&out("Getting category contents for $categoryname...", 4);

my $old;
my $old;
my $new;
my $new;

if( exists $options{'cacheonly'} )
if( exists $options{'cacheonly'} )
{
{
Line 615: Line 476:
sleep 1;
sleep 1;
}
}

&reportcatchanges($categoryname, $old, $new);
# &reportcatchanges($categoryname, $old, $new);

&cachedata("$categoryname", $new);
&cachedata("$categoryname", $new);

&out(scalar @$new . " members returned for $categoryname\n", 5);
&out(scalar @$new . " members returned for $categoryname\n", 5);

return $new;
return $new;
}
}

# Find if this category changed, report its contents if it has.
# Find if this category changed, report its contents if it has.
sub getwork
sub getwork
Line 631: Line 492:
&out("Getting work for $categoryname.\n",4);
&out("Getting work for $categoryname.\n",4);
my $cacheddata = &getcacheddata($categoryname);
my $cacheddata = &getcacheddata($categoryname);

# If we're on a cache-only basis, we'll just say we have no old data,
# If we're on a cache-only basis, we'll just say we have no old data,
# and that our cached data is our new data.
# and that our cached data is our new data.
Line 646: Line 507:
$newdata = &getcategory($categoryname);
$newdata = &getcategory($categoryname);
}
}

my ($removed, $added) = &diffcat($olddata, $newdata);
my ($removed, $added) = &diffcat($olddata, $newdata);
my $impl_count_change = scalar @$removed + @$added;
my $impl_count_change = scalar @$removed + @$added;
Line 652: Line 513:
$work = $newdata
$work = $newdata
if(($impl_count_change + $task_count_change) > 0);
if(($impl_count_change + $task_count_change) > 0);
&out(scalar @$work . " items to process for $categoryname.\n", 3);
&out(scalar @$work . " items to process for $categoryname.\n", 3);

return @$work;
return @$work;
}
}

# If changes occurred, the info is more important than if they didn't.
# If changes occurred, the info is more important than if they didn't.
sub getloglevelfromdiff
sub getloglevelfromdiff
Line 668: Line 529:
return $base + 1;
return $base + 1;
}
}

# Find all the work items for a given language.
# Find all the work items for a given language.
sub getlangwork
sub getlangwork
Line 684: Line 545:
$omit{$workitem} = 1;
$omit{$workitem} = 1;
}
}

# No work to do?
# No work to do?
return undef
return undef
if( ( 0 == scalar keys %omit) && (0 == scalar keys %impl));
if( ( 0 == scalar keys %omit) && (0 == scalar keys %impl));
# Language metadata
# Language metadata
my $unimpcount = scalar @$alltasks - scalar keys %impl;
my $unimpcount = scalar @$alltasks - scalar keys %impl;
my $targetcount = (scalar @$alltasks - scalar keys %omit);
my $targetcount = (scalar @$alltasks - scalar keys %omit);


# work to do.
# work to do.
return {'impl' => \%impl,
return {'impl' => \%impl,
Line 700: Line 561:
'target_count' => $targetcount };
'target_count' => $targetcount };
}
}

# Commit the cache.
# Commit the cache.
sub commitcache
sub commitcache
Line 707: Line 568:
my $cachepath = $options{'cachepath'};
my $cachepath = $options{'cachepath'};
opendir(CACHEDIR, $cachepath);
opendir(CACHEDIR, $cachepath);

my @current = readdir(CACHEDIR);
my @current = readdir(CACHEDIR);
close(CACHEDIR);
close(CACHEDIR);

# We need to run svn adds if we've created any new files.
# We need to run svn adds if we've created any new files.
# Maybe we'll use SVN::Client some day. Not right now.
# Maybe we'll use SVN::Client some day. Not right now.
my ($added, $removed) = &diffcat(\@current, \@initialcache);
my ($added, $removed) = &diffcat(\@current, \@initialcache);
&out("Detected " . scalar @$added . " new cache files and " . scalar @$removed . " removed\n",&getloglevelfromdiff(2, $added, $removed));
&out("Detected " . scalar @$added . " new cache files and " . scalar @$removed . " removed\n",&getloglevelfromdiff(2, $added, $removed));
if ((scalar @$added + scalar @$removed ) > 0)
if ((scalar @$added + scalar @$removed ) > 0)
{
{
Line 723: Line 584:
&svn('add', $cachepath . $cachefile);;
&svn('add', $cachepath . $cachefile);;
}
}

foreach my $cachefile (@$removed)
foreach my $cachefile (@$removed)
{
{
Line 729: Line 590:
}
}
}
}

&svn('ci', '--message="ImplSearchBot run"', $cachepath);
&svn('ci', '--message="ImplSearchBot run"', $cachepath);
&svn('update', $cachepath);
&svn('update', $cachepath);
}
}


# Wrap svn commands so we can log them.
# Wrap svn commands so we can log them.
sub svn
sub svn
Line 741: Line 602:
if(exists $options{'nosvn'});
if(exists $options{'nosvn'});
my @args = @_;
my @args = @_;

my $string = "system 'svn'";
my $string = "system 'svn'";
$string .= ", '$_'"
$string .= ", '$_'"
foreach (@args);
foreach (@args);

$string .= "\n";
$string .= "\n";

&out($string, 3);
&out($string, 3);

system 'svn', @args
system 'svn', @args
if(exists $options{'post'});
if(exists $options{'post'});
}
}

sub getopt {
#I don't care to pollute my global scope.
my $wiki = 'rosettacode.org';
my $username;
my $password;
my $verbosity = 2; # verbosity level. 0 is silent. 1 is error only. 2 is updates. 3 is process, more is noisy.
my $post; # Is this an actual run?
my $cachepath = "/tmp/";
my $cacheonly; # Don't query the wiki for data. Just pull from cache.
my $nosvn;
my $rebuild_all;
my $opt_matrix = {
"wiki=s" => \$wiki,
"username=s" => \$username,
"password=s" => \$password,
"verbosity=s" => \$verbosity,
"post" => \$post,
"cacheonly" => \$cacheonly,
"nosvn" => \$nosvn,
"cachepath=s" => \$cachepath,
"rebuildall" => \$rebuild_all };
my $result = GetOptions( %$opt_matrix );
$options{'wiki'} = $wiki;
$options{'nosvn'} = $nosvn
if defined $nosvn;
$options{'username'} = $username
if defined $username;
$options{'password'} = $password
if defined $password;
$options{'post'} = $post
if defined $post;
$options{'verbosity'} = $verbosity;
$cachepath .= '/'
if('/' ne substr($cachepath, -1, 1));
$options{'cachepath'} = $cachepath;
$options{'cacheonly'} = $cacheonly
if defined $cacheonly;
$options{'rebuild_all'} = $rebuild_all
if defined $rebuild_all;
my $usage = "Usage: $0 (options)\n The available options are:\n";
$usage .= "\t--$_\n"
foreach (keys %$opt_matrix);
$usage .= "Username and password are required if you need to pull data from the wiki. Wiki defaults to Rosetta Code.\n";
my $wikineeded;
$wikineeded = "yes"
unless ( exists $options{'cacheonly'} );
$wikineeded = "yes"
if ( exists $options{'post'} );
if(defined $wikineeded)
{
unless(exists $options{'username'} and exists $options{'password'})
{
die $usage;
}
}
}
sub main {

unless (@ARGV) { @ARGV = @_; }

getopt();
#Statistic tracking.
# Tracking for svn checkin at end.
# We *should* be the only ones writing to the cache path for now.
# Anyone else should have their own checkout of the data,
# and test runs are done with --cacheonly and --nosvn.
opendir(my $cachedir, $options{'cachepath'})
or die "Unable to open cache directory";
@initialcache = readdir($cachedir);
closedir($cachedir);
&out(scalar @initialcache . " categories initially cached\n", 4);
# Get our editor
$editor = &geteditor();
# Get a complete listing of the tasks.
&out("Getting tasks\n", 3);
$alltasks = &getcategory('Category:Programming Tasks');
# Get a complete listing of the languages.
&out("Getting the languages.\n", 3);
$alllanguages= &getcategory('Category:Programming Languages');
# Quick check. Did we add or lose any tasks? If so, we've got to recalc *all*
# of the pages. :-/
unless(exists $options{'rebuild_all'}) {
my $cacheddata = &getcacheddata('Category:Programming Tasks');
my ($added, $removed) = &diffcat($alltasks, $cacheddata);
$task_count_change = scalar @$added + scalar @$removed;
}
else {
$task_count_change = 1; # It just needs to be nonzero...
}

# We want the language name, not the fully-qualified wiki name.
$_ =~ s/^Category:// foreach (@$alllanguages);

# Get a list of the languages for which we've already provided bodies for the related omit categories.
# Store it as a hash, so the lookup will be faster.
my $omitcatcontents = &getcategory('Category:Maintenance/OmitCategoriesCreated');
my %createdomitcategories = map {$_, 1} @$omitcatcontents;

&out("Identifying work to do\n", 3);

foreach my $lang (@$alllanguages) {
my $val = &getlangwork($lang);
next unless defined $val;
$impldiff{$lang} = $val;
}

&processimplediff(\%impldiff);


my $runtime = time() - $starttime;
my $statsdata = "Pages edited last run: $pagesedited<br/>Time to post all per-language updates: $runtime seconds<br/>Category pulls: $categorypulls<br/>Cache hits: $cachehits<br/>";
&out("Updating stats page. Runtime ($runtime), Pages edited ($pagesedited)\n", 2);

&postpage("User:ImplSearchBot/Stats", $statsdata, "Updating stats data", 0);


&out("Updating bot code page\n", 4);

open my $sourcefile, '<', $0
or die "Finished without updating bot source page";

my $botsource;
$botsource .= $_ while <$sourcefile>;

close $sourcefile;

my $tag = "lang";

&postpage("User:ImplSearchBot/Code", "<$tag perl>$botsource</$tag>", 0);

&out("Updating cache\n", 4);
&commitcache();

&out("Done\n", 3);

exit 0
}

if (!caller) { exit main }
</lang>
</lang>