User:ImplSearchBot/Code: Difference between revisions
Content added Content deleted
m (ImplSearchBot:0) |
m (ImplSearchBot:0) |
||
Line 4: | Line 4: | ||
use MediaWiki::Bot; |
use MediaWiki::Bot; |
||
use JSON qw/to_json from_json/; |
use JSON qw/to_json from_json/; |
||
our $editor; |
|||
our $starttime = time; |
|||
our $pagesedited = 0; |
|||
our $categorypulls = 0; |
|||
our $cachehits = 0; |
|||
our $task_count_change; |
|||
our $alltasks; |
|||
our %impldiff; |
|||
our %createdomitcategories; |
|||
our @initialcache; |
|||
our $alllanguages; |
|||
my %options; |
my %options; |
||
#I don't care to pollute my global scope. |
|||
{ |
|||
my $wiki = 'rosettacode.org'; |
|||
my $username; |
|||
my $password; |
|||
my $verbosity = 2; # verbosity level. 0 is silent. 1 is error only. 2 is updates. 3 is process, more is noisy. |
|||
my $post; # Is this an actual run? |
|||
my $cachepath = "cache/"; |
|||
my $cacheonly; # Don't query the wiki for data. Just pull from cache. |
|||
my $nosvn; |
|||
my $rebuild_all; |
|||
my $opt_matrix = { |
|||
"wiki=s" => \$wiki, |
|||
"username=s" => \$username, |
|||
"password=s" => \$password, |
|||
"verbosity=s" => \$verbosity, |
|||
"post" => \$post, |
|||
"cacheonly" => \$cacheonly, |
|||
"nosvn" => \$nosvn, |
|||
"cachepath=s" => \$cachepath, |
|||
"rebuildall" => \$rebuild_all }; |
|||
my $result = GetOptions( %$opt_matrix ); |
|||
$options{'wiki'} = $wiki; |
|||
$options{'nosvn'} = $nosvn |
|||
if defined $nosvn; |
|||
$options{'username'} = $username |
|||
if defined $username; |
|||
$options{'password'} = $password |
|||
if defined $password; |
|||
$options{'post'} = $post |
|||
if defined $post; |
|||
$options{'verbosity'} = $verbosity; |
|||
$cachepath .= '/' |
|||
if('/' ne substr($cachepath, -1, 1)); |
|||
$options{'cachepath'} = $cachepath; |
|||
$options{'cacheonly'} = $cacheonly |
|||
if defined $cacheonly; |
|||
$options{'rebuild_all'} = $rebuild_all |
|||
if defined $rebuild_all; |
|||
my $usage = "Usage: $0 (options)\n The available options are:\n"; |
|||
$usage .= "\t--$_\n" |
|||
foreach (keys %$opt_matrix); |
|||
$usage .= "Username and password are required if you need to pull data from the wiki. Wiki defaults to Rosetta Code.\n"; |
|||
my $wikineeded; |
|||
$wikineeded = "yes" |
|||
unless ( exists $options{'cacheonly'} ); |
|||
$wikineeded = "yes" |
|||
if ( exists $options{'post'} ); |
|||
if(defined $wikineeded) |
|||
{ |
|||
unless(exists $options{'username'} and exists $options{'password'}) |
|||
{ |
|||
die $usage; |
|||
} |
|||
} |
|||
} |
|||
#Statistic tracking. |
|||
my $starttime = time; |
|||
my $pagesedited = 0; |
|||
my $categorypulls = 0; |
|||
my $cachehits = 0; |
|||
# Tracking for svn checkin at end. |
|||
# We *should* be the only ones writing to the cache path for now. |
|||
# Anyone else should have their own checkout of the data, |
|||
# and test runs are done with --cacheonly and --nosvn. |
|||
opendir(CACHEDIR, $options{'cachepath'}) |
|||
or die "Unable to open cache directory"; |
|||
my @initialcache = readdir(CACHEDIR); |
|||
closedir(CACHEDIR); |
|||
&out(scalar @initialcache . " categories initially cached\n", 4); |
|||
# Get our editor |
|||
my $editor = &geteditor(); |
|||
# Get a complete listing of the tasks. |
|||
&out("Getting tasks\n", 3); |
|||
my $alltasks = &getcategory('Category:Programming Tasks'); |
|||
# Get a complete listing of the languages. |
|||
&out("Getting the languages.\n", 3); |
|||
my $alllanguages= &getcategory('Category:Programming Languages'); |
|||
# Quick check. Did we add or lose any tasks? If so, we've got to recalc *all* |
|||
# of the pages. :-/ |
|||
my $task_count_change; |
|||
unless(exists $options{'rebuild_all'}) |
|||
{ |
|||
my $cacheddata = &getcacheddata('Category:Programming Tasks'); |
|||
my ($added, $removed) = &diffcat($alltasks, $cacheddata); |
|||
$task_count_change = scalar @$added + scalar @$removed; |
|||
} |
|||
else |
|||
{ |
|||
$task_count_change = 1; # It just needs to be nonzero... |
|||
} |
|||
# We want the language name, not the fully-qualified wiki name. |
|||
$_ =~ s/^Category:// foreach (@$alllanguages); |
|||
# Get a list of the languages for which we've already provided bodies for the related omit categories. |
|||
# Store it as a hash, so the lookup will be faster. |
|||
my $omitcatcontents = &getcategory('Category:Maintenance/OmitCategoriesCreated'); |
|||
my %createdomitcategories = map {$_, 1} @$omitcatcontents; |
|||
&out("Identifying work to do\n", 3); |
|||
my %impldiff; |
|||
foreach my $lang (@$alllanguages) |
|||
{ |
|||
my $val = &getlangwork($lang); |
|||
next unless defined $val; |
|||
$impldiff{$lang} = $val; |
|||
} |
|||
&processimplediff(\%impldiff); |
|||
my $runtime = time() - $starttime; |
|||
my $statsdata = "Pages edited last run: $pagesedited<br/>Time to post all per-language updates: $runtime seconds<br/>Category pulls: $categorypulls<br/>Cache hits: $cachehits<br/>"; |
|||
&out("Updating stats page. Runtime ($runtime), Pages edited ($pagesedited)\n", 2); |
|||
&postpage("User:ImplSearchBot/Stats", $statsdata, "Updating stats data", 0); |
|||
&out("Updating bot code page\n", 4); |
|||
open my $sourcefile, '<', $0 |
|||
or die "Finished without updating bot source page"; |
|||
my $botsource; |
|||
$botsource .= $_ while <$sourcefile>; |
|||
close $sourcefile; |
|||
my $tag = "lang"; |
|||
&postpage("User:ImplSearchBot/Code", "<$tag perl>$botsource</$tag>", 0); |
|||
&out("Updating cache\n", 4); |
|||
&commitcache(); |
|||
&out("Done\n", 3); |
|||
# END OF PROGRAM EXECUTION |
|||
exit(0); |
|||
#--------------------- |
#--------------------- |
||
Line 189: | Line 29: | ||
sub build_posting |
sub build_posting |
||
{ |
{ |
||
my $name = shift; |
|||
my $body = shift; |
|||
return {$name => $body}; |
|||
} |
} |
||
# Prepares the template body for the unimplemented data. |
# Prepares the template body for the unimplemented data. |
||
sub prep_unimp_posting |
sub prep_unimp_posting |
||
{ |
{ |
||
my $impldiff = shift; |
|||
my $language = shift; |
|||
my $unimplisting = ""; |
|||
foreach my $taskname (@$alltasks) |
|||
{ |
|||
{ |
|||
# If it's a category task, the task name will be slightly different. |
|||
my $baretaskname = $taskname; |
|||
$baretaskname =~ s/^Category://; |
|||
my $implpage = $taskname; |
|||
$implpage =~ s/^Category:(.*)/$1\/$language/; |
|||
my $link; |
|||
if($taskname eq $baretaskname) |
|||
{ |
|||
$link = "[[$taskname]]"; |
|||
} |
|||
else |
|||
{ |
|||
$link = "[[:$taskname|$baretaskname]]"; |
|||
} |
|||
# Add the task to the unimplemented list, if it's unimplemented, and if it's not in the omit list. |
|||
unless(exists $impldiff->{$language}->{'impl'}->{$implpage} or exists $impldiff->{$language}->{'omit'}->{$implpage}) |
|||
{ |
|||
$unimplisting .= "* $link\n"; |
|||
} |
|||
} |
|||
my $escapedImplPage = $implpage; |
|||
$escapedImplPage =~ s/ /_/g; |
|||
return &build_posting("Template:" . &get_unimpl_template_name($language), $unimplisting); |
|||
my $link; |
|||
# if($taskname eq $baretaskname) |
|||
# { |
|||
$link = "[http://rosettacode.org/mw/index.php?action=edit&title=$escapedImplPage $implpage]"; |
|||
# } |
|||
# else |
|||
# { |
|||
# $link = "[[:$taskname|$baretaskname]]"; |
|||
# } |
|||
# Add the task to the unimplemented list, if it's unimplemented, and if it's not in the omit list. |
|||
unless(exists $impldiff->{$language}->{'impl'}->{$implpage} or exists $impldiff->{$language}->{'omit'}->{$implpage}) |
|||
{ |
|||
$unimplisting .= "* $link\n"; |
|||
} |
|||
} |
|||
return &build_posting("Template:" . &get_unimpl_template_name($language), $unimplisting); |
|||
} |
} |
||
Line 237: | Line 80: | ||
sub prep_omit_posting |
sub prep_omit_posting |
||
{ |
{ |
||
my $implediff = shift; |
|||
my $language = shift; |
|||
my $omitlisting = ""; |
|||
my $omittemplatename = &get_omit_template_name($language); |
|||
foreach my $taskname (@$alltasks) |
|||
{ |
|||
{ |
|||
# We want the task name, not the fully-qualified wiki name. |
|||
my $baretaskname = $taskname; |
|||
$baretaskname =~ s/^Category://; |
|||
# Add the task to the omission list, if it's omitted. |
|||
my $escapedTaskName = $baretaskname; |
|||
$escapedTaskName =~ s/ /_/g; |
|||
if(exists $impldiff{$language}->{'omit'}->{$taskname}) |
|||
$omitlisting .= "* [http://rosettacode.org/mw/index.php?action=edit&title=$escapedTaskName $baretaskname]\n" |
|||
} |
|||
if(exists $impldiff{$language}->{'omit'}->{$taskname}) |
|||
} |
|||
# Note that there's no data in the template. |
|||
$implediff->{$language}->{'omit'} = 0 |
|||
# Note that there's no data in the template. |
|||
if( "" eq $omitlisting ); |
|||
$implediff->{$language}->{'omit'} = 0 |
|||
if( "" eq $omitlisting ); |
|||
return &build_posting("Template:$omittemplatename", $omitlisting); |
|||
} |
} |
||
Line 262: | Line 107: | ||
sub prep_listing_posting |
sub prep_listing_posting |
||
{ |
{ |
||
my $impldiff = shift; |
|||
my $language = shift; |
|||
my $targetcount = $impldiff->{$language}->{'target_count'}; |
|||
my $unimpcount = $impldiff->{$language}->{'unimp_count'}; |
|||
# Prepare template fields |
|||
my $langfield = "|$language"; |
|||
my $unimpfield = "|$unimpcount"; |
|||
my $tcfield = "|$targetcount"; |
|||
my $impperccalc = 0; |
|||
$impperccalc = (($targetcount - $unimpcount) / $targetcount) * 100 |
|||
unless ($targetcount == 0); |
unless ($targetcount == 0); |
||
my $imppercfield = sprintf "|%u", $impperccalc; |
|||
# Prepare the listing page format. |
|||
my $pagedata = '{{unimpl_header' . $langfield . $unimpfield . $tcfield . $imppercfield . '}}'; |
|||
$pagedata .= "{{" . &get_unimpl_template_name($language) . "$unimpfield}}"; |
|||
if (0 != $impldiff->{$language}->{'omit'}) |
|||
{ |
|||
{ |
|||
$pagedata .= "{{omit_header" . "$langfield}}"; |
|||
$pagedata .= "{{" . &get_omit_template_name($language) ."}}"; |
|||
} |
|||
} |
|||
$pagedata .= "{{unimpl_footer$langfield}}"; |
|||
return &build_posting(&get_listing_name($language), $pagedata); |
|||
} |
} |
||
Line 297: | Line 142: | ||
sub prep_omit_cat_posting |
sub prep_omit_cat_posting |
||
{ |
{ |
||
my $implediff = shift; |
|||
my $language = shift; |
|||
return &build_posting(&get_omit_cat_name($language), "{{omit_cat|$language}}"); |
|||
} |
} |
||
Line 305: | Line 150: | ||
sub get_listing_name |
sub get_listing_name |
||
{ |
{ |
||
my $language = shift; |
|||
return "Tasks not implemented in $language"; |
|||
} |
} |
||
Line 312: | Line 157: | ||
sub get_omit_template_name |
sub get_omit_template_name |
||
{ |
{ |
||
my $language = shift; |
|||
return "unimp_omit_body_$language"; |
|||
} |
} |
||
Line 319: | Line 164: | ||
sub get_unimpl_template_name |
sub get_unimpl_template_name |
||
{ |
{ |
||
my $language = shift; |
|||
return "unimp_body_$language"; |
|||
} |
} |
||
Line 326: | Line 171: | ||
sub get_omit_cat_name |
sub get_omit_cat_name |
||
{ |
{ |
||
my $language = shift; |
|||
return "Category:$language/Omit"; |
|||
} |
} |
||
Line 333: | Line 178: | ||
sub process_language |
sub process_language |
||
{ |
{ |
||
my $implediff = shift; |
|||
my $language = shift; |
|||
my $unimpl = &prep_unimp_posting($implediff, $language); |
|||
my $omit = &prep_omit_posting($implediff, $language); |
|||
my $listing = &prep_listing_posting($implediff, $language); |
|||
my %langpostings = ( %$unimpl, %$omit, %$listing ); |
|||
unless ( exists $createdomitcategories{&get_omit_cat_name($language)} ) |
|||
{ |
|||
{ |
|||
my $omit_cat = &prep_omit_cat_posting($implediff, $language); |
|||
%langpostings = ( %langpostings, %$omit_cat ); |
|||
} |
|||
} |
|||
return \%langpostings; |
|||
} |
|||
sub wikitxt_pathname { |
|||
return \%langpostings; |
|||
my $page_name = $_[0]; |
|||
"test/" . sanitizenamefs($page_name) . ".wikitxt"; |
|||
} |
} |
||
Line 356: | Line 206: | ||
sub processimplediff |
sub processimplediff |
||
{ |
{ |
||
my $implediff = shift; |
|||
my %work; |
|||
# Prepare all our work. |
|||
foreach my $language (keys %$implediff) |
|||
{ |
|||
{ |
|||
# &out("Preparing data for:$language\n", 4); |
|||
my $workitem = &process_language($implediff, $language); |
|||
%work = ( %work, %$workitem ); |
|||
} |
|||
} |
|||
# Now that we've prepared all our work, commit it. |
|||
foreach my $pagename (keys %work) |
|||
# Now that we've prepared all our work, commit it. |
|||
{ |
|||
foreach my $pagename (keys %work) |
|||
&postpage($pagename, $work{$pagename}, "Updating $pagename", 0); |
|||
{ |
|||
} |
|||
&postpage($pagename, $work{$pagename}, "Updating $pagename", 0); |
|||
} |
|||
} |
} |
||
# Return the Mediawiki editor |
# Return the Mediawiki editor object. |
||
sub geteditor |
sub geteditor |
||
{ |
{ |
||
# If we're not posting, and we're only drawing from cache |
|||
# We don't actually need to pull from the wiki. |
|||
if( exists $options{'cacheonly'} ) |
|||
{ |
|||
{ |
|||
return undef |
|||
unless exists $options{'post'}; |
|||
} |
|||
} |
|||
# Handles interaction with the wiki. |
|||
# Note that I had to modify HTTP::Message to make it work |
|||
# HTTP::Message silently failed when presented by MW |
|||
# with an encoding type of "application/json" or some such. |
|||
&out("Creating editor\n", 3); |
|||
my $editor = MediaWiki::Bot->new('ImpleSearchBot'); |
|||
$editor->{debug} = 1; |
|||
# Tell the editor to edit Rosetta Code. I'm sure Wikipedia didn't like |
|||
# my initial attempts from before I added this line. |
|||
&out("Trying to set wiki.\n",3); |
|||
$editor->set_wiki('rosettacode.org','mw'); |
|||
# If we're not posting, we don't need to log in if we're pulling from cache. |
|||
# Otherwise, attempt to log in. |
|||
&out("Trying to log in.\n", 3); |
|||
my $loginres = $editor->login($options{'username'}, $options{'password'}); |
|||
die "Unable to login: " . $loginres |
|||
unless $loginres == 0; |
|||
return $editor; |
|||
} |
} |
||
Line 413: | Line 262: | ||
sub out |
sub out |
||
{ |
{ |
||
my $string = shift; |
|||
my $loglevel = shift; |
|||
chomp $string; |
|||
if($options{'verbosity'} >= $loglevel) |
|||
{ |
|||
{ |
|||
if($loglevel == 1) |
|||
{ |
|||
{ |
|||
warn $string; # use stderr. |
|||
} |
|||
} |
|||
else |
|||
{ |
|||
{ |
|||
print "$string\n"; |
|||
} |
|||
} |
|||
} |
|||
} |
|||
} |
} |
||
Line 436: | Line 285: | ||
sub sanitizenamefs |
sub sanitizenamefs |
||
{ |
{ |
||
my $pagename = shift; |
|||
$pagename =~ tr/:\//__/; |
|||
return $pagename; |
|||
} |
} |
||
Line 444: | Line 293: | ||
sub diffcat_simple |
sub diffcat_simple |
||
{ |
{ |
||
my $first = shift; |
|||
my $second = shift; |
|||
my %firsthash = map { $_, 1 } @$first; |
|||
my @new = (); |
|||
foreach my $secondelement (@$second) |
|||
{ |
|||
{ |
|||
push @new, $secondelement |
|||
unless exists $firsthash{$secondelement}; |
|||
} |
|||
} |
|||
return @new; |
|||
} |
} |
||
Line 462: | Line 311: | ||
sub diffcat |
sub diffcat |
||
{ |
{ |
||
my $first = shift; |
|||
my $second = shift; |
|||
my @onlyinfirst = &diffcat_simple($second, $first); |
|||
my @onlyinsecond = &diffcat_simple($first, $second); |
|||
return (\@onlyinfirst, \@onlyinsecond); |
|||
} |
|||
sub slurp_file { |
|||
my $pathname = $_[0]; |
|||
open(my $stream, "<", $pathname) or do {warn"Can't open '$pathname': $!\n"; |
|||
return}; |
|||
my $ret; |
|||
while (1) { |
|||
my $len = read($stream, my $buf, 1024); |
|||
if (!defined($len)) { die "I/O error while reading '$pathname': $!"; } |
|||
elsif ($len == 0) { last } |
|||
else { $ret .= $buf } |
|||
} |
|||
$ret; |
|||
} |
} |
||
Line 472: | Line 335: | ||
sub postpage |
sub postpage |
||
{ |
{ |
||
my $pagename = shift; |
|||
my $pagedata = shift; |
|||
my $remark = shift; |
|||
my $minoredit = shift; |
|||
# MediaWiki won't let us create blank, empty pages. |
|||
# But since we don't want to query (or cache) to see if |
|||
# the page already exists, we'll just add an HTML |
|||
# non-breaking-space entity if the page is truly empty. |
|||
$pagedata = " " |
|||
if(0 == length $pagedata); |
|||
++$pagesedited; |
|||
unless( exists $options{'post'} ) |
|||
{ |
|||
{ |
|||
$pagename = wikitxt_pathname($pagename); |
|||
# save it to disk, and out of the way. |
|||
# save it to disk, and out of the way. |
|||
$pagename = "test/" . &sanitizenamefs($pagename); |
|||
$pagename .= ".wikitxt"; |
|||
&out("Saving: $pagename\n", 2); |
|||
&out("Saving: $pagename\n", 2); |
|||
open my $outfile, '>', $pagename |
|||
or &out("Failed to open $pagename: $!", 1); |
|||
return unless defined $outfile; |
|||
print $outfile $pagedata; |
|||
close $outfile; |
|||
} |
|||
else { |
|||
&out("Posting $pagename\n", 2); |
|||
or &log("Failed to open $pagename: $!", 1); |
|||
$editor->edit($pagename, $pagedata, "ImplSearchBot:$remark", $minoredit) |
|||
or &out("Failed to post page: " . $editor->{'errstr'}, 1); |
|||
return unless defined $outfile; |
|||
} |
|||
print $outfile $pagedata; |
|||
close $outfile; |
|||
} |
|||
else |
|||
{ |
|||
sleep 1; |
|||
&out("Posting $pagename\n", 2); |
|||
$editor->edit($pagename, $pagedata, "ImplSearchBot:$remark", $minoredit) |
|||
or &out("Failed to post page: " . $editor->{'errstr'}, 1); |
|||
} |
|||
} |
} |
||
# This allows us to compare site state between now and when we |
# This allows us to compare site state between now and when we |
||
# last ran, so that we can learn to avoid doing unnecessary work. |
# last ran, so that we can learn to avoid doing unnecessary work. |
||
Line 537: | Line 398: | ||
return; |
return; |
||
} |
} |
||
print $outfile to_json($data); |
print $outfile to_json($data); |
||
close $outfile; |
close $outfile; |
||
&out(scalar $data . " members cached to $filename.\n", 5); |
&out(scalar $data . " members cached to $filename.\n", 5); |
||
} |
} |
||
# Return data we cached previously. |
# Return data we cached previously. |
||
sub getcacheddata |
sub getcacheddata |
||
Line 555: | Line 416: | ||
return []; |
return []; |
||
} |
} |
||
my $jsondata; |
my $jsondata; |
||
$jsondata .= $_ while <$infile>; |
$jsondata .= $_ while <$infile>; |
||
close $infile; |
close $infile; |
||
my $cacheddata = from_json($jsondata); |
my $cacheddata = from_json($jsondata); |
||
&out(scalar @$cacheddata . " cache members retrieved from $filename\n", 5); |
&out(scalar @$cacheddata . " cache members retrieved from $filename\n", 5); |
||
++$cachehits; |
++$cachehits; |
||
return $cacheddata; |
return $cacheddata; |
||
} |
} |
||
# Report the changes between two categories. |
# Report the changes between two categories. |
||
# More interesting than "x added, y removed" |
# More interesting than "x added, y removed" |
||
Line 574: | Line 435: | ||
my $old = shift; |
my $old = shift; |
||
my $new = shift; |
my $new = shift; |
||
my ($removed, $added) = &diffcat($old, $new); |
my ($removed, $added) = &diffcat($old, $new); |
||
my $out = ""; |
my $out = ""; |
||
$out .= "Removed from $category:\n" |
$out .= "Removed from $category:\n" |
||
if( scalar @$removed > 0 ); |
if( scalar @$removed > 0 ); |
||
$out .= "$_\n" |
$out .= "$_\n" |
||
foreach (@$removed); |
foreach (@$removed); |
||
$out .= "Added to $category:\n" |
$out .= "Added to $category:\n" |
||
if( scalar @$added > 0 ); |
if( scalar @$added > 0 ); |
||
$out .= "$_\n" |
$out .= "$_\n" |
||
foreach (@$added); |
foreach (@$added); |
||
&out($out, 2) |
&out($out, 2) |
||
if("" ne $out); |
if("" ne $out); |
||
} |
} |
||
# Pull the category data, or cached data if we're not pulling from the wiki. |
# Pull the category data, or cached data if we're not pulling from the wiki. |
||
sub getcategory |
sub getcategory |
||
Line 598: | Line 459: | ||
my $categoryname = shift; |
my $categoryname = shift; |
||
&out("Getting category contents for $categoryname...", 4); |
&out("Getting category contents for $categoryname...", 4); |
||
my $old; |
my $old; |
||
my $new; |
my $new; |
||
if( exists $options{'cacheonly'} ) |
if( exists $options{'cacheonly'} ) |
||
{ |
{ |
||
Line 615: | Line 476: | ||
sleep 1; |
sleep 1; |
||
} |
} |
||
&reportcatchanges($categoryname, $old, $new); |
# &reportcatchanges($categoryname, $old, $new); |
||
&cachedata("$categoryname", $new); |
&cachedata("$categoryname", $new); |
||
&out(scalar @$new . " members returned for $categoryname\n", 5); |
&out(scalar @$new . " members returned for $categoryname\n", 5); |
||
return $new; |
return $new; |
||
} |
} |
||
# Find if this category changed, report its contents if it has. |
# Find if this category changed, report its contents if it has. |
||
sub getwork |
sub getwork |
||
Line 631: | Line 492: | ||
&out("Getting work for $categoryname.\n",4); |
&out("Getting work for $categoryname.\n",4); |
||
my $cacheddata = &getcacheddata($categoryname); |
my $cacheddata = &getcacheddata($categoryname); |
||
# If we're on a cache-only basis, we'll just say we have no old data, |
# If we're on a cache-only basis, we'll just say we have no old data, |
||
# and that our cached data is our new data. |
# and that our cached data is our new data. |
||
Line 646: | Line 507: | ||
$newdata = &getcategory($categoryname); |
$newdata = &getcategory($categoryname); |
||
} |
} |
||
my ($removed, $added) = &diffcat($olddata, $newdata); |
my ($removed, $added) = &diffcat($olddata, $newdata); |
||
my $impl_count_change = scalar @$removed + @$added; |
my $impl_count_change = scalar @$removed + @$added; |
||
Line 652: | Line 513: | ||
$work = $newdata |
$work = $newdata |
||
if(($impl_count_change + $task_count_change) > 0); |
if(($impl_count_change + $task_count_change) > 0); |
||
&out(scalar @$work . " items to process for $categoryname.\n", 3); |
&out(scalar @$work . " items to process for $categoryname.\n", 3); |
||
return @$work; |
return @$work; |
||
} |
} |
||
# If changes occurred, the info is more important than if they didn't. |
# If changes occurred, the info is more important than if they didn't. |
||
sub getloglevelfromdiff |
sub getloglevelfromdiff |
||
Line 668: | Line 529: | ||
return $base + 1; |
return $base + 1; |
||
} |
} |
||
# Find all the work items for a given language. |
# Find all the work items for a given language. |
||
sub getlangwork |
sub getlangwork |
||
Line 684: | Line 545: | ||
$omit{$workitem} = 1; |
$omit{$workitem} = 1; |
||
} |
} |
||
# No work to do? |
# No work to do? |
||
return undef |
return undef |
||
if( ( 0 == scalar keys %omit) && (0 == scalar keys %impl)); |
if( ( 0 == scalar keys %omit) && (0 == scalar keys %impl)); |
||
# Language metadata |
# Language metadata |
||
my $unimpcount = scalar @$alltasks - scalar keys %impl; |
my $unimpcount = scalar @$alltasks - scalar keys %impl; |
||
my $targetcount = (scalar @$alltasks - scalar keys %omit); |
my $targetcount = (scalar @$alltasks - scalar keys %omit); |
||
# work to do. |
# work to do. |
||
return {'impl' => \%impl, |
return {'impl' => \%impl, |
||
Line 700: | Line 561: | ||
'target_count' => $targetcount }; |
'target_count' => $targetcount }; |
||
} |
} |
||
# Commit the cache. |
# Commit the cache. |
||
sub commitcache |
sub commitcache |
||
Line 707: | Line 568: | ||
my $cachepath = $options{'cachepath'}; |
my $cachepath = $options{'cachepath'}; |
||
opendir(CACHEDIR, $cachepath); |
opendir(CACHEDIR, $cachepath); |
||
my @current = readdir(CACHEDIR); |
my @current = readdir(CACHEDIR); |
||
close(CACHEDIR); |
close(CACHEDIR); |
||
# We need to run svn adds if we've created any new files. |
# We need to run svn adds if we've created any new files. |
||
# Maybe we'll use SVN::Client some day. Not right now. |
# Maybe we'll use SVN::Client some day. Not right now. |
||
my ($added, $removed) = &diffcat(\@current, \@initialcache); |
my ($added, $removed) = &diffcat(\@current, \@initialcache); |
||
&out("Detected " . scalar @$added . " new cache files and " . scalar @$removed . " removed\n",&getloglevelfromdiff(2, $added, $removed)); |
&out("Detected " . scalar @$added . " new cache files and " . scalar @$removed . " removed\n",&getloglevelfromdiff(2, $added, $removed)); |
||
if ((scalar @$added + scalar @$removed ) > 0) |
if ((scalar @$added + scalar @$removed ) > 0) |
||
{ |
{ |
||
Line 723: | Line 584: | ||
&svn('add', $cachepath . $cachefile);; |
&svn('add', $cachepath . $cachefile);; |
||
} |
} |
||
foreach my $cachefile (@$removed) |
foreach my $cachefile (@$removed) |
||
{ |
{ |
||
Line 729: | Line 590: | ||
} |
} |
||
} |
} |
||
&svn('ci', '--message="ImplSearchBot run"', $cachepath); |
&svn('ci', '--message="ImplSearchBot run"', $cachepath); |
||
&svn('update', $cachepath); |
&svn('update', $cachepath); |
||
} |
} |
||
# Wrap svn commands so we can log them. |
# Wrap svn commands so we can log them. |
||
sub svn |
sub svn |
||
Line 741: | Line 602: | ||
if(exists $options{'nosvn'}); |
if(exists $options{'nosvn'}); |
||
my @args = @_; |
my @args = @_; |
||
my $string = "system 'svn'"; |
my $string = "system 'svn'"; |
||
$string .= ", '$_'" |
$string .= ", '$_'" |
||
foreach (@args); |
foreach (@args); |
||
$string .= "\n"; |
$string .= "\n"; |
||
&out($string, 3); |
&out($string, 3); |
||
system 'svn', @args |
system 'svn', @args |
||
if(exists $options{'post'}); |
if(exists $options{'post'}); |
||
} |
} |
||
sub getopt { |
|||
#I don't care to pollute my global scope. |
|||
my $wiki = 'rosettacode.org'; |
|||
my $username; |
|||
my $password; |
|||
my $verbosity = 2; # verbosity level. 0 is silent. 1 is error only. 2 is updates. 3 is process, more is noisy. |
|||
my $post; # Is this an actual run? |
|||
my $cachepath = "/tmp/"; |
|||
my $cacheonly; # Don't query the wiki for data. Just pull from cache. |
|||
my $nosvn; |
|||
my $rebuild_all; |
|||
my $opt_matrix = { |
|||
"wiki=s" => \$wiki, |
|||
"username=s" => \$username, |
|||
"password=s" => \$password, |
|||
"verbosity=s" => \$verbosity, |
|||
"post" => \$post, |
|||
"cacheonly" => \$cacheonly, |
|||
"nosvn" => \$nosvn, |
|||
"cachepath=s" => \$cachepath, |
|||
"rebuildall" => \$rebuild_all }; |
|||
my $result = GetOptions( %$opt_matrix ); |
|||
$options{'wiki'} = $wiki; |
|||
$options{'nosvn'} = $nosvn |
|||
if defined $nosvn; |
|||
$options{'username'} = $username |
|||
if defined $username; |
|||
$options{'password'} = $password |
|||
if defined $password; |
|||
$options{'post'} = $post |
|||
if defined $post; |
|||
$options{'verbosity'} = $verbosity; |
|||
$cachepath .= '/' |
|||
if('/' ne substr($cachepath, -1, 1)); |
|||
$options{'cachepath'} = $cachepath; |
|||
$options{'cacheonly'} = $cacheonly |
|||
if defined $cacheonly; |
|||
$options{'rebuild_all'} = $rebuild_all |
|||
if defined $rebuild_all; |
|||
my $usage = "Usage: $0 (options)\n The available options are:\n"; |
|||
$usage .= "\t--$_\n" |
|||
foreach (keys %$opt_matrix); |
|||
$usage .= "Username and password are required if you need to pull data from the wiki. Wiki defaults to Rosetta Code.\n"; |
|||
my $wikineeded; |
|||
$wikineeded = "yes" |
|||
unless ( exists $options{'cacheonly'} ); |
|||
$wikineeded = "yes" |
|||
if ( exists $options{'post'} ); |
|||
if(defined $wikineeded) |
|||
{ |
|||
unless(exists $options{'username'} and exists $options{'password'}) |
|||
{ |
|||
die $usage; |
|||
} |
|||
} |
|||
} |
|||
sub main { |
|||
unless (@ARGV) { @ARGV = @_; } |
|||
getopt(); |
|||
#Statistic tracking. |
|||
# Tracking for svn checkin at end. |
|||
# We *should* be the only ones writing to the cache path for now. |
|||
# Anyone else should have their own checkout of the data, |
|||
# and test runs are done with --cacheonly and --nosvn. |
|||
opendir(my $cachedir, $options{'cachepath'}) |
|||
or die "Unable to open cache directory"; |
|||
@initialcache = readdir($cachedir); |
|||
closedir($cachedir); |
|||
&out(scalar @initialcache . " categories initially cached\n", 4); |
|||
# Get our editor |
|||
$editor = &geteditor(); |
|||
# Get a complete listing of the tasks. |
|||
&out("Getting tasks\n", 3); |
|||
$alltasks = &getcategory('Category:Programming Tasks'); |
|||
# Get a complete listing of the languages. |
|||
&out("Getting the languages.\n", 3); |
|||
$alllanguages= &getcategory('Category:Programming Languages'); |
|||
# Quick check. Did we add or lose any tasks? If so, we've got to recalc *all* |
|||
# of the pages. :-/ |
|||
unless(exists $options{'rebuild_all'}) { |
|||
my $cacheddata = &getcacheddata('Category:Programming Tasks'); |
|||
my ($added, $removed) = &diffcat($alltasks, $cacheddata); |
|||
$task_count_change = scalar @$added + scalar @$removed; |
|||
} |
|||
else { |
|||
$task_count_change = 1; # It just needs to be nonzero... |
|||
} |
|||
# We want the language name, not the fully-qualified wiki name. |
|||
$_ =~ s/^Category:// foreach (@$alllanguages); |
|||
# Get a list of the languages for which we've already provided bodies for the related omit categories. |
|||
# Store it as a hash, so the lookup will be faster. |
|||
my $omitcatcontents = &getcategory('Category:Maintenance/OmitCategoriesCreated'); |
|||
my %createdomitcategories = map {$_, 1} @$omitcatcontents; |
|||
&out("Identifying work to do\n", 3); |
|||
foreach my $lang (@$alllanguages) { |
|||
my $val = &getlangwork($lang); |
|||
next unless defined $val; |
|||
$impldiff{$lang} = $val; |
|||
} |
|||
&processimplediff(\%impldiff); |
|||
my $runtime = time() - $starttime; |
|||
my $statsdata = "Pages edited last run: $pagesedited<br/>Time to post all per-language updates: $runtime seconds<br/>Category pulls: $categorypulls<br/>Cache hits: $cachehits<br/>"; |
|||
&out("Updating stats page. Runtime ($runtime), Pages edited ($pagesedited)\n", 2); |
|||
&postpage("User:ImplSearchBot/Stats", $statsdata, "Updating stats data", 0); |
|||
&out("Updating bot code page\n", 4); |
|||
open my $sourcefile, '<', $0 |
|||
or die "Finished without updating bot source page"; |
|||
my $botsource; |
|||
$botsource .= $_ while <$sourcefile>; |
|||
close $sourcefile; |
|||
my $tag = "lang"; |
|||
&postpage("User:ImplSearchBot/Code", "<$tag perl>$botsource</$tag>", 0); |
|||
&out("Updating cache\n", 4); |
|||
&commitcache(); |
|||
&out("Done\n", 3); |
|||
exit 0 |
|||
} |
|||
if (!caller) { exit main } |
|||
</lang> |
</lang> |