User:ImplSearchBot/Code: Difference between revisions

← Older edit

User:ImplSearchBot/Code (view source)

Revision as of 21:16, 26 August 2009

8,000 bytes added , 14 years ago

m

ImplSearchBot:0

Anonymous user

rosettacode>ImplSearchBot

Revision as of 08:37, 14 March 2009 (view source) rosettacode>ImplSearchBot m (ImplSearchBot:0) ← Older edit		Latest revision as of 21:16, 26 August 2009 (view source) rosettacode>ImplSearchBot m (ImplSearchBot:0)
(22 intermediate revisions by the same user not shown)
use JSON qw/to_json from_json/;

# Shared run state. These are package globals because most of the
# subroutines in this file read or update them.
our $editor;               # Wiki editor object; assigned in main from geteditor().
our $starttime = time;     # Taken at load time; main reports the elapsed run time.
our $pagesedited = 0;      # Incremented by postpage().
our $categorypulls = 0;    # Incremented by getcategory() on each live wiki query.
our $cachehits = 0;        # Incremented by getcacheddata().
our $task_count_change;
our $alltasks;             # Array ref of task page names; assigned in main.
our %impldiff;
our %createdomitcategories;
our @initialcache;
our $alllanguages;
our @logoutput;            # Every message passed to out(), in order.

my %options;

#---------------------
# These are all the functions that break up our work
# into logical chunks.
#---------------------

# Called in a lot of places to check the machine for overload.
# Does not return until snooze() reports that no sleep was needed.
sub slumber
{
    # Keep snoozing for as long as the previous check had to sleep.
    while (snooze()) {}
    return;
}

# A reversed snooze button: it puts the bot to sleep while the machine
# is busy.
# Returns the number of seconds slept, or 0 if the load was low enough
# that no sleep was needed. Dies if /proc/loadavg cannot be opened.
sub snooze
{
    open my $loadavg, '<', '/proc/loadavg'
        or die "Failed to check load average: $!";
    my $loadstr = <$loadavg>;
    close $loadavg;

    # The limit of 3 keeps the first two fields separate and leaves the
    # rest of the line (including its newline) in the third element.
    my @avgs = split / +/, $loadstr, 3;

    # Sleep sixty seconds per unit of the first load figure.
    my $sleeptime = $avgs[0] * 60;

    # If we're currently under notable load
    if ($sleeptime > 30)
    {
        print "Calculated sleeptime: $sleeptime\n";
        print "Load averages: " . join(' ', @avgs) . " ... Sleeping $sleeptime seconds\n";
        sleep $sleeptime;
        return $sleeptime;
    }

    return 0;
}

# Builds a simple hash ref associating a page name with body.
# Used to help us prepare our postings in one pass, then commit them
# in a second pass.
sub build_posting
{
    slumber();
    my $name = shift;
    my $body = shift;
    return { $name => $body };
}

# Prepares the template body for the unimplemented data.
# Arguments: the per-language work hash ref, then the language name.
# Returns a single-entry posting (see build_posting) whose body lists
# every task that is neither implemented nor omitted for that language.
sub prep_unimp_posting
{
    slumber();
    my $impldiff = shift;
    my $language = shift;
    my $unimplisting = "";

    foreach my $taskname (@$alltasks)
    {
        # If it's a category task, the task name will be slightly different.
        my $baretaskname = $taskname;
        $baretaskname =~ s/^Category://;

        # NOTE(review): (.) captures only ONE character, so "Category:Foo"
        # becomes "F/<language>oo". Possibly (.+) was intended -- confirm
        # against the real page naming before changing it.
        my $implpage = $taskname;
        $implpage =~ s/^Category:(.)/$1\/$language/;

        # my $escapedImplPage = $implpage;
        # $escapedImplPage =~ s/ /_/g;
        my $link = "{{unimpl task link\|$baretaskname\|$language}}";
        # $link = "[http://rosettacode.org/mw/index.php?action=edit&title=$escapedImplPage $implpage]";

        # Add the task to the unimplemented list, if it's unimplemented,
        # and if it's not in the omit list.
        next if exists $impldiff->{$language}->{'impl'}->{$implpage};
        next if exists $impldiff->{$language}->{'omit'}->{$implpage};
        $unimplisting .= "* $link\n";
    }

    return build_posting("Template:" . get_unimpl_template_name($language), $unimplisting);
}

# Prepares the template body for the listings of omitted data for a page.
# Arguments: the per-language work hash ref, then the language name.
# Returns a single-entry posting (see build_posting).
# Side effect: when no task ends up in the listing, the language's 'omit'
# slot is overwritten with 0; prep_listing_posting reads that as "no omit
# section needed".
sub prep_omit_posting
{
    slumber();
    my $implediff = shift;
    my $language = shift;
    my $omitlisting = "";
    my $omittemplatename = get_omit_template_name($language);

    # Look the omissions up through the argument we were handed rather than
    # the file-level %impldiff, and tolerate a slot that already holds the
    # 0 flag from an earlier call.
    my $omitted = $implediff->{$language}->{'omit'};
    $omitted = {} unless ref $omitted eq 'HASH';

    foreach my $taskname (@$alltasks)
    {
        # Only omitted tasks are listed.
        next unless exists $omitted->{$taskname};

        # We want the task name, not the fully-qualified wiki name.
        my $baretaskname = $taskname;
        $baretaskname =~ s/^Category://;

        # Page titles in URLs use underscores in place of spaces.
        my $escapedTaskName = $baretaskname;
        $escapedTaskName =~ s/ /_/g;

        $omitlisting .= "* [http://rosettacode.org/mw/index.php?action=edit&title=$escapedTaskName $baretaskname]\n";
    }

    # Note that there's no data in the template.
    $implediff->{$language}->{'omit'} = 0
        if ("" eq $omitlisting);

    return build_posting("Template:$omittemplatename", $omitlisting);
}

# Prepares the page body that folks look at to find out what changed for a language.
# Arguments: the per-language work hash ref, then the language name.
# Reads the language's 'target_count', 'unimp_count' and 'omit' entries.
# Returns a single-entry posting (see build_posting) keyed by the listing
# page name.
sub prep_listing_posting
{
    slumber();
    my $impldiff = shift;
    my $language = shift;

    my $targetcount = $impldiff->{$language}->{'target_count'};
    my $unimpcount  = $impldiff->{$language}->{'unimp_count'};

    # Prepare template fields
    my $langfield  = "\|$language";
    my $unimpfield = "\|$unimpcount";
    my $tcfield    = "\|$targetcount";

    # Percentage of targeted tasks that are implemented.
    my $impperccalc = 0;
    $impperccalc = (($targetcount - $unimpcount) / $targetcount) * 100
        unless ($targetcount == 0);

    # getlangwork derives the two counts independently (tasks minus
    # implemented, tasks minus omitted), so their difference is negative
    # whenever a language has more omitted than implemented tasks. %u would
    # print that as an enormous unsigned number, so clamp at zero.
    $impperccalc = 0 if $impperccalc < 0;
    my $imppercfield = sprintf "\|%u", $impperccalc;

    # Prepare the listing page format.
    my $pagedata = '{{unimpl_header' . $langfield . $unimpfield . $tcfield . $imppercfield . '}}';
    $pagedata .= "{{" . get_unimpl_template_name($language) . "$unimpfield}}";

    # prep_omit_posting replaces the 'omit' entry with 0 when it has nothing
    # to list; any hash ref still stored there is true.
    my $has_omissions = $impldiff->{$language}->{'omit'};
    if ($has_omissions)
    {
        $pagedata .= "{{omit_header" . "$langfield}}";
        $pagedata .= "{{" . get_omit_template_name($language) . "}}";
    }

    $pagedata .= "{{unimpl_footer$langfield}}";

    return build_posting(get_listing_name($language), $pagedata);
}

# Prepare the body of the omit category.
# Arguments: the per-language work hash ref (unused here), then the
# language name. Returns a single-entry posting (see build_posting).
sub prep_omit_cat_posting
{
    slumber();
    my $implediff = shift;
    my $language = shift;
    return build_posting(get_omit_cat_name($language), "{{omit_cat\|$language}}");
}

# Prepare the page title for the page folks look at to see what changed.
sub get_listing_name
{
    slumber();
    my $language = shift;
    return "Tasks not implemented in $language";
}

# Prepare the template name for the omit listing body.
sub get_omit_template_name
{
    slumber();
    my $language = shift;
    return "unimp_omit_body_$language";
}

# Prepare the template name for the unimplemented listing body.
sub get_unimpl_template_name
{
    slumber();
    my $language = shift;
    return "unimp_body_$language";
}

# Return the name of the category to find the omitted pages for a particular language.
# Takes a language name; returns "Category:<language>/Omit".
sub get_omit_cat_name
{
    slumber();
    my $language = shift;
    return "Category:$language/Omit";
}

# Prepare all the pages associated with a specific language.
# Arguments: the per-language work hash ref, then the language name.
# Returns a hash ref mapping page name to page body.
sub process_language
{
    slumber();
    my $implediff = shift;
    my $language = shift;

    # Order matters: prep_omit_posting may reset the language's 'omit'
    # entry, and prep_listing_posting reads that entry afterwards.
    my $unimpl  = prep_unimp_posting($implediff, $language);
    my $omit    = prep_omit_posting($implediff, $language);
    my $listing = prep_listing_posting($implediff, $language);

    my %langpostings = ( %$unimpl, %$omit, %$listing );

    # Only add the omit category page if it isn't already known to exist.
    my $omit_cat_name = get_omit_cat_name($language);
    unless ( exists $createdomitcategories{$omit_cat_name} )
    {
        my $omit_cat = prep_omit_cat_posting($implediff, $language);
        @langpostings{ keys %$omit_cat } = values %$omit_cat;
    }

    return \%langpostings;
}

# Map a wiki page name to the local file used for a page that is saved
# to disk rather than posted: "test/<sanitized name>.wikitxt".
sub wikitxt_pathname
{
    slumber();
    my $page_name = $_[0];
    return "test/" . sanitizenamefs($page_name) . ".wikitxt";
}

# Prepare and post all of the core pages for this run.
# Argument: hash ref keyed by language name.
sub processimplediff
{
    slumber();
    my $implediff = shift;

    my %work;

    # Prepare all our work.
    foreach my $language (sort keys %$implediff)
    {
        # out("Preparing data for:$language\n", 4);
        my $workitem = process_language($implediff, $language);

        # Merge with a hash slice; rebuilding %work from a flattened copy
        # for every language is needlessly quadratic.
        @work{ keys %$workitem } = values %$workitem;
    }

    # Now that we've prepared all our work, commit it, in a stable order.
    foreach my $pagename (sort keys %work)
    {
        postpage($pagename, $work{$pagename}, "Updating $pagename", 0);
    }

    return;
}

# Return the Mediawiki editor object.
# Returns a logged-in MediaWiki::Bot object, or nothing (undef in scalar
# context) when the run is cache-only and not posting. Dies if the login
# is reported as failed.
sub geteditor
{
    slumber();

    # If we're not posting, and we're only drawing from cache
    # We don't actually need to pull from the wiki.
    if ( exists $options{'cacheonly'} )
    {
        return
            unless exists $options{'post'};
    }

    # Handles interaction with the wiki.
    # Note that I had to modify HTTP::Message to make it work
    # HTTP::Message silently failed when presented by MW
    # with an encoding type of "application/json" or some such.
    out("Creating editor\n", 3);
    my $bot = MediaWiki::Bot->new('ImpleSearchBot');
    $bot->{debug} = 1;

    # Tell the editor which wiki to edit. I'm sure Wikipedia didn't like
    # my initial attempts from before I added this line.
    # Honour the --wiki option (getopt stores it in %options) and fall back
    # to Rosetta Code if it has not been set.
    my $wikihost = defined $options{'wiki'} ? $options{'wiki'} : 'rosettacode.org';
    out("Trying to set wiki.\n", 3);
    $bot->set_wiki($wikihost, 'mw');

    # If we're not posting, we don't need to log in if we're pulling from cache.
    # Otherwise, attempt to log in.
    out("Trying to log in.\n", 3);
    my $loginres = $bot->login($options{'username'}, $options{'password'});

    # NOTE(review): this treats a return value of 0 as success; confirm
    # against the MediaWiki::Bot version actually installed.
    die "Unable to login: " . $loginres
        unless $loginres == 0;

    return $bot;
}

# Simple logging infrastructure. Every message is kept in @logoutput;
# messages at or below the configured verbosity are also emitted, level 1
# via warn (STDERR) and everything else via print (STDOUT). Cron emails
# that output to the user, which gets forwarded to Short Circuit.
# (Anyone want a copy?)
# Arguments: the message, then its log level.
sub out
{
    slumber();
    my $string = shift;
    my $loglevel = shift;

    chomp $string;
    push @logoutput, ($string);

    # Too noisy for the configured verbosity: recorded, but not emitted.
    return unless $options{'verbosity'} >= $loglevel;

    if ($loglevel == 1)
    {
        warn $string; # use stderr.
    }
    else
    {
        print "$string\n";
    }

    return;
}

# Many popular filesystems can't handle : and / in filenames.
# Since I plan to open the SVN repo we save to the rest of the world at some point,
# I'm trying to make sure the files are representable.
# Returns a copy of the page name with each ':' and '/' replaced by '_'.
sub sanitizenamefs
{
    slumber();
    my $pagename = shift;
    $pagename =~ tr{:/}{__};
    return $pagename;
}

# Find all the entries that are in the second list ref, but not the first.
# Arguments: two array refs. Returns a list, in the second list's order
# and with its duplicates kept.
sub diffcat_simple
{
    slumber();
    my $first = shift;
    my $second = shift;

    # Hash the first list so each membership test is a single lookup.
    my %firsthash = map { $_, 1 } @$first;

    return grep { !exists $firsthash{$_} } @$second;
}

# Find all the entries that are in one listref, but not the other.
# Arguments: two array refs.
# Returns two array refs: entries only in the first list, then entries
# only in the second list.
sub diffcat
{
    slumber();
    my $first = shift;
    my $second = shift;

    my @onlyinfirst  = diffcat_simple($second, $first);
    my @onlyinsecond = diffcat_simple($first, $second);

    return (\@onlyinfirst, \@onlyinsecond);
}

# Read a whole file and return its contents.
# Warns and returns nothing if the file cannot be opened; dies on a read
# error. An empty file yields undef, because nothing is ever appended to
# the result.
sub slurp_file
{
    slumber();
    my $pathname = $_[0];

    my $stream;
    unless (open($stream, "<", $pathname))
    {
        warn "Can't open '$pathname': $!\n";
        return;
    }

    my $ret;
    while (1)
    {
        my $len = read($stream, my $buf, 1024);
        die "I/O error while reading '$pathname': $!"
            unless defined $len;
        last if $len == 0;    # end of file
        $ret .= $buf;
    }
    close $stream;

    return $ret;
}

# Post a page (or save it to disk, if we're testing.)
# Arguments: page name, page body, edit remark (posted with the prefix
# "ImplSearchBot:"), and the minor-edit flag handed to the editor.
sub postpage
{
    slumber();
    my $pagename = shift;
    my $pagedata = shift;
    my $remark = shift;
    my $minoredit = shift;

    # MediaWiki won't let us create blank, empty pages.
    # But since we don't want to query (or cache) to see if
    # the page already exists, we'll just add an HTML
    # non-breaking-space entity if the page is truly empty.
    $pagedata = "&nbsp;"
        unless (defined $pagedata and length $pagedata);

    ++$pagesedited;

    unless ( exists $options{'post'} )
    {
        # save it to disk, and out of the way.
        $pagename = wikitxt_pathname($pagename);
        out("Saving: $pagename\n", 2);

        # Test open()'s own result: the handle variable cannot be relied on
        # to be undefined after a failed open, so checking it afterwards
        # does not detect the failure.
        my $outfile;
        unless (open $outfile, '>', $pagename)
        {
            out("Failed to open $pagename: $!", 1);
            return;
        }

        print {$outfile} $pagedata
            or out("Failed to write $pagename: $!", 1);
        # Buffered write errors only surface when the handle is closed.
        close $outfile
            or out("Failed to close $pagename: $!", 1);
    }
    else
    {
        out("Posting $pagename\n", 2);
        $editor->edit($pagename, $pagedata, "ImplSearchBot:$remark", $minoredit)
            or out("Failed to post page: " . $editor->{'errstr'}, 1);
    }

    return;
}

# This allows us to compare site state between now and when we
# last ran, so that we can learn to avoid doing unnecessary work.
# (Saves on server resources and bloated edit statistics...)
# Also, the data gets saved to an SVN repo, so that multiple bots can
# use the history. We're using JSON, as JSON has broader
# cross-language support than Data::Dumper, making it easier for others
# to use the data.
~~# Instead of Perl's native Data::Dumper as JSON has broader~~ ~~# cross-language support, making it easier for others to use the data.~~ ~~# SVN has the advantage that I already know how to use it.~~ ~~# Git has the advantage in that I can use GitHub and not tax~~ ~~# my Slice with Git traffic, and I don't have to punch a hole~~ ~~# in the firewall to access more services.~~ sub cachedata { &slumber; my $dataname = shift; my $data = shift; Line 375 ⟶ 451: return; } print $outfile to_json($data); close $outfile; &out(scalar @$data . " members cached to $filename.\n", 5); } # Return data we cached previously. sub getcacheddata { &slumber; my $dataname = shift; my $filename = $options{'cachepath'} . &sanitizenamefs($dataname . ".json"); Line 389 ⟶ 467: unless (open $infile, '<', $filename) { &out("Failed to load cached data $filename: $!\n", 15); return []; } my $jsondata; $jsondata .= $_ while <$infile>; close $infile; my $cacheddata = from_json($jsondata); &out(scalar @$cacheddata . " cache members retrieved from $filename\n", 5); ++$cachehits; return $cacheddata; } # Report the changes between two categories. # More interesting than "x added, y removed" sub reportcatchanges { &slumber; my $category = shift; my $old = shift; my $new = shift; my ($removed, $added) = &diffcat($old, $new); my $out = ""; $out .= "Removed from $category:\n" if( scalar @$removed > 0 ); $out .= "$_\n" foreach (@$removed); $out .= "Added to $category:\n" if( scalar @$added > 0 ); $out .= "$_\n" foreach (@$added); &out($out, 2) if("" ne $out); } # Pull the category data, or cached data if we're not pulling from the wiki. 
sub getcategory { &slumber; my $categoryname = shift; ~~# Return the cache data if we're not supposed to query the database.~~ ~~return &getcacheddata($categoryname)~~ ~~if( exists $options{'cacheonly'} );~~ &out("Getting category contents for $categoryname...", 4); ~~my @categorycontents = $editor->get_pages_in_category($categoryname);~~ my $old; ~~&out(scalar @categorycontents . " members retrieved for $categoryname\n", 5);~~ my $new; ~~my ($added, $removed) = &diffcat(&getcacheddata($categoryname), \@categorycontents);~~ if( exists $options{'cacheonly'} ) { ~~&out(scalar @$removed . " removed, " . @$added . " added to $categoryname\n", &getloglevelfromdiff(2, $removed, $added));~~ # Return the cache data if we're not supposed to query the database. $old = []; ~~&cachedata("$categoryname", \@categorycontents);~~ $new = &getcacheddata($categoryname); } ~~return \@categorycontents;~~ else { $old = &getcacheddata($categoryname); $new = [$editor->get_pages_in_category($categoryname)]; ++$categorypulls; } # &reportcatchanges($categoryname, $old, $new); &cachedata("$categoryname", $new); &out(scalar @$new . " members returned for $categoryname\n", 5); return $new; } # Find if this category changed, report its contents if it has. sub getwork { &slumber; my $categoryname = shift; &out("Getting work for $categoryname.\n",4); my $cacheddata = &getcacheddata($categoryname); # If we're on a cache-only basis, we'll just say we have no old data, # and that our cached data is our new data. Line 444 ⟶ 563: $newdata = &getcategory($categoryname); } my ($removed, $added) = &diffcat($olddata, $newdata); my $impl_count_change = scalar @$removed + @$added; my $work = []; $work = $newdata if((~~scalar @~~$~~removed~~impl_count_change + ~~scalar @~~$~~added~~task_count_change) > 0); &out(scalar @$work . " items to process for $categoryname.\n", 3); return @$work; } # If changes occurred, the info is more important than if they didn't. 
sub getloglevelfromdiff { &slumber; my $base = shift; my $first = shift; Line 464 ⟶ 586: return $base + 1; } # Find all the work items for a given language. sub getlangwork { &slumber; my $language = shift; &out("Getting lang work for $language\n", 4); Line 479 ⟶ 603: $omit{$workitem} = 1; } # No work to do? return undef if( ( 0 == scalar keys %omit) && (0 == scalar keys %impl)); # Language metadata my $unimpcount = scalar @$alltasks - scalar keys %impl; my $targetcount = (scalar @$alltasks - scalar keys %omit); # work to do. return {'impl' => \%impl, 'omit' => \%omit };, 'unimp_count' => $unimpcount, 'target_count' => $targetcount }; } # Commit the cache. sub commitcache { &slumber; # First, find out if we've added any files. my $cachepath = $options{'cachepath'}; opendir(CACHEDIR, $cachepath); my @current = readdir(CACHEDIR); close(CACHEDIR); # We need to run svn adds if we've created any new files. # Maybe we'll use SVN::Client some day. Not right now. my ($added, $removed) = &diffcat(\@current, \@initialcache); &out("Detected " . scalar @$added . " new cache files and " . scalar @$removed . " removed\n",&getloglevelfromdiff(2, $added, $removed)); if ((scalar @$added + scalar @$removed ) > 0) { Line 510 ⟶ 643: &svn('add', $cachepath . $cachefile);; } foreach my $cachefile (@$removed) { Line 516 ⟶ 649: } } &svn('ci', '--message="ImplSearchBot run"', $cachepath); &svn('update', $cachepath); } # Wrap svn commands so we can log them. sub svn { &slumber; return if(exists $options{'nosvn'}); my @args = @_; my $string = "system 'svn'"; $string .= ", '$_'" foreach (@args); $string .= "\n"; &out($string, 3); system 'svn', @args if(exists $options{'post'}); } sub getopt { &slumber; #I don't care to pollute my global scope. my $wiki = 'rosettacode.org'; my $username; my $password; my $verbosity = 2; # verbosity level. 0 is silent. 1 is error only. 2 is updates. 3 is process, more is noisy. my $post; # Is this an actual run? 
my $cachepath = "/tmp/"; my $cacheonly; # Don't query the wiki for data. Just pull from cache. my $nosvn; my $rebuild_all; my $opt_matrix = { "wiki=s" => \$wiki, "username=s" => \$username, "password=s" => \$password, "verbosity=s" => \$verbosity, "post" => \$post, "cacheonly" => \$cacheonly, "nosvn" => \$nosvn, "cachepath=s" => \$cachepath, "rebuildall" => \$rebuild_all }; my $result = GetOptions( %$opt_matrix ); $options{'wiki'} = $wiki; $options{'nosvn'} = $nosvn if defined $nosvn; $options{'username'} = $username if defined $username; $options{'password'} = $password if defined $password; $options{'post'} = $post if defined $post; $options{'verbosity'} = $verbosity; $cachepath .= '/' if('/' ne substr($cachepath, -1, 1)); $options{'cachepath'} = $cachepath; $options{'cacheonly'} = $cacheonly if defined $cacheonly; $options{'rebuild_all'} = $rebuild_all if defined $rebuild_all; my $usage = "Usage: $0 (options)\n The available options are:\n"; $usage .= "\t--$_\n" foreach (keys %$opt_matrix); $usage .= "Username and password are required if you need to pull data from the wiki. Wiki defaults to Rosetta Code.\n"; my $wikineeded; $wikineeded = "yes" unless ( exists $options{'cacheonly'} ); $wikineeded = "yes" if ( exists $options{'post'} ); if(defined $wikineeded) { unless(exists $options{'username'} and exists $options{'password'}) { die $usage; } } } sub main { &slumber; unless (@ARGV) { @ARGV = @_; } getopt(); #Statistic tracking. # Tracking for svn checkin at end. # We should be the only ones writing to the cache path for now. # Anyone else should have their own checkout of the data, # and test runs are done with --cacheonly and --nosvn. opendir(my $cachedir, $options{'cachepath'}) or die "Unable to open cache directory"; @initialcache = readdir($cachedir); closedir($cachedir); &out(scalar @initialcache . " categories initially cached\n", 4); # Get our editor $editor = &geteditor(); # Get a complete listing of the tasks. 
&out("Getting tasks\n", 3); $alltasks = &getcategory('Category:Programming Tasks'); # Get a complete listing of the languages. &out("Getting the languages.\n", 3); $alllanguages= &getcategory('Category:Programming Languages'); # Quick check. Did we add or lose any tasks? If so, we've got to recalc all # of the pages. :-/ unless(exists $options{'rebuild_all'}) { my $cacheddata = &getcacheddata('Category:Programming Tasks'); my ($added, $removed) = &diffcat($alltasks, $cacheddata); $task_count_change = scalar @$added + scalar @$removed; } else { $task_count_change = 1; # It just needs to be nonzero... } # We want the language name, not the fully-qualified wiki name. $_ =~ s/^Category:// foreach (@$alllanguages); # Get a list of the languages for which we've already provided bodies for the related omit categories. # Store it as a hash, so the lookup will be faster. my $omitcatcontents = &getcategory('Category:Maintenance/OmitCategoriesCreated'); my %createdomitcategories = map {$_, 1} @$omitcatcontents; &out("Identifying work to do\n", 3); foreach my $lang (@$alllanguages) { my $val = &getlangwork($lang); next unless defined $val; $impldiff{$lang} = $val; } &processimplediff(\%impldiff); my $runtime = time() - $starttime; my $statsdata = "Pages edited last run: $pagesedited<br/>Time to post all per-language updates: $runtime seconds<br/>Category pulls: $categorypulls<br/>Cache hits: $cachehits<br/>"; &out("Updating stats page. Runtime ($runtime), Pages edited ($pagesedited)\n", 2); &postpage("User:ImplSearchBot/Stats", $statsdata, "Updating stats data", 0); &out("Updating bot code page\n", 4); open my $sourcefile, '<', $0 or die "Finished without updating bot source page"; my $botsource; $botsource .= $_ while <$sourcefile>; close $sourcefile; my $tag = "lang"; &postpage("User:ImplSearchBot/Code", "<$tag perl>$botsource</$tag>", 0); my $logdata = "<pre>" . join( "\n", @logoutput) . 
"</pre>"; &postpage("User:ImplSearchBot/Log", $logdata, 0); &out("Updating cache\n", 4); &commitcache(); &out("Done\n", 3); exit 0 } if (!caller) { exit main } </lang>