Rosetta Code/List authors of task descriptions: Difference between revisions

m
→‎{{header|Perl 6}}: Replaced accidentally overwritten code - Change to version using mediawiki API
(Update full list)
m (→‎{{header|Perl 6}}: Replaced accidentally overwritten code - Change to version using mediawiki API)
Line 273:
 
=={{header|Perl 6}}==
{{works with|Rakudo|2017.0811}}
 
The pseudocode above is no longer really useful as the page format has changed significantly since this task was written. Rather than checking '''every''' edit to see if it was a change to the task description, we'll just assume the user that created the page is the task author. This isn't 100% accurate; a very few pages got renamed and recreated by someone other than the original author without preserving the history, so they are misreported (15 Puzzle Game, for instance), but it is as good as it is likely to get without extensive manual intervention. Subsequent edits to the task description are not credited. As it is, we must still make ''thousands'' of requests and pound the server pretty hard. Checking '''every''' edit would make the task several orders of magnitude more abusive of the server (and my internet connection).
 
The task names and author information are saved to local files so it can pick up where it left off if it gets interrupted during processing. As the task creation time (and original editor) never change, don't bother to re-download every time. Just update the category (Draft or Task) as that is the only thing that really changes. If a task name gets edited, manual intervention is required. Either edit the JSON file with the task information or just delete it and recreate it from scratch.
 
<lang perl6>use HTTP::UserAgent;
use GumboJSON::Fast;
use Sort::Naturally;
use JSON::Fast;
 
# Friendlier descriptions for task categories
Line 290 ⟶ 287:
);
 
my $client = HTTP::UserAgent.new;
# Month names for date manipulations
 
my %months = <January February March April May June July August
my $url = 'http://rosettacode.org/mw';
September October November December> Z=> 1..12;
 
my $hashfile = './RC_hash.json';
my $htmlfile = './RC_Authors.html';
my $hashfile = './RC_Authors.json';
 
my %tasks;
my $ua = HTTP::UserAgent.new;
 
#=begin skip update
for %cat.keys -> $category
{ # Get lists of Tasks & Draft Tasks
#last; # Uncomment to skip this step
say "Updating $category list...";
my $page = "http://rosettacode.org/wiki/Category:$category";
my $html = $ua.get($page).content;
my $xmldoc = parse-html($html, :TAG<div>, :id<mw-pages>);
my @tasks = parse-html($xmldoc[0].Str, :TAG<li>).Str.comb( /'/wiki/' <-["]>+ / )».substr(6); #"
my $f = open("./RC_{$category}.txt", :w) or die "$!\n";
note "Writing $category file...";
$f.print( @tasks.join("\n") );
$f.close;
}
 
for %cat.keys -> $category {
note "Reading JSON hash file...";
mediawiki-query(
my %tasks = $hashfile.IO.e ?? $hashfile.IO.slurp.&from-json !! ( );
$url, 'pages',
 
:generator<categorymembers>,
for %cat.keys -> $category
:gcmtitle("Category:$category"),
{ # Scrape info from each page.
:gcmlimit<350>,
#last; # Uncomment to skip this step
:rawcontinue(),
note "Loading $category file...";
:prop<title>
my @entries = "./RC_{$category}.txt".IO.slurp.lines;
).map({
 
mediawiki-query(
for @entries -> $title {
$url, 'pages',
# Update the category as that is the only thing that can really change.
:titles(.<title>),
%tasks{$title}{'category'} = %cat{$category};
:prop<revisions>,
# Otherwise skip if it has already been indexed. The creation date can't change
:rvprop<user|timestamp>,
# the task name *can* change, but it is exceedingly rare
:rvstart<2000-01-01T01:01:01Z>,
if %tasks{$title}{'title'}:exists {
note $title;:rvdir<newer>,
next;:rvlimit<1>
)}
).map({
note $category,': ', .[0]<title>;
%tasks{.[0]<title>}{'category'} = %cat{$category};
%tasks{.[0]<title>}{'author'} = .[0]<revisions>[0]<user>;
%tasks{.[0]<title>}{'date'} = .[0]<revisions>[0]<timestamp>.subst(/'T'.+$/, '')
}
);
}
 
$hashfile.IO.spurt(%tasks.&to-json);
# Get the earliest edit
my $html = $ua.get: "http://rosettacode.org/mw/index.php?title={$title}&dir=prev&limit=1&action=history";
 
#=end skip update
# Filter out the actual history links
$html.content ~~ m|'<li><span class="mw-history-histlinks">' (.+?) '</ul>'|; #"'
 
%tasks = $hashfile.IO.e ?? $hashfile.IO.slurp.&from-json !! ( );
# Only interested in the oldest (last in the list)
my $line = $0.lines.tail;
 
# Parse out the User name
$line ~~ m| 'title="User:' <-[>]>+? '>' (.+?) '</a>' |;
my $auth = $0 ?? $0.Str !! '';
# Oops, no user name, must be anonymous, get IP address instead
unless $auth {
$line ~~ m| '"mw-userlink mw-anonuserlink">' (.+?) '</a>' |;
$auth = $0.Str;
}
%tasks{$title}{'author'} = $auth;
 
# Parse out human readable title
$line ~~ m| '<a href="/mw/index.php?title=' $title '&amp;' .+? 'title="'(<-["]>+)'"' |; #'
%tasks{$title}{'title'} = $0.Str;
 
# Parse out date task was added, convert date to ISO format
$line ~~ m| 'class="mw-changeslist-date">' <-[\s]>+ (<-[<]>+) '</a>‎' |;
%tasks{$title}{'date'} = $0.Str.trim.&toISO8601;
 
# report progress
note $title;
 
# save to a file
$hashfile.IO.spurt(%tasks.&to-json);
 
sleep 3; # Don't pound the server
}
}
 
# Convert saved task / author info to an HTML table
Line 390 ⟶ 352:
# and add them
$out.print( "<li>{$task.value.<date>} - {$task.value.<category>}",
" [[{uri-encode $task.key}|{$task.value.<title>key}]]</li>"
);
}
Line 398 ⟶ 360:
$out.close;
 
saynote "HTML table file saved as: {$htmlfile.IO.absolute}";
 
# Run a MediaWiki API "query" action and lazily yield the result entries.
#
#   $site   - base URL of the wiki's script path; "/api.php?" is appended.
#   $type   - key under the response's "query" object whose values are
#             yielded (the callers in this file pass 'pages').
#   *%query - extra API parameters, merged after the fixed
#             action=query / format=json / formatversion=2 parameters.
#
# Returns the sequence produced by `gather`; every value found under
# $data<query>{$type} in each response is emitted with `take`.
# Uses the file-level $client user agent for the HTTP requests.
sub mediawiki-query ($site, $type, *%query) {
    my $url = "$site/api.php?" ~ uri-query-string(
        :action<query>, :format<json>, :formatversion(2), |%query);
    my $continue = '';

    gather loop {
        # The first request carries an empty continuation (trailing "&").
        my $response = $client.get("$url&$continue");
        my $data = from-json($response.content);
        take $_ for $data.<query>.{$type}.values;
        # Merge every entry of "query-continue" into one set of parameters
        # for the next request; leave the loop when there is none.
        $continue = uri-query-string |($data.<query-continue>{*}».hash.hash or last);
    }
}
 
# Turn named arguments into a URL query string: each pair is rendered as
# "key=value" with the value passed through uri-encode (keys are emitted
# unchanged), and the pairs are joined with "&".
sub uri-query-string (*%fields) {
    %fields.pairs.map(-> $field { $field.key ~ '=' ~ uri-encode($field.value) }).join('&')
}
 
# Percent-encode $str for use inside a URL.
#
# ASCII characters other than letters, digits, "_", ".", "~" and "-" become
# %XX. When the input contains non-ASCII characters, those that are not word
# characters are additionally replaced by their percent-encoded UTF-8 bytes;
# non-ASCII word characters are passed through untouched.
sub uri-encode ($str) {
    # ASCII pass, needed for every input.
    my $escaped = $str.subst(/<[\x00..\x7f]-[a..zA..Z0..9_.~-]>/, *.ord.fmt('%%%02X'), :g);

    # Nothing more to do for pure-ASCII input.
    return $escaped unless $str ~~ /<:!ASCII>/;

    # Formatting the byte list joins the pieces with spaces, so strip them
    # afterwards; spaces from the input are already %20 at this point.
    $escaped.subst(/<:!ASCII-[\w]>/, *.Str.encode('utf8').list.fmt("%%%02X"), :g ).subst(' ','',:g);
}
</lang>
Line 408 ⟶ 388:
;Sample output
 
<table border="1" cellpadding="4"><tr><th colspan="2">As of 2017-12-21 | Total: 1071 / Tasks: 867 / Draft Tasks: 204 / By 247 Authors<tr><th>User</th><th>Authored</th></tr>
<tr><td><ul>[[User:2Powers|2Powers]] [[Special:Contributions/2Powers|?]]</ul></td><td><ul><ol><li>2013-05-16 - Draft: [[Names%20to%20numbers|Names to numbers]]</li><li>2013-05-16 - Draft: [[Solving%20coin%20problems|Solving coin problems]]</li></ol></ul></td></tr>
<tr><td><ul>[[User:12.175.32.19|12.175.32.19]] [[Special:Contributions/12.175.32.19|?]]</ul></td><td><ul><ol><li>2009-11-12 - Task: [[Soundex|Soundex]]</li></ol></ul></td></tr>
<tr><td><ul>[[User:12Me21|12Me21]] [[Special:Contributions/12Me21|?]]</ul></td><td><ul><ol><li>2015-05-04 - Task: [[Draw%20a%20rotating%20cube|Draw a rotating cube]]</li></ol></ul></td></tr>
 
<tr><td colspan='2'><br/> Many rows omitted... <br/></td></tr>
10,333

edits