Rosetta Code:Village Pump/Lang-tag bot/Source: Difference between revisions
Content added Content deleted
Underscore (talk | contribs) (Created page with '<lang perl>use warnings; use strict; use feature 'say'; use List::Util '&minstr'; use HTML::Entities '&decode_entities'; use MediaWiki::API; use constant DELAY_BETWEEN_EDITS => …') |
No edit summary |
||
(4 intermediate revisions by 2 users not shown) | |||
Line 1: | Line 1: | ||
< |
<syntaxhighlight lang="perl">use warnings; |
||
use strict; |
use strict; |
||
use feature 'say'; |
use feature 'say'; |
||
use List::Util '&minstr'; |
use List::Util '&minstr'; |
||
use HTML::Entities '&decode_entities'; |
use HTML::Entities '&decode_entities'; |
||
use YAML::XS qw(&DumpFile &LoadFile); |
|||
use MediaWiki::API; |
use MediaWiki::API; |
||
use constant DELAY_BETWEEN_EDITS => |
use constant DELAY_BETWEEN_EDITS => 10 * 60; # In seconds. |
||
my $username = 'UnderBot'; |
|||
my $tasks_list_path = '/home/hippo/Temporary/tasks'; |
|||
my $password = 'secret'; |
|||
my $kill_switch_page = 'User talk:UnderBot'; |
|||
my %langtags = ( |
|||
my $kill_switch_trigger = qr/stopediting/; |
|||
'4d' => '4d', |
|||
'actionscript' => 'actionscript', |
|||
my $tasks_path = '/home/hippo/Temporary/tasks.yml'; |
|||
'ada' => 'ada', |
|||
'agda2' => 'agda2', |
|||
my %langtags = ('4d' => '4d', |
|||
'algol 60' => 'algol60', |
|||
'actionscript' => 'actionscript', |
|||
'algol 68' => 'algol68', |
|||
'ada' => 'ada', |
|||
'amigae' => 'amigae', |
|||
'agda2' => 'agda2', |
|||
#'apl' => 'apl', |
|||
'algol 60' => 'algol60', |
|||
'applescript' => 'applescript', |
|||
'algol 68' => 'algol68', |
|||
'assembly' => 'asm', |
|||
'amigae' => 'amigae', |
|||
'autohotkey' => 'autohotkey', |
|||
'apl' => 'apl', |
|||
'awk' => 'awk', |
|||
'applescript' => 'applescript', |
|||
'bc' => 'bc', |
|||
'assembly' => 'asm', |
|||
'befunge' => 'befunge', |
|||
'autohotkey' => 'autohotkey', |
|||
'brainf***' => 'bf', |
|||
'awk' => 'awk', |
|||
'caml' => 'caml', |
|||
'bc' => 'bc', |
|||
'c' => 'c', |
|||
'befunge' => 'befunge', |
|||
'c++' => 'cpp', |
|||
'brainf***' => 'bf', |
|||
'clean' => 'clean', |
|||
'caml' => 'caml', |
|||
'clojure' => 'lisp', |
|||
'c' => 'c', |
|||
'cobol' => 'cobol', |
|||
'c++' => 'cpp', |
|||
'coldfusion' => 'cfm', |
|||
'clean' => 'clean', |
|||
'common lisp' => 'lisp', |
|||
'clojure' => 'lisp', |
|||
'component pascal' => 'pascal', |
|||
'cobol' => 'cobol', |
|||
'coq' => 'coq', |
|||
'coldfusion' => 'cfm', |
|||
'c sharp|c#' => 'csharp', |
|||
'common lisp' => 'lisp', |
|||
'c sharp' => 'csharp', |
|||
'component pascal' => 'pascal', |
|||
'dc' => 'dc', |
|||
'coq' => 'coq', |
|||
'd' => 'd', |
|||
'c sharp|c#' => 'csharp', |
|||
'delphi' => 'delphi', |
|||
'c sharp' => 'csharp', |
|||
'dos batch file' => 'dos', |
|||
'dc' => 'dc', |
|||
'ec' => 'ec', |
|||
'd' => 'd', |
|||
'e' => 'e', |
|||
'delphi' => 'delphi', |
|||
'eiffel' => 'eiffel', |
|||
'dos batch file' => 'dos', |
|||
'ella' => 'ella', |
|||
'ec' => 'ec', |
|||
'emacs lisp' => 'lisp', |
|||
'e' => 'e', |
|||
'erlang' => 'erlang', |
|||
'eiffel' => 'eiffel', |
|||
'esql' => 'sql', |
|||
'ella' => 'ella', |
|||
'factor' => 'factor', |
|||
'emacs lisp' => 'lisp', |
|||
'false' => 'false', |
|||
'erlang' => 'erlang', |
|||
'fan' => 'fan', |
|||
'esql' => 'sql', |
|||
'f' => 'f', |
|||
'factor' => 'factor', |
|||
'forth' => 'forth', |
|||
'false' => 'false', |
|||
'fortran' => 'fortran', |
|||
'fan' => 'fan', |
|||
'fp' => 'fp', |
|||
'f' => 'f', |
|||
'f sharp|f#' => 'fsharp', |
|||
'forth' => 'forth', |
|||
'f_sharp|f#' => 'fsharp', |
|||
'fortran' => 'fortran', |
|||
'gap' => 'gap', |
|||
'fp' => 'fp', |
|||
'genyris' => 'genyris', |
|||
'f sharp|f#' => 'fsharp', |
|||
'gnuplot' => 'gnuplot', |
|||
'f_sharp|f#' => 'fsharp', |
|||
'go' => 'go', |
|||
'gap' => 'gap', |
|||
'groovy' => 'groovy', |
|||
'genyris' => 'genyris', |
|||
'haskell' => 'haskell', |
|||
'gnuplot' => 'gnuplot', |
|||
'haxe' => 'haxe', |
|||
'go' => 'go', |
|||
'hq9+' => 'hq9p', |
|||
'groovy' => 'groovy', |
|||
'html' => 'html4strict', |
|||
'haskell' => 'haskell', |
|||
'icon' => 'icon', |
|||
'haxe' => 'haxe', |
|||
'idl' => 'idl', |
|||
'hq9+' => 'hq9p', |
|||
'io' => 'io', |
|||
'html' => 'html4strict', |
|||
'javafx script' => 'javafx', |
|||
'icon' => 'icon', |
|||
'java' => 'java', |
|||
'idl' => 'idl', |
|||
'javascript' => 'javascript', |
|||
'io' => 'io', |
|||
'j' => 'j', |
|||
'javafx script' => 'javafx', |
|||
'jocaml' => 'jocaml', |
|||
'java' => 'java', |
|||
'joy' => 'joy', |
|||
'javascript' => 'javascript', |
|||
'jscript.net' => 'jscript.net', |
|||
'j' => 'j', |
|||
'json' => 'json', |
|||
'jocaml' => 'jocaml', |
|||
'judoscript' => 'judoscript', |
|||
'joy' => 'joy', |
|||
'korn shell' => 'korn', |
|||
'jscript.net' => 'jscript.net', |
|||
'labview' => 'labview', |
|||
'json' => 'json', |
|||
'latex' => 'latex', |
|||
'judoscript' => 'judoscript', |
|||
'lisaac' => 'lisaac', |
|||
'korn shell' => 'korn', |
|||
'lisp' => 'lisp', |
|||
'labview' => 'labview', |
|||
'logo' => 'logo', |
|||
'latex' => 'latex', |
|||
'logtalk' => 'logtalk', |
|||
'lisaac' => 'lisaac', |
|||
'lotusscript' => 'lotusscript', |
|||
'lisp' => 'lisp', |
|||
'lse64' => 'lse64', |
|||
'logo' => 'logo', |
|||
'lua' => 'lua', |
|||
'logtalk' => 'logtalk', |
|||
'lucid' => 'lucid', |
|||
'lotusscript' => 'lotusscript', |
|||
'm4' => 'm4', |
|||
'lse64' => 'lse64', |
|||
'make' => 'make', |
|||
'lua' => 'lua', |
|||
'maple' => 'maple', |
|||
'lucid' => 'lucid', |
|||
'mathematica' => 'mathematica', |
|||
'm4' => 'm4', |
|||
'matlab' => 'matlab', |
|||
'make' => 'make', |
|||
'maxima' => 'maxima', |
|||
'maple' => 'maple', |
|||
'maxscript' => 'maxscript', |
|||
'mathematica' => 'mathematica', |
|||
'metafont' => 'metafont', |
|||
'matlab' => 'matlab', |
|||
'mirc scripting language' => 'mirc', |
|||
'maxima' => 'maxima', |
|||
'mmix' => 'mmix', |
|||
'maxscript' => 'maxscript', |
|||
'modula-2' => 'modula2', |
|||
'metafont' => 'metafont', |
|||
'modula-3' => 'modula3', |
|||
'mirc scripting language' => 'mirc', |
|||
'moo' => 'moo', |
|||
'mmix' => 'mmix', |
|||
'mpif90' => 'mpif90', |
|||
'modula-2' => 'modula2', |
|||
'ms sql' => 'sql', |
|||
'modula-3' => 'modula3', |
|||
'mysql' => 'sql', |
|||
'moo' => 'moo', |
|||
'newlisp' => 'lisp', |
|||
'mpif90' => 'mpif90', |
|||
'nial' => 'nial', |
|||
'ms sql' => 'sql', |
|||
'oberon-2' => 'oberon2', |
|||
'mysql' => 'sql', |
|||
'objective-c' => 'objc', |
|||
'newlisp' => 'lisp', |
|||
'object pascal' => 'objectpascal', |
|||
'nial' => 'nial', |
|||
'ocaml' => 'ocaml', |
|||
'oberon-2' => 'oberon2', |
|||
'octave' => 'octave', |
|||
'objective-c' => 'objc', |
|||
'omega' => 'omega', |
|||
'object pascal' => 'objectpascal', |
|||
'openedge/progress' => 'openedge', |
|||
'ocaml' => 'ocaml', |
|||
'oz' => 'oz', |
|||
'octave' => 'octave', |
|||
'pari/gp' => 'parigp', |
|||
'omega' => 'omega', |
|||
'pascal' => 'pascal', |
|||
'openedge/progress' => 'openedge', |
|||
'perl 6' => 'perl6', |
|||
'oz' => 'oz', |
|||
'perl' => 'perl', |
|||
'pari/gp' => 'parigp', |
|||
'php' => 'php', |
|||
'pascal' => 'pascal', |
|||
'pike' => 'pike', |
|||
'perl 6' => 'perl6', |
|||
'plaintex' => 'tex', |
|||
'perl' => 'perl', |
|||
'pl/i' => 'pli', |
|||
'php' => 'php', |
|||
'pl/pgsql' => 'plpgsql', |
|||
'pike' => 'pike', |
|||
'pl/sql' => 'plsql', |
|||
'plaintex' => 'tex', |
|||
'pop11' => 'pop11', |
|||
'pl/i' => 'pli', |
|||
'postgresql' => 'sql', |
|||
'pl/pgsql' => 'plpgsql', |
|||
'postscript' => 'postscript', |
|||
'pl/sql' => 'plsql', |
|||
'powerbasic' => 'powerbasic', |
|||
'pop11' => 'pop11', |
|||
'powershell' => 'powershell', |
|||
'postgresql' => 'sql', |
|||
'prolog' => 'prolog', |
|||
'postscript' => 'postscript', |
|||
'pure' => 'pure', |
|||
'powerbasic' => 'powerbasic', |
|||
'python' => 'python', |
|||
'powershell' => 'powershell', |
|||
'q' => 'q', |
|||
'prolog' => 'prolog', |
|||
'rapidq' => 'rapidq', |
|||
'pure' => 'pure', |
|||
'raven' => 'raven', |
|||
'python' => 'python', |
|||
'rexx' => 'rexx', |
|||
'q' => 'q', |
|||
'rhope' => 'rhope', |
|||
'rapidq' => 'rapidq', |
|||
'r' => 'r', |
|||
'raven' => 'raven', |
|||
'ruby' => 'ruby', |
|||
'rexx' => 'rexx', |
|||
'sas' => 'sas', |
|||
'rhope' => 'rhope', |
|||
'scala' => 'scala', |
|||
'r' => 'r', |
|||
'scheme' => 'scheme', |
|||
'ruby' => 'ruby', |
|||
'script3d' => 'script3d', |
|||
'sas' => 'sas', |
|||
'seed7' => 'seed7', |
|||
'scala' => 'scala', |
|||
'self' => 'self', |
|||
'scheme' => 'scheme', |
|||
'setl' => 'setl', |
|||
'script3d' => 'script3d', |
|||
'slate' => 'slate', |
|||
'seed7' => 'seed7', |
|||
'smalltalk' => 'smalltalk', |
|||
'self' => 'self', |
|||
'smeql' => 'smeql', |
|||
'setl' => 'setl', |
|||
'snusp' => 'snusp', |
|||
'slate' => 'slate', |
|||
'sql' => 'sql', |
|||
'smalltalk' => 'smalltalk', |
|||
'standard ml' => 'sml', |
|||
'smeql' => 'smeql', |
|||
'supercollider' => 'supercollider', |
|||
'snusp' => 'snusp', |
|||
'svg' => 'xml', |
|||
'sql' => 'sql', |
|||
'tcl' => 'tcl', |
|||
'standard ml' => 'sml', |
|||
'ti-83 basic' => 'ti83b', |
|||
'supercollider' => 'supercollider', |
|||
'ti-89 basic' => 'ti89b', |
|||
'svg' => 'xml', |
|||
'toka' => 'toka', |
|||
'tcl' => 'tcl', |
|||
'transact-sql' => 'sql', |
|||
'ti-83 basic' => 'ti83b', |
|||
'tr' => 'tr', |
|||
'ti-89 basic' => 'ti89b', |
|||
'twelf' => 'twelf', |
|||
'toka' => 'toka', |
|||
'unixpipes' => 'bash', |
|||
'transact-sql' => 'sql', |
|||
'unix shell' => 'bash', |
|||
'tr' => 'tr', |
|||
'unlambda' => 'unlambda', |
|||
'twelf' => 'twelf', |
|||
'ursala' => 'ursala', |
|||
'unixpipes' => 'bash', |
|||
'vbscript' => 'vbscript', |
|||
'unix shell' => 'bash', |
|||
'vedit macro language' => 'vedit', |
|||
'unlambda' => 'unlambda', |
|||
'visual basic .net' => 'vbnet', |
|||
'ursala' => 'ursala', |
|||
'visual basic' => 'vb', |
|||
'vbscript' => 'vbscript', |
|||
'visual objects' => 'visobj', |
|||
'vedit macro language' => 'vedit', |
|||
'vorpal' => 'vorpal', |
|||
'visual basic .net' => 'vbnet', |
|||
'v' => 'v', |
|||
'visual basic' => 'vb', |
|||
'wrapl' => 'wrapl', |
|||
'visual objects' => 'visobj', |
|||
'xquery' => 'xquery', |
|||
'vorpal' => 'vorpal', |
|||
'xslt' => 'xml', |
|||
'v' => 'v', |
|||
'xtalk' => 'xtalk', |
|||
'wrapl' => 'wrapl', |
|||
); |
|||
'xquery' => 'xquery', |
|||
'xslt' => 'xml', |
|||
'xtalk' => 'xtalk',); |
|||
my $h = qr/(?:\t| )*/; |
my $h = qr/(?:\t| )*/; |
||
# Vaguely like Perl 6's \h. |
|||
# Vaguely like Perl 6's \h. |
|||
my $lwsl = qr/(?:\t| )+\S[^\n]*/; |
my $lwsl = qr/(?:\t| )+\S[^\n]*/; |
||
# Leading WhiteSpace Line. |
|||
# Leading WhiteSpace Line. |
|||
# ------------------------------------------------------------ |
# ------------------------------------------------------------ |
||
our (%tasks, @done, @todo); |
|||
local *tasks = LoadFile $tasks_path; |
|||
or die qq(Couldn't read "$tasks_list_path".\n($!)\n); |
|||
local *done = $tasks{done}; |
|||
local *todo = $tasks{todo}; |
|||
s/\s+\z// foreach @titles; |
|||
my $mw = new MediaWiki::API({api_url => 'http://rosettacode.org/mw/api.php'}); |
my $mw = new MediaWiki::API({api_url => 'http://rosettacode.org/mw/api.php'}); |
||
$mw->login({lgname => |
$mw->login({lgname => $username, lgpassword => $password}) |
||
or die q{Couldn't log in. (}, $mw->{error}->{code}, ': ', |
|||
$mw->{error}->{details}, ')'; |
|||
while (@ |
while (@todo) { |
||
my $pagetitle = shift @todo; |
|||
say "TITLE: $pagetitle"; |
say "TITLE: $pagetitle"; |
||
sleep DELAY_BETWEEN_EDITS; |
|||
# Check the kill switch. |
|||
my $p = $mw->get_page({title => $pagetitle}) || die; |
|||
my $ |
my $p = $mw->get_page({title => $kill_switch_page}) || die; |
||
$p->{'*'} =~ $kill_switch_trigger and die "Killed.\n"; |
|||
$p = $mw->get_page({title => $pagetitle}) || die; |
|||
my $timestamp = $p->{timestamp}; # To prevent edit conflicts. |
|||
my $text = $p->{'*'}; |
|||
$text =~ s/ (.+? \n) (== \s* {{) /$2/xs or die; |
$text =~ s/ (.+? \n) (== \s* {{) /$2/xs or die; |
||
my $newtext = $1; |
my $newtext = $1; |
||
# So $newtext just contains the task description so far. |
|||
# So $newtext just contains the task description so far. |
|||
while ($text =~ s! \A |
|||
while ( |
|||
$text =~ s! \A |
|||
( == $h {{ $h header $h \| $h ([^}]+?) $h }} $h == $h \n ) |
( == $h {{ $h header $h \| $h ([^}]+?) $h }} $h == $h \n ) |
||
Line 214: | Line 228: | ||
( \z | == $h {{ ) |
( \z | == $h {{ ) |
||
!$4!xs |
!$4!xs |
||
) { |
|||
{my ($header, $langname, $body) = ($1, $2, $3); |
|||
my ($header, $langname, $body) = ($1, $2, $3); |
|||
s/\bC #/C#/ foreach $header, $langname; |
s/\bC #/C#/ foreach $header, $langname; |
||
# Why some people put a space there, I have no idea. |
|||
my $tag = $langtags{lc $langname} || |
|||
$langname =~ /assembl/i && 'asm' || |
|||
## BASIC dialect-guessing is commented out because |
|||
## on some pages, programs for more than one dialect |
|||
## appear under "BASIC". Really we ought to treat |
|||
## each dialect as its own language. |
|||
#$langname =~ /basic/i && |
|||
# ($body =~ /q(uick)?basic/i && 'qbasic' || |
|||
# $body =~ /f(ree)?basic/i && 'freebasic' || |
|||
# $body =~ /t(hin)?basic/i && 'thinbasic') || |
|||
undef; |
|||
# Why some people put a space there, I have no idea. |
|||
if ($tag) |
|||
my $tag = |
|||
$langtags{lc $langname} |
|||
|| $langname =~ /assembl/i && 'asm' |
|||
|| |
|||
## BASIC dialect-guessing is commented out because |
|||
## on some pages, programs for more than one dialect |
|||
## appear under "BASIC". Really we ought to treat |
|||
## each dialect as its own language. |
|||
#$langname =~ /basic/i && |
|||
# ($body =~ /q(uick)?basic/i && 'qbasic' || |
|||
# $body =~ /f(ree)?basic/i && 'freebasic' || |
|||
# $body =~ /t(hin)?basic/i && 'thinbasic') || |
|||
undef; |
|||
if ($tag) { |
|||
$tag = "<lang $tag>"; |
|||
if ($body =~ /<lang/) { # Use the correct identifier. |
|||
$body =~ s {$h (<lang [^>]* >)} |
$body =~ s {$h (<lang [^>]* >)} |
||
{my $s = $1; |
{my $s = $1; |
||
Line 239: | Line 256: | ||
? $s # Don't replace "Mathematica" with "mathematica" or "java5" with "java" |
? $s # Don't replace "Mathematica" with "mathematica" or "java5" with "java" |
||
: $tag}gxe; |
: $tag}gxe; |
||
# Get rid of any indenting spaces left behind when |
# Get rid of any indenting spaces left behind when |
||
# someone else added the lang tags. |
# someone else added the lang tags. |
||
Line 248: | Line 266: | ||
# indentation is probably intentional (as |
# indentation is probably intentional (as |
||
# in many J examples). |
# in many J examples). |
||
{my $space = minstr($b =~ /^( +)/gm); |
{my $space = minstr($b =~ /^( +)\S/gm); |
||
$b =~ s/^$space//gm;} |
$b =~ s/^$space//gm;} |
||
"$t$b\x3c/lang>"}xges; |
"$t$b\x3c/lang>"}xges; |
||
} |
|||
elsif ($body =~ /<pre/) |
elsif ($body =~ /<pre/) |
||
# Just assume they should all be lang tags. |
# Just assume they should all be lang tags. |
||
{ |
|||
$body =~ s |
|||
{ <pre [^>]* > \s* |
|||
(.+?) |
(.+?) |
||
\s* </pre> } |
\s* </pre> } |
||
{decode_entities "$tag$1\x3c/lang>"}xseg; |
{decode_entities "$tag$1\x3c/lang>"}xseg; |
||
} |
|||
# HTML entities don't work in lang tags. |
|||
# But they aren't necessary, either. |
|||
# HTML entities don't work in lang tags. |
|||
# But they aren't necessary, either. |
|||
else |
else |
||
# Turn indented passages into lang-tagged passages. |
# Turn indented passages into lang-tagged passages. |
||
{ |
|||
$body =~ s |
|||
{ ( ^ $lwsl \n |
{ ( ^ $lwsl \n |
||
(?: (?: $lwsl \n | $h \n )* |
(?: (?: $lwsl \n | $h \n )* |
||
$lwsl \n )? ) } |
$lwsl \n )? ) } |
||
{my $t = $1; |
{my $t = $1; |
||
my $space = minstr($t =~ /^( +)/gm); |
my $space = minstr($t =~ /^( +)\S/gm); |
||
$t =~ s/^$space//gm; |
$t =~ s/^$space//gm; |
||
$t =~ s/\s+\z//; |
$t =~ s/\s+\z//; |
||
decode_entities("$tag$t\x3c/lang>\n");}mgex; |
decode_entities("$tag$t\x3c/lang>\n");}mgex; |
||
} |
|||
} |
|||
$body =~ s |
$body =~ s |
||
{(<lang [^>]*>) <nowiki> \s* (.+?) \s* </nowiki> \x3c/lang>} |
{(<lang [^>]*>) <nowiki> \s* (.+?) \s* </nowiki> \x3c/lang>} |
||
{$1$2\x3c/lang>}gsx; |
{$1$2\x3c/lang>}gsx; |
||
$newtext .= $header . $body; |
$newtext .= $header . $body; |
||
} |
|||
$newtext .= $text; |
|||
$newtext =~ s/\s*\z/\n/; |
|||
my $success = $mw->edit( |
|||
{ |
|||
action => 'edit', |
|||
title => $pagetitle, |
|||
basetimestamp => $timestamp, |
|||
text => $newtext, |
|||
minor => 1, |
|||
bot => 1, |
|||
# All we're doing, ultimately, is formatting. |
|||
bot => 1, |
|||
nocreate => 1, |
|||
# we probably shouldn't resurrect it! |
|||
summary => 'Fixed lang tags (automatic edit).'}, |
|||
# If the page was deleted while we were regexing, |
|||
# we probably shouldn't resurrect it! |
|||
summary => 'Fixed lang tags.' |
|||
}, |
|||
{skip_encoding => 1}); |
{skip_encoding => 1}); |
||
# Without the skip_encoding option, non-ASCII characters |
|||
# will get corrupted. |
|||
# Without the skip_encoding option, non-ASCII characters |
|||
if ($success) |
|||
# will get corrupted. |
|||
{say 'Committed!';} |
|||
else |
|||
if ($success) { |
|||
# Probably an edit conflict. |
|||
say(exists $success->{edit}->{nochange} |
|||
{say "Couldn't commit; I'll try again later."; |
|||
? 'Unchanged.' |
|||
: 'Committed!'); |
|||
push @done, $pagetitle; |
|||
} |
|||
else |
|||
# Probably an edit conflict. |
|||
{ |
|||
say "Couldn't commit; I'll try again later."; |
|||
push @todo, $pagetitle; |
|||
} |
|||
DumpFile $tasks_path, \%tasks; |
|||
sleep DELAY_BETWEEN_EDITS; |
|||
} |
|||
</syntaxhighlight> |
|||
[[Category:Perl]] |
Latest revision as of 08:47, 21 July 2023
use warnings;
use strict;
use feature 'say';
use List::Util '&minstr';
use HTML::Entities '&decode_entities';
use YAML::XS qw(&DumpFile &LoadFile);
use MediaWiki::API;
use constant DELAY_BETWEEN_EDITS => 10 * 60; # In seconds.
my $username = 'UnderBot';
my $password = 'secret';
my $kill_switch_page = 'User talk:UnderBot';
my $kill_switch_trigger = qr/stopediting/;
my $tasks_path = '/home/hippo/Temporary/tasks.yml';
my %langtags = ('4d' => '4d',
'actionscript' => 'actionscript',
'ada' => 'ada',
'agda2' => 'agda2',
'algol 60' => 'algol60',
'algol 68' => 'algol68',
'amigae' => 'amigae',
'apl' => 'apl',
'applescript' => 'applescript',
'assembly' => 'asm',
'autohotkey' => 'autohotkey',
'awk' => 'awk',
'bc' => 'bc',
'befunge' => 'befunge',
'brainf***' => 'bf',
'caml' => 'caml',
'c' => 'c',
'c++' => 'cpp',
'clean' => 'clean',
'clojure' => 'lisp',
'cobol' => 'cobol',
'coldfusion' => 'cfm',
'common lisp' => 'lisp',
'component pascal' => 'pascal',
'coq' => 'coq',
'c sharp|c#' => 'csharp',
'c sharp' => 'csharp',
'dc' => 'dc',
'd' => 'd',
'delphi' => 'delphi',
'dos batch file' => 'dos',
'ec' => 'ec',
'e' => 'e',
'eiffel' => 'eiffel',
'ella' => 'ella',
'emacs lisp' => 'lisp',
'erlang' => 'erlang',
'esql' => 'sql',
'factor' => 'factor',
'false' => 'false',
'fan' => 'fan',
'f' => 'f',
'forth' => 'forth',
'fortran' => 'fortran',
'fp' => 'fp',
'f sharp|f#' => 'fsharp',
'f_sharp|f#' => 'fsharp',
'gap' => 'gap',
'genyris' => 'genyris',
'gnuplot' => 'gnuplot',
'go' => 'go',
'groovy' => 'groovy',
'haskell' => 'haskell',
'haxe' => 'haxe',
'hq9+' => 'hq9p',
'html' => 'html4strict',
'icon' => 'icon',
'idl' => 'idl',
'io' => 'io',
'javafx script' => 'javafx',
'java' => 'java',
'javascript' => 'javascript',
'j' => 'j',
'jocaml' => 'jocaml',
'joy' => 'joy',
'jscript.net' => 'jscript.net',
'json' => 'json',
'judoscript' => 'judoscript',
'korn shell' => 'korn',
'labview' => 'labview',
'latex' => 'latex',
'lisaac' => 'lisaac',
'lisp' => 'lisp',
'logo' => 'logo',
'logtalk' => 'logtalk',
'lotusscript' => 'lotusscript',
'lse64' => 'lse64',
'lua' => 'lua',
'lucid' => 'lucid',
'm4' => 'm4',
'make' => 'make',
'maple' => 'maple',
'mathematica' => 'mathematica',
'matlab' => 'matlab',
'maxima' => 'maxima',
'maxscript' => 'maxscript',
'metafont' => 'metafont',
'mirc scripting language' => 'mirc',
'mmix' => 'mmix',
'modula-2' => 'modula2',
'modula-3' => 'modula3',
'moo' => 'moo',
'mpif90' => 'mpif90',
'ms sql' => 'sql',
'mysql' => 'sql',
'newlisp' => 'lisp',
'nial' => 'nial',
'oberon-2' => 'oberon2',
'objective-c' => 'objc',
'object pascal' => 'objectpascal',
'ocaml' => 'ocaml',
'octave' => 'octave',
'omega' => 'omega',
'openedge/progress' => 'openedge',
'oz' => 'oz',
'pari/gp' => 'parigp',
'pascal' => 'pascal',
'perl 6' => 'perl6',
'perl' => 'perl',
'php' => 'php',
'pike' => 'pike',
'plaintex' => 'tex',
'pl/i' => 'pli',
'pl/pgsql' => 'plpgsql',
'pl/sql' => 'plsql',
'pop11' => 'pop11',
'postgresql' => 'sql',
'postscript' => 'postscript',
'powerbasic' => 'powerbasic',
'powershell' => 'powershell',
'prolog' => 'prolog',
'pure' => 'pure',
'python' => 'python',
'q' => 'q',
'rapidq' => 'rapidq',
'raven' => 'raven',
'rexx' => 'rexx',
'rhope' => 'rhope',
'r' => 'r',
'ruby' => 'ruby',
'sas' => 'sas',
'scala' => 'scala',
'scheme' => 'scheme',
'script3d' => 'script3d',
'seed7' => 'seed7',
'self' => 'self',
'setl' => 'setl',
'slate' => 'slate',
'smalltalk' => 'smalltalk',
'smeql' => 'smeql',
'snusp' => 'snusp',
'sql' => 'sql',
'standard ml' => 'sml',
'supercollider' => 'supercollider',
'svg' => 'xml',
'tcl' => 'tcl',
'ti-83 basic' => 'ti83b',
'ti-89 basic' => 'ti89b',
'toka' => 'toka',
'transact-sql' => 'sql',
'tr' => 'tr',
'twelf' => 'twelf',
'unixpipes' => 'bash',
'unix shell' => 'bash',
'unlambda' => 'unlambda',
'ursala' => 'ursala',
'vbscript' => 'vbscript',
'vedit macro language' => 'vedit',
'visual basic .net' => 'vbnet',
'visual basic' => 'vb',
'visual objects' => 'visobj',
'vorpal' => 'vorpal',
'v' => 'v',
'wrapl' => 'wrapl',
'xquery' => 'xquery',
'xslt' => 'xml',
'xtalk' => 'xtalk',);
my $h = qr/(?:\t| )*/;
# Vaguely like Perl 6's \h.
my $lwsl = qr/(?:\t| )+\S[^\n]*/;
# Leading WhiteSpace Line.
# ------------------------------------------------------------
our (%tasks, @done, @todo);
local *tasks = LoadFile $tasks_path;
local *done = $tasks{done};
local *todo = $tasks{todo};
my $mw = new MediaWiki::API({api_url => 'http://rosettacode.org/mw/api.php'});
$mw->login({lgname => $username, lgpassword => $password})
or die q{Couldn't log in. (}, $mw->{error}->{code}, ': ',
$mw->{error}->{details}, ')';
while (@todo) {
my $pagetitle = shift @todo;
say "TITLE: $pagetitle";
# Check the kill switch.
my $p = $mw->get_page({title => $kill_switch_page}) || die;
$p->{'*'} =~ $kill_switch_trigger and die "Killed.\n";
$p = $mw->get_page({title => $pagetitle}) || die;
my $timestamp = $p->{timestamp}; # To prevent edit conflicts.
my $text = $p->{'*'};
$text =~ s/ (.+? \n) (== \s* {{) /$2/xs or die;
my $newtext = $1;
# So $newtext just contains the task description so far.
while (
$text =~ s! \A
( == $h {{ $h header $h \| $h ([^}]+?) $h }} $h == $h \n )
(.+?)
( \z | == $h {{ )
!$4!xs
) {
my ($header, $langname, $body) = ($1, $2, $3);
s/\bC #/C#/ foreach $header, $langname;
# Why some people put a space there, I have no idea.
my $tag =
$langtags{lc $langname}
|| $langname =~ /assembl/i && 'asm'
||
## BASIC dialect-guessing is commented out because
## on some pages, programs for more than one dialect
## appear under "BASIC". Really we ought to treat
## each dialect as its own language.
#$langname =~ /basic/i &&
# ($body =~ /q(uick)?basic/i && 'qbasic' ||
# $body =~ /f(ree)?basic/i && 'freebasic' ||
# $body =~ /t(hin)?basic/i && 'thinbasic') ||
undef;
if ($tag) {
$tag = "<lang $tag>";
if ($body =~ /<lang/) { # Use the correct identifier.
$body =~ s {$h (<lang [^>]* >)}
{my $s = $1;
lc($s) eq $tag || $s =~ /java5/i
? $s # Don't replace "Mathematica" with "mathematica" or "java5" with "java"
: $tag}gxe;
# Get rid of any indenting spaces left behind when
# someone else added the lang tags.
lc($langname) eq 'whitespace' or $body =~ s
{(<lang [^>]* >) ((?:$h\n)*) (.+?) \s* \x3c/lang>}
{my ($t, $leading, $b) = ($1, $2, $3);
if ($b !~ /^\S/m and ($leading or $b =~ /\n/))
# It there's no newline in $b, the
# indentation is probably intentional (as
# in many J examples).
{my $space = minstr($b =~ /^( +)\S/gm);
$b =~ s/^$space//gm;}
"$t$b\x3c/lang>"}xges;
}
elsif ($body =~ /<pre/)
# Just assume they should all be lang tags.
{
$body =~ s
{ <pre [^>]* > \s*
(.+?)
\s* </pre> }
{decode_entities "$tag$1\x3c/lang>"}xseg;
}
# HTML entities don't work in lang tags.
# But they aren't necessary, either.
else
# Turn indented passages into lang-tagged passages.
{
$body =~ s
{ ( ^ $lwsl \n
(?: (?: $lwsl \n | $h \n )*
$lwsl \n )? ) }
{my $t = $1;
my $space = minstr($t =~ /^( +)\S/gm);
$t =~ s/^$space//gm;
$t =~ s/\s+\z//;
decode_entities("$tag$t\x3c/lang>\n");}mgex;
}
}
$body =~ s
{(<lang [^>]*>) <nowiki> \s* (.+?) \s* </nowiki> \x3c/lang>}
{$1$2\x3c/lang>}gsx;
$newtext .= $header . $body;
}
$newtext .= $text;
$newtext =~ s/\s*\z/\n/;
my $success = $mw->edit(
{
action => 'edit',
title => $pagetitle,
basetimestamp => $timestamp,
text => $newtext,
minor => 1,
# All we're doing, ultimately, is formatting.
bot => 1,
nocreate => 1,
# If the page was deleted while we were regexing,
# we probably shouldn't resurrect it!
summary => 'Fixed lang tags.'
},
{skip_encoding => 1});
# Without the skip_encoding option, non-ASCII characters
# will get corrupted.
if ($success) {
say(exists $success->{edit}->{nochange}
? 'Unchanged.'
: 'Committed!');
push @done, $pagetitle;
}
else
# Probably an edit conflict.
{
say "Couldn't commit; I'll try again later.";
push @todo, $pagetitle;
}
DumpFile $tasks_path, \%tasks;
sleep DELAY_BETWEEN_EDITS;
}