Rosetta Code:Village Pump/Lang-tag bot/Source: Difference between revisions

From Rosetta Code
Content added Content deleted
(Changed to persistently keep track of which pages have been edited and to tag APL. Added to Category:Perl.)
No edit summary
 
(2 intermediate revisions by 2 users not shown)
Line 1: Line 1:
<lang perl>use warnings;
<syntaxhighlight lang="perl">use warnings;
use strict;
use strict;
use feature 'say';
use feature 'say';
Line 7: Line 7:
use MediaWiki::API;
use MediaWiki::API;


use constant DELAY_BETWEEN_EDITS => 10*60; # In seconds.
use constant DELAY_BETWEEN_EDITS => 10 * 60; # In seconds.


my $username = 'UnderBot';
my $username = 'UnderBot';
my $password = 'secret';
my $password = 'secret';


my $kill_switch_page = 'User talk:UnderBot';
my $kill_switch_page = 'User talk:UnderBot';
my $kill_switch_trigger = qr/stopediting/;
my $kill_switch_trigger = qr/stopediting/;


my $tasks_path = '/home/hippo/Temporary/tasks.yml';
my $tasks_path = '/home/hippo/Temporary/tasks.yml';


my %langtags = (
my %langtags = ('4d' => '4d',
'actionscript' => 'actionscript',
'4d' => '4d',
'ada' => 'ada',
'actionscript' => 'actionscript',
'agda2' => 'agda2',
'ada' => 'ada',
'algol 60' => 'algol60',
'agda2' => 'agda2',
'algol 60' => 'algol60',
'algol 68' => 'algol68',
'amigae' => 'amigae',
'algol 68' => 'algol68',
'apl' => 'apl',
'amigae' => 'amigae',
'applescript' => 'applescript',
'apl' => 'apl',
'assembly' => 'asm',
'applescript' => 'applescript',
'autohotkey' => 'autohotkey',
'assembly' => 'asm',
'awk' => 'awk',
'autohotkey' => 'autohotkey',
'bc' => 'bc',
'awk' => 'awk',
'befunge' => 'befunge',
'bc' => 'bc',
'brainf***' => 'bf',
'befunge' => 'befunge',
'caml' => 'caml',
'brainf***' => 'bf',
'c' => 'c',
'caml' => 'caml',
'c++' => 'cpp',
'c' => 'c',
'clean' => 'clean',
'c++' => 'cpp',
'clojure' => 'lisp',
'clean' => 'clean',
'cobol' => 'cobol',
'clojure' => 'lisp',
'coldfusion' => 'cfm',
'cobol' => 'cobol',
'common lisp' => 'lisp',
'coldfusion' => 'cfm',
'component pascal' => 'pascal',
'common lisp' => 'lisp',
'coq' => 'coq',
'component pascal' => 'pascal',
'c sharp|c#' => 'csharp',
'coq' => 'coq',
'c sharp|c#' => 'csharp',
'c sharp' => 'csharp',
'dc' => 'dc',
'c sharp' => 'csharp',
'd' => 'd',
'dc' => 'dc',
'delphi' => 'delphi',
'd' => 'd',
'dos batch file' => 'dos',
'delphi' => 'delphi',
'ec' => 'ec',
'dos batch file' => 'dos',
'e' => 'e',
'ec' => 'ec',
'eiffel' => 'eiffel',
'e' => 'e',
'ella' => 'ella',
'eiffel' => 'eiffel',
'emacs lisp' => 'lisp',
'ella' => 'ella',
'erlang' => 'erlang',
'emacs lisp' => 'lisp',
'esql' => 'sql',
'erlang' => 'erlang',
'factor' => 'factor',
'esql' => 'sql',
'false' => 'false',
'factor' => 'factor',
'fan' => 'fan',
'false' => 'false',
'f' => 'f',
'fan' => 'fan',
'forth' => 'forth',
'f' => 'f',
'fortran' => 'fortran',
'forth' => 'forth',
'fp' => 'fp',
'fortran' => 'fortran',
'f sharp|f#' => 'fsharp',
'fp' => 'fp',
'f sharp|f#' => 'fsharp',
'f_sharp|f#' => 'fsharp',
'gap' => 'gap',
'f_sharp|f#' => 'fsharp',
'genyris' => 'genyris',
'gap' => 'gap',
'gnuplot' => 'gnuplot',
'genyris' => 'genyris',
'go' => 'go',
'gnuplot' => 'gnuplot',
'groovy' => 'groovy',
'go' => 'go',
'haskell' => 'haskell',
'groovy' => 'groovy',
'haxe' => 'haxe',
'haskell' => 'haskell',
'hq9+' => 'hq9p',
'haxe' => 'haxe',
'html' => 'html4strict',
'hq9+' => 'hq9p',
'icon' => 'icon',
'html' => 'html4strict',
'idl' => 'idl',
'icon' => 'icon',
'io' => 'io',
'idl' => 'idl',
'javafx script' => 'javafx',
'io' => 'io',
'java' => 'java',
'javafx script' => 'javafx',
'javascript' => 'javascript',
'java' => 'java',
'j' => 'j',
'javascript' => 'javascript',
'jocaml' => 'jocaml',
'j' => 'j',
'joy' => 'joy',
'jocaml' => 'jocaml',
'jscript.net' => 'jscript.net',
'joy' => 'joy',
'json' => 'json',
'jscript.net' => 'jscript.net',
'judoscript' => 'judoscript',
'json' => 'json',
'korn shell' => 'korn',
'judoscript' => 'judoscript',
'labview' => 'labview',
'korn shell' => 'korn',
'latex' => 'latex',
'labview' => 'labview',
'lisaac' => 'lisaac',
'latex' => 'latex',
'lisp' => 'lisp',
'lisaac' => 'lisaac',
'logo' => 'logo',
'lisp' => 'lisp',
'logtalk' => 'logtalk',
'logo' => 'logo',
'lotusscript' => 'lotusscript',
'logtalk' => 'logtalk',
'lse64' => 'lse64',
'lotusscript' => 'lotusscript',
'lua' => 'lua',
'lse64' => 'lse64',
'lucid' => 'lucid',
'lua' => 'lua',
'm4' => 'm4',
'lucid' => 'lucid',
'make' => 'make',
'm4' => 'm4',
'maple' => 'maple',
'make' => 'make',
'mathematica' => 'mathematica',
'maple' => 'maple',
'matlab' => 'matlab',
'mathematica' => 'mathematica',
'maxima' => 'maxima',
'matlab' => 'matlab',
'maxscript' => 'maxscript',
'maxima' => 'maxima',
'metafont' => 'metafont',
'maxscript' => 'maxscript',
'mirc scripting language' => 'mirc',
'metafont' => 'metafont',
'mmix' => 'mmix',
'mirc scripting language' => 'mirc',
'modula-2' => 'modula2',
'mmix' => 'mmix',
'modula-2' => 'modula2',
'modula-3' => 'modula3',
'moo' => 'moo',
'modula-3' => 'modula3',
'mpif90' => 'mpif90',
'moo' => 'moo',
'ms sql' => 'sql',
'mpif90' => 'mpif90',
'ms sql' => 'sql',
'mysql' => 'sql',
'newlisp' => 'lisp',
'mysql' => 'sql',
'nial' => 'nial',
'newlisp' => 'lisp',
'oberon-2' => 'oberon2',
'nial' => 'nial',
'objective-c' => 'objc',
'oberon-2' => 'oberon2',
'object pascal' => 'objectpascal',
'objective-c' => 'objc',
'ocaml' => 'ocaml',
'object pascal' => 'objectpascal',
'octave' => 'octave',
'ocaml' => 'ocaml',
'omega' => 'omega',
'octave' => 'octave',
'openedge/progress' => 'openedge',
'omega' => 'omega',
'oz' => 'oz',
'openedge/progress' => 'openedge',
'pari/gp' => 'parigp',
'oz' => 'oz',
'pascal' => 'pascal',
'pari/gp' => 'parigp',
'perl 6' => 'perl6',
'pascal' => 'pascal',
'perl 6' => 'perl6',
'perl' => 'perl',
'php' => 'php',
'perl' => 'perl',
'pike' => 'pike',
'php' => 'php',
'plaintex' => 'tex',
'pike' => 'pike',
'pl/i' => 'pli',
'plaintex' => 'tex',
'pl/i' => 'pli',
'pl/pgsql' => 'plpgsql',
'pl/pgsql' => 'plpgsql',
'pl/sql' => 'plsql',
'pop11' => 'pop11',
'pl/sql' => 'plsql',
'postgresql' => 'sql',
'pop11' => 'pop11',
'postscript' => 'postscript',
'postgresql' => 'sql',
'powerbasic' => 'powerbasic',
'postscript' => 'postscript',
'powershell' => 'powershell',
'powerbasic' => 'powerbasic',
'prolog' => 'prolog',
'powershell' => 'powershell',
'pure' => 'pure',
'prolog' => 'prolog',
'python' => 'python',
'pure' => 'pure',
'q' => 'q',
'python' => 'python',
'rapidq' => 'rapidq',
'q' => 'q',
'raven' => 'raven',
'rapidq' => 'rapidq',
'rexx' => 'rexx',
'raven' => 'raven',
'rhope' => 'rhope',
'rexx' => 'rexx',
'r' => 'r',
'rhope' => 'rhope',
'ruby' => 'ruby',
'r' => 'r',
'sas' => 'sas',
'ruby' => 'ruby',
'scala' => 'scala',
'sas' => 'sas',
'scheme' => 'scheme',
'scala' => 'scala',
'script3d' => 'script3d',
'scheme' => 'scheme',
'seed7' => 'seed7',
'script3d' => 'script3d',
'self' => 'self',
'seed7' => 'seed7',
'setl' => 'setl',
'self' => 'self',
'slate' => 'slate',
'setl' => 'setl',
'smalltalk' => 'smalltalk',
'slate' => 'slate',
'smeql' => 'smeql',
'smalltalk' => 'smalltalk',
'snusp' => 'snusp',
'smeql' => 'smeql',
'sql' => 'sql',
'snusp' => 'snusp',
'standard ml' => 'sml',
'sql' => 'sql',
'supercollider' => 'supercollider',
'standard ml' => 'sml',
'svg' => 'xml',
'supercollider' => 'supercollider',
'tcl' => 'tcl',
'svg' => 'xml',
'ti-83 basic' => 'ti83b',
'tcl' => 'tcl',
'ti-83 basic' => 'ti83b',
'ti-89 basic' => 'ti89b',
'toka' => 'toka',
'ti-89 basic' => 'ti89b',
'transact-sql' => 'sql',
'toka' => 'toka',
'tr' => 'tr',
'transact-sql' => 'sql',
'twelf' => 'twelf',
'tr' => 'tr',
'unixpipes' => 'bash',
'twelf' => 'twelf',
'unixpipes' => 'bash',
'unix shell' => 'bash',
'unlambda' => 'unlambda',
'unix shell' => 'bash',
'ursala' => 'ursala',
'unlambda' => 'unlambda',
'vbscript' => 'vbscript',
'ursala' => 'ursala',
'vedit macro language' => 'vedit',
'vbscript' => 'vbscript',
'visual basic .net' => 'vbnet',
'vedit macro language' => 'vedit',
'visual basic .net' => 'vbnet',
'visual basic' => 'vb',
'visual basic' => 'vb',
'visual objects' => 'visobj',
'vorpal' => 'vorpal',
'visual objects' => 'visobj',
'v' => 'v',
'vorpal' => 'vorpal',
'wrapl' => 'wrapl',
'v' => 'v',
'xquery' => 'xquery',
'wrapl' => 'wrapl',
'xslt' => 'xml',
'xquery' => 'xquery',
'xtalk' => 'xtalk',);
'xslt' => 'xml',
'xtalk' => 'xtalk',
);


my $h = qr/(?:\t| )*/;
my $h = qr/(?:\t| )*/;

# Vaguely like Perl 6's \h.
# Vaguely like Perl 6's \h.
my $lwsl = qr/(?:\t| )+\S[^\n]*/;
my $lwsl = qr/(?:\t| )+\S[^\n]*/;

# Leading WhiteSpace Line.
# Leading WhiteSpace Line.


# ------------------------------------------------------------
# ------------------------------------------------------------
Line 196: Line 196:
our (%tasks, @done, @todo);
our (%tasks, @done, @todo);
local *tasks = LoadFile $tasks_path;
local *tasks = LoadFile $tasks_path;
local *done = $tasks{done};
local *done = $tasks{done};
local *todo = $tasks{todo};
local *todo = $tasks{todo};


my $mw = new MediaWiki::API({api_url => 'http://rosettacode.org/mw/api.php'});
my $mw = new MediaWiki::API({api_url => 'http://rosettacode.org/mw/api.php'});
$mw->login({lgname => $username, lgpassword => $password})
$mw->login({lgname => $username, lgpassword => $password})
or die q(Couldn't log in.);
or die q{Couldn't log in. (}, $mw->{error}->{code}, ': ',
$mw->{error}->{details}, ')';


while (@todo)
while (@todo) {
{my $pagetitle = shift @todo;
my $pagetitle = shift @todo;
say "TITLE: $pagetitle";
say "TITLE: $pagetitle";


Line 212: Line 213:


$p = $mw->get_page({title => $pagetitle}) || die;
$p = $mw->get_page({title => $pagetitle}) || die;
my $timestamp = $p->{timestamp}; # To prevent edit conflicts.
my $timestamp = $p->{timestamp}; # To prevent edit conflicts.
my $text = $p->{'*'};
my $text = $p->{'*'};


$text =~ s/ (.+? \n) (== \s* {{) /$2/xs or die;
$text =~ s/ (.+? \n) (== \s* {{) /$2/xs or die;
my $newtext = $1;
my $newtext = $1;

# So $newtext just contains the task description so far.
# So $newtext just contains the task description so far.
while ($text =~ s! \A
while (
$text =~ s! \A
( == $h {{ $h header $h \| $h ([^}]+?) $h }} $h == $h \n )
( == $h {{ $h header $h \| $h ([^}]+?) $h }} $h == $h \n )


Line 225: Line 228:
( \z | == $h {{ )
( \z | == $h {{ )


!$4!xs)
!$4!xs
) {
{my ($header, $langname, $body) = ($1, $2, $3);
my ($header, $langname, $body) = ($1, $2, $3);
s/\bC #/C#/ foreach $header, $langname;
s/\bC #/C#/ foreach $header, $langname;
# Why some people put a space there, I have no idea.
my $tag = $langtags{lc $langname} ||
$langname =~ /assembl/i && 'asm' ||
## BASIC dialect-guessing is commented out because
## on some pages, programs for more than one dialect
## appear under "BASIC". Really we ought to treat
## each dialect as its own language.
#$langname =~ /basic/i &&
# ($body =~ /q(uick)?basic/i && 'qbasic' ||
# $body =~ /f(ree)?basic/i && 'freebasic' ||
# $body =~ /t(hin)?basic/i && 'thinbasic') ||
undef;


# Why some people put a space there, I have no idea.
if ($tag)
{$tag = "<lang $tag>";
my $tag =
if ($body =~ /<lang/)
$langtags{lc $langname}
{# Use the correct identifier.
|| $langname =~ /assembl/i && 'asm'
||
## BASIC dialect-guessing is commented out because
## on some pages, programs for more than one dialect
## appear under "BASIC". Really we ought to treat
## each dialect as its own language.
#$langname =~ /basic/i &&
# ($body =~ /q(uick)?basic/i && 'qbasic' ||
# $body =~ /f(ree)?basic/i && 'freebasic' ||
# $body =~ /t(hin)?basic/i && 'thinbasic') ||
undef;

if ($tag) {
$tag = "<lang $tag>";
if ($body =~ /<lang/) { # Use the correct identifier.
$body =~ s {$h (<lang [^>]* >)}
$body =~ s {$h (<lang [^>]* >)}
{my $s = $1;
{my $s = $1;
Line 250: Line 256:
? $s # Don't replace "Mathematica" with "mathematica" or "java5" with "java"
? $s # Don't replace "Mathematica" with "mathematica" or "java5" with "java"
: $tag}gxe;
: $tag}gxe;

# Get rid of any indenting spaces left behind when
# Get rid of any indenting spaces left behind when
# someone else added the lang tags.
# someone else added the lang tags.
Line 259: Line 266:
# indentation is probably intentional (as
# indentation is probably intentional (as
# in many J examples).
# in many J examples).
{my $space = minstr($b =~ /^( +)/gm);
{my $space = minstr($b =~ /^( +)\S/gm);
$b =~ s/^$space//gm;}
$b =~ s/^$space//gm;}
"$t$b\x3c/lang>"}xges;}
"$t$b\x3c/lang>"}xges;
}
elsif ($body =~ /<pre/)
elsif ($body =~ /<pre/)

# Just assume they should all be lang tags.
# Just assume they should all be lang tags.
{$body =~ s
{
{ <pre [^>]* > \s*
$body =~ s
{ <pre [^>]* > \s*
(.+?)
(.+?)
\s* </pre> }
\s* </pre> }
{decode_entities "$tag$1\x3c/lang>"}xseg;}
{decode_entities "$tag$1\x3c/lang>"}xseg;
}
# HTML entities don't work in lang tags.

# But they aren't necessary, either.
# HTML entities don't work in lang tags.
# But they aren't necessary, either.
else
else

# Turn indented passages into lang-tagged passages.
# Turn indented passages into lang-tagged passages.
{$body =~ s
{
$body =~ s
{ ( ^ $lwsl \n
{ ( ^ $lwsl \n
(?: (?: $lwsl \n | $h \n )*
(?: (?: $lwsl \n | $h \n )*
$lwsl \n )? ) }
$lwsl \n )? ) }
{my $t = $1;
{my $t = $1;
my $space = minstr($t =~ /^( +)/gm);
my $space = minstr($t =~ /^( +)\S/gm);
$t =~ s/^$space//gm;
$t =~ s/^$space//gm;
$t =~ s/\s+\z//;
$t =~ s/\s+\z//;
decode_entities("$tag$t\x3c/lang>\n");}mgex;}}
decode_entities("$tag$t\x3c/lang>\n");}mgex;
}
}


$body =~ s
$body =~ s
{(<lang [^>]*>) <nowiki> \s* (.+?) \s* </nowiki> \x3c/lang>}
{(<lang [^>]*>) <nowiki> \s* (.+?) \s* </nowiki> \x3c/lang>}
{$1$2\x3c/lang>}gsx;
{$1$2\x3c/lang>}gsx;
$newtext .= $header . $body;}
$newtext .= $header . $body;
}


$newtext .= $text;
$newtext .= $text;
$newtext =~ s/\s*\z/\n/;
$newtext =~ s/\s*\z/\n/;

my $success = $mw->edit
my $success = $mw->edit(
({action => 'edit',
{
title => $pagetitle,
action => 'edit',
basetimestamp => $timestamp,
title => $pagetitle,
text => $newtext,
basetimestamp => $timestamp,
minor => 1,
text => $newtext,
# All we're doing, ultimately, is formatting.
minor => 1,

bot => 1,
nocreate => 1,
# All we're doing, ultimately, is formatting.
# If the page was deleted while we were regexing,
bot => 1,
nocreate => 1,
# we probably shouldn't resurrect it!

summary => 'Fixed lang tags (automatic edit).'},
# If the page was deleted while we were regexing,
# we probably shouldn't resurrect it!
summary => 'Fixed lang tags.'
},
{skip_encoding => 1});
{skip_encoding => 1});
# Without the skip_encoding option, non-ASCII characters
# will get corrupted.


# Without the skip_encoding option, non-ASCII characters
if ($success)
{say 'Committed!';
# will get corrupted.

push @done, $pagetitle;}
else
if ($success) {
# Probably an edit conflict.
say(exists $success->{edit}->{nochange}
{say "Couldn't commit; I'll try again later.";
? 'Unchanged.'
push @todo, $pagetitle;}
: 'Committed!');
DumpFile $tasks_path, \%tasks;
push @done, $pagetitle;
}
else

# Probably an edit conflict.
{
say "Couldn't commit; I'll try again later.";
push @todo, $pagetitle;
}
DumpFile $tasks_path, \%tasks;


sleep DELAY_BETWEEN_EDITS;}</lang>
sleep DELAY_BETWEEN_EDITS;
}
</syntaxhighlight>


[[Category:Perl]]
[[Category:Perl]]

Latest revision as of 08:47, 21 July 2023

use warnings;
use strict;
use feature 'say';
use List::Util '&minstr';
use HTML::Entities '&decode_entities';
use YAML::XS qw(&DumpFile &LoadFile);
use MediaWiki::API;

# Pause between two consecutive page edits, in seconds (ten minutes).
use constant DELAY_BETWEEN_EDITS => 60 * 10;

# Wiki account the bot logs in with.
my ($username, $password) = ('UnderBot', 'secret');

# The bot refuses to continue as soon as the text of this page matches
# the trigger pattern.
my ($kill_switch_page, $kill_switch_trigger) =
  ('User talk:UnderBot', qr/stopediting/);

# YAML file that stores the lists of page titles still to do and already done.
my $tasks_path = '/home/hippo/Temporary/tasks.yml';

# Maps a lower-cased language name, as written inside a {{header|...}}
# template, to the identifier used in the generated <lang ...> tag.
my %langtags = (

    # Languages whose tag identifier is the same as the lower-cased name.
    (map { ($_ => $_) } qw(
        4d actionscript ada agda2 amigae apl applescript autohotkey awk
        bc befunge caml c clean cobol coq dc d delphi ec e eiffel ella
        erlang factor false fan f forth fortran fp gap genyris gnuplot
        go groovy haskell haxe icon idl io java javascript j jocaml joy
        jscript.net json judoscript labview latex lisaac lisp logo
        logtalk lotusscript lse64 lua lucid m4 make maple mathematica
        matlab maxima maxscript metafont mmix moo mpif90 nial ocaml
        octave omega oz pascal perl php pike pop11 postscript powerbasic
        powershell prolog pure python q rapidq raven rexx rhope r ruby
        sas scala scheme script3d seed7 self setl slate smalltalk smeql
        snusp sql supercollider tcl toka tr twelf unlambda ursala
        vbscript vorpal v wrapl xquery xtalk
    )),

    # Languages whose tag identifier differs from the name.
    'algol 60'                => 'algol60',
    'algol 68'                => 'algol68',
    'assembly'                => 'asm',
    'brainf***'               => 'bf',
    'c++'                     => 'cpp',
    'clojure'                 => 'lisp',
    'coldfusion'              => 'cfm',
    'common lisp'             => 'lisp',
    'component pascal'        => 'pascal',
    'c sharp|c#'              => 'csharp',
    'c sharp'                 => 'csharp',
    'dos batch file'          => 'dos',
    'emacs lisp'              => 'lisp',
    'esql'                    => 'sql',
    'f sharp|f#'              => 'fsharp',
    'f_sharp|f#'              => 'fsharp',
    'hq9+'                    => 'hq9p',
    'html'                    => 'html4strict',
    'javafx script'           => 'javafx',
    'korn shell'              => 'korn',
    'mirc scripting language' => 'mirc',
    'modula-2'                => 'modula2',
    'modula-3'                => 'modula3',
    'ms sql'                  => 'sql',
    'mysql'                   => 'sql',
    'newlisp'                 => 'lisp',
    'oberon-2'                => 'oberon2',
    'objective-c'             => 'objc',
    'object pascal'           => 'objectpascal',
    'openedge/progress'       => 'openedge',
    'pari/gp'                 => 'parigp',
    'perl 6'                  => 'perl6',
    'plaintex'                => 'tex',
    'pl/i'                    => 'pli',
    'pl/pgsql'                => 'plpgsql',
    'pl/sql'                  => 'plsql',
    'postgresql'              => 'sql',
    'standard ml'             => 'sml',
    'svg'                     => 'xml',
    'ti-83 basic'             => 'ti83b',
    'ti-89 basic'             => 'ti89b',
    'transact-sql'            => 'sql',
    'unixpipes'               => 'bash',
    'unix shell'              => 'bash',
    'vedit macro language'    => 'vedit',
    'visual basic .net'       => 'vbnet',
    'visual basic'            => 'vb',
    'visual objects'          => 'visobj',
    'xslt'                    => 'xml',
);

# Optional horizontal whitespace: any run of tabs and spaces, possibly
# empty. Vaguely like Perl 6's \h.
my $h = qr/(?:\t| )*/;

# "Leading WhiteSpace Line": one indented line, i.e. at least one tab or
# space, then a non-whitespace character, then the rest of the line
# (the trailing newline is not included).
my $lwsl = qr/(?:\t| )+\S[^\n]*/;


# ------------------------------------------------------------

# Persistent work queue. The YAML file at $tasks_path is loaded and
# glob-aliased as %tasks; @todo and @done are in turn aliased onto its
# `todo` and `done` lists, so pushing onto either array changes what
# DumpFile writes back at the end of every iteration.
our (%tasks, @done, @todo);
local *tasks = LoadFile $tasks_path;

# Make sure both lists exist before aliasing them. Without this, a missing
# entry would leave the alias pointing at an unrelated package array whose
# contents are never saved.
$tasks{$_} //= [] foreach qw(done todo);
local *done = $tasks{done};
local *todo = $tasks{todo};

my $mw = MediaWiki::API->new({api_url => 'http://rosettacode.org/mw/api.php'});
$mw->login({lgname => $username, lgpassword => $password})
  or die q{Couldn't log in. (}, $mw->{error}->{code}, ': ',
  $mw->{error}->{details}, ')';

# Process one page per iteration: fetch it, rewrite the code samples under
# each language header into <lang> tags, save the page, then record the
# outcome in the tasks file and wait before the next edit.
while (@todo) {
    my $pagetitle = shift @todo;
    say "TITLE: $pagetitle";

    # Check the kill switch.
    my $p = $mw->get_page({title => $kill_switch_page})
      or die "Couldn't fetch the kill-switch page '$kill_switch_page'";
    ($p->{'*'} // '') =~ $kill_switch_trigger and die "Killed.\n";

    $p = $mw->get_page({title => $pagetitle})
      or die "Couldn't fetch '$pagetitle'";
    my $timestamp = $p->{timestamp};    # To prevent edit conflicts.
    my $text      = $p->{'*'};
    defined $text
      or die "Page '$pagetitle' has no text (does it exist?)";

    # Move everything before the first "=={{" heading out of $text.
    # Literal braces are escaped so the patterns compile without
    # "Unescaped left brace" diagnostics.
    $text =~ s/ (.+? \n) (== \s* \{\{) /$2/xs
      or die "No language header found in '$pagetitle'";
    my $newtext = $1;

    # So $newtext just contains the task description so far.
    # Each pass strips one "=={{header|Language}}==" section off the
    # front of $text, leaving the start of the next heading (if any)
    # in place.
    while (
        $text =~ s! \A
             ( == $h \{\{ $h header $h \| $h ([^}]+?) $h }} $h == $h \n )

             (.+?)

             ( \z | == $h \{\{ )

              !$4!xs
      ) {
        my ($header, $langname, $body) = ($1, $2, $3);

        # Why some people put a space there, I have no idea.
        s/\bC #/C#/ foreach $header, $langname;

        my $tag =
             $langtags{lc $langname}
          || $langname =~ /assembl/i && 'asm'
          ||
          ## BASIC dialect-guessing is commented out because
          ## on some pages, programs for more than one dialect
          ## appear under "BASIC". Really we ought to treat
          ## each dialect as its own language.
          #$langname =~ /basic/i &&
          #   ($body =~ /q(uick)?basic/i && 'qbasic' ||
          #    $body =~ /f(ree)?basic/i && 'freebasic' ||
          #    $body =~ /t(hin)?basic/i && 'thinbasic') ||
          undef;

        # Sections whose language has no known tag are copied unchanged
        # (apart from the <nowiki> clean-up below).
        if ($tag) {
            $tag = "<lang $tag>";
            if ($body =~ /<lang/) {

                # Use the correct identifier. Don't replace "Mathematica"
                # with "mathematica" or "java5" with "java".
                $body =~ s {$h (<lang [^>]* >)}
                   {my $s = $1;
                    lc($s) eq $tag || $s =~ /java5/i
                    ? $s
                    : $tag}gxe;

                # Get rid of any indenting spaces left behind when
                # someone else added the lang tags. If there's no newline
                # in the code, the indentation is probably intentional
                # (as in many J examples), so it is left alone.
                # minstr yields undef when no line is indented with
                # spaces (e.g. tab indentation); fall back to stripping
                # nothing.
                lc($langname) eq 'whitespace' or $body =~ s
                    {(<lang [^>]* >) ((?:$h\n)*) (.+?) \s* \x3c/lang>}
                    {my ($open, $leading, $code) = ($1, $2, $3);
                     if ($code !~ /^\S/m and ($leading or $code =~ /\n/))
                        {my $space = minstr($code =~ /^( +)\S/gm) // '';
                         $code =~ s/^$space//gm;}
                      "$open$code\x3c/lang>"}xges;
            }
            elsif ($body =~ /<pre/) {

                # Just assume they should all be lang tags.
                # HTML entities don't work in lang tags.
                # But they aren't necessary, either.
                $body =~ s
                    { <pre [^>]* > \s*
                      (.+?)
                      \s* </pre> }
                    {decode_entities "$tag$1\x3c/lang>"}xseg;
            }
            else {

                # Turn indented passages into lang-tagged passages,
                # removing the smallest common run of leading spaces
                # (nothing is removed when minstr finds none).
                $body =~ s
                   { (  ^ $lwsl \n
                      (?: (?: $lwsl \n | $h \n )*
                           $lwsl \n )? ) }
                   {my $t = $1;
                    my $space = minstr($t =~ /^( +)\S/gm) // '';
                    $t =~ s/^$space//gm;
                    $t =~ s/\s+\z//;
                    decode_entities("$tag$t\x3c/lang>\n");}mgex;
            }
        }

        # Drop <nowiki> wrappers sitting directly inside lang tags.
        $body =~ s
            {(<lang [^>]*>) <nowiki> \s* (.+?) \s* </nowiki> \x3c/lang>}
            {$1$2\x3c/lang>}gsx;
        $newtext .= $header . $body;
    }

    # Whatever the section loop could not consume is appended as is,
    # and the page is normalized to end with exactly one newline.
    $newtext .= $text;
    $newtext =~ s/\s*\z/\n/;

    my $success = $mw->edit(
        {
         action        => 'edit',
         title         => $pagetitle,
         basetimestamp => $timestamp,
         text          => $newtext,

         # All we're doing, ultimately, is formatting.
         minor => 1,
         bot   => 1,

         # If the page was deleted while we were regexing,
         # we probably shouldn't resurrect it!
         nocreate => 1,
         summary  => 'Fixed lang tags.'
        },

        # Without the skip_encoding option, non-ASCII characters
        # will get corrupted.
        {skip_encoding => 1});

    if ($success) {
        say(exists $success->{edit}->{nochange}
            ? 'Unchanged.'
            : 'Committed!');
        push @done, $pagetitle;
    }
    else {

        # Probably an edit conflict.
        say "Couldn't commit; I'll try again later.";
        push @todo, $pagetitle;
    }

    # Persist the updated queue after every page so progress survives
    # a crash or the kill switch.
    DumpFile $tasks_path, \%tasks;

    sleep DELAY_BETWEEN_EDITS;
}