Rosetta Code/Fix code tags: Difference between revisions
(→{{header|J}}: alternative work around for displaying closing lang tag in code on wiki) |
m (→{{header|J}}: simplify) |
||
Line 40: | Line 40: | ||
patterns=:'<%s>;<lang %s>',LF,'</%s>;</','lang>',LF,'<code %s>;<lang %s>',LF,'</code>;</','lang>',LF |
patterns=:'<%s>;<lang %s>',LF,'</%s>;</','lang>',LF,'<code %s>;<lang %s>',LF,'</code>;</','lang>',LF |
||
fixCodeTags=: rplc&(, |
fixCodeTags=: rplc&(, (<;._2@,&';');._2 &> patterns vbsprintf _5]\ 5#langs)</lang> |
||
'''Example Usage:''' |
'''Example Usage:''' |
Revision as of 09:33, 21 June 2010
You are encouraged to solve this task according to the task description, using any language you may know.
Fix Rosetta Code deprecated code tags, with these rules:
Change <%s> to <lang %s>; change </%s> to </lang>; change <code %s> to <lang %s>; change </code> to </lang>.
Usage:
./convert.py < wikisource.txt > converted.txt
AutoHotkey
<lang AutoHotkey>; usage: > fixtags.ahk input.txt output.txt
; Reads the file named by the first argument, rewrites the deprecated tags and
; appends the converted text to the file named by the second argument.
FileRead, text, %1%
langs = ada,awk,autohotkey,etc
; The closing tag is assembled from pieces so that it never appears literally
; in this listing (the wiki would treat it as the end of the code block).
slang = /lang
slang := "<" . slang . ">"
Loop, Parse, langs, `,
{
  tag1 = <%A_LoopField%>
  tag2 = </%A_LoopField%>
  text := RegExReplace(text, tag1, "<lang " . A_LoopField . ">")
  text := RegExReplace(text, tag2, slang)
}
; The "code" form does not depend on the language list, so it is converted once
; after the loop. The s) option lets "." match newlines, so tagged code that
; spans several lines is converted as well.
text := RegExReplace(text, "s)<code (.+?)>(.*?)</code>", "<lang $1>$2" . slang)
FileAppend, % text, %2%
</lang>
J
Solution: <lang j>require 'printf strings'

NB. Boxed list of language names. The text must start with the delimiter
NB. (a space) because <;._1 cuts on the first character of its argument.
langs=: <;._1 LF -.~ noun define  NB. replace with real lang strings
 foo bar baz
)

NB. One 'pattern;replacement' template per line. The four lines contain five
NB. %s slots in total, which is why each language is repeated five times below.
NB. The closing lang tag is split in two so the wiki does not end this listing.
patterns=:'<%s>;<lang %s>',LF,'</%s>;</','lang>',LF,'<code %s>;<lang %s>',LF,'</code>;</','lang>',LF

NB. Fill the templates for every language, cut them into pattern/replacement
NB. pairs and hand the flattened list to rplc.
fixCodeTags=: rplc&(, (<;._2@,&';');._2 &> patterns vbsprintf _5]\ 5#langs)</lang>
Example Usage:
SampleText=: noun define Lorem ipsum <code foo>saepe audire</code> elaboraret ne quo, id equidem atomorum inciderint usu. <foo>In sit inermis deleniti percipit</foo>, ius ex tale civibus omittam. <barf>Vix ut doctus cetero invenire</barf>, his eu altera electram. Tota adhuc altera te sea, <code bar>soluta appetere ut mel</bar>. Quo quis graecis vivendo te, <baz>posse nullam lobortis ex usu</code>. Eam volumus perpetua constituto id, mea an omittam fierent vituperatoribus. ) fixCodeTags SampleText Lorem ipsum <lang foo>saepe audire</lang> elaboraret ne quo, id equidem atomorum inciderint usu. <lang foo>In sit inermis deleniti percipit</lang>, ius ex tale civibus omittam. <barf>Vix ut doctus cetero invenire</barf>, his eu altera electram. Tota adhuc altera te sea, <lang bar>soluta appetere ut mel</lang>. Quo quis graecis vivendo te, <lang baz>posse nullam lobortis ex usu</lang>. Eam volumus perpetua constituto id, mea an omittam fierent vituperatoribus.
JavaScript
<lang javascript>var langs = ['foo', 'bar', 'baz']; // real list of langs goes here var end_tag = '</'+'lang>';
var line; while (line = readline()) {
line = line.replace(new RegExp('', 'gi'), end_tag); for (var i = 0; i < langs.length; i++) line = line.replace(new RegExp('<(?:code )?(' + langs[i] + ')>', 'gi'), '<lang $1>') .replace(new RegExp('</' + langs[i] + '>', 'gi'), end_tag); print(line);
}</lang>
Lua
<lang lua> --thanks, random python guy
langs = {'ada', 'cpp-qt', 'pascal', 'lscript', 'z80', 'visualprolog', 'html4strict', 'cil', 'objc', 'asm', 'progress', 'teraterm', 'hq9plus', 'genero', 'tsql', 'email', 'pic16', 'tcl', 'apt_sources', 'io', 'apache', 'vhdl', 'avisynth', 'winbatch', 'vbnet', 'ini', 'scilab', 'ocaml-brief', 'sas', 'actionscript3', 'qbasic', 'perl', 'bnf', 'cobol', 'powershell', 'php', 'kixtart', 'visualfoxpro', 'mirc', 'make', 'javascript', 'cpp', 'sdlbasic', 'cadlisp', 'php-brief', 'rails', 'verilog', 'xml', 'csharp', 'actionscript', 'nsis', 'bash', 'typoscript', 'freebasic', 'dot', 'applescript', 'haskell', 'dos', 'oracle8', 'cfdg', 'glsl', 'lotusscript', 'mpasm', 'latex', 'sql', 'klonec', 'ruby', 'ocaml', 'smarty', 'python', 'oracle11', 'caddcl', 'robots', 'groovy', 'smalltalk', 'diff', 'fortran', 'cfm', 'lua', 'modula3', 'vb', 'autoit', 'java', 'text', 'scala', 'lotusformulas', 'pixelbender', 'reg', '_div', 'whitespace', 'providex', 'asp', 'css', 'lolcode', 'lisp', 'inno', 'mysql', 'plsql', 'matlab', 'oobas', 'vim', 'delphi', 'xorg_conf', 'gml', 'prolog', 'bf', 'per', 'scheme', 'mxml', 'd', 'basic4gl', 'm68k', 'gnuplot', 'idl', 'abap', 'intercal', 'c_mac', 'thinbasic', 'java5', 'xpp', 'boo', 'klonecpp', 'blitzbasic', 'eiffel', 'povray', 'c', 'gettext'}

-- Escape Lua pattern magic characters so that names such as 'cpp-qt' are
-- matched literally ('-' is otherwise a quantifier in Lua patterns).
local function escape(s)
  return (s:gsub("%p", "%%%0"))
end

-- Convert standard input line by line and write the result to standard output.
for line in io.lines() do
  for _, v in ipairs(langs) do
    local pat = escape(v)
    line = line:gsub("<" .. pat .. ">", "<lang " .. v .. ">")
    line = line:gsub("<code " .. pat .. ">", "<lang " .. v .. ">")
    line = line:gsub("</" .. pat .. ">", "</" .. "lang>") --the weird concatenation is to prevent the markup from breaking
  end
  -- The closing "code" tag does not depend on the language, so once per line is enough.
  line = line:gsub("</" .. "code>", "</" .. "lang>")
  print(line)
end
</lang>
OCaml
<lang ocaml>#load "str.cma"
(* Language names whose tags are rewritten, in processing order. *)
let langs = [
  "actionscript"; "ada"; "algol68"; "amigae"; "applescript"; "autohotkey";
  "awk"; "bash"; "basic"; "befunge"; "bf"; "c"; "cfm"; "cobol"; "cpp";
  "csharp"; "d"; "delphi"; "e"; "eiffel"; "factor"; "false"; "forth";
  "fortran"; "fsharp"; "haskell"; "haxe"; "j"; "java"; "javascript";
  "lisaac"; "lisp"; "logo"; "lua"; "m4"; "mathematica"; "maxscript";
  "modula3"; "moo"; "objc"; "ocaml"; "octave"; "oz"; "pascal"; "perl";
  "perl6"; "php"; "pike"; "pop11"; "powershell"; "prolog"; "python";
  "qbasic"; "r"; "rebol"; "ruby"; "scala"; "scheme"; "slate"; "smalltalk";
  "tcl"; "ti89b"; "vbnet"; "vedit";
]
(* Read everything that remains on input channel [ic] and return it as one
   string. Data is pulled in 4096-byte chunks through a mutable [Bytes]
   scratch buffer; [input] needs a mutable buffer, which an immutable string
   cannot provide. *)
let read_in ic =
  let buf = Buffer.create 16384
  and tmp = Bytes.create 4096 in
  let rec aux () =
    let bytes = input ic tmp 0 4096 in
    (* [input] returns 0 only at end of file. *)
    if bytes > 0 then begin
      Buffer.add_subbytes buf tmp 0 bytes;
      aux ()
    end
  in
  (try aux () with End_of_file -> ());
  Buffer.contents buf
(* Replace every occurrence of the literal text [pat] in [str], compared
   case-insensitively, with the replacement template [tpl]. *)
let repl pat tpl str =
  Str.global_replace (Str.regexp_string_case_fold pat) tpl str
(* change <%s> to <lang %s> *)
let repl1 lang str =
  repl ("<" ^ lang ^ ">") ("<lang " ^ lang ^ ">") str
(* change </%s> to </la\ng> *)
let repl2 lang str =
  (* The closing tag is built from two pieces so it never appears literally. *)
  repl ("</" ^ lang ^ ">") ("</lang" ^ ">") str
(* change <code %s> to <lang %s> *)
let repl3 lang str =
  (* The pattern must be the literal opening tag; an empty pattern would match
     at every position of the input. *)
  let pat = "<code " ^ lang ^ ">"
  and tpl = "<lang " ^ lang ^ ">" in
  (repl pat tpl str)
(* change </code> to </la\ng> *)
let repl4 lang str =
  (* [lang] is unused: the closing "code" tag is the same for every language.
     The parameter is kept so that all four rewriters share one signature. *)
  let pat = "</code>"
  and tpl = "</lang"^">" in
  (repl pat tpl str)
(* Read all of standard input, apply the four rewrites for each language in
   turn (repl1 first, repl4 last), and print the converted text. *)
let () =
  let convert str lang =
    str |> repl1 lang |> repl2 lang |> repl3 lang |> repl4 lang
  in
  print_string (List.fold_left convert (read_in stdin) langs)</lang>
(in the code the strings </lang> have been split in order to not confuse the wiki)
this line of code:
<lang ocaml> (repl4 lang (repl3 lang (repl2 lang (repl1 lang str))))</lang>
could also be written like this:
<lang ocaml> List.fold_right (fun repl -> repl lang) [repl1; repl2; repl3; repl4] str</lang>
Here we implement the read_in function to read all the content from an input channel, because there is no such function in the standard library; alternatively we can use extLib, which provides the function Std.input_all (in its module Std).
Perl
<lang perl>
# Language names whose <name> ... </name> tags are rewritten.
my @langs = qw(ada cpp-qt pascal lscript z80 visualprolog
html4strict cil objc asm progress teraterm hq9plus genero tsql
email pic16 tcl apt_sources io apache vhdl avisynth winbatch
vbnet ini scilab ocaml-brief sas actionscript3 qbasic perl bnf
cobol powershell php kixtart visualfoxpro mirc make javascript
cpp sdlbasic cadlisp php-brief rails verilog xml csharp
actionscript nsis bash typoscript freebasic dot applescript
haskell dos oracle8 cfdg glsl lotusscript mpasm latex sql klonec
ruby ocaml smarty python oracle11 caddcl robots groovy smalltalk
diff fortran cfm lua modula3 vb autoit java text scala
lotusformulas pixelbender reg _div whitespace providex asp css
lolcode lisp inno mysql plsql matlab oobas vim delphi xorg_conf
gml prolog bf per scheme mxml d basic4gl m68k gnuplot idl abap
intercal c_mac thinbasic java5 xpp boo klonecpp blitzbasic eiffel
povray c gettext);

# Slurp all of standard input into one string.
my $text = join "", <STDIN>;
# The closing tag is assembled at run time so that it never appears
# literally in this listing.
my $slang="/lang";
for (@langs) {
    # \Q...\E makes the language name match literally.
    $text =~ s|<\Q$_\E>|<lang $_>|g;
    $text =~ s|</\Q$_\E>|<$slang>|g;
}

# The "code" form: /s lets "." cross newlines so multi-line code is handled.
$text =~ s|<code (.+?)>(.*?)</code>|<lang $1>$2<$slang>|sg;

print $text;
</lang>
PicoLisp
<lang PicoLisp>#!bin/picolisp lib.l
# Copies input to output, rewriting deprecated tags on the way.
# 'Lang' is the list of tag names that are treated as languages.
(let Lang '("ada" "awk" "c" "forth" "prolog" "python" "z80")
# NOTE(review): '(in NIL ...)' presumably selects standard input - confirm
# against the PicoLisp reference.
(in NIL
# Loop while 'echo' finds another "<"; 'S' is then the text read up to ">".
(while (echo "<")
(let S (till ">" T)
(cond
# Tag starts with "code ": print "<lang" followed by the tag text with its
# first four characters dropped (the leading space is kept).
((pre? "code " S) (prin "<lang" (cddddr (chop S))))
# Tag is a known language name: print it as a lang tag.
((member S Lang) (prin "<lang " S))
# Closing "code" tag.
((= S "/code") (prin "</lang"))
# Closing tag of a known language: "/" followed by a name found in 'Lang'.
((and (pre? "/" S) (member (pack (cdr (chop S))) Lang))
(prin "</lang") )
# Anything else is printed back unchanged.
(T (prin "<" S)) ) ) ) ) )
(bye)</lang>
Python
<lang python># coding: utf-8
import sys
import re
# Language names whose <name> ... </name> tags are rewritten.
langs = ['ada', 'cpp-qt', 'pascal', 'lscript', 'z80', 'visualprolog',
         'html4strict', 'cil', 'objc', 'asm', 'progress', 'teraterm', 'hq9plus',
         'genero', 'tsql', 'email', 'pic16', 'tcl', 'apt_sources', 'io', 'apache',
         'vhdl', 'avisynth', 'winbatch', 'vbnet', 'ini', 'scilab', 'ocaml-brief',
         'sas', 'actionscript3', 'qbasic', 'perl', 'bnf', 'cobol', 'powershell',
         'php', 'kixtart', 'visualfoxpro', 'mirc', 'make', 'javascript', 'cpp',
         'sdlbasic', 'cadlisp', 'php-brief', 'rails', 'verilog', 'xml', 'csharp',
         'actionscript', 'nsis', 'bash', 'typoscript', 'freebasic', 'dot',
         'applescript', 'haskell', 'dos', 'oracle8', 'cfdg', 'glsl', 'lotusscript',
         'mpasm', 'latex', 'sql', 'klonec', 'ruby', 'ocaml', 'smarty', 'python',
         'oracle11', 'caddcl', 'robots', 'groovy', 'smalltalk', 'diff', 'fortran',
         'cfm', 'lua', 'modula3', 'vb', 'autoit', 'java', 'text', 'scala',
         'lotusformulas', 'pixelbender', 'reg', '_div', 'whitespace', 'providex',
         'asp', 'css', 'lolcode', 'lisp', 'inno', 'mysql', 'plsql', 'matlab',
         'oobas', 'vim', 'delphi', 'xorg_conf', 'gml', 'prolog', 'bf', 'per',
         'scheme', 'mxml', 'd', 'basic4gl', 'm68k', 'gnuplot', 'idl', 'abap',
         'intercal', 'c_mac', 'thinbasic', 'java5', 'xpp', 'boo', 'klonecpp',
         'blitzbasic', 'eiffel', 'povray', 'c', 'gettext']

# Both tag names are kept in variables so that neither tag appears literally
# in this listing (the wiki would otherwise interpret them).
slang = '/lang'
code = 'code'


def fix_code_tags(text, langs=langs):
    """Return ``text`` with the deprecated code tags converted to lang tags.

    For every name in ``langs`` the opening and closing tags of that name are
    rewritten; afterwards every "code NAME" ... "/code" pair is rewritten,
    with ``(?s)`` letting the tagged content span several lines.
    """
    for i in langs:
        text = text.replace("<%s>" % i, "<lang %s>" % i)
        text = text.replace("</%s>" % i, "<%s>" % slang)
    return re.sub("(?s)<%s (.+?)>(.*?)</%s>" % (code, code),
                  r"<lang \1>\2<%s>" % slang, text)


if __name__ == "__main__":
    # Filter mode: convert standard input to standard output.
    sys.stdout.write(fix_code_tags(sys.stdin.read()))
</lang>
R
Note that the instances of ##### are to stop the wiki getting confused. Please remove them before running the code.
<lang R>
# Convert the deprecated tags in `page` (a character vector of wiki text) and
# return the converted text.
# The ##### inside the replacement strings only keeps the wiki from ending
# this listing; it has to be removed before the code is run.
fixtags <- function(page)
{
   langs <- c("c", "c-sharp", "r")   # a complete list is required, obviously
   # One alternation "c|c-sharp|r" shared by all language-specific patterns.
   langs <- paste(langs, collapse="|")
   page <- gsub(paste("<(", langs, ")>", sep=""), "<lang \\1>", page)
   page <- gsub(paste("</(", langs, ")>", sep=""), "</#####lang>", page)
   # The space after "code" is required: the tag has the form <code NAME>.
   page <- gsub(paste("<code (", langs, ")>", sep=""), "<lang \\1>", page)
   page <- gsub("</code>", "</#####lang>", page)
   page
}
# Sample input covering both forms: plain language tags and a "code" tag.
page <- "lorem ipsum <c>some c code</c>dolor sit amet,<c-sharp>some c-sharp code</c-sharp>
consectetur adipisicing elit,<code r>some r code</code>
sed do eiusmod tempor incididunt"
fixtags(page)
</lang>
Ruby
<lang ruby># get all stdin in one string
- text = $stdin.read
- for testing, use
text = DATA.read slash_lang = '/lang' langs = %w(foo bar baz) # actual list of languages declared here for lang in langs
text.gsub!(Regexp.new("<(#{lang})>")) {"<lang #$1>"} text.gsub!(Regexp.new("</#{lang}>"), "<#{slash_lang}>")
end
text.gsub!(//, '<lang \1>')
text.gsub!(/<\/code>/, "<#{slash_lang}>")
print text
__END__
Lorem ipsum
saepe audire
elaboraret ne quo, id equidem
atomorum inciderint usu. <foo>In sit inermis deleniti percipit</foo>,
ius ex tale civibus omittam. <barf>Vix ut doctus cetero invenire</barf>, his eu
altera electram. Tota adhuc altera te sea, soluta appetere ut mel</bar>.
Quo quis graecis vivendo te, <baz>posse nullam lobortis ex usu
. Eam volumus perpetua
constituto id, mea an omittam fierent vituperatoribus. </lang>
Lorem ipsum <lang foo>saepe audire</lang> elaboraret ne quo, id equidem
atomorum inciderint usu. <lang foo>In sit inermis deleniti percipit</lang>,
ius ex tale civibus omittam. <barf>Vix ut doctus cetero invenire</barf>, his eu
altera electram. Tota adhuc altera te sea, <lang bar>soluta appetere ut mel</lang>.
Quo quis graecis vivendo te, <lang baz>posse nullam lobortis ex usu</lang>. Eam volumus perpetua
constituto id, mea an omittam fierent vituperatoribus.
Tcl
<lang tcl>set langs {
    ada cpp-qt pascal lscript z80 visualprolog html4strict cil objc asm progress teraterm
    hq9plus genero tsql email pic16 tcl apt_sources io apache vhdl avisynth winbatch vbnet
    ini scilab ocaml-brief sas actionscript3 qbasic perl bnf cobol powershell php kixtart
    visualfoxpro mirc make javascript cpp sdlbasic cadlisp php-brief rails verilog xml
    csharp actionscript nsis bash typoscript freebasic dot applescript haskell dos oracle8
    cfdg glsl lotusscript mpasm latex sql klonec ruby ocaml smarty python oracle11 caddcl
    robots groovy smalltalk diff fortran cfm lua modula3 vb autoit java text scala lotusformulas
    pixelbender reg _div whitespace providex asp css lolcode lisp inno mysql plsql matlab
    oobas vim delphi xorg_conf gml prolog bf per scheme mxml d basic4gl m68k gnuplot idl
    abap intercal c_mac thinbasic java5 xpp boo klonecpp blitzbasic eiffel povray c gettext
}

set text [read stdin]
# The closing tag is assembled at run time so it never appears literally here.
set slang /lang
# Rewrite the opening and closing tag of every known language.
foreach lang $langs {
    set text [regsub -all "<$lang>" $text "<lang $lang>"]
    set text [regsub -all "</$lang>" $text "<$slang>"]
}
# Rewrite the "code" form: \1 is the language name, \2 the tagged content.
set text [regsub -all "<code (.+?)>(.+?)</code>" $text "<lang \\1>\\2<$slang>"]</lang>
Alternatively, the foreach loop may be replaced with:
<lang tcl># Join all language names into one alternation and substitute in two passes.
# regsub (not regexp) is required here: regexp only matches and reports, it
# does not produce the rewritten text.
set text [regsub -all "<([join $langs |])>" $text {<lang \1>}]
set text [regsub -all "</(?:[join $langs |])>" $text "<$slang>"]</lang>
This task does not require regular expressions at all:
<lang tcl># Build a flat key/value list for [string map]: every key is a literal tag and
# the element after it is the replacement text.
set replacements [list </code> "<$slang>"]
foreach lang $langs {
    lappend replacements "<$lang>" "<lang $lang>"
    lappend replacements "</$lang>" "<$slang>"
    lappend replacements "<code $lang>" "<lang $lang>"
}
set text [string map $replacements $text]</lang>