WiktionaryDumps to words: Difference between revisions

m
simplify state logic
m (write to a file not to the terminal)
m (simplify state logic)
Line 312:
 
=={{header|Julia}}==
Uses Regex and a state variable instead of XML parsing. Default setting prints the first 80 French words found.
<lang julia>using CodecBzip2
 
function getwords(io::IO, output::IO, languagemark = "==French==", maxwords = 80)
title, txopen, txclose = "<title>", "<text", "</text>"
got_title_last, got_text_last = false, false
wordcount, titleword = 0, ""
for line in eachline(io)
if occursin(title, line)
got_title_last, got_text_last = true, false
titleword = (m = match(r"<title>([^<]+)</title>", line)) != nothing ? m[1] : ""
elseif occursin(txopen, line)
got_title_last, got_text_last = false, true
elseif occursin(languagemark, line)
if got_text_last && titleword != ""
Line 330:
(wordcount += 1) >= maxwords && break
end
got_title_last, got_text_last = false, false
elseif occursin(txclose, line)
got_title_last, got_text_last = false, false
end
end
4,108

edits