Rosetta Code/Find bare lang tags: Difference between revisions

Content added Content deleted
(→‎{{header|Wren}}: Removed superfluous line.)
m (→‎{{header|Phix}}: syntax coloured)
Line 1,461: Line 1,461:
=={{header|Phix}}==
=={{header|Phix}}==
Both extra credits. Would probably benefit from excluding <pre></pre> sections first.
Both extra credits. Would probably benefit from excluding <pre></pre> sections first.
<!--<lang Phix>(notonline)-->
<lang Phix>-- demo\rosetta\Find_bare_lang_tags.exw
<span style="color: #000080;font-style:italic;">--
--
-- demo\rosetta\Find_bare_lang_tags.exw
-- Finds/counts no of "<lang>" as opposed to eg "<lang Phix>" tags.
-- ====================================
-- Since downloading all the pages can be very slow, this uses a cache.
--
--
-- (Uses '&' instead of/as well as 'a', for everyone's sanity..)
constant include_drafts = true,
-- Finds/counts no of "&lt;l&ng&gt;" as opposed to eg "&lt;l&ng Phix&gt;" tags.
sort_by_task = false,
-- Since downloading all the pages can be very slow, this uses a cache.
sort_by_lang = not sort_by_task -- (one or t'other)
--</span>

<span style="color: #008080;">without</span> <span style="color: #008080;">js</span> <span style="color: #000080;font-style:italic;">-- (fairly obviously this will never ever run in a browser!)</span>
integer lp = 0
<span style="color: #008080;">constant</span> <span style="color: #000000;">include_drafts</span> <span style="color: #0000FF;">=</span> <span style="color: #004600;">true</span><span style="color: #0000FF;">,</span>
procedure progress(string msg, sequence args = {})
<span style="color: #000000;">sort_by_task</span> <span style="color: #0000FF;">=</span> <span style="color: #004600;">true</span><span style="color: #0000FF;">,</span>
if length(args) then msg = sprintf(msg,args) end if
<span style="color: #000000;">sort_by_lang</span> <span style="color: #0000FF;">=</span> <span style="color: #008080;">not</span> <span style="color: #000000;">sort_by_task</span> <span style="color: #000080;font-style:italic;">-- (one or t'other)</span>
integer lm = length(msg)
if lm<lp then msg[$..$] = repeat(' ',lp-lm)&msg[$] end if
<span style="color: #008080;">include</span> <span style="color: #000000;">rosettacode_cache</span><span style="color: #0000FF;">.</span><span style="color: #000000;">e</span> <span style="color: #000080;font-style:italic;">-- see [[Rosetta_Code/Count_examples#Phix]]</span>
puts(1,msg)
lp = iff(msg[$]='\r'?lm:0)
<span style="color: #008080;">constant</span> <span style="color: #0000FF;">{</span><span style="color: #000000;">utf8</span><span style="color: #0000FF;">,</span><span style="color: #000000;">ansi</span><span style="color: #0000FF;">}</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">columnize</span><span style="color: #0000FF;">({{</span><span style="color: #008000;">x"E28093"</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"-"</span><span style="color: #0000FF;">},</span>
end procedure
<span style="color: #0000FF;">{</span><span style="color: #008000;">x"E28099"</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"'"</span><span style="color: #0000FF;">},</span>

<span style="color: #0000FF;">{</span><span style="color: #008000;">x"C3A8"</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"e"</span><span style="color: #0000FF;">},</span>
include builtins\timedate.e
<span style="color: #0000FF;">{</span><span style="color: #008000;">x"C3A9"</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"e"</span><span style="color: #0000FF;">},</span>
integer refresh_cache = timedelta(days:=365) -- 0 for always
<span style="color: #0000FF;">{</span><span style="color: #008000;">x"D09A"</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"K"</span><span style="color: #0000FF;">},</span>
--integer refresh_cache = timedelta(days:=1) -- 0 for always
<span style="color: #0000FF;">{</span><span style="color: #008000;">x"D09C"</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"M"</span><span style="color: #0000FF;">}})</span>

include builtins\libcurl.e
<span style="color: #008080;">function</span> <span style="color: #000000;">utf8_clean</span><span style="color: #0000FF;">(</span><span style="color: #004080;">string</span> <span style="color: #000000;">s</span><span style="color: #0000FF;">)</span>
atom curl = NULL
<span style="color: #008080;">return</span> <span style="color: #7060A8;">substitute_all</span><span style="color: #0000FF;">(</span><span style="color: #000000;">s</span><span style="color: #0000FF;">,</span><span style="color: #000000;">utf8</span><span style="color: #0000FF;">,</span><span style="color: #000000;">ansi</span><span style="color: #0000FF;">)</span>
atom pErrorBuffer
<span style="color: #008080;">end</span> <span style="color: #008080;">function</span>

function write_callback(atom pData, integer size, integer nmemb, integer fn)
<span style="color: #008080;">function</span> <span style="color: #000000;">multi_lang</span><span style="color: #0000FF;">(</span><span style="color: #004080;">sequence</span> <span style="color: #000000;">s</span><span style="color: #0000FF;">)</span>
integer bytes_written = size * nmemb
<span style="color: #000080;font-style:italic;">-- Convert eg {"Algol","Algol","C","C","C"} to "Algol[2],C[3]"</span>
puts(fn,peek({pData,bytes_written}))
<span style="color: #004080;">integer</span> <span style="color: #000000;">i</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">1</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">j</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">2</span>
return bytes_written
<span style="color: #008080;">while</span> <span style="color: #000000;">i</span><span style="color: #0000FF;"><</span><span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">s</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">do</span>
end function
<span style="color: #008080;">if</span> <span style="color: #000000;">s</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">]=</span><span style="color: #000000;">s</span><span style="color: #0000FF;">[</span><span style="color: #000000;">j</span><span style="color: #0000FF;">]</span> <span style="color: #008080;">then</span>
constant write_cb = call_back({'+', routine_id("write_callback")})
<span style="color: #008080;">while</span> <span style="color: #000000;">j</span><span style="color: #0000FF;"><</span><span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">s</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">and</span> <span style="color: #000000;">s</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">]=</span><span style="color: #000000;">s</span><span style="color: #0000FF;">[</span><span style="color: #000000;">j</span><span style="color: #0000FF;">+</span><span style="color: #000000;">1</span><span style="color: #0000FF;">]</span> <span style="color: #008080;">do</span> <span style="color: #000000;">j</span><span style="color: #0000FF;">+=</span><span style="color: #000000;">1</span> <span style="color: #008080;">end</span> <span style="color: #008080;">while</span>

<span style="color: #000000;">s</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">..</span><span style="color: #000000;">j</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">=</span> <span style="color: #0000FF;">{</span><span style="color: #7060A8;">sprintf</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"%s[%d]"</span><span style="color: #0000FF;">,{</span><span style="color: #000000;">s</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">],</span><span style="color: #000000;">j</span><span style="color: #0000FF;">-</span><span style="color: #000000;">i</span><span style="color: #0000FF;">+</span><span style="color: #000000;">1</span><span style="color: #0000FF;">})}</span>
function open_download(string filename, url)
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
bool refetch = true
<span style="color: #000000;">i</span> <span style="color: #0000FF;">+=</span> <span style="color: #000000;">1</span>
if get_file_type("rc_cache")!=FILETYPE_DIRECTORY then
<span style="color: #000000;">j</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">i</span><span style="color: #0000FF;">+</span><span style="color: #000000;">1</span>
if not create_directory("rc_cache") then
<span style="color: #008080;">end</span> <span style="color: #008080;">while</span>
crash("cannot create rc_cache directory")
<span style="color: #008080;">return</span> <span style="color: #7060A8;">join</span><span style="color: #0000FF;">(</span><span style="color: #000000;">s</span><span style="color: #0000FF;">,</span><span style="color: #008000;">","</span><span style="color: #0000FF;">)</span>
end if
<span style="color: #008080;">end</span> <span style="color: #008080;">function</span>
end if
filename = join_path({"rc_cache",filename})
<span style="color: #008080;">function</span> <span style="color: #000000;">multi_task</span><span style="color: #0000FF;">(</span><span style="color: #004080;">sequence</span> <span style="color: #000000;">s</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">tasks</span><span style="color: #0000FF;">)</span>
if file_exists(filename) then
<span style="color: #000080;font-style:italic;">-- Similar to multi_lang() but with task[indexes]</span>
-- use existing file if <= refresh_cache (365 days) old
<span style="color: #004080;">integer</span> <span style="color: #000000;">i</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">1</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">j</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">2</span>
sequence last_mod = get_file_date(filename) -- (0.8.1+)
<span style="color: #008080;">while</span> <span style="color: #000000;">i</span><span style="color: #0000FF;"><=</span><span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">s</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">do</span>
atom delta = timedate_diff(last_mod,date())
<span style="color: #004080;">integer</span> <span style="color: #000000;">si</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">s</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">]</span>
refetch = (delta>refresh_cache) or get_file_size(filename)=0
<span style="color: #004080;">string</span> <span style="color: #000000;">tsi</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">html_clean</span><span style="color: #0000FF;">(</span><span style="color: #000000;">tasks</span><span style="color: #0000FF;">[</span><span style="color: #000000;">si</span><span style="color: #0000FF;">])</span>
else
<span style="color: #008080;">if</span> <span style="color: #000000;">j</span><span style="color: #0000FF;"><=</span><span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">s</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">and</span> <span style="color: #000000;">si</span><span style="color: #0000FF;">=</span><span style="color: #000000;">s</span><span style="color: #0000FF;">[</span><span style="color: #000000;">j</span><span style="color: #0000FF;">]</span> <span style="color: #008080;">then</span>
string directory = get_file_path(filename)
<span style="color: #008080;">while</span> <span style="color: #000000;">j</span><span style="color: #0000FF;"><</span><span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">s</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">and</span> <span style="color: #000000;">si</span><span style="color: #0000FF;">=</span><span style="color: #000000;">s</span><span style="color: #0000FF;">[</span><span style="color: #000000;">j</span><span style="color: #0000FF;">+</span><span style="color: #000000;">1</span><span style="color: #0000FF;">]</span> <span style="color: #008080;">do</span> <span style="color: #000000;">j</span><span style="color: #0000FF;">+=</span><span style="color: #000000;">1</span> <span style="color: #008080;">end</span> <span style="color: #008080;">while</span>
if get_file_type(directory)!=FILETYPE_DIRECTORY then
<span style="color: #000000;">s</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">..</span><span style="color: #000000;">j</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">=</span> <span style="color: #0000FF;">{</span><span style="color: #7060A8;">sprintf</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"%s[%d]"</span><span style="color: #0000FF;">,{</span><span style="color: #000000;">tsi</span><span style="color: #0000FF;">,</span><span style="color: #000000;">j</span><span style="color: #0000FF;">-</span><span style="color: #000000;">i</span><span style="color: #0000FF;">+</span><span style="color: #000000;">1</span><span style="color: #0000FF;">})}</span>
if not create_directory(directory,make_parent:=true) then
<span style="color: #008080;">else</span>
crash("cannot create %s directory",{directory})
<span style="color: #000000;">s</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">tsi</span>
end if
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
end if
<span style="color: #000000;">i</span> <span style="color: #0000FF;">+=</span> <span style="color: #000000;">1</span>
end if
<span style="color: #000000;">j</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">i</span><span style="color: #0000FF;">+</span><span style="color: #000000;">1</span>
object text
<span style="color: #008080;">end</span> <span style="color: #008080;">while</span>
if not refetch then
<span style="color: #008080;">if</span> <span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">s</span><span style="color: #0000FF;">)></span><span style="color: #000000;">8</span> <span style="color: #008080;">then</span> <span style="color: #000000;">s</span><span style="color: #0000FF;">[</span><span style="color: #000000;">4</span><span style="color: #0000FF;">..-</span><span style="color: #000000;">4</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">=</span> <span style="color: #0000FF;">{</span><span style="color: #008000;">"..."</span><span style="color: #0000FF;">}</span> <span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
text = trim(get_text(filename))
<span style="color: #008080;">return</span> <span style="color: #7060A8;">join</span><span style="color: #0000FF;">(</span><span style="color: #000000;">s</span><span style="color: #0000FF;">,</span><span style="color: #008000;">","</span><span style="color: #0000FF;">)</span>
refetch = (not sequence(text)) or (length(text)<10)
<span style="color: #008080;">end</span> <span style="color: #008080;">function</span>
end if
if refetch then
<span style="color: #004080;">bool</span> <span style="color: #000000;">first</span> <span style="color: #0000FF;">=</span> <span style="color: #004600;">true</span>
progress("Downloading %s...\r",{filename})
if curl=NULL then
<span style="color: #008080;">function</span> <span style="color: #000000;">find_bare_lang_tags</span><span style="color: #0000FF;">()</span>
curl_global_init()
<span style="color: #008080;">if</span> <span style="color: #7060A8;">get_file_type</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"rc_cache"</span><span style="color: #0000FF;">)!=</span><span style="color: #004600;">FILETYPE_DIRECTORY</span> <span style="color: #008080;">then</span>
curl = curl_easy_init()
<span style="color: #008080;">if</span> <span style="color: #008080;">not</span> <span style="color: #000000;">create_directory</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"rc_cache"</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">then</span>
pErrorBuffer = allocate(CURL_ERROR_SIZE)
<span style="color: #7060A8;">crash</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"cannot create rc_cache directory"</span><span style="color: #0000FF;">)</span>
curl_easy_setopt(curl, CURLOPT_ERRORBUFFER, pErrorBuffer)
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_cb)
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
end if
<span style="color: #000080;font-style:italic;">-- note this lot use web scraping (as cribbed from a similar task) ...</span>
url = substitute(url,"%3A",":")
<span style="color: #004080;">sequence</span> <span style="color: #000000;">tasks</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">dewiki</span><span style="color: #0000FF;">(</span><span style="color: #000000;">open_category</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"Programming_Tasks"</span><span style="color: #0000FF;">))</span>
url = substitute(url,"%2A","*")
<span style="color: #008080;">if</span> <span style="color: #000000;">include_drafts</span> <span style="color: #008080;">then</span>
curl_easy_setopt(curl, CURLOPT_URL, url)
<span style="color: #000000;">tasks</span> <span style="color: #0000FF;">&=</span> <span style="color: #000000;">dewiki</span><span style="color: #0000FF;">(</span><span style="color: #000000;">open_category</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"Draft_Programming_Tasks"</span><span style="color: #0000FF;">))</span>
integer fn = open(filename,"wb")
<span style="color: #000000;">tasks</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">sort</span><span style="color: #0000FF;">(</span><span style="color: #000000;">tasks</span><span style="color: #0000FF;">)</span>
if fn=-1 then ?9/0 end if
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
curl_easy_setopt(curl, CURLOPT_WRITEDATA, fn)
<span style="color: #004080;">integer</span> <span style="color: #000000;">blt</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">find</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"Rosetta_Code/Find_bare_lang_tags"</span><span style="color: #0000FF;">,</span><span style="color: #000000;">tasks</span><span style="color: #0000FF;">)</span> <span style="color: #000080;font-style:italic;">-- not this one!</span>
while true do
<span style="color: #000000;">tasks</span><span style="color: #0000FF;">[</span><span style="color: #000000;">blt</span><span style="color: #0000FF;">..</span><span style="color: #000000;">blt</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">=</span> <span style="color: #0000FF;">{}</span>
CURLcode res = curl_easy_perform(curl)
if res=CURLE_OK then exit end if
<span style="color: #000080;font-style:italic;">-- ... whereas the individual tasks use the web api instead (3x smaller/faster)</span>
string error = sprintf("%d",res)
<span style="color: #004080;">integer</span> <span style="color: #000000;">total_count</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">0</span><span style="color: #0000FF;">,</span>
if res=CURLE_COULDNT_RESOLVE_HOST then
<span style="color: #000000;">lt</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">tasks</span><span style="color: #0000FF;">),</span>
error &= " [CURLE_COULDNT_RESOLVE_HOST]"
<span style="color: #000000;">kept</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">0</span>
end if
<span style="color: #7060A8;">progress</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"%d tasks found\n"</span><span style="color: #0000FF;">,{</span><span style="color: #000000;">lt</span><span style="color: #0000FF;">})</span>
progress("Error %s downloading file, retry?(Y/N):",{error})
<span style="color: #004080;">sequence</span> <span style="color: #000000;">task_langs</span> <span style="color: #0000FF;">=</span> <span style="color: #0000FF;">{},</span>
if lower(wait_key())!='y' then abort(0) end if
<span style="color: #000000;">task_counts</span> <span style="color: #0000FF;">=</span> <span style="color: #008080;">iff</span><span style="color: #0000FF;">(</span><span style="color: #000000;">sort_by_task</span><span style="color: #0000FF;">?</span><span style="color: #7060A8;">repeat</span><span style="color: #0000FF;">(</span><span style="color: #000000;">0</span><span style="color: #0000FF;">,</span><span style="color: #000000;">lt</span><span style="color: #0000FF;">):{}),</span>
printf(1,"Y\n")
<span style="color: #000000;">task_things</span> <span style="color: #0000FF;">=</span> <span style="color: #008080;">iff</span><span style="color: #0000FF;">(</span><span style="color: #000000;">sort_by_task</span><span style="color: #0000FF;">?</span><span style="color: #7060A8;">repeat</span><span style="color: #0000FF;">({},</span><span style="color: #000000;">lt</span><span style="color: #0000FF;">):{})</span>
end while
<span style="color: #008080;">for</span> <span style="color: #000000;">i</span><span style="color: #0000FF;">=</span><span style="color: #000000;">1</span> <span style="color: #008080;">to</span> <span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">tasks</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">do</span>
close(fn)
<span style="color: #004080;">string</span> <span style="color: #000000;">ti</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">tasks</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">],</span>
refresh_cache += timedelta(days:=1) -- did I mention it is slow?
<span style="color: #000000;">url</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">sprintf</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"http://rosettacode.org/mw/index.php?title=%s&action=raw"</span><span style="color: #0000FF;">,{</span><span style="color: #000000;">ti</span><span style="color: #0000FF;">}),</span>
text = get_text(filename)
<span style="color: #000000;">contents</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">open_download</span><span style="color: #0000FF;">(</span><span style="color: #000000;">ti</span><span style="color: #0000FF;">&</span><span style="color: #008000;">".raw"</span><span style="color: #0000FF;">,</span><span style="color: #000000;">url</span><span style="color: #0000FF;">),</span>
end if
<span style="color: #000000;">curr</span>
return text
<span style="color: #004080;">integer</span> <span style="color: #000000;">count</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">0</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">start</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">1</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">header</span>
end function
<span style="color: #008080;">while</span> <span style="color: #004600;">true</span> <span style="color: #008080;">do</span>

<span style="color: #000000;">start</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">match</span><span style="color: #0000FF;">(</span><span style="color: #008000;">`&lt;l`</span><span style="color: #0000FF;">&</span><span style="color: #008000;">`ang&gt;`</span><span style="color: #0000FF;">,</span><span style="color: #000000;">contents</span><span style="color: #0000FF;">,</span><span style="color: #000000;">start</span><span style="color: #0000FF;">)</span>
function open_category(string filename)
<span style="color: #008080;">if</span> <span style="color: #000000;">start</span><span style="color: #0000FF;">=</span><span style="color: #000000;">0</span> <span style="color: #008080;">then</span> <span style="color: #008080;">exit</span> <span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
return open_download(filename&".htm","http://rosettacode.org/wiki/Category:"&filename)
<span style="color: #000080;font-style:italic;">-- look backward for the nearest header</span>
end function
<span style="color: #000000;">header</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">rmatch</span><span style="color: #0000FF;">(</span><span style="color: #008000;">`{`</span><span style="color: #0000FF;">&</span><span style="color: #008000;">`{he`</span><span style="color: #0000FF;">&</span><span style="color: #008000;">`ader|`</span><span style="color: #0000FF;">,</span><span style="color: #000000;">contents</span><span style="color: #0000FF;">,</span><span style="color: #000000;">start</span><span style="color: #0000FF;">)</span>

<span style="color: #008080;">if</span> <span style="color: #000000;">header</span><span style="color: #0000FF;">=</span><span style="color: #000000;">0</span> <span style="color: #008080;">then</span>
function dewiki(string s)
<span style="color: #000000;">curr</span> <span style="color: #0000FF;">=</span> <span style="color: #008000;">"no language"</span>
-- extract tasks from eg `<li><a href="/wiki/100_doors"`
<span style="color: #008080;">else</span>
sequence tasks = {}
<span style="color: #000000;">header</span> <span style="color: #0000FF;">+=</span> <span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #008000;">`{`</span><span style="color: #0000FF;">&</span><span style="color: #008000;">`{he`</span><span style="color: #0000FF;">&</span><span style="color: #008000;">`ader|`</span><span style="color: #0000FF;">)</span>
integer start = 1, finish = match(`<div class="printfooter">`,s)
<span style="color: #000000;">curr</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">utf8_clean</span><span style="color: #0000FF;">(</span><span style="color: #000000;">contents</span><span style="color: #0000FF;">[</span><span style="color: #000000;">header</span><span style="color: #0000FF;">..</span><span style="color: #7060A8;">match</span><span style="color: #0000FF;">(</span><span style="color: #008000;">`<nowiki>}}</nowiki>`</span><span style="color: #0000FF;">,</span><span style="color: #000000;">contents</span><span style="color: #0000FF;">,</span><span style="color: #000000;">header</span><span style="color: #0000FF;">)-</span><span style="color: #000000;">1</span><span style="color: #0000FF;">])</span>
s = s[1..finish-1]
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
while true do
<span style="color: #008080;">if</span> <span style="color: #000000;">sort_by_lang</span> <span style="color: #008080;">then</span>
start = match("<li><a href=\"/wiki/",s,start)
<span style="color: #004080;">integer</span> <span style="color: #000000;">k</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">find</span><span style="color: #0000FF;">(</span><span style="color: #000000;">curr</span><span style="color: #0000FF;">,</span><span style="color: #000000;">task_langs</span><span style="color: #0000FF;">)</span>
if start=0 then exit end if
<span style="color: #008080;">if</span> <span style="color: #000000;">k</span><span style="color: #0000FF;">=</span><span style="color: #000000;">0</span> <span style="color: #008080;">then</span>
start += length("<li><a href=\"/wiki/")
<span style="color: #000000;">task_langs</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">append</span><span style="color: #0000FF;">(</span><span style="color: #000000;">task_langs</span><span style="color: #0000FF;">,</span><span style="color: #000000;">curr</span><span style="color: #0000FF;">)</span>
finish = find('"',s,start)
<span style="color: #000000;">task_things</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">append</span><span style="color: #0000FF;">(</span><span style="color: #000000;">task_things</span><span style="color: #0000FF;">,{</span><span style="color: #000000;">i</span><span style="color: #0000FF;">})</span>
string task = s[start..finish-1]
<span style="color: #000000;">task_counts</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">append</span><span style="color: #0000FF;">(</span><span style="color: #000000;">task_counts</span><span style="color: #0000FF;">,</span><span style="color: #000000;">1</span><span style="color: #0000FF;">)</span>
task = substitute_all(task,{"*",":"},{"%2A","%3A"})
<span style="color: #008080;">else</span>
if task!="Rosetta_Code/Find_bare_lang_tags" then -- not this one!
<span style="color: #000000;">task_things</span><span style="color: #0000FF;">[</span><span style="color: #000000;">k</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">append</span><span style="color: #0000FF;">(</span><span style="color: #000000;">task_things</span><span style="color: #0000FF;">[</span><span style="color: #000000;">k</span><span style="color: #0000FF;">],</span><span style="color: #000000;">i</span><span style="color: #0000FF;">)</span>
tasks = append(tasks,task)
<span style="color: #000000;">task_counts</span><span style="color: #0000FF;">[</span><span style="color: #000000;">k</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">+=</span> <span style="color: #000000;">1</span>
end if
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
-- if length(tasks)>10 then exit end if -- (debug aid)
<span style="color: #008080;">else</span>
start = finish+1
<span style="color: #000000;">task_things</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">append</span><span style="color: #0000FF;">(</span><span style="color: #000000;">task_things</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">],</span><span style="color: #000000;">curr</span><span style="color: #0000FF;">)</span>
end while
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
return tasks
<span style="color: #000000;">count</span> <span style="color: #0000FF;">+=</span> <span style="color: #000000;">1</span>
end function
<span style="color: #000000;">start</span> <span style="color: #0000FF;">+=</span> <span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #008000;">`&lt;l`</span><span style="color: #0000FF;">&</span><span style="color: #008000;">`ang&gt;`</span><span style="color: #0000FF;">)</span>

<span style="color: #008080;">end</span> <span style="color: #008080;">while</span>
constant {html,ascii} = columnize({{"%2A","*"},
<span style="color: #008080;">if</span> <span style="color: #000000;">count</span><span style="color: #0000FF;">!=</span><span style="color: #000000;">0</span> <span style="color: #008080;">then</span>
{"%3A",":"},
<span style="color: #008080;">if</span> <span style="color: #000000;">sort_by_task</span> <span style="color: #008080;">then</span>
{"%27","'"},
<span style="color: #000000;">task_counts</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">count</span>
{"%2B","+"},
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
{"%22","\""},
<span style="color: #000000;">kept</span> <span style="color: #0000FF;">+=</span> <span style="color: #000000;">1</span>
{"%E2%80%93","-"},
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
{"%E2%80%99","'"},
<span style="color: #7060A8;">progress</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"%d tasks kept, %d to go\r"</span><span style="color: #0000FF;">,{</span><span style="color: #000000;">kept</span><span style="color: #0000FF;">,</span><span style="color: #000000;">lt</span><span style="color: #0000FF;">-</span><span style="color: #000000;">i</span><span style="color: #0000FF;">})</span>
{"%C3%A8","e"},
<span style="color: #000000;">total_count</span> <span style="color: #0000FF;">+=</span> <span style="color: #000000;">count</span>
{"%C3%A9","e"}})
<span style="color: #008080;">if</span> <span style="color: #7060A8;">get_key</span><span style="color: #0000FF;">()=</span><span style="color: #000000;">#1B</span> <span style="color: #008080;">then</span> <span style="color: #7060A8;">progress</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"escape keyed\n"</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">exit</span> <span style="color: #008080;">end</span> <span style="color: #008080;">if</span>

<span style="color: #008080;">end</span> <span style="color: #008080;">for</span>
function html_clean(string s)
<span style="color: #000000;">curl_cleanup</span><span style="color: #0000FF;">()</span>
return substitute_all(s,html,ascii)
<span style="color: #7060A8;">progress</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"%d tasks with bare lang tags\n"</span><span style="color: #0000FF;">,{</span><span style="color: #000000;">kept</span><span style="color: #0000FF;">})</span>
end function
<span style="color: #004080;">sequence</span> <span style="color: #000000;">tags</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">custom_sort</span><span style="color: #0000FF;">(</span><span style="color: #000000;">task_counts</span><span style="color: #0000FF;">,</span><span style="color: #7060A8;">tagset</span><span style="color: #0000FF;">(</span><span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">task_counts</span><span style="color: #0000FF;">)))</span>

<span style="color: #008080;">for</span> <span style="color: #000000;">i</span><span style="color: #0000FF;">=</span><span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">tags</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">to</span> <span style="color: #000000;">1</span> <span style="color: #008080;">by</span> <span style="color: #0000FF;">-</span><span style="color: #000000;">1</span> <span style="color: #008080;">do</span>
constant {utf8,ansi} = columnize({{x"E28093","-"},
<span style="color: #004080;">integer</span> <span style="color: #000000;">ti</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">tags</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">],</span>
{x"E28099","'"},
<span style="color: #000000;">tc</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">task_counts</span><span style="color: #0000FF;">[</span><span style="color: #000000;">ti</span><span style="color: #0000FF;">]</span>
{x"C3A8","e"},
<span style="color: #008080;">if</span> <span style="color: #000000;">tc</span><span style="color: #0000FF;">=</span><span style="color: #000000;">0</span> <span style="color: #008080;">then</span> <span style="color: #008080;">exit</span> <span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
{x"C3A9","e"},
<span style="color: #008080;">if</span> <span style="color: #000000;">sort_by_task</span> <span style="color: #008080;">then</span>
{x"D09A","K"},
<span style="color: #7060A8;">progress</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"%s %d (%s)\n"</span><span style="color: #0000FF;">,{</span><span style="color: #000000;">html_clean</span><span style="color: #0000FF;">(</span><span style="color: #000000;">tasks</span><span style="color: #0000FF;">[</span><span style="color: #000000;">ti</span><span style="color: #0000FF;">]),</span><span style="color: #000000;">tc</span><span style="color: #0000FF;">,</span><span style="color: #000000;">multi_lang</span><span style="color: #0000FF;">(</span><span style="color: #000000;">task_things</span><span style="color: #0000FF;">[</span><span style="color: #000000;">ti</span><span style="color: #0000FF;">])})</span>
{x"D09C","M"}})
<span style="color: #008080;">else</span> <span style="color: #000080;font-style:italic;">-- (sort_by_count)</span>

<span style="color: #7060A8;">progress</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"%s %d (%s)\n"</span><span style="color: #0000FF;">,{</span><span style="color: #000000;">task_langs</span><span style="color: #0000FF;">[</span><span style="color: #000000;">ti</span><span style="color: #0000FF;">],</span><span style="color: #000000;">tc</span><span style="color: #0000FF;">,</span><span style="color: #000000;">multi_task</span><span style="color: #0000FF;">(</span><span style="color: #000000;">task_things</span><span style="color: #0000FF;">[</span><span style="color: #000000;">ti</span><span style="color: #0000FF;">],</span><span style="color: #000000;">tasks</span><span style="color: #0000FF;">)})</span>
function utf8_clean(string s)
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
return substitute_all(s,utf8,ansi)
<span style="color: #008080;">end</span> <span style="color: #008080;">for</span>
end function
<span style="color: #008080;">return</span> <span style="color: #000000;">total_count</span>

<span style="color: #008080;">end</span> <span style="color: #008080;">function</span>
function multi_lang(sequence s)
-- Convert eg {"Algol","Algol","C","C","C"} to "Algol[2],C[3]"
<span style="color: #7060A8;">progress</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"Total: %d\n"</span><span style="color: #0000FF;">,{</span><span style="color: #000000;">find_bare_lang_tags</span><span style="color: #0000FF;">()})</span>
integer i = 1, j = 2
while i<length(s) do
<span style="color: #0000FF;">?</span><span style="color: #008000;">"done"</span>
if s[i]=s[j] then
<span style="color: #0000FF;">{}</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">wait_key</span><span style="color: #0000FF;">()</span>
while j<length(s) and s[i]=s[j+1] do j+=1 end while
<!--</lang>-->
s[i..j] = {sprintf("%s[%d]",{s[i],j-i+1})}
end if
i += 1
j = i+1
end while
return join(s,",")
end function

function multi_task(sequence s, tasks)
-- Similar to multi_lang() but with task[indexes]
integer i = 1, j = 2
while i<=length(s) do
integer si = s[i]
string tsi = html_clean(tasks[si])
if j<=length(s) and si=s[j] then
while j<length(s) and si=s[j+1] do j+=1 end while
s[i..j] = {sprintf("%s[%d]",{tsi,j-i+1})}
else
s[i] = tsi
end if
i += 1
j = i+1
end while
if length(s)>8 then s[4..-4] = {"..."} end if
return join(s,",")
end function

function find_bare_lang_tags()
-- note this lot use web scraping (as cribbed from a similar task) ...
sequence tasks = dewiki(open_category("Programming_Tasks"))
if include_drafts then
tasks &= dewiki(open_category("Draft_Programming_Tasks"))
tasks = sort(tasks)
end if
-- ... whereas the individual tasks use the web api instead (3x smaller/faster)
integer total_count = 0,
lt = length(tasks),
kept = 0
progress("%d tasks found\n",{lt})
sequence task_langs = {},
task_counts = iff(sort_by_task?repeat(0,lt):{}),
task_things = iff(sort_by_task?repeat({},lt):{})
for i=1 to length(tasks) do
string ti = tasks[i],
url = sprintf("http://rosettacode.org/mw/index.php?title=%s&action=raw",{ti}),
contents = open_download(ti&".raw",url),
this
integer count = 0, start = 1, header
while true do
start = match(`<lang>`,contents,start)
if start=0 then exit end if
-- look backward for the nearest header
header = rmatch(`{{header|`,contents,start)
if header=0 then
-- this = ""
this = "no language"
else
header += length(`{{header|`)
this = utf8_clean(contents[header..match(`}}`,contents,header)-1])
end if
if sort_by_lang then
integer k = find(this,task_langs)
if k=0 then
task_langs = append(task_langs,this)
task_things = append(task_things,{i})
task_counts = append(task_counts,1)
else
task_things[k] = append(task_things[k],i)
task_counts[k] += 1
end if
else
task_things[i] = append(task_things[i],this)
end if
count += 1
start += length(`<lang>`)
end while
if count!=0 then
if sort_by_task then
task_counts[i] = count
end if
kept += 1
end if
progress("%d tasks kept, %d to go\r",{kept,lt-i})
total_count += count
if get_key()=#1B then progress("escape keyed\n") exit end if
end for
if curl!=NULL then
curl_easy_cleanup(curl)
free(pErrorBuffer)
curl = NULL
pErrorBuffer = NULL
end if
progress("%d tasks with bare lang tags\n",{kept})
sequence tags = custom_sort(task_counts,tagset(length(task_counts)))
for i=length(tags) to 1 by -1 do
integer ti = tags[i],
tc = task_counts[ti]
if tc=0 then exit end if
--if tc>5 then
if sort_by_task then
progress("%s %d (%s)\n",{html_clean(tasks[ti]),tc,multi_lang(task_things[ti])})
else -- (sort_by_count)
progress("%s %d (%s)\n",{task_langs[ti],tc,multi_task(task_things[ti],tasks)})
end if
--end if
end for
return total_count
end function

progress("Total: %d\n",{find_bare_lang_tags()})</lang>
{{out}}
{{out}}
as of 26/7/19, sort_by_task:
as of 26/7/19, sort_by_task: