Rosetta Code/Find bare lang tags: Difference between revisions
Content added Content deleted
(→{{header|Wren}}: Removed superfluous line.) |
m (→{{header|Phix}}: syntax coloured) |
||
Line 1,461: | Line 1,461: | ||
=={{header|Phix}}== |
=={{header|Phix}}== |
||
Both extra credits. Would probably benefit from excluding <pre></pre> sections first. |
Both extra credits. Would probably benefit from excluding <pre></pre> sections first. |
||
<!--<lang Phix>(notonline)--> |
|||
<lang Phix>-- demo\rosetta\Find_bare_lang_tags.exw |
|||
<span style="color: #000080;font-style:italic;">-- |
|||
-- |
|||
-- demo\rosetta\Find_bare_lang_tags.exw |
|||
-- Finds/counts no of "<lang>" as opposed to eg "<lang Phix>" tags. |
|||
-- ==================================== |
|||
-- Since downloading all the pages can be very slow, this uses a cache. |
|||
-- |
-- |
||
-- (Uses '&' instead of/as well as 'a', for everyone's sanity..) |
|||
constant include_drafts = true, |
|||
-- Finds/counts no of "<l&ng>" as opposed to eg "<l&ng Phix>" tags. |
|||
sort_by_task = false, |
|||
-- Since downloading all the pages can be very slow, this uses a cache. |
|||
sort_by_lang = not sort_by_task -- (one or t'other) |
|||
--</span> |
|||
<span style="color: #008080;">without</span> <span style="color: #008080;">js</span> <span style="color: #000080;font-style:italic;">-- (fairly obviously this will never ever run in a browser!)</span> |
|||
integer lp = 0 |
|||
<span style="color: #008080;">constant</span> <span style="color: #000000;">include_drafts</span> <span style="color: #0000FF;">=</span> <span style="color: #004600;">true</span><span style="color: #0000FF;">,</span> |
|||
procedure progress(string msg, sequence args = {}) |
|||
<span style="color: #000000;">sort_by_task</span> <span style="color: #0000FF;">=</span> <span style="color: #004600;">true</span><span style="color: #0000FF;">,</span> |
|||
if length(args) then msg = sprintf(msg,args) end if |
|||
<span style="color: #000000;">sort_by_lang</span> <span style="color: #0000FF;">=</span> <span style="color: #008080;">not</span> <span style="color: #000000;">sort_by_task</span> <span style="color: #000080;font-style:italic;">-- (one or t'other)</span> |
|||
integer lm = length(msg) |
|||
if lm<lp then msg[$..$] = repeat(' ',lp-lm)&msg[$] end if |
|||
<span style="color: #008080;">include</span> <span style="color: #000000;">rosettacode_cache</span><span style="color: #0000FF;">.</span><span style="color: #000000;">e</span> <span style="color: #000080;font-style:italic;">-- see [[Rosetta_Code/Count_examples#Phix]]</span> |
|||
puts(1,msg) |
|||
lp = iff(msg[$]='\r'?lm:0) |
|||
<span style="color: #008080;">constant</span> <span style="color: #0000FF;">{</span><span style="color: #000000;">utf8</span><span style="color: #0000FF;">,</span><span style="color: #000000;">ansi</span><span style="color: #0000FF;">}</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">columnize</span><span style="color: #0000FF;">({{</span><span style="color: #008000;">x"E28093"</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"-"</span><span style="color: #0000FF;">},</span> |
|||
end procedure |
|||
<span style="color: #0000FF;">{</span><span style="color: #008000;">x"E28099"</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"'"</span><span style="color: #0000FF;">},</span> |
|||
<span style="color: #0000FF;">{</span><span style="color: #008000;">x"C3A8"</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"e"</span><span style="color: #0000FF;">},</span> |
|||
include builtins\timedate.e |
|||
<span style="color: #0000FF;">{</span><span style="color: #008000;">x"C3A9"</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"e"</span><span style="color: #0000FF;">},</span> |
|||
integer refresh_cache = timedelta(days:=365) -- 0 for always |
|||
<span style="color: #0000FF;">{</span><span style="color: #008000;">x"D09A"</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"K"</span><span style="color: #0000FF;">},</span> |
|||
--integer refresh_cache = timedelta(days:=1) -- 0 for always |
|||
<span style="color: #0000FF;">{</span><span style="color: #008000;">x"D09C"</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"M"</span><span style="color: #0000FF;">}})</span> |
|||
include builtins\libcurl.e |
|||
<span style="color: #008080;">function</span> <span style="color: #000000;">utf8_clean</span><span style="color: #0000FF;">(</span><span style="color: #004080;">string</span> <span style="color: #000000;">s</span><span style="color: #0000FF;">)</span> |
|||
atom curl = NULL |
|||
<span style="color: #008080;">return</span> <span style="color: #7060A8;">substitute_all</span><span style="color: #0000FF;">(</span><span style="color: #000000;">s</span><span style="color: #0000FF;">,</span><span style="color: #000000;">utf8</span><span style="color: #0000FF;">,</span><span style="color: #000000;">ansi</span><span style="color: #0000FF;">)</span> |
|||
atom pErrorBuffer |
|||
<span style="color: #008080;">end</span> <span style="color: #008080;">function</span> |
|||
function write_callback(atom pData, integer size, integer nmemb, integer fn) |
|||
<span style="color: #008080;">function</span> <span style="color: #000000;">multi_lang</span><span style="color: #0000FF;">(</span><span style="color: #004080;">sequence</span> <span style="color: #000000;">s</span><span style="color: #0000FF;">)</span> |
|||
integer bytes_written = size * nmemb |
|||
<span style="color: #000080;font-style:italic;">-- Convert eg {"Algol","Algol","C","C","C"} to "Algol[2],C[3]"</span> |
|||
puts(fn,peek({pData,bytes_written})) |
|||
<span style="color: #004080;">integer</span> <span style="color: #000000;">i</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">1</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">j</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">2</span> |
|||
return bytes_written |
|||
<span style="color: #008080;">while</span> <span style="color: #000000;">i</span><span style="color: #0000FF;"><</span><span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">s</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">do</span> |
|||
end function |
|||
<span style="color: #008080;">if</span> <span style="color: #000000;">s</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">]=</span><span style="color: #000000;">s</span><span style="color: #0000FF;">[</span><span style="color: #000000;">j</span><span style="color: #0000FF;">]</span> <span style="color: #008080;">then</span> |
|||
constant write_cb = call_back({'+', routine_id("write_callback")}) |
|||
<span style="color: #008080;">while</span> <span style="color: #000000;">j</span><span style="color: #0000FF;"><</span><span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">s</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">and</span> <span style="color: #000000;">s</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">]=</span><span style="color: #000000;">s</span><span style="color: #0000FF;">[</span><span style="color: #000000;">j</span><span style="color: #0000FF;">+</span><span style="color: #000000;">1</span><span style="color: #0000FF;">]</span> <span style="color: #008080;">do</span> <span style="color: #000000;">j</span><span style="color: #0000FF;">+=</span><span style="color: #000000;">1</span> <span style="color: #008080;">end</span> <span style="color: #008080;">while</span> |
|||
<span style="color: #000000;">s</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">..</span><span style="color: #000000;">j</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">=</span> <span style="color: #0000FF;">{</span><span style="color: #7060A8;">sprintf</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"%s[%d]"</span><span style="color: #0000FF;">,{</span><span style="color: #000000;">s</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">],</span><span style="color: #000000;">j</span><span style="color: #0000FF;">-</span><span style="color: #000000;">i</span><span style="color: #0000FF;">+</span><span style="color: #000000;">1</span><span style="color: #0000FF;">})}</span> |
|||
function open_download(string filename, url) |
|||
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span> |
|||
bool refetch = true |
|||
<span style="color: #000000;">i</span> <span style="color: #0000FF;">+=</span> <span style="color: #000000;">1</span> |
|||
if get_file_type("rc_cache")!=FILETYPE_DIRECTORY then |
|||
<span style="color: #000000;">j</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">i</span><span style="color: #0000FF;">+</span><span style="color: #000000;">1</span> |
|||
if not create_directory("rc_cache") then |
|||
<span style="color: #008080;">end</span> <span style="color: #008080;">while</span> |
|||
crash("cannot create rc_cache directory") |
|||
<span style="color: #008080;">return</span> <span style="color: #7060A8;">join</span><span style="color: #0000FF;">(</span><span style="color: #000000;">s</span><span style="color: #0000FF;">,</span><span style="color: #008000;">","</span><span style="color: #0000FF;">)</span> |
|||
end if |
|||
<span style="color: #008080;">end</span> <span style="color: #008080;">function</span> |
|||
end if |
|||
filename = join_path({"rc_cache",filename}) |
|||
<span style="color: #008080;">function</span> <span style="color: #000000;">multi_task</span><span style="color: #0000FF;">(</span><span style="color: #004080;">sequence</span> <span style="color: #000000;">s</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">tasks</span><span style="color: #0000FF;">)</span> |
|||
if file_exists(filename) then |
|||
<span style="color: #000080;font-style:italic;">-- Similar to multi_lang() but with task[indexes]</span> |
|||
-- use existing file if <= refresh_cache (365 days) old |
|||
<span style="color: #004080;">integer</span> <span style="color: #000000;">i</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">1</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">j</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">2</span> |
|||
sequence last_mod = get_file_date(filename) -- (0.8.1+) |
|||
<span style="color: #008080;">while</span> <span style="color: #000000;">i</span><span style="color: #0000FF;"><=</span><span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">s</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">do</span> |
|||
atom delta = timedate_diff(last_mod,date()) |
|||
<span style="color: #004080;">integer</span> <span style="color: #000000;">si</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">s</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">]</span> |
|||
refetch = (delta>refresh_cache) or get_file_size(filename)=0 |
|||
<span style="color: #004080;">string</span> <span style="color: #000000;">tsi</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">html_clean</span><span style="color: #0000FF;">(</span><span style="color: #000000;">tasks</span><span style="color: #0000FF;">[</span><span style="color: #000000;">si</span><span style="color: #0000FF;">])</span> |
|||
else |
|||
<span style="color: #008080;">if</span> <span style="color: #000000;">j</span><span style="color: #0000FF;"><=</span><span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">s</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">and</span> <span style="color: #000000;">si</span><span style="color: #0000FF;">=</span><span style="color: #000000;">s</span><span style="color: #0000FF;">[</span><span style="color: #000000;">j</span><span style="color: #0000FF;">]</span> <span style="color: #008080;">then</span> |
|||
string directory = get_file_path(filename) |
|||
<span style="color: #008080;">while</span> <span style="color: #000000;">j</span><span style="color: #0000FF;"><</span><span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">s</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">and</span> <span style="color: #000000;">si</span><span style="color: #0000FF;">=</span><span style="color: #000000;">s</span><span style="color: #0000FF;">[</span><span style="color: #000000;">j</span><span style="color: #0000FF;">+</span><span style="color: #000000;">1</span><span style="color: #0000FF;">]</span> <span style="color: #008080;">do</span> <span style="color: #000000;">j</span><span style="color: #0000FF;">+=</span><span style="color: #000000;">1</span> <span style="color: #008080;">end</span> <span style="color: #008080;">while</span> |
|||
if get_file_type(directory)!=FILETYPE_DIRECTORY then |
|||
<span style="color: #000000;">s</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">..</span><span style="color: #000000;">j</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">=</span> <span style="color: #0000FF;">{</span><span style="color: #7060A8;">sprintf</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"%s[%d]"</span><span style="color: #0000FF;">,{</span><span style="color: #000000;">tsi</span><span style="color: #0000FF;">,</span><span style="color: #000000;">j</span><span style="color: #0000FF;">-</span><span style="color: #000000;">i</span><span style="color: #0000FF;">+</span><span style="color: #000000;">1</span><span style="color: #0000FF;">})}</span> |
|||
if not create_directory(directory,make_parent:=true) then |
|||
<span style="color: #008080;">else</span> |
|||
crash("cannot create %s directory",{directory}) |
|||
<span style="color: #000000;">s</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">tsi</span> |
|||
end if |
|||
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span> |
|||
end if |
|||
<span style="color: #000000;">i</span> <span style="color: #0000FF;">+=</span> <span style="color: #000000;">1</span> |
|||
end if |
|||
<span style="color: #000000;">j</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">i</span><span style="color: #0000FF;">+</span><span style="color: #000000;">1</span> |
|||
object text |
|||
<span style="color: #008080;">end</span> <span style="color: #008080;">while</span> |
|||
if not refetch then |
|||
<span style="color: #008080;">if</span> <span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">s</span><span style="color: #0000FF;">)></span><span style="color: #000000;">8</span> <span style="color: #008080;">then</span> <span style="color: #000000;">s</span><span style="color: #0000FF;">[</span><span style="color: #000000;">4</span><span style="color: #0000FF;">..-</span><span style="color: #000000;">4</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">=</span> <span style="color: #0000FF;">{</span><span style="color: #008000;">"..."</span><span style="color: #0000FF;">}</span> <span style="color: #008080;">end</span> <span style="color: #008080;">if</span> |
|||
text = trim(get_text(filename)) |
|||
<span style="color: #008080;">return</span> <span style="color: #7060A8;">join</span><span style="color: #0000FF;">(</span><span style="color: #000000;">s</span><span style="color: #0000FF;">,</span><span style="color: #008000;">","</span><span style="color: #0000FF;">)</span> |
|||
refetch = (not sequence(text)) or (length(text)<10) |
|||
<span style="color: #008080;">end</span> <span style="color: #008080;">function</span> |
|||
end if |
|||
if refetch then |
|||
<span style="color: #004080;">bool</span> <span style="color: #000000;">first</span> <span style="color: #0000FF;">=</span> <span style="color: #004600;">true</span> |
|||
progress("Downloading %s...\r",{filename}) |
|||
if curl=NULL then |
|||
<span style="color: #008080;">function</span> <span style="color: #000000;">find_bare_lang_tags</span><span style="color: #0000FF;">()</span> |
|||
curl_global_init() |
|||
<span style="color: #008080;">if</span> <span style="color: #7060A8;">get_file_type</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"rc_cache"</span><span style="color: #0000FF;">)!=</span><span style="color: #004600;">FILETYPE_DIRECTORY</span> <span style="color: #008080;">then</span> |
|||
curl = curl_easy_init() |
|||
<span style="color: #008080;">if</span> <span style="color: #008080;">not</span> <span style="color: #000000;">create_directory</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"rc_cache"</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">then</span> |
|||
pErrorBuffer = allocate(CURL_ERROR_SIZE) |
|||
<span style="color: #7060A8;">crash</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"cannot create rc_cache directory"</span><span style="color: #0000FF;">)</span> |
|||
curl_easy_setopt(curl, CURLOPT_ERRORBUFFER, pErrorBuffer) |
|||
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span> |
|||
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_cb) |
|||
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span> |
|||
end if |
|||
<span style="color: #000080;font-style:italic;">-- note this lot use web scraping (as cribbed from a similar task) ...</span> |
|||
url = substitute(url,"%3A",":") |
|||
<span style="color: #004080;">sequence</span> <span style="color: #000000;">tasks</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">dewiki</span><span style="color: #0000FF;">(</span><span style="color: #000000;">open_category</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"Programming_Tasks"</span><span style="color: #0000FF;">))</span> |
|||
url = substitute(url,"%2A","*") |
|||
<span style="color: #008080;">if</span> <span style="color: #000000;">include_drafts</span> <span style="color: #008080;">then</span> |
|||
curl_easy_setopt(curl, CURLOPT_URL, url) |
|||
<span style="color: #000000;">tasks</span> <span style="color: #0000FF;">&=</span> <span style="color: #000000;">dewiki</span><span style="color: #0000FF;">(</span><span style="color: #000000;">open_category</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"Draft_Programming_Tasks"</span><span style="color: #0000FF;">))</span> |
|||
integer fn = open(filename,"wb") |
|||
<span style="color: #000000;">tasks</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">sort</span><span style="color: #0000FF;">(</span><span style="color: #000000;">tasks</span><span style="color: #0000FF;">)</span> |
|||
if fn=-1 then ?9/0 end if |
|||
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span> |
|||
curl_easy_setopt(curl, CURLOPT_WRITEDATA, fn) |
|||
<span style="color: #004080;">integer</span> <span style="color: #000000;">blt</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">find</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"Rosetta_Code/Find_bare_lang_tags"</span><span style="color: #0000FF;">,</span><span style="color: #000000;">tasks</span><span style="color: #0000FF;">)</span> <span style="color: #000080;font-style:italic;">-- not this one!</span> |
|||
while true do |
|||
<span style="color: #000000;">tasks</span><span style="color: #0000FF;">[</span><span style="color: #000000;">blt</span><span style="color: #0000FF;">..</span><span style="color: #000000;">blt</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">=</span> <span style="color: #0000FF;">{}</span> |
|||
CURLcode res = curl_easy_perform(curl) |
|||
if res=CURLE_OK then exit end if |
|||
<span style="color: #000080;font-style:italic;">-- ... whereas the individual tasks use the web api instead (3x smaller/faster)</span> |
|||
string error = sprintf("%d",res) |
|||
<span style="color: #004080;">integer</span> <span style="color: #000000;">total_count</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">0</span><span style="color: #0000FF;">,</span> |
|||
if res=CURLE_COULDNT_RESOLVE_HOST then |
|||
<span style="color: #000000;">lt</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">tasks</span><span style="color: #0000FF;">),</span> |
|||
error &= " [CURLE_COULDNT_RESOLVE_HOST]" |
|||
<span style="color: #000000;">kept</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">0</span> |
|||
end if |
|||
<span style="color: #7060A8;">progress</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"%d tasks found\n"</span><span style="color: #0000FF;">,{</span><span style="color: #000000;">lt</span><span style="color: #0000FF;">})</span> |
|||
progress("Error %s downloading file, retry?(Y/N):",{error}) |
|||
<span style="color: #004080;">sequence</span> <span style="color: #000000;">task_langs</span> <span style="color: #0000FF;">=</span> <span style="color: #0000FF;">{},</span> |
|||
if lower(wait_key())!='y' then abort(0) end if |
|||
<span style="color: #000000;">task_counts</span> <span style="color: #0000FF;">=</span> <span style="color: #008080;">iff</span><span style="color: #0000FF;">(</span><span style="color: #000000;">sort_by_task</span><span style="color: #0000FF;">?</span><span style="color: #7060A8;">repeat</span><span style="color: #0000FF;">(</span><span style="color: #000000;">0</span><span style="color: #0000FF;">,</span><span style="color: #000000;">lt</span><span style="color: #0000FF;">):{}),</span> |
|||
printf(1,"Y\n") |
|||
<span style="color: #000000;">task_things</span> <span style="color: #0000FF;">=</span> <span style="color: #008080;">iff</span><span style="color: #0000FF;">(</span><span style="color: #000000;">sort_by_task</span><span style="color: #0000FF;">?</span><span style="color: #7060A8;">repeat</span><span style="color: #0000FF;">({},</span><span style="color: #000000;">lt</span><span style="color: #0000FF;">):{})</span> |
|||
end while |
|||
<span style="color: #008080;">for</span> <span style="color: #000000;">i</span><span style="color: #0000FF;">=</span><span style="color: #000000;">1</span> <span style="color: #008080;">to</span> <span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">tasks</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">do</span> |
|||
close(fn) |
|||
<span style="color: #004080;">string</span> <span style="color: #000000;">ti</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">tasks</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">],</span> |
|||
refresh_cache += timedelta(days:=1) -- did I mention it is slow? |
|||
<span style="color: #000000;">url</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">sprintf</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"http://rosettacode.org/mw/index.php?title=%s&action=raw"</span><span style="color: #0000FF;">,{</span><span style="color: #000000;">ti</span><span style="color: #0000FF;">}),</span> |
|||
text = get_text(filename) |
|||
<span style="color: #000000;">contents</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">open_download</span><span style="color: #0000FF;">(</span><span style="color: #000000;">ti</span><span style="color: #0000FF;">&</span><span style="color: #008000;">".raw"</span><span style="color: #0000FF;">,</span><span style="color: #000000;">url</span><span style="color: #0000FF;">),</span> |
|||
end if |
|||
<span style="color: #000000;">curr</span> |
|||
return text |
|||
<span style="color: #004080;">integer</span> <span style="color: #000000;">count</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">0</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">start</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">1</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">header</span> |
|||
end function |
|||
<span style="color: #008080;">while</span> <span style="color: #004600;">true</span> <span style="color: #008080;">do</span> |
|||
<span style="color: #000000;">start</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">match</span><span style="color: #0000FF;">(</span><span style="color: #008000;">`<l`</span><span style="color: #0000FF;">&</span><span style="color: #008000;">`ang>`</span><span style="color: #0000FF;">,</span><span style="color: #000000;">contents</span><span style="color: #0000FF;">,</span><span style="color: #000000;">start</span><span style="color: #0000FF;">)</span> |
|||
function open_category(string filename) |
|||
<span style="color: #008080;">if</span> <span style="color: #000000;">start</span><span style="color: #0000FF;">=</span><span style="color: #000000;">0</span> <span style="color: #008080;">then</span> <span style="color: #008080;">exit</span> <span style="color: #008080;">end</span> <span style="color: #008080;">if</span> |
|||
return open_download(filename&".htm","http://rosettacode.org/wiki/Category:"&filename) |
|||
<span style="color: #000080;font-style:italic;">-- look backward for the nearest header</span> |
|||
end function |
|||
<span style="color: #000000;">header</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">rmatch</span><span style="color: #0000FF;">(</span><span style="color: #008000;">`{`</span><span style="color: #0000FF;">&</span><span style="color: #008000;">`{he`</span><span style="color: #0000FF;">&</span><span style="color: #008000;">`ader|`</span><span style="color: #0000FF;">,</span><span style="color: #000000;">contents</span><span style="color: #0000FF;">,</span><span style="color: #000000;">start</span><span style="color: #0000FF;">)</span> |
|||
<span style="color: #008080;">if</span> <span style="color: #000000;">header</span><span style="color: #0000FF;">=</span><span style="color: #000000;">0</span> <span style="color: #008080;">then</span> |
|||
function dewiki(string s) |
|||
<span style="color: #000000;">curr</span> <span style="color: #0000FF;">=</span> <span style="color: #008000;">"no language"</span> |
|||
-- extract tasks from eg `<li><a href="/wiki/100_doors"` |
|||
<span style="color: #008080;">else</span> |
|||
sequence tasks = {} |
|||
<span style="color: #000000;">header</span> <span style="color: #0000FF;">+=</span> <span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #008000;">`{`</span><span style="color: #0000FF;">&</span><span style="color: #008000;">`{he`</span><span style="color: #0000FF;">&</span><span style="color: #008000;">`ader|`</span><span style="color: #0000FF;">)</span> |
|||
integer start = 1, finish = match(`<div class="printfooter">`,s) |
|||
<span style="color: #000000;">curr</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">utf8_clean</span><span style="color: #0000FF;">(</span><span style="color: #000000;">contents</span><span style="color: #0000FF;">[</span><span style="color: #000000;">header</span><span style="color: #0000FF;">..</span><span style="color: #7060A8;">match</span><span style="color: #0000FF;">(</span><span style="color: #008000;">`<nowiki>}}</nowiki>`</span><span style="color: #0000FF;">,</span><span style="color: #000000;">contents</span><span style="color: #0000FF;">,</span><span style="color: #000000;">header</span><span style="color: #0000FF;">)-</span><span style="color: #000000;">1</span><span style="color: #0000FF;">])</span> |
|||
s = s[1..finish-1] |
|||
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span> |
|||
while true do |
|||
<span style="color: #008080;">if</span> <span style="color: #000000;">sort_by_lang</span> <span style="color: #008080;">then</span> |
|||
start = match("<li><a href=\"/wiki/",s,start) |
|||
<span style="color: #004080;">integer</span> <span style="color: #000000;">k</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">find</span><span style="color: #0000FF;">(</span><span style="color: #000000;">curr</span><span style="color: #0000FF;">,</span><span style="color: #000000;">task_langs</span><span style="color: #0000FF;">)</span> |
|||
if start=0 then exit end if |
|||
<span style="color: #008080;">if</span> <span style="color: #000000;">k</span><span style="color: #0000FF;">=</span><span style="color: #000000;">0</span> <span style="color: #008080;">then</span> |
|||
start += length("<li><a href=\"/wiki/") |
|||
<span style="color: #000000;">task_langs</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">append</span><span style="color: #0000FF;">(</span><span style="color: #000000;">task_langs</span><span style="color: #0000FF;">,</span><span style="color: #000000;">curr</span><span style="color: #0000FF;">)</span> |
|||
finish = find('"',s,start) |
|||
<span style="color: #000000;">task_things</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">append</span><span style="color: #0000FF;">(</span><span style="color: #000000;">task_things</span><span style="color: #0000FF;">,{</span><span style="color: #000000;">i</span><span style="color: #0000FF;">})</span> |
|||
string task = s[start..finish-1] |
|||
<span style="color: #000000;">task_counts</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">append</span><span style="color: #0000FF;">(</span><span style="color: #000000;">task_counts</span><span style="color: #0000FF;">,</span><span style="color: #000000;">1</span><span style="color: #0000FF;">)</span> |
|||
task = substitute_all(task,{"*",":"},{"%2A","%3A"}) |
|||
<span style="color: #008080;">else</span> |
|||
if task!="Rosetta_Code/Find_bare_lang_tags" then -- not this one! |
|||
<span style="color: #000000;">task_things</span><span style="color: #0000FF;">[</span><span style="color: #000000;">k</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">append</span><span style="color: #0000FF;">(</span><span style="color: #000000;">task_things</span><span style="color: #0000FF;">[</span><span style="color: #000000;">k</span><span style="color: #0000FF;">],</span><span style="color: #000000;">i</span><span style="color: #0000FF;">)</span> |
|||
tasks = append(tasks,task) |
|||
<span style="color: #000000;">task_counts</span><span style="color: #0000FF;">[</span><span style="color: #000000;">k</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">+=</span> <span style="color: #000000;">1</span> |
|||
end if |
|||
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span> |
|||
-- if length(tasks)>10 then exit end if -- (debug aid) |
|||
<span style="color: #008080;">else</span> |
|||
start = finish+1 |
|||
<span style="color: #000000;">task_things</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">append</span><span style="color: #0000FF;">(</span><span style="color: #000000;">task_things</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">],</span><span style="color: #000000;">curr</span><span style="color: #0000FF;">)</span> |
|||
end while |
|||
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span> |
|||
return tasks |
|||
<span style="color: #000000;">count</span> <span style="color: #0000FF;">+=</span> <span style="color: #000000;">1</span> |
|||
end function |
|||
<span style="color: #000000;">start</span> <span style="color: #0000FF;">+=</span> <span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #008000;">`<l`</span><span style="color: #0000FF;">&</span><span style="color: #008000;">`ang>`</span><span style="color: #0000FF;">)</span> |
|||
<span style="color: #008080;">end</span> <span style="color: #008080;">while</span> |
|||
constant {html,ascii} = columnize({{"%2A","*"}, |
|||
<span style="color: #008080;">if</span> <span style="color: #000000;">count</span><span style="color: #0000FF;">!=</span><span style="color: #000000;">0</span> <span style="color: #008080;">then</span> |
|||
{"%3A",":"}, |
|||
<span style="color: #008080;">if</span> <span style="color: #000000;">sort_by_task</span> <span style="color: #008080;">then</span> |
|||
{"%27","'"}, |
|||
<span style="color: #000000;">task_counts</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">count</span> |
|||
{"%2B","+"}, |
|||
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span> |
|||
{"%22","\""}, |
|||
<span style="color: #000000;">kept</span> <span style="color: #0000FF;">+=</span> <span style="color: #000000;">1</span> |
|||
{"%E2%80%93","-"}, |
|||
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span> |
|||
{"%E2%80%99","'"}, |
|||
<span style="color: #7060A8;">progress</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"%d tasks kept, %d to go\r"</span><span style="color: #0000FF;">,{</span><span style="color: #000000;">kept</span><span style="color: #0000FF;">,</span><span style="color: #000000;">lt</span><span style="color: #0000FF;">-</span><span style="color: #000000;">i</span><span style="color: #0000FF;">})</span> |
|||
{"%C3%A8","e"}, |
|||
<span style="color: #000000;">total_count</span> <span style="color: #0000FF;">+=</span> <span style="color: #000000;">count</span> |
|||
{"%C3%A9","e"}}) |
|||
<span style="color: #008080;">if</span> <span style="color: #7060A8;">get_key</span><span style="color: #0000FF;">()=</span><span style="color: #000000;">#1B</span> <span style="color: #008080;">then</span> <span style="color: #7060A8;">progress</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"escape keyed\n"</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">exit</span> <span style="color: #008080;">end</span> <span style="color: #008080;">if</span> |
|||
<span style="color: #008080;">end</span> <span style="color: #008080;">for</span> |
|||
function html_clean(string s) |
|||
<span style="color: #000000;">curl_cleanup</span><span style="color: #0000FF;">()</span> |
|||
return substitute_all(s,html,ascii) |
|||
<span style="color: #7060A8;">progress</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"%d tasks with bare lang tags\n"</span><span style="color: #0000FF;">,{</span><span style="color: #000000;">kept</span><span style="color: #0000FF;">})</span> |
|||
end function |
|||
<span style="color: #004080;">sequence</span> <span style="color: #000000;">tags</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">custom_sort</span><span style="color: #0000FF;">(</span><span style="color: #000000;">task_counts</span><span style="color: #0000FF;">,</span><span style="color: #7060A8;">tagset</span><span style="color: #0000FF;">(</span><span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">task_counts</span><span style="color: #0000FF;">)))</span> |
|||
<span style="color: #008080;">for</span> <span style="color: #000000;">i</span><span style="color: #0000FF;">=</span><span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">tags</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">to</span> <span style="color: #000000;">1</span> <span style="color: #008080;">by</span> <span style="color: #0000FF;">-</span><span style="color: #000000;">1</span> <span style="color: #008080;">do</span> |
|||
constant {utf8,ansi} = columnize({{x"E28093","-"}, |
|||
<span style="color: #004080;">integer</span> <span style="color: #000000;">ti</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">tags</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">],</span> |
|||
{x"E28099","'"}, |
|||
<span style="color: #000000;">tc</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">task_counts</span><span style="color: #0000FF;">[</span><span style="color: #000000;">ti</span><span style="color: #0000FF;">]</span> |
|||
{x"C3A8","e"}, |
|||
<span style="color: #008080;">if</span> <span style="color: #000000;">tc</span><span style="color: #0000FF;">=</span><span style="color: #000000;">0</span> <span style="color: #008080;">then</span> <span style="color: #008080;">exit</span> <span style="color: #008080;">end</span> <span style="color: #008080;">if</span> |
|||
{x"C3A9","e"}, |
|||
<span style="color: #008080;">if</span> <span style="color: #000000;">sort_by_task</span> <span style="color: #008080;">then</span> |
|||
{x"D09A","K"}, |
|||
<span style="color: #7060A8;">progress</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"%s %d (%s)\n"</span><span style="color: #0000FF;">,{</span><span style="color: #000000;">html_clean</span><span style="color: #0000FF;">(</span><span style="color: #000000;">tasks</span><span style="color: #0000FF;">[</span><span style="color: #000000;">ti</span><span style="color: #0000FF;">]),</span><span style="color: #000000;">tc</span><span style="color: #0000FF;">,</span><span style="color: #000000;">multi_lang</span><span style="color: #0000FF;">(</span><span style="color: #000000;">task_things</span><span style="color: #0000FF;">[</span><span style="color: #000000;">ti</span><span style="color: #0000FF;">])})</span> |
|||
{x"D09C","M"}}) |
|||
<span style="color: #008080;">else</span> <span style="color: #000080;font-style:italic;">-- (sort_by_count)</span> |
|||
<span style="color: #7060A8;">progress</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"%s %d (%s)\n"</span><span style="color: #0000FF;">,{</span><span style="color: #000000;">task_langs</span><span style="color: #0000FF;">[</span><span style="color: #000000;">ti</span><span style="color: #0000FF;">],</span><span style="color: #000000;">tc</span><span style="color: #0000FF;">,</span><span style="color: #000000;">multi_task</span><span style="color: #0000FF;">(</span><span style="color: #000000;">task_things</span><span style="color: #0000FF;">[</span><span style="color: #000000;">ti</span><span style="color: #0000FF;">],</span><span style="color: #000000;">tasks</span><span style="color: #0000FF;">)})</span> |
|||
function utf8_clean(string s) |
|||
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span> |
|||
return substitute_all(s,utf8,ansi) |
|||
<span style="color: #008080;">end</span> <span style="color: #008080;">for</span> |
|||
end function |
|||
<span style="color: #008080;">return</span> <span style="color: #000000;">total_count</span> |
|||
<span style="color: #008080;">end</span> <span style="color: #008080;">function</span> |
|||
function multi_lang(sequence s) |
|||
-- Convert eg {"Algol","Algol","C","C","C"} to "Algol[2],C[3]" |
-- Only runs of adjacent equal entries are merged; an entry that is not |
-- repeated is left unchanged. Returns the entries joined with ",". |
-- NOTE(review): the HTML-markup rows interleaved below appear to be the |
-- other column of the rendered revision diff, not part of this routine. |
|||
<span style="color: #7060A8;">progress</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"Total: %d\n"</span><span style="color: #0000FF;">,{</span><span style="color: #000000;">find_bare_lang_tags</span><span style="color: #0000FF;">()})</span> |
|||
integer i = 1, j = 2 |
|||
-- i is the start of the current run, j the candidate end of it; the loop |
-- stops before the last entry because that one has no successor to compare |
while i<length(s) do |
|||
<span style="color: #0000FF;">?</span><span style="color: #008000;">"done"</span> |
|||
if s[i]=s[j] then |
|||
<span style="color: #0000FF;">{}</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">wait_key</span><span style="color: #0000FF;">()</span> |
|||
-- extend j to the last entry of the run that starts at i |
while j<length(s) and s[i]=s[j+1] do j+=1 end while |
|||
<!--</lang>--> |
|||
-- replace the whole run s[i..j] with a single "name[count]" entry (s shrinks) |
s[i..j] = {sprintf("%s[%d]",{s[i],j-i+1})} |
|||
end if |
|||
-- step past the (possibly collapsed) entry and restart the run detection |
i += 1 |
|||
j = i+1 |
|||
end while |
|||
return join(s,",") |
|||
end function |
|||
function multi_task(sequence s, tasks) |
|||
-- Similar to multi_lang() but with task[indexes] |
-- s holds indexes into tasks. Each index is replaced by html_clean(tasks[index]), |
-- and a run of adjacent equal indexes is collapsed to one "name[count]" entry. |
-- If more than 8 entries remain, everything between the first three and the |
-- last three is replaced by a single "...". Returns the entries joined with ",". |
|||
integer i = 1, j = 2 |
|||
-- unlike multi_lang() this loop also visits the last entry (it must be |
-- converted from an index to a name), hence the j<=length(s) guard below |
while i<=length(s) do |
|||
integer si = s[i] |
|||
string tsi = html_clean(tasks[si]) |
|||
if j<=length(s) and si=s[j] then |
|||
-- extend j to the last entry of the run that starts at i |
while j<length(s) and si=s[j+1] do j+=1 end while |
|||
s[i..j] = {sprintf("%s[%d]",{tsi,j-i+1})} |
|||
else |
|||
-- not repeated: just swap the index for the cleaned task name |
s[i] = tsi |
|||
end if |
|||
i += 1 |
|||
j = i+1 |
|||
end while |
|||
-- abbreviate long lists: keep s[1..3] and the last three entries |
if length(s)>8 then s[4..-4] = {"..."} end if |
|||
return join(s,",") |
|||
end function |
|||
function find_bare_lang_tags() |
-- Downloads the raw text of every task page, counts the bare lang tags in each, |
-- prints a summary (per task or per language, depending on sort_by_task / |
-- sort_by_lang) via progress(), and returns the total number of bare tags found. |
|||
-- note this lot use web scraping (as cribbed from a similar task) ... |
|||
sequence tasks = dewiki(open_category("Programming_Tasks")) |
|||
if include_drafts then |
|||
tasks &= dewiki(open_category("Draft_Programming_Tasks")) |
|||
tasks = sort(tasks) |
|||
end if |
|||
-- ... whereas the individual tasks use the web api instead (3x smaller/faster) |
|||
integer total_count = 0, |
|||
lt = length(tasks), |
|||
kept = 0 |
|||
progress("%d tasks found\n",{lt}) |
|||
-- sort_by_task: task_counts[i] is the number of bare tags in tasks[i] and |
--               task_things[i] the language name recorded for each of them |
--               (task_langs stays empty). |
-- sort_by_lang: task_langs[k] is a language name, task_counts[k] its number of |
--               bare tags and task_things[k] the indexes of the tasks they are in. |
sequence task_langs = {}, |
|||
task_counts = iff(sort_by_task?repeat(0,lt):{}), |
|||
task_things = iff(sort_by_task?repeat({},lt):{}) |
|||
for i=1 to length(tasks) do |
|||
string ti = tasks[i], |
|||
url = sprintf("http://rosettacode.org/mw/index.php?title=%s&action=raw",{ti}), |
|||
-- (ti&".raw" is presumably the cache file name - confirm against open_download) |
contents = open_download(ti&".raw",url), |
|||
-- the language name attributed to the bare tag currently being processed |
this |
|||
integer count = 0, start = 1, header |
|||
while true do |
|||
start = match(`<lang>`,contents,start) |
|||
if start=0 then exit end if |
|||
-- look backward for the nearest header |
|||
header = rmatch(`{{header|`,contents,start) |
|||
if header=0 then |
|||
-- this = "" |
|||
this = "no language" |
|||
else |
|||
header += length(`{{header|`) |
|||
-- the language name is the text between `{{header|` and the next `}}` |
this = utf8_clean(contents[header..match(`}}`,contents,header)-1]) |
|||
end if |
|||
if sort_by_lang then |
|||
integer k = find(this,task_langs) |
|||
if k=0 then |
|||
-- first bare tag seen for this language: start its three parallel entries |
task_langs = append(task_langs,this) |
|||
task_things = append(task_things,{i}) |
|||
task_counts = append(task_counts,1) |
|||
else |
|||
task_things[k] = append(task_things[k],i) |
|||
task_counts[k] += 1 |
|||
end if |
|||
else |
|||
task_things[i] = append(task_things[i],this) |
|||
end if |
|||
count += 1 |
|||
-- resume the search just past the tag that was found |
start += length(`<lang>`) |
|||
end while |
|||
-- kept counts the tasks that contain at least one bare tag |
if count!=0 then |
|||
if sort_by_task then |
|||
task_counts[i] = count |
|||
end if |
|||
kept += 1 |
|||
end if |
|||
progress("%d tasks kept, %d to go\r",{kept,lt-i}) |
|||
total_count += count |
|||
-- allow the (slow) scan to be abandoned from the keyboard; whatever has been |
-- collected so far is still reported below |
if get_key()=#1B then progress("escape keyed\n") exit end if |
|||
end for |
|||
-- release the curl handle and error buffer (curl and pErrorBuffer are declared |
-- outside this routine) and reset both so they are not released twice |
if curl!=NULL then |
|||
curl_easy_cleanup(curl) |
|||
free(pErrorBuffer) |
|||
curl = NULL |
|||
pErrorBuffer = NULL |
|||
end if |
|||
progress("%d tasks with bare lang tags\n",{kept}) |
|||
-- tag sort: tags holds the indexes of task_counts ordered by count |
sequence tags = custom_sort(task_counts,tagset(length(task_counts))) |
|||
-- walk the tags backwards so the largest counts are printed first |
for i=length(tags) to 1 by -1 do |
|||
integer ti = tags[i], |
|||
tc = task_counts[ti] |
|||
-- counts only get smaller from here on, so the first zero ends the report |
if tc=0 then exit end if |
|||
--if tc>5 then |
|||
if sort_by_task then |
|||
progress("%s %d (%s)\n",{html_clean(tasks[ti]),tc,multi_lang(task_things[ti])}) |
|||
else -- (sort_by_lang) |
|||
progress("%s %d (%s)\n",{task_langs[ti],tc,multi_task(task_things[ti],tasks)}) |
|||
end if |
|||
--end if |
|||
end for |
|||
return total_count |
|||
end function |
|||
-- run the scan and print the grand total |
progress("Total: %d\n",{find_bare_lang_tags()})</lang> |
|||
{{out}} |
{{out}} |
||
as of 26/7/19, sort_by_task: |
as of 26/7/19, sort_by_task: |