Rosetta Code/List authors of task descriptions: Difference between revisions
(Added Wren) |
m (→{{header|Phix}}: added syntax colouring, marked p2js incompatible) |
||
Line 227: | Line 227: | ||
properly thrash the rosettacode servers. |
properly thrash the rosettacode servers. |
||
{{libheader|Phix/libcurl}} |
{{libheader|Phix/libcurl}} |
||
<lang Phix>-- |
<!--<lang Phix>(notonline)--> |
||
<span style="color: #000080;font-style:italic;">-- demo\rosetta\List_task_authors.exw</span> |
|||
include builtins\libcurl.e |
|||
<span style="color: #008080;">without</span> <span style="color: #008080;">js</span> <span style="color: #000080;font-style:italic;">-- (libcurl, file i/o, peek, progress..)</span> |
|||
atom curl = NULL |
|||
<span style="color: #008080;">include</span> <span style="color: #000000;">builtins</span><span style="color: #0000FF;">\</span><span style="color: #000000;">libcurl</span><span style="color: #0000FF;">.</span><span style="color: #000000;">e</span> |
|||
atom pErrorBuffer |
|||
<span style="color: #004080;">atom</span> <span style="color: #000000;">curl</span> <span style="color: #0000FF;">=</span> <span style="color: #004600;">NULL</span> |
|||
<span style="color: #004080;">atom</span> <span style="color: #000000;">pErrorBuffer</span> |
|||
function write_callback(atom pData, integer size, integer nmemb, integer fn) |
|||
integer bytes_written = size * nmemb |
|||
<span style="color: #008080;">function</span> <span style="color: #000000;">write_callback</span><span style="color: #0000FF;">(</span><span style="color: #004080;">atom</span> <span style="color: #000000;">pData</span><span style="color: #0000FF;">,</span> <span style="color: #004080;">integer</span> <span style="color: #000000;">size</span><span style="color: #0000FF;">,</span> <span style="color: #004080;">integer</span> <span style="color: #000000;">nmemb</span><span style="color: #0000FF;">,</span> <span style="color: #004080;">integer</span> <span style="color: #000000;">fn</span><span style="color: #0000FF;">)</span> |
|||
puts(fn,peek({pData,bytes_written})) |
|||
<span style="color: #004080;">integer</span> <span style="color: #000000;">bytes_written</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">size</span> <span style="color: #0000FF;">*</span> <span style="color: #000000;">nmemb</span> |
|||
return bytes_written |
|||
<span style="color: #7060A8;">puts</span><span style="color: #0000FF;">(</span><span style="color: #000000;">fn</span><span style="color: #0000FF;">,</span><span style="color: #7060A8;">peek</span><span style="color: #0000FF;">({</span><span style="color: #000000;">pData</span><span style="color: #0000FF;">,</span><span style="color: #000000;">bytes_written</span><span style="color: #0000FF;">}))</span> |
|||
end function |
|||
<span style="color: #008080;">return</span> <span style="color: #000000;">bytes_written</span> |
|||
constant write_cb = call_back({'+', routine_id("write_callback")}) |
|||
<span style="color: #008080;">end</span> <span style="color: #008080;">function</span> |
|||
<span style="color: #008080;">constant</span> <span style="color: #000000;">write_cb</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">call_back</span><span style="color: #0000FF;">({</span><span style="color: #008000;">'+'</span><span style="color: #0000FF;">,</span> <span style="color: #7060A8;">routine_id</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"write_callback"</span><span style="color: #0000FF;">)})</span> |
|||
integer lp = 0 -- (last \r'd progress message length) |
|||
procedure progress(string msg, sequence args = {}) |
|||
<span style="color: #008080;">include</span> <span style="color: #000000;">builtins</span><span style="color: #0000FF;">\</span><span style="color: #004080;">timedate</span><span style="color: #0000FF;">.</span><span style="color: #000000;">e</span> |
|||
if length(args) then msg = sprintf(msg,args) end if |
|||
<span style="color: #000080;font-style:italic;">-- for [Draft_]Programming_Tasks aka non-.hist files only:</span> |
|||
integer lm = length(msg) |
|||
<span style="color: #004080;">integer</span> <span style="color: #000000;">refresh_cache</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">timedelta</span><span style="color: #0000FF;">(</span><span style="color: #000000;">days</span><span style="color: #0000FF;">:=</span><span style="color: #000000;">31</span><span style="color: #0000FF;">)</span> <span style="color: #000080;font-style:italic;">-- 0 for always</span> |
|||
if lm<lp then msg[$..$] = repeat(' ',lp-lm)&msg[$] end if |
|||
puts(1,msg) |
|||
<span style="color: #008080;">function</span> <span style="color: #000000;">open_download</span><span style="color: #0000FF;">(</span><span style="color: #004080;">string</span> <span style="color: #000000;">filename</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">url</span><span style="color: #0000FF;">,</span> <span style="color: #004080;">integer</span> <span style="color: #000000;">i</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">n</span><span style="color: #0000FF;">)</span> |
|||
lp = iff(msg[$]='\r'?lm:0) |
|||
<span style="color: #004080;">bool</span> <span style="color: #000000;">refetch</span> <span style="color: #0000FF;">=</span> <span style="color: #004600;">false</span> |
|||
end procedure |
|||
<span style="color: #004080;">object</span> <span style="color: #000000;">text</span> |
|||
<span style="color: #000000;">filename</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">join_path</span><span style="color: #0000FF;">({</span><span style="color: #008000;">"rc_cache"</span><span style="color: #0000FF;">,</span><span style="color: #000000;">filename</span><span style="color: #0000FF;">})</span> |
|||
include builtins\timedate.e |
|||
<span style="color: #008080;">if</span> <span style="color: #7060A8;">file_exists</span><span style="color: #0000FF;">(</span><span style="color: #000000;">filename</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">then</span> |
|||
-- for [Draft_]Programming_Tasks aka non-.hist files only: |
|||
<span style="color: #000000;">text</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">trim</span><span style="color: #0000FF;">(</span><span style="color: #7060A8;">get_text</span><span style="color: #0000FF;">(</span><span style="color: #000000;">filename</span><span style="color: #0000FF;">))</span> |
|||
integer refresh_cache = timedelta(days:=31) -- 0 for always |
|||
<span style="color: #000000;">refetch</span> <span style="color: #0000FF;">=</span> <span style="color: #0000FF;">(</span><span style="color: #008080;">not</span> <span style="color: #004080;">sequence</span><span style="color: #0000FF;">(</span><span style="color: #000000;">text</span><span style="color: #0000FF;">))</span> <span style="color: #008080;">or</span> <span style="color: #0000FF;">(</span><span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">text</span><span style="color: #0000FF;">)<</span><span style="color: #000000;">10</span><span style="color: #0000FF;">)</span> |
|||
<span style="color: #008080;">if</span> <span style="color: #008080;">not</span> <span style="color: #000000;">refetch</span> <span style="color: #008080;">and</span> <span style="color: #008080;">not</span> <span style="color: #7060A8;">match</span><span style="color: #0000FF;">(</span><span style="color: #008000;">".hist"</span><span style="color: #0000FF;">,</span><span style="color: #000000;">filename</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">then</span> |
|||
function open_download(string filename, url, integer i, n) |
|||
<span style="color: #000080;font-style:italic;">-- use existing file if <= refresh_cache (31 days) old</span> |
|||
bool refetch = false |
|||
<span style="color: #004080;">sequence</span> <span style="color: #000000;">last_mod</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">get_file_date</span><span style="color: #0000FF;">(</span><span style="color: #000000;">filename</span><span style="color: #0000FF;">)</span> <span style="color: #000080;font-style:italic;">-- (0.8.1+)</span> |
|||
object text |
|||
<span style="color: #004080;">atom</span> <span style="color: #000000;">delta</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">timedate_diff</span><span style="color: #0000FF;">(</span><span style="color: #000000;">last_mod</span><span style="color: #0000FF;">,</span><span style="color: #7060A8;">date</span><span style="color: #0000FF;">())</span> |
|||
filename = join_path({"rc_cache",filename}) |
|||
<span style="color: #000000;">refetch</span> <span style="color: #0000FF;">=</span> <span style="color: #0000FF;">(</span><span style="color: #000000;">delta</span><span style="color: #0000FF;">></span><span style="color: #000000;">refresh_cache</span><span style="color: #0000FF;">)</span> |
|||
if file_exists(filename) then |
|||
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span> |
|||
text = trim(get_text(filename)) |
|||
<span style="color: #008080;">else</span> |
|||
refetch = (not sequence(text)) or (length(text)<10) |
|||
<span style="color: #004080;">string</span> <span style="color: #000000;">directory</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">get_file_path</span><span style="color: #0000FF;">(</span><span style="color: #000000;">filename</span><span style="color: #0000FF;">)</span> |
|||
if not refetch and not match(".hist",filename) then |
|||
<span style="color: #008080;">if</span> <span style="color: #7060A8;">get_file_type</span><span style="color: #0000FF;">(</span><span style="color: #000000;">directory</span><span style="color: #0000FF;">)!=</span><span style="color: #004600;">FILETYPE_DIRECTORY</span> <span style="color: #008080;">then</span> |
|||
-- use existing file if <= refresh_cache (31 days) old |
|||
<span style="color: #008080;">if</span> <span style="color: #008080;">not</span> <span style="color: #000000;">create_directory</span><span style="color: #0000FF;">(</span><span style="color: #000000;">directory</span><span style="color: #0000FF;">,</span><span style="color: #000000;">make_parent</span><span style="color: #0000FF;">:=</span><span style="color: #004600;">true</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">then</span> |
|||
sequence last_mod = get_file_date(filename) -- (0.8.1+) |
|||
<span style="color: #7060A8;">crash</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"cannot create %s directory"</span><span style="color: #0000FF;">,{</span><span style="color: #000000;">directory</span><span style="color: #0000FF;">})</span> |
|||
atom delta = timedate_diff(last_mod,date()) |
|||
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span> |
|||
refetch = (delta>refresh_cache) |
|||
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span> |
|||
end if |
|||
<span style="color: #000000;">refetch</span> <span style="color: #0000FF;">=</span> <span style="color: #004600;">true</span> |
|||
else |
|||
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span> |
|||
string directory = get_file_path(filename) |
|||
<span style="color: #008080;">if</span> <span style="color: #000000;">refetch</span> <span style="color: #008080;">then</span> |
|||
if get_file_type(directory)!=FILETYPE_DIRECTORY then |
|||
<span style="color: #7060A8;">progress</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"Downloading %d/%d %s...\r"</span><span style="color: #0000FF;">,{</span><span style="color: #000000;">i</span><span style="color: #0000FF;">,</span><span style="color: #000000;">n</span><span style="color: #0000FF;">,</span><span style="color: #000000;">filename</span><span style="color: #0000FF;">})</span> |
|||
if not create_directory(directory,make_parent:=true) then |
|||
<span style="color: #008080;">if</span> <span style="color: #000000;">curl</span><span style="color: #0000FF;">=</span><span style="color: #004600;">NULL</span> <span style="color: #008080;">then</span> |
|||
crash("cannot create %s directory",{directory}) |
|||
<span style="color: #7060A8;">curl_global_init</span><span style="color: #0000FF;">()</span> |
|||
end if |
|||
<span style="color: #000000;">curl</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">curl_easy_init</span><span style="color: #0000FF;">()</span> |
|||
end if |
|||
<span style="color: #000000;">pErrorBuffer</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">allocate</span><span style="color: #0000FF;">(</span><span style="color: #000000;">CURL_ERROR_SIZE</span><span style="color: #0000FF;">)</span> |
|||
refetch = true |
|||
<span style="color: #7060A8;">curl_easy_setopt</span><span style="color: #0000FF;">(</span><span style="color: #000000;">curl</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">CURLOPT_ERRORBUFFER</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">pErrorBuffer</span><span style="color: #0000FF;">)</span> |
|||
end if |
|||
<span style="color: #7060A8;">curl_easy_setopt</span><span style="color: #0000FF;">(</span><span style="color: #000000;">curl</span><span style="color: #0000FF;">,</span> <span style="color: #004600;">CURLOPT_WRITEFUNCTION</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">write_cb</span><span style="color: #0000FF;">)</span> |
|||
if refetch then |
|||
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span> |
|||
progress("Downloading %d/%d %s...\r",{i,n,filename}) |
|||
<span style="color: #000000;">url</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">substitute</span><span style="color: #0000FF;">(</span><span style="color: #000000;">url</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"%3A"</span><span style="color: #0000FF;">,</span><span style="color: #008000;">":"</span><span style="color: #0000FF;">)</span> |
|||
if curl=NULL then |
|||
<span style="color: #000000;">url</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">substitute</span><span style="color: #0000FF;">(</span><span style="color: #000000;">url</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"%2A"</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"*"</span><span style="color: #0000FF;">)</span> |
|||
curl_global_init() |
|||
<span style="color: #7060A8;">curl_easy_setopt</span><span style="color: #0000FF;">(</span><span style="color: #000000;">curl</span><span style="color: #0000FF;">,</span> <span style="color: #004600;">CURLOPT_URL</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">url</span><span style="color: #0000FF;">)</span> |
|||
curl = curl_easy_init() |
|||
<span style="color: #004080;">integer</span> <span style="color: #000000;">fn</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">open</span><span style="color: #0000FF;">(</span><span style="color: #000000;">filename</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"wb"</span><span style="color: #0000FF;">)</span> |
|||
pErrorBuffer = allocate(CURL_ERROR_SIZE) |
|||
<span style="color: #008080;">if</span> <span style="color: #000000;">fn</span><span style="color: #0000FF;">=-</span><span style="color: #000000;">1</span> <span style="color: #008080;">then</span> <span style="color: #0000FF;">?</span><span style="color: #000000;">9</span><span style="color: #0000FF;">/</span><span style="color: #000000;">0</span> <span style="color: #008080;">end</span> <span style="color: #008080;">if</span> |
|||
curl_easy_setopt(curl, CURLOPT_ERRORBUFFER, pErrorBuffer) |
|||
<span style="color: #7060A8;">curl_easy_setopt</span><span style="color: #0000FF;">(</span><span style="color: #000000;">curl</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">CURLOPT_WRITEDATA</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">fn</span><span style="color: #0000FF;">)</span> |
|||
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_cb) |
|||
<span style="color: #008080;">while</span> <span style="color: #004600;">true</span> <span style="color: #008080;">do</span> |
|||
end if |
|||
<span style="color: #004080;">CURLcode</span> <span style="color: #000000;">res</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">curl_easy_perform</span><span style="color: #0000FF;">(</span><span style="color: #000000;">curl</span><span style="color: #0000FF;">)</span> |
|||
url = substitute(url,"%3A",":") |
|||
<span style="color: #008080;">if</span> <span style="color: #000000;">res</span><span style="color: #0000FF;">=</span><span style="color: #004600;">CURLE_OK</span> <span style="color: #008080;">then</span> <span style="color: #008080;">exit</span> <span style="color: #008080;">end</span> <span style="color: #008080;">if</span> |
|||
url = substitute(url,"%2A","*") |
|||
<span style="color: #004080;">string</span> <span style="color: #000000;">error</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">sprintf</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"%d"</span><span style="color: #0000FF;">,</span><span style="color: #000000;">res</span><span style="color: #0000FF;">)</span> |
|||
curl_easy_setopt(curl, CURLOPT_URL, url) |
|||
<span style="color: #008080;">if</span> <span style="color: #000000;">res</span><span style="color: #0000FF;">=</span><span style="color: #000000;">CURLE_COULDNT_RESOLVE_HOST</span> <span style="color: #008080;">then</span> |
|||
integer fn = open(filename,"wb") |
|||
<span style="color: #000000;">error</span> <span style="color: #0000FF;">&=</span> <span style="color: #008000;">" [CURLE_COULDNT_RESOLVE_HOST]"</span> |
|||
if fn=-1 then ?9/0 end if |
|||
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span> |
|||
curl_easy_setopt(curl, CURLOPT_WRITEDATA, fn) |
|||
<span style="color: #7060A8;">progress</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"Error %s downloading file, retry?(Y/N):"</span><span style="color: #0000FF;">,{</span><span style="color: #000000;">error</span><span style="color: #0000FF;">})</span> |
|||
while true do |
|||
<span style="color: #008080;">if</span> <span style="color: #7060A8;">lower</span><span style="color: #0000FF;">(</span><span style="color: #7060A8;">wait_key</span><span style="color: #0000FF;">())!=</span><span style="color: #008000;">'y'</span> <span style="color: #008080;">then</span> <span style="color: #7060A8;">abort</span><span style="color: #0000FF;">(</span><span style="color: #000000;">0</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">end</span> <span style="color: #008080;">if</span> |
|||
CURLcode res = curl_easy_perform(curl) |
|||
<span style="color: #7060A8;">printf</span><span style="color: #0000FF;">(</span><span style="color: #000000;">1</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"Y\n"</span><span style="color: #0000FF;">)</span> |
|||
if res=CURLE_OK then exit end if |
|||
<span style="color: #008080;">end</span> <span style="color: #008080;">while</span> |
|||
string error = sprintf("%d",res) |
|||
<span style="color: #7060A8;">close</span><span style="color: #0000FF;">(</span><span style="color: #000000;">fn</span><span style="color: #0000FF;">)</span> |
|||
if res=CURLE_COULDNT_RESOLVE_HOST then |
|||
<span style="color: #000000;">text</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">get_text</span><span style="color: #0000FF;">(</span><span style="color: #000000;">filename</span><span style="color: #0000FF;">)</span> |
|||
error &= " [CURLE_COULDNT_RESOLVE_HOST]" |
|||
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span> |
|||
end if |
|||
<span style="color: #008080;">return</span> <span style="color: #000000;">text</span> |
|||
progress("Error %s downloading file, retry?(Y/N):",{error}) |
|||
<span style="color: #008080;">end</span> <span style="color: #008080;">function</span> |
|||
if lower(wait_key())!='y' then abort(0) end if |
|||
printf(1,"Y\n") |
|||
<span style="color: #008080;">function</span> <span style="color: #000000;">open_category</span><span style="color: #0000FF;">(</span><span style="color: #004080;">string</span> <span style="color: #000000;">filename</span><span style="color: #0000FF;">,</span> <span style="color: #004080;">integer</span> <span style="color: #000000;">i</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">n</span><span style="color: #0000FF;">)</span> |
|||
end while |
|||
<span style="color: #008080;">return</span> <span style="color: #000000;">open_download</span><span style="color: #0000FF;">(</span><span style="color: #000000;">filename</span><span style="color: #0000FF;">&</span><span style="color: #008000;">".htm"</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"http://rosettacode.org/wiki/Category:"</span><span style="color: #0000FF;">&</span><span style="color: #000000;">filename</span><span style="color: #0000FF;">,</span><span style="color: #000000;">i</span><span style="color: #0000FF;">,</span><span style="color: #000000;">n</span><span style="color: #0000FF;">)</span> |
|||
close(fn) |
|||
<span style="color: #008080;">end</span> <span style="color: #008080;">function</span> |
|||
text = get_text(filename) |
|||
end if |
|||
<span style="color: #008080;">function</span> <span style="color: #000000;">dewiki</span><span style="color: #0000FF;">(</span><span style="color: #004080;">string</span> <span style="color: #000000;">s</span><span style="color: #0000FF;">)</span> |
|||
return text |
|||
<span style="color: #000080;font-style:italic;">-- extract tasks from eg `<li><a href="/wiki/100_doors"`</span> |
|||
end function |
|||
<span style="color: #004080;">sequence</span> <span style="color: #000000;">tasks</span> <span style="color: #0000FF;">=</span> <span style="color: #0000FF;">{}</span> |
|||
<span style="color: #004080;">integer</span> <span style="color: #000000;">start</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">1</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">finish</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">match</span><span style="color: #0000FF;">(</span><span style="color: #008000;">`<div class="printfooter">`</span><span style="color: #0000FF;">,</span><span style="color: #000000;">s</span><span style="color: #0000FF;">)</span> |
|||
function open_category(string filename, integer i, n) |
|||
<span style="color: #000000;">s</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">s</span><span style="color: #0000FF;">[</span><span style="color: #000000;">1</span><span style="color: #0000FF;">..</span><span style="color: #000000;">finish</span><span style="color: #0000FF;">-</span><span style="color: #000000;">1</span><span style="color: #0000FF;">]</span> |
|||
return open_download(filename&".htm","http://rosettacode.org/wiki/Category:"&filename,i,n) |
|||
<span style="color: #008080;">while</span> <span style="color: #004600;">true</span> <span style="color: #008080;">do</span> |
|||
end function |
|||
<span style="color: #000000;">start</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">match</span><span style="color: #0000FF;">(</span><span style="color: #008000;">`<li><a href="/wiki/`</span><span style="color: #0000FF;">,</span><span style="color: #000000;">s</span><span style="color: #0000FF;">,</span><span style="color: #000000;">start</span><span style="color: #0000FF;">)</span> |
|||
<span style="color: #008080;">if</span> <span style="color: #000000;">start</span><span style="color: #0000FF;">=</span><span style="color: #000000;">0</span> <span style="color: #008080;">then</span> <span style="color: #008080;">exit</span> <span style="color: #008080;">end</span> <span style="color: #008080;">if</span> |
|||
function dewiki(string s) |
|||
<span style="color: #000000;">start</span> <span style="color: #0000FF;">+=</span> <span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #008000;">`<li><a href="/wiki/`</span><span style="color: #0000FF;">)</span> |
|||
-- extract tasks from eg `<li><a href="/wiki/100_doors"` |
|||
<span style="color: #000000;">finish</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">find</span><span style="color: #0000FF;">(</span><span style="color: #008000;">'"'</span><span style="color: #0000FF;">,</span><span style="color: #000000;">s</span><span style="color: #0000FF;">,</span><span style="color: #000000;">start</span><span style="color: #0000FF;">)</span> |
|||
sequence tasks = {} |
|||
<span style="color: #004080;">string</span> <span style="color: #000000;">task</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">s</span><span style="color: #0000FF;">[</span><span style="color: #000000;">start</span><span style="color: #0000FF;">..</span><span style="color: #000000;">finish</span><span style="color: #0000FF;">-</span><span style="color: #000000;">1</span><span style="color: #0000FF;">]</span> |
|||
integer start = 1, finish = match(`<div class="printfooter">`,s) |
|||
<span style="color: #000000;">task</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">substitute_all</span><span style="color: #0000FF;">(</span><span style="color: #000000;">task</span><span style="color: #0000FF;">,{</span><span style="color: #008000;">"*"</span><span style="color: #0000FF;">,</span><span style="color: #008000;">":"</span><span style="color: #0000FF;">},{</span><span style="color: #008000;">"%2A"</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"%3A"</span><span style="color: #0000FF;">})</span> |
|||
s = s[1..finish-1] |
|||
<span style="color: #000000;">tasks</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">append</span><span style="color: #0000FF;">(</span><span style="color: #000000;">tasks</span><span style="color: #0000FF;">,</span><span style="color: #000000;">task</span><span style="color: #0000FF;">)</span> |
|||
while true do |
|||
<span style="color: #000000;">start</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">finish</span><span style="color: #0000FF;">+</span><span style="color: #000000;">1</span> |
|||
start = match(`<li><a href="/wiki/`,s,start) |
|||
<span style="color: #008080;">end</span> <span style="color: #008080;">while</span> |
|||
if start=0 then exit end if |
|||
<span style="color: #008080;">return</span> <span style="color: #000000;">tasks</span> |
|||
start += length(`<li><a href="/wiki/`) |
|||
<span style="color: #008080;">end</span> <span style="color: #008080;">function</span> |
|||
finish = find('"',s,start) |
|||
string task = s[start..finish-1] |
|||
<span style="color: #008080;">constant</span> <span style="color: #0000FF;">{</span><span style="color: #000000;">hex</span><span style="color: #0000FF;">,</span><span style="color: #000000;">ascii</span><span style="color: #0000FF;">}</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">columnize</span><span style="color: #0000FF;">({{</span><span style="color: #008000;">"%2A"</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"*"</span><span style="color: #0000FF;">},</span> |
|||
task = substitute_all(task,{"*",":"},{"%2A","%3A"}) |
|||
<span style="color: #0000FF;">{</span><span style="color: #008000;">"%3A"</span><span style="color: #0000FF;">,</span><span style="color: #008000;">":"</span><span style="color: #0000FF;">},</span> |
|||
tasks = append(tasks,task) |
|||
<span style="color: #0000FF;">{</span><span style="color: #008000;">"%27"</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"'"</span><span style="color: #0000FF;">},</span> |
|||
start = finish+1 |
|||
<span style="color: #0000FF;">{</span><span style="color: #008000;">"%2B"</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"+"</span><span style="color: #0000FF;">},</span> |
|||
end while |
|||
<span style="color: #0000FF;">{</span><span style="color: #008000;">"%22"</span><span style="color: #0000FF;">,</span><span style="color: #008000;">`"`</span><span style="color: #0000FF;">},</span> |
|||
return tasks |
|||
<span style="color: #0000FF;">{</span><span style="color: #008000;">"%E2%80%93"</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"-"</span><span style="color: #0000FF;">},</span> |
|||
end function |
|||
<span style="color: #0000FF;">{</span><span style="color: #008000;">"%E2%80%99"</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"'"</span><span style="color: #0000FF;">},</span> |
|||
<span style="color: #0000FF;">{</span><span style="color: #008000;">"%C3%A8"</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"e"</span><span style="color: #0000FF;">},</span> |
|||
constant {hex,ascii} = columnize({{"%2A","*"}, |
|||
<span style="color: #0000FF;">{</span><span style="color: #008000;">"%C3%A9"</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"e"</span><span style="color: #0000FF;">}})</span> |
|||
{"%3A",":"}, |
|||
{"%27","'"}, |
|||
<span style="color: #008080;">function</span> <span style="color: #000000;">html_clean</span><span style="color: #0000FF;">(</span><span style="color: #004080;">string</span> <span style="color: #000000;">s</span><span style="color: #0000FF;">)</span> |
|||
{"%2B","+"}, |
|||
<span style="color: #008080;">return</span> <span style="color: #7060A8;">substitute_all</span><span style="color: #0000FF;">(</span><span style="color: #000000;">s</span><span style="color: #0000FF;">,</span><span style="color: #000000;">hex</span><span style="color: #0000FF;">,</span><span style="color: #000000;">ascii</span><span style="color: #0000FF;">)</span> |
|||
{"%22",`"`}, |
|||
<span style="color: #008080;">end</span> <span style="color: #008080;">function</span> |
|||
{"%E2%80%93","-"}, |
|||
{"%E2%80%99","'"}, |
|||
<span style="color: #008080;">constant</span> <span style="color: #000000;">history_user</span> <span style="color: #0000FF;">=</span> <span style="color: #008000;">`<span class='history-user'><a href="`</span> |
|||
{"%C3%A8","e"}, |
|||
{"%C3%A9","e"}}) |
|||
<span style="color: #008080;">function</span> <span style="color: #000000;">count_tasks</span><span style="color: #0000FF;">()</span> |
|||
<span style="color: #008080;">if</span> <span style="color: #7060A8;">get_file_type</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"rc_cache"</span><span style="color: #0000FF;">)!=</span><span style="color: #004600;">FILETYPE_DIRECTORY</span> <span style="color: #008080;">then</span> |
|||
function html_clean(string s) |
|||
<span style="color: #008080;">if</span> <span style="color: #008080;">not</span> <span style="color: #000000;">create_directory</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"rc_cache"</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">then</span> |
|||
return substitute_all(s,hex,ascii) |
|||
<span style="color: #7060A8;">crash</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"cannot create rc_cache directory"</span><span style="color: #0000FF;">)</span> |
|||
end function |
|||
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span> |
|||
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span> |
|||
constant history_user = `<span class='history-user'><a href="` |
|||
<span style="color: #004080;">sequence</span> <span style="color: #000000;">tasks</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">dewiki</span><span style="color: #0000FF;">(</span><span style="color: #000000;">open_category</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"Programming_Tasks"</span><span style="color: #0000FF;">,</span><span style="color: #000000;">1</span><span style="color: #0000FF;">,</span><span style="color: #000000;">2</span><span style="color: #0000FF;">))</span> |
|||
<span style="color: #0000FF;">&</span> <span style="color: #000000;">dewiki</span><span style="color: #0000FF;">(</span><span style="color: #000000;">open_category</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"Draft_Programming_Tasks"</span><span style="color: #0000FF;">,</span><span style="color: #000000;">2</span><span style="color: #0000FF;">,</span><span style="color: #000000;">2</span><span style="color: #0000FF;">))</span> |
|||
function count_tasks() |
|||
<span style="color: #004080;">integer</span> <span style="color: #000000;">ntasks</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">tasks</span><span style="color: #0000FF;">)</span> |
|||
sequence tasks = dewiki(open_category("Programming_Tasks",1,2)) |
|||
<span style="color: #004080;">sequence</span> <span style="color: #000000;">users</span> <span style="color: #0000FF;">=</span> <span style="color: #0000FF;">{},</span> |
|||
& dewiki(open_category("Draft_Programming_Tasks",2,2)) |
|||
<span style="color: #000000;">utask</span> <span style="color: #0000FF;">=</span> <span style="color: #0000FF;">{},</span> |
|||
integer ntasks = length(tasks) |
|||
<span style="color: #000000;">ntask</span> <span style="color: #0000FF;">=</span> <span style="color: #0000FF;">{}</span> |
|||
sequence users = {}, |
|||
<span style="color: #008080;">for</span> <span style="color: #000000;">i</span><span style="color: #0000FF;">=</span><span style="color: #000000;">1</span> <span style="color: #008080;">to</span> <span style="color: #000000;">ntasks</span> <span style="color: #008080;">do</span> |
|||
utask = {}, |
|||
<span style="color: #004080;">string</span> <span style="color: #000000;">ti</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">tasks</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">],</span> |
|||
ntask = {} |
|||
<span style="color: #000000;">url</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">sprintf</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"http://rosettacode.org/mw/index.php?title=%s&action=history&dir=prev&limit=1"</span><span style="color: #0000FF;">,{</span><span style="color: #000000;">ti</span><span style="color: #0000FF;">}),</span> |
|||
for i=1 to ntasks do |
|||
<span style="color: #000000;">contents</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">open_download</span><span style="color: #0000FF;">(</span><span style="color: #000000;">ti</span><span style="color: #0000FF;">&</span><span style="color: #008000;">".hist"</span><span style="color: #0000FF;">,</span><span style="color: #000000;">url</span><span style="color: #0000FF;">,</span><span style="color: #000000;">i</span><span style="color: #0000FF;">,</span><span style="color: #000000;">ntasks</span><span style="color: #0000FF;">)</span> |
|||
string ti = tasks[i], |
|||
<span style="color: #004080;">integer</span> <span style="color: #000000;">k</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">match</span><span style="color: #0000FF;">(</span><span style="color: #000000;">history_user</span><span style="color: #0000FF;">,</span><span style="color: #000000;">contents</span><span style="color: #0000FF;">)</span> |
|||
url = sprintf("http://rosettacode.org/mw/index.php?title=%s&action=history&dir=prev&limit=1",{ti}), |
|||
<span style="color: #008080;">if</span> <span style="color: #000000;">k</span><span style="color: #0000FF;">=</span><span style="color: #000000;">0</span> <span style="color: #008080;">then</span> <span style="color: #0000FF;">?</span><span style="color: #000000;">9</span><span style="color: #0000FF;">/</span><span style="color: #000000;">0</span> <span style="color: #008080;">end</span> <span style="color: #008080;">if</span> |
|||
contents = open_download(ti&".hist",url,i,ntasks) |
|||
<span style="color: #000000;">k</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">find</span><span style="color: #0000FF;">(</span><span style="color: #008000;">'>'</span><span style="color: #0000FF;">,</span><span style="color: #000000;">contents</span><span style="color: #0000FF;">,</span><span style="color: #000000;">k</span><span style="color: #0000FF;">+</span><span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">history_user</span><span style="color: #0000FF;">))</span> |
|||
integer k = match(history_user,contents) |
|||
<span style="color: #008080;">if</span> <span style="color: #000000;">k</span><span style="color: #0000FF;">=</span><span style="color: #000000;">0</span> <span style="color: #008080;">then</span> <span style="color: #0000FF;">?</span><span style="color: #000000;">9</span><span style="color: #0000FF;">/</span><span style="color: #000000;">0</span> <span style="color: #008080;">end</span> <span style="color: #008080;">if</span> |
|||
if k=0 then ?9/0 end if |
|||
<span style="color: #000000;">k</span> <span style="color: #0000FF;">+=</span> <span style="color: #000000;">1</span> |
|||
k = find('>',contents,k+length(history_user)) |
|||
<span style="color: #004080;">integer</span> <span style="color: #000000;">e</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">match</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"</a>"</span><span style="color: #0000FF;">,</span><span style="color: #000000;">contents</span><span style="color: #0000FF;">,</span><span style="color: #000000;">k</span><span style="color: #0000FF;">)</span> |
|||
if k=0 then ?9/0 end if |
|||
<span style="color: #008080;">if</span> <span style="color: #000000;">e</span><span style="color: #0000FF;">=</span><span style="color: #000000;">0</span> <span style="color: #008080;">then</span> <span style="color: #0000FF;">?</span><span style="color: #000000;">9</span><span style="color: #0000FF;">/</span><span style="color: #000000;">0</span> <span style="color: #008080;">end</span> <span style="color: #008080;">if</span> |
|||
k += 1 |
|||
<span style="color: #004080;">string</span> <span style="color: #000000;">user</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">contents</span><span style="color: #0000FF;">[</span><span style="color: #000000;">k</span><span style="color: #0000FF;">..</span><span style="color: #000000;">e</span><span style="color: #0000FF;">-</span><span style="color: #000000;">1</span><span style="color: #0000FF;">]</span> |
|||
integer e = match("</a>",contents,k) |
|||
if e=0 then ?9/0 end if |
|||
<span style="color: #000000;">k</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">find</span><span style="color: #0000FF;">(</span><span style="color: #000000;">user</span><span style="color: #0000FF;">,</span><span style="color: #000000;">users</span><span style="color: #0000FF;">)</span> |
|||
string user = contents[k..e-1] |
|||
<span style="color: #008080;">if</span> <span style="color: #000000;">k</span><span style="color: #0000FF;">=</span><span style="color: #000000;">0</span> <span style="color: #008080;">then</span> |
|||
<span style="color: #000000;">users</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">append</span><span style="color: #0000FF;">(</span><span style="color: #000000;">users</span><span style="color: #0000FF;">,</span><span style="color: #000000;">user</span><span style="color: #0000FF;">)</span> |
|||
k = find(user,users) |
|||
<span style="color: #000000;">utask</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">append</span><span style="color: #0000FF;">(</span><span style="color: #000000;">utask</span><span style="color: #0000FF;">,{</span><span style="color: #000000;">i</span><span style="color: #0000FF;">})</span> |
|||
if k=0 then |
|||
<span style="color: #000000;">ntask</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">append</span><span style="color: #0000FF;">(</span><span style="color: #000000;">ntask</span><span style="color: #0000FF;">,</span><span style="color: #000000;">1</span><span style="color: #0000FF;">)</span> |
|||
users = append(users,user) |
|||
<span style="color: #008080;">else</span> |
|||
<span style="color: #000000;">utask</span><span style="color: #0000FF;">[</span><span style="color: #000000;">k</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">&=</span> <span style="color: #000000;">i</span> |
|||
ntask = append(ntask,1) |
|||
<span style="color: #000000;">ntask</span><span style="color: #0000FF;">[</span><span style="color: #000000;">k</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">+=</span> <span style="color: #000000;">1</span> |
|||
else |
|||
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span> |
|||
utask[k] &= i |
|||
<span style="color: #000000;">ti</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">html_clean</span><span style="color: #0000FF;">(</span><span style="color: #000000;">ti</span><span style="color: #0000FF;">)</span> <span style="color: #000080;font-style:italic;">-- (in case you want to show them)</span> |
|||
ntask[k] += 1 |
|||
<span style="color: #000000;">tasks</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">ti</span> |
|||
end if |
|||
<span style="color: #008080;">if</span> <span style="color: #7060A8;">get_key</span><span style="color: #0000FF;">()=</span><span style="color: #000000;">#1B</span> <span style="color: #008080;">then</span> <span style="color: #7060A8;">progress</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"escape keyed\n"</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">exit</span> <span style="color: #008080;">end</span> <span style="color: #008080;">if</span> |
|||
tasks[i] = html_clean(ti) -- (in case you want to show them) |
|||
<span style="color: #008080;">end</span> <span style="color: #008080;">for</span> |
|||
if get_key()=#1B then progress("escape keyed\n") exit end if |
|||
<span style="color: #008080;">if</span> <span style="color: #000000;">curl</span><span style="color: #0000FF;">!=</span><span style="color: #004600;">NULL</span> <span style="color: #008080;">then</span> |
|||
end for |
|||
<span style="color: #7060A8;">curl_easy_cleanup</span><span style="color: #0000FF;">(</span><span style="color: #000000;">curl</span><span style="color: #0000FF;">)</span> |
|||
if curl!=NULL then |
|||
<span style="color: #7060A8;">free</span><span style="color: #0000FF;">(</span><span style="color: #000000;">pErrorBuffer</span><span style="color: #0000FF;">)</span> |
|||
curl_easy_cleanup(curl) |
|||
<span style="color: #000000;">curl</span> <span style="color: #0000FF;">=</span> <span style="color: #004600;">NULL</span> |
|||
free(pErrorBuffer) |
|||
<span style="color: #000000;">pErrorBuffer</span> <span style="color: #0000FF;">=</span> <span style="color: #004600;">NULL</span> |
|||
curl = NULL |
|||
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span> |
|||
pErrorBuffer = NULL |
|||
<span style="color: #7060A8;">progress</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"\n"</span><span style="color: #0000FF;">)</span> |
|||
end if |
|||
<span style="color: #004080;">integer</span> <span style="color: #000000;">nusers</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">users</span><span style="color: #0000FF;">)</span> |
|||
progress("\n") |
|||
<span style="color: #004080;">sequence</span> <span style="color: #000000;">tags</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">custom_sort</span><span style="color: #0000FF;">(</span><span style="color: #000000;">ntask</span><span style="color: #0000FF;">,</span><span style="color: #7060A8;">tagset</span><span style="color: #0000FF;">(</span><span style="color: #000000;">nusers</span><span style="color: #0000FF;">))</span> |
|||
sequence tags = custom_sort(ntask,tagset(length(ntask))) |
|||
<span style="color: #004080;">integer</span> <span style="color: #000000;">top5</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">0</span> |
|||
integer top5 = 0 |
|||
<span style="color: #008080;">for</span> <span style="color: #000000;">i</span><span style="color: #0000FF;">=</span><span style="color: #000000;">nusers</span> <span style="color: #008080;">to</span> <span style="color: #000000;">1</span> <span style="color: #008080;">by</span> <span style="color: #0000FF;">-</span><span style="color: #000000;">1</span> <span style="color: #008080;">do</span> |
|||
for i=length(tags) to 1 by -1 do |
|||
<span style="color: #004080;">integer</span> <span style="color: #000000;">ui</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">tags</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">]</span> |
|||
integer ui = tags[i] |
|||
<span style="color: #008080;">if</span> <span style="color: #000000;">ntask</span><span style="color: #0000FF;">[</span><span style="color: #000000;">ui</span><span style="color: #0000FF;">]<</span><span style="color: #000000;">5</span> <span style="color: #008080;">then</span> <span style="color: #008080;">exit</span> <span style="color: #008080;">end</span> <span style="color: #008080;">if</span> |
|||
printf(1,"%s tasks:%d\n",{users[ui],ntask[ui]}) |
|||
<span style="color: #7060A8;">printf</span><span style="color: #0000FF;">(</span><span style="color: #000000;">1</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"%s tasks:%d\n"</span><span style="color: #0000FF;">,{</span><span style="color: #000000;">users</span><span style="color: #0000FF;">[</span><span style="color: #000000;">ui</span><span style="color: #0000FF;">],</span><span style="color: #000000;">ntask</span><span style="color: #0000FF;">[</span><span style="color: #000000;">ui</span><span style="color: #0000FF;">]})</span> |
|||
top5 += 1 if top5>5 then exit end if |
|||
<span style="color: #000000;">top5</span> <span style="color: #0000FF;">+=</span> <span style="color: #000000;">1</span> <span style="color: #008080;">if</span> <span style="color: #000000;">top5</span><span style="color: #0000FF;">></span><span style="color: #000000;">5</span> <span style="color: #008080;">then</span> <span style="color: #008080;">exit</span> <span style="color: #008080;">end</span> <span style="color: #008080;">if</span> |
|||
end for |
|||
<span style="color: #008080;">end</span> <span style="color: #008080;">for</span> |
|||
return ntasks |
|||
<span style="color: #008080;">return</span> <span style="color: #0000FF;">{</span><span style="color: #000000;">ntasks</span><span style="color: #0000FF;">,</span><span style="color: #000000;">nusers</span><span style="color: #0000FF;">}</span> |
|||
end function |
|||
<span style="color: #008080;">end</span> <span style="color: #008080;">function</span> |
|||
progress("Total: %d\n",{count_tasks()})</lang> |
|||
<span style="color: #7060A8;">progress</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"Total: %d tasks by %d authors\n"</span><span style="color: #0000FF;">,</span><span style="color: #000000;">count_tasks</span><span style="color: #0000FF;">())</span> |
|||
<!--</lang>--> |
|||
{{out}} |
{{out}} |
||
As of 6th Jan 2020 |
As of 6th Jan 2020 |
||
Line 404: | Line 406: | ||
Short Circuit tasks:33 |
Short Circuit tasks:33 |
||
Total: 1219 |
Total: 1219 |
||
</pre> |
|||
As of 6th Jan 2022 |
|||
<pre> |
|||
Paddy3118 tasks:199 |
|||
CalmoSoft tasks:128 |
|||
Markhobley tasks:71 |
|||
Gerard Schildberger tasks:66 |
|||
Mwn3d tasks:55 |
|||
Thundergnat tasks:44 |
|||
Total: 1492 tasks by 307 authors |
|||
</pre> |
</pre> |
||
Revision as of 19:07, 6 January 2022
In this task, the goal is to compile an authorship list for task descriptions. A pseudocode example (in imperative style) that should accomplish this is as follows:
<lang pseudocode>for each task page
grab page source, discard everything after the first ==section==.
Cache as $previous. Note $author.
for each revision grab page source, discard everything after first ==section==.
Cache as $previous2. Note $author2
compare $previous2 to $previous. If different, record $author to $list. replace $previous with $previous2 replace $author with $author2</lang>
The following resources for HTTP interface information for MediaWiki may prove to be useful:
- https://www.mediawiki.org/wiki/Index.php#Raw
- https://www.mediawiki.org/wiki/Index.php#History
- https://www.mediawiki.org/wiki/API:Main_page
Conversely, some languages have libraries which abstract these interfaces into language-native idioms. Use of these abstractions is perfectly fine.
Please DO NOT add a full output for each programming language; just show a representative sample. For a full listing, see Rosetta_Code/List_authors_of_task_descriptions/Full_list.
Go
<lang go>package main
import (
"fmt" "io/ioutil" "net/http" "regexp" "sort" "strings"
)
type authorNumber struct {
author string number int
}
func main() {
ex1 := `
- Output:
As of 5th March 2020:
Total tasks : 1237 Total authors : 287 The top 20 authors by number of tasks created are: Pos Tasks Author === ===== ====== 1: 178 Paddy3118 2: 71 Markhobley 3: 61 Gerard Schildberger 4: 55 Mwn3d 5: 39 NevilleDNZ 6: 33 Short Circuit 7: 30 Nigel Galloway 8: 29 Thundergnat 9: 23 Grondilu 10: 21 Dkf 11: 20 Fwend 11: 20 Blue Prawn 13: 19 CalmoSoft 14: 18 Kernigh 15: 17 ShinTakezou 15: 17 Dmitry-kazakov 15: 17 Ledrug 18: 13 Abu 18: 13 Paulo Jorente 18: 13 Waldorf
Nim
<lang Nim>import algorithm, httpclient, re, strutils, tables
let
re1 = re("""- Sort the authors in descending order by number of tasks created.
- Print the top twenty.
- Output:
On 2021-06-29.
The top 20 authors by number of tasks created are: Pos Tasks Author === ===== ====== 1 196 Paddy3118 2 84 CalmoSoft 3 72 Markhobley 4 66 Gerard_Schildberger 5 55 Mwn3d 6 39 NevilleDNZ 7 39 Thundergnat 8 33 Nigel_Galloway 9 33 Short_Circuit 10 23 Grondilu 11 21 Blue_Prawn 12 20 Fwend 13 20 Dkf 14 18 Kernigh 15 17 Ledrug 16 17 ShinTakezou 17 17 Dmitry kazakov 18 14 Wherrera 19 13 Waldorf 20 13 Abu
Phix
To keep the output nice and short, lists only the top task creators (at most six, each with at least five tasks).
Uses a cache: once a .hist file has been downloaded for a given
task, it is assumed to be good forever. Each task is about 20K,
so it will download around 25MB in total, for >= 1,219 tasks.
It does those sequentially, using curl_easy_ handles. I guess
if you really wanted to then using curl_multi_ handles would
properly thrash the rosettacode servers.
--
-- demo\rosetta\List_task_authors.exw
--
--  Downloads the (Draft) Programming_Tasks category pages and one history
--  page per task from rosettacode.org (cached under rc_cache), extracts a
--  user name from each history page, and prints the users with the most
--  tasks followed by the overall totals.
--
without js -- (libcurl, file i/o, peek, progress..)
include builtins\libcurl.e
atom curl = NULL        -- shared easy handle, created on first download
atom pErrorBuffer       -- allocated alongside curl, freed in count_tasks()

function write_callback(atom pData, integer size, integer nmemb, integer fn)
    -- libcurl write callback: copy the received bytes to open file fn
    integer bytes_written = size * nmemb
    puts(fn,peek({pData,bytes_written}))
    return bytes_written
end function
constant write_cb = call_back({'+', routine_id("write_callback")})

include builtins\timedate.e

-- for [Draft_]Programming_Tasks aka non-.hist files only:
integer refresh_cache = timedelta(days:=31) -- 0 for always

function open_download(string filename, url, integer i, n)
    -- Return the text of rc_cache/<filename>, (re)downloading it from url
    -- when it is missing, shorter than 10 characters, or (non-.hist files
    -- only) older than refresh_cache. i and n are only used for the
    -- "Downloading i/n" progress message.
    bool refetch = false
    object text
    filename = join_path({"rc_cache",filename})
    if file_exists(filename) then
        text = trim(get_text(filename))
        refetch = (not sequence(text)) or (length(text)<10)
        if not refetch and not match(".hist",filename) then
            -- use existing file if <= refresh_cache (31 days) old
            sequence last_mod = get_file_date(filename)     -- (0.8.1+)
            atom delta = timedate_diff(last_mod,date())
            refetch = (delta>refresh_cache)
        end if
    else
        string directory = get_file_path(filename)
        if get_file_type(directory)!=FILETYPE_DIRECTORY then
            if not create_directory(directory,make_parent:=true) then
                crash("cannot create %s directory",{directory})
            end if
        end if
        refetch = true
    end if
    if refetch then
        progress("Downloading %d/%d %s...\r",{i,n,filename})
        if curl=NULL then
            -- first download: set up the shared handle
            curl_global_init()
            curl = curl_easy_init()
            pErrorBuffer = allocate(CURL_ERROR_SIZE)
            curl_easy_setopt(curl, CURLOPT_ERRORBUFFER, pErrorBuffer)
            curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_cb)
        end if
        -- undo the escaping applied by dewiki() before requesting the page
        url = substitute(url,"%3A",":")
        url = substitute(url,"%2A","*")
        curl_easy_setopt(curl, CURLOPT_URL, url)
        integer fn = open(filename,"wb")
        if fn=-1 then ?9/0 end if
        curl_easy_setopt(curl, CURLOPT_WRITEDATA, fn)
        while true do
            -- retry until success, or the user declines
            CURLcode res = curl_easy_perform(curl)
            if res=CURLE_OK then exit end if
            string error = sprintf("%d",res)
            if res=CURLE_COULDNT_RESOLVE_HOST then
                error &= " [CURLE_COULDNT_RESOLVE_HOST]"
            end if
            progress("Error %s downloading file, retry?(Y/N):",{error})
            if lower(wait_key())!='y' then abort(0) end if
            printf(1,"Y\n")
        end while
        close(fn)
        text = get_text(filename)
    end if
    return text
end function

function open_category(string filename, integer i, n)
    -- fetch (or load from cache) the Category:<filename> page
    return open_download(filename&".htm","http://rosettacode.org/wiki/Category:"&filename,i,n)
end function

function dewiki(string s)
    -- extract tasks from eg `<li><a href="/wiki/100_doors"`
    sequence tasks = {}
    integer start = 1, finish = match(`<div class="printfooter">`,s)
    s = s[1..finish-1]
    while true do
        start = match(`<li><a href="/wiki/`,s,start)
        if start=0 then exit end if
        start += length(`<li><a href="/wiki/`)
        finish = find('"',s,start)
        string task = s[start..finish-1]
        -- the task name is also used to build a cache file name
        task = substitute_all(task,{"*",":"},{"%2A","%3A"})
        tasks = append(tasks,task)
        start = finish+1
    end while
    return tasks
end function

constant {hex,ascii} = columnize({{"%2A","*"},
                                  {"%3A",":"},
                                  {"%27","'"},
                                  {"%2B","+"},
                                  {"%22",`"`},
                                  {"%E2%80%93","-"},
                                  {"%E2%80%99","'"},
                                  {"%C3%A8","e"},
                                  {"%C3%A9","e"}})

function html_clean(string s)
    -- replace the percent-escapes listed in hex with their ascii stand-ins
    return substitute_all(s,hex,ascii)
end function

constant history_user = `<span class='history-user'><a href="`

function count_tasks()
    -- Tally tasks per user, print the leading users, and return
    -- {number of tasks, number of distinct users}.
    if get_file_type("rc_cache")!=FILETYPE_DIRECTORY then
        if not create_directory("rc_cache") then
            crash("cannot create rc_cache directory")
        end if
    end if
    sequence tasks = dewiki(open_category("Programming_Tasks",1,2))
                   & dewiki(open_category("Draft_Programming_Tasks",2,2))
    integer ntasks = length(tasks)
    sequence users = {},    -- distinct user names
             utask = {},    -- utask[k]: indexes of the tasks credited to users[k]
             ntask = {}     -- ntask[k]: number of tasks credited to users[k]
    for i=1 to ntasks do
        string ti = tasks[i],
               url = sprintf("http://rosettacode.org/mw/index.php?title=%s&action=history&dir=prev&limit=1",{ti}),
               contents = open_download(ti&".hist",url,i,ntasks)
        -- the user name is the link text following the history-user span
        integer k = match(history_user,contents)
        if k=0 then ?9/0 end if
        k = find('>',contents,k+length(history_user))
        if k=0 then ?9/0 end if
        k += 1
        integer e = match("</a>",contents,k)
        if e=0 then ?9/0 end if
        string user = contents[k..e-1]
        k = find(user,users)
        if k=0 then
            users = append(users,user)
            utask = append(utask,{i})
            ntask = append(ntask,1)
        else
            utask[k] &= i
            ntask[k] += 1
        end if
        ti = html_clean(ti) -- (in case you want to show them)
        tasks[i] = ti
        if get_key()=#1B then progress("escape keyed\n") exit end if
    end for
    if curl!=NULL then
        curl_easy_cleanup(curl)
        free(pErrorBuffer)
        curl = NULL
        pErrorBuffer = NULL
    end if
    progress("\n")
    integer nusers = length(users)
    sequence tags = custom_sort(ntask,tagset(nusers))
    integer top5 = 0
    -- walk from the largest count downwards; stops at the first user with
    -- fewer than 5 tasks, or once top5 exceeds 5 (ie after six lines)
    for i=nusers to 1 by -1 do
        integer ui = tags[i]
        if ntask[ui]<5 then exit end if
        printf(1,"%s tasks:%d\n",{users[ui],ntask[ui]})
        top5 += 1 if top5>5 then exit end if
    end for
    return {ntasks,nusers}
end function

progress("Total: %d tasks by %d authors\n",count_tasks())
- Output:
As of 6th Jan 2020
Paddy3118 tasks:176 Markhobley tasks:71 Gerard Schildberger tasks:59 Mwn3d tasks:55 NevilleDNZ tasks:39 Short Circuit tasks:33 Total: 1219
As of 6th Jan 2022
Paddy3118 tasks:199 CalmoSoft tasks:128 Markhobley tasks:71 Gerard Schildberger tasks:66 Mwn3d tasks:55 Thundergnat tasks:44 Total: 1492 tasks by 307 authors
Raku
(formerly Perl 6)
The pseudocode above is no longer really useful as the page format has changed significantly since this task was written. Rather than checking every edit to see if it was a change to the task description, we'll just assume the user that created the page is the task author. This isn't 100% accurate; a very few pages got renamed and recreated by someone other than the original author without preserving the history, so they are misreported (15 Puzzle Game, for instance), but it is as good as it is likely to get without extensive manual intervention. Subsequent edits to the task description are not credited. As it is, we must still make thousands of requests and pound the server pretty hard. Checking every edit would make the task several orders of magnitude more abusive of the server (and my internet connection).
<lang perl6>use HTTP::UserAgent;
use URI::Escape;
use JSON::Fast;
use Sort::Naturally;

# Builds a wiki table of task authors: queries the MediaWiki API for the
# members of the task categories and the first revision of each new task,
# caches the result as JSON, then writes the table to a text file.

# Friendlier descriptions for task categories
my %cat = (
    'Programming_Tasks' => 'Task',
    'Draft_Programming_Tasks' => 'Draft'
);

my $client = HTTP::UserAgent.new;

my $url = 'http://rosettacode.org/mw';

my $tablefile = './RC_Authors.txt';
my $hashfile  = './RC_Authors.json';

my %tasks;

# clear screen
run($*DISTRO.is-win ?? 'cls' !! 'clear');

%tasks = $hashfile.IO.e ?? $hashfile.IO.slurp.&from-json !! ( );
sleep 1;

#=begin update

note 'Retrieving task information...';

my %filter;
for %cat.keys.sort -> $category {
    mediawiki-query(
        $url, 'pages',
        :generator<categorymembers>,
        :gcmtitle("Category:$category"),
        :gcmlimit<350>,
        :rawcontinue(),
        :prop<title>
    ).map( { %filter{.<title>} = %cat{$category} } )
}

my $delete = %tasks.keys (-) %filter.keys;

# Tasks that have changed names or been removed
# (uses the :delete subscript adverb to remove the key)
%tasks{$_}:delete for $delete.keys;

my @add;
for %filter.keys -> $title {
    if %tasks{$title}:exists {
        %tasks{$title}<category> = %filter{$title} # update status
    }
    else {
        @add.push: $title => %filter{$title} # New Tasks
    }
}

if @add {
    .say for 'Adding new tasks:', |@add;
}

for @add -> $task {
    # oldest revision only: its user is taken to be the task author
    mediawiki-query(
        $url, 'pages',
        :titles($task.key),
        :prop<revisions>,
        :rvprop<user|timestamp>,
        :rvstart<2000-01-01T01:01:01Z>,
        :rvdir<newer>,
        :rvlimit<1>
    ).map: {
        print clear, 1 + $++, ' ', .[0]<title>;
        %tasks{.[0]<title>}<category> = $task.value;
        %tasks{.[0]<title>}<author> = .[0]<revisions>[0]<user>;
        # keep only the date part of the timestamp
        %tasks{.[0]<title>}<date> = .[0]<revisions>[0]<timestamp>.subst(/'T'.+$/, '')
    }
}

print clear;

# Save information to a local file
note "\nTask information saved to local file: {$hashfile.IO.absolute}";
$hashfile.IO.spurt(%tasks.&to-json);

#=end update

# Load information from local file
%tasks = $hashfile.IO.e ?? $hashfile.IO.slurp.&from-json !! ( );

# Convert saved task / author info to a table
note "\nBuilding table...";
my $count    = +%tasks;
my $taskcnt  = +%tasks.grep: *.value.<category> eq %cat<Programming_Tasks>;
my $draftcnt = $count - $taskcnt;

# Open a file handle to dump table in
my $out = open($tablefile, :w) or die "$!\n";

# Add table boilerplate and header
$out.say:
    "\{|class=\"wikitable sortable\"\n",
    "|+ As of { Date.today } :: Total Tasks: { $count }:: Tasks: { $taskcnt }",
    " :: Draft Tasks: { $draftcnt } ",
    ":: By {+%tasks{*}».<author>.unique} Authors\n",
    "! Author !! Tasks !! Authored"
;

# Get sorted unique list of task authors
for %tasks{*}».<author>.unique.sort(&naturally) -> $author {

    # Get list of tasks by this author
    my @these = %tasks.grep( { $_.value.<author> eq $author } );
    my $s = +@these == 1 ?? '' !! 's';

    # Add author and contributions link to the first two cells
    $out.say:
        $author ~~ /\d/
          ?? "|- id=\"$author\"\n|data-sort-value=\"{ sort-key $author }\"|$author\n"~
             "|data-sort-value=\"{ +@these }\"|"~
             "{ +@these } task{ $s }"
          !! "|- id=\"$author\"\n|$author\n"~
             "|data-sort-value=\"{ +@these }\"|"~
             "{ +@these } task{ $s }"
    ;

    if +@these > 2 {
        $out.say: "|style=\"padding: 0px;\"|\n",
          "\{|class=\"broadtable sortable\" style=\"width: 100%;\"\n",
          "! Task Name !! Date Added !! Status";
    }
    else {
        $out.say: "|style=\"padding: 0px;\"|\n",
          "\{|class=\"broadtable\" style=\"width: 100%;\"";
    }

    # Tasks by this author, sorted by name
    for @these.sort({.key.&naturally}) -> $task {

        my $color = $task.value.<category> eq 'Draft' ?? '#ffd' !! '#fff';

        # add the task link, date and status to the table in the second cell
        $out.say: "|-\n|style=\"background-color: $color;\"",
          ( $task.key ~~ /\d/
            ?? " data-sort-value=\"{ sort-key $task.key }\"| [[{uri-escape $task.key}|{$task.key}]]\n"
            !! "| [[{uri-escape $task.key}|{$task.key}]]\n"
          ),
          "|style=\"width: 10em; background-color: $color;\"| {$task.value.<date>}\n",
          "|style=\"width: 6em; background-color: $color;\"| {$task.value.<category>}",
    }
    $out.say: '|}'
}
$out.say( "|}\n" );
$out.close;

note "Table file saved as: {$tablefile.IO.absolute}";

# Lazily yield the values of query.<$type> from every page of an API query,
# following the query-continue data until the response has none.
sub mediawiki-query ($site, $type, *%query) {
    my $url = "$site/api.php?" ~ uri-query-string(
        :action<query>, :format<json>, :formatversion<2>, |%query);
    my $continue = '';

    gather loop {
        my $response = $client.get("$url&$continue");
        my $data = from-json($response.content);
        take $_ for $data.<query>.{$type}.values;
        $continue = uri-query-string |($data.<query-continue>{*}».hash.hash or last);
    }
}

# Join the named arguments as key=value pairs with URI-escaped values
sub uri-query-string (*%fields) { %fields.map({ "{.key}={uri-escape .value}" }).join("&") }

# Lower-case key in which each run of digits is prefixed by '0' and a
# character derived from the run's length
sub sort-key ($a) { $a.lc.subst(/(\d+)/, ->$/ {0~(65+($0.chars)).chr~$0},:g) }

# Return to the start of the line, blank 100 columns, and return again
sub clear { "\r" ~ ' ' x 100 ~ "\r" }</lang>
- Sample output:
See full output at Rosetta_Code/List_authors_of_task_descriptions/Full_list
Author | Tasks | Authored | ||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
2Powers | 2 tasks |
| ||||||||||||
12.175.32.19 | 1 task |
| ||||||||||||
12Me21 | 1 task |
| ||||||||||||
many rows omitted... | ||||||||||||||
Zorro1024 | 2 tasks |
| ||||||||||||
Zzo38 | 1 task |
| ||||||||||||
Русский | 3 tasks |
|
Wren
An embedded program so we can use libcurl.
Takes a little over an hour to run as the history page(s) for each task need to be downloaded and parsed to find the author. <lang ecmascript>/* rc_list_authors_of_task_descriptions.wren */
import "./pattern" for Pattern import "./fmt" for Fmt
var CURLOPT_URL = 10002 var CURLOPT_FOLLOWLOCATION = 52 var CURLOPT_WRITEFUNCTION = 20011 var CURLOPT_WRITEDATA = 10001
foreign class Buffer {
construct new() {} // C will allocate buffer of a suitable size
foreign value // returns buffer contents as a string
}
foreign class Curl {
construct easyInit() {}
foreign easySetOpt(opt, param)
foreign easyPerform()
foreign easyCleanup()
}
var curl = Curl.easyInit()
var getContent = Fn.new { |url|
var buffer = Buffer.new() curl.easySetOpt(CURLOPT_URL, url) curl.easySetOpt(CURLOPT_FOLLOWLOCATION, 1) curl.easySetOpt(CURLOPT_WRITEFUNCTION, 0) // write function to be supplied by C curl.easySetOpt(CURLOPT_WRITEDATA, buffer) curl.easyPerform() return buffer.value
}
var p1 = Pattern.new("We now embed this script in the following C program, build and run. <lang c>/* gcc rc_list_authors_of_task_descriptions.c -o rc_list_authors_of_task_descriptions -lcurl -lwren -lm */
- include <stdio.h>
- include <stdlib.h>
- include <string.h>
- include <curl/curl.h>
- include "wren.h"
- Output:
Position as at 6th January, 2022.
Total tasks : 1492 Total authors : 307 The top 30 authors by number of tasks created are: Pos Tasks Author ==== ===== ====== 1 199 Paddy3118 2 128 CalmoSoft 3 71 Markhobley 4 66 Gerard Schildberger 5 55 Mwn3d 6 44 Thundergnat 7 39 NevilleDNZ 8 36 Nigel Galloway 9 33 Short Circuit 10 23 Grondilu 11 21 Blue Prawn 12 20 Fwend 12= 20 Dkf 14 18 Kernigh 15 17 Dmitry-kazakov 15= 17 Wherrera 15= 17 ShinTakezou 15= 17 Ledrug 19 16 PureFox 20 13 Paulo Jorente 20= 13 Waldorf 20= 13 Abu 23 12 Ce 23= 12 Kevin Reid 25 10 Tinku99 25= 10 Bearophile 27 9 TimSC 27= 9 Puppydrum64 27= 9 Trizen 27= 9 EMBee