Rosetta Code/List authors of task descriptions: Difference between revisions

Phix
(→‎{{header|Perl 6}}: More efficient, only update things that have been changed (the creation date never changes))
(Phix)
Line 288:
|}
|}
 
=={{header|Phix}}==
To keep the output nice and short, lists the top 5 task creators.<br>
Uses a cache: once a .hist file has been downloaded for a given
task, it is assumed to be good forever. Each task is about 20K,
so it will download around 25MB in total, for >= 1,219 tasks.
It does those sequentially, using curl_easy_ handles. I guess
if you really wanted to then using curl_multi_ handles would
properly thrash the rosettacode servers.
<lang Phix>-- demo\rosetta\List_task_authors.exw
include builtins\libcurl.e
-- libcurl state shared by every download; set up lazily in open_download()
-- and released again at the end of count_tasks().
atom curl = NULL,   -- easy handle, NULL until the first download is needed
     pErrorBuffer   -- memory registered with libcurl as CURLOPT_ERRORBUFFER
 
function write_callback(atom pData, integer size, integer nmemb, integer fn)
    -- Write handler installed via CURLOPT_WRITEFUNCTION: copies the
    -- size*nmemb bytes found at pData to the open file fn and returns
    -- the number of bytes it consumed.
    integer nbytes = size * nmemb
    sequence chunk = peek({pData,nbytes})
    puts(fn,chunk)
    return nbytes
end function
-- machine-callable address of write_callback, handed to libcurl
-- (NOTE(review): the '+' prefix presumably selects cdecl - see call_back docs)
constant write_cb = call_back({'+', routine_id("write_callback")})
 
integer lp = 0  -- length of the previous message if it ended in '\r', else 0

procedure progress(string msg, sequence args = {})
    -- Print a status message on the console, formatting it with args
    -- when any are given. A message ending in '\r' leaves the cursor on
    -- the same line, so a following shorter message is padded with
    -- spaces (inserted before its final character) to wipe the leftovers.
    if length(args)!=0 then
        msg = sprintf(msg,args)
    end if
    integer len = length(msg)
    if len<lp then
        integer last = msg[$]
        msg = msg[1..$-1] & repeat(' ',lp-len) & last
    end if
    puts(1,msg)
    -- remember the unpadded length, but only for '\r'-terminated messages
    if msg[$]='\r' then
        lp = len
    else
        lp = 0
    end if
end procedure
 
include builtins\timedate.e
-- for [Draft_]Programming_Tasks aka non-.hist files only:
-- maximum age of a cached copy before open_download() fetches it again;
-- it is compared against the timedate_diff() between the file date and now.
integer refresh_cache = timedelta(days:=31) -- 0 for always
 
function open_download(string filename, url, integer i, n)
    -- Return the text of rc_cache\<filename>, downloading it from url first
    -- when no usable cached copy exists.
    --  filename: cache file name, relative to the rc_cache directory
    --  url:      address to fetch; %3A and %2A escapes are turned back
    --            into ':' and '*' before the request is made
    --  i, n:     position/total, used only in the progress message
    -- A cached file is reused unless it cannot be read or holds fewer than
    -- 10 characters; files without ".hist" in their name are additionally
    -- refetched once they are older than refresh_cache.
    -- If a download fails the user is asked whether to retry; answering
    -- anything but 'y' removes the incomplete file and aborts the program.
    bool refetch = false
    object text
    filename = join_path({"rc_cache",filename})
    if file_exists(filename) then
        text = trim(get_text(filename))
        refetch = (not sequence(text)) or (length(text)<10)
        if not refetch and not match(".hist",filename) then
            -- use existing file if <= refresh_cache (31 days) old
            sequence last_mod = get_file_date(filename) -- (0.8.1+)
            atom delta = timedate_diff(last_mod,date())
            refetch = (delta>refresh_cache)
        end if
    else
        string directory = get_file_path(filename)
        if get_file_type(directory)!=FILETYPE_DIRECTORY then
            if not create_directory(directory,make_parent:=true) then
                crash("cannot create %s directory",{directory})
            end if
        end if
        refetch = true
    end if
    if refetch then
        progress("Downloading %d/%d %s...\r",{i,n,filename})
        if curl=NULL then
            -- first download: create the shared handle and its fixed options
            curl_global_init()
            curl = curl_easy_init()
            pErrorBuffer = allocate(CURL_ERROR_SIZE)
            curl_easy_setopt(curl, CURLOPT_ERRORBUFFER, pErrorBuffer)
            curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_cb)
        end if
        url = substitute(url,"%3A",":")
        url = substitute(url,"%2A","*")
        curl_easy_setopt(curl, CURLOPT_URL, url)
        integer fn = -1
        while true do
            -- (re)open in "wb" mode for every attempt, so that a retry
            -- starts from an empty file instead of appending to whatever
            -- a failed transfer already wrote
            fn = open(filename,"wb")
            if fn=-1 then
                crash("cannot open %s for writing",{filename})
            end if
            curl_easy_setopt(curl, CURLOPT_WRITEDATA, fn)
            CURLcode res = curl_easy_perform(curl)
            close(fn)
            if res=CURLE_OK then exit end if
            string error = sprintf("%d",res)
            if res=CURLE_COULDNT_RESOLVE_HOST then
                error &= " [CURLE_COULDNT_RESOLVE_HOST]"
            end if
            progress("Error %s downloading file, retry?(Y/N):",{error})
            if lower(wait_key())!='y' then
                -- do not leave a partial file behind: a .hist file of 10+
                -- characters would be trusted as a good cache entry forever
                {} = delete_file(filename)
                abort(0)
            end if
            printf(1,"Y\n")
        end while
        text = get_text(filename)
    end if
    return text
end function
 
function open_category(string filename, integer i, n)
    -- Fetch (or load from cache) the Rosetta Code category page named
    -- filename; it is cached as <filename>.htm. i and n are passed
    -- straight through to open_download().
    string url = "http://rosettacode.org/wiki/Category:"&filename
    string cache_name = filename&".htm"
    return open_download(cache_name,url,i,n)
end function
 
function dewiki(string s)
    -- Extract task names from the html of a category page,
    -- eg `<li><a href="/wiki/100_doors"` yields "100_doors".
    -- Only the text before `<div class="printfooter">` is scanned (all of
    -- s when that marker is absent). Any '*' and ':' in a name are
    -- returned as %2A and %3A. Scanning stops at a link that has no
    -- closing quote (previously that case looped forever).
    -- Returns a sequence of strings, in page order.
    string link = `<li><a href="/wiki/`
    sequence tasks = {}
    integer start = 1,
            finish = match(`<div class="printfooter">`,s)
    if finish!=0 then
        s = s[1..finish-1]
    end if
    while true do
        start = match(link,s,start)
        if start=0 then exit end if
        start += length(link)
        finish = find('"',s,start)
        if finish=0 then exit end if -- truncated/malformed link: give up
        string task = s[start..finish-1]
        task = substitute_all(task,{"*",":"},{"%2A","%3A"})
        tasks = append(tasks,task)
        start = finish+1
    end while
    return tasks
end function
 
-- {percent-escape, replacement} pairs used to make task names readable;
-- the UTF-8 escapes for an en dash, a right single quote and e-grave /
-- e-acute are mapped to plain ASCII look-alikes.
constant escape_pairs = {{"%2A","*"},
                         {"%3A",":"},
                         {"%27","'"},
                         {"%2B","+"},
                         {"%22",`"`},
                         {"%E2%80%93","-"},
                         {"%E2%80%99","'"},
                         {"%C3%A8","e"},
                         {"%C3%A9","e"}}
-- split the pairs into the two parallel lists substitute_all() wants
constant {hex,ascii} = columnize(escape_pairs)

function html_clean(string s)
    -- Return s with every escape listed in hex replaced by the
    -- corresponding entry of ascii.
    string cleaned = substitute_all(s,hex,ascii)
    return cleaned
end function
 
-- marker that precedes the user link of a revision on a history page
constant history_user = `<span class='history-user'><a href="`

function count_tasks()
    -- Collect the task names from the Programming_Tasks and
    -- Draft_Programming_Tasks category pages, read the user name out of
    -- each task's one-entry history page (requested with dir=prev&limit=1,
    -- presumably the oldest revision, ie the task's creator), and print
    -- the 5 users with the most tasks, largest first.
    -- Pressing escape stops the scan early; the users found so far are
    -- still reported.
    -- Returns the total number of task names found.
    sequence tasks = dewiki(open_category("Programming_Tasks",1,2))
                   & dewiki(open_category("Draft_Programming_Tasks",2,2))
    integer ntasks = length(tasks)
    sequence users = {},    -- distinct user names, in order of discovery
             utask = {},    -- per user: indexes (into tasks) of their tasks
             ntask = {}     -- per user: number of tasks
    for i=1 to ntasks do
        string ti = tasks[i],
               url = sprintf("http://rosettacode.org/mw/index.php?title=%s&action=history&dir=prev&limit=1",{ti}),
               contents = open_download(ti&".hist",url,i,ntasks)
        -- the user name is the text of the link that follows history_user
        integer k = match(history_user,contents)
        if k=0 then crash("no history-user entry found for %s",{ti}) end if
        k = find('>',contents,k+length(history_user))
        if k=0 then crash("unterminated history-user link for %s",{ti}) end if
        k += 1
        integer e = match("</a>",contents,k)
        if e=0 then crash("unterminated history-user link for %s",{ti}) end if
        string user = contents[k..e-1]

        k = find(user,users)
        if k=0 then
            users = append(users,user)
            utask = append(utask,{i})
            ntask = append(ntask,1)
        else
            utask[k] &= i
            ntask[k] += 1
        end if
        tasks[i] = html_clean(ti) -- (in case you want to show them)
        if get_key()=#1B then progress("escape keyed\n") exit end if
    end for
    if curl!=NULL then
        -- release the handle and error buffer created by open_download()
        curl_easy_cleanup(curl)
        free(pErrorBuffer)
        curl = NULL
        pErrorBuffer = NULL
    end if
    progress("\n")
    -- tags holds user indexes ordered by ascending task count,
    -- so walk it backwards to list the biggest contributors first
    sequence tags = custom_sort(ntask,tagset(length(ntask)))
    integer shown = 0
    for i=length(tags) to 1 by -1 do
        integer ui = tags[i]
        printf(1,"%s tasks:%d\n",{users[ui],ntask[ui]})
        shown += 1
        -- stop after exactly five rows (the old ">5" test printed six)
        if shown>=5 then exit end if
    end for
    return ntasks
end function
 
-- run the survey, then report how many task names were found in total
progress("Total: %d\n",{count_tasks()})</lang>
{{out}}
As of 6th Jan 2020
<pre>
Paddy3118 tasks:176
Markhobley tasks:71
Gerard Schildberger tasks:59
Mwn3d tasks:55
NevilleDNZ tasks:39
Short Circuit tasks:33
Total: 1219
</pre>
7,815

edits