Rosetta Code/List authors of task descriptions: Difference between revisions
Content added Content deleted
Thundergnat (talk | contribs) (→{{header|Perl 6}}: More efficient, only update things that been changed (The creation date never changes)) |
(Phix) |
||
Line 288: | Line 288: | ||
|} |
|} |
||
|} |
|} |
||
=={{header|Phix}}== |
|||
To keep the output nice and short, lists the top 5 task creators.<br> |
|||
Uses a cache: once a .hist file has been downloaded for a given |
|||
task, it is assumed to be good forever. Each task is about 20K, |
|||
so it will download around 25MB in total, for >= 1,219 tasks. |
|||
It does those sequentially, using curl_easy_ handles. I guess |
|||
if you really wanted to then using curl_multi_ handles would |
|||
properly thrash the rosettacode servers. |
|||
<lang Phix>-- demo\rosetta\List_task_authors.exw |
|||
include builtins\libcurl.e |
|||
atom curl = NULL |
|||
atom pErrorBuffer |
|||
function write_callback(atom pData, integer size, integer nmemb, integer fn) |
|||
integer bytes_written = size * nmemb |
|||
puts(fn,peek({pData,bytes_written})) |
|||
return bytes_written |
|||
end function |
|||
constant write_cb = call_back({'+', routine_id("write_callback")}) |
|||
integer lp = 0 -- (last \r'd progress message length) |
|||
procedure progress(string msg, sequence args = {}) |
|||
if length(args) then msg = sprintf(msg,args) end if |
|||
integer lm = length(msg) |
|||
if lm<lp then msg[$..$] = repeat(' ',lp-lm)&msg[$] end if |
|||
puts(1,msg) |
|||
lp = iff(msg[$]='\r'?lm:0) |
|||
end procedure |
|||
include builtins\timedate.e |
|||
-- for [Draft_]Programming_Tasks aka non-.hist files only: |
|||
integer refresh_cache = timedelta(days:=31) -- 0 for always |
|||
function open_download(string filename, url, integer i, n) |
|||
bool refetch = false |
|||
object text |
|||
filename = join_path({"rc_cache",filename}) |
|||
if file_exists(filename) then |
|||
text = trim(get_text(filename)) |
|||
refetch = (not sequence(text)) or (length(text)<10) |
|||
if not refetch and not match(".hist",filename) then |
|||
-- use existing file if <= refresh_cache (31 days) old |
|||
sequence last_mod = get_file_date(filename) -- (0.8.1+) |
|||
atom delta = timedate_diff(last_mod,date()) |
|||
refetch = (delta>refresh_cache) |
|||
end if |
|||
else |
|||
string directory = get_file_path(filename) |
|||
if get_file_type(directory)!=FILETYPE_DIRECTORY then |
|||
if not create_directory(directory,make_parent:=true) then |
|||
crash("cannot create %s directory",{directory}) |
|||
end if |
|||
end if |
|||
refetch = true |
|||
end if |
|||
if refetch then |
|||
progress("Downloading %d/%d %s...\r",{i,n,filename}) |
|||
if curl=NULL then |
|||
curl_global_init() |
|||
curl = curl_easy_init() |
|||
pErrorBuffer = allocate(CURL_ERROR_SIZE) |
|||
curl_easy_setopt(curl, CURLOPT_ERRORBUFFER, pErrorBuffer) |
|||
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_cb) |
|||
end if |
|||
url = substitute(url,"%3A",":") |
|||
url = substitute(url,"%2A","*") |
|||
curl_easy_setopt(curl, CURLOPT_URL, url) |
|||
integer fn = open(filename,"wb") |
|||
if fn=-1 then ?9/0 end if |
|||
curl_easy_setopt(curl, CURLOPT_WRITEDATA, fn) |
|||
while true do |
|||
CURLcode res = curl_easy_perform(curl) |
|||
if res=CURLE_OK then exit end if |
|||
string error = sprintf("%d",res) |
|||
if res=CURLE_COULDNT_RESOLVE_HOST then |
|||
error &= " [CURLE_COULDNT_RESOLVE_HOST]" |
|||
end if |
|||
progress("Error %s downloading file, retry?(Y/N):",{error}) |
|||
if lower(wait_key())!='y' then abort(0) end if |
|||
printf(1,"Y\n") |
|||
end while |
|||
close(fn) |
|||
text = get_text(filename) |
|||
end if |
|||
return text |
|||
end function |
|||
function open_category(string filename, integer i, n) |
|||
return open_download(filename&".htm","http://rosettacode.org/wiki/Category:"&filename,i,n) |
|||
end function |
|||
function dewiki(string s) |
|||
-- extract tasks from eg `<li><a href="/wiki/100_doors"` |
|||
sequence tasks = {} |
|||
integer start = 1, finish = match(`<div class="printfooter">`,s) |
|||
s = s[1..finish-1] |
|||
while true do |
|||
start = match(`<li><a href="/wiki/`,s,start) |
|||
if start=0 then exit end if |
|||
start += length(`<li><a href="/wiki/`) |
|||
finish = find('"',s,start) |
|||
string task = s[start..finish-1] |
|||
task = substitute_all(task,{"*",":"},{"%2A","%3A"}) |
|||
tasks = append(tasks,task) |
|||
start = finish+1 |
|||
end while |
|||
return tasks |
|||
end function |
|||
constant {hex,ascii} = columnize({{"%2A","*"}, |
|||
{"%3A",":"}, |
|||
{"%27","'"}, |
|||
{"%2B","+"}, |
|||
{"%22",`"`}, |
|||
{"%E2%80%93","-"}, |
|||
{"%E2%80%99","'"}, |
|||
{"%C3%A8","e"}, |
|||
{"%C3%A9","e"}}) |
|||
function html_clean(string s) |
|||
return substitute_all(s,hex,ascii) |
|||
end function |
|||
constant history_user = `<span class='history-user'><a href="` |
|||
function count_tasks() |
|||
sequence tasks = dewiki(open_category("Programming_Tasks",1,2)) |
|||
& dewiki(open_category("Draft_Programming_Tasks",2,2)) |
|||
integer ntasks = length(tasks) |
|||
sequence users = {}, |
|||
utask = {}, |
|||
ntask = {} |
|||
for i=1 to ntasks do |
|||
string ti = tasks[i], |
|||
url = sprintf("http://rosettacode.org/mw/index.php?title=%s&action=history&dir=prev&limit=1",{ti}), |
|||
contents = open_download(ti&".hist",url,i,ntasks) |
|||
integer k = match(history_user,contents) |
|||
if k=0 then ?9/0 end if |
|||
k = find('>',contents,k+length(history_user)) |
|||
if k=0 then ?9/0 end if |
|||
k += 1 |
|||
integer e = match("</a>",contents,k) |
|||
if e=0 then ?9/0 end if |
|||
string user = contents[k..e-1] |
|||
k = find(user,users) |
|||
if k=0 then |
|||
users = append(users,user) |
|||
utask = append(utask,{i}) |
|||
ntask = append(ntask,1) |
|||
else |
|||
utask[k] &= i |
|||
ntask[k] += 1 |
|||
end if |
|||
tasks[i] = html_clean(ti) -- (in case you want to show them) |
|||
if get_key()=#1B then progress("escape keyed\n") exit end if |
|||
end for |
|||
if curl!=NULL then |
|||
curl_easy_cleanup(curl) |
|||
free(pErrorBuffer) |
|||
curl = NULL |
|||
pErrorBuffer = NULL |
|||
end if |
|||
progress("\n") |
|||
sequence tags = custom_sort(ntask,tagset(length(ntask))) |
|||
integer top5 = 0 |
|||
for i=length(tags) to 1 by -1 do |
|||
integer ui = tags[i] |
|||
printf(1,"%s tasks:%d\n",{users[ui],ntask[ui]}) |
|||
top5 += 1 if top5>5 then exit end if |
|||
end for |
|||
return ntasks |
|||
end function |
|||
progress("Total: %d\n",{count_tasks()})</lang> |
|||
{{out}} |
|||
As of 6th Jan 2020 |
|||
<pre> |
|||
Paddy3118 tasks:176 |
|||
Markhobley tasks:71 |
|||
Gerard Schildberger tasks:59 |
|||
Mwn3d tasks:55 |
|||
NevilleDNZ tasks:39 |
|||
Short Circuit tasks:33 |
|||
Total: 1219 |
|||
</pre> |