Jump to content

Rosetta Code/Count examples: Difference between revisions

m (→‎{{header|Perl 6}}: Minor tweak to regex)
Line 2,059:
Counts the number of "=={{header|" occurrences via the web API (but gets the task list via scraping).<br>
Since downloading all the pages can be very slow, this uses a cache.<br>
Limiting the tasks (via notlang) to those not yet done in "Phix" fairly obviously speeds it up tenfold :-)
<lang Phix>-- demo\rosetta\Count_examples.exw
-- Run-time options:
--  include_drafts: when true, count_tasks() also processes Category:Draft_Programming_Tasks
--  sort_by_count:  when true, per-task results are collected and printed sorted by count
--                  (highest first) instead of a sample being printed as it goes
--  notlang:        when non-empty, tasks already listed in that language's category are skipped
constant include_drafts = true,
sort_by_count = false,
-- notlang = "Phix" -- or "" (ie a zero length string) for all
notlang = ""
-- lp: length of the previous progress() message if it ended in '\r', otherwise 0
integer lp = 0
procedure progress(string msg, sequence args = {})
    -- Print a status message on the console (file number 1).
    --  msg:  the text, or a sprintf() format string when args is non-empty
    --  args: optional values for the format string
    -- A message ending in '\r' is expected to be overwritten by the next one, so its
    -- length is remembered in lp; a shorter follow-up message is padded with spaces
    -- (inserted before its final character) to blank out what is left of the old one.
    if length(args) then msg = sprintf(msg,args) end if
    integer lm = length(msg)
    if lm<lp then
        msg[$..$] = repeat(' ',lp-lm)&msg[$]
    end if
    -- actually display the message (without this the routine was silent)
    puts(1,msg)
    lp = iff(msg[$]='\r'?lm:0)
end procedure
include builtins\timedate.e
-- refresh_cache: maximum age a cached file may have before open_download() fetches it again;
-- open_download() also increases it by one day after every download.
integer refresh_cache = timedelta(days:=365) -- 0 for always
--integer refresh_cache = timedelta(days:=1) -- 0 for always
include builtins\libcurl.e
-- curl: the libcurl easy handle, created by open_download() on the first download (NULL until then)
atom curl = NULL
-- pErrorBuffer: CURL_ERROR_SIZE bytes allocated alongside curl and registered as CURLOPT_ERRORBUFFER
atom pErrorBuffer
function write_callback(atom pData, integer size, integer nmemb, integer fn)
    -- libcurl write callback (registered via CURLOPT_WRITEFUNCTION).
    --  pData:       address of the received data
    --  size, nmemb: element size and count; the data is size*nmemb bytes long
    --  fn:          the open file number supplied through CURLOPT_WRITEDATA
    -- Returns the number of bytes handled, which tells libcurl the write succeeded.
    integer bytes_written = size * nmemb
    -- copy the received bytes to the output file (previously nothing was ever written)
    puts(fn,peek({pData,bytes_written}))
    return bytes_written
end function
-- Machine-callable address of write_callback(), handed to libcurl as CURLOPT_WRITEFUNCTION
-- in open_download(). NOTE(review): the '+' presumably selects the cdecl calling convention - confirm.
constant write_cb = call_back({'+', routine_id("write_callback")})
function open_download(string filename, url)
    -- Return the contents of url, using a file cache in the rc_cache directory.
    --  filename: name of the cache file, relative to rc_cache (may contain sub-directories)
    --  url:      where to download it from when the cache is missing, too old or too small
    -- Returns whatever get_text() yields for the cache file (trimmed when served from cache).
    -- Crashes if a directory or the cache file cannot be created; terminates the program
    -- if a download fails and the user declines to retry.
    bool refetch = true
    if get_file_type("rc_cache")!=FILETYPE_DIRECTORY then
        if not create_directory("rc_cache") then
            crash("cannot create rc_cache directory")
        end if
    end if
    filename = join_path({"rc_cache",filename})
    if file_exists(filename) then
        -- use existing file if <= refresh_cache (365 days) old
        sequence last_mod = get_file_date(filename)     -- (0.8.1+)
        atom delta = timedate_diff(last_mod,date())
        refetch = (delta>refresh_cache) or get_file_size(filename)=0
    else
        -- new file: make sure any sub-directory it lives in exists before open()
        -- (this only makes sense when the file does not exist yet, hence the else)
        string directory = get_file_path(filename)
        if get_file_type(directory)!=FILETYPE_DIRECTORY then
            if not create_directory(directory,make_parent:=true) then
                crash("cannot create %s directory",{directory})
            end if
        end if
    end if
    object text
    if not refetch then
        text = trim(get_text(filename))
        -- an unreadable or implausibly short cache file is treated as missing
        refetch = (not sequence(text)) or (length(text)<10)
    end if
    if refetch then
        progress("Downloading %s...\r",{filename})
        if curl=NULL then
            -- first download: set up the shared easy handle
            curl = curl_easy_init()
            pErrorBuffer = allocate(CURL_ERROR_SIZE)
            curl_easy_setopt(curl, CURLOPT_ERRORBUFFER, pErrorBuffer)
            curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_cb)
        end if
        -- undo the escaping applied to task names by dewiki()
        url = substitute(url,"%3A",":")
        url = substitute(url,"%2A","*")
        curl_easy_setopt(curl, CURLOPT_URL, url)
        integer fn = open(filename,"wb")
        if fn=-1 then
            crash("cannot open %s for writing",{filename})
        end if
        curl_easy_setopt(curl, CURLOPT_WRITEDATA, fn)
        while true do
            CURLcode res = curl_easy_perform(curl)
            if res=CURLE_OK then exit end if
            string error = sprintf("%d",res)
            if res=CURLE_COULDNT_RESOLVE_HOST then
                error &= " [CURLE_COULDNT_RESOLVE_HOST]"
            end if
            progress("Error %s downloading file, retry?(Y/N):",{error})
            if lower(wait_key())!='y' then abort(0) end if
        end while
        -- flush and release the file before reading it back
        close(fn)
        refresh_cache += timedelta(days:=1) -- did I mention it is slow?
        text = get_text(filename)
    end if
    return text
end function
function open_category(string filename)
    -- Fetch (via the cache) the wiki category page named by filename;
    -- the cache file gets a ".htm" extension.
    string cache_name = filename&".htm",
           category_url = "http://rosettacode.org/wiki/Category:"&filename
    return open_download(cache_name,category_url)
end function
function dewiki(string s)
    -- Extract task names from the html of a category page.
    --  s: the page text; links look like `<li><a href="/wiki/100_doors"`
    -- Only text before `<div class="printfooter">` is examined (all of it when that
    -- marker is absent). Returns a sequence of task names in page order, with '*'
    -- and ':' replaced by "%2A" and "%3A" respectively.
    constant prefix = `<li><a href="/wiki/`
    sequence tasks = {}
    integer start = 1, finish = match(`<div class="printfooter">`,s)
    if finish!=0 then
        s = s[1..finish-1]
    end if
    while true do
        start = match(prefix,s,start)
        if start=0 then exit end if
        start += length(prefix)
        finish = find('"',s,start)
        -- a link with no closing quote (truncated page) would otherwise reset
        -- start to 1 and loop forever
        if finish=0 then exit end if
        string task = s[start..finish-1]
        task = substitute_all(task,{"*",":"},{"%2A","%3A"})
        tasks = append(tasks,task)
        start = finish+1
    end while
    return tasks
end function
-- Percent-escapes and the characters they stand for, as parallel sequences.
-- The original table was truncated; it is closed here with the two escapes this
-- program itself produces in dewiki(). Add further {escape,character} pairs as needed.
constant {hex,ascii} = columnize({{"%2A","*"},
                                  {"%3A",":"}})
function html_clean(string s)
    -- Return s with every percent-escape listed in hex replaced by its ascii character,
    -- for display purposes.
    return substitute_all(s,hex,ascii)
end function
function count_tasks()
    -- Count the number of language examples on every task page.
    -- Builds the task list from the category pages (plus drafts when include_drafts
    -- is set, minus tasks already done in notlang when that is non-empty), then
    -- counts the distinct consecutive `{{header|...}}` entries in the raw text of each task.
    -- Prints per-task counts (a sample, or all of them sorted when sort_by_count
    -- is set) and returns the grand total. Pressing escape stops the scan early.
    --
    -- note this lot use web scraping (as cribbed from a similar task) ...
    sequence tasks = dewiki(open_category("Programming_Tasks"))
    if include_drafts then
        tasks &= dewiki(open_category("Draft_Programming_Tasks"))
        tasks = sort(tasks)
    end if
    if length(notlang) then
        -- filter already done in specified language
        string langurl = "http://rosettacode.org/wiki/Category:"&notlang
        sequence done = dewiki(open_download(notlang&".htm",langurl))
        integer k = 0
        for i=1 to length(tasks) do
            if not find(tasks[i],done) then
                k += 1
                tasks[k] = tasks[i]
            end if
        end for
        tasks = tasks[1..k]
        done = {}
    end if
    progress("%d tasks found\n",{length(tasks)})
    -- ... whereas the individual tasks use the web api instead (3x smaller/faster)
    integer total_count = 0
    sequence task_counts = repeat(0,length(tasks))
    for i=1 to length(tasks) do
        string ti = tasks[i],
               url = sprintf("http://rosettacode.org/mw/index.php?title=%s&action=raw",{ti}),
               contents = open_download(ti&".raw",url),
               prev = "", this
        integer count = 0, start = 1
        while true do
            start = match(`{{header|`,contents,start)
            if start=0 then exit end if
            -- skip duplicates/we also have to cope with eg
            -- =={{header|Python}}==                                        \
            -- ==={{header|Python}} Original===                              } count
            -- ==={{header|Python}} Succinct===                              }  once
            -- ==={{header|Python}} Recursive ===                           /
            -- =={{header|Mathematica}} / {{header|Wolfram Language}}==     \
            -- =={{header|Icon}} and {{header|Unicon}}==                     } count
            -- == {{header|Icon}} and {{header|Unicon}} ==                  /   both
            -- == {{header|Java}}==
            -- etc. Note however that this /does/ count eg
            -- ==={{header|Applesoft BASIC}}===                             \
            -- ==={{header|BASIC256}}===                                     } count
            -- ==={{header|Commodore BASIC}}===                              }  'em
            -- ==={{header|IS-BASIC}}===                                     }  all
            -- ==={{header|Sinclair ZX81 BASIC}}===                         /
            integer finish = match(`}}`,contents,start+1)
            -- an unterminated header (no closing braces) would give an invalid slice
            if finish=0 then exit end if
            this = contents[start..finish]
            if this!=prev then
                count += 1
            end if
            prev = this
            start += length(`{{header|`)
        end while
        if sort_by_count then
            task_counts[i] = count
        elsif length(notlang) or i<=2 or i>=length(tasks)-1 or mod(i,200)=0 then
            -- show everything when filtering, else first two, every 200th and last two
            progress("%s %d\n",{html_clean(ti),count})
        end if
        total_count += count
        if get_key()=#1B then progress("escape keyed\n") exit end if
    end for
    if curl!=NULL then
        -- release the libcurl handle and error buffer rather than just forgetting them
        curl_easy_cleanup(curl)
        free(pErrorBuffer)
        curl = NULL
        pErrorBuffer = NULL
    end if
    if sort_by_count then
        -- tags holds task indexes ordered by ascending count; print highest first
        sequence tags = custom_sort(task_counts,tagset(length(tasks)))
        for i=length(tags) to 1 by -1 do
            integer ti = tags[i]
            progress("%s %d\n",{html_clean(tasks[ti]),task_counts[ti]})
        end for
    end if
    return total_count
end function
-- Program entry point: scan all tasks and report the grand total of examples.
progress("Total: %d\n",{count_tasks()})</lang>
{{out}} (as of 24/7/19, showing first two, every 200th, and last two)
1175 tasks found
100_doors 294
15_Puzzle_Game 55
Compiler/virtual_machine_interpreter 14
General_FizzBuzz 51
Maximum_triangle_path_sum 46
Random_number_generator_(included) 89
String_concatenation 155
Zhang-Suen_thinning_algorithm 29
Zig-zag_matrix 99
Total: 64405
Line 2,081 ⟶ 2,308:
Abstract type: 29


Cookies help us deliver our services. By using our services, you agree to our use of cookies.