Rosetta Code/List authors of task descriptions: Difference between revisions
Rosetta Code/List authors of task descriptions (view source)
Revision as of 17:10, 3 February 2024
, 4 months ago→{{header|Wren}}: Minor tidy
m (→{{header|Phix}}: added syntax colouring, marked p2js incompatible) |
m (→{{header|Wren}}: Minor tidy) |
||
(7 intermediate revisions by 4 users not shown) | |||
Line 1:
{{draft task}}In this task, the goal is to compile an authorship list for task descriptions. A pseudocode example (in imperative style) that should accomplish this is as follows:
<
grab page source, discard everything after the first ==section==.
Cache as $previous. Note $author.
Line 9:
compare $previous2 to $previous. If different, record $author to $list.
replace $previous with $previous2
replace $author with $author2</
The following resources for HTTP interface information for MediaWiki may prove to be useful:
Line 22:
=={{header|Go}}==
<
import (
Line 100:
fmt.Printf("%2d: %3d %s\n", j+1, authorNumber.number, authorNumber.author)
}
}</
{{out}}
Line 132:
18: 13 Paulo Jorente
18: 13 Waldorf
</pre>
=={{header|Julia}}==
<syntaxhighlight lang="julia">""" Rosetta code task rosettacode.org/wiki/Rosetta_Code/List_authors_of_task_descriptions """
using Dates
using DataFrames
using EzXML
using HTTP
using JSON3
"""
    rosetta_code_authors(verbose = false)

Get the Rosetta Code authors of tasks and draft tasks.

The task titles are read from the MediaWiki API categories `Programming_Tasks` and
`Draft_Programming_Tasks`. For each title the page history is fetched, and the last
listed user and timestamp on that page are taken to be the task's author and
creation date.

# Arguments
- `verbose`: when `true`, print one progress line per processed page.

# Returns
A tuple `(df, authorfreqs)`:
- `df`: a `DataFrame` with columns `Author`, `Title`, `CreationDate` and `IsDraftTask`,
  sorted by `CreationDate` in reverse order. `CreationDate` is `missing` when no
  timestamp could be found or parsed.
- `authorfreqs`: a `DataFrame` with columns `Author` and `Freq` (tasks per author),
  sorted by `Freq` in reverse order.
"""
function rosetta_code_authors(verbose = false)
    URL = "https://rosettacode.org/w/api.php?"
    PARAMS = ["action" => "query", "format" => "json", "formatversion" => "2",
        "generator" => "categorymembers", "gcmtitle" => "Category:Programming_Tasks",
        "gcmlimit" => "500", "rawcontinue" => "", "prop" => "title"]
    DRAFTPARAMS = ["action" => "query", "format" => "json", "formatversion" => "2",
        "generator" => "categorymembers", "gcmtitle" => "Category:Draft_Programming_Tasks",
        "gcmlimit" => "500", "rawcontinue" => "", "prop" => "title"]
    titles = Pair{String, Bool}[]
    # Timestamps are hours:minutes ("17:10, 3 February 2024"); `M` is the minute code,
    # `S` would store the minutes in the seconds field.
    dateformat = DateFormat("HH:MM, d U y")
    # CreationDate admits `missing` because a page may lack a parseable timestamp.
    df = DataFrame(Author = String[], Title = String[],
        CreationDate = Union{DateTime, Missing}[], IsDraftTask = Bool[])

    # Get the titles of the tasks and draft tasks, store them in `titles`.
    for param in (PARAMS, DRAFTPARAMS)
        queryparams = copy(param)
        isdraft = param === DRAFTPARAMS
        while true
            # Percent-encode values so continuation tokens cannot corrupt the URL.
            query = join(
                (isempty(v) ? k : k * "=" * HTTP.escapeuri(v) for (k, v) in queryparams),
                "&",
            )
            resp = HTTP.get(URL * query)
            json = JSON3.read(String(resp.body))
            for p in json.query.pages
                push!(titles, p.title => isdraft)
            end
            # Break if there are no more result pages, else request the next batch.
            !haskey(json, "query-continue") && break
            queryparams = vcat(param,
                "gcmcontinue" => json["query-continue"]["categorymembers"]["gcmcontinue"])
        end
    end

    # Get the author of the first revision of each page, assumed to be the task author.
    for (i, (title, isdraft)) in enumerate(titles)
        resp = HTTP.get("https://rosettacode.org/w/index.php?title=" *
                        HTTP.escapeuri(title) * "&dir=prev&action=history")
        html = root(parsehtml(String(resp.body)))
        usernode = findlast("//span[@class=\"history-user\"]/a", html)
        isnothing(usernode) && continue
        rawauthor = nodecontent(usernode)
        isempty(rawauthor) && continue
        author = replace(rawauthor, r".+>" => "") # clean up from the hosting change
        datenode = findlast("//a[@class=\"mw-changeslist-date\"]", html)
        creationdate = if isnothing(datenode)
            missing
        else
            # `tryparse` yields `nothing` on a malformed timestamp instead of throwing.
            something(tryparse(DateTime, nodecontent(datenode), dateformat), missing)
        end
        push!(df, [author, title, creationdate, isdraft])
        verbose && println("Processed author $author of $title created $creationdate: page $i of ",
            length(titles))
    end

    sort!(df, :CreationDate, rev = true)
    authorfreqs = sort!(combine(groupby(df, :Author), nrow => :Freq), :Freq, rev = true)
    return df, authorfreqs
end
rosetta_code_authors()
</syntaxhighlight>{{out}}
<pre>
(1569×4 DataFrame
Row │ Author Title CreationDate IsDraftTask
│ String String DateTime Bool
──────┼───────────────────────────────────────────────────────────────────────────────────
1 │ Markjreed Sieve of Pritchard 2022-08-25T19:00:09 false
2 │ Thundergnat Penta-power prime seeds 2022-08-19T20:00:59 true
3 │ Thundergnat Quad-power prime seeds 2022-08-19T20:00:23 true
4 │ Thundergnat Riordan numbers 2022-08-18T18:00:20 true
5 │ Thundergnat Pairs with common factors 2022-08-18T12:00:07 true
6 │ Thundergnat Klarner-Rado sequence 2022-08-17T22:00:36 true
⋮ │ ⋮ ⋮ ⋮ ⋮
1564 │ Created by: X Determine if a string is numeric 2007-01-21T19:00:47 false
1565 │ MikeMol Empty program 2007-01-18T15:00:11 false
1566 │ 207.74.29.206 Window creation 2007-01-15T19:00:41 false
1567 │ MikeMol Table creation 2007-01-14T20:00:07 true
1568 │ MikeMol Hello world/Text 2007-01-09T16:00:45 false
1569 │ MikeMol File input/output 2007-01-09T14:00:45 false
1557 rows omitted,
315×2 DataFrame
Row │ Author Freq
│ String Int64
─────┼────────────────────────────
1 │ Paddy3118 199
2 │ CalmoSoft 135
3 │ Thundergnat 74
4 │ Markhobley 71
5 │ Gerard Schildberger 66
6 │ Mwn3d 55
⋮ │ ⋮ ⋮
310 │ Til 1
311 │ Backupbrain 1
312 │ Fabian 1
313 │ Vcelier 1
314 │ Created by: X 1
315 │ 207.74.29.206 1
303 rows omitted)
</pre>
=={{header|Nim}}==
{{trans|Go}}
<
let
Line 189 ⟶ 288:
inc pos
echo ($pos).align(2), " ", ($count).align(3), " ", author
if pos == 20: break</
{{out}}
Line 227 ⟶ 326:
properly thrash the rosettacode servers.
{{libheader|Phix/libcurl}}
<!--<
<span style="color: #000080;font-style:italic;">-- demo\rosetta\List_task_authors.exw</span>
<span style="color: #008080;">without</span> <span style="color: #008080;">js</span> <span style="color: #000080;font-style:italic;">-- (libcurl, file i/o, peek, progress..)</span>
<span style="color: #008080;">include</span> <span style="color: #000000;">
<span style="color: #008080;">constant</span> <span style="color: #000000;">history_user</span> <span style="color: #0000FF;">=</span> <span style="color: #008000;">`<span class='history-user'><a href="`</span>
Line 375 ⟶ 371:
<span style="color: #008080;">if</span> <span style="color: #7060A8;">get_key</span><span style="color: #0000FF;">()=</span><span style="color: #000000;">#1B</span> <span style="color: #008080;">then</span> <span style="color: #7060A8;">progress</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"escape keyed\n"</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">exit</span> <span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">for</span>
<span style="color: #7060A8;">progress</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"\n"</span><span style="color: #0000FF;">)</span>
<span style="color: #004080;">integer</span> <span style="color: #000000;">nusers</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">users</span><span style="color: #0000FF;">)</span>
Line 395 ⟶ 386:
<span style="color: #7060A8;">progress</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"Total: %d tasks by %d authors\n"</span><span style="color: #0000FF;">,</span><span style="color: #000000;">count_tasks</span><span style="color: #0000FF;">())</span>
<!--</
{{out}}
As of 6th Jan 2020
Line 424 ⟶ 415:
The pseudocode above is no longer really useful as the page format has changed significantly since this task was written. Rather than checking '''every''' edit to see if it was a change to the task description, we'll just assume the user that created the page is the task author. This isn't 100% accurate; a very few pages got renamed and recreated by someone other than the original author without preserving the history, so they are misreported (15 Puzzle Game, for instance), but it is as good as it is likely to get without extensive manual intervention. Subsequent edits to the task description are not credited. As it is, we must still make ''thousands'' of requests and pound the server pretty hard. Checking '''every''' edit would make the task several orders of magnitude more abusive of the server (and my internet connection).
<syntaxhighlight lang="raku"
use URI::Escape;
use JSON::Fast;
Line 437 ⟶ 428:
my $client = HTTP::UserAgent.new;
my $url = '
my $tablefile = './RC_Authors.txt';
Line 596 ⟶ 587:
sub sort-key ($a) { $a.lc.subst(/(\d+)/, ->$/ {0~(65+($0.chars)).chr~$0},:g) }
sub clear { "\r" ~ ' ' x 100 ~ "\r" }</
{{out|Sample output}}
Line 687 ⟶ 678:
=={{header|Wren}}==
{{libheader|libcurl}}
{{libheader|Wren-pattern}}
Line 693 ⟶ 683:
An embedded program so we can use libcurl.
Takes
<syntaxhighlight lang="wren">/* Rosetta_Code_List_authors_of_task_descriptions.wren */
import "./pattern" for Pattern
Line 732 ⟶ 722:
}
var p1 = Pattern.new("
var
var pi = "\"&"
var p3 = Pattern.new("a href/=\"//[wiki//User:|w//index.php?title/=User:|wiki//Special:Contributions//][+1/I]\"", 0, pi)
var findTasks = Fn.new { |category|
var url = "
var cmcontinue = ""
var tasks = []
while (true) {
var
var matches1 = p1.findAll(content)
var title = m.capsText[0].replace("'", "'").replace(""", "\"")
}
var m2 = p2.find(content)
if (m2) cmcontinue = "&cmcontinue=%(m2.capsText[0])" else break
}
return tasks
}
var tasks = findTasks.call("Programming_Tasks") // 'full' tasks only
tasks.addAll(findTasks.call("Draft_Programming_Tasks"))
var tc = tasks.count
var authors = {}
var task = tasks[0].replace(" ", "_").replace("+", "\%2B")
// check the last or only history page for each task
var url = "
tasks.removeAt(0)
var content = getContent.call(url)
content = content.replace("http://www.rosettacode.org", "")
var matches = p3.findAll(content)
// if there are no matches there must have been a 'bad gateway' or other error
if (matches.count == 0) {
// add back a failed task until it eventually succeeds
tasks.add(task)
continue
}
// the task author should be the final user on that page
var author = matches[-1].capsText[1].replace("_", " ")
Line 768 ⟶ 775:
var authorNumbers = authors.toList
authorNumbers.sort { |a, b| a.value > b.value }
// print
System.print("
System.print("Total tasks : %(tc)")
System.print("Total authors : %(authors.count)")
System.print("\nThe
System.print("Pos Tasks Author")
System.print("==== ===== ======")
var lastNumber = 0
var lastIndex = -1
var i = 0
for (authorNumber in authorNumbers.
var j = i
var eq = " "
Line 790 ⟶ 798:
i = i + 1
}
curl.easyCleanup()</
<br>
We now embed this script in the following C program, build and run.
<
#include <stdio.h>
Line 967 ⟶ 975:
WrenVM* vm = wrenNewVM(&config);
const char* module = "main";
const char* fileName = "
char *script = readFile(fileName);
WrenInterpretResult result = wrenInterpret(vm, module, script);
Line 983 ⟶ 991:
free(script);
return 0;
}</
{{out}}
<pre>
As at 10th September 2022:
Total tasks : 1569
Total authors : 315
The authors who have created at least 9 tasks are:
Pos Tasks Author
==== ===== ======
1 199 Paddy3118
2
3
4
5
6
7 39 NevilleDNZ
9 33
10
11
12
15
20 13 Paulo Jorente
20= 13 Abu
20= 13 Waldorf
23 12 Ce
23= 12 Kevin Reid
28= 9 Trizen
</pre>
|