Jump to content

Rosetta Code/List authors of task descriptions

From Rosetta Code
Rosetta Code/List authors of task descriptions is a draft programming task. It is not yet considered ready to be promoted as a complete task, for reasons that should be found in its talk page.

In this task, the goal is to compile an authorship list for task descriptions. A pseudocode example (in imperative style) that should accomplish this is as follows:

for each task page
  grab page source, discard everything after the first ==section==.
Cache as $previous. Note $author.
  for each revision
    grab page source, discard everything after first ==section==.
Cache as $previous2. Note $author2
    compare $previous2 to $previous. If different, record $author to $list.
    replace $previous with $previous2
    replace $author with $author2

The following resources for HTTP interface information for MediaWiki may prove to be useful:

Conversely, some languages have libraries which abstract these interfaces into language-native idioms. Use of these abstractions is perfectly fine.


Please DO NOT add a full output for each programming language; just show a representative sample. For an full listing, see Rosetta_Code/List_authors_of_task_descriptions/Full_list.

Go

package main

import (
    "fmt"
    "io/ioutil"
    "net/http"
    "regexp"
    "sort"
    "strings"
)

type authorNumber struct {
    author string
    number int
}

func main() {
    ex1 := `<li><a href="/wiki/(.*?)"`
    ex2 := `a href="/(wiki/User:|mw/index\.php\?title=User:|wiki/Special:Contributions/)([^"&]+)`
    re1 := regexp.MustCompile(ex1)
    re2 := regexp.MustCompile(ex2)
    url1 := "http://rosettacode.org/wiki/Category:Programming_Tasks"
    url2 := "http://rosettacode.org/wiki/Category:Draft_Programming_Tasks"
    urls := []string{url1, url2}
    var tasks []string
    for _, url := range urls {
        resp, _ := http.Get(url)
        body, _ := ioutil.ReadAll(resp.Body)
        // find all tasks
        matches := re1.FindAllStringSubmatch(string(body), -1)
        resp.Body.Close()
        for _, match := range matches {
            // exclude any 'category' references
            if !strings.HasPrefix(match[1], "Category:") {
                tasks = append(tasks, match[1])
            }
        }
    }
    authors := make(map[string]int)
    for _, task := range tasks {
        // check the last or only history page for each task
        page := fmt.Sprintf("http://rosettacode.org/mw/index.php?title=%s&dir=prev&action=history", task)
        resp, _ := http.Get(page)
        body, _ := ioutil.ReadAll(resp.Body)
        // find all the users in that page
        matches := re2.FindAllStringSubmatch(string(body), -1)
        resp.Body.Close()
        //  the task author should be the final user on that page
        author := matches[len(matches)-1][2]
        author = strings.ReplaceAll(author, "_", " ")
        // add this task to the author's count
        authors[author]++
    }
    // sort the authors in descending order by number of tasks created
    authorNumbers := make([]authorNumber, 0, len(authors))
    for k, v := range authors {
        authorNumbers = append(authorNumbers, authorNumber{k, v})
    }
    sort.Slice(authorNumbers, func(i, j int) bool {
        return authorNumbers[i].number > authorNumbers[j].number
    })
    // print the top twenty say
    fmt.Println("Total tasks   :", len(tasks))
    fmt.Println("Total authors :", len(authors))
    fmt.Println("\nThe top 20 authors by number of tasks created are:\n")
    fmt.Println("Pos  Tasks  Author")
    fmt.Println("===  =====  ======")
    lastNumber, lastIndex := 0, -1
    for i, authorNumber := range authorNumbers[0:20] {
        j := i
        if authorNumber.number == lastNumber {
            j = lastIndex
        } else {
            lastIndex = i
            lastNumber = authorNumber.number
        }
        fmt.Printf("%2d:   %3d   %s\n", j+1, authorNumber.number, authorNumber.author)
    }
}
Output:

As of 5th March 2020:

Total tasks   : 1237
Total authors : 287

The top 20 authors by number of tasks created are:

Pos  Tasks  Author
===  =====  ======
 1:   178   Paddy3118
 2:    71   Markhobley
 3:    61   Gerard Schildberger
 4:    55   Mwn3d
 5:    39   NevilleDNZ
 6:    33   Short Circuit
 7:    30   Nigel Galloway
 8:    29   Thundergnat
 9:    23   Grondilu
10:    21   Dkf
11:    20   Fwend
11:    20   Blue Prawn
13:    19   CalmoSoft
14:    18   Kernigh
15:    17   ShinTakezou
15:    17   Dmitry-kazakov
15:    17   Ledrug
18:    13   Abu
18:    13   Paulo Jorente
18:    13   Waldorf

Julia

""" Rosetta code task rosettacode.org/wiki/Rosetta_Code/List_authors_of_task_descriptions """

using Dates
using DataFrames
using EzXML
using HTTP
using JSON3

""" Get Rosetta Code authors of tasks, output as dataframe """
function rosetta_code_authors(verbose = false)
    URL = "https://rosettacode.org/w/api.php?"
    PARAMS = ["action" => "query", "format" => "json", "formatversion" => "2", "generator" => "categorymembers",
       "gcmtitle" => "Category:Programming_Tasks", "gcmlimit" => "500", "rawcontinue" => "", "prop" => "title"]
    DRAFTPARAMS = ["action" => "query", "format" => "json", "formatversion" => "2", "generator" => "categorymembers",
       "gcmtitle" => "Category:Draft_Programming_Tasks", "gcmlimit" => "500", "rawcontinue" => "", "prop" => "title"]

    titles = Pair{String, Bool}[]
    dateformat = DateFormat("HH:SS, d U y")
    df = empty!(DataFrame([[""], [""], [now()], [true]], ["Author", "Title", "CreationDate", "IsDraftTask"]))

    for param in [PARAMS, DRAFTPARAMS] # get the titles of the tasks and draft tasks, store list in alltasks
        continueposition = ""
        queryparams = copy(param)
        isdraft = param == DRAFTPARAMS
        while true
            resp = HTTP.get(URL * join(map(p -> p[1] * (p[2] == "" ? "" : ("=" * p[2])), queryparams), "&"))
            json = JSON3.read(String(resp.body))
            pages = json.query.pages
            for p in pages
                push!(titles,  p.title => isdraft)
            end
            !haskey(json, "query-continue") && break  # break if no more pages, else continue to next pages
            queryparams = vcat(param, "gcmcontinue" => json["query-continue"]["categorymembers"]["gcmcontinue"])
        end
    end


    for (i, title) in pairs(titles) # Get author of first revision of each page, assumed to be task creator/author
        resp = HTTP.get("https://rosettacode.org/w/index.php?title=" * escape(title[1]) * "&dir=prev&action=history")
        html = root(parsehtml(String(resp.body)))
        xpath = "//span[@class=\"history-user\"]/a"
        header = findlast(xpath, html)
        author = header != nothing ? nodecontent(header) : ""
        xpath2 = "//a[@class=\"mw-changeslist-date\"]"
        header2 = findlast(xpath2, html)
        creationdate = header2 != nothing ? DateTime(nodecontent(header2), dateformat) : missing
        if author != ""
            author = replace(author, r".+>" => "")  # clean up from the hosting change
            push!(df, [author, title[1], creationdate, title[2]])
            verbose && println("Processed author $author of $title created $creationdate: page $i of ", length(titles))
        end
    end
    sort!(df, :CreationDate, rev = true)
    authorfreqs = sort!(combine(groupby(df, :Author), nrow => :Freq), :Freq, rev = true)
    return df, authorfreqs
end

rosetta_code_authors()
Output:
(1569×4 DataFrame
  Row │ Author         Title                             CreationDate         IsDraftTask 
      │ String         String                            DateTime             Bool        
──────┼───────────────────────────────────────────────────────────────────────────────────
    1 │ Markjreed      Sieve of Pritchard                2022-08-25T19:00:09        false
    2 │ Thundergnat    Penta-power prime seeds           2022-08-19T20:00:59         true
    3 │ Thundergnat    Quad-power prime seeds            2022-08-19T20:00:23         true
    4 │ Thundergnat    Riordan numbers                   2022-08-18T18:00:20         true
    5 │ Thundergnat    Pairs with common factors         2022-08-18T12:00:07         true
    6 │ Thundergnat    Klarner-Rado sequence             2022-08-17T22:00:36         true
  ⋮   │       ⋮                       ⋮                           ⋮                ⋮
 1564 │ Created by: X  Determine if a string is numeric  2007-01-21T19:00:47        false
 1565 │ MikeMol        Empty program                     2007-01-18T15:00:11        false
 1566 │ 207.74.29.206  Window creation                   2007-01-15T19:00:41        false
 1567 │ MikeMol        Table creation                    2007-01-14T20:00:07         true
 1568 │ MikeMol        Hello world/Text                  2007-01-09T16:00:45        false
 1569 │ MikeMol        File input/output                 2007-01-09T14:00:45        false
                                                                         1557 rows omitted,
315×2 DataFrame
 Row │ Author               Freq  
     │ String               Int64 
─────┼────────────────────────────
   1 │ Paddy3118              199
   2 │ CalmoSoft              135
   3 │ Thundergnat             74
   4 │ Markhobley              71
   5 │ Gerard Schildberger     66
   6 │ Mwn3d                   55
  ⋮  │          ⋮             ⋮
 310 │ Til                      1
 311 │ Backupbrain              1
 312 │ Fabian                   1
 313 │ Vcelier                  1
 314 │ Created by: X            1
 315 │ 207.74.29.206            1
                  303 rows omitted)

Nim

Translation of: Go
import algorithm, httpclient, re, strutils, tables

let
  re1 = re("""<li><a href="/wiki/(.*?)"""")
  re2 = re("""a href="/wiki/User:|mw/index\.php\?title=User:|wiki/Special:Contributions/([^"&]+)""")

const
  Url1 = "http://rosettacode.org/wiki/Category:Programming_Tasks"
  Url2 = "http://rosettacode.org/wiki/Category:Draft_Programming_Tasks"
  Urls = [Url1, Url2]

var client = newHttpClient()

var tasks: seq[string]
var matches: array[1, string]
var start = 0
for url in Urls:
  let body = client.getContent(url)
  # Find all tasks.
  while true:
    start = body.find(re1, matches, start) + 1
    if start == 0: break
    if not matches[0].startsWith("Category:"):
      tasks.add matches[0]

var authors: CountTable[string]
for task in tasks:
  # Check the last or only history page for each task.
  let page = "http://rosettacode.org/mw/index.php?title=$#&dir=prev&action=history".format(task)
  let body = client.getContent(page)
  # Find all the users in that page. The task author should be the final user on that page.
  var matches: array[1, string]
  start = 0
  while true:
    start = body.find(re2, matches, start) + 1
    if start == 0: break
  let author = matches[0].replace('-', ' ')
  # Add this task to the author's count.
  authors.inc(author)

# Sort the authors in descending order by number of tasks created.
authors.sort(Descending)

# Print the top twenty.
echo "Total tasks:   ", tasks.len
echo "Total authors: ", authors.len
echo "\nThe top 20 authors by number of tasks created are:\n"
echo "Pos  Tasks  Author"
echo "===  =====  ======"
var pos = 0
for author, count in authors.pairs:
  inc pos
  echo ($pos).align(2), "    ", ($count).align(3), "   ", author
  if pos == 20: break
Output:

On 2021-06-29.

The top 20 authors by number of tasks created are:

Pos  Tasks  Author
===  =====  ======
 1    196   Paddy3118
 2     84   CalmoSoft
 3     72   Markhobley
 4     66   Gerard_Schildberger
 5     55   Mwn3d
 6     39   NevilleDNZ
 7     39   Thundergnat
 8     33   Nigel_Galloway
 9     33   Short_Circuit
10     23   Grondilu
11     21   Blue_Prawn
12     20   Fwend
13     20   Dkf
14     18   Kernigh
15     17   Ledrug
16     17   ShinTakezou
17     17   Dmitry kazakov
18     14   Wherrera
19     13   Waldorf
20     13   Abu

Phix

To keep the output nice and short, lists the top 5 task creators.
Uses a cache: once a .hist file has been downloaded for a given task, it is assumed to be good forever. Each task is about 20K, so it will download around 25MB in total, for >= 1,219 tasks. It does those sequentially, using curl_easy_ handles. I guess if you really wanted to then using curl_multi_ handles would properly thrash the rosettacode servers.

Library: Phix/libcurl
-- demo\rosetta\List_task_authors.exw
without js -- (libcurl, file i/o, peek, progress..)
include rosettacode_cache.e -- see Rosetta_Code/Count_examples#Phix

constant history_user = `<span class='history-user'><a href="`

function count_tasks()
    if get_file_type("rc_cache")!=FILETYPE_DIRECTORY then
        if not create_directory("rc_cache") then
            crash("cannot create rc_cache directory")
        end if
    end if
    sequence tasks = dewiki(open_category("Programming_Tasks",1,2))
                   & dewiki(open_category("Draft_Programming_Tasks",2,2))
    integer ntasks = length(tasks)
    sequence users = {},
             utask = {},
             ntask = {}
    for i=1 to ntasks do
        string ti = tasks[i],
               url = sprintf("http://rosettacode.org/mw/index.php?title=%s&action=history&dir=prev&limit=1",{ti}),
               contents = open_download(ti&".hist",url,i,ntasks)
        integer k = match(history_user,contents)
        if k=0 then ?9/0 end if
        k = find('>',contents,k+length(history_user))
        if k=0 then ?9/0 end if
        k += 1
        integer e = match("</a>",contents,k)
        if e=0 then ?9/0 end if
        string user = contents[k..e-1]

        k = find(user,users)
        if k=0 then
            users = append(users,user)
            utask = append(utask,{i})
            ntask = append(ntask,1)
        else
            utask[k] &= i
            ntask[k] += 1
        end if
        ti = html_clean(ti) -- (in case you want to show them)
        tasks[i] = ti
        if get_key()=#1B then progress("escape keyed\n") exit end if
    end for
    curl_cleanup()
    progress("\n")
    integer nusers = length(users)
    sequence tags = custom_sort(ntask,tagset(nusers))
    integer top5 = 0
    for i=nusers to 1 by -1 do
        integer ui = tags[i]
        if ntask[ui]<5 then exit end if
        printf(1,"%s tasks:%d\n",{users[ui],ntask[ui]})
        top5 += 1 if top5>5 then exit end if
    end for
    return {ntasks,nusers}
end function

progress("Total: %d tasks by %d authors\n",count_tasks())
Output:

As of 6th Jan 2020

Paddy3118 tasks:176
Markhobley tasks:71
Gerard Schildberger tasks:59
Mwn3d tasks:55
NevilleDNZ tasks:39
Short Circuit tasks:33
Total: 1219

As of 6th Jan 2022

Paddy3118 tasks:199
CalmoSoft tasks:128
Markhobley tasks:71
Gerard Schildberger tasks:66
Mwn3d tasks:55
Thundergnat tasks:44
Total: 1492 tasks by 307 authors

Raku

(formerly Perl 6)

Works with: Rakudo version 2018.03

The pseudocode above is no longer really useful as the page format has changed significantly since this task was written. Rather than checking every edit to see if it was a change to the task description, we'll just assume the user that created the page is the task author. This isn't 100% accurate; a very few pages got renamed and recreated by someone other than the original author without preserving the history, so they are misreported (15 Puzzle Game for instance,) but is as good as it is likely to get without extensive manual intervention. Subsequent edits to the task description are not credited. As it is, we must still make thousands of requests and pound the server pretty hard. Checking every edit would make the task several of orders of magnitude more abusive of the server (and my internet connection.)

use HTTP::UserAgent;
use URI::Escape;
use JSON::Fast;
use Sort::Naturally;

# Friendlier descriptions for task categories
my %cat = (
    'Programming_Tasks' => 'Task',
    'Draft_Programming_Tasks' => 'Draft'
);

my $client = HTTP::UserAgent.new;

my $url = 'https://rosettacode.org/w';

my $tablefile = './RC_Authors.txt';
my $hashfile  = './RC_Authors.json';

my %tasks;

# clear screen
run($*DISTRO.is-win ?? 'cls' !! 'clear');

%tasks = $hashfile.IO.e ?? $hashfile.IO.slurp.&from-json !! ( );
sleep 1;

#=begin update

note 'Retrieving task information...';

my %filter;
for %cat.keys.sort -> $category {
    mediawiki-query(
        $url, 'pages',
        :generator<categorymembers>,
        :gcmtitle("Category:$category"),
        :gcmlimit<350>,
        :rawcontinue(),
        :prop<title>
    ).map( { %filter{.<title>} = %cat{$category} } )
}

my $delete = %tasks.keys (-) %filter.keys;

%tasks.delete($_) for $delete.keys; #Tasks that have changed names or been removed

my @add;
for %filter.keys -> $title {
    if %tasks{$title}:exists {
        %tasks{$title}<category> = %filter{$title} # update status
    } else {
        @add.push: $title => %filter{$title} # New Tasks
    }
}

if @add {
    .say for 'Adding new tasks:', |@add;
}

for @add -> $task {
    mediawiki-query(
        $url, 'pages',
        :titles($task.key),
        :prop<revisions>,
        :rvprop<user|timestamp>,
        :rvstart<2000-01-01T01:01:01Z>,
        :rvdir<newer>,
        :rvlimit<1>
    ).map: {
        print clear, 1 + $++, ' ', .[0]<title>;
        %tasks{.[0]<title>}<category> = $task.value;
        %tasks{.[0]<title>}<author> = .[0]<revisions>[0]<user>;
        %tasks{.[0]<title>}<date> = .[0]<revisions>[0]<timestamp>.subst(/'T'.+$/, '')
    }
}

print clear;

# Save information to a local file
note "\nTask information saved to local file: {$hashfile.IO.absolute}";
$hashfile.IO.spurt(%tasks.&to-json);

#=end update

# Load information from local file
%tasks = $hashfile.IO.e ?? $hashfile.IO.slurp.&from-json !! ( );

# Convert saved task / author info to a table
note "\nBuilding table...";
my $count    = +%tasks;
my $taskcnt  = +%tasks.grep: *.value.<category> eq %cat<Programming_Tasks>;
my $draftcnt = $count - $taskcnt;

# Open a file handle to dump table in
my $out = open($tablefile, :w)  or die "$!\n";

# Add table boilerplate and header
$out.say:
    "\{|class=\"wikitable sortable\"\n",
    "|+ As of { Date.today } :: Total Tasks: { $count }:: Tasks: { $taskcnt }",
    " ::<span style=\"background-color:#ffd\"> Draft Tasks: { $draftcnt } </span>",
    ":: By {+%tasks{*}».<author>.unique} Authors\n",
    "! Author !! Tasks !! Authored"
;

# Get sorted unique list of task authors
for %tasks{*}».<author>.unique.sort(&naturally) -> $author {

    # Get list of tasks by this author
    my @these = %tasks.grep( { $_.value.<author> eq $author } );
    my $s = +@these == 1 ?? '' !! 's';

    # Add author and contributions link to the first two cells
    $out.say:
    $author ~~ /\d/
      ?? "|- id=\"$author\"\n|data-sort-value=\"{ sort-key $author }\"|[[User:$author|$author]]\n"~
         "|data-sort-value=\"{ +@these }\"|[[Special:Contributions/$author|"~
         "{ +@these } task{ $s }]]"
      !! "|- id=\"$author\"\n|[[User:$author|$author]]\n"~
         "|data-sort-value=\"{ +@these }\"|[[Special:Contributions/$author|"~
         "{ +@these } task{ $s }]]"
    ;

    if +@these > 2 {
        $out.say: "|style=\"padding: 0px;\"|\n",
          "\{|class=\"broadtable sortable\" style=\"width: 100%;\"\n",
          "! Task Name !! Date Added !! Status";
    }
    else {
        $out.say: "|style=\"padding: 0px;\"|\n",
          "\{|class=\"broadtable\" style=\"width: 100%;\"";
   }

    # Tasks by this author, sorted by name
    for @these.sort({.key.&naturally}) -> $task {

        my $color = $task.value.<category> eq 'Draft' ?? '#ffd' !! '#fff';

        # add the task link, date and status to the table in the second cell
        $out.say: "|-\n|style=\"background-color: $color;\"",
          ( $task.key ~~ /\d/
            ?? " data-sort-value=\"{ sort-key $task.key }\"| [[{uri-escape $task.key}|{$task.key}]]\n"
            !! "| [[{uri-escape $task.key}|{$task.key}]]\n"
          ),
          "|style=\"width: 10em; background-color: $color;\"| {$task.value.<date>}\n",
          "|style=\"width: 6em; background-color: $color;\"| {$task.value.<category>}",
    }
     $out.say: '|}'
}
$out.say( "|}\n" );
$out.close;


note "Table file saved as: {$tablefile.IO.absolute}";

sub mediawiki-query ($site, $type, *%query) {
    my $url = "$site/api.php?" ~ uri-query-string(
        :action<query>, :format<json>, :formatversion<2>, |%query);
    my $continue = '';

    gather loop {
        my $response = $client.get("$url&$continue");
        my $data = from-json($response.content);
        take $_ for $data.<query>.{$type}.values;
        $continue = uri-query-string |($data.<query-continue>{*}».hash.hash or last);
    }
}

sub uri-query-string (*%fields) { %fields.map({ "{.key}={uri-escape .value}" }).join("&") }

sub sort-key ($a) { $a.lc.subst(/(\d+)/, ->$/ {0~(65+($0.chars)).chr~$0},:g) }

sub clear { "\r" ~ ' ' x 100 ~ "\r" }
Sample output:

See full output at Rosetta_Code/List_authors_of_task_descriptions/Full_list

As of 2018-04-10 :: Total Tasks: 1080:: Tasks: 871 :: Draft Tasks: 209 :: By 251 Authors
Author Tasks Authored
2Powers 2 tasks
Names to numbers 2013-05-16 Draft
Solving coin problems 2013-05-16 Draft
12.175.32.19 1 task
Soundex 2009-11-12 Task
12Me21 1 task
Draw a rotating cube 2015-05-04 Task
many rows omitted...
Zorro1024 2 tasks
Perfect shuffle 2015-04-16 Task
Vector 2015-03-21 Draft
Zzo38 1 task
Thue-Morse 2015-09-20 Task
Русский 3 tasks
Task Name Date Added Status
Main step of GOST 28147-89 2012-08-31 Task
Old Russian measure of length 2013-01-09 Draft
Transportation problem 2013-05-24 Draft

Wren

Library: libcurl
Library: Wren-pattern
Library: Wren-fmt

An embedded program so we can use libcurl.

Takes upwards of 80 minutes to run as the history page(s) for each task need to be downloaded and parsed to find the author. Worse still, given we are pounding a busy server pretty hard, there are lots of 'bad gateway' and other errors (94 on this particular run!) which necessitate adding tasks back to the task list until they are eventually downloaded and parsed successfully which can add several minutes to the overall time.

/* Rosetta_Code_List_authors_of_task_descriptions.wren */

import "./pattern" for Pattern
import "./fmt" for Fmt

var CURLOPT_URL = 10002
var CURLOPT_FOLLOWLOCATION = 52
var CURLOPT_WRITEFUNCTION = 20011
var CURLOPT_WRITEDATA = 10001

foreign class Buffer {
    construct new() {}  // C will allocate buffer of a suitable size

    foreign value       // returns buffer contents as a string
}

foreign class Curl {
    construct easyInit() {}

    foreign easySetOpt(opt, param)

    foreign easyPerform()

    foreign easyCleanup()
}

var curl = Curl.easyInit()

var getContent = Fn.new { |url|
    var buffer = Buffer.new()
    curl.easySetOpt(CURLOPT_URL, url)
    curl.easySetOpt(CURLOPT_FOLLOWLOCATION, 1)
    curl.easySetOpt(CURLOPT_WRITEFUNCTION, 0)  // write function to be supplied by C
    curl.easySetOpt(CURLOPT_WRITEDATA, buffer)
    curl.easyPerform()
    return buffer.value
}

var p1 = Pattern.new("title/=\"[+1^\"]\"")
var p2 = Pattern.new("cmcontinue/=\"[+1^\"]\"")
var pi = "\"&"
var p3 = Pattern.new("a href/=\"//[wiki//User:|w//index.php?title/=User:|wiki//Special:Contributions//][+1/I]\"", 0, pi)

var findTasks = Fn.new { |category|
    var url = "https://www.rosettacode.org/w/api.php?action=query&list=categorymembers&cmtitle=Category:%(category)&cmlimit=500&format=xml"
    var cmcontinue = ""
    var tasks = []
    while (true) {
        var content = getContent.call(url + cmcontinue)
        var matches1 = p1.findAll(content)
        for (m in matches1) {
            var title = m.capsText[0].replace("&#039;", "'").replace("&quot;", "\"")
            tasks.add(title)
        }
        var m2 = p2.find(content)
        if (m2) cmcontinue = "&cmcontinue=%(m2.capsText[0])" else break
    }
    return tasks
}

var tasks = findTasks.call("Programming_Tasks") // 'full' tasks only
tasks.addAll(findTasks.call("Draft_Programming_Tasks"))
var tc = tasks.count
var authors = {}
while (tasks.count > 0) {
    var task = tasks[0].replace(" ", "_").replace("+", "\%2B")
    // check the last or only history page for each task
    var url = "https://rosettacode.org/w/index.php?title=%(task)&dir=prev&action=history"
    tasks.removeAt(0)
    var content = getContent.call(url)
    content = content.replace("http://www.rosettacode.org", "")
    var matches = p3.findAll(content)
    // if there are no matches there must have been a 'bad gateway' or other error
    if (matches.count == 0) {
        // add back a failed task until it eventually succeeds
        tasks.add(task)
        continue
    }
    // the task author should be the final user on that page
    var author = matches[-1].capsText[1].replace("_", " ")
    // add this task to the author's count
    if (authors.containsKey(author)) {
        authors[author] = authors[author] + 1
    } else {
        authors[author] = 1
    }
}

// sort the authors in descending order by number of tasks created
var authorNumbers = authors.toList
authorNumbers.sort { |a, b| a.value > b.value }
// print those who've completed at least 9 tasks
System.print("As at 10th September 2022:\n")
System.print("Total tasks   : %(tc)")
System.print("Total authors : %(authors.count)")
System.print("\nThe authors who have created at least 9 tasks are:\n")
System.print("Pos    Tasks  Author")
System.print("====   =====  ======")
var lastNumber = 0
var lastIndex = -1
var i = 0
for (authorNumber in authorNumbers.where { |me| me.value >= 9 }) {
    var j = i
    var eq = " "
    if (authorNumber.value == lastNumber) {
        j = lastIndex
        eq = "="
    } else {
        lastIndex = i
        lastNumber = authorNumber.value
    }
    Fmt.print("$3d$s    $3d   $s", j+1, eq, authorNumber.value, authorNumber.key)
    i = i + 1
}
curl.easyCleanup()


We now embed this script in the following C program, build and run.

/* gcc Rosetta_Code_List_authors_of_task_descriptions.c -o Rosetta_Code_List_authors_of_task_descriptions -lcurl -lwren -lm  */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <curl/curl.h>
#include "wren.h"

struct MemoryStruct {
    char *memory;
    size_t size;
};

/* C <=> Wren interface functions */

static size_t WriteMemoryCallback(void *contents, size_t size, size_t nmemb, void *userp) {
    size_t realsize = size * nmemb;
    struct MemoryStruct *mem = (struct MemoryStruct *)userp;
 
    char *ptr = realloc(mem->memory, mem->size + realsize + 1);
    if(!ptr) {
        /* out of memory! */
        printf("not enough memory (realloc returned NULL)\n");
        return 0;
    }

    mem->memory = ptr;
    memcpy(&(mem->memory[mem->size]), contents, realsize);
    mem->size += realsize;
    mem->memory[mem->size] = 0;
    return realsize;
}

void C_bufferAllocate(WrenVM* vm) {
    struct MemoryStruct *ms = (struct MemoryStruct *)wrenSetSlotNewForeign(vm, 0, 0, sizeof(struct MemoryStruct));
    ms->memory = malloc(1);
    ms->size = 0;
}

void C_bufferFinalize(void* data) {
    struct MemoryStruct *ms = (struct MemoryStruct *)data;
    free(ms->memory);
}

void C_curlAllocate(WrenVM* vm) {
    CURL** pcurl = (CURL**)wrenSetSlotNewForeign(vm, 0, 0, sizeof(CURL*));
    *pcurl = curl_easy_init();
}

void C_value(WrenVM* vm) {
    struct MemoryStruct *ms = (struct MemoryStruct *)wrenGetSlotForeign(vm, 0);
    wrenSetSlotString(vm, 0, ms->memory);
}

void C_easyPerform(WrenVM* vm) {
    CURL* curl = *(CURL**)wrenGetSlotForeign(vm, 0);
    curl_easy_perform(curl);
}

void C_easyCleanup(WrenVM* vm) {
    CURL* curl = *(CURL**)wrenGetSlotForeign(vm, 0);
    curl_easy_cleanup(curl);
}

void C_easySetOpt(WrenVM* vm) {
    CURL* curl = *(CURL**)wrenGetSlotForeign(vm, 0);
    CURLoption opt = (CURLoption)wrenGetSlotDouble(vm, 1);
    if (opt < 10000) {
        long lparam = (long)wrenGetSlotDouble(vm, 2);
        curl_easy_setopt(curl, opt, lparam);
    } else if (opt < 20000) {
        if (opt == CURLOPT_WRITEDATA) {
            struct MemoryStruct *ms = (struct MemoryStruct *)wrenGetSlotForeign(vm, 2);
            curl_easy_setopt(curl, opt, (void *)ms);
        } else if (opt == CURLOPT_URL) {
            const char *url = wrenGetSlotString(vm, 2);
            curl_easy_setopt(curl, opt, url);
        }
    } else if (opt < 30000) {
        if (opt == CURLOPT_WRITEFUNCTION) {
            curl_easy_setopt(curl, opt, &WriteMemoryCallback);
        }
    }
}

WrenForeignClassMethods bindForeignClass(WrenVM* vm, const char* module, const char* className) {
    WrenForeignClassMethods methods;
    methods.allocate = NULL;
    methods.finalize = NULL;
    if (strcmp(module, "main") == 0) {
        if (strcmp(className, "Buffer") == 0) {
            methods.allocate = C_bufferAllocate;
            methods.finalize = C_bufferFinalize;
        } else if (strcmp(className, "Curl") == 0) {
            methods.allocate = C_curlAllocate;
        }
    }
    return methods;
}

WrenForeignMethodFn bindForeignMethod(
    WrenVM* vm,
    const char* module,
    const char* className,
    bool isStatic,
    const char* signature) {
    if (strcmp(module, "main") == 0) {
        if (strcmp(className, "Buffer") == 0) {
            if (!isStatic && strcmp(signature, "value") == 0)           return C_value;
        } else if (strcmp(className, "Curl") == 0) {
            if (!isStatic && strcmp(signature, "easySetOpt(_,_)") == 0) return C_easySetOpt;
            if (!isStatic && strcmp(signature, "easyPerform()") == 0)   return C_easyPerform;
            if (!isStatic && strcmp(signature, "easyCleanup()") == 0)   return C_easyCleanup;
        }
    }
    return NULL;
}

static void writeFn(WrenVM* vm, const char* text) {
    printf("%s", text);
}

void errorFn(WrenVM* vm, WrenErrorType errorType, const char* module, const int line, const char* msg) {
    switch (errorType) {
        case WREN_ERROR_COMPILE:
            printf("[%s line %d] [Error] %s\n", module, line, msg);
            break;
        case WREN_ERROR_STACK_TRACE:
            printf("[%s line %d] in %s\n", module, line, msg);
            break;
        case WREN_ERROR_RUNTIME:
            printf("[Runtime Error] %s\n", msg);
            break;
    }
}

char *readFile(const char *fileName) {
    FILE *f = fopen(fileName, "r");
    fseek(f, 0, SEEK_END);
    long fsize = ftell(f);
    rewind(f);
    char *script = malloc(fsize + 1);
    fread(script, 1, fsize, f);
    fclose(f);
    script[fsize] = 0;
    return script;
}

static void loadModuleComplete(WrenVM* vm, const char* module, WrenLoadModuleResult result) {
    if( result.source) free((void*)result.source);
}

WrenLoadModuleResult loadModule(WrenVM* vm, const char* name) {
    WrenLoadModuleResult result = {0};
    if (strcmp(name, "random") != 0 && strcmp(name, "meta") != 0) {
        result.onComplete = loadModuleComplete;
        char fullName[strlen(name) + 6];
        strcpy(fullName, name);
        strcat(fullName, ".wren");
        result.source = readFile(fullName);
    }
    return result;
}

int main(int argc, char **argv) {
    WrenConfiguration config;
    wrenInitConfiguration(&config);
    config.writeFn = &writeFn;
    config.errorFn = &errorFn;
    config.bindForeignClassFn = &bindForeignClass;
    config.bindForeignMethodFn = &bindForeignMethod;
    config.loadModuleFn = &loadModule;
    WrenVM* vm = wrenNewVM(&config);
    const char* module = "main";
    const char* fileName = "Rosetta_Code_List_authors_of_task_descriptions.wren";
    char *script = readFile(fileName);
    WrenInterpretResult result = wrenInterpret(vm, module, script);
    switch (result) {
        case WREN_RESULT_COMPILE_ERROR:
            printf("Compile Error!\n");
            break;
        case WREN_RESULT_RUNTIME_ERROR:
            printf("Runtime Error!\n");
            break;
        case WREN_RESULT_SUCCESS:
            break;
    }
    wrenFreeVM(vm);
    free(script);
    return 0;
}
Output:
As at 10th September 2022:

Total tasks   : 1569
Total authors : 315

The authors who have created at least 9 tasks are:

Pos    Tasks  Author
====   =====  ======
  1     199   Paddy3118
  2     135   CalmoSoft
  3      74   Thundergnat
  4      71   Markhobley
  5      66   Gerard Schildberger
  6      55   Mwn3d
  7      39   NevilleDNZ
  7=     39   Nigel Galloway
  9      33   MikeMol
 10      27   PureFox
 11      23   Grondilu
 12      21   Blue Prawn
 13      20   Fwend
 13=     20   Dkf
 15      19   Wherrera
 16      18   Kernigh
 17      17   Dmitry-kazakov
 17=     17   ShinTakezou
 17=     17   Ledrug
 20      13   Paulo Jorente
 20=     13   Abu
 20=     13   Waldorf
 23      12   Ce
 23=     12   Kevin Reid
 23=     12   Puppydrum64
 26      10   Bearophile
 26=     10   Tinku99
 28       9   TimSC
 28=      9   Petelomax
 28=      9   EMBee
 28=      9   Trizen
Cookies help us deliver our services. By using our services, you agree to our use of cookies.