Rosetta Code/Rank languages by number of users
Sort most popular programming languages based on the number of users on Rosetta Code. Show the languages with at least 100 users.
A way to solve the task:
Users of a language X are those referenced in the page https://rosettacode.org/wiki/Category:X_User, or preferably https://rosettacode.org/mw/index.php?title=Category:X_User&redirect=no to avoid redirections. In order to find the list of such categories, it's possible to first parse the entries of http://rosettacode.org/mw/index.php?title=Special:Categories&limit=5000. Then download and parse each language users category to count the users.
Sample output on 18 february 2019:
Language Users -------------------------- C 391 Java 276 C++ 275 Python 262 JavaScript 238 Perl 171 PHP 167 SQL 138 UNIX Shell 131 BASIC 120 C sharp 118 Pascal 116 Haskell 102
A Rosetta Code user usually declares using a language with the mylang template. This template is expected to appear on the User page. However, in some cases it appears in a user Talk page. It's not necessary to take this into account. For instance, among the 373 C users in the table above, 3 are actually declared in a Talk page.
Perl
<lang perl>use strict; use warnings; use JSON; use URI::Escape; use LWP::UserAgent;
my $client = LWP::UserAgent->new; $client->agent("Rosettacode Perl task solver"); my $url = 'http://rosettacode.org/mw'; my $minimum = 100;
sub uri_query_string {
my(%fields) = @_; 'action=query&format=json&formatversion=2&' . join '&', map { $_ . '=' . uri_escape($fields{$_}) } keys %fields
}
sub mediawiki_query {
my($site, $type, %query) = @_; my $url = "$site/api.php?" . uri_query_string(%query); my %languages = ();
my $req = HTTP::Request->new( GET => $url ); my $response = $client->request($req); $response->is_success or die "Failed to GET '$url': ", $response->status_line; my $data = decode_json($response->content); for my $row ( @{${$data}{query}{pages}} ) { next unless defined $$row{categoryinfo} && $$row{title} =~ /User/; my($title) = $$row{title} =~ /Category:(.*?) User/; my($count) = $$row{categoryinfo}{pages}; $languages{$title} = $count; } %languages;
}
my %table = mediawiki_query(
$url, 'pages', ( generator => 'categorymembers', gcmtitle => 'Category:Language users', gcmlimit => '999', prop => 'category info', rawcontinue => , )
);
for my $k (sort { $table{$b} <=> $table{$a} } keys %table) {
printf "%4d %s\n", $table{$k}, $k if $table{$k} > $minimum;
}</lang>
- Output:
397 C 278 Java 278 C++ 266 Python 240 JavaScript 171 Perl 168 PHP 139 SQL 131 UNIX Shell 121 C sharp 120 BASIC 118 Pascal 102 Haskell
Perl 6
Use the mediawiki API rather than web scraping since it is much faster and less resource intensive. Show languages with more than 25 users since that is still a pretty short list and to demonstrate how tied rankings are handled. Change the $minimum parameter to adjust what the cut-off point will be.
This is all done in a single pass; ties are not detected until a language has the same count as a previous one, so ties are marked by a T next to the count indicating that this language has the same count as the previous.
<lang perl6>use HTTP::UserAgent; use URI::Escape; use JSON::Fast;
my $client = HTTP::UserAgent.new;
my $url = 'http://rosettacode.org/mw';
my $start-time = now;
say "========= Generated: { DateTime.new(time) } =========";
my $lang = 1; my $rank = 0; my $last = 0; my $tie = ' '; my $minimum = 25;
.say for
mediawiki-query( $url, 'pages', :generator<categorymembers>, :gcmtitle<Category:Language users>, :gcmlimit<350>, :rawcontinue(), :prop<categoryinfo> )
.map({ %( count => .<categoryinfo><pages> || 0, lang => .<title>.subst(/^'Category:' (.+) ' User'/, ->$/ {$0}) ) })
.sort( { -.<count>, .<lang> } )
.map( { last if .<count> < $minimum; display(.<count>, .<lang>) } );
say "========= elapsed: {(now - $start-time).round(.01)} seconds =========";
sub display ($count, $which) {
if $last != $count { $last = $count; $rank = $lang; $tie = ' ' } else { $tie = 'T' }; sprintf "#%3d Rank: %2d %s with %-4s users: %s", $lang++, $rank, $tie, $count, $which;
}
sub mediawiki-query ($site, $type, *%query) {
my $url = "$site/api.php?" ~ uri-query-string( :action<query>, :format<json>, :formatversion<2>, |%query); my $continue = ;
gather loop { my $response = $client.get("$url&$continue"); my $data = from-json($response.content); take $_ for $data.<query>.{$type}.values; $continue = uri-query-string |($data.<query-continue>{*}».hash.hash or last); }
}
sub uri-query-string (*%fields) {
join '&', %fields.map: { "{.key}={uri-escape .value}" }
}</lang>
- Output:
========= Generated: 2018-06-01T22:09:26Z ========= # 1 Rank: 1 with 380 users: C # 2 Rank: 2 with 269 users: Java # 3 Rank: 3 with 266 users: C++ # 4 Rank: 4 with 251 users: Python # 5 Rank: 5 with 234 users: JavaScript # 6 Rank: 6 with 167 users: Perl # 7 Rank: 7 with 166 users: PHP # 8 Rank: 8 with 134 users: SQL # 9 Rank: 9 with 125 users: UNIX Shell # 10 Rank: 10 with 119 users: BASIC # 11 Rank: 11 with 116 users: C sharp # 12 Rank: 12 with 112 users: Pascal # 13 Rank: 13 with 99 users: Haskell # 14 Rank: 14 with 93 users: Ruby # 15 Rank: 15 with 74 users: Fortran # 16 Rank: 16 with 67 users: Visual Basic # 17 Rank: 17 with 62 users: Prolog # 18 Rank: 18 with 61 users: Scheme # 19 Rank: 19 with 58 users: Common Lisp # 20 Rank: 20 with 55 users: Lua # 21 Rank: 21 with 53 users: AWK # 22 Rank: 22 with 52 users: HTML # 23 Rank: 23 with 46 users: Assembly # 24 Rank: 24 with 44 users: Batch File # 25 Rank: 25 with 42 users: Bash # 26 Rank: 25 T with 42 users: X86 Assembly # 27 Rank: 27 with 40 users: Erlang # 28 Rank: 28 with 38 users: Forth # 29 Rank: 29 with 37 users: MATLAB # 30 Rank: 30 with 36 users: Lisp # 31 Rank: 31 with 35 users: J # 32 Rank: 31 T with 35 users: Visual Basic .NET # 33 Rank: 33 with 34 users: Delphi # 34 Rank: 34 with 33 users: APL # 35 Rank: 34 T with 33 users: Ada # 36 Rank: 34 T with 33 users: Brainf*** # 37 Rank: 34 T with 33 users: Objective-C # 38 Rank: 34 T with 33 users: Tcl # 39 Rank: 39 with 32 users: R # 40 Rank: 40 with 31 users: COBOL # 41 Rank: 41 with 30 users: Go # 42 Rank: 42 with 29 users: Perl 6 # 43 Rank: 43 with 27 users: Clojure # 44 Rank: 43 T with 27 users: Mathematica # 45 Rank: 45 with 25 users: AutoHotkey ========= elapsed: 1.45 seconds =========
Stata
<lang stata>copy "http://rosettacode.org/mw/index.php?title=Special:Categories&limit=5000" categ.html, replace import delimited categ.html, delim("@") enc("utf-8") clear keep if ustrpos(v1,"/wiki/Category:") & ustrpos(v1,"_User") gen i = ustrpos(v1,"href=") gen j = ustrpos(v1,char(34),i+1) gen k = ustrpos(v1,char(34),j+1) gen s = usubstr(v1,j+7,k-j-7) replace i = ustrpos(v1,"title=") replace j = ustrpos(v1,">",i+1) replace k = ustrpos(v1," User",j+1) gen lang = usubstr(v1,j+1,k-j) keep s lang gen users=.
forval i=1/`c(N)' { local s preserve copy `"https://rosettacode.org/mw/index.php?title=`=s[`i']'&redirect=no"' `i'.html, replace import delimited `i'.html, delim("@") enc("utf-8") clear count if ustrpos(v1,"/wiki/User") local m `r(N)' restore replace users=`m' in `i' erase `i'.html }
drop s gsort -users lang compress leftalign list in f/50 save rc_users, replace</lang>
Output (2019-02-18)
+----------------------------+ | lang users | |----------------------------| 1. | C 391 | 2. | Java 276 | 3. | C++ 275 | 4. | Python 262 | 5. | JavaScript 238 | |----------------------------| 6. | Perl 171 | 7. | PHP 167 | 8. | SQL 138 | 9. | UNIX Shell 131 | 10. | BASIC 120 | |----------------------------| 11. | C sharp 118 | 12. | Pascal 116 | 13. | Haskell 102 | 14. | Ruby 93 | 15. | Fortran 79 | |----------------------------| 16. | Visual Basic 68 | 17. | Prolog 65 | 18. | Scheme 61 | 19. | Common Lisp 58 | 20. | AWK 57 | |----------------------------| 21. | Lua 57 | 22. | HTML 52 | 23. | Assembly 45 | 24. | Batch File 44 | 25. | X86 Assembly 44 | |----------------------------| 26. | Bash 43 | 27. | Erlang 40 | 28. | Lisp 39 | 29. | MATLAB 39 | 30. | Forth 38 | |----------------------------| 31. | Ada 36 | 32. | Visual Basic .NET 36 | 33. | Delphi 35 | 34. | J 35 | 35. | APL 34 | |----------------------------| 36. | Brainf*** 34 | 37. | Tcl 34 | 38. | Objective-C 33 | 39. | Smalltalk 33 | 40. | COBOL 32 | |----------------------------| 41. | R 32 | 42. | Go 30 | 43. | Mathematica 30 | 44. | Perl 6 29 | 45. | Clojure 27 | |----------------------------| 46. | AutoHotkey 25 | 47. | REXX 25 | 48. | LaTeX 23 | 49. | OCaml 23 | 50. | Sed 23 | +----------------------------+
zkl
Uses libraries cURL and YAJL (yet another json library) <lang zkl>const MIN_USERS=60; var [const] CURL=Import("zklCurl"), YAJL=Import("zklYAJL")[0];
fcn rsGet{
continueValue,r,curl := "",List, CURL(); do{ // eg 5 times page:=("http://rosettacode.org/mw/api.php?action=query" "&generator=categorymembers&prop=categoryinfo"
"&gcmtitle=Category%%3ALanguage%%20users" "&rawcontinue=&format=json&gcmlimit=350" "%s").fmt(continueValue);
page=curl.get(page); page=page[0].del(0,page[1]); // get rid of HTML header json:=YAJL().write(page).close(); json["query"]["pages"].pump(r.append,'wrap(x){ x=x[1]; //("2708",Dictionary(title:Category:C User,...,categoryinfo:D(pages:373,size:373,...)))
// or title:SmartBASIC if((pgs:=x.find("categoryinfo")) and (pgs=pgs.find("pages")) and pgs>=MIN_USERS) return(pgs,x["title"].replace("Category:","").replace(" User","")); return(Void.Skip);
}); if(continueValue=json.find("query-continue","")) continueValue=String("&gcmcontinue=",
continueValue["categorymembers"]["gcmcontinue"]);
}while(continueValue); r
}
allLangs:=rsGet(); allLangs=allLangs.sort(fcn(a,b){ a[0]>b[0] }); println("========== ",Time.Date.prettyDay()," =========="); foreach n,pgnm in ([1..].zip(allLangs))
{ println("#%3d with %4s users: %s".fmt(n,pgnm.xplode())) }</lang>
- Output:
========== Wednesday, the 20th of December 2017 ========== # 1 with 373 users: C # 2 with 261 users: C++ # 3 with 257 users: Java # 4 with 243 users: Python # 5 with 228 users: JavaScript # 6 with 163 users: PHP # 7 with 162 users: Perl # 8 with 131 users: SQL # 9 with 120 users: UNIX Shell # 10 with 118 users: BASIC # 11 with 113 users: C sharp # 12 with 109 users: Pascal # 13 with 98 users: Haskell # 14 with 91 users: Ruby # 15 with 71 users: Fortran # 16 with 65 users: Visual Basic # 17 with 60 users: Scheme