Rosetta Code/Rank languages by popularity

From Rosetta Code

Jump to: navigation, search
Task
Rosetta Code/Rank languages by popularity
You are encouraged to solve this task according to the task description, using any language you may know.

Sort the most popular programming languages based on the number of members in their Rosetta Code categories (from http://www.rosettacode.org/w/index.php?title=Special:Categories&limit=5000)

Sample output on 16 May 2010:

1. 397- Tcl
2. 364- Python
3. 350- Ruby
4. 331- J
5. 324- C
6. 319- OCaml
7. 318- Haskell
8. 300- Perl
9. 287- Common Lisp
10. 270- AutoHotkey
11. 270- Java
12. 268- Oz
13. 266- Ada
14. 257- D
15. 249- R
16. 247- E
17. 243- C++
18. 237- JavaScript
19. 223- ALGOL 68
20. 222- PureBasic
 ...

Filtering wrong results is optional. You can check against http://www.rosettacode.org/wiki/Special:MostLinkedCategories

Contents

[edit] Ada

Library: AWS


This example is incorrect. It does not accomplish the given task. Please fix the code and remove this message.


NB: The 'ASCII.Quotation' in the declaration of Title_Str is a rather clunky workaround for the Ada->HTML engine not being able to colourize '"' correctly.

with Ada.Integer_Text_IO;   use Ada.Integer_Text_IO;
with Ada.Strings.Fixed; use Ada.Strings.Fixed;
with Ada.Strings.Unbounded; use Ada.Strings.Unbounded;
with Ada.Text_IO; use Ada.Text_IO;
 
with Ada.Containers.Ordered_Sets;
with Ada.Strings.Less_Case_Insensitive;
 
with AWS.Client;
with AWS.Response;
 
--  Download the Rosetta Code category list, extract every
--  (category name, member count) pair and print them ranked by
--  descending member count.  No filtering of non-language categories
--  is attempted.
procedure Test is

   use Ada.Strings;

   function "+" (S : String) return Unbounded_String renames To_Unbounded_String;

   --  One category together with the number of members it has.
   type A_Language_Count is
      record
         Count    : Integer := 0;
         Language : Unbounded_String;
      end record;

   --  Element equality as seen by the set: only the counts are compared.
   function "=" (L, R : A_Language_Count) return Boolean is
   begin
      return L.Count = R.Count;
   end "=";

   function "<" (L, R : A_Language_Count) return Boolean is
   begin
      -- Sort by 'Count' and then by Language name
      return L.Count < R.Count
        or else (L.Count = R.Count
                 and then Less_Case_Insensitive (Left  => To_String (L.Language),
                                                 Right => To_String (R.Language)));
   end "<";

   package Sets is new Ada.Containers.Ordered_Sets (A_Language_Count);
   use Sets;

   Counts : Set;

   --  Scan S for entries of the form
   --     title="Category:NAME">...</a> (COUNT ...
   --  and add each (COUNT, NAME) pair to Counts.
   --
   --  The scan is an explicit loop with explicit "not found" checks, so
   --  reaching the end of the page is a normal exit rather than an
   --  exception, and a page without any category does not raise.
   procedure Find_Counts (S : String) is
      Title_Str : constant String := "title=" & ASCII.Quotation & "Category:";
      End_A_Str : constant String := "</a> (";

      From       : Positive := S'First;  --  where the next search starts
      Title_At   : Natural;
      Bracket_At : Natural;
      End_A_At   : Natural;
      Space_At   : Natural;
   begin
      loop
         exit when From > S'Last;

         Title_At := Index (S (From .. S'Last), Title_Str);
         exit when Title_At = 0;

         Bracket_At := Index (S (Title_At + Title_Str'Length .. S'Last), ">");
         exit when Bracket_At = 0;

         End_A_At := Index (S (Bracket_At + 1 .. S'Last), End_A_Str);
         exit when End_A_At = 0;

         Space_At := Index (S (End_A_At + End_A_Str'Length .. S'Last), " ");
         exit when Space_At = 0;

         begin
            --  Include (not Insert): an equivalent element already in the
            --  set is replaced instead of raising Constraint_Error.
            --  "Bracket_At - 2" drops the closing quote and the '>'.
            Counts.Include
              ((Count    => Natural'Value
                              (S (End_A_At + End_A_Str'Length .. Space_At - 1)),
                Language => +S (Title_At + Title_Str'Length .. Bracket_At - 2)));
         exception
            when Constraint_Error =>
               null;  --  the text after "(" was not a number: skip this entry
         end;

         --  Always advance, so the loop terminates.
         From := Space_At + 1;
      end loop;
   end Find_Counts;

   Place : Natural := 1;

   --  Print one ranked line "<place>. <count> - <language>".
   procedure Display (C : Cursor) is
   begin
      Put (Place, Width => 1); Put (". ");
      Put (Element (C).Count, Width => 1); Put (" - ");
      Put_Line (To_String (Element (C).Language));
      Place := Place + 1;
   end Display;

   --  limit=5000 as required by the task description; limit=500 truncated
   --  the category list.
   Http_Source : constant AWS.Response.Data :=
     AWS.Client.Get ("http://www.rosettacode.org/w/index.php?title=Special:Categories&limit=5000");
begin
   Find_Counts (AWS.Response.Message_Body (Http_Source));
   --  The set is ordered ascending, so iterate backwards for a ranking.
   Counts.Reverse_Iterate (Display'Access);
end Test;

[edit] ALGOL 68

Works with: ALGOL 68G version mk8+ for Unix and Linux - tested with release mk15-0.8b.fc9.i386 - uses non-standard library routines http content and grep in string. Note: the routine http content is currently not available on Win32 systems.


This example is incorrect. It does not accomplish the given task. Please fix the code and remove this message.


# Check the status line of the raw HTTP reply held in page.         #
# Yields TRUE when the reply matches an HTTP status line with code  #
# 200. Otherwise it prints the status line that was found, or a     #
# generic message when none can be found, and yields FALSE.         #
# grep in string is compared with 0 here to mean that it matched.   #
PROC good page = (REF STRING page) BOOL:
IF grep in string("^HTTP/[0-9.]* 200", page, NIL, NIL) = 0
THEN TRUE
ELSE IF INT start, end;
# look for any status line so that it can be reported #
grep in string("^HTTP/[0-9.]* [0-9]+ [a-zA-Z ]*", page,
start, end) = 0
THEN print (page[start : end])
ELSE print ("unknown error retrieving page")
FI;
FALSE
FI;
 
# A singly linked list of strings that records its first link, its #
# last link and its element count (upb).                           #
MODE LISTOFSTRING = STRUCT(REF LINK first, last, INT upb);
MODE LINK = STRUCT(STRING value, REF LINK next);
 
# LISTINIT: reset the list new so that it holds just the link first, #
# or nothing when first is NIL; yields the list.                     #
PRIO LISTINIT = 1;
OP LISTINIT = (REF LISTOFSTRING new, REF LINK first)REF LISTOFSTRING: (
new := (first, first, (first IS REF LINK(NIL) | 0 | 1 ));
new
);
 
# +:= appends a copy of item, held in a heap allocated link, to the #
# end of list and increments the element count.                     #
OP +:= = (REF LISTOFSTRING list, []CHAR item)VOID: (
HEAP LINK new := (STRING(item), REF LINK(NIL));
IF first OF list IS REF LINK(NIL) THEN
first OF list := new
ELSE
next OF last OF list := new
FI;
last OF list := new;
upb OF list +:= 1
);
 
# UPB of a list is its element count. #
OP UPB = (LISTOFSTRING list)INT: upb OF list;
 
# ARRAYOFSTRING copies the list values, in order, into a row of strings. #
OP ARRAYOFSTRING = (LISTOFSTRING list)[]STRING:(
[UPB list]STRING out;
REF LINK this := first OF list;
FOR i TO UPB list DO out[i] := value OF this; this := next OF this OD;
out
);
 
# Result codes that grep in string is compared against. #
INT match=0, no match=1, out of memory error=2, other error=3;

# Split beetles on every match of the regular expression re split.    #
# The result alternates between the text before a match and the       #
# matched text itself, followed by whatever text remains after the    #
# last match.                                                         #
PROC re split = (STRING re split, REF STRING beetles)[]STRING:(
  LISTOFSTRING out := (NIL, NIL, 0); # LISTINIT REF LINK NIL; #
  INT start := 1, pos, end;
  # pos and end are relative to the slice beetles[start:] #
  WHILE grep in string(re split, beetles[start:], pos, end) = match DO
    out +:= beetles[start:start+pos-2];       # text before the match #
    out +:= beetles[start+pos-1:start+end-1]; # the match itself #
    start +:= end
  OD;
  # Keep the trailing text only when some is left. The test used to  #
  # be start > UPB beetles, which dropped the trailing text and      #
  # sliced past the end of the string instead.                       #
  IF start <= UPB beetles THEN
    out +:= beetles[start:]
  FI;
  ARRAYOFSTRING(out)
);
 
 
# Main program: fetch the category list, pick out each category and   #
# its member count, drop the known non-language categories, and print #
# the ten largest.                                                    #
# The request asks for limit=5000 as in the task description; the     #
# former limit=500 returned only part of the category list.           #
IF STRING reply;
INT rc =
http content (reply, "www.rosettacode.org", "http://www.rosettacode.org/w/index.php?title=Special:Categories&limit=5000", 0);
rc /= 0 OR NOT good page (reply)
THEN print (("Error:",strerror (rc)))
ELSE
STRING # hack: HTML should be parsed by an official HTML parsing library #
re html tag = "<[^>]*>",
re a href category = "^<a href=""/wiki/Category:.*"" title=",
re members = "([1-9][0-9]* members)";
 
MODE STATISTIC = STRUCT(INT members, STRING category);
FLEX[0]STATISTIC stats;
 
# append item to the flexible row by copying it into a row one longer #
OP +:= = (REF FLEX[]STATISTIC in out, STATISTIC item)VOID:(
[LWB in out: UPB in out+1]STATISTIC new;
new[LWB in out: UPB in out]:=in out;
new[UPB new]:=item;
in out := new
);
 
# hack: needs to be manually maintained #
STRING re ignore ="Programming Tasks|WikiStubs|Maintenance/OmitCategoriesCreated|"+
"Unimplemented tasks by language|Programming Languages|"+
"Solutions by Programming Language|Implementations|"+
"Solutions by Library|Encyclopedia|Language users|"+
"Solutions by Programming Task|Basic language learning|"+
"RCTemplates|Language Implementations";
 
FORMAT category fmt = $"<a href=""/wiki/Category:"g""" title=""Category:"g""""$;
STRING encoded category, category;
FORMAT members fmt = $" ("g" members)"$;
INT members;
 
# tokens alternate between HTML tags and the text between them #
FLEX[0]STRING tokens := re split(re html tag, reply);
FOR token index TO UPB tokens DO
STRING token := tokens[token index];
FILE file;
IF grep in string(re a href category, token, NIL, NIL) = match THEN
# a category link: remember its name for the member count that follows #
associate(file, token);
make term(file,"""");
getf(file, (category fmt, encoded category, category));
close(file)
ELIF grep in string(re members, token, NIL, NIL) = match THEN
# a member count: record it unless the category is on the ignore list #
IF grep in string(re ignore, category, NIL, NIL) /= match THEN
associate(file, token);
getf(file, (members fmt, members));
stats +:= STATISTIC(members, category);
close(file)
FI
FI
OD;
 
# ordering used by the sort prelude: compare by member count only #
OP < = (STATISTIC a,b)BOOL:
members OF a < members OF b;
 
MODE SORTSTRUCT = STATISTIC;
PR READ "prelude/sort.a68" PR;
 
stats := in place shell sort reverse(stats);
 
# print at most max ranked entries #
INT max = 10;
FOR i TO (UPB stats > max | max | UPB stats) DO
printf(($g(-0)". "g(-0)" - "gl$,i,stats[i]))
OD
FI

Output:

1. 233 - Python
2. 222 - Ada
3. 203 - OCaml
4. 203 - C
5. 201 - Perl
6. 193 - Haskell
7. 182 - Java
8. 179 - D
9. 178 - ALGOL 68
10. 160 - Ruby

[edit] AutoHotkey

; Rank the Rosetta Code programming-language categories by member count
; and show the result in a ListView window.
;
; Steps:
;   1. download Special:Categories (all categories with member counts)
;   2. download both pages of Category:Programming_Languages (the names)
;   3. keep only categories whose name is a programming language
;   4. sort numerically, descending, and display
StringCaseSense, On
Progress, b2 w120 zh0 fs9, Please wait ...
Sleep, 10

; --- 1. all categories, with member counts
Link = http://www.rosettacode.org/w/index.php?title=Special:Categories&limit=5000
FileDelete, Cats.html
URLDownloadToFile, %Link%, Cats.html
FileRead, Cats, Cats.html

; --- 2. first page of the language list
Link1 = http://rosettacode.org/wiki/Category:Programming_Languages
FileDelete, lang1.htm
URLDownloadToFile, %Link1%, Lang1.htm
FileRead, Lang1, Lang1.htm

; the "next 200" link on the first page gives the address of the second page
LookFor = (\(previous 200\) \(<a href=")(.+?)" title="Category:Programming Languages">next 200
RegExMatch(Lang1, LookFor, Link) ; Link2
StringReplace, Link2, Link2, &amp;, &

FileDelete, lang2.htm
URLDownloadToFile, http://www.rosettacode.org%Link2%, Lang2.htm
FileRead, Lang2, Lang2.htm
Languages := Lang1 Lang2

; create list of categories with member count ("count|name" per line)
Loop, Parse, Cats, `n, `r
{
    If InStr(A_LoopField, "<li>") {
        LookFor = title=\"Category:(.+?)"
        RegExMatch(A_LoopField, LookFor, Name)
        ; "members?" so that categories with exactly "1 member" get a count too
        RegExMatch(A_LoopField, "(\d+)\smembers?", Count)
        CatsList .= Count1 "|" Name1 "`r`n"
    }
}

; create list of languages (one name per line)
RegExMatch(Languages, "(<h2>Subcategories</h2>)(.*)previous 200", Match)
LookFor = <a href="/wiki/Category:.*?" title="Category:.*?">(.*?)</a>(.*)
While RegExMatch(Match2, LookFor, Match)
    LangList .= Match1 "`r`n"

; create the final list
Loop, Parse, CatsList, `n, `r
{
    StringSplit, out, A_LoopField, |
    ; \Q...\E makes the category name a literal. Without it names containing
    ; regex metacharacters were misread: "C++" matched the line "C", and
    ; names with parentheses or dots matched loosely.
    If RegExMatch(LangList, "m)^\Q" out2 "\E$")
        FinalList .= A_LoopField "`r`n"
}

; R = reverse order, N = numeric (each line starts with the count)
Sort, FinalList, RN
Gui, -MinimizeBox
Gui, Margin, 6
Gui, Add, ListView, y10 w363 r20 Grid, Rank|Members|Category
Loop, Parse, FinalList, `n, `r
{
    If A_LoopField {
        StringSplit, Item, A_LoopField, |
        LV_Add("", A_Index, Item1, Item2)
    }
}

LV_ModifyCol(1, "Integer")
LV_ModifyCol(2, "Integer")
LV_ModifyCol(3, 250)
FormatTime, Timestamp,, dd MMM yyyy
Progress, Off
Gui, Show,, Rosetta Categories - %Timestamp%
Return

GuiClose:
ExitApp
Return

[edit] AWK

Works with: gawk

This example is incorrect. It does not accomplish the given task. Please fix the code and remove this message.


This solution needs help from external tools to fetch the HTML from rosettacode.org, and also to do a numeric sort

# Fetch the category list with nc, extract "count - name" pairs with gawk,
# sort them numerically (descending) through a sort(1) co-process and
# prefix every line with its rank.
# limit=5000 as in the task description: limit=500 truncated the list.
printf "GET %s HTTP/1.0\n\n" 'http://www.rosettacode.org/w/index.php?title=Special:Categories&limit=5000' |
nc www.rosettacode.org 80 |
gawk '
# ignore categories that are not languages
/Basic language learning/ {next}
/Encyclopedia/ {next}
/Implementations/ {next}
/Language Implementations/ {next}
/Language users/ {next}
/Maintenance\/OmitCategoriesCreated/ {next}
/Programming Languages/ {next}
/Programming Tasks/ {next}
/RCTemplates/ {next}
/Solutions by Library/ {next}
/Solutions by Programming Language/ {next}
/Solutions by Programming Task/ {next}
/Unimplemented tasks by language/ {next}
/WikiStubs/ {next}

# a list item: m[1] is the link text (category name), m[2] the member count
match($0, /<li.*>([^<]*)<\/a> \(([[:digit:]]+) members?/, m) {
    lang[++i] = m[2] " - " m[1]
}

END {
    # count the collected entries
    len = 0; for (i in lang) len++
    # feed every entry to the sort co-process ...
    sorter = "sort -rn"
    for (i = 1; i <= len; i++) {
        print lang[i] |& sorter
    }
    # ... close the write end so that sort sees end-of-input ...
    close(sorter, "to")
    # ... and read the sorted lines back, numbering them
    i = 1
    while((sorter |& getline line) > 0) {
        print i++ ". " line
    }
    close(sorter)
}
'

[edit] C#

Sorting only programming languages.

using System;
using System.Collections;
using System.Collections.Generic;
using System.Linq;
using System.Net;
using System.Text.RegularExpressions;
 
/// <summary>
/// Ranks the Rosetta Code programming languages by the number of members
/// in their category and prints "rank. count - name" lines.
/// </summary>
class Program
{
    static void Main(string[] args)
    {
        string get1;
        string get2;

        // Dispose the client deterministically instead of leaking two of them.
        using (WebClient client = new WebClient())
        {
            // get1: JSON list of the members of Category:Programming_Languages
            get1 = client.DownloadString("http://www.rosettacode.org/w/api.php?action=query&list=categorymembers&cmtitle=Category:Programming_Languages&cmlimit=500&format=json");
            // get2: HTML list of every category with its member count
            get2 = client.DownloadString("http://www.rosettacode.org/w/index.php?title=Special:Categories&limit=5000");
        }

        // A hash set gives constant-time membership tests (the ArrayList was scanned linearly).
        HashSet<string> langs = new HashSet<string>();
        Dictionary<string, int> qtdmbr = new Dictionary<string, int>();

        MatchCollection match1 = new Regex("\"title\":\"Category:(.+?)\"").Matches(get1);
        // "members?" also accepts the singular "1 member".
        MatchCollection match2 = new Regex("title=\"Category:(.+?)\">.+?</a> \\((\\d+) members?\\)").Matches(get2);

        foreach (Match lang in match1) langs.Add(lang.Groups[1].Value);

        foreach (Match match in match2)
        {
            string name = match.Groups[1].Value;
            if (langs.Contains(name))
            {
                // Indexer assignment: a repeated category overwrites instead of
                // throwing ArgumentException as Dictionary.Add would.
                qtdmbr[name] = Int32.Parse(match.Groups[2].Value);
            }
        }

        int count = 1;

        foreach (KeyValuePair<string, int> entry in qtdmbr.OrderByDescending(x => x.Value))
        {
            Console.WriteLine("{0,3}. {1,3} - {2}", count, entry.Value, entry.Key);
            count++;
        }
    }
}

Output (as of May 30, 2010):

 1. 397 - Tcl
 2. 368 - Python
 3. 350 - Ruby
 4. 333 - J
 5. 332 - C
 6. 322 - Haskell
 7. 322 - OCaml
 8. 302 - Perl
 9. 290 - Common Lisp
10. 289 - AutoHotkey
    . . .

[edit] Object-oriented solution

using System;
using System.Net;
using System.Linq;
using System.Text.RegularExpressions;
using System.Collections.Generic;
 
/// <summary>
/// Immutable pairing of a category title with its member count.
/// </summary>
class Category {
    /// <summary>Category title, as passed to the constructor.</summary>
    public string Title { get; private set; }

    /// <summary>Number of members, as passed to the constructor.</summary>
    public int Members { get; private set; }

    public Category(string title, int members) {
        Title = title;
        Members = members;
    }
}
 
/// <summary>
/// Object-oriented variant: builds a list of Category objects for the
/// programming languages and prints them ranked by member count.
/// </summary>
class Program {
    static void Main(string[] args) {
        string get1;
        string get2;

        // Dispose the client deterministically instead of leaking two of them.
        using (WebClient client = new WebClient()) {
            // get1: JSON list of the members of Category:Programming_Languages
            get1 = client.DownloadString("http://www.rosettacode.org/w/api.php?action=query&list=categorymembers&cmtitle=Category:Programming_Languages&cmlimit=500&format=json");
            // get2: HTML list of every category with its member count
            get2 = client.DownloadString("http://www.rosettacode.org/w/index.php?title=Special:Categories&limit=5000");
        }

        MatchCollection match1 = new Regex("\"title\":\"Category:(.+?)\"").Matches(get1);
        // "members?" also accepts the singular "1 member".
        MatchCollection match2 = new Regex("title=\"Category:(.+?)\">.+?</a> \\((\\d+) members?\\)").Matches(get2);

        // Hash set: constant-time lookup instead of scanning an array per category.
        HashSet<string> valids = new HashSet<string>(match1.Cast<Match>().Select(x => x.Groups[1].Value));
        List<Category> langs = new List<Category>();

        foreach (Match match in match2) {
            string category = match.Groups[1].Value;

            // Parse the count only for categories that are kept.
            if (valids.Contains(category)) {
                langs.Add(new Category(category, Int32.Parse(match.Groups[2].Value)));
            }
        }

        int count = 1;

        foreach (Category i in langs.OrderByDescending(x => x.Members)) {
            Console.WriteLine("{0,3}. {1,3} - {2}", count, i.Members, i.Title);
            count++;
        }
    }
}

[edit] C++

This example is incorrect. The result may be wrong because there are more than 800 categories now. Please fix the code and remove this message.


Library: Boost

using g++ under Linux with g++ -lboost_thread -lboost_system -lboost_regex

#include <string>
#include <boost/regex.hpp>
#include <boost/asio.hpp>
#include <vector>
#include <utility>
#include <iostream>
#include <sstream>
#include <cstdlib>
#include <algorithm>
#include <iomanip>
 
// Comparison functor for std::sort: orders (language, count) pairs so that
// the pair with the larger count comes first.
struct Sort {
   bool operator( ) ( const std::pair<std::string,int> & lhs ,
                      const std::pair<std::string,int> & rhs ) const
   {
      return rhs.second < lhs.second ;
   }
} ;
 
int main( ) {
try {
//setting up an io service , with templated subelements for resolver and query
boost::asio::io_service io_service ;
boost::asio::ip::tcp::resolver resolver ( io_service ) ;
boost::asio::ip::tcp::resolver::query query ( "rosettacode.org" , "http" ) ;
boost::asio::ip::tcp::resolver::iterator endpoint_iterator = resolver.resolve( query ) ;
boost::asio::ip::tcp::resolver::iterator end ;
boost::asio::ip::tcp::socket socket( io_service ) ;
boost::system::error_code error = boost::asio::error::host_not_found ;
//looking for an endpoint the socket will be able to connect to
while ( error && endpoint_iterator != end ) {
socket.close( ) ;
socket.connect( *endpoint_iterator++ , error ) ;
}
if ( error )
throw boost::system::system_error ( error ) ;
//we send a request
boost::asio::streambuf request ;
std::ostream request_stream( &request ) ;
request_stream << "GET " << "/mw/index.php?title=Special:Categories&limit=800" << " HTTP/1.0\r\n" ;
request_stream << "Host: " << "rosettacode.org" << "\r\n" ;
request_stream << "Accept: */*\r\n" ;
request_stream << "Connection: close\r\n\r\n" ;
//send the request
boost::asio::write( socket , request ) ;
//we receive the response analyzing every line and storing the programming language
boost::asio::streambuf response ;
std::istream response_stream ( &response ) ;
boost::asio::read_until( socket , response , "\r\n\r\n" ) ;
boost::regex e( "<li><a href=\"[^<>]+?\">([a-zA-Z\\+#1-9]+?)</a>\\s?\\((\\d+) members\\)</li>" ) ;
//using the wrong regex produces incorrect sorting!!
std::ostringstream line ;
std::vector<std::pair<std::string , int> > languages ; //holds language and number of examples
boost::smatch matches ;
while ( boost::asio::read( socket , response , boost::asio::transfer_at_least( 1 ) , error ) ) {
line << &response ;
if ( boost::regex_search( line.str( ) , matches , e ) ) {
std::string lang( matches[2].first , matches[2].second ) ;
int zahl = atoi ( lang.c_str( ) ) ;
languages.push_back( std::make_pair( matches[ 1 ] , zahl ) ) ;
}
line.str( "") ;//we have to erase the string buffer for the next read
}
if ( error != boost::asio::error::eof )
throw boost::system::system_error( error ) ;
//we sort the vector entries , see the struct above
std::sort( languages.begin( ) , languages.end( ) , Sort( ) ) ;
int n = 1 ;
for ( std::vector<std::pair<std::string , int> >::const_iterator spi = languages.begin( ) ;
spi != languages.end( ) ; ++spi ) {
std::cout << std::setw( 3 ) << std::right << n << '.' << std::setw( 4 ) << std::right <<
spi->second << " - " << spi->first << '\n' ;
n++ ;
}
} catch ( std::exception &ex ) {
std::cout << "Exception: " << ex.what( ) << '\n' ;
}
return 0 ;
}

Sample output ( just the "top ten" ):

 1. 367 - Tcl
 2. 334 - Python
 3. 319 - Ruby
 4. 286 - C
 5. 277 - Perl
 6. 272 - OCaml
 7. 264 - Ada
 8. 241 - E
 9. 239 - AutoHotkey
10. 193 - Forth

[edit] Haskell

import Network.Browser
import Network.HTTP
import Network.URI
import Data.List
import Data.Maybe
import Text.XML.Light
import Control.Arrow
import Data.Ord
 
-- | Fetch @url@ with the HTTP browser (redirects allowed, browser output
-- silenced) and return the body of the response.
getRespons url = fmap (rspBody . snd) fetch
  where
    fetch = Network.Browser.browse $ do
      setAllowRedirects True
      setOutHandler $ const (return ()) -- quiet
      request $ getRequest url
 
 
-- | Print the programming languages ranked by the number of members of
-- their category, one @rank. count  name@ line each.
--
-- The language names come from the API (members of
-- Category:Programming_Languages); the member counts are scraped from the
-- lines of Special:Categories that start with @<li>@.
--
-- Languages without an entry in the scraped list, and API elements without a
-- @title@ attribute, are skipped; they used to abort the program through
-- 'fromJust'.
mostPopLang = do
  rsp  <- getRespons $ "http://www.rosettacode.org/w/api.php?action=query&list=" ++
                       "categorymembers&cmtitle=Category:Programming_Languages&cmlimit=500&format=xml"
  mbrs <- getRespons "http://www.rosettacode.org/w/index.php?title=Special:Categories&limit=5000"
  let xmls  = onlyElems $ parseXML rsp
      -- (\\ "Category:") removes the first occurrence of each of these
      -- characters, which for a title starting with "Category:" is exactly
      -- that prefix.
      langs = concatMap (mapMaybe (fmap (\\ "Category:") . findAttr (unqual "title"))
                           . filterElementsName (== unqual "cm")) xmls

      -- "<li><a ...>NAME</a> (N members)</li>"  ->  (NAME, N)
      catMbr = second (read . takeWhile (/= ' ') . drop 6) . break (== '<') . drop 1
                 . dropWhile (/= '>') . drop 5
      catNmbs :: [(String, Int)]
      catNmbs = map catMbr $ filter (isPrefixOf "<li>") $ lines mbrs

      -- keep only the languages for which a count was scraped
      counted = mapMaybe (\l -> fmap ((,) l) (lookup l catNmbs)) langs

      -- The rank column is padded to six characters, as in the sample output
      -- listed with this solution.
      printFmt (n, (l, m)) = putStrLn $ take 6 (show n ++ ".     ") ++ show m ++ "  " ++ l

  mapM_ printFmt $ zip [1 ..] $ sortBy (flip (comparing snd)) counted

First 20:

*Main> mostPopLang
1.    421  Tcl
2.    392  Python
3.    365  PicoLisp
4.    363  J
5.    360  Ruby
6.    354  C
7.    344  Haskell
8.    337  OCaml
9.    316  Perl
10.   308  PureBasic
11.   302  AutoHotkey
12.   299  Common Lisp
13.   295  D
14.   295  Java
15.   293  Ada
16.   278  Oz
17.   260  R
18.   259  C sharp
19.   257  C++
20.   255  ALGOL 68

[edit] HicEst

! Rank Rosetta Code categories by member count.
! The category list is read from the clipboard, so the text of the
! Special:Categories page has to be copied there before running.
! Result: a dialog listing the entries with the largest counts first,
! after discarding entries that contain one of the lexicon words below.
CHARACTER cats*50000, catlist*50000, sortedCat*50000, sample*100
DIMENSION RankNr(1)
 
READ(ClipBoard) cats
catlist = ' '
pos = 1 ! find language entries like * 100 doors (2 members)
nr = 0
! after next '*' find next "name" = '100 doors' and next "(...)" = '(2 members)' :
1 EDIT(Text=cats, SetPos=pos, Right='*', R, Mark1, R='(', Left, M2, Parse=name, R=2, P=members, GetPos=pos)
IF(pos > 0) THEN
READ(Text=members) count
! only entries with a positive count are kept
IF(count > 0) THEN
nr = nr + 1
WRITE(Text=catlist, Format='i4, 1x, 2a', APPend) count, name, ';'
ENDIF
GOTO 1 ! no WHILE in HicEst
ENDIF ! catlist is now = " 1 ... User ; 2 100 doors ; 3 3D ; 8 4D ; ..."
 
! RankNr receives the sort order of the ';'-separated items of catlist
ALLOCATE(RankNr, nr)
EDIT(Text=catlist, SePaRators=';', Option=1+4, SorTtoIndex=RankNr) ! case (1) and back (4)
 
sortedCat = ' ' ! get the sorted list in the sequence of RankNr:
ok = 0
DO i = 1, nr
EDIT(Text=catlist, SePaRators=';', ITeM=RankNr(i), CoPyto=sample)
! discard is nonzero when sample contains one of the lexicon words (presumably its index - TODO confirm)
discard = EDIT(Text=sample, LeXicon='user,attention,solutions,tasks,program,language,implementation,')
IF(discard == 0) THEN ! removes many of the non-language entries
ok = ok + 1
WRITE(Text=sortedCat, APPend, Format='F5.0, 2A') ok, TRIM(sample), $CRLF
ENDIF
ENDDO
! show the ranked list in a dialog
DLG(Text=sortedCat, Format=$CRLF)
END
2010-04-24 18:31
Top 10 entries (not all are languages)
1. 394 Tcl
2. 363 Python
3. 346 Ruby
4. 328 J
5. 319 C
6. 317 OCaml
7. 315 Haskell
8. 298 Perl
9. 288 WikiStubs
10. 281 Common Lisp

[edit] J

Solution:
 
NB. Rank Rosetta Code categories by member count.
NB. Entry point: rcPopLangs url, e.g. the Special:Categories page.
require 'web/gethttp xml/sax/x2j regex'
 
NB. The parser definitions below live in the class (locale) rcPopLang.
x2jclass 'rcPopLang'
 
NB. x rx y: box 1 of row 0 of the matches of regex x in y, with two empty
NB. boxes appended so that the fetch yields an empty box when nothing
NB. matches. (Presumably box 1 is the first capture group - TODO confirm.)
rx =: (<0 1) {:: (2#a:) ,~ rxmatches rxfrom ]
 
NB. Parser rules, one "path := action" per line. As written:
NB.   /      starts with an empty 0-by-2 table langs and yields langs
NB.   ul/li  appends (lang ; member count) to langs unless lang is empty
NB.   li/a   sets lang from the link text, left empty when the text contains
NB.          User, Tasks, Omit, attention, operations or by
'Popular Languages' x2jDefn
/  := langs  : langs =: 0 2 $ a:
html/body/div/div/div/div/div/ul/li  := langs =: langs ,^:(a:~:{.@[)~ lang ; ' \((\d+) members?\)' rx y
html/body/div/div/div/div/div/ul/li/a := lang =: '^\s*((?:.(?!User|Tasks|Omit|attention|operations|by))+)\s*$' rx y
)
 
cocurrent'base'
 
NB. sortTab: sort the rows of the table descending by the number in the
NB. last column; text that is not a number counts as negative infinity.
sortTab =. \: __ ". [: ;:^:_1: {:"1
NB. formatTab: sort, then build the printed lines. (Presumably it prefixes
NB. a "rank." column and puts '-' between count and name - TODO confirm.)
NB. f. fixes the definition so the local sortTab need not survive.
formatTab =: [: ;:^:_1: [: (20 A. (<'-') , |. , [: ('.' <"1@:,.~ ":) 1 + 1 i.@,~ 1{$)&.|: sortTab f.
 
NB. rcPopLangs url: fetch the page, parse it with the class above, format.
rcPopLangs =: formatTab@:process_rcPopLang_@:gethttp
 
Example:
 
10 {. rcPopLangs 'http://www.rosettacode.org/w/index.php?title=Special:Categories&limit=800'
1. 354 - Tcl
2. 321 - Python
3. 268 - OCaml
4. 268 - J
5. 267 - C
6. 265 - Common Lisp
7. 261 - Ada
8. 255 - Perl
9. 253 - Haskell
10. 243 - Java
 

Notes:

See some notes on the J solution.

[edit] Oz

Library: OzHttpClient

Using web scraping. Does not filter non-language categories.

% Rank Rosetta Code categories by member count (web scraping).
% Downloads Special:Categories, extracts every (category, count) pair and
% prints the 20 largest. Non-language categories are not filtered out.
declare
[HTTPClient] = {Module.link ['x-ozlib://mesaros/net/HTTPClient.ozf']}
[Regex] = {Module.link ['x-oz://contrib/regex']}
 
% GetPage: fetch RawUrl with an HTTP GET and return the body
% (the sOut field of the client's output parameters).
fun {GetPage RawUrl}
Client = {New HTTPClient.urlGET init(inPrms(toFile:false toStrm:true) _)}
Url = {VirtualString.toString RawUrl}
OutParams
HttpResponseParams
in
{Client getService(Url ?OutParams ?HttpResponseParams)}
{Client closeAll(true)}
OutParams.sOut
end
 
% GetCategories: return a list of Category#Count pairs, one for every
% "<li><a ...>NAME</a> (N member" occurrence in Doc.
fun {GetCategories Doc}
{Map {Regex.allMatches "<li><a[^>]+>([^<]+)</a> \\(([0-9]+) member" Doc}
fun {$ Match}
% group 1 is the link text, group 2 the digits of the count
Category = {Regex.group 1 Match Doc}
Count = {String.toInt {ByteString.toString {Regex.group 2 Match Doc}}}
in
Category#Count
end
}
end
 
Url = "http://www.rosettacode.org/mw/index.php?title=Special:Categories&limit=5000"
 
{System.showInfo "Retrieving..."}
Doc = {GetPage Url}
 
{System.showInfo "Parsing..."}
Cs = {GetCategories Doc}
in
% Walk the categories sorted by descending count together with the rank
% counter I in 1..20, so at most 20 lines are printed.
for
Cat#Count in {Sort Cs fun {$ _#C1 _#C2} C1 > C2 end}
I in 1..20
do
{System.showInfo I#". "#Count#" - "#Cat}
end

Output:

1. 371 - Tcl
2. 369 - Programming Tasks
3. 338 - Python
4. 324 - Ruby
5. 306 - Haskell
...
17. 225 - Oz
18. 214 - C++
19. 209 - JavaScript
20. 208 - ALGOL 68

[edit] Perl

Sorting only programming languages.

# Rank the members of Category:Programming Languages by the size of
# their category, using the MediaWiki API (generator=categorymembers with
# prop=categoryinfo). Prints "rank. count - name" lines.
use strict;
use warnings;
use MediaWiki::API;

# Direct method call instead of the ambiguous indirect-object "new X(...)".
my $api = MediaWiki::API->new({api_url => 'http://rosettacode.org/mw/api.php'});

# api() returns undef on failure and records the reason in $api->{error};
# report it instead of dereferencing undef further down.
my $response = $api->api
   ({action => 'query',
     generator => 'categorymembers',
     gcmtitle => 'Category:Programming Languages',
     gcmlimit => 'max',
     prop => 'categoryinfo'})
   or die 'MediaWiki API error ' . $api->{error}{code} . ': '
        . $api->{error}{details} . "\n";

# No pages at all (empty category) is treated as an empty ranking.
my $pages = $response->{query}{pages} || {};

# Build [name, size] pairs; a category without categoryinfo counts as 0.
my @pairs;
for my $page (values %$pages)
   {(my $lang = $page->{title}) =~ s/\ACategory://;
    push @pairs, [$lang, $page->{categoryinfo}{size} || 0];}

# Largest category first; ties are ordered by name.
@pairs = sort {$b->[1] <=> $a->[1] or $a->[0] cmp $b->[0]} @pairs;

my $n = 0;
for my $pair (@pairs)
   {my ($lang, $tasks) = @$pair;
    printf "%3d. %3d - %s\n", ++$n, $tasks, $lang;}

[edit] PicoLisp

# Rank Rosetta Code categories by member count (web scraping).
# Reads Special:Categories, collects a (count . name) cell for every
# category link, sorts the cells, reverses the result so the largest count
# comes first, and prints "rank. count - name" for each.
(load "@lib/http.l")
 
(for (I . X)                          # I: rank counter, X: (count . name)
(flip
(sort
(make
(client "rosettacode.org" 80
"mw/index.php?title=Special:Categories&limit=5000"
(while (from "<li><a href=\"/wiki/Category:")   # skip to the next category link
(let Cat (till "\"")                  # name as it appears in the link address
(from "(")                            # skip to the member count
(when (format (till " " T))           # only when the text is a number
(link (cons @ (ht:Pack Cat))) ) ) ) ) ) ) )
(prinl (align 3 I) ". " (car X) " - " (cdr X)) )

Output (07apr10):

  1. 390 - Tcl
  2. 389 - Programming_Tasks
  3. 359 - Python
  4. 344 - Ruby
  5. 326 - J
  6. 316 - OCaml
  7. 315 - C
  8. 312 - Haskell
  9. 296 - Perl
 10. 281 - Common_Lisp
...

[edit] PureBasic

Some lines in this example are too long (more than 80 characters). Please fix the code if it's possible and remove this message.
; Rank Rosetta Code categories by member count and print the top 20.
;
; Changes from the earlier version:
;  - the ignore list is delimiter-wrapped and matched on whole names; the
;    plain substring test also discarded "C", "E" and "R", because those
;    letters occur inside the ignored names
;  - a linked list replaces the fixed 2000-element array, which could
;    overflow on a longer page
;  - counts and names are taken at their full length (they were cut to
;    3 digits and 30 characters)
;  - exactly 20 rows are printed (it was 21) and the file is checked and closed
Structure Language
  count.i
  Name.s
EndStructure

NewList Row.Language()

; Categories that are not programming languages, each wrapped in "|".
ignore$ = "|Basic language learning|Encyclopedia|Implementations|"
ignore$ + "Language Implementations|Language users|"
ignore$ + "Maintenance/OmitCategoriesCreated|Programming Languages|"
ignore$ + "Programming Tasks|RCTemplates|Solutions by Library|"
ignore$ + "Solutions by Programming Language|"
ignore$ + "Solutions by Programming Task|"
ignore$ + "Unimplemented tasks by language|WikiStubs|"
ignore$ + "Examples needing attention|Impl needed|"

URLDownloadToFile_( #Null, "http://www.rosettacode.org/w/index.php?title=Special:Categories&limit=5000", "special.htm", 0, #Null)

If ReadFile(0, "special.htm")
  While Not Eof(0)
    x1$ = ReadString(0)
    ; expected shape of a line:  ...">NAME</a> (COUNT members)...
    nameAt  = FindString(x1$, Chr(34) + ">", 1)
    countAt = FindString(x1$, "</a> (", 1)
    If nameAt And countAt > nameAt And FindString(x1$, "member", countAt)
      name$ = Mid(x1$, nameAt + 2, countAt - nameAt - 2)
      ; Val() stops at the first non-digit, so any number of digits works
      count = Val(Mid(x1$, countAt + 6))
      If name$ <> "" And count > 0
        If FindString(ignore$, "|" + name$ + "|", 1) = 0
          AddElement(Row())
          Row()\Name  = name$
          Row()\count = count
        EndIf
      EndIf
    EndIf
  Wend
  CloseFile(0)
EndIf

SortStructuredList(Row(), #PB_Sort_Descending, OffsetOf(Language\count), #PB_Sort_Integer)

OpenConsole()
rank = 0
ForEach Row()
  rank + 1
  PrintN( Str(rank) + ". " + Str(Row()\count) + " - " + Row()\Name)
  If rank = 20
    Break
  EndIf
Next
Input()

[edit] Python

Works with: Python version 2.6

This uses MediaWiki's JSON API to query the members of Category:Programming Languages and then scrapes Special:Categories for the number of pages in each language's category.

"""Rank Rosetta Code programming languages by category size.

The language names come from MediaWiki's JSON API (members of
Category:Programming_Languages); the member counts are scraped from
Special:Categories.  Runs on Python 2.6+ and Python 3.
"""
import contextlib
import json
import re

try:  # Python 3
    from urllib.request import urlopen
except ImportError:  # Python 2
    from urllib import urlopen

LANGUAGES_URL = ("http://www.rosettacode.org/w/api.php?action=query"
                 "&list=categorymembers&cmtitle=Category:Programming_Languages"
                 "&cmlimit=500&format=json")
CATEGORIES_URL = ("http://www.rosettacode.org/w/index.php"
                  "?title=Special:Categories&limit=5000")

_PREFIX = "Category:"
# Raw string, and "members?" so that "(1 member)" is recognised as well.
_CATEGORY_RE = re.compile(r'title="Category:(.+?)">.+?</a> \((\d+) members?\)')


def fetch(url):
    """Return the body of ``url`` decoded as UTF-8 text."""
    with contextlib.closing(urlopen(url)) as response:
        return response.read().decode("utf-8")


def language_names(api_json):
    """Return the set of language names found in a categorymembers reply.

    The reply is parsed as JSON (rather than matched with a regex) so that
    escaped characters in titles are decoded properly.  Titles that do not
    start with "Category:" are ignored.
    """
    members = json.loads(api_json)["query"]["categorymembers"]
    return set(m["title"][len(_PREFIX):] for m in members
               if m["title"].startswith(_PREFIX))


def rank_languages(langs, categories_html):
    """Return ``[(name, members), ...]`` for the categories in ``langs``.

    ``categories_html`` is the HTML of Special:Categories.  The result is
    sorted by descending member count; ties keep their order on the page.
    """
    counted = [(name, int(num))
               for name, num in _CATEGORY_RE.findall(categories_html)
               if name in langs]
    return sorted(counted, key=lambda pair: pair[1], reverse=True)


def main():
    """Download both pages and print the ranking."""
    langs = language_names(fetch(LANGUAGES_URL))
    ranking = rank_languages(langs, fetch(CATEGORIES_URL))
    for rank, (name, count) in enumerate(ranking, 1):
        # single parenthesised argument: valid in Python 2 and Python 3
        print("%3d. %3d - %s" % (rank, count, name))


if __name__ == "__main__":
    main()

Output (as of May 30, 2010):

 1. 397 - Tcl
 2. 368 - Python
 3. 350 - Ruby
 4. 333 - J
 5. 332 - C
 6. 322 - Haskell
 7. 322 - OCaml
 8. 302 - Perl
 9. 290 - Common Lisp
10. 289 - AutoHotkey
    . . .

[edit] R

 
# Rank languages by the number of members of their "<language> User"
# category, using the MediaWiki API, and show the 15 largest.
library(RJSONIO)
langUrl <- "http://rosettacode.org/mw/api.php?action=query&format=json&cmtitle=Category:Solutions_by_Programming_Language&list=categorymembers&cmlimit=500"

# Names of the language categories, without the "Category:" prefix.
languages <- fromJSON(langUrl)$query$categorymembers
languages <- sapply(languages, function(x) sub("Category:", "", x$title))

# Number of members of the category "<lang> User".
# fails if there are more than 500 users per language
user <- function (lang) {
  userBaseUrl <- "http://rosettacode.org/mw/api.php?action=query&format=json&list=categorymembers&cmlimit=500&cmtitle=Category:"
  # reserved = TRUE also escapes characters such as "+", "#" and "&".
  # Without it "C++ User" was sent with raw "+" signs, which the server
  # reads as spaces, so the count of "C User" came back instead.
  userUrl <- paste(userBaseUrl, URLencode(paste(lang, " User", sep=""), reserved=TRUE), sep="")
  length(fromJSON(userUrl)$query$categorymembers)
}

users <- sapply(languages, user)
head(sort(users, decreasing=TRUE),15)
 

Output (as of March, 13, 2010)

         C        C++       Java     Python JavaScript       Perl UNIX Shell 
        55         55         37         32         27         27         22 
    Pascal      BASIC        PHP        SQL    Haskell        AWK    C sharp 
        20         19         19         18         17         16         16 
      Ruby 
        14 

[edit] Ruby

Works with: Ruby version 1.8.7. Now that there are more than 500 categories, the URL given in the task description is insufficient. I use the RC API to grab the categories, and then count the members of each category.

Uses the RosettaCode module from Count programming examples#Ruby

# Rank the members of Category:Programming Languages by the number of
# pages in each language's category, using the Rosetta Code API.
require 'rosettacode'

# Collect the language category titles.
langs = []
RosettaCode.category_members("Programming Languages") { |name| langs << name }

# Ask for the category info of 50 titles per request.
langcount = {}
langs.each_slice(50) { |batch|
  query = {
    "action" => "query",
    "prop" => "categoryinfo",
    "format" => "xml",
    "titles" => batch.join("|"),
  }
  url = RosettaCode.get_api_url(query)

  doc = REXML::Document.new(open(url))
  REXML::XPath.each(doc, "//page") { |page|
    title = page.attribute("title").value
    info = REXML::XPath.first(page, "categoryinfo")
    # a page without a categoryinfo element counts as zero pages
    langcount[title] = info ? info.attribute("pages").value.to_i : 0
  }
}

puts Time.now
puts "There are #{langcount.length} languages"
puts "the top 25:"
ranked = langcount.sort_by { |_title, pages| pages }.reverse
ranked.first(25).each_with_index { |(lang, count), i|
  puts "#{i+1}. #{count} - #{lang.sub(/Category:/, '')}"
}

Results

2010-07-08 14:52:46 -0500
There are 306 languages
the top 25:
1. 399 - Tcl
2. 370 - Python
3. 352 - Ruby
4. 338 - J
5. 337 - C
6. 333 - PicoLisp
7. 322 - OCaml
8. 322 - Haskell
9. 299 - Perl
10. 299 - AutoHotkey
11. 288 - Common Lisp
12. 280 - Java
13. 275 - Ada
14. 270 - D
15. 267 - Oz
16. 253 - R
17. 252 - PureBasic
18. 245 - E
19. 243 - C++
20. 241 - C sharp
21. 239 - ALGOL 68
22. 236 - JavaScript
23. 221 - Forth
24. 207 - Clojure
25. 201 - Fortran

[edit] Tcl

[edit] By web scraping

# Rank Rosetta Code categories by member count (web scraping).
# Downloads Special:Categories, skips the known non-language categories
# and prints "rank. count - name" for everything else, largest first.
package require Tcl 8.5
package require http

# limit=5000 as in the task description; limit=1000 truncated the list.
set response [http::geturl http://rosettacode.org/mw/index.php?title=Special:Categories&limit=5000]
if {[http::status $response] ne "ok" || [http::ncode $response] != 200} {
    set problem [http::code $response]
    http::cleanup $response
    error "could not retrieve the category list: $problem"
}
set page [http::data $response]
# Release the state kept for the request once the data has been copied.
http::cleanup $response

# Categories that are not programming languages
array set ignore {
    "Basic language learning"           1
    "Encyclopedia"                      1
    "Implementations"                   1
    "Language Implementations"          1
    "Language users"                    1
    "Maintenance/OmitCategoriesCreated" 1
    "Programming Languages"             1
    "Programming Tasks"                 1
    "RCTemplates"                       1
    "Solutions by Library"              1
    "Solutions by Programming Language" 1
    "Solutions by Programming Task"     1
    "Unimplemented tasks by language"   1
    "WikiStubs"                         1
    "Examples needing attention"        1
    "Impl needed"                       1
}

# Start from an empty list so that a page without any match prints nothing
# instead of failing on an unset variable.
set langs {}
foreach line [split $page \n] {
    # link text followed by "(N member..." -> category name and count
    if {[regexp {>([^<]+)</a> \((\d+) member} $line -> lang num]} {
        if {![info exists ignore($lang)]} {
            lappend langs [list $num $lang]
        }
    }
}

set i 0
foreach entry [lsort -integer -index 0 -decreasing $langs] {
    lassign $entry num lang
    puts [format "%d. %d - %s" [incr i] $num $lang]
}

Produces this output on 31 July 2009 (top 15 entries only):

1. 329 - Tcl
2. 292 - Python
3. 270 - Ruby
4. 250 - C
5. 247 - Ada
6. 238 - Perl
7. 223 - E
8. 221 - Java
9. 220 - AutoHotkey
10. 219 - OCaml
11. 210 - Haskell
12. 197 - ALGOL 68
13. 188 - D
14. 179 - C++
15. 175 - Forth
……

[edit] By using the API

Inspired by the Ruby version...

Works with: Tcl version 8.5
Library: tDOM

package require Tcl 8.5
package require http
package require tdom
 
# The "rc" ensemble: a small client for the Rosetta Code MediaWiki API.
#   rc members page varName script   - run script for every member of a category
#   rc count cats catVar countVar script - run script for every category/count
namespace eval rc {
    ### Utility function that handles the low-level querying ###
    # q  - dictionary of query parameters ("action query" is added here)
    # xp - XPath expression selecting the nodes of interest in the reply
    # vn - name of the variable in the caller that receives each node
    # b  - script evaluated in the caller for each node
    # Every parsed document is deleted once its nodes have been handled;
    # tDOM keeps documents alive until told otherwise, so one document per
    # request used to leak. Nodes are therefore only valid while b runs.
    proc rcq {q xp vn b} {
        upvar 1 $vn v
        dict set q action "query"
        # Loop to pick up all results out of a category query
        while 1 {
            set url "http://rosettacode.org/mw/api.php?[http::formatQuery {*}$q]"
            puts -nonewline stderr . ;# Indicate query progress...
            set token [http::geturl $url]
            set doc [dom parse [http::data $token]]
            http::cleanup $token

            # Spoon out the DOM nodes that the caller wanted
            foreach v [$doc selectNodes $xp] {
                uplevel 1 $b
            }

            # See if we want to go round the loop again; the continuation
            # value is copied out before the document is released
            set next [$doc selectNodes "//query-continue/categorymembers"]
            set more [llength $next]
            if {$more} {
                dict set q cmcontinue [[lindex $next 0] getAttribute "cmcontinue"]
            }
            $doc delete
            if {!$more} break
        }
    }

    ### API function: Iterate over the members of a category ###
    # page    - category name without the "Category:" prefix
    # varName - variable in the caller that receives each member title
    # script  - script evaluated in the caller for each member
    proc members {page varName script} {
        upvar 1 $varName var
        set query [dict create cmtitle "Category:$page" {*}{
            list "categorymembers"
            format "xml"
            cmlimit "500"
        }]
        rcq $query "//cm" item {
            # Tell the caller's script about the item
            set var [$item getAttribute "title"]
            uplevel 1 $script
        }
    }

    ### API function: Count the members of a list of categories ###
    # cats     - list of category titles
    # catVar   - variable in the caller that receives each title
    # countVar - variable in the caller that receives the page count
    # script   - script evaluated in the caller for each category
    proc count {cats catVar countVar script} {
        upvar 1 $catVar cat $countVar count
        set query [dict create prop "categoryinfo" format "xml"]
        # Query 40 titles per request
        for {set n 0} {$n<[llength $cats]} {incr n 40} {
            dict set query titles [join [lrange $cats $n $n+39] |]
            rcq $query "//page" item {
                # Get title and count
                set cat [$item getAttribute "title"]
                set info [$item getElementsByTagName "categoryinfo"]
                if {[llength $info]} {
                    set count [[lindex $info 0] getAttribute "pages"]
                } else {
                    set count 0
                }
                # Let the caller's script figure out what to do with them
                uplevel 1 $script
            }
        }
    }

    ### Assemble the bits into a whole API ###
    namespace export members count
    namespace ensemble create
}
 
# Both lists start out empty so that an empty reply gives an empty ranking
# instead of an error about an unset variable.
set langs {}
set count {}

# Get the list of programming languages
rc members "Solutions by Programming Language" lang {
    lappend langs $lang
}
# Get the count of solutions for each, stripping "Category:" prefix
rc count $langs l c {
    lappend count [list [regsub {^Category:} $l {}] $c]
}
puts stderr "" ;# Because of the progress dots...
# Print the output
puts "There are [llength $count] languages"
puts "Here are the top fifteen:"
set count [lsort -index 1 -integer -decreasing $count]
set n 0
foreach item [lrange $count 0 14] {
    # item is {name count}; the format string prints rank, count, name
    puts [format "%1\$3d. %3\$3d - %2\$s" [incr n] {*}$item]
}

[edit] UnixPipes

This example is incorrect. It does not accomplish the given task. Please fix the code and remove this message.


# Rank Rosetta Code categories by member count using only standard tools.
#  - printf instead of echo: echo does not expand "\n" portably
#  - every "|" ends its line: a line that starts with "|" is a syntax error
#  - limit=5000 as in the task description (500 truncated the list)
#  - "[0-9][0-9]*" instead of the non-portable "\+"
#  - the last stage adds the rank column shown in the task's sample output
printf 'GET %s HTTP/1.0\r\n\r\n' 'http://www.rosettacode.org/w/index.php?title=Special:Categories&limit=5000' |
nc www.rosettacode.org 80 |
sed -n -e 's,<[^>]*>,,g' -e 's,^\([^(]*\)(\([^)]*\) members*) *,\2 - \1,g' -e '/^[0-9][0-9]*./p' |
sort -rn |
awk '{ print NR ". " $0 }'
Personal tools
Support