Rosetta Code/Rank languages by popularity
From Rosetta Code
Sort the most popular programming languages based on the number of members in their Rosetta Code categories (from http://www.rosettacode.org/w/index.php?title=Special:Categories&limit=5000)
Sample output on 30 October 2009:
1. 351 - Tcl 2. 314 - Python 3. 307 - Ruby 4. 265 - Common Lisp 5. 265 - OCaml 6. 263 - C 7. 260 - J 8. 258 - Ada 9. 250 - Perl 10. 242 - Java 11. 237 - E 12. 236 - Haskell 13. 227 - AutoHotkey 14. 226 - D 15. 213 - R 16. 195 - ALGOL 68 17. 194 - C++ 18. 185 - Forth 19. 171 - JavaScript 20. 161 - Fortran ...
Filtering wrong results is optional. You can check against http://www.rosettacode.org/wiki/Special:MostLinkedCategories
Contents |
[edit] Ada
Library: AWS
NB: The 'ASCII.Quotation' in the declaration of Title_Str is a rather clunky workaround for the Ada->HTML engine not being able to colourize '"' correctly.
with Ada.Integer_Text_IO; use Ada.Integer_Text_IO;
with Ada.Strings.Fixed; use Ada.Strings.Fixed;
with Ada.Strings.Unbounded; use Ada.Strings.Unbounded;
with Ada.Text_IO; use Ada.Text_IO;
with Ada.Containers.Ordered_Sets;
with Ada.Strings.Less_Case_Insensitive;
with AWS.Client;
with AWS.Response;
--  Rank Rosetta Code categories by their number of members.
--
--  Downloads the Special:Categories page, extracts every
--  (category name, member count) pair from the HTML and prints the pairs
--  in descending order of count as "<place>. <count> - <name>".
procedure Test is
   use Ada.Strings;

   function "+" (S : String) return Unbounded_String renames To_Unbounded_String;

   --  One category name together with its member count.
   type A_Language_Count is
      record
         Count    : Integer := 0;
         Language : Unbounded_String;
      end record;

   --  Two entries are equal only when both the count and the name match.
   --  (Comparing the count alone made unrelated languages "equal".)
   function "=" (L, R : A_Language_Count) return Boolean is
   begin
      return L.Count = R.Count and then L.Language = R.Language;
   end "=";

   --  Ordering used by the set: by 'Count', then by language name
   --  (case-insensitive).
   function "<" (L, R : A_Language_Count) return Boolean is
   begin
      return L.Count < R.Count
        or else (L.Count = R.Count
                 and then Less_Case_Insensitive (Left  => To_String (L.Language),
                                                 Right => To_String (R.Language)));
   end "<";

   package Sets is new Ada.Containers.Ordered_Sets (A_Language_Count);
   use Sets;

   Counts : Set;

   --  Scan S for every occurrence of
   --     title="Category:<name>"> ... </a> (<count> ...
   --  and insert (<count>, <name>) into Counts.
   --
   --  The scan is iterative and stops at the first position where the
   --  pattern can no longer be found.  An entry whose count is not a plain
   --  number, or which is already present in the set, is skipped without
   --  aborting the rest of the scan.
   procedure Find_Counts (S : String) is
      Title_Str : constant String := "title=" & ASCII.Quotation & "Category:";
      End_A_Str : constant String := "</a> (";
      From      : Positive := S'First;
   begin
      while From <= S'Last loop
         declare
            Title_At    : constant Natural := Index (S (From .. S'Last), Title_Str);
            Name_First  : Natural;
            Bracket_At  : Natural;
            End_A_At    : Natural;
            Count_First : Natural;
            Space_At    : Natural;
            Position    : Cursor;
            Inserted    : Boolean;
         begin
            exit when Title_At = 0;
            Name_First := Title_At + Title_Str'Length;

            Bracket_At := Index (S (Name_First .. S'Last), ">");
            exit when Bracket_At = 0;

            End_A_At := Index (S (Bracket_At + 1 .. S'Last), End_A_Str);
            exit when End_A_At = 0;
            Count_First := End_A_At + End_A_Str'Length;

            Space_At := Index (S (Count_First .. S'Last), " ");
            exit when Space_At = 0;

            begin
               --  The name ends two characters before '>' (the character
               --  just before '>' is the closing quotation mark).
               --  This form of Insert reports duplicates through Inserted
               --  instead of raising Constraint_Error.
               Counts.Insert
                 (New_Item => (Count    => Natural'Value (S (Count_First .. Space_At - 1)),
                               Language => +S (Name_First .. Bracket_At - 2)),
                  Position => Position,
                  Inserted => Inserted);
            exception
               when Constraint_Error =>
                  null;  --  Count text was not a plain number: skip this entry
            end;

            --  Continue after the count that was just consumed
            From := Space_At + 1;
         end;
      end loop;
   end Find_Counts;

   Place : Natural := 1;

   --  Print one ranked line and advance the running place number.
   procedure Display (C : Cursor) is
   begin
      Put (Place, Width => 1);             Put (". ");
      Put (Element (C).Count, Width => 1); Put (" - ");
      Put_Line (To_String (Element (C).Language));
      Place := Place + 1;
   end Display;

   Http_Source : constant AWS.Response.Data :=
     AWS.Client.Get ("http://www.rosettacode.org/w/index.php?title=Special:Categories&limit=500");
begin
   Find_Counts (AWS.Response.Message_Body (Http_Source));
   --  The set is ordered ascending, so iterate backwards for "most popular first"
   Counts.Reverse_Iterate (Display'Access);
end Test;
[edit] ALGOL 68
Works with: ALGOL 68G version mk8+ for Unix and Linux - tested with release mk15-0.8b.fc9.i386 - uses non-standard library routines http content and grep in string. Note: the routine http content is currently not available on Win32 systems.
# Inspect the HTTP status line at the start of page.                      #
# Yields TRUE when a 200 status line is found.  Otherwise prints the      #
# status line that was found (or a generic message when none can be       #
# located) and yields FALSE.  grep in string yields 0 on a match.         #
PROC good page = (REF STRING page) BOOL:
IF grep in string("^HTTP/[0-9.]* 200", page, NIL, NIL) = 0
THEN TRUE
ELSE IF INT start, end;
# locate the whole status line so that it can be shown to the user #
grep in string("^HTTP/[0-9.]* [0-9]+ [a-zA-Z ]*", page,
start, end) = 0
THEN print (page[start : end])
ELSE print ("unknown error retrieving page")
FI;
FALSE
FI;
# A singly linked list of strings that records its first link, its last  #
# link and the number of links (upb).                                     #
MODE LISTOFSTRING = STRUCT(REF LINK first, last, INT upb);
MODE LINK = STRUCT(STRING value, REF LINK next);
PRIO LISTINIT = 1;
# Initialise new so that first is both its first and last link; upb is 0 #
# when first is NIL and 1 otherwise.  Yields new.                         #
OP LISTINIT = (REF LISTOFSTRING new, REF LINK first)REF LISTOFSTRING: (
new := (first, first, (first IS REF LINK(NIL) | 0 | 1 ));
new
);
# Append item to list as a new heap-allocated link and increment upb.    #
OP +:= = (REF LISTOFSTRING list, []CHAR item)VOID: (
HEAP LINK new := (STRING(item), REF LINK(NIL));
IF first OF list IS REF LINK(NIL) THEN
first OF list := new
ELSE
next OF last OF list := new
FI;
last OF list := new;
upb OF list +:= 1
);
# Number of links recorded in the list.                                   #
OP UPB = (LISTOFSTRING list)INT: upb OF list;
# Copy the values of the list, in order, into a row of strings.          #
OP ARRAYOFSTRING = (LISTOFSTRING list)[]STRING:(
[UPB list]STRING out;
REF LINK this := first OF list;
FOR i TO UPB list DO out[i] := value OF this; this := next OF this OD;
out
);
# Named result codes compared against the value of grep in string.       #
INT match=0, no match=1, out of memory error=2, other error=3;
# Split beetles around every match of the regular expression re split.   #
# For each match two strings are appended to the result: the text that   #
# precedes the match and then the matched text itself.                    #
PROC re split = (STRING re split, REF STRING beetles)[]STRING:(
LISTOFSTRING out := (NIL, NIL, 0); # LISTINIT REF LINK NIL; #
INT start := 1, pos, end;
# pos and end are used as offsets relative to start, i.e. relative to the #
# slice that was searched                                                 #
WHILE grep in string(re split, beetles[start:], pos, end) = match DO
out +:= beetles[start:start+pos-2];
out +:= beetles[start+pos-1:start+end-1];
start +:= end
OD;
# NOTE(review): this appends the remainder only when start is already    #
# past the end of beetles; a trailing piece after the last match looks   #
# like it is dropped - confirm whether start <= UPB beetles was intended #
IF start > UPB beetles THEN
out +:= beetles[start:]
FI;
ARRAYOFSTRING(out)
);
# Main program: fetch the category page, split it into tags and text,    #
# collect a (members, category) pair for each category that is not on    #
# the ignore list, sort the pairs and print the first ten.               #
IF STRING reply;
INT rc =
http content (reply, "www.rosettacode.org", "http://www.rosettacode.org/w/index.php?title=Special:Categories&limit=500", 0);
rc /= 0 OR NOT good page (reply)
THEN print (("Error:",strerror (rc)))
ELSE
STRING # hack: HTML should be parsed by an official HTML parsing library #
re html tag = "<[^>]*>",
re a href category = "^<a href=""/wiki/Category:.*"" title=",
re members = "([1-9][0-9]* members)";
MODE STATISTIC = STRUCT(INT members, STRING category);
FLEX[0]STATISTIC stats;
# Append item by copying the row into a new row that is one longer.      #
OP +:= = (REF FLEX[]STATISTIC in out, STATISTIC item)VOID:(
[LWB in out: UPB in out+1]STATISTIC new;
new[LWB in out: UPB in out]:=in out;
new[UPB new]:=item;
in out := new
);
# hack: needs to be manually maintained #
STRING re ignore ="Programming Tasks|WikiStubs|Maintenance/OmitCategoriesCreated|"+
"Unimplemented tasks by language|Programming Languages|"+
"Solutions by Programming Language|Implementations|"+
"Solutions by Library|Encyclopedia|Language users|"+
"Solutions by Programming Task|Basic language learning|"+
"RCTemplates|Language Implementations";
# Formats used to read the fields back out of a matched token.           #
FORMAT category fmt = $"<a href=""/wiki/Category:"g""" title=""Category:"g""""$;
STRING encoded category, category;
FORMAT members fmt = $" ("g" members)"$;
INT members;
FLEX[0]STRING tokens := re split(re html tag, reply);
FOR token index TO UPB tokens DO
STRING token := tokens[token index];
FILE file;
# An anchor tag for a category sets category; a following member-count   #
# token is then recorded against that most recently seen category.       #
IF grep in string(re a href category, token, NIL, NIL) = match THEN
associate(file, token);
make term(file,"""");
getf(file, (category fmt, encoded category, category));
close(file)
ELIF grep in string(re members, token, NIL, NIL) = match THEN
IF grep in string(re ignore, category, NIL, NIL) /= match THEN
associate(file, token);
getf(file, (members fmt, members));
stats +:= STATISTIC(members, category);
close(file)
FI
FI
OD;
# Ordering on the member count only; SORTSTRUCT is defined before the    #
# sort prelude is read - presumably the prelude sorts that mode using    #
# this operator, TODO confirm against prelude/sort.a68                   #
OP < = (STATISTIC a,b)BOOL:
members OF a < members OF b;
MODE SORTSTRUCT = STATISTIC;
PR READ "prelude/sort.a68" PR;
stats := in place shell sort reverse(stats);
# Print at most max entries as: place. members - category                #
INT max = 10;
FOR i TO (UPB stats > max | max | UPB stats) DO
printf(($g(-0)". "g(-0)" - "gl$,i,stats[i]))
OD
FI
Output:
1. 233 - Python 2. 222 - Ada 3. 203 - OCaml 4. 203 - C 5. 201 - Perl 6. 193 - Haskell 7. 182 - Java 8. 179 - D 9. 178 - ALGOL 68 10. 160 - Ruby
[edit] AutoHotkey
; Rank Rosetta Code categories by member count.
; Downloads the Special:Categories page to url.txt, pulls a name and a
; count out of every line containing "<li>", and shows the collected
; lines in a message box after a reverse numeric sort.

; Remove any previous download before fetching the page again
filedelete, url.txt
urldownloadtofile, http://www.rosettacode.org/w/index.php?title=Special:Categories&limit=500, url.txt
loop, read, url.txt
{
; Only list items are examined
if instr(A_LoopReadLine, "<li>")
{
; Pattern whose first subpattern is the text following title="Category:
reg = title=\"Category:(.+?)"
; The first subpattern of each match is read back below as name1 / count1
regexmatch(A_LoopReadLine, reg, name)
regexmatch(A_LoopReadLine, "(\d*)\smembers", count)
; Prepend "count - name" as a new line in front of what was collected so far
print = %count1% `- %name1% `n %print%
}
}
; R = reverse order, N = numeric comparison
sort, print, RN
msgbox %print%
[edit] AWK
Works with: gawk
This solution needs help from external tools to fetch the HTML from rosettacode.org, and also to do a numeric sort.
# Fetch the category page with nc, then let gawk extract "count - name"
# pairs, hand them to "sort -rn" through a coprocess, and number the
# sorted lines.
printf "GET %s HTTP/1.0\n\n" 'http://www.rosettacode.org/w/index.php?title=Special:Categories&limit=500' |
nc www.rosettacode.org 80 |
gawk '
    BEGIN {
        # Alternation of every category name that is not a language
        not_a_language = "Basic language learning" \
            "|Encyclopedia" \
            "|Implementations" \
            "|Language Implementations" \
            "|Language users" \
            "|Maintenance/OmitCategoriesCreated" \
            "|Programming Languages" \
            "|Programming Tasks" \
            "|RCTemplates" \
            "|Solutions by Library" \
            "|Solutions by Programming Language" \
            "|Solutions by Programming Task" \
            "|Unimplemented tasks by language" \
            "|WikiStubs"
    }

    # Lines that mention one of those categories are skipped entirely
    $0 ~ not_a_language { next }

    # Remember "count - name" for every list item that carries a member count
    {
        if (match($0, /<li.*>([^<]*)<\/a> \(([[:digit:]]+) members?/, hit))
            ranked[++total] = hit[2] " - " hit[1]
    }

    END {
        sort_cmd = "sort -rn"

        # Feed every remembered line to the coprocess, then close its input
        for (k = 1; k <= total; k++)
            print ranked[k] |& sort_cmd
        close(sort_cmd, "to")

        # Read the sorted lines back and prefix each with its place
        for (place = 1; (sort_cmd |& getline row) > 0; place++)
            print place ". " row
        close(sort_cmd)
    }
'
[edit] C#
Sorting only programming languages.
using System;
using System.Net;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using System.Collections;
using System.Collections.Generic;
/// <summary>
/// Ranks Rosetta Code programming languages by the number of members in
/// their category and prints one "place. name - count" line per language.
/// </summary>
class Program
{
    /// <summary>
    /// Downloads the list of language categories (JSON API) and the
    /// Special:Categories page (HTML), keeps the categories that are
    /// languages, and prints them in descending order of member count.
    /// </summary>
    /// <param name="args">Unused.</param>
    static void Main(string[] args)
    {
        string get1;
        string get2;
        // One client for both requests, disposed when the downloads are done
        using (WebClient client = new WebClient())
        {
            get1 = client.DownloadString("http://www.rosettacode.org/w/api.php?action=query&list=categorymembers&cmtitle=Category:Programming_Languages&cmlimit=500&format=json");
            get2 = client.DownloadString("http://www.rosettacode.org/w/index.php?title=Special:Categories&limit=500");
        }

        // A set gives constant-time membership tests in the loop below
        HashSet<string> langs = new HashSet<string>();
        foreach (Match lang in new Regex("\"title\":\"Category:(.+?)\"").Matches(get1))
        {
            langs.Add(lang.Groups[1].Value);
        }

        // "members?" also accepts the singular "(1 member)" that the page
        // shows for a category with a single entry
        Dictionary<string, int> qtdmbr = new Dictionary<string, int>();
        foreach (Match match in new Regex("title=\"Category:(.+?)\">.+?</a> \\((\\d+) members?\\)").Matches(get2))
        {
            string name = match.Groups[1].Value;
            if (langs.Contains(name))
            {
                // Indexer assignment: a category listed twice does not throw
                qtdmbr[name] = Int32.Parse(match.Groups[2].Value);
            }
        }

        int count = 1;
        foreach (KeyValuePair<string, int> entry in qtdmbr.OrderByDescending(x => x.Value))
        {
            Console.WriteLine("{0}. {1} - {2}", count, entry.Key, entry.Value);
            count++;
        }
    }
}
Object-oriented solution
using System;
using System.Net;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using System.Collections.Generic;
/// <summary>
/// Read-only pair of a category title and the number of members it has.
/// </summary>
class Category {
    /// <summary>Creates a category with the given title and member count.</summary>
    public Category(string title, int members) {
        Title = title;
        Members = members;
    }

    /// <summary>Title of the category.</summary>
    public string Title { get; private set; }

    /// <summary>Number of members in the category.</summary>
    public int Members { get; private set; }
}
/// <summary>
/// Ranks Rosetta Code programming languages by category size using
/// <see cref="Category"/> objects.
/// </summary>
class Program {
    /// <summary>
    /// Downloads the language list (JSON API) and the Special:Categories
    /// page (HTML), builds a Category for every listed language and prints
    /// "place. title - members" in descending order of members.
    /// </summary>
    /// <param name="args">Unused.</param>
    static void Main(string[] args) {
        string get1;
        string get2;
        // One client for both requests, disposed when the downloads are done
        using (WebClient client = new WebClient()) {
            get1 = client.DownloadString("http://www.rosettacode.org/w/api.php?action=query&list=categorymembers&cmtitle=Category:Programming_Languages&cmlimit=500&format=json");
            get2 = client.DownloadString("http://www.rosettacode.org/w/index.php?title=Special:Categories&limit=500");
        }

        MatchCollection match1 = new Regex("\"title\":\"Category:(.+?)\"").Matches(get1);
        // "members?" also accepts the singular "(1 member)"
        MatchCollection match2 = new Regex("title=\"Category:(.+?)\">.+?</a> \\((\\d+) members?\\)").Matches(get2);

        // A set gives constant-time membership tests in the loop below
        HashSet<string> valids = new HashSet<string>(match1.Cast<Match>().Select(x => x.Groups[1].Value));

        List<Category> langs = new List<Category>();
        foreach (Match match in match2) {
            string category = match.Groups[1].Value;
            if (valids.Contains(category)) {
                langs.Add(new Category(category, Int32.Parse(match.Groups[2].Value)));
            }
        }

        int count = 1;
        foreach (Category i in langs.OrderByDescending(x => x.Members)) {
            Console.WriteLine("{0}. {1} - {2}", count, i.Title, i.Members);
            count++;
        }
    }
}
[edit] C++
Built with g++ under Linux, linking with: g++ -lboost_thread -lboost_system -lboost_regex
#include <string>
#include <boost/regex.hpp>
#include <boost/asio.hpp>
#include <vector>
#include <utility>
#include <iostream>
#include <sstream>
#include <cstdlib>
#include <algorithm>
#include <iomanip>
// Comparison functor for (language, count) pairs: orders entries so that
// the pair with the larger count comes first.
struct Sort {
    typedef std::pair<std::string , int> Entry ;

    bool operator( ) ( const Entry & lhs , const Entry & rhs ) const {
        return rhs.second < lhs.second ;
    }
} ;
int main( ) {
try {
//setting up an io service , with templated subelements for resolver and query
boost::asio::io_service io_service ;
boost::asio::ip::tcp::resolver resolver ( io_service ) ;
boost::asio::ip::tcp::resolver::query query ( "rosettacode.org" , "http" ) ;
boost::asio::ip::tcp::resolver::iterator endpoint_iterator = resolver.resolve( query ) ;
boost::asio::ip::tcp::resolver::iterator end ;
boost::asio::ip::tcp::socket socket( io_service ) ;
boost::system::error_code error = boost::asio::error::host_not_found ;
//looking for an endpoint the socket will be able to connect to
while ( error && endpoint_iterator != end ) {
socket.close( ) ;
socket.connect( *endpoint_iterator++ , error ) ;
}
if ( error )
throw boost::system::system_error ( error ) ;
//we send a request
boost::asio::streambuf request ;
std::ostream request_stream( &request ) ;
request_stream << "GET " << "/mw/index.php?title=Special:Categories&limit=800" << " HTTP/1.0\r\n" ;
request_stream << "Host: " << "rosettacode.org" << "\r\n" ;
request_stream << "Accept: */*\r\n" ;
request_stream << "Connection: close\r\n\r\n" ;
//send the request
boost::asio::write( socket , request ) ;
//we receive the response analyzing every line and storing the programming language
boost::asio::streambuf response ;
std::istream response_stream ( &response ) ;
boost::asio::read_until( socket , response , "\r\n\r\n" ) ;
boost::regex e( "<li><a href=\"[^<>]+?\">([a-zA-Z\\+#1-9]+?)</a>\\s?\\((\\d+) members\\)</li>" ) ;
//using the wrong regex produces incorrect sorting!!
std::ostringstream line ;
std::vector<std::pair<std::string , int> > languages ; //holds language and number of examples
boost::smatch matches ;
while ( boost::asio::read( socket , response , boost::asio::transfer_at_least( 1 ) , error ) ) {
line << &response ;
if ( boost::regex_search( line.str( ) , matches , e ) ) {
std::string lang( matches[2].first , matches[2].second ) ;
int zahl = atoi ( lang.c_str( ) ) ;
languages.push_back( std::make_pair( matches[ 1 ] , zahl ) ) ;
}
line.str( "") ;//we have to erase the string buffer for the next read
}
if ( error != boost::asio::error::eof )
throw boost::system::system_error( error ) ;
//we sort the vector entries , see the struct above
std::sort( languages.begin( ) , languages.end( ) , Sort( ) ) ;
int n = 1 ;
for ( std::vector<std::pair<std::string , int> >::const_iterator spi = languages.begin( ) ;
spi != languages.end( ) ; ++spi ) {
std::cout << std::setw( 3 ) << std::right << n << '.' << std::setw( 4 ) << std::right <<
spi->second << " - " << spi->first << '\n' ;
n++ ;
}
} catch ( std::exception &ex ) {
std::cout << "Exception: " << ex.what( ) << '\n' ;
}
return 0 ;
}
Sample output ( just the "top ten" ):
1. 367 - Tcl 2. 334 - Python 3. 319 - Ruby 4. 286 - C 5. 277 - Perl 6. 272 - OCaml 7. 264 - Ada 8. 241 - E 9. 239 - AutoHotkey 10. 193 - Forth
[edit] J
Solution:Example:
NB. Rank Rosetta Code categories by member count.
NB. Loads the gethttp, x2j (SAX-based XML parsing) and regex addons.
require 'web/gethttp xml/sax/x2j regex'
NB. Declare the parser class rcPopLang; the names defined up to
NB. cocurrent'base' below are presumably created in that class - TODO confirm.
x2jclass 'rcPopLang'
NB. rx: x is a regex, y is the text.  Appends two empty boxes to the boxed
NB. matches and fetches path 0 1 - presumably the first captured subgroup
NB. of the first match, or an empty box when nothing matched - TODO confirm.
rx =: (<0 1) {:: (2#a:) ,~ rxmatches rxfrom ]
NB. Parser rules.  The lines up to the closing parenthesis are rule text
NB. handed to x2jDefn, not ordinary J sentences, so they carry no comments.
NB. Each rule pairs an element path with a sentence; the li rule appends a
NB. (lang ; count) row to langs and the li/a rule sets lang.
'Popular Languages' x2jDefn
/ := langs : langs =: 0 2 $ a:
html/body/div/div/div/div/div/ul/li := langs =: langs ,^:(a:~:{.@[)~ lang ; ' \((\d+) members?\)' rx y
html/body/div/div/div/div/div/ul/li/a := lang =: '^\s*((?:.(?!User|Tasks|Omit|attention|operations|by))+)\s*$' rx y
)
cocurrent'base'
NB. sortTab: reorder the rows of a table in descending order of the numeric
NB. value of the last column (__ is the value used for non-numeric text).
sortTab =. \: __ ". [: ;:^:_1: {:"1
NB. formatTab: sort with sortTab, then add a "place." column and a '-'
NB. separator column and join each row into text.
NB. NOTE(review): the exact column order comes from the 20 A. permutation -
NB. confirm against the sample output below.
formatTab =: [: ;:^:_1: [: (20 A. (<'-') , |. , [: ('.' <"1@:,.~ ":) 1 + 1 i.@,~ 1{$)&.|: sortTab f.
NB. rcPopLangs: y is a URL; fetch it, run the parser, format the result.
rcPopLangs =: formatTab@:process_rcPopLang_@:gethttp
NB. Example: the first ten lines of the ranking
10 {. rcPopLangs 'http://www.rosettacode.org/w/index.php?title=Special:Categories&limit=800'
1. 354 - Tcl
2. 321 - Python
3. 268 - OCaml
4. 268 - J
5. 267 - C
6. 265 - Common Lisp
7. 261 - Ada
8. 255 - Perl
9. 253 - Haskell
10. 243 - Java
Notes:
See some notes on the J solution.
[edit] Oz
Library: OzHttpClient
Using web scraping. Does not filter non-language categories.
% Rank Rosetta Code categories by member count using web scraping.
% Fetches the Special:Categories page, extracts a Category#Count pair from
% every list item and prints twenty entries in descending order of count.
declare
[HTTPClient] = {Module.link ['x-ozlib://mesaros/net/HTTPClient.ozf']}
[Regex] = {Module.link ['x-oz://contrib/regex']}
% GetPage: request RawUrl through an HTTPClient.urlGET object, close the
% client and return the sOut field of the output parameters.
fun {GetPage RawUrl}
Client = {New HTTPClient.urlGET init(inPrms(toFile:false toStrm:true) _)}
Url = {VirtualString.toString RawUrl}
OutParams
HttpResponseParams
in
{Client getService(Url ?OutParams ?HttpResponseParams)}
{Client closeAll(true)}
OutParams.sOut
end
% GetCategories: map every regex match in Doc to a Category#Count pair.
% Group 1 is the link text, group 2 the digits before " member".
fun {GetCategories Doc}
{Map {Regex.allMatches "<li><a[^>]+>([^<]+)</a> \\(([0-9]+) member" Doc}
fun {$ Match}
Category = {Regex.group 1 Match Doc}
Count = {String.toInt {ByteString.toString {Regex.group 2 Match Doc}}}
in
Category#Count
end
}
end
Url = "http://www.rosettacode.org/mw/index.php?title=Special:Categories&limit=5000"
{System.showInfo "Retrieving..."}
Doc = {GetPage Url}
{System.showInfo "Parsing..."}
Cs = {GetCategories Doc}
in
% Walk the pairs sorted by descending count together with the index I;
% presumably the loop ends when either generator is exhausted - TODO confirm
for
Cat#Count in {Sort Cs fun {$ _#C1 _#C2} C1 > C2 end}
I in 1..20
do
{System.showInfo I#". "#Count#" - "#Cat}
end
Output:
1. 371 - Tcl 2. 369 - Programming Tasks 3. 338 - Python 4. 324 - Ruby 5. 306 - Haskell ... 17. 225 - Oz 18. 214 - C++ 19. 209 - JavaScript 20. 208 - ALGOL 68
[edit] Perl
Sorting only programming languages.
use MediaWiki::API;
use strict;
use warnings;

# Rank Rosetta Code programming languages by category size.
#
# Asks the MediaWiki API for the members of "Category:Programming Languages"
# together with their category info, then prints one line per language as
# "<place>. <size> - <name>", largest first (ties are ordered by name).

my $api = MediaWiki::API->new({api_url => 'http://rosettacode.org/mw/api.php'});

# api() returns a false value on failure; report the error instead of
# silently printing nothing.
my $reply = $api->api({
    action    => 'query',
    generator => 'categorymembers',
    gcmtitle  => 'Category:Programming Languages',
    gcmlimit  => 'max',
    prop      => 'categoryinfo',
}) or die 'MediaWiki API query failed: '
    . $api->{error}{code} . ': ' . $api->{error}{details} . "\n";

# Build [name, size] pairs; the name is a copy, so the API response itself
# is left unmodified.  A page without category info counts as size 0.
my @pairs =
    sort { $b->[1] <=> $a->[1] or $a->[0] cmp $b->[0] }
    map  { my $name = $_->{title};
           $name =~ s/\ACategory://;
           [$name, $_->{categoryinfo}{size} || 0] }
    values %{ $reply->{query}{pages} || {} };

my $n = 0;
for my $pair (@pairs) {
    my ($lang, $tasks) = @$pair;
    printf "%3d. %3d - %s\n", ++$n, $tasks, $lang;
}
[edit] Python
Works with: Python version 2.6
import urllib
import re
import string
# Rank Rosetta Code languages by the number of members in their category.
# (Python 2 script.)
#
# 1. Read the language names from Category:Solutions_by_Programming_Language.
# 2. Read every (category, member count) pair from Special:Categories.
# 3. Print the languages in descending order of count and write the same
#    lines to rzts.tt.

xl = urllib.urlopen("http://rosettacode.org/wiki/Category:Solutions_by_Programming_Language").read()
langList = re.findall(r'href="/wiki/Category:(.+?)"', xl)
ttbl = string.maketrans("_", " ")
# Only the links that precede the "Solutions" entry are language links
last = langList.index("Solutions")
# unquote() decodes every percent-escape (e.g. "C%2B%2B" -> "C++") instead
# of patching the single hard-coded "C%2B%2B" entry, which raised
# ValueError when that entry was absent
langList = [urllib.unquote(lng).translate(ttbl) for lng in langList[:last]]
langSet = set(langList)

rlines = urllib.urlopen("http://www.rosettacode.org/w/index.php?title=Special:Categories&limit=10000").read()
# "members?" also accepts the singular "(1 member)"
qtdmbr = re.findall(r'title="Category:(.+?)">.+?</a> \((\d+) members?\)', rlines)
result = [(x, int(y)) for x, y in qtdmbr if x in langSet]

# The with-statement closes the file even if printing fails part-way
with open("rzts.tt", 'w') as f0:
    for n, i in enumerate(sorted(result, key=lambda x: x[1], reverse=True), start=1):
        line = "%3d. %3d - %s" % (n, i[1], i[0])
        print(line)
        f0.write(line + "\n")
Results (of Dec 11):
1. 365 - Tcl 2. 332 - Python 3. 317 - Ruby 4. 289 - Haskell 5. 279 - C 6. 278 - J 7. 276 - Perl 8. 272 - OCaml 9. 270 - Common Lisp 10. 264 - Ada 11. 249 - Java 12. 241 - E 13. 239 - AutoHotkey 14. 229 - D 15. 224 - R ...
Sorting only programming languages.
import urllib,re
# Rank only real programming languages (Python 2 script).
#
# get1 lists the members of Category:Programming_Languages (JSON API);
# get2 is the Special:Categories page with the member counts.  A category
# is printed only when its title appears in get1.

# Sort key: the member count of a (title, count) pair
key1 = lambda x: int(x[1])
get1 = urllib.urlopen("http://www.rosettacode.org/w/api.php?action=query&list=categorymembers&cmtitle=Category:Programming_Languages&cmlimit=500&format=json").read()
get2 = urllib.urlopen("http://www.rosettacode.org/w/index.php?title=Special:Categories&limit=500").read()
# A set makes the membership test below constant-time
langs = set(re.findall(r'"title":"Category:(.+?)"', get1))
# "members?" also accepts the singular "(1 member)"
qtdmbr = re.findall(r'title="Category:(.+?)">.+?</a> \((\d+) members?\)', get2)
result = [(x, int(y)) for x, y in qtdmbr if x in langs]
for n, i in enumerate(sorted(result, key=key1, reverse=True)):
    print("%3d. %3d - %s" % (n + 1, i[1], i[0]))
[edit] R
library(RJSONIO)
# Names of all categories listed under Solutions_by_Programming_Language,
# with the "Category:" prefix removed.
langUrl <- "http://rosettacode.org/mw/api.php?action=query&format=json&cmtitle=Category:Solutions_by_Programming_Language&list=categorymembers&cmlimit=500"
languages <- sapply(
  fromJSON(langUrl)$query$categorymembers,
  function(member) sub("Category:", "", member$title)
)

# Number of members of the "<lang> User" category.
# fails if there are more than 500 users per language
user <- function (lang) {
  userBaseUrl <- "http://rosettacode.org/mw/api.php?action=query&format=json&list=categorymembers&cmlimit=500&cmtitle=Category:"
  categoryName <- URLencode(paste(lang, " User", sep=""))
  members <- fromJSON(paste(userBaseUrl, categoryName, sep=""))$query$categorymembers
  length(members)
}

# One count per language, then the fifteen largest
users <- sapply(languages, user)
head(sort(users, decreasing=TRUE),15)
Output (as of March, 13, 2010)
C C++ Java Python JavaScript Perl UNIX Shell
55 55 37 32 27 27 22
Pascal BASIC PHP SQL Haskell AWK C sharp
20 19 19 18 17 16 16
Ruby
14
[edit] Ruby
Works with: Ruby version 1.8.7
Now that there are more than 500 categories, the URL given in the task description is insufficient. I use the RC API to grab the categories, and then count the members of each category.
Uses the RosettaCode module from Count programming examples#Ruby
require 'rosettacode'
# Rank Rosetta Code languages by category size using the RC API.
$stdout.sync = true

# Gather every language category name through the RosettaCode helper.
langs = []
RosettaCode.rc_tasks("Solutions_by_Programming_Language") { |name| langs << name }

# Query the category info in batches of 50 titles and record the page
# count of each category (0 when the page has no categoryinfo element).
langcount = {}
langs.each_slice(50) do |batch|
  params = {
    "action" => "query",
    "prop"   => "categoryinfo",
    "format" => "xml",
    "titles" => batch.join("|"),
  }
  xml = REXML::Document.new(open(RosettaCode.rc_url("api.php", params)))
  REXML::XPath.each(xml, "//page") do |node|
    title = node.attribute("title").value
    details = REXML::XPath.first(node, "categoryinfo")
    if details.nil?
      langcount[title] = 0
    else
      langcount[title] = details.attribute("pages").value.to_i
    end
  end
end

puts "There are #{langcount.length} languages"
puts "the top 15:"

# Ascending sort reversed, then the first fifteen entries
ranking = langcount.sort_by { |_title, size| size }.reverse
ranking[0, 15].each_with_index do |(title, size), position|
  puts "#{position + 1}. #{size} - #{title.sub(/Category:/, '')}"
end
Results
There are 151 languages the top 15: 1. 326 - Tcl 2. 289 - Python 3. 268 - Ruby 4. 248 - C 5. 243 - Ada 6. 238 - Perl 7. 221 - E 8. 219 - Java 9. 218 - AutoHotkey 10. 217 - OCaml 11. 208 - Haskell 12. 195 - ALGOL 68 13. 186 - D 14. 176 - C++ 15. 173 - Forth
[edit] Tcl
[edit] By web scraping
package require Tcl 8.5
package require http
# Rank Rosetta Code categories by member count (web scraping).
# Fetches the Special:Categories page, collects {count name} pairs for
# every category that is not in the ignore table, and prints them in
# descending order of count as "<place>. <count> - <name>".

set response [http::geturl http://rosettacode.org/mw/index.php?title=Special:Categories&limit=1000]
set page [http::data $response]
# Release the state held for the request now that the body is copied out
http::cleanup $response

# Categories that are not programming languages
array set ignore {
    "Basic language learning"           1
    "Encyclopedia"                      1
    "Implementations"                   1
    "Language Implementations"          1
    "Language users"                    1
    "Maintenance/OmitCategoriesCreated" 1
    "Programming Languages"             1
    "Programming Tasks"                 1
    "RCTemplates"                       1
    "Solutions by Library"              1
    "Solutions by Programming Language" 1
    "Solutions by Programming Task"     1
    "Unimplemented tasks by language"   1
    "WikiStubs"                         1
}

# Start from an empty list so that a page without any match prints nothing
# instead of failing with "can't read langs: no such variable"
set langs {}
foreach line [split $page \n] {
    if {[regexp {>([^<]+)</a> \((\d+) member} $line -> lang num]} {
        if {![info exists ignore($lang)]} {
            lappend langs [list $num $lang]
        }
    }
}

set i 0
foreach entry [lsort -integer -index 0 -decreasing $langs] {
    lassign $entry num lang
    puts [format "%d. %d - %s" [incr i] $num $lang]
}
Produces this output on 31 July 2009 (top 15 entries only):
1. 329 - Tcl 2. 292 - Python 3. 270 - Ruby 4. 250 - C 5. 247 - Ada 6. 238 - Perl 7. 223 - E 8. 221 - Java 9. 220 - AutoHotkey 10. 219 - OCaml 11. 210 - Haskell 12. 197 - ALGOL 68 13. 188 - D 14. 179 - C++ 15. 175 - Forth ……
[edit] By using the API
Inspired by the Ruby version...
Works with: Tcl version 8.5
Library: tDOM
package require Tcl 8.5
package require http
package require tdom
# Namespace "rc": a small client for the Rosetta Code MediaWiki API.
# It is exported as an ensemble, so callers use "rc members ..." and
# "rc count ...".
namespace eval rc {
### Utility function that handles the low-level querying ###
# q   query parameters (a dict); "action query" is added here
# xp  XPath expression selecting the DOM nodes of interest
# vn  name of the caller's variable that receives each selected node
# b   script evaluated in the caller's frame once per selected node
proc rcq {q xp vn b} {
upvar 1 $vn v
dict set q action "query"
# Loop to pick up all results out of a category query
while 1 {
set url "http://rosettacode.org/mw/api.php?[http::formatQuery {*}$q]"
puts -nonewline stderr . ;# Indicate query progress...
set token [http::geturl $url]
set doc [dom parse [http::data $token]]
http::cleanup $token
# Spoon out the DOM nodes that the caller wanted
foreach v [$doc selectNodes $xp] {
uplevel 1 $b
}
# See if we want to go round the loop again
# (a query-continue/categorymembers node carries the cmcontinue value
# that is added to the next request)
set next [$doc selectNodes "//query-continue/categorymembers"]
if {![llength $next]} break
dict set q cmcontinue [[lindex $next 0] getAttribute "cmcontinue"]
}
}
### API function: Iterate over the members of a category ###
# page     category name without the "Category:" prefix
# varName  name of the caller's variable that receives each member title
# script   script evaluated in the caller's frame once per member
proc members {page varName script} {
upvar 1 $varName var
set query [dict create cmtitle "Category:$page" {*}{
list "categorymembers"
format "xml"
cmlimit "500"
}]
rcq $query "//cm" item {
# Tell the caller's script about the item
set var [$item getAttribute "title"]
uplevel 1 $script
}
}
### API function: Count the members of a list of categories ###
# cats      list of category titles
# catVar    name of the caller's variable that receives each title
# countVar  name of the caller's variable that receives each page count
# script    script evaluated in the caller's frame once per category
proc count {cats catVar countVar script} {
upvar 1 $catVar cat $countVar count
set query [dict create prop "categoryinfo" format "xml"]
# Titles are sent in batches of 40 per request
for {set n 0} {$n<[llength $cats]} {incr n 40} {
dict set query titles [join [lrange $cats $n $n+39] |]
rcq $query "//page" item {
# Get title and count
set cat [$item getAttribute "title"]
set info [$item getElementsByTagName "categoryinfo"]
if {[llength $info]} {
set count [[lindex $info 0] getAttribute "pages"]
} else {
# A page without a categoryinfo element counts as empty
set count 0
}
# Let the caller's script figure out what to do with them
uplevel 1 $script
}
}
}
### Assemble the bits into a whole API ###
namespace export members count
namespace ensemble create
}
# Step 1: collect the title of every programming-language category
rc members "Solutions by Programming Language" lang {
    lappend langs $lang
}

# Step 2: ask for the size of each category and store {name size} pairs,
# with the leading "Category:" removed from the name
rc count $langs l c {
    regsub {^Category:} $l {} stripped
    lappend count [list $stripped $c]
}

# Terminate the line of progress dots written to stderr
puts stderr ""

# Step 3: report the total and the fifteen largest categories
puts "There are [llength $count] languages"
puts "Here are the top fifteen:"
set count [lsort -index 1 -integer -decreasing $count]
foreach item [lrange $count 0 14] {
    # Positional specifiers: 1 = place, 2 = name, 3 = size
    puts [format "%1\$3d. %3\$3d - %2\$s" [incr n] {*}$item]
}
[edit] UnixPipes
# Fetch the category page, strip the HTML tags, rewrite each
# "name (N members)" line as "N - name", keep only the lines that start
# with a number and sort them in descending numeric order.
#
# printf is used because echo does not expand "\n" portably, and each "|"
# ends its line so that the shell continues the pipeline on the next one
# (a line that starts with "|" is a syntax error).
printf 'GET %s HTTP/1.0\n\n' 'http://www.rosettacode.org/w/index.php?title=Special:Categories&limit=500' |
    nc www.rosettacode.org 80 |
    sed -n -e 's,<[^>]*>,,g' -e 's,^\([^(]*\)(\([^)]*\) members*) *,\2 - \1,g' -e '/^[0-9]\+./p' |
    sort -rn







