Rosetta Code/Rank languages by popularity

Revision as of 01:59, 16 June 2009 by rosettacode>Guga360 (added enumerate + exaplanation)

Sort the most popular programming languages by the number of members in their Rosetta Code categories (from http://www.rosettacode.org/w/index.php?title=Special:Categories&limit=500)

Task
Rosetta Code/Rank languages by popularity
You are encouraged to solve this task according to the task description, using any language you may know.

Output:

1. 233 - Python
2. 222 - Ada
3. 203 - OCaml
4. 203 - C
5. 201 - Perl
6. 193 - Haskell
7. 182 - Java
8. 179 - D
9. 178 - ALGOL 68
10. 160 - Ruby
...

Filtering wrong results is optional. You can check against http://www.rosettacode.org/wiki/Special:MostLinkedCategories

Ada

Library: AWS

NB: The 'ASCII.Quotation' in the declaration of Title_Str is a rather clunky workaround for the Ada->HTML engine not being able to colourize '"' correctly.

<lang ada>with Ada.Integer_Text_IO; use Ada.Integer_Text_IO; with Ada.Strings.Fixed; use Ada.Strings.Fixed; with Ada.Strings.Unbounded; use Ada.Strings.Unbounded; with Ada.Text_IO; use Ada.Text_IO;

with Ada.Containers.Ordered_Sets; with Ada.Strings.Less_Case_Insensitive;

with AWS.Client; with AWS.Response;

procedure Test is

  use Ada.Strings;

  function "+" (S : String) return Unbounded_String renames To_Unbounded_String;

  type A_Language_Count is
     record
        Count    : Integer := 0;
        Language : Unbounded_String;
     end record;

  -- Equality of two entries looks at the member count only; the language
  -- name is not taken into account.
  function "=" (L, R : A_Language_Count) return Boolean is
     Same_Count : constant Boolean := L.Count = R.Count;
  begin
     return Same_Count;
  end "=";

  -- Strict ordering used by the ordered set: by member count first, then,
  -- for equal counts, by language name compared case-insensitively.
  function "<" (L, R : A_Language_Count) return Boolean is
  begin
     -- Different counts decide the ordering on their own.
     if L.Count /= R.Count then
        return L.Count < R.Count;
     end if;

     -- Same count: fall back to the language names.
     return Less_Case_Insensitive (Left  => To_String (L.Language),
                                   Right => To_String (R.Language));
  end "<";

  package Sets is new Ada.Containers.Ordered_Sets (A_Language_Count);
  use Sets;

  Counts : Set;

  -- Scan S for category links of the form
  --    title="Category:NAME">...</a> (COUNT ...
  -- and insert one (COUNT, NAME) element into Counts for every link found.
  --
  -- The scan is an explicit loop that stops as soon as one of the expected
  -- markers can no longer be found, instead of recursing once per entry and
  -- relying on a catch-all exception handler to terminate.  An entry whose
  -- count is not a number, or which is already present in Counts, is
  -- skipped and scanning continues with the next entry.
  procedure Find_Counts (S : String) is
     Title_Str : constant String  := "title=" & ASCII.Quotation & "Category:";
     End_A_Str : constant String  := "</a> (";

     From       : Integer := S'First;  -- where the next search starts
     Title_At   : Natural;             -- start of Title_Str, 0 if absent
     Name_At    : Positive;            -- first character of the language name
     Bracket_At : Natural;             -- the '>' closing the opening <a ...> tag
     End_A_At   : Natural;             -- start of End_A_Str
     Count_At   : Positive;            -- first character of the member count
     Space_At   : Natural;             -- the blank that follows the member count
  begin
     loop
        -- A slice keeps the index numbering of S, so every position returned
        -- by Index below is directly usable as an index into S.  Searching a
        -- null slice simply yields 0.
        Title_At := Index (S (From .. S'Last), Title_Str);
        exit when Title_At = 0;
        Name_At := Title_At + Title_Str'Length;

        Bracket_At := Index (S (Name_At .. S'Last), ">");
        exit when Bracket_At = 0;

        End_A_At := Index (S (Bracket_At + 1 .. S'Last), End_A_Str);
        exit when End_A_At = 0;
        Count_At := End_A_At + End_A_Str'Length;

        Space_At := Index (S (Count_At .. S'Last), " ");
        exit when Space_At = 0;

        begin
           -- "Bracket_At - 2" drops the closing quote that precedes the '>'.
           Counts.Insert
             (New_Item => (Count    => Natural'Value (S (Count_At .. Space_At - 1)),
                           Language => +S (Name_At .. Bracket_At - 2)));
        exception
           when Constraint_Error =>
              null; -- Count is not a number, or the entry is a duplicate: skip it
        end;

        From := Space_At + 1;
     end loop;
  end Find_Counts;

  Place : Natural := 1;

  -- Print one ranking line, "<place>. <count> - <language>", for the element
  -- designated by C, then advance the running Place counter.
  procedure Display (C : Cursor) is
     Item : constant A_Language_Count := Element (C);
  begin
     Put (Place, Width => 1);
     Put (". ");
     Put (Item.Count, Width => 1);
     Put (" - ");
     Put_Line (To_String (Item.Language));
     Place := Place + 1;
  end Display;

  Http_Source : constant AWS.Response.Data :=
    AWS.Client.Get ("http://www.rosettacode.org/w/index.php?title=Special:Categories&limit=500");

begin

  Find_Counts (AWS.Response.Message_Body (Http_Source));
  Counts.Reverse_Iterate (Display'Access);

end Test;</lang>

ALGOL 68

Works with: ALGOL 68G version mk8+ for Unix and Linux - tested with release mk15-0.8b.fc9.i386 - uses non-standard library routines http content and grep in string.

Note: the routine http content is currently not available on Win32 systems. <lang algol>PROC good page = (REF STRING page) BOOL:
    # Yields TRUE when page starts with an HTTP status line carrying code 200.
      Otherwise prints the status line found at the start of page (or a
      generic message when none is found) and yields FALSE.
      grep in string yields 0 on a successful match. #

    IF grep in string("^HTTP/[0-9.]* 200", page, NIL, NIL) = 0
    THEN TRUE
    ELSE IF INT start, end;
             # locate the whole status line so that it can be reported #
             grep in string("^HTTP/[0-9.]* [0-9]+ [a-zA-Z ]*", page,
                            start, end) = 0
         THEN print (page[start : end])
         ELSE print ("unknown error retrieving page")
         FI;
         FALSE
    FI;

MODE LISTOFSTRING = STRUCT(REF LINK first, last, INT upb); MODE LINK = STRUCT(STRING value, REF LINK next);

# LISTINIT: set the list "new" to hold just the link "first" (first and last
  both refer to it); the stored length is 0 when "first" is NIL, else 1.
  Yields the initialised list.  Not called in the visible code; it is only
  mentioned in a comment inside re split. #
PRIO LISTINIT = 1; OP LISTINIT = (REF LISTOFSTRING new, REF LINK first)REF LISTOFSTRING: (

 new := (first, first, (first IS REF LINK(NIL) | 0 | 1 ));
 new

);

# Append "item" to the end of "list": a new link is generated on the heap,
  hooked in after the current last link (or installed as the first link when
  the list is empty), and the stored length is incremented. #
OP +:= = (REF LISTOFSTRING list, []CHAR item)VOID: (

 HEAP LINK new := (STRING(item), REF LINK(NIL));
 IF first OF list IS REF LINK(NIL) THEN
   first OF list := new
 ELSE
   next OF last OF list := new
 FI;
 last OF list := new;
 upb OF list +:= 1

);

# UPB of a list is the element count stored in its upb field. #
OP UPB = (LISTOFSTRING list)INT: upb OF list;

# Copy the values of the linked list into a row of UPB list strings,
  walking the links from first onwards, and yield that row. #
OP ARRAYOFSTRING = (LISTOFSTRING list)[]STRING:(

 [UPB list]STRING out;
 REF LINK this := first OF list;
 FOR i TO UPB list DO out[i] := value OF this; this := next OF this OD;
 out

);

INT match=0, no match=1, out of memory error=2, other error=3;

# Split "beetles" on every match of the regular expression "re split".
  The result alternates between the text preceding a match and the matched
  text itself, and ends with whatever follows the last match (an empty
  string when the last match reaches the end of "beetles").
  pos and end, as set by grep in string, are relative to the slice
  beetles[start:], hence the offsets by start below. #
PROC re split = (STRING re split, REF STRING beetles)[]STRING:(

   LISTOFSTRING out := (NIL, NIL, 0); # LISTINIT REF LINK NIL; #
   INT start := 1, pos, end;
   WHILE grep in string(re split, beetles[start:], pos, end) = match DO
     out +:= beetles[start:start+pos-2];
     out +:= beetles[start+pos-1:start+end-1];
     start +:= end
   OD;
   # Always keep the tail: the former test "start > UPB beetles" appended it
     only when it was empty and silently dropped any text after the last match. #
   out +:= beetles[start:];
   ARRAYOFSTRING(out)
 );


# Main program: fetch the category listing, split it into HTML tags and text,
  pair every category link with the "N members" text that follows it, and
  print the ten categories with the most members. #
IF STRING reply;

  INT rc =
     http content (reply, "www.rosettacode.org", "http://www.rosettacode.org/w/index.php?title=Special:Categories&limit=500", 0);
  rc /= 0 OR NOT good page (reply)

THEN print (("Error:",strerror (rc))) ELSE

 STRING  # hack: HTML should be parsed by an official HTML parsing library #
   re html tag = "<[^>]*>",
   re a href category = "^<a href=""/wiki/Category:.*"" title=",
   re members = "([1-9][0-9]* members)";
 MODE STATISTIC = STRUCT(INT members, STRING category);
 FLEX[0]STATISTIC stats;
 # append one item to a flexible row of statistics #
 OP +:=  = (REF FLEX[]STATISTIC in out, STATISTIC item)VOID:(
     [LWB in out: UPB in out+1]STATISTIC new;
     new[LWB in out: UPB in out]:=in out;
     new[UPB new]:=item;
     in out := new
   );
 # hack: needs to be manually maintained #
 STRING re ignore ="Programming Tasks|WikiStubs|Maintenance/OmitCategoriesCreated|"+
                   "Unimplemented tasks by language|Programming Languages|"+
                   "Solutions by Programming Language|Implementations|"+
                   "Solutions by Library|Encyclopedia|Language users|"+
                   "Solutions by Programming Task|Basic language learning|"+
                   "RCTemplates|Language Implementations";
 FORMAT category fmt = $"<a href=""/wiki/Category:"g""" title=""Category:"g""""$;
 STRING encoded category, category;
 FORMAT members fmt = $" ("g" members)"$;
 INT members;
 FLEX[0]STRING tokens := re split(re html tag, reply);
 FOR token index TO UPB tokens DO
   STRING token := tokens[token index];
   FILE file;
   IF grep in string(re a href category, token, NIL, NIL) = match THEN
     # a category link: remember its name for the member count that follows #
     associate(file, token);
     make term(file,"""");
     getf(file, (category fmt, encoded category, category));
     close(file)
   ELIF grep in string(re members, token, NIL, NIL) = match THEN
     # a member count: record it unless the category is on the ignore list #
     IF grep in string(re ignore, category, NIL, NIL) /= match THEN
       associate(file, token);
       getf(file, (members fmt, members));
       stats +:= STATISTIC(members, category);
       close(file)
     FI
   FI
 OD;
 OP < = (STATISTIC a,b)BOOL:
   members OF a < members OF b;
 MODE SORTSTRUCT = STATISTIC;
 PR READ "prelude/sort.a68" PR;
 stats := in place shell sort reverse(stats);
 INT max = 10;
 FOR i TO (UPB stats > max | max | UPB stats) DO
   printf(($g(-0)". "g(-0)" - "gl$,i,stats[i]))
 OD

FI</lang> Output:

1. 233 - Python
2. 222 - Ada
3. 203 - OCaml
4. 203 - C
5. 201 - Perl
6. 193 - Haskell
7. 182 - Java
8. 179 - D
9. 178 - ALGOL 68
10. 160 - Ruby

AutoHotkey

<lang AutoHotkey>filedelete, url.txt
; Download the category listing to a local file, then scan it line by line.
urldownloadtofile, http://www.rosettacode.org/w/index.php?title=Special:Categories&limit=500, url.txt
loop, read, url.txt
{
  ; Every category sits in its own list item.
  if instr(A_LoopReadLine, "<li>")
  {
    reg = title=\"Category:(.+?)"
    regexmatch(A_LoopReadLine, reg, name)
    regexmatch(A_LoopReadLine, "(\d*)\smembers", count)
    ; Prepend "count - name" so that the numeric sort below can rank the lines.
    print = %count1% `- %name1% `n %print%
  }
}
; R = reverse order, N = numeric: highest member count first.
sort, print, RN
msgbox %print%
</lang>

    AWK

    Works with: gawk

    This solution needs help from external tools to fetch the HTML from rosettacode.org, and also to do a numeric sort <lang sh>printf "GET %s HTTP/1.0\n\n" 'http://www.rosettacode.org/w/index.php?title=Special:Categories&limit=500' | nc www.rosettacode.org 80 | gawk '

       # ignore categories that are not languages
       /Basic language learning/            {next}
       /Encyclopedia/                       {next}
       /Implementations/                    {next}
       /Language Implementations/           {next}
       /Language users/                     {next}
       /Maintenance\/OmitCategoriesCreated/ {next}
       /Programming Languages/              {next}
       /Programming Tasks/                  {next}
       /RCTemplates/                        {next}
       /Solutions by Library/               {next}
       /Solutions by Programming Language/  {next}
       /Solutions by Programming Task/      {next}
       /Unimplemented tasks by language/    {next}
       /WikiStubs/                          {next}

       # A category line: m[1] is the link text (the language name) and
       # m[2] the member count.  Stored as "count - name" for "sort -rn".
       match($0, /<li.*>([^<]*)<\/a> \(([[:digit:]]+) members?/, m) {
           lang[++i] = m[2] " - " m[1]
       }

       END {
           # Feed every entry to an external numeric sort through a gawk
           # coprocess, then read the sorted lines back and number them.
           len = 0; for (i in lang) len++
           sorter = "sort -rn"
           for (i = 1; i <= len; i++) {
               print lang[i] |& sorter
           }
           # close only the write end so that sort sees end-of-input
           close(sorter, "to")
           i = 1
           while((sorter |& getline line) > 0) {
               print i++ ". " line
           }
           close(sorter)
       }
    

    '</lang>

    C#

    Sorting only programming languages.

    <lang csharp>using System; using System.Net; using System.Linq; using System.Text; using System.Text.RegularExpressions; using System.Collections; using System.Collections.Generic;

    // Ranks the Rosetta Code programming languages by the number of members
    // of their category and prints "rank. members - language", most popular
    // first, as shown in the task description.
    class Program {

        static void Main(string[] args)
        {
            string get1, get2;

            // WebClient is IDisposable: release it once both pages are fetched.
            using (WebClient client = new WebClient())
            {
                get1 = client.DownloadString("http://www.rosettacode.org/w/api.php?action=query&list=categorymembers&cmtitle=Category:Programming_Languages&cmlimit=500&format=json");
                get2 = client.DownloadString("http://www.rosettacode.org/w/index.php?title=Special:Categories&limit=500");
            }

            MatchCollection match1 = new Regex("\"title\":\"Category:(.+?)\"").Matches(get1);
            MatchCollection match2 = new Regex("title=\"Category:(.+?)\">.+?</a> \\((\\d+) members\\)").Matches(get2);

            // Titles of the categories that are programming languages; a hash
            // set makes each membership test below O(1).
            HashSet<string> langs = new HashSet<string>();
            foreach (Match lang in match1) langs.Add(lang.Groups[1].Value);

            Dictionary<string, int> qtdmbr = new Dictionary<string, int>();
            foreach (Match match in match2) {
                string name = match.Groups[1].Value;
                if (langs.Contains(name)) {
                    // The indexer, unlike Add, does not throw if a title shows up twice.
                    qtdmbr[name] = Int32.Parse(match.Groups[2].Value);
                }
            }

            int count = 1;
            foreach (KeyValuePair<string, int> entry in qtdmbr.OrderByDescending(x => x.Value))
            {
                Console.WriteLine("{0}. {1} - {2}", count, entry.Value, entry.Key);
                count++;
            }
        }

    }</lang>

    Object-oriented solution

    <lang csharp>using System; using System.Net; using System.Linq; using System.Text; using System.Text.RegularExpressions; using System.Collections.Generic;

    // A wiki category: its title and the number of members it has.
    // Both values are fixed at construction and read-only from outside.
    class Category {

        public Category(string title, int members) {
            Title = title;
            Members = members;
        }

        // Category title, without the "Category:" prefix.
        public string Title { get; private set; }

        // Number of members listed for the category.
        public int Members { get; private set; }

    }

    // Ranks the Rosetta Code programming languages by the number of members
    // of their category and prints "rank. members - language", most popular
    // first, as shown in the task description.
    class Program {

        static void Main(string[] args) {
            string get1, get2;

            // WebClient is IDisposable: release it once both pages are fetched.
            using (WebClient client = new WebClient()) {
                get1 = client.DownloadString("http://www.rosettacode.org/w/api.php?action=query&list=categorymembers&cmtitle=Category:Programming_Languages&cmlimit=500&format=json");
                get2 = client.DownloadString("http://www.rosettacode.org/w/index.php?title=Special:Categories&limit=500");
            }

            MatchCollection match1 = new Regex("\"title\":\"Category:(.+?)\"").Matches(get1);
            MatchCollection match2 = new Regex("title=\"Category:(.+?)\">.+?</a> \\((\\d+) members\\)").Matches(get2);

            // Titles of the categories that are programming languages; a hash
            // set makes each membership test below O(1).
            HashSet<string> valids = new HashSet<string>(match1.Cast<Match>().Select(x => x.Groups[1].Value));
            List<Category> langs = new List<Category>();

            foreach (Match match in match2) {
                string category = match.Groups[1].Value;
                int members = Int32.Parse(match.Groups[2].Value);

                if (valids.Contains(category)) langs.Add(new Category(category, members));
            }

            langs = langs.OrderByDescending(x => x.Members).ToList();
            int count = 1;

            foreach (Category i in langs) {
                Console.WriteLine("{0}. {1} - {2}", count, i.Members, i.Title);
                count++;
            }
        }

    }</lang>

    Perl

    Sorting only programming languages.

    <lang perl>use LWP::Simple 'get';

    my $langs_url = 'http://www.rosettacode.org/w/api.php?action=query&list=categorymembers&cmtitle=Category:Programming_Languages&cmlimit=500&format=json';
    my $cats_url = 'http://www.rosettacode.org/w/index.php?title=Special:Categories&limit=500';

    # Names of the categories that are programming languages, used as a set.
    my $langs_json = get($langs_url);
    defined $langs_json or die "Could not fetch $langs_url\n";
    my %langs = map {/Category:(.+)/, 1}
       $langs_json =~ /"title":"(.+?)"}/g;

    # The category listing is the content of the first <ul> element of the page.
    my $cats_html = get($cats_url);
    defined $cats_html or die "Could not fetch $cats_url\n";
    my ($list) = $cats_html =~ m{<ul>(.+?)</ul>}s
       or die "Could not find the category list in $cats_url\n";

    # One [name, members] pair per list item, languages only, most members first.
    my @pairs =
       sort {$b->[1] <=> $a->[1]}
       grep {$langs{ $_->[0] }}
       map {[ m{>(\S.*?)</a> \((\d+) member} ]}
       split '<li>', $list;

    my $n = 0;
    foreach my $pair (@pairs) {
        my ($lang, $tasks) = @$pair;
        printf "%3d. %3d - %s\n", ++$n, $tasks, $lang;
    }</lang>

    Python

    Sorting only programming languages.

    <lang python>import urllib,re

    key1 = lambda x: int(x[1])

    get1 = urllib.urlopen("http://www.rosettacode.org/w/api.php?action=query&list=categorymembers&cmtitle=Category:Programming_Languages&cmlimit=500&format=json").read() get2 = urllib.urlopen("http://www.rosettacode.org/w/index.php?title=Special:Categories&limit=500").read()

    langs = re.findall("\"title\":\"Category:(.+?)\"",get1) qtdmbr = re.findall("title=\"Category:(.+?)\">.+?</a> \((\d+) members\)",get2)

    result = [(x,int(y)) for x,y in qtdmbr if x in langs]

    for n, i in enumerate(sorted(result,key=key1,reverse=True)):

       print "%3d. %3d - %s" % (n+1, i[1], i[0])</lang>
    

    Fail-proof solution

    This solution uses the "categoryinfo" API query. It works even when there are more than 500 categories (it does not read Special:Categories) and more than 500 programming languages.

    <lang python>import json import urllib

    result = []

    langs = json.load(urllib.urlopen("http://www.rosettacode.org/w/api.php?\ action=query&list=categorymembers&cmtitle=Category:Programming_Languages\ &cmlimit=500&format=json"));

    titles = [i['title'] for i in langs['query']['categorymembers']]

    while titles: t = '|'.join(titles[:50])

    info = json.load(urllib.urlopen("http://www.rosettacode.org/w/api.php?\ action=query&prop=categoryinfo&format=json", data=urllib.urlencode({"titles":t})))['query']['pages']

    for i in info: try: result.append([info[i]['title'], info[i]['categoryinfo']['pages']]) except: pass

    del titles[:50]


    for n, i in enumerate(sorted(result,key=lambda x: x[1],reverse=True)): print "%d. %s - %d" % (n+1, i[0].replace("Category:",), i[1])</lang>

    Ruby

    Now that there are more than 500 categories, the URL given in the task description is insufficient. I use the RC API to grab the categories, and then count the members of each category.

    Uses the RosettaCode module from Count programming examples#Ruby <lang ruby>require 'rosettacode'

    langs = Hash.new(0) RosettaCode.rc_tasks("Programming_Languages") do |lang|

     sleep 1   # don't kill the server
     lang = (lang.split(":"))[-1]
     RosettaCode.rc_tasks(lang) do |task|
       langs[lang] += 1
     end
    

    end

    puts "There are #{langs.length} languages" puts "the top 10:" langs.sort_by {|key,val| val}.reverse.each_with_index do |pair, i|

     lang, count = pair
     puts "#{i+1}. #{count} - #{lang}"
    

    end</lang>

    There are 139 languages
    the top 10:
    1. 313 - Tcl
    2. 270 - Python
    3. 236 - Ada
    4. 232 - Ruby
    5. 229 - C
    6. 222 - Perl
    7. 209 - OCaml
    8. 206 - Java
    9. 200 - Haskell
    10. 198 - ALGOL 68

    Tcl

    <lang tcl>package require Tcl 8.5
    package require http

    # Fetch the category listing; the HTTP token is released as soon as the
    # page body has been copied out of it.
    set response [http::geturl http://www.rosettacode.org/w/index.php?title=Special:Categories&limit=500]
    set html [http::data $response]
    http::cleanup $response

    # Categories that are not programming languages.
    array set ignore {
        "Basic language learning"           1
        "Encyclopedia"                      1
        "Implementations"                   1
        "Language Implementations"          1
        "Language users"                    1
        "Maintenance/OmitCategoriesCreated" 1
        "Programming Languages"             1
        "Programming Tasks"                 1
        "RCTemplates"                       1
        "Solutions by Library"              1
        "Solutions by Programming Language" 1
        "Solutions by Programming Task"     1
        "Unimplemented tasks by language"   1
        "WikiStubs"                         1
    }

    # Collect {count name} pairs; langs starts out empty so that the sort
    # below also works when no line matches.
    set langs {}
    foreach line [split $html \n] {
        if {[regexp {>([^<]+)</a> \((\d+) member} $line -> lang num]} {
            if { ! [info exists ignore($lang)]} {
                lappend langs [list $num $lang]
            }
        }
    }

    # Print the ranking, highest member count first.
    set i 0
    foreach entry [lsort -integer -index 0 -decreasing $langs] {
        lassign $entry num lang
        puts [format "%d. %d - %s" [incr i] $num $lang]
    }</lang> Produces this output on 24 May 2009 (top 15 entries only):

    1. 286 - Tcl
    2. 244 - Python
    3. 229 - Ada
    4. 221 - C
    5. 213 - Perl
    6. 204 - OCaml
    7. 198 - ALGOL 68
    8. 197 - Haskell
    9. 190 - Java
    10. 184 - D
    11. 163 - Ruby
    12. 159 - C++
    13. 159 - E
    14. 157 - Forth
    15. 147 - Fortran
    ……

    UnixPipes

    <lang bash> echo "GET http://www.rosettacode.org/w/index.php?title=Special:Categories&limit=500 HTTP/1.0\n\n"

     | nc www.rosettacode.org 80 
     | sed -n -e 's,<[^>]*>,,g' -e's,^\([^(]*\)(\([^)]*\) members*) *,\2 - \1,g' -e'/^[0-9]\+./p'
     | sort -rn</lang>