Rosetta Code/Rank languages by popularity: Difference between revisions

Content added Content deleted

Inline

Revision as of 11:10, 18 April 2009

Sort the most popular programming languages based on the number of members in Rosetta Code categories (from http://www.rosettacode.org/w/index.php?title=Special:Categories&limit=500)

Output:

1. 233 - Python
2. 222 - Ada
3. 203 - OCaml
4. 203 - C
5. 201 - Perl
6. 193 - Haskell
7. 182 - Java
8. 179 - D
9. 178 - ALGOL 68
10. 160 - Ruby
...

Filtering wrong results is optional. You can check against http://www.rosettacode.org/wiki/Special:MostLinkedCategories

Ada

Library: AWS

NB: The 'ASCII.Quotation' in the declaration of Title_Str is a rather clunky workaround for the Ada->HTML engine not being able to colourize '"' correctly.

<lang ada>with Ada.Integer_Text_IO; use Ada.Integer_Text_IO; with Ada.Strings.Fixed; use Ada.Strings.Fixed; with Ada.Strings.Unbounded; use Ada.Strings.Unbounded; with Ada.Text_IO; use Ada.Text_IO;

with Ada.Containers.Ordered_Sets; with Ada.Strings.Less_Case_Insensitive;

with AWS.Client; with AWS.Response;

procedure Test is

   use Ada.Strings;

   function "+" (S : String) return Unbounded_String renames To_Unbounded_String;

   --  One category of the listing: its member count and its name.
   type A_Language_Count is
      record
         Count    : Integer := 0;
         Language : Unbounded_String;
      end record;

   --  Equality handed to the Ordered_Sets instantiation below.
   --  NOTE: only Count is compared, which is coarser than the ordering
   --  defined by "<" (that one also looks at the language name).
   function "=" (L, R : A_Language_Count) return Boolean is
   begin
      return L.Count = R.Count;
   end "=";

   --  Strict ordering used by the set: by Count first, then by Language
   --  name ignoring letter case.
   function "<" (L, R : A_Language_Count) return Boolean is
   begin
      return L.Count < R.Count
        or else (L.Count = R.Count
                 and then Less_Case_Insensitive (Left  => To_String (L.Language),
                                                 Right => To_String (R.Language)));
   end "<";

   package Sets is new Ada.Containers.Ordered_Sets (A_Language_Count);
   use Sets;

   --  Every (count, language) pair found so far, kept sorted by "<".
   Counts : Set;

   --  Scan the HTML text S for entries of the form
   --     title="Category:<name>">...</a> (<count> ...
   --  and add one element to Counts for each entry found.
   --
   --  The scan is an explicit loop with bounds-checked searches.  It no
   --  longer depends on a Constraint_Error raised by an out-of-range slice
   --  to stop, and a single malformed entry no longer ends the whole scan:
   --  a count that is not a number is skipped, and an element that is
   --  already in the set is left alone.
   procedure Find_Counts (S : String) is
      Title_Str : constant String := "title=" & ASCII.Quotation & "Category:";
      End_A_Str : constant String := "</a> (";

      --  Index of the first occurrence of Pattern in S at or after From,
      --  or 0 when there is none.  From may lie beyond S'Last, in which
      --  case the answer is 0 rather than an exception.
      function Next_Index (Pattern : String; From : Positive) return Natural is
      begin
         if From > S'Last then
            return 0;
         end if;
         return Index (S, Pattern, From);
      end Next_Index;

      Scan_From  : Positive := S'First;
      Title_At   : Natural;
      Bracket_At : Natural;
      End_A_At   : Natural;
      Space_At   : Natural;
   begin
      loop
         Title_At := Next_Index (Title_Str, Scan_From);
         exit when Title_At = 0;

         Bracket_At := Next_Index (">", Title_At + Title_Str'Length);
         exit when Bracket_At = 0;

         End_A_At := Next_Index (End_A_Str, Bracket_At + 1);
         exit when End_A_At = 0;

         Space_At := Next_Index (" ", End_A_At + End_A_Str'Length);
         exit when Space_At = 0;

         declare
            --  The name stops two characters before '>' so that the
            --  closing quotation mark of the attribute is left out.
            Language : constant String :=
              S (Title_At + Title_Str'Length .. Bracket_At - 2);
            Count    : Natural;
            Position : Cursor;
            Inserted : Boolean;
         begin
            --  Converted here, not in the declarative part, so that the
            --  handler below covers a failed conversion.
            Count := Natural'Value (S (End_A_At + End_A_Str'Length .. Space_At - 1));

            --  This form of Insert reports a duplicate through Inserted
            --  instead of raising Constraint_Error.
            Counts.Insert (New_Item => (Count, +Language),
                           Position => Position,
                           Inserted => Inserted);

            Scan_From := Space_At + 1;
         exception
            when Constraint_Error =>
               --  Text in the parentheses is not a number: this was not a
               --  listing entry.  Resume right after its title attribute.
               Scan_From := Title_At + Title_Str'Length;
         end;
      end loop;
   end Find_Counts;

   --  Rank printed in front of the next line written by Display.
   Place : Natural := 1;

   --  Print one line "<place>. <count> - <language>" for the element at C
   --  and advance Place.
   procedure Display (C : Cursor) is
   begin
      Put (Place, Width => 1);             Put (". ");
      Put (Element (C).Count, Width => 1); Put (" - ");
      Put_Line (To_String (Element (C).Language));
      Place := Place + 1;
   end Display;

   Http_Source : constant AWS.Response.Data :=
     AWS.Client.Get ("http://www.rosettacode.org/w/index.php?title=Special:Categories&limit=500");

begin

   Find_Counts (AWS.Response.Message_Body (Http_Source));

   --  The set is ordered ascending, so walk it backwards for "largest first".
   Counts.Reverse_Iterate (Display'Access);

end Test;</lang>

ALGOL 68

Works with: ALGOL 68G version Any - tested with release mk15-0.8b.fc9.i386 - uses non-standard library routines http content and grep in string.

<lang algol>PROC good page = (REF STRING page) BOOL:

    # Yields TRUE when page matches the pattern of an HTTP status line with code 200. #
    # Otherwise prints the status line that was matched, or a generic message when    #
    # none was, and yields FALSE.                                                     #
    IF grep in string("^HTTP/[0-9.]* 200", page, NIL, NIL) = 0
    THEN TRUE
    ELSE IF INT start, end;
             grep in string("^HTTP/[0-9.]* [0-9]+ [a-zA-Z ]*", page,
                            start, end) = 0
         THEN print (page[start : end])
         ELSE print ("unknown error retrieving page")
         FI;
         FALSE
    FI;

OP +:= = (REF FLEX[]STRING in out, STRING item)VOID:(

   # Appends item to in out: builds an array one element longer, copies the old #
   # elements, stores item in the last slot and assigns the result back.        #
   [LWB in out: UPB in out+1]STRING new;
   new[LWB in out: UPB in out]:=in out;
   new[UPB new]:=item;
   in out := new
 );

INT match=0, no match=1, out of memory error=2, other error=3;  # names for results of grep in string; the code shown here only compares against match #

PROC re split = (STRING re split, REF STRING beetles)[]STRING:(

   # Splits beetles around every match of the regular expression re split.      #
   # The result alternates: the text before a match, then the matched text.     #
   # Any text left after the last match is appended as the final element.       #
   FLEX[0]STRING out;
   INT start := 1, pos, end;
   # pos and end are relative to the slice beetles[start:], hence the offsets below #
   WHILE grep in string(re split, beetles[start:], pos, end) = match DO
     out +:= STRING(beetles[start:start+pos-2]);
     out +:= STRING(beetles[start+pos-1:start+end-1]);
     start +:= end
   OD;
   # keep the unmatched tail; the previous test (start > UPB beetles) discarded it #
   IF start <= UPB beetles THEN
     out +:= STRING(beetles[start:])
   FI;
   out
 );

IF STRING reply;

  # fetch the category listing into reply; a non-zero rc or a page rejected by good page selects the error branch #
  INT rc =
     http content (reply, "www.rosettacode.org", "http://www.rosettacode.org/w/index.php?title=Special:Categories&limit=500", 0);
  rc /= 0 OR NOT good page (reply)

THEN print (("Error:",strerror (rc))) ELSE

 STRING  # hack: HTML should be parsed by an official HTML parsing library #
   re html tag = "<[^>]*>",
   re a href category = "^<a href=""/wiki/Category:.*"" title=",
   re members = "([1-9][0-9]* members)";

 # one collected entry: a member count and the category it belongs to #
 MODE STATISTIC = STRUCT(INT members, STRING category);
 FLEX[0]STATISTIC stats;

 # append item to in out by building an array one element longer and assigning it back #
 OP +:=  = (REF FLEX[]STATISTIC in out, STATISTIC item)VOID:(
     [LWB in out: UPB in out+1]STATISTIC new;
     new[LWB in out: UPB in out]:=in out;
     new[UPB new]:=item;
     in out := new
   );

 # hack: needs to be manually maintained #

 # alternation of category names; a members token is skipped when category matches it #
 STRING re ignore ="Programming Tasks|WikiStubs|Maintenance/OmitCategoriesCreated|"+
                   "Unimplemented tasks by language|Programming Languages|"+
                   "Solutions by Programming Language|Implementations|"+
                   "Solutions by Library|Encyclopedia|Language users|"+
                   "Solutions by Programming Task|Basic language learning|"+
                   "RCTemplates|Language Implementations";

 # formats used with getf to read the fields back out of a matching token #
 FORMAT category fmt = $"<a href=""/wiki/Category:"g""" title=""Category:"g""""$;
 STRING encoded category, category;
 FORMAT members fmt = $" ("g" members)"$;
 INT members;

 # split the reply at every HTML tag, then walk the pieces in order #
 FLEX[0]STRING tokens := re split(re html tag, reply);
 FOR token index TO UPB tokens DO
   STRING token := tokens[token index];
   FILE file;
   IF grep in string(re a href category, token, NIL, NIL) = match THEN
     # a category link: remember its name for the members token that follows #
     associate(file, token);
     make term(file,"""");
     getf(file, (category fmt, encoded category, category));
     close(file)
   ELIF grep in string(re members, token, NIL, NIL) = match THEN
     # a member count: record it against the most recently read category #
     IF grep in string(re ignore, category, NIL, NIL) /= match THEN
       associate(file, token);
       getf(file, (members fmt, members));
       stats +:= STATISTIC(members, category);
       close(file)
     FI
   FI
 OD;

 MODE TYPE = STATISTIC;

 # statistics are ordered by member count only #
 OP < = (STATISTIC a,b)BOOL:
   members OF a < members OF b;

 # hack: boiler plating - sort should be included from a library #

 # Shell sort of seq in place, largest element first, using the < operator on TYPE. #
 # The gap starts at (UPB + LWB + 1) OVER 2 and shrinks by the factor 5/11, with a  #
 # gap of 2 followed by 1; the loop ends when the gap reaches 0. Yields seq.        #
 PROC in place shell sort reverse = (REF FLEX []TYPE seq)REF[]TYPE:(
     INT inc := ( UPB seq + LWB seq + 1 ) OVER 2;
     WHILE inc NE 0 DO
         FOR index FROM LWB seq TO UPB seq DO
             INT i := index;
             TYPE el = seq[i];
             # shift smaller elements one gap to the right until the slot for el is found #
             WHILE ( i  - LWB seq >= inc | seq[i - inc] < el | FALSE ) DO
                 seq[i] := seq[i - inc];
                 i -:= inc
             OD;
             seq[i] := el
         OD;
         inc := IF inc = 2 THEN 1 ELSE ENTIER(inc * 5 / 11) FI
     OD;
     seq
 );

 # sort largest first, then print at most max lines, each holding the rank and one stats element #
 stats := in place shell sort reverse(stats);

 INT max = 10;
 FOR i TO (UPB stats > max | max | UPB stats) DO
   printf(($g(-0)". "g(-0)" - "gl$,i,stats[i]))
 OD

FI</lang> Output:

1. 233 - Python
2. 222 - Ada
3. 203 - OCaml
4. 203 - C
5. 201 - Perl
6. 193 - Haskell
7. 182 - Java
8. 179 - D
9. 178 - ALGOL 68
10. 160 - Ruby

C#

Sorting only programming languages.

<lang csharp>using System; using System.Net; using System.Linq; using System.Text; using System.Text.RegularExpressions; using System.Collections; using System.Collections.Generic;

class Program {

   /// <summary>
   /// Downloads the list of programming-language categories and the listing of
   /// all categories with their member counts, keeps the categories that are
   /// programming languages, and prints them ranked by member count as
   /// "rank. count - language".
   /// </summary>
   static void Main(string[] args)
   {
       string get1;
       string get2;

       // WebClient is IDisposable; one instance serves both downloads and is released afterwards.
       using (WebClient client = new WebClient())
       {
           get1 = client.DownloadString("http://www.rosettacode.org/w/api.php?action=query&list=categorymembers&cmtitle=Category:Programming_Languages&cmlimit=500&format=json");
           get2 = client.DownloadString("http://www.rosettacode.org/w/index.php?title=Special:Categories&limit=500");
       }

       // A hash set makes each membership test constant time instead of a linear scan.
       HashSet<string> langs = new HashSet<string>();
       Dictionary<string, int> qtdmbr = new Dictionary<string, int>();

       MatchCollection match1 = new Regex("\"title\":\"Category:(.+?)\"").Matches(get1);
       // "members?" also accepts a category that has exactly one member.
       MatchCollection match2 = new Regex("title=\"Category:(.+?)\">.+?</a> \\((\\d+) members?\\)").Matches(get2);

       foreach (Match lang in match1) langs.Add(lang.Groups[1].Value);

       foreach (Match match in match2) {
           string category = match.Groups[1].Value;

           if (langs.Contains(category)) {
               // Indexer assignment: a category matched twice overwrites instead of throwing.
               qtdmbr[category] = Int32.Parse(match.Groups[2].Value);
           }
       }

       // Count first, then name, as in the task's sample output.
       string[] test = qtdmbr.OrderByDescending(x => x.Value).Select(x => String.Format("{0} - {1}", x.Value, x.Key)).ToArray();

       int count = 1;

       foreach (string i in test)
       {
           Console.WriteLine("{0}. {1}",count,i);
           count++;
       }
   }

}</lang>

Object-oriented solution

<lang csharp>using System; using System.Net; using System.Linq; using System.Text; using System.Text.RegularExpressions; using System.Collections.Generic;

class Category {

   /// <summary>Creates a category with the given title and member count.</summary>
   public Category(string title, int members) {
       Title = title;
       Members = members;
   }

   /// <summary>Title of the category; fixed at construction.</summary>
   public string Title { get; private set; }

   /// <summary>Number of members in the category; fixed at construction.</summary>
   public int Members { get; private set; }

}

class Program {

   /// <summary>
   /// Downloads the list of programming-language categories and the listing of
   /// all categories with their member counts, builds a Category object for each
   /// category that is a programming language, and prints them ranked by member
   /// count as "rank. count - language".
   /// </summary>
   static void Main(string[] args) {
       string get1;
       string get2;

       // WebClient is IDisposable; one instance serves both downloads and is released afterwards.
       using (WebClient client = new WebClient()) {
           get1 = client.DownloadString("http://www.rosettacode.org/w/api.php?action=query&list=categorymembers&cmtitle=Category:Programming_Languages&cmlimit=500&format=json");
           get2 = client.DownloadString("http://www.rosettacode.org/w/index.php?title=Special:Categories&limit=500");
       }

       MatchCollection match1 = new Regex("\"title\":\"Category:(.+?)\"").Matches(get1);
       // "members?" also accepts a category that has exactly one member.
       MatchCollection match2 = new Regex("title=\"Category:(.+?)\">.+?</a> \\((\\d+) members?\\)").Matches(get2);

       // A hash set makes each membership test constant time instead of a linear scan of an array.
       HashSet<string> valids = new HashSet<string>(match1.Cast<Match>().Select(x => x.Groups[1].Value));
       List<Category> langs = new List<Category>();

       foreach (Match match in match2) {
           string category = match.Groups[1].Value;

           // Parse the count only for categories that are kept.
           if (valids.Contains(category)) {
               langs.Add(new Category(category, Int32.Parse(match.Groups[2].Value)));
           }
       }

       langs = langs.OrderByDescending(x  => x.Members).ToList();
       int count = 1;

       foreach (Category i in langs) {
           // Count first, then name, as in the task's sample output.
           Console.WriteLine("{0}. {1} - {2}", count, i.Members, i.Title);
           count++;
       }
   }

}</lang>

Perl

Sorting only programming languages.

<lang perl>use LWP::Simple 'get';

# API query listing every member of Category:Programming_Languages (JSON).
my $langs_url = 'http://www.rosettacode.org/w/api.php?action=query&list=categorymembers&cmtitle=Category:Programming_Languages&cmlimit=500&format=json';
# HTML page listing every category together with its member count.
my $cats_url = 'http://www.rosettacode.org/w/index.php?title=Special:Categories&limit=500';

# LWP::Simple::get returns undef on failure; stop with a message instead of
# matching against an undefined value.
my $langs_json = get($langs_url);
defined $langs_json or die "Could not fetch $langs_url\n";
my $cats_html = get($cats_url);
defined $cats_html or die "Could not fetch $cats_url\n";

# Set of language names.  A title without the "Category:" prefix contributes
# nothing, so the list handed to the hash always has an even length.
my %langs = map { /Category:(.+)/ ? ($1 => 1) : () }
    $langs_json =~ /"title":"(.+?)"}/g;

# Scan the whole page for "...>NAME</a> (COUNT member..." instead of first
# cutting out the list element and splitting it: the HTML delimiters that the
# earlier regex and split relied on are not present in this copy of the code.
my @pairs;
while ($cats_html =~ m{>([^<>\s][^<>]*)</a> \((\d+) member}g) {
    push @pairs, [$1, $2] if $langs{$1};
}

# Highest member count first.
@pairs = sort { $b->[1] <=> $a->[1] } @pairs;

my $n = 0;
for my $pair (@pairs) {
    my ($lang, $tasks) = @$pair;
    printf "%3d. %3d - %s\n", ++$n, $tasks, $lang;
}</lang>

Python

Sorting only programming languages.

<lang python>import urllib,re

key1 = lambda x: int(x[1])

get1 = urllib.urlopen("http://www.rosettacode.org/w/api.php?action=query&list=categorymembers&cmtitle=Category:Programming_Languages&cmlimit=500&format=json").read() get2 = urllib.urlopen("http://www.rosettacode.org/w/index.php?title=Special:Categories&limit=500").read()

langs = re.findall("\"title\":\"Category:(.+?)\"",get1) qtdmbr = re.findall("title=\"Category:(.+?)\">.+?</a> $(\d+) members$",get2)

result = [(x,int(y)) for x,y in qtdmbr if x in langs]

for n, i in enumerate(sorted(result,key=key1,reverse=True)):

   print "%3d. %3d - %s" % (n+1, i[1], i[0])</lang>

Ruby

<lang ruby>require 'open-uri'

entries = []

open("http://www.rosettacode.org/w/index.php?title=Special:Categories&limit=500") do |f|

 for line in f
   match = line.match(%r{>([^<>]*)</a> \((\d+) members?\)})
   entries << match[2] + ' - ' + match[1] if match
 end

end

entries.sort_by {|x| -x.to_i}.each_with_index do |line, c|

 puts "%3d. %s" % [c+1, line]

end</lang>

Tcl

<lang tcl>package require Tcl 8.5
package require http

# Fetch the page listing every category with its member count.
set response [http::geturl http://www.rosettacode.org/w/index.php?title=Special:Categories&limit=500]

# Start from an empty list so the sort below also works when no line matches.
set langs {}
foreach line [split [http::data $response] \n] {
    # Link text followed by "(N member...": remember the pair, count first.
    if {[regexp {>([^<]+)</a> \((\d+) member} $line -> lang num]} {
        lappend langs [list $num $lang]
    }
}
# Release the state the http package keeps for this request.
http::cleanup $response

# Print "rank. count - name", largest count first.
set i 0
foreach entry [lsort -integer -index 0 -decreasing $langs] {
    lassign $entry num lang
    puts [format "%d. %d - %s" [incr i] $num $lang]
}</lang>

UnixPipes

printf 'GET http://www.rosettacode.org/w/index.php?title=Special:Categories&limit=500 HTTP/1.0\r\n\r\n' |  # printf emits the CR LF line ends portably; echo's handling of \n varies between shells
 nc www.rosettacode.org 80 |  # a trailing | continues the pipeline on the next line
 sed -n -e 's,<[^>]*>,,g' -e's,^\([^(]*\)(\([^)]*\) members*) *,\2 - \1,g' -e'/^[0-9][0-9]*./p' |  # strip tags, rewrite "name (N members)" as "N - name", keep lines that start with a number
 sort -rn