Rosetta Code/Rank languages by popularity
You are encouraged to solve this task according to the task description, using any language you may know.
Sort the most popular programming languages based on the number of members in Rosetta Code categories (from http://www.rosettacode.org/w/index.php?title=Special:Categories&limit=500)
Output:
1. 233 - Python 2. 222 - Ada 3. 203 - OCaml 4. 203 - C 5. 201 - Perl 6. 193 - Haskell 7. 182 - Java 8. 179 - D 9. 178 - ALGOL 68 10. 160 - Ruby ...
Filtering wrong results is optional. You can check against http://www.rosettacode.org/wiki/Special:MostLinkedCategories
Ada
NB: The 'ASCII.Quotation' in the declaration of Title_Str is a rather clunky workaround for the Ada->HTML engine not being able to colourize '"' correctly.
<lang ada>with Ada.Integer_Text_IO; use Ada.Integer_Text_IO; with Ada.Strings.Fixed; use Ada.Strings.Fixed; with Ada.Strings.Unbounded; use Ada.Strings.Unbounded; with Ada.Text_IO; use Ada.Text_IO;
with Ada.Containers.Ordered_Sets; with Ada.Strings.Less_Case_Insensitive;
with AWS.Client; with AWS.Response;
procedure Test is

   use Ada.Strings;

   function "+" (S : String) return Unbounded_String
     renames To_Unbounded_String;

   --  One scraped category: its member count and its name.
   type A_Language_Count is
      record
         Count    : Integer := 0;
         Language : Unbounded_String;
      end record;

   --  Equality compares the member count only.
   function "=" (L, R : A_Language_Count) return Boolean is
   begin
      return L.Count = R.Count;
   end "=";

   function "<" (L, R : A_Language_Count) return Boolean is
   begin
      --  Sort by 'Count' and then by Language name
      return L.Count < R.Count
        or else (L.Count = R.Count
                 and then Less_Case_Insensitive
                            (Left  => To_String (L.Language),
                             Right => To_String (R.Language)));
   end "<";

   package Sets is new Ada.Containers.Ordered_Sets (A_Language_Count);
   use Sets;

   --  Every category found so far, ordered by "<" above.
   Counts : Set;

   --  Scan S for entries of the form
   --     title="Category:NAME">...</a> (COUNT ...
   --  and insert one (COUNT, NAME) element into Counts per entry.
   --  Scanning stops at the first position where no further complete
   --  entry can be found.  An entry whose count is not a valid Natural,
   --  or that is equivalent to an element already in Counts, is skipped.
   procedure Find_Counts (S : String) is
      Title_Str : constant String :=
        "title=" & ASCII.Quotation & "Category:";
      End_A_Str : constant String := "</a> (";

      From        : Integer := S'First;  --  Where the next search starts
      Title_At    : Natural;             --  Start of Title_Str
      Name_First  : Positive;            --  First character of the name
      Bracket_At  : Natural;             --  The '>' closing the <a ...> tag
      End_A_At    : Natural;             --  Start of End_A_Str
      Count_First : Positive;            --  First digit of the count
      Space_At    : Natural;             --  Space following the count
   begin
      loop
         Title_At := Index (S (From .. S'Last), Title_Str);
         exit when Title_At = 0;
         Name_First := Title_At + Title_Str'Length;

         Bracket_At := Index (S (Name_First .. S'Last), ">");
         exit when Bracket_At = 0;

         End_A_At := Index (S (Bracket_At + 1 .. S'Last), End_A_Str);
         exit when End_A_At = 0;
         Count_First := End_A_At + End_A_Str'Length;

         Space_At := Index (S (Count_First .. S'Last), " ");
         exit when Space_At = 0;

         begin
            --  The name ends two characters before '>': the character
            --  directly before '>' is the closing quote of the attribute.
            Counts.Insert
              (New_Item =>
                 (Count    => Natural'Value (S (Count_First .. Space_At - 1)),
                  Language => +S (Name_First .. Bracket_At - 2)));
         exception
            when Constraint_Error =>
               --  Raised by Natural'Value for a malformed count and by
               --  Insert for an equivalent element: skip this entry only.
               null;
         end;

         From := Space_At + 1;
      end loop;
   end Find_Counts;

   --  Rank of the next line printed by Display.
   Place : Natural := 1;

   --  Print one "<place>. <count> - <language>" line and advance Place.
   procedure Display (C : Cursor) is
   begin
      Put (Place, Width => 1);
      Put (". ");
      Put (Element (C).Count, Width => 1);
      Put (" - ");
      Put_Line (To_String (Element (C).Language));
      Place := Place + 1;
   end Display;

   Http_Source : constant AWS.Response.Data :=
     AWS.Client.Get
       ("http://www.rosettacode.org/w/index.php?title=Special:Categories&limit=500");

begin
   Find_Counts (AWS.Response.Message_Body (Http_Source));
   --  The set is ordered ascending, so walk it backwards for a ranking.
   Counts.Reverse_Iterate (Display'Access);
end Test;</lang>
ALGOL 68
Note: the routine http content is currently not available on Win32 systems. <lang algol>PROC good page = (REF STRING page) BOOL:
  # Yields TRUE when page starts with an HTTP status line whose code is 200. #
  # Otherwise prints the status line that was found, or a generic message   #
  # when no status line can be recognised, and yields FALSE.                #
  ( grep in string("^HTTP/[0-9.]* 200", page, NIL, NIL) = 0
  | TRUE
  | INT status first, status last;
    IF grep in string("^HTTP/[0-9.]* [0-9]+ [a-zA-Z ]*", page, status first, status last) = 0
    THEN print (page[status first : status last])
    ELSE print ("unknown error retrieving page")
    FI;
    FALSE
  );
# LISTOFSTRING is a singly linked list of strings: references to its first  #
# and last LINK plus an element count (upb). A LINK holds one string value  #
# and a reference to the next LINK.                                         #
MODE LISTOFSTRING = STRUCT(REF LINK first, last, INT upb); MODE LINK = STRUCT(STRING value, REF LINK next);
# LISTINIT makes new a list whose first and last link are both the given    #
# link; upb becomes 0 when that link is NIL and 1 otherwise. Yields new.    #
PRIO LISTINIT = 1; OP LISTINIT = (REF LISTOFSTRING new, REF LINK first)REF LISTOFSTRING: (
new := (first, first, (first IS REF LINK(NIL) | 0 | 1 )); new
);
# +:= appends item to list. The new LINK is heap allocated, becomes first   #
# when the list was empty, otherwise is chained after the current last      #
# link; it then becomes last and upb is incremented.                        #
OP +:= = (REF LISTOFSTRING list, []CHAR item)VOID: (
HEAP LINK new := (STRING(item), REF LINK(NIL)); IF first OF list IS REF LINK(NIL) THEN first OF list := new ELSE next OF last OF list := new FI; last OF list := new; upb OF list +:= 1
);
# UPB yields the element count stored in the list. #
OP UPB = (LISTOFSTRING list)INT: upb OF list;
# ARRAYOFSTRING copies the values into a row of UPB list strings by walking #
# the next references from the first link.                                  #
OP ARRAYOFSTRING = (LISTOFSTRING list)[]STRING:(
[UPB list]STRING out; REF LINK this := first OF list; FOR i TO UPB list DO out[i] := value OF this; this := next OF this OD; out
);
# Values that grep in string results are compared against in this program. #
INT match=0, no match=1, out of memory error=2, other error=3;
# Splits beetles on the regular expression re split. The result alternates #
# the text found before each match with the matched text itself, followed  #
# by whatever text remains after the last match.                           #
PROC re split = (STRING re split, REF STRING beetles)[]STRING:(
  LISTOFSTRING out := (NIL, NIL, 0); # LISTINIT REF LINK NIL; #
  # pos and end are relative to the slice beetles[start:] that was searched #
  INT start := 1, pos, end;
  WHILE grep in string(re split, beetles[start:], pos, end) = match DO
    out +:= beetles[start:start+pos-2];
    out +:= beetles[start+pos-1:start+end-1];
    start +:= end
  OD;
  # Keep the unmatched tail. The test used to be start > UPB beetles, which #
  # dropped that tail and appended an empty string instead.                 #
  IF start <= UPB beetles THEN out +:= beetles[start:] FI;
  ARRAYOFSTRING(out)
);
# Main program: fetch the category listing, collect (members, category)    #
# pairs for every category not named in re ignore, sort them in descending #
# order and print the first ten.                                           #
IF STRING reply;
   INT rc = http content (reply, "www.rosettacode.org", "http://www.rosettacode.org/w/index.php?title=Special:Categories&limit=500", 0);
   rc /= 0 OR NOT good page (reply)
THEN print (("Error:",strerror (rc)))
ELSE
  STRING # hack: HTML should be parsed by an official HTML parsing library #
    re html tag = "<[^>]*>",
    re a href category = "^<a href=""/wiki/Category:.*"" title=",
    re members = "([1-9][0-9]* members)";

  MODE STATISTIC = STRUCT(INT members, STRING category);
  FLEX[0]STATISTIC stats;

  # Appends item by copying in out into a row that is one element longer. #
  OP +:= = (REF FLEX[]STATISTIC in out, STATISTIC item)VOID:(
    [LWB in out: UPB in out+1]STATISTIC new;
    new[LWB in out: UPB in out]:=in out;
    new[UPB new]:=item;
    in out := new
  );

  # hack: needs to be manually maintained #
  STRING re ignore ="Programming Tasks|WikiStubs|Maintenance/OmitCategoriesCreated|"+
                    "Unimplemented tasks by language|Programming Languages|"+
                    "Solutions by Programming Language|Implementations|"+
                    "Solutions by Library|Encyclopedia|Language users|"+
                    "Solutions by Programming Task|Basic language learning|"+
                    "RCTemplates|Language Implementations";

  FORMAT category fmt = $"<a href=""/wiki/Category:"g""" title=""Category:"g""""$;
  STRING encoded category, category;
  FORMAT members fmt = $" ("g" members)"$;
  INT members;

  # A category anchor sets category; the member count token that follows  #
  # it is recorded against that category unless the name is ignored.      #
  FLEX[0]STRING tokens := re split(re html tag, reply);
  FOR token index TO UPB tokens DO
    STRING token := tokens[token index];
    FILE file;
    IF grep in string(re a href category, token, NIL, NIL) = match THEN
      associate(file, token);
      make term(file,"""");
      getf(file, (category fmt, encoded category, category));
      close(file)
    ELIF grep in string(re members, token, NIL, NIL) = match THEN
      IF grep in string(re ignore, category, NIL, NIL) /= match THEN
        associate(file, token);
        getf(file, (members fmt, members));
        stats +:= STATISTIC(members, category);
        close(file)
      FI
    FI
  OD;

  # Ordering used by the sort prelude: compare member counts only. #
  OP < = (STATISTIC a,b)BOOL:
    members OF a < members OF b;

  MODE SORTSTRUCT = STATISTIC;
  PR READ "prelude/sort.a68" PR;

  stats := in place shell sort reverse(stats);

  INT max = 10;
  FOR i TO (UPB stats > max | max | UPB stats) DO
    printf(($g(-0)". "g(-0)" - "gl$,i,stats[i]))
  OD
FI</lang> Output:
1. 233 - Python 2. 222 - Ada 3. 203 - OCaml 4. 203 - C 5. 201 - Perl 6. 193 - Haskell 7. 182 - Java 8. 179 - D 9. 178 - ALGOL 68 10. 160 - Ruby
AWK
This solution needs help from external tools to fetch the HTML from rosettacode.org, and also to do a numeric sort <lang sh>printf "GET %s HTTP/1.0\n\n" 'http://www.rosettacode.org/w/index.php?title=Special:Categories&limit=500' | nc www.rosettacode.org 80 | gawk '
    # ignore categories that are not languages
    /Basic language learning/            {next}
    /Encyclopedia/                       {next}
    /Implementations/                    {next}
    /Language Implementations/           {next}
    /Language users/                     {next}
    /Maintenance\/OmitCategoriesCreated/ {next}
    /Programming Languages/              {next}
    /Programming Tasks/                  {next}
    /RCTemplates/                        {next}
    /Solutions by Library/               {next}
    /Solutions by Programming Language/  {next}
    /Solutions by Programming Task/      {next}
    /Unimplemented tasks by language/    {next}
    /WikiStubs/                          {next}

    # For each list item, m[1] is the category name and m[2] its member
    # count; store them as "count - name" so a numeric sort orders by count.
    match($0, /<li.*>([^<]*)<\/a> \(([[:digit:]]+) members?/, m) {
        lang[++i] = m[2] " - " m[1]
    }

    END {
        # Count the collected entries.
        len = 0
        for (i in lang) len++

        # Send every entry to a sort co-process, then close the write end
        # so that sort sees end of input and starts producing output.
        sorter = "sort -rn"
        for (i = 1; i <= len; i++) {
            print lang[i] |& sorter
        }
        close(sorter, "to")

        # Read the sorted lines back and prefix each with its rank.
        i = 1
        while ((sorter |& getline line) > 0) {
            print i++ ". " line
        }
        close(sorter)
    }
'</lang>
C#
Sorting only programming languages.
<lang csharp>using System; using System.Net; using System.Linq; using System.Text; using System.Text.RegularExpressions; using System.Collections; using System.Collections.Generic;
class Program {

    /// <summary>
    /// Downloads the list of programming-language categories and the list of
    /// all categories with their member counts, then prints the language
    /// categories ranked by descending member count.
    /// </summary>
    static void Main(string[] args) {
        string get1;
        string get2;

        // WebClient is IDisposable: release it as soon as both pages are fetched.
        using (WebClient client = new WebClient()) {
            get1 = client.DownloadString("http://www.rosettacode.org/w/api.php?action=query&list=categorymembers&cmtitle=Category:Programming_Languages&cmlimit=500&format=json");
            get2 = client.DownloadString("http://www.rosettacode.org/w/index.php?title=Special:Categories&limit=500");
        }

        // A hash set gives constant-time membership tests in the loop below.
        HashSet<string> langs = new HashSet<string>();
        Dictionary<string, int> qtdmbr = new Dictionary<string, int>();

        MatchCollection match1 = new Regex("\"title\":\"Category:(.+?)\"").Matches(get1);
        // "members?" also accepts the singular "(1 member)".
        MatchCollection match2 = new Regex("title=\"Category:(.+?)\">.+?</a> \\((\\d+) members?\\)").Matches(get2);

        foreach (Match lang in match1) langs.Add(lang.Groups[1].Value);

        foreach (Match match in match2) {
            if (langs.Contains(match.Groups[1].Value)) {
                // The indexer overwrites a repeated title instead of throwing.
                qtdmbr[match.Groups[1].Value] = Int32.Parse(match.Groups[2].Value);
            }
        }

        string[] test = qtdmbr.OrderByDescending(x => x.Value).Select(x => String.Format("{0} - {1}", x.Key, x.Value)).ToArray();

        int count = 1;

        foreach (string i in test) {
            Console.WriteLine("{0}. {1}", count, i);
            count++;
        }
    }
}</lang>
Object-oriented solution
<lang csharp>using System; using System.Net; using System.Linq; using System.Text; using System.Text.RegularExpressions; using System.Collections.Generic;
class Category {

    /// <summary>Creates a record for one category.</summary>
    /// <param name="title">Value exposed through <see cref="Title"/>.</param>
    /// <param name="members">Value exposed through <see cref="Members"/>.</param>
    public Category(string title, int members) {
        Title = title;
        Members = members;
    }

    /// <summary>The title given to the constructor; read-only to callers.</summary>
    public string Title { get; private set; }

    /// <summary>The member count given to the constructor; read-only to callers.</summary>
    public int Members { get; private set; }
}
class Program {

    /// <summary>
    /// Downloads the list of programming-language categories and the list of
    /// all categories with their member counts, then prints the language
    /// categories ranked by descending member count.
    /// </summary>
    static void Main(string[] args) {
        string get1;
        string get2;

        // WebClient is IDisposable: release it as soon as both pages are fetched.
        using (WebClient client = new WebClient()) {
            get1 = client.DownloadString("http://www.rosettacode.org/w/api.php?action=query&list=categorymembers&cmtitle=Category:Programming_Languages&cmlimit=500&format=json");
            get2 = client.DownloadString("http://www.rosettacode.org/w/index.php?title=Special:Categories&limit=500");
        }

        MatchCollection match1 = new Regex("\"title\":\"Category:(.+?)\"").Matches(get1);
        // "members?" also accepts the singular "(1 member)".
        MatchCollection match2 = new Regex("title=\"Category:(.+?)\">.+?</a> \\((\\d+) members?\\)").Matches(get2);

        // A hash set gives constant-time membership tests in the loop below.
        HashSet<string> valids = new HashSet<string>(match1.Cast<Match>().Select(x => x.Groups[1].Value));
        List<Category> langs = new List<Category>();

        foreach (Match match in match2) {
            string category = match.Groups[1].Value;
            int members = Int32.Parse(match.Groups[2].Value);

            if (valids.Contains(category)) langs.Add(new Category(category, members));
        }

        langs = langs.OrderByDescending(x => x.Members).ToList();
        int count = 1;

        foreach (Category i in langs) {
            Console.WriteLine("{0}. {1} - {2}", count, i.Title, i.Members);
            count++;
        }
    }
}</lang>
Perl
Sorting only programming languages.
<lang perl>use LWP::Simple 'get';
my $langs_url = 'http://www.rosettacode.org/w/api.php?action=query&list=categorymembers&cmtitle=Category:Programming_Languages&cmlimit=500&format=json';
my $cats_url = 'http://www.rosettacode.org/w/index.php?title=Special:Categories&limit=500';

# NOTE(review): in the copy of this script that was available, the HTML tags
# inside the patterns below (<ul>, </ul>, <li>) had been eaten by a renderer
# and everything after "split '" was missing.  The tags, the split argument
# and the final print loop are reconstructions -- confirm against the
# original before relying on them.

# Names of all programming-language categories, used as a lookup set.
my $langs_json = get($langs_url);
defined $langs_json or die "Could not fetch $langs_url\n";
my %langs =
    map { /Category:(.+)/ ? ($1, 1) : () }
    $langs_json =~ /"title":"(.+?)"}/g;

# The category listing; presumably the first <ul> on the page -- TODO confirm.
my $cats_html = get($cats_url);
defined $cats_html or die "Could not fetch $cats_url\n";
my ($list) = $cats_html =~ m{<ul>(.+?)</ul>}s
    or die "No category list found in $cats_url\n";

# [name, member count] pairs for language categories, largest count first.
my @pairs =
    sort { $b->[1] <=> $a->[1] }
    grep { defined $_->[0] and $langs{ $_->[0] } }
    map  { [ m{>(\S.*?)</a> \((\d+) member} ] }
    split '<li>', $list;

my $rank = 0;
printf "%3d. %3d - %s\n", ++$rank, $_->[1], $_->[0] for @pairs;</lang>
Python
Sorting only programming languages.
<lang python>import urllib,re
key1 = lambda x: int(x[1])
get1 = urllib.urlopen("http://www.rosettacode.org/w/api.php?action=query&list=categorymembers&cmtitle=Category:Programming_Languages&cmlimit=500&format=json").read() get2 = urllib.urlopen("http://www.rosettacode.org/w/index.php?title=Special:Categories&limit=500").read()
langs = re.findall("\"title\":\"Category:(.+?)\"",get1) qtdmbr = re.findall("title=\"Category:(.+?)\">.+?</a> \((\d+) members\)",get2)
result = [(x,int(y)) for x,y in qtdmbr if x in langs]
for n, i in enumerate(sorted(result,key=key1,reverse=True)):
print "%3d. %3d - %s" % (n+1, i[1], i[0])</lang>
Ruby
<lang ruby>require 'open-uri'
entries = []
open("http://www.rosettacode.org/w/index.php?title=Special:Categories&limit=500") do |f|
for line in f match = line.match(%r{>([^<>]*)</a> \((\d+) members?\)}) entries << match[2] + ' - ' + match[1] if match end
end
entries.sort_by {|x| -x.to_i}.each_with_index do |line, c|
puts "%3d. %s" % [c+1, line]
end</lang>
Tcl
<lang tcl>package require Tcl 8.5
package require http

# Fetch the category listing.
set response [http::geturl http://www.rosettacode.org/w/index.php?title=Special:Categories&limit=500]

# Categories that are not programming languages.
array set ignore {
    "Basic language learning"           1
    "Encyclopedia"                      1
    "Implementations"                   1
    "Language Implementations"          1
    "Language users"                    1
    "Maintenance/OmitCategoriesCreated" 1
    "Programming Languages"             1
    "Programming Tasks"                 1
    "RCTemplates"                       1
    "Solutions by Library"              1
    "Solutions by Programming Language" 1
    "Solutions by Programming Task"     1
    "Unimplemented tasks by language"   1
    "WikiStubs"                         1
}

# Collect {count name} pairs; start from an empty list so the sort below
# is well defined even when no line matches.
set langs {}
foreach line [split [http::data $response] \n] {
    if {[regexp {>([^<]+)</a> \((\d+) member} $line -> lang num]} {
        if { ! [info exists ignore($lang)]} {
            lappend langs [list $num $lang]
        }
    }
}
# Release the state kept for the finished request.
http::cleanup $response

# Print the ranking, largest member count first.
set i 0
foreach entry [lsort -integer -index 0 -decreasing $langs] {
    lassign $entry num lang
    puts [format "%d. %d - %s" [incr i] $num $lang]
}</lang> Produces this output on 24 May 2009 (top 15 entries only):
1. 286 - Tcl 2. 244 - Python 3. 229 - Ada 4. 221 - C 5. 213 - Perl 6. 204 - OCaml 7. 198 - ALGOL 68 8. 197 - Haskell 9. 190 - Java 10. 184 - D 11. 163 - Ruby 12. 159 - C++ 13. 159 - E 14. 157 - Forth 15. 147 - Fortran ……
UnixPipes
<lang bash> echo "GET http://www.rosettacode.org/w/index.php?title=Special:Categories&limit=500 HTTP/1.0\n\n"
| nc www.rosettacode.org 80 | sed -n -e 's,<[^>]*>,,g' -e's,^\([^(]*\)(\([^)]*\) members*) *,\2 - \1,g' -e'/^[0-9]\+./p' | sort -rn</lang>