Rosetta Code/Rank languages by popularity: Difference between revisions

(PascalABC.NET)
 
(5 intermediate revisions by 3 users not shown)
Line 33:
 
=={{header|Ada}}==
===Ada 2022: using web scraping===
{{libheader|AWS}}
<syntaxhighlight lang="ada">pragma Ada_2022;
 
with Ada.Integer_Text_IO; use Ada.Integer_Text_IO;
with Ada.Strings.Fixed; use Ada.Strings.Fixed;
with Ada.Strings.Unbounded; use Ada.Strings.Unbounded;
Line 40 ⟶ 43:
 
with Ada.Containers.Ordered_Sets;
with Ada.Strings.Unbounded.Less_Case_Insensitive;
with Ada.Characters.Handling; use Ada.Characters.Handling;
 
with AWS.Client; use AWS.Client;
with AWS.Messages; use AWS.Messages;
with AWS.Response;
 
procedure TestRank_Languages_By_Popularity is
 
use Ada.Strings;
 
-- Shorthand conversion operator: +"text" is To_Unbounded_String ("text").
function "+" (S : String) return Unbounded_String
  renames To_Unbounded_String;
 
-- One ranking entry: a category (language) name together with its count.
-- Count is a Natural because it is produced by Natural'Value when the
-- page text is parsed.
type A_Language_Count is
   record
      Count    : Natural := 0;
      Language : Unbounded_String;
   end record;
Line 64 ⟶ 70:
-- Strict ordering for A_Language_Count values.
--
-- Entries are ordered by Count first; entries with the same Count are
-- ordered by Language, compared without regard to letter case.
-- Returns True when L sorts strictly before R.
function "<" (L, R : A_Language_Count) return Boolean is
begin
   -- Sort by 'Count' and then by Language name
   return L.Count < R.Count
     or else (L.Count = R.Count
              and then Less_Case_Insensitive (Left  => L.Language,
                                              Right => R.Language));
end "<";
 
Line 77 ⟶ 84:
 
procedure Find_Counts (S : String) is
Title_Str : constant String := "title=""Category:";
End_A_Str : constant String := "</a> (";
 
-- Return a copy of S with every occurrence of the character C removed.
-- The relative order of the remaining characters is preserved; an empty
-- S (or an S made up only of C) yields the empty string.
function Strip_Character (S : String; C : Character) return String is
   -- Scratch buffer sized for the worst case (nothing removed).
   S_Copy_Str   : String (1 .. S'Length);
   -- Index of the last character written to S_Copy_Str (0 = none yet).
   S_Copy_Index : Natural := 0;
begin
   for I in S'Range loop
      if S (I) /= C then
         S_Copy_Index := S_Copy_Index + 1;
         S_Copy_Str (S_Copy_Index) := S (I);
      end if;
   end loop;

   -- Only the filled prefix of the buffer is returned.
   return S_Copy_Str (S_Copy_Str'First .. S_Copy_Index);
end Strip_Character;
 
-- Decide whether the category name L should be left out of the ranking.
--
-- Returns True when L contains, anywhere and without regard to letter
-- case, at least one of the fragments listed in Categories_To_Ignore;
-- returns False otherwise.
function Ignore_Category (L : String) return Boolean is
   type Unbounded_String_Array is array (Positive range <>)
     of Unbounded_String;
   -- This list is quite comprehensive, but not complete
   Categories_To_Ignore : constant Unbounded_String_Array := [
      +"Pages with syntax highlighting errors",
      +"Programming",
      +"Examples needing attention",
      +"Tasks needing attention",
      +"Language users",
      +"Implementations",
      +"Solutions by ",
      +"Maintenance/OmitCategoriesCreated",
      +"Collection Members",
      +"Pages with too many expensive parser function calls",
      +"Garbage collection",
      +" User",
      +"SQL User",
      +"Typing",
      +"Parameter passing",
      +"Execution method",
      +"Unimplemented tasks by language",
      +"Wolfram Language",
      +"/Omit",
      +"Wren-",
      +"WrenGo",
      +"Phix/",
      +"PhixClass",
      +"Basic language learning",
      +"Encyclopedia",
      +"RCTemplates",
      +"SysUtils",
      +"Action! ",
      +"Text processing",
      +"Image processing",
      +"Scala Digital Signal Processing",
      +"List processing",
      +"Digital signal processing",
      +"Processing Python",
      +"Classic CS problems and programs",
      +"Brainf*** related",
      +"Data Structures",
      +"Perl modules",
      +"Perl/",
      +"Perl:LWP",
      +"Perl 6 related",
      +"Flow control",
      +"Excessively difficult task",
      +"WikiStubs",
      +"Impl needed",
      +"Recursion"
   ];

   -- Lower-case the candidate once; it does not change between fragments.
   Lowered_L : constant String := To_Lower (L);
begin
   -- A non-zero Index means the (lower-cased) fragment occurs in L.
   return (for some Category of Categories_To_Ignore =>
             Index (Lowered_L, To_Lower (To_String (Category))) /= 0);
end Ignore_Category;
 
Title_Str : constant String := "title=""Category:";
End_A_Tag_Str : constant String := "</a>";
Space_Paren_Str : constant String := " (";
 
Title_At : constant Natural := Index (S, Title_Str);
begin
if Title_At /= 0 then
declare
Bracket_AtClosing_Bracket_At : constant Natural := Index (S (Title_At + Title_Str'Length .. S'Last), ">");
End_A_At : constant Natural := Index (S (Bracket_AtTitle_At + 1 Title_Str'Length .. S'Last), End_A_Str">");
 
Space_At : constant Natural := Index (S (End_A_At + End_A_Str'Length .. S'Last), " ");
CountEnd_A_Tag_At : constant Natural := Natural'Value (S (End_A_At + End_A_Str'Length .. Space_At - 1));
Language : constant String := Index (S (Title_AtClosing_Bracket_At + Title_Str'Length1 .. Bracket_At -S'Last), 2End_A_Tag_Str);
 
Language : constant String :=
S (Closing_Bracket_At + 1 .. End_A_Tag_At - 1);
 
Space_Paren_At : constant Natural :=
Index (S (End_A_Tag_At + 1 .. S'Last), Space_Paren_Str);
 
Space_At : constant Natural :=
Index (S (Space_Paren_At + Space_Paren_Str'Length + 1
.. S'Last),
" ");
 
Count : constant Natural :=
Natural'Value (
Strip_Character (
S (Space_Paren_At +
Space_Paren_Str'Length
.. Space_At - 1),
','));
begin
if Bracket_AtClosing_Bracket_At /= 0 and then End_A_At /= 0 and then Space_At /= 0 then
and then End_A_Tag_At /= 0
and then Space_Paren_At /= 0
and then Space_At /= 0
then
begin
Counts.Insertif Ignore_Category (New_ItemLanguage) => (Count,False +Language));then
Counts.Insert (New_Item => (Count, +Language));
end if;
exception
when Constraint_Error =>
Put_Line (Standard_Error, "Warning: repeated language: " & Language);
-- Ignore repeated results. Language);
-- Ignore repeated results.
null;
end;
end if;
-- Recursively parse the string for languages and counts
Find_Counts (S (Space_At + 1 .. S'Last));
end;
Line 116 ⟶ 233:
Place := Place + 1;
end Display;
 
-- The category listing is requested while this declaration is elaborated,
-- i.e. before the first progress message below is printed.
Http_Source : constant AWS.Response.Data :=
  AWS.Client.Get ("http://rosettacode.org/w/index.php?" &
                    "title=Special:Categories&limit=5000",
                  Follow_Redirection => True);
-- HTTP status of the response held in Http_Source.
Status : Status_Code;
begin
   Put_Line ("Getting website data...");

   -- Give up with Connection_Error unless the request succeeded.
   Status := AWS.Response.Status_Code (Http_Source);
   if Status not in Success then
      Put_Line ("Unable to retrieve data => Status Code :" &
                  Image (Status) &
                  " Reason :" & Reason_Phrase (Status));
      raise Connection_Error;
   end if;

   -- Parse the page body and collect (count, language) pairs in Counts.
   Put_Line ("Finding categories...");
   Find_Counts (AWS.Response.Message_Body (Http_Source));

   -- Counts is walked in reverse order so the largest counts come first.
   Put_Line ("Displaying categories...");
   Counts.Reverse_Iterate (Display'Access);

   Put_Line ("Process complete.");
end Rank_Languages_By_Popularity;
</syntaxhighlight>
{{out|Sample output}}
<pre>
1. 1683 - Phix
2. 1676 - Wren
3. 1653 - Julia
4. 1623 - Raku
5. 1577 - Nim
6. 1553 - Go
7. 1548 - Perl
8. 1532 - Python
9. 1416 - J
10. 1349 - Java
11. 1333 - FreeBASIC
12. 1300 - C
13. 1282 - C++
14. 1239 - Ruby
15. 1209 - Mathematica
16. 1175 - Haskell
17. 1156 - REXX
18. 1152 - Kotlin
19. 1149 - Jq
20. 1102 - Racket
21. 1055 - Sidef
22. 1021 - ALGOL 68
23. 1017 - 11l
24. 1012 - Zkl
25. 1004 - Factor
26. 993 - D
27. 989 - C sharp
28. 987 - Tcl
29. 973 - Scala
30. 959 - Ada
31. 954 - Rust
32. 949 - Delphi
33. 947 - Lua
34. 868 - F Sharp
35. 867 - XPL0
</pre>
 
=={{header|ALGOL 68}}==
Line 2,406 ⟶ 2,579:
 
=={{header|Java}}==
Tested with Java 22. Uses the API.<br/>
<syntaxhighlight lang="java">import java.net.URL;
import java.net.URI;
import java.net.URL;
import java.net.URLConnection;
import java.io.*;
Line 2,454 ⟶ 2,629:
{
 
URL url = new URLURI( path ).toURL();
URLConnection rc = url.openConnection();
// Rosetta Code objects to the default Java user agant so use a blank one
rc.setRequestProperty( "User-Agent", "" );
BufferedReader bfr = new BufferedReader( new InputStreamReader( rc.getInputStream() ) );
Line 2,465 ⟶ 2,638:
while( line != null )
{
line = line.trim().replaceAll( "[\",]", "" );
if ( line.startsWith( "[title]" ) )
{
// have a programming language - should look like "[title] =>: Category:languageName"
languageName = after( after( line, ':' ), ':' ).trim();
}
else if( line.startsWith( "[pages]" ) )
{
// number of pages the language has (probably)
String pageCount = after( line, '>:' ).trim();
if( pageCount.compareTo( "Array{" ) != 0 )
{
// haven't got "[pages]: => Array{" - must be a number of pages
languageList.add( ( (char) Integer.parseInt( pageCount ) ) + languageName );
languageName = "?";
} // if [pageCount.compareTo( "Array{" ) != 0
}
else if( line.startsWith( "[gcmcontinue]" ) )
{
// have an indication of whether there is more data or not
gcmcontinue[0] = after( line, '>:' ).trim().replaceAll( "[|]", "%7C" );
} // if various line starts
line = bfr.readLine();
Line 2,505 ⟶ 2,678:
do
{
String path = ( "httphttps://www.rosettacode.org/mww/api.php?action=query"
+ "&generator=categorymembers"
+ "&gcmtitle=Category:Programming%20Languages"
Line 2,511 ⟶ 2,684:
+ ( gcmcontinue[0].compareTo( "" ) == 0 ? "" : ( "&gcmcontinue=" + gcmcontinue[0] ) )
+ "&prop=categoryinfo"
+ "&format=txtjsonfm"
);
parseContent( path, gcmcontinue, languageList );
Line 2,537 ⟶ 2,710:
} // for lPos
} // main
} // GetRCLanguages
</syntaxhighlight>
{{out}}
Top 10 languages as at 1st June 2024
<pre>
1: 1675: Phix
1: 1675: Wren
3: 1650: Julia
4: 1620: Raku
5: 1576: Nim
6: 1549: Go
7: 1542: Perl
8: 1514: Python
9: 1413: J
10: 1346: Java
...
</pre>
Line 3,685 ⟶ 3,859:
var s := wc.DownloadString('https://rosettacode.org/wiki/Special:Categories?limit=5000');
s.Matches('([^><]+)</a>.+\(([\d,]+) member')
.Select(x -> KV(x.Groups[1].Value,x.Groups[2].Value.Replace(',','').ToInteger))
.Where(x -> not x[0].Key.StartsWith('Pages'))
.OrderByDescending(pair -> pair[1].Value).Numerate.Take(10)
.PrintLines(x -> $'Rank: {x[0],3} ({x[1][1].Value} entries) {x[1][0].Key}')
end.
</syntaxhighlight>
9

edits