Rosetta Code/Count examples: Difference between revisions

m
→‎{{header|Wren}}: Minor changes (though see Talk Page re encoding of '+')
m (→‎{{header|Perl}}: update URL)
m (→‎{{header|Wren}}: Minor changes (though see Talk Page re encoding of '+'))
(5 intermediate revisions by 4 users not shown)
Line 402:
 
foreach (string task in tasknames) {
stringtry content = GetSourceCodeFromPage(task, wc);{
string content = GetSourceCodeFromPage(WebUtility.UrlEncode(task), wc);
int count = new Regex("=={{header", RegexOptions.IgnoreCase).Matches(content).Count;
Task t int count = new TaskRegex(task"=={{header", countRegexOptions.IgnoreCase).Matches(content).Count;
Task t = new Task(task, count);
 
Console.WriteLine(t);
tasks.Add(t);
}
catch (Exception ex) {
Console.WriteLine("**** Unable to get task \"" + task + "\": " + ex.Message);
}
}
 
Line 1,217 ⟶ 1,222:
 
rosorg = "http://rosettacode.org"
qURI = "/mww/api.php?action=query&list=categorymembers&cmtitle=Category:Programming_Tasks&cmlimit=500&format=json"
qdURI = "/mww/api.php?action=query&list=categorymembers&cmtitle=Category:Draft_Programming_Tasks&cmlimit=500&format=json"
sqURI = rosorg * "/wiki/"
topages(js, v) = for d in js["query"]["categorymembers"] push!(v, sqURI * replace(d["title"], " " => "_")) end
Line 2,259 ⟶ 2,264:
print(f'\nTotal: {sum(tasks)} examples.')
</syntaxhighlight>
 
===Using Semantic MediaWiki===
{{libheader|Requests}}
Here we use the [https://www.semantic-mediawiki.org/wiki/Help:API:ask ask] API provided by the [https://www.semantic-mediawiki.org/wiki/Semantic_MediaWiki Semantic MediaWiki extension] to query page [https://www.semantic-mediawiki.org/wiki/Help:Properties_and_types properties] rather than parse page content, producing subtly different counts to some other solutions. Maybe too different for this task.
 
We only count task examples that have a corresponding category page AND that page has the "Is Language" property set to True. In other words, <code>Implemented In</code> + <code>Not Implemented In</code> + <code>Omitted From</code> is always equal to the total number of languages.
 
<syntaxhighlight lang="python">"""Count Rosetta Code tasks implementations using the Semantic MediaWiki API.
Works with Python >= 3.7."""
import json
import logging
 
from dataclasses import dataclass
from dataclasses import field
 
from datetime import datetime
 
from typing import Any
from typing import Dict
from typing import List
from typing import Optional
from typing import Set
from typing import Tuple
 
import requests
from requests.adapters import HTTPAdapter
from requests.adapters import Retry
 
logging.basicConfig(level=logging.WARN)
 
# See https://www.semantic-mediawiki.org/wiki/Help:API:ask
_SM_ASK: Dict[str, str] = {
"action": "ask",
"format": "json",
"formatversion": "2",
"api_version": "3",
}
 
_SM_ASK_REQUEST_BLOCK_SIZE = 500
 
 
@dataclass(frozen=True)
class Page:
# fulltext is the page's title, not the page content.
fulltext: str
fullurl: str
namespace: int
exists: str
displaytitle: str
 
 
@dataclass(frozen=True, eq=False)
class Lang(Page):
def __eq__(self, other: object) -> bool:
if isinstance(other, Lang):
return self.fullurl == other.fullurl
elif isinstance(other, str):
return self.fullurl == other
return False
 
def __hash__(self) -> int:
return hash(self.fullurl)
 
def unimplemented(self, tasks: Set["Task"]) -> Set["Task"]:
return {task for task in tasks if self.fullurl not in task.exclude}
 
def omitted_from(self, tasks: Set["Task"]) -> Set["Task"]:
return {task for task in tasks if self.fullurl in task.omitted_from}
 
 
@dataclass(frozen=True)
class Task(Page):
title: str
implemented_in: Set[Lang] = field(repr=False, compare=False)
omitted_from: Set[Lang] = field(repr=False, compare=False)
exclude: Set[Lang] = field(repr=False, compare=False)
 
def not_implemented_in(self, langs: Set[Lang]) -> Set[Lang]:
return langs.difference(self.exclude)
 
 
@dataclass
class TaskResponseBlock:
tasks: List[Task]
continue_offset: Optional[int] = None
 
 
@dataclass
class LanguageResponseBlock:
langs: List[Lang]
continue_offset: Optional[int] = None
 
 
def sm_ask_category(
session: requests.Session,
url: str,
category: str,
limit: int,
offset: int,
known_langs: Set[Lang],
) -> TaskResponseBlock:
query_params = {
**_SM_ASK,
"query": (
f"[[Category:{category}]]"
"|?Implemented in language"
"|?Omitted from language"
f"|limit={limit}"
f"|offset={offset}"
),
}
 
# Display some progress
log(f"ask [[Category:{category}]] offset={offset}")
 
response = session.get(url, params=query_params)
response.raise_for_status()
data = response.json()
handle_warnings_and_errors(data)
return _transform_implemented_in_response_data(data, known_langs)
 
 
def sm_ask_tasks(
session: requests.Session,
url: str,
limit: int,
offset: int,
known_langs: Set[Lang],
):
return sm_ask_category(
session, url, "Programming Tasks", limit, offset, known_langs
)
 
 
def sm_ask_drafts(
session: requests.Session,
url: str,
limit: int,
offset: int,
known_langs: Set[Lang],
):
return sm_ask_category(
session, url, "Draft Programming Tasks", limit, offset, known_langs
)
 
 
def sm_ask_languages(
session: requests.Session,
url: str,
limit: int,
offset: int,
) -> LanguageResponseBlock:
query_params = {
**_SM_ASK,
"query": (
"[[Is language::+]]"
"|?Implemented in language"
"|?Omitted from language"
f"|limit={limit}"
f"|offset={offset}"
),
}
 
# Display some progress
log(f"ask [[Is language::+]] offset={offset}")
 
response = session.get(url, params=query_params)
response.raise_for_status()
data = response.json()
handle_warnings_and_errors(data)
return _transform_language_response_data(data)
 
 
def sm_ask_all_tasks(
session: requests.Session, url: str, known_langs: Set[Lang]
) -> List[Task]:
block = sm_ask_tasks(
session,
url,
limit=_SM_ASK_REQUEST_BLOCK_SIZE,
offset=0,
known_langs=known_langs,
)
tasks = block.tasks
 
while block.continue_offset:
block = sm_ask_tasks(
session,
url,
limit=_SM_ASK_REQUEST_BLOCK_SIZE,
offset=block.continue_offset,
known_langs=known_langs,
)
tasks.extend(block.tasks)
 
return tasks
 
 
def sm_ask_all_drafts(
session: requests.Session, url: str, known_langs: Set[Lang]
) -> List[Task]:
block = sm_ask_drafts(
session,
url,
limit=_SM_ASK_REQUEST_BLOCK_SIZE,
offset=0,
known_langs=known_langs,
)
tasks = block.tasks
 
while block.continue_offset:
block = sm_ask_drafts(
session,
url,
limit=_SM_ASK_REQUEST_BLOCK_SIZE,
offset=block.continue_offset,
known_langs=known_langs,
)
tasks.extend(block.tasks)
 
return tasks
 
 
def sm_ask_all_languages(session: requests.Session, url: str) -> List[Lang]:
block = sm_ask_languages(
session,
url,
limit=_SM_ASK_REQUEST_BLOCK_SIZE,
offset=0,
)
langs = block.langs
 
while block.continue_offset:
block = sm_ask_languages(
session,
url,
limit=_SM_ASK_REQUEST_BLOCK_SIZE,
offset=block.continue_offset,
)
langs.extend(block.langs)
 
return langs
 
 
def _transform_implemented_in_response_data(
data: Any, known_langs: Set[Lang]
) -> TaskResponseBlock:
tasks: List[Task] = []
for result in data["query"]["results"]:
for task_title, task_page in result.items():
# We're excluding implementations that don't have a corresponding
# category page with an "Is Language" property.
implemented_in = {
Lang(**lang)
for lang in task_page["printouts"]["Implemented in language"]
}.intersection(known_langs)
 
omitted_from = (
{
Lang(**lang)
for lang in task_page["printouts"]["Omitted from language"]
}
.intersection(known_langs)
.difference(implemented_in)
)
 
tasks.append(
Task(
title=task_title,
implemented_in=implemented_in,
omitted_from=omitted_from,
fulltext=task_page["fulltext"],
fullurl=task_page["fullurl"],
namespace=task_page["namespace"],
exists=task_page["exists"],
displaytitle=task_page["displaytitle"],
exclude=implemented_in.union(omitted_from),
)
)
 
return TaskResponseBlock(
tasks=tasks, continue_offset=data.get("query-continue-offset", None)
)
 
 
def _transform_language_response_data(data: Any) -> LanguageResponseBlock:
langs: List[Lang] = []
for result in data["query"]["results"]:
for _, task_page in result.items():
langs.append(
Lang(
fulltext=task_page["fulltext"],
fullurl=task_page["fullurl"],
namespace=task_page["namespace"],
exists=task_page["exists"],
displaytitle=task_page["displaytitle"],
)
)
 
return LanguageResponseBlock(
langs=langs, continue_offset=data.get("query-continue-offset", None)
)
 
 
def get_session() -> requests.Session:
"""Setup a requests.Session with retries."""
retry_strategy = Retry(
total=5,
status_forcelist=[429, 500, 502, 503, 504],
allowed_methods=["HEAD", "GET", "OPTIONS"],
)
adapter = HTTPAdapter(max_retries=retry_strategy)
session = requests.Session()
session.mount("https://", adapter)
session.mount("http://", adapter)
return session
 
 
def log(msg: str) -> None:
print(f"{datetime.now().isoformat(' ', 'seconds')}: {msg}")
 
 
def handle_warnings_and_errors(data: Any) -> None:
if data.get("errors"):
for error in data["errors"]:
logging.error(json.dumps(error))
# legacy format
if data.get("error"):
logging.error(json.dumps(data["error"]))
if data.get("warnings"):
for warning in data["warnings"]:
logging.warning(json.dumps(warning))
 
 
def count_examples(url: str, n: int = 30) -> None:
"""Print a table to stdout containing implementation counts for the first
`n` tasks, sorted by number implementations (most to least)."""
session = get_session()
langs = set(sm_ask_all_languages(session, url))
tasks = sm_ask_all_tasks(session, url, langs)
drafts = sm_ask_all_drafts(session, url, langs)
all_tasks = [*tasks, *drafts]
 
# Map of task to (implemented in, not implemented in, omitted from)
counts: Dict[Task, Tuple[int, int, int]] = {}
 
# Running total of examples for all tasks. Where a language has multiple examples
# for a single tasks, we only count one example.
total: int = 0
 
for task in all_tasks:
total += len(task.implemented_in)
counts[task] = (
len(task.implemented_in),
len(task.not_implemented_in(langs)),
len(task.omitted_from),
)
 
# Pretty print
top = sorted(counts.items(), key=lambda it: it[1][0], reverse=True)[:n]
pad = max([len(task.fulltext) for task, _ in top])
 
print("\nKnown languages:", len(langs))
print("Total tasks:", len(all_tasks))
print("Total examples:", total)
print(f"{'Task':>{pad}} | Implemented In | Not Implemented In | Omitted From")
print("-" * (pad + 1), "+", "-" * 16, "+", "-" * 20, "+", "-" * 13, sep="")
 
for task, _counts in top:
implemented_in, not_implemented_in, omitted_from = _counts
print(
f"{task.fulltext:>{pad}} |"
f"{implemented_in:>15} |"
f"{not_implemented_in:>19} |"
f"{omitted_from:>13}"
)
 
 
if __name__ == "__main__":
import argparse
 
URL = "https://rosettacode.org/w/api.php"
parser = argparse.ArgumentParser(description="Count tasks on Rosetta Code.")
 
parser.add_argument(
"--rows",
"-n",
type=int,
default=30,
dest="n",
help="number of rows to display in the output table (default: 30)",
)
 
parser.add_argument(
"--url",
default=URL,
help=f"target MediaWiki URL (default: {URL})",
)
 
args = parser.parse_args()
count_examples(args.url, args.n)</syntaxhighlight>
 
{{out}}
<pre>
2023-02-11 11:09:34: ask [[Is language::+]] offset=0
2023-02-11 11:09:35: ask [[Is language::+]] offset=500
2023-02-11 11:09:36: ask [[Category:Programming Tasks]] offset=0
2023-02-11 11:09:47: ask [[Category:Programming Tasks]] offset=500
2023-02-11 11:10:04: ask [[Category:Programming Tasks]] offset=1000
2023-02-11 11:10:12: ask [[Category:Draft Programming Tasks]] offset=0
 
Known languages: 890
Total tasks: 1602
Total examples: 91075
Task | Implemented In | Not Implemented In | Omitted From
--------------------------------+----------------+--------------------+-------------
Hello world/Text | 498 | 392 | 0
99 bottles of beer | 371 | 519 | 0
100 doors | 334 | 555 | 1
FizzBuzz | 325 | 565 | 0
Fibonacci sequence | 308 | 582 | 0
Factorial | 302 | 588 | 0
Comments | 297 | 592 | 1
A+B | 293 | 597 | 0
Empty program | 274 | 616 | 0
Function definition | 254 | 634 | 2
Loops/Infinite | 254 | 635 | 1
Loops/For | 248 | 641 | 1
Loops/While | 244 | 645 | 1
Arrays | 237 | 652 | 1
Ackermann function | 234 | 653 | 3
Reverse a string | 231 | 657 | 2
Conditional structures | 223 | 666 | 1
Arithmetic/Integer | 213 | 677 | 0
Greatest common divisor | 213 | 677 | 0
Array concatenation | 208 | 682 | 0
Greatest element of a list | 208 | 682 | 0
Even or odd | 207 | 683 | 0
Loops/Downward for | 207 | 683 | 0
Sieve of Eratosthenes | 203 | 687 | 0
Increment a numerical string | 202 | 688 | 0
Integer comparison | 202 | 688 | 0
Repeat a string | 199 | 691 | 0
Boolean values | 198 | 691 | 1
Loops/For with a specified step | 198 | 691 | 1
Copy a string | 196 | 693 | 1
</pre>
 
=={{header|R}}==
Line 3,113 ⟶ 3,565:
{{libheader|Wren-pattern}}
An embedded program so we can use the libcurl library.
<syntaxhighlight lang="ecmascriptwren">/* rc_count_examplesRosetta_Code_Count_examples.wren */
 
import "./pattern" for Pattern
Line 3,150 ⟶ 3,602:
}
 
var url = "https://www.rosettacode.org/mww/api.php?action=query&list=categorymembers&cmtitle=Category:Programming_Tasks&cmlimit=500&format=xml"
var content = getContent.call(url)
var p = Pattern.new("title/=\"[+1^\"]\"")
Line 3,156 ⟶ 3,608:
for (m in matches) {
var title = m.capsText[0].replace("&#039;", "'").replace("&quot;", "\"")
var title2 = title.replace(" ", "_").replace("+", "\%2B252B")
var taskUrl = "https://www.rosettacode.org/mww/index.php?title=%(title2)&action=raw"
var taskContent = getContent.call(taskUrl)
var lines = taskContent.split("\n")
Line 3,167 ⟶ 3,619:
<br>
which we now embed in the following C program, build and run.
<syntaxhighlight lang="c">/* gcc rc_count_examplesRosetta_Code_Count_examples.c -o rc_count_examplesRosetta_Code_Count_examples -lcurl -lwren -lm */
 
#include <stdio.h>
Line 3,341 ⟶ 3,793:
WrenVM* vm = wrenNewVM(&config);
const char* module = "main";
const char* fileName = "rc_count_examplesRosetta_Code_Count_examples.wren";
char *script = readFile(fileName);
WrenInterpretResult result = wrenInterpret(vm, module, script);
Line 3,362 ⟶ 3,814:
Just showing the first 25 'full' tasks:
<pre>
100 doors : 326355 examples
100 prisoners : 7285 examples
15 puzzle game : 7386 examples
15 puzzle solver : 2327 examples
2048 : 6165 examples
21 game : 4152 examples
24 game : 100107 examples
24 game/Solve : 6264 examples
4-rings or 4-squares puzzle : 6074 examples
9 billion names of God the integer : 6066 examples
99 bottles of beer : 336376 examples
A+B : 271296 examples
Abbreviations, automatic : 4658 examples
Abbreviations, easy : 4250 examples
Abbreviations, simple : 3846 examples
ABC problem : 132150 examples
Abelian sandpile model : 2740 examples
Abelian sandpile model/Identity : 2428 examples
Abstract type : 8993 examples
Abundant odd numbers : 6672 examples
Abundant, deficient and perfect number classifications : 100110 examples
Accumulator factory : 110115 examples
AckermannAchilles functionnumbers : 23025 examples
Ackermann function : 243 examples
Active Directory/Connect : 29 examples
Active Directory/Search for a user : 24 examples
....
</pre>
9,479

edits