Rosetta Code/Count examples

Revision as of 20:10, 21 April 2009 by rosettacode>Guga360 (added c#)

Find the total number of programming examples for each task and the total for all tasks.

Rosetta Code/Count examples
You are encouraged to solve this task according to the task description, using any language you may know.

Essentially, count the number of occurrences of =={{header| on each task page.


<lang>100 doors: 20 examples.
99 Bottles of Beer: 29 examples.
Abstract type: 10 examples.

Total: X examples.</lang>


Object-oriented solution.

<lang csharp>using System; using System.Collections.Generic; using System.Linq; using System.Text.RegularExpressions; using System.Net;

/// <summary>
/// Immutable pairing of a task title with the number of examples counted on its page.
/// </summary>
class Task {

    private readonly string _task;
    private readonly int _examples;

    /// <summary>Creates a task record.</summary>
    /// <param name="task">Title of the task page.</param>
    /// <param name="examples">Number of examples counted on that page.</param>
    public Task(string task, int examples) {
        _task = task;
        _examples = examples;
    }

    /// <summary>Title of the task page.</summary>
    public string Name {
        get { return _task; }
    }

    /// <summary>Number of examples counted on the task page.</summary>
    public int Examples {
        get { return _examples; }
    }

    /// <summary>Formats the record as "&lt;title&gt;: &lt;count&gt; examples.".</summary>
    public override string ToString() {
        return String.Format("{0}: {1} examples.", this._task, this._examples);
    }
}


/// <summary>
/// Downloads the list of task pages from the wiki, counts the "=={{header" markers on
/// each page, prints one line per task and finally the grand total.
/// </summary>
class Program {

    // NOTE(review): the scheme/host/path portion of both URLs was missing from the
    // listing (only "{0}&cmlimit=500&format=json" and "{0}&action=raw" survived).
    // The prefixes below follow the standard MediaWiki api.php / index.php layout;
    // confirm them against the live site before relying on this.
    private const string CategoryUrlFormat =
        "http://www.rosettacode.org/w/api.php?action=query&list=categorymembers&cmtitle=Category:{0}&cmlimit=500&format=json";
    private const string RawPageUrlFormat =
        "http://www.rosettacode.org/w/index.php?title={0}&action=raw";

    // Built once instead of once per page.
    private static readonly Regex TitlePattern = new Regex("\"title\":\"(.+?)\"");
    private static readonly Regex HeaderPattern = new Regex("=={{header", RegexOptions.IgnoreCase);

    /// <summary>Returns the page titles listed in the given category (first 500 members only).</summary>
    /// <param name="category">Category name without the "Category:" prefix.</param>
    static List<string> GetTitlesFromCategory(string category) {
        string content;
        using (WebClient client = new WebClient()) {
            content = client.DownloadString(
                String.Format(CategoryUrlFormat, Uri.EscapeDataString(category)));
        }
        return TitlePattern.Matches(content).Cast<Match>().Select(x => x.Groups[1].Value).ToList();
    }

    /// <summary>Returns the raw wiki markup of a page.</summary>
    /// <param name="page">Page title; it is URL-escaped here so that titles containing
    /// characters such as '+', '&amp;' or '#' are requested correctly.</param>
    static string GetSourceCodeFromPage(string page) {
        using (WebClient client = new WebClient()) {
            return client.DownloadString(
                String.Format(RawPageUrlFormat, Uri.EscapeDataString(page)));
        }
    }

    static void Main(string[] args) {
        List<Task> tasks = new List<Task>();
        List<string> tasknames = GetTitlesFromCategory("Programming_Tasks");
        foreach (string task in tasknames) {
            string content = GetSourceCodeFromPage(task);
            int count = HeaderPattern.Matches(content).Count;
            Task t = new Task(task, count);
            // Print the per-task line and remember the task; without the Add the
            // total below would always be zero.
            Console.WriteLine(t);
            tasks.Add(t);
        }
        Console.WriteLine("\nTotal: {0} examples.", tasks.Select(x => x.Examples).Sum());
    }
}



Works with: Tango

<lang D> import; import; import; import tango.text.xml.Document; import tango.text.Util;

// Counts "=={{header|" markers on every page listed in an XML category query and
// prints a per-page count plus a running total.
// NOTE(review): this listing is damaged -- the import module names, the URL literals,
// several closing braces and the beginnings of two statements (the lines starting
// with ", client.getResponseHeaders" / ", subClient.getResponseHeaders") are missing,
// so it does not compile as shown. Restore from the original before use.
alias HttpHeader.ContentLength CL;

auto url = ""; void main() {

   auto client = new HttpClient (HttpClient.Get, url);;
   char[] mainData, tmp;
   int total, i;
   // Sink that appends each received chunk to tmp.
   void cat(void[] content) { tmp ~= cast(char[]) content; }
   if (client.isResponseOK) {, client.getResponseHeaders.getInt(CL));
       // Keep the category listing and reset the shared buffer for the page fetches.
       mainData = tmp;
       tmp = null;
       auto doc = new Document!(char);
       // Visit the title attribute of every "cm" element in the document.
       foreach (n; doc.query.descendant("cm").attribute("title")) {
           // Page titles use '_' instead of ' ' in the request URL.
           auto subClient = new HttpClient(HttpClient.Get, 
                   "" ~
                   replace(n.value.dup, ' ', '_') ~ "&action=raw");
           if (! subClient.isResponseOK) {
               Stderr (client.getResponse);
 , subClient.getResponseHeaders.getInt(CL));
           // i is incremented once per segment yielded by patterns(); it is then
           // decremented by one when non-zero -- presumably because the iterator
           // yields one more segment than there are matches (TODO confirm).
           foreach (segment; patterns(cast(char[])tmp, "=={{header|")) i++;
           if (i) --i;
           Stdout.formatln ("{0,-40} - {}", n.value, i);
           total += i;
           // Reset the per-page state before the next iteration.
           tmp = null;
           i = 0;
       Stdout("total examples: ", total).newline;
   } else {
       Stderr (client.getResponse);

} </lang>


Works with: Java version 1.5+

<lang java5> import java.util.ArrayList; import java.util.Iterator; import ScreenScrape;

// Collects task titles from a root page (the text between taskBegin and taskEnd,
// skipping titles that contain "Category:"), then for each task either parses the
// number that follows the last exmplBegin marker on the task page or, when that
// marker is absent, counts occurrences of editBegin. Prints one line per task and
// the overall total.
// NOTE(review): this listing is damaged and does not compile as shown: it is collapsed
// onto a single line (so everything after the first "//" is commented out), the URL and
// marker constants are empty strings, and three expressions are truncated
// ("String rootPage =;", "title =...", "taskPage =..."). The imported ScreenScrape
// class is not present in this file. Restore from the original before use.
public class CountProgramExamples { private static final String baseURL = ""; private static final String rootURL = ""; private static final String taskBegin = "title=\""; private static final String taskEnd = "\""; private static final String exmplBegin = ""; private static final String exmplEnd = ""; private static final String editBegin = "";

/** * @param args */ public static void main(String[] args) { // Setup variables int exTotal = 0; int exSubTot = 0; String title = ""; String taskPage = ""; int startPos = 0; String countStr = ""; try { // Get root query results ArrayList<String> tasks = new ArrayList<String>(); ScreenScrape ss = new ScreenScrape(); String rootPage =; while(rootPage.contains(taskBegin)){ rootPage = rootPage.substring(rootPage.indexOf(taskBegin)+taskBegin.length()); title = rootPage.substring(0, rootPage.indexOf(taskEnd)); if (!title.contains("Category:")) { tasks.add(title); } rootPage = rootPage.substring(rootPage.indexOf(taskEnd)); } // Loop through each task and print count Iterator<String> itr = tasks.iterator(); while(itr.hasNext()) { title ="'","'"); taskPage =" ", "_")); if (taskPage.contains(exmplBegin)) { startPos = taskPage.lastIndexOf(exmplBegin)+exmplBegin.length(); countStr = taskPage.substring(startPos, taskPage.indexOf(exmplEnd, startPos)); exSubTot = Integer.parseInt(countStr.contains(".") ? countStr.substring(0,countStr.indexOf(".")) : countStr); }else{ exSubTot = 0; while(taskPage.contains(editBegin)) { taskPage = taskPage.substring(taskPage.indexOf(editBegin)+editBegin.length()); exSubTot++; } } exTotal += exSubTot; System.out.println(title+": "+exSubTot+" examples."); } // Print total System.out.println("\nTotal: "+exTotal+" examples."); }catch(Exception e){ System.out.println(title); System.out.println(startPos+":"+taskPage.indexOf(exmplEnd, startPos)); System.out.println(taskPage); e.printStackTrace(System.out); } } } </lang>

This is the ScreenScrape class imported in the above class.


<lang Perl>

  1. !/usr/bin/perl -w

use strict ; use LWP::UserAgent ; use HTML::Parser ; use constant DOCROOT => "" ; use constant SOLUTIONROOT => "" ; my %tasklist = ( ) ; #key: last part of solution list URL, value: title of solution my $ua = new LWP::UserAgent ; my $url = DOCROOT . "/Category:Programming_Tasks" ; my $request = HTTP::Request->new( 'GET' => "$url" ) ; my $response = $ua->request( $request ) ; my $counted = 0 ; my $total_examples = 0 ; my $solresponse ; my $p = HTML::Parser->new( api_version => 3 ) ; #parser for list of tasks my $q = HTML::Parser->new( api_version => 3 ) ; #parser for solutions by task $p->handler( start => \&process , "tagname , attr" ) ; $q->handler( text => \&langfinder, "text" ) ;

if ( $response->is_success( ) ) {

  $p->parse( $response->content( ) ) ; 
  foreach my $task( keys %tasklist ) { 
     $request->uri( SOLUTIONROOT . "$task" . "&action=edit" ) ; 
     $solresponse = $ua->request( $request ) ;
     if ( $solresponse->is_success( )) {
        $q->parse( $solresponse->content( ) ) ;
        if ( $tasklist{$task} ) {
            print "$tasklist{$task} : $counted examples!\n" ;
        $counted = 0 ;
        $q->eof( ) ;
     else {
        print "Error: " . $solresponse->code( ) . " " . $solresponse->message( ) . "\n" ;
  $p->eof( ) ;
  print "\nTotal: $total_examples examples.\n" ;

} else {

  print "Error " . $response->code( )  . " " . $response->message( ) . "\n" ;

} sub process( ) {

  return if shift ne "a" ;
  my $props = shift ;
  if ( $props->{href} && $props->{href} =~ m,/wiki/([^:]+), ) {
     if ( $1 !~ /Category/ ) {
        $tasklist{ $1 } = $props->{title} ;

} sub langfinder( ) {

  my $text = shift ;
  while ( $text =~ /header\|.+\}/g ) {
     $counted++ ;
     $total_examples++ ;

} </lang>


<lang python>import urllib, xml.dom.minidom

x = urllib.urlopen("")

tasks = [] for i in xml.dom.minidom.parseString("cm"):

   t = i.getAttribute('title').replace(" ", "_")
   y = urllib.urlopen("" % t)
   tasks.append("{{header|") )
   print t.replace("_", " ") + ": %d examples." % tasks[-1]

print "\nTotal: %d examples." % sum(tasks)</lang>


Using the json package from tcllib <lang tcl>package require Tcl 8.5 package require http package require json

fconfigure stdout -buffering none

# Returns the titles of all members of the given wiki category.
#
# category - category name without the "Category:" prefix
#
# Pages through the result set 500 titles at a time, following the
# query-continue/cmcontinue token until the server stops returning one.
# Raises an error if any HTTP request fails or returns a non-200 code.
proc get_tasks {category} {
    set start [clock milliseconds]
    puts -nonewline "getting $category members..."
    # NOTE(review): the value of base_url was missing from the damaged listing;
    # this is the standard MediaWiki API path -- confirm against the live site.
    set base_url http://www.rosettacode.org/w/api.php
    set query {action query list categorymembers cmtitle Category:%s format json cmlimit 500}
    set this_query [dict create {*}[split [format $query $category]]]
    set tasks [list]

    while {1} {
        set url [join [list $base_url [http::formatQuery {*}$this_query]] ?]
        set response [http::geturl $url]
        if {[set s [http::status $response]] ne "ok" || [http::ncode $response] != 200} {
            error "Oops: url=$url\nstatus=$s\nhttp code=[http::code $response]"
        }
        set data [json::json2dict [http::data $response]]
        http::cleanup $response

        # add tasks to list
        foreach task [dict get $data query categorymembers] {
            lappend tasks [dict get [dict create {*}$task] title]
        }

        if {[catch {dict get $data query-continue categorymembers cmcontinue} continue_task] != 0} {
            # no more continuations, we're done
            break
        }
        # ask for the next page on the following iteration
        dict set this_query cmcontinue $continue_task
    }
    puts " found [llength $tasks] tasks in [expr {[clock milliseconds] - $start}] milliseconds"
    return $tasks
}


  1. This proc can be replaced by a single regexp command:
  2. set count [regexp -all "***=$needle" $haystack]
  3. However this proc is more efficient -- we're dealing with plain strings only.

proc count_substrings {needle haystack} {

   set count 0
   set idx 0
   while {[set idx [string first $needle $haystack $idx]] != -1} {
       incr count
       incr idx
   return $count


# Fetch the raw markup of every task page, count its "{{header|" markers and
# keep a running total.
set total 0
foreach task [get_tasks Programming_Tasks] {
    # NOTE(review): the URL was an empty string in the damaged listing; this is the
    # standard MediaWiki index.php path -- confirm against the live site.
    # http::formatQuery percent-encodes the title so characters such as '+' or '&'
    # survive the query string.
    set url http://www.rosettacode.org/w/index.php?[http::formatQuery title [string map {{ } _} $task] action raw]
    set response [http::geturl $url]
    if {[set s [http::status $response]] ne "ok" || [http::ncode $response] != 200} {
        error "Oops: url=$url\nstatus=$s\nhttp code=[http::code $response]"
    }
    set count [count_substrings "\{\{header|" [http::data $response]]
    puts [format "%3d examples in %s" $count $task]
    http::cleanup $response
    incr total $count
}


puts "\nTotal: $total examples"</lang>