Rosetta Code/Count examples

Find the total number of programming examples for each task and the total for all tasks.

Essentially, count the number of occurrences of =={{header| on each task page.

Output:

<lang>100 doors: 20 examples. 99 Bottles of Beer: 29 examples. Abstract type: 10 examples.

Total: X examples.</lang>

Java

Works with: Java version 1.5+

<lang java5> import java.util.ArrayList; import java.util.Iterator; import ScreenScrape;

public class CountProgramExamples { private static final String baseURL = "http://rosettacode.org/wiki/"; private static final String rootURL = "http://www.rosettacode.org/w/api.php?action=query&list=categorymembers&cmtitle=Category:Programming_Tasks&cmlimit=500&format=xml"; private static final String taskBegin = "title=\""; private static final String taskEnd = "\""; private static final String exmplBegin = ""; private static final String exmplEnd = ""; private static final String editBegin = "";

/** * @param args */ public static void main(String[] args) { // Setup variables int exTotal = 0; int exSubTot = 0; String title = ""; String taskPage = ""; int startPos = 0; String countStr = ""; try { // Get root query results ArrayList<String> tasks = new ArrayList<String>(); ScreenScrape ss = new ScreenScrape(); String rootPage = ss.read(rootURL); while(rootPage.contains(taskBegin)){ rootPage = rootPage.substring(rootPage.indexOf(taskBegin)+taskBegin.length()); title = rootPage.substring(0, rootPage.indexOf(taskEnd)); if (!title.contains("Category:")) { tasks.add(title); } rootPage = rootPage.substring(rootPage.indexOf(taskEnd)); } // Loop through each task and print count Iterator<String> itr = tasks.iterator(); while(itr.hasNext()) { title = itr.next().replaceAll("'","'"); taskPage = ss.read(baseURL+title.replaceAll(" ", "_")); if (taskPage.contains(exmplBegin)) { startPos = taskPage.lastIndexOf(exmplBegin)+exmplBegin.length(); countStr = taskPage.substring(startPos, taskPage.indexOf(exmplEnd, startPos)); exSubTot = Integer.parseInt(countStr.contains(".") ? countStr.substring(0,countStr.indexOf(".")) : countStr); }else{ exSubTot = 0; while(taskPage.contains(editBegin)) { taskPage = taskPage.substring(taskPage.indexOf(editBegin)+editBegin.length()); exSubTot++; } } exTotal += exSubTot; System.out.println(title+": "+exSubTot+" examples."); } // Print total System.out.println("\nTotal: "+exTotal+" examples."); }catch(Exception e){ System.out.println(title); System.out.println(startPos+":"+taskPage.indexOf(exmplEnd, startPos)); System.out.println(taskPage); e.printStackTrace(System.out); } } } </lang>

This is the ScreenScrape class imported in the above class.

Perl

!/usr/bin/perl -w

use strict ; use LWP::UserAgent ; use HTML::Parser ; use constant DOCROOT => "http://www.rosettacode.org/wiki" ; use constant SOLUTIONROOT => "http://www.rosettacode.org/w/index.php?title=" ; my %tasklist = ( ) ; #key: last part of solution list URL, value: title of solution my $ua = new LWP::UserAgent ; my $url = DOCROOT . "/Category:Programming_Tasks" ; my $request = HTTP::Request->new( 'GET' => "$url" ) ; my $response = $ua->request( $request ) ; my $counted = 0 ; my $total_examples = 0 ; my $solresponse ; my $p = HTML::Parser->new( api_version => 3 ) ; #parser for list of tasks my $q = HTML::Parser->new( api_version => 3 ) ; #parser for solutions by task $p->handler( start => \&process , "tagname , attr" ) ; $q->handler( text => \&langfinder, "text" ) ;

if ( $response->is_success( ) ) {

  $p->parse( $response->content( ) ) ; 
  foreach my $task( keys %tasklist ) { 
     $request->uri( SOLUTIONROOT . "$task" . "&action=edit" ) ; 
     $solresponse = $ua->request( $request ) ;
     if ( $solresponse->is_success( )) {
        $q->parse( $solresponse->content( ) ) ;
        if ( $tasklist{$task} ) {
            print "$tasklist{$task} : $counted examples!\n" ;
        }
        $counted = 0 ;
        $q->eof( ) ;
     }
     else {
        print "Error: " . $solresponse->code( ) . " " . $solresponse->message( ) . "\n" ;
     }
  }
  $p->eof( ) ;
  print "\nTotal: $total_examples examples.\n" ;

} else {

  print "Error " . $response->code( )  . " " . $response->message( ) . "\n" ;

} sub process( ) {

  return if shift ne "a" ;
  my $props = shift ;
  if ( $props->{href} && $props->{href} =~ m,/wiki/([^:]+), ) {
     if ( $1 !~ /Category/ ) {
        $tasklist{ $1 } = $props->{title} ;
     }
  }

} sub langfinder( ) {

  my $text = shift ;
  while ( $text =~ /header\|.+\}/g ) {
     $counted++ ;
     $total_examples++ ;
  }

} </lang>

Python

<lang python>import urllib, xml.dom.minidom

x = urllib.urlopen("http://www.rosettacode.org/w/api.php?action=query&list=categorymembers&cmtitle=Category:Programming_Tasks&cmlimit=500&format=xml")

tasks = [] for i in xml.dom.minidom.parseString(x.read()).getElementsByTagName("cm"):

   t = i.getAttribute('title').replace(" ", "_")
   y = urllib.urlopen("http://www.rosettacode.org/w/index.php?title=%s&action=raw" % t)
   tasks.append( y.read().lower().count("{{header|") )
   print t.replace("_", " ") + ": %d examples." % tasks[-1]

print "\nTotal: %d examples." % sum(tasks)</lang>

R

Library: XML (R)

Library: RCurl

<lang R> library(XML) library(RCurl) doc <- xmlInternalTreeParse("http://www.rosettacode.org/w/api.php?action=query&list=categorymembers&cmtitle=Category:Programming_Tasks&cmlimit=500&format=xml") nodes <- getNodeSet(doc,"//cm") titles = as.character( sapply(nodes, xmlGetAttr, "title") ) headers <- list() counts <- list() for (i in 1:length(titles)){ headersi <- getURL( paste("http://rosettacode.org/mw/index.php?title=", gsub(" ", "_", titles[i]), "&action=raw", sep="") ) countsi <- strsplit(headersi,split=" ")1 countsi <- grep("\\{\\{header", countsi) cat(titles[i], ":", length(countsi), "examples\n") } cat("Total: ", length(unlist(counts)), "examples\n") </lang>

Ruby

Library: REXML

First, a RosettaCode module, saved as rosettacode.rb: <lang ruby>require 'open-uri' require 'rexml/document'

module RosettaCode

 def RosettaCode.rc_url(page, query)
   url = "http://www.rosettacode.org/w/%s?%s" % [
     URI.escape(page),
     URI.escape(query.map {|k,v| "%s=%s" % [k,v]}.join("&"))
   ]
   url.gsub(/\+/, '%2B')
 end

 def RosettaCode.rc_tasks(category)
   query = {
     "action" => "query",
     "list" => "categorymembers",
     "cmtitle" => "Category:#{category}",
     "format" => "xml",
     "cmlimit" => 500,
   }
   while true
     url = rc_url "api.php", query
     doc = REXML::Document.new open(url)

     REXML::XPath.each(doc, "//cm") do |task|
       yield task.attribute("title").value
     end

     continue = REXML::XPath.first(doc, "//query-continue")
     break if continue.nil?
     cm = REXML::XPath.first(continue, "categorymembers")
     query["cmcontinue"] = cm.attribute("cmcontinue").value
   end
 end

end</lang>

Then, we implement the task with: <lang ruby>require 'rosettacode'

total_examples = 0

RosettaCode.rc_tasks("Programming_Tasks") do |task|

 url = RosettaCode.rc_url("index.php", {"action" => "raw", "title" => task})
 examples = open(url).read.scan("=={{header").length
 puts "#{task}: #{examples}"
 total_examples += examples

end

puts puts "Total: #{total_examples}"</lang>

Tcl

Using the json package from

Library: tcllib

<lang tcl>package require Tcl 8.5 package require http package require json

fconfigure stdout -buffering none

proc get_tasks {category} {

   set start [clock milliseconds]
   puts -nonewline "getting $category members..."
   set base_url http://www.rosettacode.org/w/api.php
   set query {action query list categorymembers cmtitle Category:%s format json cmlimit 500}
   set this_query [dict create {*}[split [format $query $category]]]
   set tasks [list]

   while {1} {
       set url [join [list $base_url [http::formatQuery {*}$this_query]] ?]
       set response [http::geturl $url]
       if {[set s [http::status $response]] ne "ok" || [http::ncode $response] != 200} {
           error "Oops: url=$url\nstatus=$s\nhttp code=[http::code $response]"
       }
       set data [json::json2dict [http::data $response]]
       http::cleanup $response
       
       # add tasks to list
       foreach task [dict get $data query categorymembers] {
           lappend tasks [dict get [dict create {*}$task] title]
       }
       
       if {[catch {dict get $data query-continue categorymembers cmcontinue} continue_task] != 0} {
           # no more continuations, we're done
           break
       }
       dict set this_query cmcontinue $continue_task
   }
   puts " found [llength $tasks] tasks in [expr {[clock milliseconds] - $start}] milliseconds"
   return $tasks

}

This proc can be replaced by a single regexp command:
set count [regexp -all "***=$needle" $haystack]
However this proc is more efficient -- we're dealing with plain strings only.

proc count_substrings {needle haystack} {

   set count 0
   set idx 0
   while {[set idx [string first $needle $haystack $idx]] != -1} {
       incr count
       incr idx
   }
   return $count

}

set total 0 foreach task [get_tasks Programming_Tasks] {

   set url [format "http://www.rosettacode.org/w/index.php?title=%s&action=raw" [string map {{ } _} $task]]
   set response [http::geturl $url]
   if {[set s [http::status $response]] ne "ok" || [http::ncode $response] != 200} {
       error "Oops: url=$url\nstatus=$s\nhttp code=[http::code $response]"
   }
   set count [count_substrings "\{\{header|" [http::data $response]]
   puts [format "%3d examples in %s" $count $task]
   http::cleanup $response
   incr total $count

}

puts "\nTotal: $total examples"</lang>

Rosetta Code/Count examples

Contents

AutoHotkey

C#

D

Java

Perl

Python

R

Ruby

Tcl