Word frequency: Difference between revisions

m (→‎version 1: elided a conditional test from a DO loop (by incorporating it into an UNTIL clause).)
Line 714:
=={{header|zkl}}==
<lang zkl>fname,count := vm.arglist; // grab command line args
File(fname).pump(Void, // read the file line by line and hash words
"toLower",findAll,Void.Xplode,Dictionary().incV)
.toList().copy().sort(fcn(a,b){ b[1]<a[1] })[0,count.toInt()]
.apply("concat",",").concat("\n").println();
 
// words may have leading or trailing "_", ie "the" and "_the"
fcn findAll(text){
File(fname).pump(Void, "toLower", // read the file line by line and hash words
// \w is [a-zA-Z0-9_] many words (in Les Misérables)
RegExp("[a-z]+").pump.fp1(Dictionary().incV)) // line-->(word:count,..)
// have leading or trailing "_", ie "the" and "_the"
.toList().copy().sort(fcn(a,b){ b[1]<a[1] })[0,count.toInt()] // hash-->list
var re=RegExp("[a-zA-Z]+"), r=List(); // static variables, not re-entrant
.pump(String,Void.Xplode,"%s,%s\n".fmt).println();</lang>
n,sz := 0,0; r.clear();
while(re.search(text,True,n)){ // moving search starting at n
n,sz=re.matched[0];
r.append(text[n,sz]);
n+=sz;
}
r
}</lang>
{{out}}
<pre>
$ zkl bbb ~/Documents/Les\ Miserables.txt 10
the,41038    the,41089
of,19908     of,19949
and,14925    and,14942
a,14602      a,14608
to,13946     to,13951
in,11196     in,11214
he,9646      he,9648
was,8613     was,8621
that,7922    that,7924
it,6657      it,6661
</pre>
Anonymous user