Letter frequency
You are encouraged to solve this task according to the task description, using any language you may know.
Open a text file and count the occurrences of each letter.
Some of these programs count all characters (including punctuation), but some only count letters A to Z.
[edit] ACL2
; Return an association list like TBL in which the count stored under KEY
; is one higher; a KEY that is not present yet is appended with a count of 1.
(defun increment-alist (tbl key)
; End of the list reached without a match: start a new entry at 1.
(cond ((endp tbl) (list (cons key 1)))
; The first entry belongs to KEY: rebuild it with its count incremented.
((eql (car (first tbl)) key)
(cons (cons key (1+ (cdr (first tbl))))
(rest tbl)))
; Otherwise keep the first entry and continue with the remaining entries.
(t (cons (first tbl)
(increment-alist (rest tbl) key)))))
; Build an association list of (element . count) pairs for the list XS.
; The tail of XS is tallied first; the head is then added to that table
; through increment-alist.
(defun freq-table (xs)
(if (endp xs)
nil
(increment-alist (freq-table (rest xs))
(first xs))))
; Count the characters of the string STR: the string is coerced to a list
; of characters and handed to freq-table.
(defun letter-freq (str)
(freq-table (coerce str 'list)))
[edit] Ada
with Ada.Text_IO;

--  Count how often each character occurs in "letter_frequency.adb" and
--  print every character that occurs at least once.
procedure Letter_Frequency is
   --  One counter per Character value, all starting at 0.
   Counters : array (Character) of Natural := (others => 0);
   C        : Character;
   File     : Ada.Text_IO.File_Type;
begin
   Ada.Text_IO.Open (File, Mode => Ada.Text_IO.In_File, Name => "letter_frequency.adb");
   --  Text_IO.Get skips line terminators, so end-of-line markers are not counted.
   while not Ada.Text_IO.End_Of_File (File) loop
      Ada.Text_IO.Get (File, C);
      Counters (C) := Counters (C) + 1;
   end loop;
   --  Release the file as soon as reading is finished.
   Ada.Text_IO.Close (File);
   for I in Counters'Range loop
      if Counters (I) > 0 then
         Ada.Text_IO.Put_Line ("'" & I & "':" & Integer'Image (Counters (I)));
      end if;
   end loop;
end Letter_Frequency;
Sample Output (counting the characters of its own source code):
>./letter_frequency ' ': 122 '"': 6 '&': 3 ... [a lot of lines omitted] 'x': 7 'y': 5 'z': 1
[edit] Aikido
// Count the letters a-z (ignoring case) in the file named by the first
// command-line argument and print one total per letter.
import ctype
// One counter per letter: index 0 is 'a', index 25 is 'z'.
var letters = new int [26]
var s = openin (args[0])
while (!s.eof()) {
var ch = s.getchar()
// Check eof again after the read and stop before counting anything.
if (s.eof()) {
break
}
if (ctype.isalpha (ch)) {
// Fold to lower case so both cases share one counter.
var n = cast<int>(ctype.tolower(ch) - 'a')
++letters[n]
}
}
foreach i letters.size() {
println (cast<char>('a' + i) + " " + letters[i])
}
[edit] APL
⍝ freq: the left column holds the unique items of ⍵ (∪⍵); the right column is
⍝ how often each occurs, found by summing the rows of the table (∪⍵)∘.⍷⍵.
freq←{(⍪∪⍵),+/(∪⍵)∘.⍷⍵}
freq 0 1 2 3 2 3 4 3 4 4 4
0 1
1 1
2 2
3 3
4 4
freq 'balloon'
b 1
a 1
l 2
o 2
n 1
[edit] AutoHotkey
; Count the letters a-z in this script's own file and show the totals.
OpenFile = %A_ScriptFullPath% ; use own source code
FileRead, FileText, %OpenFile%
; Chr(96+A_Index) walks from "a" (97) to "z" (122).
Loop 26
{
; With UseErrorLevel the number of replacements is left in ErrorLevel; the
; replaced text in junk is never used, so the command acts as a counter.
StringReplace, junk, FileText, % Chr(96+A_Index),, UseErrorLevel
out .= Chr(96+A_Index) ": " ErrorLevel "`n"
}
MsgBox % out
Output (using script's own file):
a: 6 b: 1 c: 6 d: 4 e: 24 [several lines omitted] x: 5 y: 0 z: 0
[edit] AutoIt
This function prints the letter frequency of a given text file. You can choose whether the search is case-sensitive and whether special characters are counted too.
; Print how often each character occurs in the text file $Path.
;   $fcase          - passed on to StringReplace as its case-sensitivity argument
;   $fspecial_chars - True: scan Chr(32)..Chr(255); False: scan letters only
Func _Letter_frequency($Path, $fcase = True, $fspecial_chars = True)
Local $hFile, $sRead, $iupto, $iStart, $iCount
If Not $fcase Then $fcase = False
If Not $fspecial_chars Then
; Letters only: candidates start at Chr(65) = "A".
$iStart = 64
If Not $fcase Then
; 26 candidates: "A".."Z".
$iupto = 26
Else
; 58 candidates: "A".."z"; the six codes between the cases are skipped in the loop.
$iupto = 58
EndIf
Else
$iStart = 31
$iupto = 224
EndIf
$hFile = FileOpen($Path, 0)
$sRead = FileRead($hFile)
FileClose($hFile)
For $i = 1 To $iupto
If Not $fspecial_chars Then
; Codes 91..96 lie between "Z" and "a" and are not letters.
If $iStart + $i > 90 And $iStart + $i < 97 Then ContinueLoop
EndIf
; Removing the character from $sRead leaves the number of replacements in @extended.
$sRead = StringReplace($sRead, Chr($iStart + $i), "", 0, $fcase)
$iCount = @extended
If $iCount > 0 Then ConsoleWrite(Chr($iStart + $i) & " : " & $iCount & @CRLF)
Next
EndFunc ;==>_Letter_frequency
Output :
A : 32 B : 2 C : 15 E : 31 F : 10 [several lines omitted] u : 14 v : 1 w : 1 x : 14
[edit] AWK
# An empty field separator makes every character of a line its own field.
BEGIN { FS = "" }

# Tally each character of the current line.
{
    for (pos = 1; pos <= NF; pos++)
        tally[$pos]++
}

# Print every distinct character with its count, in awk's traversal order.
END {
    for (ch in tally)
        printf("%9d %-14s\n", tally[ch], ch)
}
usage: awk -f letters.awk HolyBible.txt
[edit] BBC BASIC
REM Count every character code found in the file, then report the letters
REM A-Z with the upper-case and lower-case counts added together.
DIM cnt%(255)
file% = OPENIN("C:\unixdict.txt")
IF file%=0 ERROR 100, "Could not open file"
REPEAT
REM Read the next string from the file and tally each of its characters.
A$ = GET$#file%
L% = LEN(A$)
IF L% THEN
FOR I% = 1 TO L%
cnt%(ASCMID$(A$,I%)) += 1
NEXT
ENDIF
UNTIL EOF#file%
CLOSE #file%
REM &41..&5A are "A".."Z"; adding 32 gives the matching lower-case code.
FOR c% = &41 TO &5A
PRINT CHR$(c%)CHR$(c%+32) ": " cnt%(c%)+cnt%(c%+32)
NEXT
Output:
Aa: 16421 Bb: 4115 Cc: 8216 Dd: 5799 Ee: 20144 Ff: 2662 Gg: 4129 Hh: 5208 Ii: 13980 Jj: 430 Kk: 1925 Ll: 10061 Mm: 5828 Nn: 12097 Oo: 12738 Pp: 5516 Qq: 378 Rr: 13436 Ss: 10210 Tt: 12836 Uu: 6489 Vv: 1902 Ww: 1968 Xx: 617 Yy: 3633 Zz: 433
[edit] Bracmat
{lc: open the file named by the argument, read it one byte at a time and
 return a sum to which every letter A-Z or a-z that was read has been added
 as a term (non-letters add 0).}
(lc=
counts c
. fil$(!arg,r) {open file for reading}
& 0:?counts
& whl
' ( fil$:?c {read a byte}
& ( !c:(~<A:~>Z|~<a:~>z)
| 0
)
+ !counts
: ?counts {simply add any found letter to the sum}
)
& fil$(,SET,-1) {close the file by seeking to impossible file position.}
| !counts {return the sum}
);
lc$"valid.bra" {example: count letters in Bracmat's validation suite.}
107*A
+ 33*B
+ 37*C
+ 39*D
+ 74*E
+ 50*F
+ 27*G
+ 28*H
+ 20*I
+ 55*J
+ 32*K
+ 112*L
+ 36*M
+ 32*N
+ 621*O
+ 43*P
+ 25*R
+ 67*S
+ 62*T
+ 64*U
+ 5*V
+ 26*W
+ 353*X
+ 248*Y
+ 70*Z
+ 2173*a
+ 840*b
+ 738*c
+ 639*d
+ 1345*e
+ 472*f
+ 372*g
+ 568*h
+ 91*j
+ 142*k
+ 529*l
+ 409*m
+ 941*n
+ 840*o
+ 336*p
+ 65*q
+ 993*r
+ 1018*s
+ 2097*t
+ 978*u
+ 122*v
+ 156*w
+ 909*x
+ 685*y
+ 211*z
+ 1035*i
[edit] C
/* declare array */
int frequency[26];
int ch;
FILE* txt_file = fopen ("a_text_file.txt", "rt");
/* init the freq table: */
for (ch = 0; ch < 26; ch++)
frequency[ch] = 0;
while (1) {
ch = fgetc(txt_file);
if (ch == EOF) break; /* end of file or read error. EOF is typically -1 */
/* assuming ASCII; "letters" means "a to z" */
if ('a' <= ch && ch <= 'z') /* lower case */
frequency[ch-'a']++;
else if ('A' <= ch && ch <= 'Z') /* upper case */
frequency[ch-'A']++;
}
[edit] C#
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
class Program
{
    /// <summary>
    /// Counts how many times each distinct item occurs in <paramref name="items"/>.
    /// </summary>
    /// <returns>A dictionary, sorted by item, mapping each item to its count.</returns>
    static SortedDictionary<TItem, int> GetFrequencies<TItem>(IEnumerable<TItem> items)
    {
        var counts = new SortedDictionary<TItem, int>();
        foreach (var element in items)
        {
            // TryGetValue leaves 'seen' at 0 when the item has not been met before.
            int seen;
            counts.TryGetValue(element, out seen);
            counts[element] = seen + 1;
        }
        return counts;
    }

    /// <summary>
    /// Prints the frequency of every character in the file named by the first
    /// argument; does nothing when that file does not exist.
    /// </summary>
    static void Main(string[] arguments)
    {
        var path = arguments.FirstOrDefault();
        if (!File.Exists(path))
        {
            return;
        }

        var contents = File.ReadAllText(path);
        foreach (var pair in GetFrequencies(contents))
        {
            Console.WriteLine("{0}: {1}", pair.Key, pair.Value);
        }
    }
}
Sample output:
: 1 !: 1 ,: 1 H: 1 d: 1 e: 1 l: 3 o: 2 r: 1 w: 1
Declarative approach:
// Build one "letter:count" string per distinct letter of str; non-letters are
// filtered out and the characters are ordered before they are grouped.
var freq = from c in str
where char.IsLetter(c)
orderby c
group c by c into g
select g.Key + ":" + g.Count();
// The query is only evaluated here, while it is being enumerated.
foreach(var g in freq)
Console.WriteLine(g);
C:2 I:1 K:1 L:2 W:1 a:4 ... y:2
[edit] C++
#include <algorithm>
#include <cctype>
#include <cstddef>
#include <cstdint>
#include <fstream>
#include <iostream>

// Count every byte of "filename.txt" and print the non-zero counts of the
// printable (graphic) characters. Returns -1 when the file cannot be opened.
int main()
{
    std::ifstream input("filename.txt", std::ios_base::binary);
    if (!input)
    {
        std::cerr << "error: can't open file\n";
        return -1;
    }

    // One counter per possible byte value.
    std::size_t count[256];
    std::fill_n(count, 256, 0);

    // Convert through uint8_t so bytes >= 0x80 never produce a negative index.
    for (char c; input.get(c); )
    {
        ++count[std::uint8_t(c)];
    }

    for (std::size_t i = 0; i < 256; ++i)
    {
        if (count[i] && std::isgraph(static_cast<int>(i))) // non-zero counts of printable characters
        {
            std::cout << char(i) << " = " << count[i] << '\n';
        }
    }
}
Example output when file contains "Hello, world!" (without quotes):
! = 1 , = 1 H = 1 d = 1 e = 1 l = 3 o = 2 r = 1 w = 1
[edit] Common Lisp
;; Read FILE into a string, count its characters by character code and print
;; the counts for codes 32 through 126, eight entries per output line.
;; NOTE(review): the counter array has 256 slots, so a character whose code
;; is 256 or above would index past it -- confirm the expected input.
(defun letter-freq (file)
(with-open-file (stream file)
(let ((str (make-string (file-length stream)))
(arr (make-array 256 :element-type 'integer :initial-element 0)))
(read-sequence str stream)
(loop for c across str do (incf (aref arr (char-code c))))
;; I counts printed entries so a newline can follow every eighth one.
(loop for c from 32 to 126 for i from 1 do
(format t "~c: ~d~a"
(code-char c) (aref arr c)
(if (zerop (rem i 8)) #\newline #\tab))))))
(letter-freq "test.lisp")
[edit] Clojure
;; Read text.txt, keep only the letters, fold them to upper case, count each
;; one and print the [letter count] pairs ordered by descending count.
(->> (slurp "text.txt")
     (filter #(java.lang.Character/isLetter %))
     (map #(java.lang.Character/toUpperCase %))
     frequencies
     (sort-by second >)
     println)
[edit] D
import std.stdio, std.ascii, std.algorithm, std.range;
// Count the ASCII letters of unixdict.txt without regard to case and print
// the 26 totals, ten per output line.
void main() {
// frequency[0] counts 'a'/'A' ... frequency[25] counts 'z'/'Z'.
int[26] frequency;
// Read the file in chunks of 2^15 bytes.
foreach (ubyte[] buffer; File("unixdict.txt").byChunk(2 ^^ 15))
foreach (c; buffer.filter!isAlpha())
frequency[c.toLower() - 'a']++;
writefln("%(%(%s, %),\n%)", std.range.chunks(frequency[], 10));
}
- Output:
16421, 4115, 8216, 5799, 20144, 2662, 4129, 5208, 13980, 430, 1925, 10061, 5828, 12097, 12738, 5516, 378, 13436, 10210, 12836, 6489, 1902, 1968, 617, 3633, 433
[edit] FBSL
The result of the first evaluation of ASC() is retained in the symbol ASC for later use. This is a standard feature of FBSL functions. The ascii array is dynamic. Command(1) is the name of the script file.
#APPTYPE CONSOLE
'Open a text file and count the occurrences of each letter.
' Read the file fileName byte by byte and return a dynamic array in which
' element n holds the number of bytes with character code n.
FUNCTION countBytes(fileName AS STRING)
	DIM c AS STRING
	DIM ascii[]
	DIM handle AS INTEGER = FILEOPEN(fileName, BINARY)
	WHILE NOT FILEEOF(handle)
		c = FILEGETC(handle)
		IF c = "" THEN EXIT WHILE
		' ASC(c) is evaluated first; the bare symbol ASC then reuses that result.
		ascii[ASC] = ascii[ASC(c)] + 1
	WEND
	FILECLOSE(handle)
	RETURN ascii
' A block opened with FUNCTION has to be closed with END FUNCTION, not END SUB.
END FUNCTION
' Count the bytes of the file named by COMMAND(1), then print one row per
' array index: the index, the character and its count.
DIM counters = countBytes(COMMAND(1))
FOR DIM i = LBOUND(counters) TO UBOUND(counters)
' Indexes up to 32 are shown as numbers instead of characters.
PRINT i, TAB, IIF(i <= 32, i, CHR(i)), TAB, counters[i]
NEXT
PAUSE
[edit] Forth
\ counts: one cell per letter, index 0 for 'a' up to index 25 for 'z'.
create counts 26 cells allot
\ freq: count the letters of the named file without regard to case and
\ print the 26 totals.
: freq ( filename -- )
counts 26 cells erase
slurp-file bounds do
\ OR-ing with 32 folds ASCII upper case onto lower case before subtracting 'a'.
i c@ 32 or 'a -
\ Only offsets 0..25 are letters; everything else is dropped.
dup 0 26 within if
cells counts +
1 swap +!
else drop then
loop
26 0 do
cr [char] ' emit 'a i + emit ." ': "
counts i cells + @ .
loop ;
s" example.txt" freq
[edit] Fortran
Using the configuration file (which has changed since the example was documented) of the J example, compilation and output of this program on a gnu/linux system is
-*- mode: compilation; default-directory: "/tmp/" -*-
Compilation started at Sat May 18 18:09:46
a=./F && make $a && $a < configuration.file
f95 -Wall -ffree-form F.F -o F
92 21 17 24 82 19 19 22 67 0 2 27 27 57 55 31 1 61 43 60 20 6 2 0 10 0
Compilation finished at Sat May 18 18:09:46
And here's the FORTRAN90 program source. The program reads stdin and writes the result to stdout. Future enhancement: use block size records.
! count letters from stdin
! Reads standard input one character at a time and writes 26 totals to
! standard output: a(1) is the count of 'a'/'A' ... a(26) of 'z'/'Z'.
program LetterFrequency
  implicit none
  character (len=1) :: s
  integer, dimension(26) :: a
  integer :: ios, i, t
  data a/26*0/,i/0/
  open(unit=7, file='/dev/stdin', access='direct', form='formatted', recl=1, status='old', iostat=ios)
  if (ios .ne. 0) then
     write(0,*)'Opening stdin failed'
     stop
  endif
  do i=1, huge(i)
     read(unit=7, rec = i, fmt = '(a)', iostat = ios ) s
     if (ios .ne. 0) then
        !write(0,*)'ios on failure is ',ios
        close(unit=7)
        exit
     endif
     ! OR-ing with 32 folds ASCII upper case onto lower case.
     t = ior(iachar(s(1:1)), 32) - iachar('a')
     ! Accept offsets 0..25 only.  The previous upper bound, iachar('z'),
     ! let characters such as '{' and '~' index past the end of a.
     if ((0 .le. t) .and. (t .le. iachar('z') - iachar('a'))) then
        t = t+1
        a(t) = a(t) + 1
     endif
  end do
  write(6, *) a
end program LetterFrequency
[edit] Go
package main
import (
"fmt"
"io/ioutil"
"sort"
"unicode"
)
const file = "unixdict.txt"
// main reads the whole input file, tallies every rune in it and prints the
// tallies, most frequent first. Runes that are not graphic are shown by
// their U+ code point instead of as a character.
func main() {
	data, err := ioutil.ReadFile(file)
	if err != nil {
		fmt.Println(err)
		return
	}

	counts := make(map[rune]int)
	for _, ch := range string(data) {
		counts[ch]++
	}

	// Move the map entries into a sortable list.
	entries := make(lfList, 0, len(counts))
	for ch, n := range counts {
		entries = append(entries, &letterFreq{ch, n})
	}
	sort.Sort(entries)

	fmt.Println("file:", file)
	fmt.Println("letter frequency")
	for _, entry := range entries {
		if !unicode.IsGraphic(entry.rune) {
			fmt.Printf("%U %7d\n", entry.rune, entry.freq)
			continue
		}
		fmt.Printf(" %c %7d\n", entry.rune, entry.freq)
	}
}
// letterFreq pairs a rune with the number of times it was seen.
type letterFreq struct {
	rune
	freq int
}

// lfList orders letterFreq entries by descending frequency; entries with the
// same frequency are ordered by ascending rune value.
type lfList []*letterFreq

// Len reports the number of entries.
func (lfs lfList) Len() int { return len(lfs) }

// Less reports whether entry i must be placed before entry j.
func (lfs lfList) Less(i, j int) bool {
	left, right := lfs[i], lfs[j]
	if left.freq != right.freq {
		return left.freq > right.freq
	}
	return left.rune < right.rune
}

// Swap exchanges entries i and j.
func (lfs lfList) Swap(i, j int) {
	tmp := lfs[i]
	lfs[i] = lfs[j]
	lfs[j] = tmp
}
Output:
file: unixdict.txt letter frequency U+000A 25104 e 20144 a 16421 i 13980 r 13436 t 12836 o 12738 n 12097 s 10210 l 10061 c 8216 u 6489 m 5828 d 5799 p 5516 h 5208 g 4129 b 4115 y 3633 f 2662 w 1968 k 1925 v 1902 x 617 z 433 j 430 q 378 ' 105 & 6 . 6 1 2 0 1 2 1 3 1 4 1 5 1 6 1 7 1 8 1 9 1
[edit] Groovy
// frequency: fold the elements of its argument into a map from element to
// count; an element seen for the first time starts from 0.
def frequency = { it.inject([:]) { map, value -> map[value] = (map[value] ?: 0) + 1; map } }
// Apply it to the text of frequency.groovy and print one line per entry.
frequency(new File('frequency.groovy').text).each { key, value ->
println "'$key': $value"
}
Output:
'd': 1 'e': 19 'f': 4 ' ': 29 'r': 5 'q': 3 'u': 8 [lines omitted] 'o': 2 'x': 1 'h': 1 'k': 2 '"': 2 '$': 2
[edit] Haskell
Short version:
import Data.List
-- Sort the characters of freq.hs so equal ones become adjacent, group them
-- and print each group as (character, number of occurrences).
main = do
  contents <- readFile "freq.hs"
  let runs = group (sort contents)
  mapM_ (\run -> print (head run, length run)) runs
Properly architected version:
import qualified Data.Map as M
-- Tally the characters of freq.hs in a map that already holds an entry for
-- every code 0..255, then print all (character, count) pairs.
main = do
  text <- readFile "freq.hs"
  -- adjust only touches keys that exist, so other characters are ignored.
  let bump table ch = M.adjust (+1) ch table
      result = foldl bump initial text
  mapM_ print (M.toList result)

-- A map from each of the characters 0..255 to a zero count.
initial = M.fromList [(toEnum k, 0) | k <- [0..255]]
[edit] Icon and Unicon
The example below counts (case insensitive) letters and was run on a version of this source file.
link printf
# Count and print the letters of every file named on the command line.
procedure main(A)
every PrintCount(CountLetters(!A))
end
# Returns a table mapping each lower-case letter found in file fn to its
# count. When fn cannot be opened a message is written to &errout and no
# table is returned.
procedure CountLetters(fn) #: Return case insensitive count of letters
K := table(0)
if f := open(fn,"r") then {
# map() converts each line read to lower case; ! generates its characters.
every c := !map(|read(f)) do
if any(&lcase,c) then K[c] +:= 1
close(f)
return K
}
else write(&errout,"Unable to open file ",fn)
end
# Print one "key - count" line for every key of table T.
procedure PrintCount(T) #: Print the letters
every c := key(T) do
printf("%s - %d\n",c,T[c])
end
printf.icn provides formatting
Output:c - 17 k - 5 s - 10 h - 2 p - 10 e - 41 m - 2 u - 12 b - 2 r - 25 o - 16 w - 1 d - 10 l - 10 t - 27 a - 10 i - 13 y - 5 f - 12 n - 28 v - 4
[edit] J
Input is a directory-path with filename. Result is 26 integers representing counts of each letter, in alphabetic order (a's count is first).
require 'files' NB. define fread
NB. ltrfreq y: read file y, fold it to upper case and count each letter A-Z.
NB. (,~ -. -.) drops the non-letters and puts one copy of every letter in
NB. front, so all 26 letters get a count in alphabetic order; <: then
NB. subtracts that extra occurrence again.
ltrfreq=: 3 : 0
letters=. u: (u:inv'A') + i.26 NB. upper case letters
<: #/.~ (toupper fread y) (,~ -. -.) letters
)
Example use (based on a configuration file from another task):
ltrfreq 'config.file'
88 17 17 24 79 18 19 19 66 0 2 26 26 57 54 31 1 53 43 59 19 6 2 0 8 0
[edit] Java
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.util.Arrays;
/**
 * Counts the letters A to Z (ignoring case) in a text file.
 */
public class LetterFreq {
	/**
	 * Counts how often each of the 26 letters A-Z occurs in a file.
	 * Lines are converted to upper case first, so both cases are counted
	 * together; every other character is ignored.
	 *
	 * @param filename path of the text file to read
	 * @return 26 counts, index 0 for 'A' up to index 25 for 'Z'
	 * @throws IOException if the file cannot be opened or read
	 */
	public static int[] countLetters(String filename) throws IOException{
		int[] freqs = new int[26];
		BufferedReader in = new BufferedReader(new FileReader(filename));
		try{
			String line;
			while((line = in.readLine()) != null){
				line = line.toUpperCase();
				for(char ch:line.toCharArray()){
					// Character.isLetter also accepts letters outside A-Z,
					// which would index past the array; test the range itself.
					if(ch >= 'A' && ch <= 'Z'){
						freqs[ch - 'A']++;
					}
				}
			}
		}finally{
			// Close the reader even when reading fails part-way through.
			in.close();
		}
		return freqs;
	}

	/** Prints the letter counts of "filename.txt". */
	public static void main(String[] args) throws IOException{
		System.out.println(Arrays.toString(countLetters("filename.txt")));
	}
}
In Java 7, we can use try with resources. The countLetters method would look like this:
/**
 * Counts how often each of the 26 letters A-Z occurs in a file, ignoring
 * case. The reader is closed automatically by try-with-resources.
 *
 * @param filename path of the text file to read
 * @return 26 counts, index 0 for 'A' up to index 25 for 'Z'
 * @throws IOException if the file cannot be opened or read
 */
public static int[] countLetters(String filename) throws IOException{
	int[] freqs = new int[26];
	try(BufferedReader in = new BufferedReader(new FileReader(filename))){
		String line;
		while((line = in.readLine()) != null){
			line = line.toUpperCase();
			for(char ch:line.toCharArray()){
				// Character.isLetter also accepts letters outside A-Z,
				// which would index past the array; test the range itself.
				if(ch >= 'A' && ch <= 'Z'){
					freqs[ch - 'A']++;
				}
			}
		}
	}
	return freqs;
}
[edit] K
/ a is the joined text of all lines read by 0: ; ?a lists its distinct
/ characters, #:'=a counts the members of each group, and + flips the two
/ lists into (character;count) pairs.
/ NOTE(review): with the empty symbol ` the input source is presumably the
/ default one -- confirm for the K version in use.
+(?a;#:'=a:,/0:`)
Example: The file "hello.txt" contains the string "Hello, world!"
c:+(?a;#:'=a:,/0:`hello.txt)
Output:
(("H";1)
("e";1)
("l";3)
("o";2)
(",";1)
(" ";1)
("w";1)
("r";1)
("d";1)
("!";1))
Sort on decreasing occurrences:
c@>c[;1]
Output:
(("l";3)
("o";2)
("H";1)
("e";1)
(",";1)
(" ";1)
("w";1)
("r";1)
("d";1)
("!";1))
[edit] Liberty BASIC
Un-rem a line to convert to all-upper-case. Letter freq'y is printed as percentages.
' Read text.txt in one go, count every character with code 32 or above and
' print each character that occurs as a percentage of the file length.
open "text.txt" for input as #i
txt$ =input$( #i, lof( #i))
Le =len( txt$)
close #i
dim LetterFreqy( 255)
' txt$ =upper$( txt$)
for i =1 to Le
char =asc( mid$( txt$, i, 1))
' Codes below 32 are left out of the tally.
if char >=32 then LetterFreqy( char) =LetterFreqy( char) +1
next i
for j =32 to 255
if LetterFreqy( j) <>0 then print " Character #"; j, "("; chr$( j);_
") appeared "; using( "##.##", 100 *LetterFreqy( j) /Le); "% of the time."
next j
end
[edit] Lua
-- Open the file named on the command line
local file = assert(io.open(arg[1]))

-- Keep a table counting the instances of each letter
local instances = {}

-- Add one to the count of the letter `char`, ignoring its case.
local function tally(char)
  -- normalize case
  char = string.upper(char)
  -- A letter seen for the first time has no entry yet, so start from 0.
  instances[char] = (instances[char] or 0) + 1
end

-- For each line in the file
for line in file:lines() do
  line:gsub(
    '%a',  -- For each letter (%a) on the line,
    tally) -- increase the count for that letter
end

-- The file has been read completely; release the handle.
file:close()

-- Print letter counts
for letter, count in pairs(instances) do
  print(letter, count)
end
[edit] Maple
# Read all of File.txt as text and pass it to StringTools:-CharacterFrequencies.
StringTools:-CharacterFrequencies(readbytes("File.txt",infinity,TEXT))
[edit] Mathematica
(* Import file.txt as text, split it into characters and tally each distinct one. *)
Tally[Characters[Import["file.txt","Text"]]]
[edit] NetRexx
/* NetRexx ************************************************************
* 22.05.2013 Walter Pachl translated from REXX
**********************************************************************/
-- Main program: read the file named by the argument (test.txt by default),
-- count every character of every line, then report the Latin letters and
-- the other characters separately.
options replace format comments java crossref symbols nobinary
parse arg dsn .
if dsn = '' then
dsn = 'test.txt'
-- cnt is used as an indexed string: cnt[c] is 0 until character c is counted.
cnt=0
totChars=0 /*count of the total num of chars*/
totLetters=0 /*count of the total num letters.*/
indent=' '.left(20) /*used for indentation of output.*/
lines = scanFile(dsn)
loop l_ = 1 to lines[0]
line = lines[l_]
Say '>'line'<' line.length /* that's in test.txt */
/*
lrx=left_right(line)
Parse lrx leftx rightx
Say ' 'leftx
Say ' 'rightx
*/
loop k=1 for line.length() /*loop over characters */
totChars=totChars+1 /*Increment total number of chars*/
c=line.substr(k,1) /*get character number k */
cnt[c]=cnt[c]+1 /*increment the character's count*/
End
end l_
w=totChars.length /*used for right-aligning counts.*/
say 'file -----' dsn "----- has" lines[0] 'records.'
say 'file -----' dsn "----- has" totChars 'characters.'
Loop L=0 to 255 /* display nonzero letter counts */
c=l.d2c /* the character in question */
if cnt[c]>0 & c.datatype('M')>0 Then Do /* was found in the file */
/* and is a latin letter */
say indent "(Latin) letter " c 'count:' cnt[c].right(w) /* tell */
totLetters=totLetters+cnt[c] /* increment number of letters */
End
End
say 'file -----' dsn "----- has" totLetters '(Latin) letters.'
say ' other charactes follow'
other=0
loop m=0 to 255 /* now for non-letters */
c=m.d2c /* the character in question */
y=c.c2x /* the hex representation */
if cnt[c]>0 & c.datatype('M')=0 Then Do /* was found in the file */
/* and is not a latin letter */
other=other+cnt[c] /* increment count */
_=cnt[c].right(w) /* prepare output of count */
select /*make the character viewable. */
when c<<' ' | m==255 then say indent "'"y"'x character count:" _
when c==' ' then say indent "blank character count:" _
otherwise say indent " " c 'character count:' _
end
end
end
say 'file -----' dsn "----- has" other 'other characters.'
say 'file -----' dsn "----- has" totLetters 'letters.'
-- Read a file and return contents as a Rexx indexed string
-- Element 0 holds the number of lines, elements 1..n hold the lines.
-- When the file is missing the stack trace is printed and 0 lines are returned.
method scanFile(dsn) public static returns Rexx
  fileLines = ''
  -- Start from a numeric record count so that an empty or missing file
  -- gives the caller 0 lines rather than the empty default string.
  fileLines[0] = 0
  do
    inFile = File(dsn)
    inFileScanner = Scanner(inFile)
    -- hasNextLine (not hasNext) so that trailing blank or whitespace-only
    -- lines are read as well; hasNext looks for a token and ends early.
    loop l_ = 1 while inFileScanner.hasNextLine()
      fileLines[0] = l_
      fileLines[l_] = inFileScanner.nextLine()
      end l_
    inFileScanner.close()
  catch ex = FileNotFoundException
    ex.printStackTrace
  end
  return fileLines
[edit] Objeck
use IO;

bundle Default {
  # Counts the letters A-Z (ignoring case) of "filename.txt" and prints one
  # line per letter.
  class Test {
    function : Main(args : String[]) ~ Nil {
      freqs := CountLetters("filename.txt");
      # '<=' so that the count for 'Z' is printed as well.
      for(i := 'A'; i <= 'Z'; i += 1;) {
        Console->Print(i->As(Char))->Print("=>")->PrintLine(freqs[i - 'A']);
      };
    }

    # Returns 26 counts for the file, index 0 for 'A' up to index 25 for 'Z'.
    function : CountLetters(filename : String) ~ Int[] {
      freqs := Int->New[26];
      reader := FileReader->New(filename);
      while(reader->IsEOF() <> true) {
        line := reader->ReadString()->ToUpper();
        each(i : line) {
          ch := line->Get(i);
          if(ch->IsChar()) {
            index := ch - 'A';
            # Ignore letters that do not fall inside A-Z after upper-casing.
            if(index >= 0 & index < 26) {
              freqs[index] := freqs[index] + 1;
            };
          };
        };
      };
      reader->Close();

      return freqs;
    }
  }
}
[edit] Objective-C
#import <Foundation/Foundation.h>
// Load the file named by argv[1] as UTF-8 text, count every character that
// belongs to the letter character set and log each letter with its count.
int main (int argc, const char *argv[]) {
NSAutoreleasePool * pool = [[NSAutoreleasePool alloc] init];
NSData *data = [NSData dataWithContentsOfFile:[NSString stringWithUTF8String:argv[1]]];
NSString *string = [[NSString alloc] initWithData:data encoding:NSUTF8StringEncoding];
NSCountedSet *countedSet = [[NSCountedSet alloc] init];
NSUInteger len = [string length];
for (NSUInteger i = 0; i < len; i++) {
unichar c = [string characterAtIndex:i];
// Each letter is boxed in an NSNumber so the counted set can hold it.
if ([[NSCharacterSet letterCharacterSet] characterIsMember:c])
[countedSet addObject:[NSNumber numberWithInteger:c]];
}
// Manual reference counting: release what was created with alloc/init.
[string release];
for (NSNumber *chr in countedSet) {
NSLog(@"%C => %lu", (unichar)[chr integerValue], [countedSet countForObject:chr]);
}
[countedSet release];
[pool release];
return 0;
}
[edit] OCaml
We open a text file and compute the letter frequency. Characters other than [a-z] and [A-Z] are ignored, and upper-case letters are converted to lower case before the letter frequency is computed.
(* Count the letters a-z (ignoring case) of the file named by the first
   command-line argument and print one "letter -> count" line per letter. *)
let () =
  let ic = open_in Sys.argv.(1) in
  let base = int_of_char 'a' in
  (* arr.(0) counts 'a'/'A' ... arr.(25) counts 'z'/'Z'. *)
  let arr = Array.make 26 0 in
  try while true do
    (* Char.lowercase_ascii is the current name for the ASCII-only case
       conversion; Char.lowercase is deprecated and gone from newer stdlibs. *)
    let c = Char.lowercase_ascii (input_char ic) in
    let ndx = int_of_char c - base in
    if ndx < 26 && ndx >= 0 then
      arr.(ndx) <- succ arr.(ndx)
  done
  with End_of_file ->
    (* input_char signals the end of the file with End_of_file. *)
    close_in ic;
    for i=0 to 25 do
      Printf.printf "%c -> %d\n" (char_of_int(i + base)) arr.(i)
    done
[edit] OxygenBasic
' Load t.txt, count each character code and build a report listing the
' codes 32 to 255 with their counts.
indexbase 0
sys a,e,i,c[255]
string s=getfile "t.txt"
e=len s
for i=1 to e
a=asc(s,i)
++c(a)
next
cr=chr(13)+chr(10)
pr="Char Frequencies" cr cr
for i=32 to 255
pr+=chr(i) chr(9) c(i) cr
next
print pr
'putfile "CharCount.txt",pr
[edit] PARI/GP
\\ v[1] counts 'A'/'a' ... v[26] counts 'Z'/'z'.
v=vector(26);
U=readvec("foo.txt");
\\ Codes 65..90 are upper-case letters and 97..122 lower-case letters; both
\\ are added to the same slot of v.
for(i=1,#U,u=Vecsmall(U[i]);for(j=1,#u,if(u[j]>64&&u[j]<91,v[u[j]-64]++,u[j]>96&&u[j]<123,v[u[j]-96]++)));
v
[edit] Pascal
{ Counts how often each character occurs in a_text_file.txt and prints
  every character that occurs at least once. End-of-line markers are
  skipped and therefore not counted. }
program LetterFrequency;
var
  textFile: text;
  character: char;
  counter: array[0..255] of integer;
  i: integer;
begin
  for i := low(counter) to high(counter) do
    counter[i] := 0;
  assign(textFile, 'a_text_file.txt');
  reset(textFile);
  while not eof(textFile) do
  begin
    while not eoln(textFile) do
    begin
      read(textFile, character);
      inc(counter[ord(character)]);
    end;
    { Step over the end-of-line marker. }
    readln(textFile);
  end;
  { Release the file once it has been read completely. }
  close(textFile);
  for i := low(counter) to high(counter) do
    if counter[i] > 0 then
      writeln(char(i), ': ', counter[i]);
end.
Output:
>: ./LetterFrequency 3: 2 a: 4 d: 3 e: 3 f: 3 g: 2 q: 1 r: 4 s: 3 t: 2 w: 2
[edit] Perl
Counts letters in files given on command line or piped to stdin. Case insensitive.
# Tally every character of the input, lower-cased, in %cnt.
while (defined(my $line = <>)) {
    $cnt{lc $_}++ for split //, $line;
}

# Report the letters a-z; a letter that never occurred is shown as 0.
for my $letter ('a' .. 'z') {
    print "$letter: ", $cnt{$letter} // 0, "\n";
}
[edit] Perl 6
# Increment a counter in %count for every character produced by lines.comb,
# then print the sorted entries of the hash.
(my %count){$_}++ for lines.comb;
.say for %count.sort;
The lines function automatically opens the file supplied on the command line. This program does not count newlines.
# Same count built with a bag over the characters of the slurped input.
.say for bag(slurp.comb).pairs.sort;
[edit] PHP
<?php
// Read the file named by the first argument, split it into single-byte
// strings and print how often each distinct one occurs.
print_r(array_count_values(str_split(file_get_contents($argv[1]))));
?>
[edit] PicoLisp
# Read file.txt character by character, accumulate a count per character in
# Freq and return the sorted list of (character . count) pairs.
(let Freq NIL
(in "file.txt"
(while (char) (accu 'Freq @ 1)) )
(sort Freq) )
For a "file.txt":
abcd cdef
Output:
-> (("^J" . 2) ("a" . 1) ("b" . 1) ("c" . 2) ("d" . 2) ("e" . 1) ("f" . 1))
[edit] PL/I
/* Read the input file one character at a time, echo each character and   */
/* tally the upper-case letters A-Z; then list every letter that occurred */
/* together with its count.                                               */
frequencies: procedure options (main);
declare tallies(26) fixed binary static initial ((26) 0);
declare alphabet character (26) static initial
('ABCDEFGHIJKLMNOPQRSTUVWXYZ');
declare c character (1), i fixed binary;
declare in file;
open file (in) title ('/LETTER.DAT,type(text),recsize(200)') input;
/* The endless read loop below is left through this ENDFILE condition. */
on endfile (in) go to prepare_list;
do while('1'b);
get file (in) edit (c) (a(1)); put edit (c) (a);
/* index is 0 when c is not one of the 26 letters in alphabet. */
i = index(alphabet, c);
if i > 0 then tallies(i) = tallies(i) + 1;
end;
prepare_list:
put skip list('Letter', 'Frequency');
do i = 1 to 26;
if tallies(i) > 0 then
put skip list (substr(alphabet, i, 1), tallies(i));
end;
end frequencies;
Data:
THEQUICKBROWNFOX JUMPSOVERTHELAZYDOG
Output:
Letter Frequency A 1 B 1 C 1 D 1 E 3 F 1 G 1 H 2 I 1 J 1 K 1 L 1 M 1 N 1 O 4 P 1 Q 1 R 2 S 1 T 2 U 2 V 1 W 1 X 1 Y 1 Z 1
[edit] Prolog
Works with SWI-Prolog.
Only alphabetic character codes are counted; each one is converted to upper case first.
Uses packList/2 as defined in the Run-length encoding#Prolog task.
% frequency(+File): read File, keep its alphabetic codes, convert them to
% upper-case atoms and print how often each one occurs.
frequency(File) :-
read_file_to_codes(File, Code, []),
% we only keep alphabetic codes
include(my_code_type, Code, LstCharCode),
% we translate char_codes into uppercase atoms.
maplist(my_upcase, LstCharCode, LstChar),
% sort and pack the list
msort(LstChar, SortLstChar),
packList(SortLstChar, Freq),
maplist(my_write, Freq).
% my_write(+[Num, Atom]): print one "Number of Atom : Num" line, with Num
% formatted through the '%3r' column specification.
my_write([Num, Atom]) :-
swritef(A, '%3r', [Num]),
writef('Number of %w :%w\n', [Atom, A]).
% my_code_type(+Code): succeeds when code_type/2 classifies Code as alpha.
my_code_type(Code) :-
code_type(Code, alpha).
% my_upcase(+CharCode, -UpChar): UpChar is the upper-case atom for CharCode.
my_upcase(CharCode, UpChar) :-
char_code(Atom, CharCode),
upcase_atom(Atom, UpChar).
:- use_module(library(clpfd)).
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% ?- packList([a,a,a,b,c,c,c,d,d,e], L).
% L = [[3,a],[1,b],[3,c],[2,d],[1,e]] .
%
% ?- packList(R, [[3,a],[1,b],[3,c],[2,d],[1,e]]).
% R = [a,a,a,b,c,c,c,d,d,e] .
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% packList(?List, ?Packed): Packed holds one [Count, Element] pair for each
% run of equal adjacent elements of List.
packList([],[]).
packList([X],[[1,X]]) :-
!.
packList([X|Rest],[XRun|Packed]):-
run(X,Rest, XRun,RRest),
packList(RRest,Packed).
% run(Var, List, [N, Var], Rest): N is one more than the number of leading
% copies of Var in List; Rest is what follows that run.
run(Var,[],[1,Var],[]).
run(Var,[Var|LRest],[N1, Var],RRest):-
N #> 0,
N1 #= N + 1,
run(Var,LRest,[N, Var],RRest).
% The run ends at the first element that differs from Var.
run(Var,[Other|RRest], [1,Var],[Other|RRest]):-
dif(Var,Other).
Output for this file
Number of A : 63 Number of B : 7 Number of C : 53 Number of D : 29 Number of E : 65 ... Number of T : 52 Number of U : 20 Number of V : 10 Number of W : 8 Number of X : 6 Number of Y : 12 true .
[edit] PureBasic
Alphabetic codes are converted to uppercase before being used and no other codes are used as part of the calculations.
; Adds the letters of textLine to letterCounts(): indexes 1..26 hold the
; counts for A..Z and index 0 the running total of all letters.
Procedure countLetters(Array letterCounts(1), textLine.s)
;counts only letters A -> Z, uses index 0 of letterCounts() to keep a total of all counts
Protected i, lineLength = Len(textLine), letter
textLine = UCase(textLine)
For i = 1 To lineLength
; Map 'A'..'Z' to 1..26; any other character falls outside that range.
letter = Asc(Mid(textLine, i, 1)) - 'A' + 1
If letter >= 1 And letter <= 26
letterCounts(letter) + 1 ;tally individual letter count
letterCounts(0) + 1 ;increment total letter count
EndIf
Next
EndProcedure
; Ask for a text file, count its letters line by line and print each
; letter's share of all letters together with its count.
If OpenConsole()
Define filename.s, fileID, i
filename = OpenFileRequester("Select text file to examine", "*.txt", "Text (*.txt)|*.txt;|All files (*.*)|*.*", 0)
fileID = 0
If ReadFile(fileID, filename)
Dim letterCounts(26) ;A - Z only, index 0 contains the total of all letter counts
Define textLine.s
While Not Eof(fileID)
textLine = ReadString(fileID)
countLetters(letterCounts(), textLine)
Wend
CloseFile(fileID)
PrintN("File: " + filename + #CRLF$)
PrintN("Letter %Freq Count")
For i = 1 To 26
Print(" " + Chr(64 + i) + " ")
; NOTE(review): letterCounts(0) is 0 for a file without letters; confirm how this division behaves then.
Print(RSet(StrF(100 * letterCounts(i) / letterCounts(0), 1), 5, " ") + " ")
PrintN(Str(letterCounts(i)))
Next
PrintN(#CRLF$ + "Total letter count in file: " + Str(letterCounts(0)))
EndIf
Print(#CRLF$ + #CRLF$ + "Press ENTER to exit"): Input()
CloseConsole()
EndIf
Sample output:
File: D:\_T\Text\dictionary.txt Letter %Freq Count A 7.6 27743 B 2.0 7248 C 4.3 15433 D 3.8 13798 E 11.8 42917 F 1.4 5030 G 2.8 10336 H 2.1 7720 I 8.6 31141 J 0.2 588 K 0.8 2964 L 5.3 19399 M 2.7 9821 N 7.1 25682 O 6.1 22084 P 2.9 10696 Q 0.2 714 R 7.5 27055 S 8.0 28898 T 7.1 25773 U 3.3 12032 V 1.1 4019 W 0.9 3348 X 0.3 1096 Y 1.7 6251 Z 0.3 1177 Total letter count in file: 362963
[edit] Python
[edit] Using collections.Counter
import collections, sys


def filecharcount(openfile):
    """Count every character of an open text file (or any iterable of lines).

    Returns a list of ``(character, count)`` tuples sorted by character.
    Line endings are counted like any other character.
    """
    return sorted(collections.Counter(c for l in openfile for c in l).items())


if __name__ == "__main__":
    # The with-block closes the file once the counts have been printed.
    with open(sys.argv[1]) as f:
        print(filecharcount(f))
[edit] Not using collections.Counter
import string
import sys

if hasattr(string, 'ascii_lowercase'):
    letters = string.ascii_lowercase       # Python 2.2 and later
else:
    letters = string.lowercase             # Earlier versions
offset = ord('a')


def countletters(file_handle):
    """Traverse a file and compute the number of occurrences of each letter.

    ``file_handle`` is any iterable of text lines. Upper- and lower-case
    letters are counted together; all other characters are ignored.

    Returns the results as a simple 26-element list of integers: index 0 is
    the count for 'a', index 25 the count for 'z'.
    """
    results = [0] * len(letters)
    for line in file_handle:
        for char in line:
            char = char.lower()
            if char in letters:
                # Ordinal of the lowercase letter minus ordinal of 'a' -> 0..25
                results[ord(char) - offset] += 1
    return results


if __name__ == "__main__":
    # The with-block closes the file once it has been counted.
    with open(sys.argv[1]) as sourcedata:
        lettercounts = countletters(sourcedata)
    # One line of space-separated "letter=count" items.
    print(" ".join("%s=%d" % (chr(i + offset), count)
                   for i, count in enumerate(lettercounts)))
This example defines the function and provides a sample usage. The if ... __main__... line allows it to be cleanly imported into any other Python code while also allowing it to function as a standalone script. (A very common Python idiom).
Using a numerically indexed array (list) for this is artificial and clutters the code somewhat.
[edit] Using defaultdict
...
from collections import defaultdict
def countletters(file_handle):
    """Tally the letters found in the lines of ``file_handle``.

    Returns a dictionary that maps each lower-cased letter to the number of
    times it occurs; characters not contained in ``letters`` are skipped.
    """
    results = defaultdict(int)
    for line in file_handle:
        for char in line:
            # Lower-case once and reuse the value for both test and key.
            lowered = char.lower()
            if lowered in letters:
                results[lowered] += 1
    return results
Which eliminates the ungainly fiddling with ordinal values and offsets in function countletters of a previous example above. More importantly it allows the results to be more simply printed using:
lettercounts = countletters(sourcedata)
# items() exists on dictionaries in both Python 2 and 3 (iteritems() is
# Python 2 only); the joined string prints the same single line of
# space-separated "letter=count" items.
print(" ".join("%s=%s" % (letter, count)
               for letter, count in lettercounts.items()))
Again eliminating all fussing with the details of converting letters into list indices.
[edit] R
# Count the characters of a text file.
#   filename: path of the file to read
# Returns the summary of a factor built from the file's characters, i.e. a
# named vector of counts. Line breaks are not part of the count because the
# lines are joined without a separator.
letter.frequency <- function(filename)
{
    lines <- readLines(filename)
    joined <- paste(lines, collapse = '')
    chars <- strsplit(joined, NULL)[[1]]
    as.factor.chars <- factor(chars)
    summary(as.factor.chars)
}
Usage on itself:
> source('letter.frequency.r')
> letter.frequency('letter.frequency.r')
- , . ' ( ) [ ] { } < = 1 a c d e f h i l L m n N o p q r s t u U y
22 3 2 1 2 6 6 2 2 1 1 3 1 1 9 6 1 14 7 2 7 8 3 4 6 1 3 3 1 8 8 7 3 1 2
[edit] Racket
#lang racket
(require math)
;; letter-frequencies : input-port -> (values (listof char) (listof count))
;; Reads every character from ip and hands the resulting list to
;; count-samples, which returns the distinct characters and their counts.
(define (letter-frequencies ip)
  (let ([chars (port->list read-char ip)])
    (count-samples chars)))
(letter-frequencies (open-input-string "abaabdc"))
Output:
'(#\a #\b #\d #\c) '(3 2 1 1)
Using input from a text file:
(letter-frequencies (open-input-file "somefile.txt"))
[edit] Raven
# count_letters: tally each character of $words in a hash, then print the
# sorted keys combined with their counts.
define count_letters use $words
{ } as $wordHash [ ] as $keys [ ] as $vals
$words each chr
# Look the character up (0 when absent) and store the count plus one.
dup $wordHash swap get 0 prefer 1 + # stack: chr cnt
swap $wordHash swap set
$wordHash keys copy sort each
dup $keys push
$wordHash swap get $vals push
$keys $vals combine print "\n" print
# Read all of test.dat and count its characters.
"test.dat" as $file
$file read as $all_data
$all_data count_letters
[edit] REXX
[edit] Version 1
It should be noted that the file is read one line at a time, so the
line-end characters (presumably the line-feed, carriage return, new-line, or
whatever control characters are being used) are not read and not reported.
These characters could be read if the charin BIF were used instead of linein.
Also note that this REXX program is ASCII or EBCDIC independent, but what constitutes a letter is
restricted to the Latin alphabet (which the English version of REXX considers a letter).
All characters are still counted, whether a letter or not, including non-displayable characters.
/*REXX program counts the occurrences of all characters in a file, */
/* {all Latin alphabet letters are uppercased for counting letters}. */
/* Counts are kept in the stem @. under the two-digit hex code of each  */
/* character (@.xx) and, for letters, under @.up.xx (uppercased).       */
parse arg fileID . /*That's not a middle dot: · */
if fileID=='' then fileID='JUNK.TXT' /*¿none specified? Use default.*/
@.=0 /*wouldn't it be neat to use Θ ? */
totChars=0 /*count of the total num of chars*/
totLetters=0 /*count of the total num letters.*/
indent=left('',20) /*used for indentation of output.*/
do j=1 while lines(fileID)\==0 /*read file until cows come home.*/
rec=linein(fileID) /*get a line/record from the file*/
do k=1 for length(rec) /*examine/count each character. */
totChars=totChars+1 /*bump the count of num of chars.*/
c=substr(rec,k,1) /*peel off a character from input*/
x=c2x(c) /*convert the character to hex. */
@.x=@.x+1 /*bump the character's count. */
if \datatype(c,'M') then iterate /*if not a letter, get next char*/
totLetters=totLetters+1 /*bump the [Latin] letter count. */
upper c /* ◄«««««««««««««««««««───uppercase a Latin character.*/
x=c2x(c) /*convert uppCase letter ══► hex.*/
@.up.x=@.up.x+1 /*bump the (Latin) letter's count*/
end /*k*/ /*this program doesn't use π or Γ*/
end /*j*/ /*maybe we're ½ done by now, or ¬*/
w=length(totChars) /*used for right-aligning counts.*/
say 'file ─────' fileId "───── has" j-1 'records.' ; say
say 'file ─────' fileId "───── has" totChars 'characters.' ; say
do L=0 to 255 /*display none-zero letter counts*/
/*D2X is given a length of 2 so the key matches the 2-digit C2X key.*/
y=d2x(L,2); if @.up.y==0 then iterate /*zero count? Then ignore letter*/
c=d2c(L) /*C is the char version of a char*/
say indent "(Latin) letter " c 'count:' right(@.up.y,w)
end /*L*/ /*in a rut, maybe it's a cañon. */
say; say 'file ─────' fileId "───── has" totLetters '(Latin) letters.'; say
do m=0 to 255 /*display none-zero char counts. */
/*Without the length, codes 0 to 15 gave one digit and were never found.*/
y=d2x(m,2); if @.y==0 then iterate /*Zero count? Then ignore char.*/
c=d2c(m) /*C is the char version of a char*/
_=right(@.y,w) /*bad place for dithering: ░▒▓█ */
select /*make the character viewable. */
when c<<' ' | m==255 then say indent "'"y"'x character count:" _
when c==' ' then say indent "blank character count:" _
otherwise say indent " " c 'character count:' _
end /*select*/ /*I wish REXX had a Σ function.*/
end /*m*/ /*255 isn't ∞, but sometimes ∙∙∙ */
say; say 'file ─────' fileId "───── has" totChars 'characters.' /*Ω*/
Output when using this REXX program as its own input. Note that this REXX program works with ASCII or EBCDIC, but the order of the output will be different because of the order in which EBCDIC stores its characters.
file ───── countfrq.rex ───── has 52 records.
file ───── countfrq.rex ───── has 3076 characters.
(Latin) letter A count: 108
(Latin) letter B count: 14
(Latin) letter C count: 94
(Latin) letter D count: 48
(Latin) letter E count: 180
(Latin) letter F count: 45
(Latin) letter G count: 16
(Latin) letter H count: 70
(Latin) letter I count: 85
(Latin) letter J count: 4
(Latin) letter K count: 6
(Latin) letter L count: 68
(Latin) letter M count: 26
(Latin) letter N count: 97
(Latin) letter O count: 93
(Latin) letter P count: 28
(Latin) letter R count: 103
(Latin) letter S count: 75
(Latin) letter T count: 169
(Latin) letter U count: 50
(Latin) letter V count: 5
(Latin) letter W count: 13
(Latin) letter X count: 22
(Latin) letter Y count: 26
(Latin) letter Z count: 4
file ───── countfrq.rex ───── has 1449 (Latin) letters.
'10'x character count: 1
'11'x character count: 1
blank character count: 1119
" character count: 18
' character count: 42
( character count: 18
) character count: 18
* character count: 80
+ character count: 4
, character count: 11
- character count: 4
. character count: 37
/ character count: 82
0 character count: 9
1 character count: 8
2 character count: 11
5 character count: 8
: character count: 6
; character count: 7
< character count: 2
= character count: 35
? character count: 4
@ character count: 9
C character count: 9
D character count: 5
E character count: 2
I character count: 10
J character count: 1
K character count: 1
L character count: 14
M character count: 1
N character count: 1
R character count: 2
T character count: 5
U character count: 2
X character count: 5
Z character count: 1
[ character count: 1
\ character count: 2
] character count: 1
_ character count: 4
a character count: 108
b character count: 14
c character count: 85
d character count: 43
e character count: 178
f character count: 45
g character count: 16
h character count: 70
i character count: 75
j character count: 3
k character count: 5
l character count: 54
m character count: 25
n character count: 96
o character count: 93
p character count: 28
r character count: 101
s character count: 75
t character count: 164
u character count: 48
v character count: 5
w character count: 13
x character count: 17
y character count: 26
z character count: 3
{ character count: 1
| character count: 1
} character count: 1
ñ character count: 1
¿ character count: 1
¬ character count: 1
½ character count: 1
« character count: 19
░ character count: 1
▒ character count: 1
▓ character count: 1
─ character count: 43
═ character count: 2
█ character count: 1
Γ character count: 1
π character count: 1
Σ character count: 1
Θ character count: 1
Ω character count: 1
∞ character count: 1
∙ character count: 3
· character count: 1
file ───── countfrq.rex ───── has 3076 characters.
[edit] Version 2 (for TSO)
/*REXX program: tally every character of a TSO data set.
* The records are fetched with EXECIO into the stem L. (no linein).
* Letters keep their case; nothing is translated to uppercase.
* The stem tail is the character itself, so no hex tails are needed.
***********************************************************************/
parse arg dsn .                          /* data set name, if supplied  */
if dsn='' then dsn='PRIV.V100(TEST)'     /* otherwise use the default   */
tally.=0                                 /* per-character counters      */
"ALLOC FI(IN) DA("dsn") SHR REUSE"
'EXECIO * DISKR IN (STEM L. FINIS'
'FREE FI(IN)'
pad=left('',20)                          /* indentation of report lines */
totChars=0                               /* characters seen so far      */
do recNo=1 to l.0                        /* walk through all records    */
  line=l.recNo
  say '>'line'<' length(line)            /* echo the record and length  */
  say ' E8C44D8FF015674BCDEF'
  say ' 61100711200000000002'
  do p=1 to length(line)                 /* walk through the record     */
    ch=substr(line,p,1)
    tally.ch=tally.ch+1                  /* count this character        */
    totChars=totChars+1
  end
end
w=length(totChars)                       /* width for right-aligning    */
say 'file -----' dsn "----- has" recNo-1 'records.'
say 'file -----' dsn "----- has" totChars 'characters.'
totLetters=0                             /* letters seen (any case)     */
do code=0 to 255                         /* first pass: Latin letters   */
  ch=d2c(code)
  if tally.ch=0 then iterate             /* character never occurred    */
  if datatype(ch,'M')=0 then iterate     /* not a letter: second pass   */
  say pad "(Latin) letter " ch 'count:' right(tally.ch,w)
  totLetters=totLetters+tally.ch
end
say 'file -----' dsn "----- has" totLetters '(Latin) letters.'
say ' other charactes follow'
other=0                                  /* non-letter characters seen  */
do code=0 to 255                         /* second pass: everything else*/
  ch=d2c(code)
  if tally.ch=0 | datatype(ch,'M') then iterate
  hex=c2x(ch)                            /* shown for unprintable chars */
  other=other+tally.ch
  cnt=right(tally.ch,w)
  select                                 /* pick a viewable rendering   */
    when ch<<' ' | code==255 then say pad "'"hex"'x character count:" cnt
    when ch==' ' then say pad "blank character count:" cnt
    otherwise say pad " " ch 'character count:' cnt
  end
end
say 'file -----' dsn "----- has" other 'other characters.'
Output:
>WaA Pa12 :&-: :äüÖ2< 20
E8C44D8FF015674BCDEF
61100711200000000002
file ----- PRIV.V100(TEST) ----- has 1 records.
file ----- PRIV.V100(TEST) ----- has 20 characters.
(Latin) letter a count: 2
(Latin) letter A count: 1
(Latin) letter P count: 1
(Latin) letter W count: 1
file ----- PRIV.V100(TEST) ----- has 5 (Latin) letters.
other charactes follow
'00'x character count: 1
'10'x character count: 1
blank character count: 3
& character count: 1
- character count: 1
: character count: 1
: character count: 1
ä character count: 1
ü character count: 1
Ö character count: 1
1 character count: 1
2 character count: 2
file ----- PRIV.V100(TEST) ----- has 15 other characters.
[edit] Ruby
# Counts the letters a-z (case-insensitively) in the named file.
#
# file - path of the text file to read.
#
# Returns an Array of [letter, count] pairs with lower-case letters, ordered
# by each letter's first appearance in the file; other characters are ignored.
def letter_frequency(file)
  alphabet = 'a' .. 'z'
  tallies = Hash.new(0)
  File.read(file).split(//).each do |char|
    lowered = char.downcase
    tallies[lowered] += 1 if alphabet.include?(lowered)
  end
  tallies.to_a
end
# Report the letters of the file named on the command line, most frequent first.
by_count_desc = letter_frequency(ARGV[0]).sort_by { |_letter, count| -count }
by_count_desc.each { |entry| p entry }
example output, using the program file as input:
$ ruby letterFrequency.rb letterFrequency.rb ["e", 34] ["l", 20] ["t", 17] ["r", 14] ["a", 12] ["y", 9] ["c", 8] ["i", 7] ["v", 6] ["n", 6] ["f", 6] ["s", 6] ["d", 5] ["p", 5] ["k", 5] ["u", 4] ["o", 4] ["g", 3] ["b", 2] ["h", 2] ["q", 2] ["z", 1] ["w", 1]
[edit] Ruby 2.0
# Tallies the alphabetic characters produced by file.each_char, folding
# them to upper case.
#
# file - any object responding to each_char (for example ARGF or a String).
#
# Returns a Hash mapping each upper-cased letter to its count. Characters are
# consumed through a lazy enumerator, one at a time.
def letter_frequency(file)
  letters = file.each_char.lazy.grep(/[[:alpha:]]/).map(&:upcase)
  letters.each_with_object({}) do |char, freq_map|
    freq_map[char] = freq_map.fetch(char, 0) + 1
  end
end
# Print one "LETTER: count" line per letter, sorted by letter.
letter_frequency(ARGF).sort.each { |letter, frequency| puts "#{letter}: #{frequency}\n" }
Note that this version *should* use less memory, even on a gigantic file. This is done by using lazy enumerables, which Ruby 2.0 introduces.
Example output, using the (somewhat large) dictionary file as the input. Also note that this version works on Unicode text.
$ ruby letter_frequency.rb /usr/share/dict/words A: 64439 B: 15526 C: 31872 D: 28531 E: 88833 F: 10675 G: 22712 H: 19320 I: 66986 J: 1948 K: 8409 L: 41107 M: 22508 N: 57144 O: 48944 P: 22274 Q: 1524 R: 57347 S: 90113 T: 53006 U: 26118 V: 7989 W: 7530 X: 2124 Y: 12652 Z: 3281 Å: 1 á: 10 â: 6 ä: 7 å: 3 ç: 5 è: 28 é: 144 ê: 6 í: 2 ñ: 8 ó: 8 ô: 2 ö: 16 û: 3 ü: 12
[edit] Run BASIC
' Tally how often each character code occurs in a text file, then print the
' count and percentage for every code from 32 to 255 that occurred.
' The whole file is read into one string before counting.
open "c:\rbp101\public\textFile.txt" for input as #f
textData$ = input$(#f, lof( #f))
ln =len(textData$)
close #f
' One counter per character code 0..255.
dim charCount( 255)
for i =1 to ln
char = asc(mid$(textData$,i,1))
charCount(char) = charCount(char) + 1
' Only codes above 31 contribute to the total used for the percentages.
if char > 31 then totCount = totCount + 1
next i
' Codes below 32 are counted above but never printed.
for i = 32 to 255
if charCount(i) > 0 then print "Ascii:";using("###",i);" char:";chr$(i);" Count:";using("#######",charCount(i));" ";using("##.#",(charCount(i) / totCount) * 100);"%"
next i
Output uses this program to count itself:
Ascii: 32 char: Count: 76 16.1% Ascii: 34 char:" Count: 18 3.8% Ascii: 35 char:# Count: 17 3.6% Ascii: 36 char:$ Count: 6 1.3% Ascii: 37 char:% Count: 1 0.2% Ascii: 40 char:( Count: 16 3.4% Ascii: 41 char:) Count: 16 3.4% Ascii: 42 char:* Count: 1 0.2% Ascii: 43 char:+ Count: 2 0.4% Ascii: 44 char:, Count: 6 1.3% Ascii: 46 char:. Count: 2 0.4% Ascii: 47 char:/ Count: 1 0.2% Ascii: 48 char:0 Count: 4 0.8% Ascii: 49 char:1 Count: 8 1.7% Ascii: 50 char:2 Count: 3 0.6% Ascii: 51 char:3 Count: 2 0.4% Ascii: 53 char:5 Count: 4 0.8% Ascii: 58 char:: Count: 4 0.8% Ascii: 59 char:; Count: 8 1.7% Ascii: 61 char:= Count: 7 1.5% Ascii: 62 char:> Count: 2 0.4% Ascii: 65 char:A Count: 1 0.2% Ascii: 67 char:C Count: 10 2.1% Ascii: 68 char:D Count: 3 0.6% Ascii: 70 char:F Count: 1 0.2% Ascii: 92 char:\ Count: 3 0.6% Ascii: 97 char:a Count: 19 4.0% Ascii: 98 char:b Count: 2 0.4% Ascii: 99 char:c Count: 17 3.6% Ascii:100 char:d Count: 3 0.6% Ascii:101 char:e Count: 13 2.7% Ascii:102 char:f Count: 10 2.1% Ascii:103 char:g Count: 3 0.6% Ascii:104 char:h Count: 14 3.0% Ascii:105 char:i Count: 24 5.1% Ascii:108 char:l Count: 7 1.5% Ascii:109 char:m Count: 2 0.4% Ascii:110 char:n Count: 25 5.3% Ascii:111 char:o Count: 21 4.4% Ascii:112 char:p Count: 6 1.3% Ascii:114 char:r Count: 17 3.6% Ascii:115 char:s Count: 7 1.5% Ascii:116 char:t Count: 38 8.0% Ascii:117 char:u Count: 16 3.4% Ascii:120 char:x Count: 7 1.5%
[edit] Scala
import io.Source.fromFile
/** Counts how often each character occurs in a text file.
  *
  * The file is read completely and then closed, even if reading fails, so
  * no file handle is leaked. The result is a strict map rather than a lazy
  * view that would recompute the lengths on each access.
  *
  * @param filename path of the file to read
  * @return a map from each character found in the file to its number of occurrences
  */
def letterFrequencies(filename: String) = {
  val source = fromFile(filename)
  try {
    source.mkString
      .groupBy(c => c)
      .map { case (ch, occurrences) => (ch, occurrences.length) }
  } finally {
    source.close()
  }
}
[edit] Scheme
[edit] Imperative version
For brevity, outputs only lower-case letters.
#!/usr/local/bin/gosh
(use srfi-1) ;; iota
;; Occurrence counters indexed by character code: 256 slots, all starting at 0.
(define *freqs* (make-vector 256 0))
;; Script entry point: tally the characters of "../word-list.txt", then
;; print the counts for the letters a through z.  ARGS is not used.
(define (main args)
  (with-input-from-file "../word-list.txt"
    (lambda () (count-char-freqs)))
  (show-char-freqs #\a #\z))
;; Reads the current input port line by line until end of file and passes
;; every character of every line to count-char-freq.
(define (count-char-freqs)
  (let loop ((line (read-line)))
    (unless (eof-object? line)
      (for-each count-char-freq (string->list line))
      (loop (read-line)))))
;; Tallies one character in *freqs*.  Letters are folded to lower case so
;; both cases of a letter share a slot; non-alphabetic characters are ignored.
(define (count-char-freq ch)
  (if (char-alphabetic? ch)
      (let ((char-num (char->integer (char-downcase ch))))
        ;; char-alphabetic? may accept letters whose code lies beyond the
        ;; table (e.g. Greek or Cyrillic); skip those instead of indexing
        ;; past the end of the vector.
        (if (< char-num (vector-length *freqs*))
            (vector-set! *freqs* char-num
                         (+ 1 (vector-ref *freqs* char-num)))))))
;; Prints a heading followed by one line per character code in the
;; inclusive range FIRST-LETTER .. LAST-LETTER.
(define (show-char-freqs first-letter last-letter)
  (format #t "Letter Frequency~%")
  (let* ((start (char->integer first-letter))
         (span (+ 1 (- (char->integer last-letter) start))))
    (for-each show-char-freq (iota span start))))
;; Prints the character with code LET-NUM and its tally from *freqs* on one line.
(define (show-char-freq let-num)
  (format #t "~6a ~8a~%"
          (integer->char let-num)
          (vector-ref *freqs* let-num)))
Example output:
Letter frequency a 16421 b 4115 c 8216 d 5799 e 20144 f 2662 g 4129 h 5208 i 13980 j 430 k 1925 l 10061 m 5828 n 12097 o 12738 p 5516 q 378 r 13436 s 10210 t 12836 u 6489 v 1902 w 1968 x 617 y 3633 z 433
[edit] Seed7
$ include "seed7_05.s7i";
# Hash type mapping a character to an integer (used here as an occurrence count).
const type: charHash is hash [char] integer;
# Reads characters from IN until EOF, counts each distinct character and
# writes one "character count" line per character, keys in sorted order.
const proc: main is func
local
var charHash: numberOfChars is charHash.EMPTY_HASH;
var char: ch is ' ';
begin
ch := getc(IN);
while ch <> EOF do
if ch in numberOfChars then
incr(numberOfChars[ch]);
else
# First occurrence of this character: create its entry with count 1.
numberOfChars @:= [ch] 1;
end if;
ch := getc(IN);
end while;
for ch range sort(keys(numberOfChars)) do
writeln(ch <& " " <& numberOfChars[ch]);
end for;
end func;
Output when the program uses itself as input:
22 129 " 4 $ 1 & 2 ' 2 ( 6 ) 6 . 2 0 1 ... s 21 t 9 u 9 v 2 w 3 y 2
[edit] SIMPOL
Example: open a text file and compute letter frequency.
// Length passed to each getstring() call on the input stream.
constant iBUFSIZE 500
// Tallies the letters a-z (after lower-casing) found in the file `filename`.
// Returns the string s: a heading plus one "letter: count" line for each of
// the 26 letters, or an error message when the stream object comes back as .nul.
function main(string filename)
fsfileinputstream fpi
integer e, i, aval, zval, cval
string s, buf, c
array chars
e = 0
fpi =@ fsfileinputstream.new(filename, error=e)
if fpi =@= .nul
s = "Error, file """ + filename + """ not found{d}{a}"
else
chars =@ array.new()
aval = .charval("a")
zval = .charval("z")
// Slots 1..26 hold the counts for a..z; start them all at zero.
i = 1
while i <= 26
chars[i] = 0
i = i + 1
end while
buf = .lcase(fpi.getstring(iBUFSIZE, 1))
// NOTE(review): if endofdata is already true after the read that returned the
// final chunk, that chunk would not be counted - confirm against the stream docs.
while not fpi.endofdata and buf > ""
i = 1
while i <= .len(buf)
c = .substr(buf, i, 1)
cval = .charval(c)
// Only characters whose value lies between "a" and "z" are counted.
if cval >= aval and cval <= zval
chars[cval - aval + 1] = chars[cval - aval + 1] + 1
end if
i = i + 1
end while
buf = .lcase(fpi.getstring(iBUFSIZE, 1))
end while
// Build the report; {d}{a} presumably encodes a CR/LF line ending - verify.
s = "Character counts for """ + filename + """{d}{a}"
i = 1
while i <= chars.count()
s = s + .char(aval + i - 1) + ": " + .tostr(chars[i], 10) + "{d}{a}"
i = i + 1
end while
end if
end function s
As this was being created I realized that in [SIMPOL] I wouldn't have done it this way (in fact, I wrote it differently the first time and had to go back and change it to use an array afterward). In [SIMPOL] we would have used the set object. It acts similarly to a single-dimensional array, but can also use various set operations, such as difference, unite, intersect, etc. One of the interesting things is that each unique value is stored only once, and the number of duplicates is stored with it. The sample then looks a little cleaner:
// Length passed to each getstring() call on the input stream.
constant iBUFSIZE 500
// Tallies the letters a-z (after lower-casing) found in the file `filename`,
// using a set object instead of an array of counters.
// Returns the string s: a heading plus one "letter: count" line per element of
// the set, or an error message when the stream object comes back as .nul.
function main(string filename)
fsfileinputstream fpi
integer e, i, aval, zval
string s, buf, c
set chars
e = 0
fpi =@ fsfileinputstream.new(filename, error=e)
if fpi =@= .nul
s = "Error, file """ + filename + """ not found{d}{a}"
else
chars =@ set.new()
aval = .charval("a")
zval = .charval("z")
buf = .lcase(fpi.getstring(iBUFSIZE, 1))
// NOTE(review): if endofdata is already true after the read that returned the
// final chunk, that chunk would not be counted - confirm against the stream docs.
while not fpi.endofdata and buf > ""
i = 1
while i <= .len(buf)
c = .substr(buf, i, 1)
// Only characters whose value lies between "a" and "z" are added to the set.
if .charval(c) >= aval and .charval(c) <= zval
chars.addvalue(c)
end if
i = i + 1
end while
buf = .lcase(fpi.getstring(iBUFSIZE, 1))
end while
// Build the report; {d}{a} presumably encodes a CR/LF line ending - verify.
s = "Character counts for """ + filename + """{d}{a}"
i = 1
// Walk the elements of the set and ask it how often each value was added.
while i <= chars.count()
s = s + chars[i] + ": " + .tostr(chars.valuecount(chars[i]), 10) + "{d}{a}"
i = i + 1
end while
end if
end function s
The final stage simply reads the totals for each character. One caveat: if a character is unrepresented, it will not show up at all in this second implementation.
[edit] Tcl
# letterHistogram --
#   Counts the alphabetic characters of a file case-insensitively and prints
#   the resulting table with parray.
# Arguments:
#   fileName  path of the text file to analyse
proc letterHistogram {fileName} {
    # Seed a..z with zero so letters missing from short texts are still listed
    foreach letter [split "abcdefghijklmnopqrstuvwxyz" ""] {
        set frequency($letter) 0
    }
    # Slurp the file, then release the channel before counting
    set channel [open $fileName]
    set text [read $channel]
    close $channel
    # Tally every alphabetic character under its lower-case form
    foreach ch [split $text ""] {
        if {![string is alpha $ch]} {
            continue
        }
        incr frequency([string tolower $ch])
    }
    # Show the table
    parray frequency
}
# Print the letter histogram of the sample file.
letterHistogram {the/sample.txt}
[edit] TUSCRIPT
$$ MODE TUSCRIPT
- Letter frequency over a word list.
- The list is requested from a URL; every word is split into characters and
- each character is added to the dictionary "letters" with a running count.
- The dictionary is then unloaded into the parallel lists letter/size/cnt,
- both letter and cnt are reordered through a reversed index built from cnt
- (presumably descending by count - verify), and each pair is printed as
- "letter --- count".
words = REQUEST ("http://www.puzzlers.org/pub/wordlists/unixdict.txt")
DICT letters create
MODE {}
COMPILE
LOOP word=words
letters=SPLIT (word,|":?:")
LOOP letter=letters
DICT letters ADD/QUIET/COUNT letter
ENDLOOP
ENDLOOP
ENDCOMPILE
DICT letters unload letter,size,cnt
index =DIGIT_INDEX (cnt)
index =REVERSE (index)
letter =INDEX_SORT (letter,index)
cnt =INDEX_SORT (cnt,index)
frequency=JOIN (letter," --- ",cnt)
*{frequency}
Output:
e --- 20144 a --- 16421 i --- 13980 r --- 13436 t --- 12836 o --- 12738 n --- 12097 s --- 10210 l --- 10061 c --- 8216 u --- 6489 m --- 5828 d --- 5799 p --- 5516 h --- 5208 g --- 4129 b --- 4115 y --- 3633 f --- 2662 w --- 1968 k --- 1925 v --- 1902 x --- 617 z --- 433 j --- 430 q --- 378 ' --- 105 . --- 6 & --- 6 1 --- 2 9 --- 1 8 --- 1 7 --- 1 6 --- 1 5 --- 1 4 --- 1 3 --- 1 2 --- 1 0 --- 1
[edit] TXR
[edit] Pattern Matching Plus Embedded Lisp
@; Counts the letters A-Z/a-z of the input case-insensitively and prints one
@; "LETTER: count" line per letter that occurred.
@; h is the table from upper-cased letter to its running count.
@(do (defvar h (make-hash nil nil t)))
@; collect walks the lines and coll the letters within a line; ":vars ()"
@; keeps both from binding anything, since counting is done as a side effect.
@(collect :vars ())
@(coll :vars ())@\
@{letter /[A-Za-z]/}@(filter :upcase letter)@\
@(do (inc [h letter 0]))@\
@(end)
@(end)
@; Print every table entry. NOTE(review): the iteration order of dohash is
@; presumably unspecified - confirm if sorted output is needed.
@(do (dohash (key value h)
(format t "~a: ~a\n" key value)))
$ ./txr letterfreq.txr /usr/share/dict/words A: 64123 B: 15524 C: 31569 [ ... abridged ... ] X: 2124 Y: 12507 Z: 3238
[edit] Just Embedded Lisp
@; Variant written entirely in embedded Lisp: reads /usr/share/dict/words
@; character by character and prints one "LETTER: count" line per letter.
@(do (defun lazy-char-stream (s)
;; Presumably a lazy list of the characters read from stream s, ending
;; once get-char yields nil - confirm against the gen documentation.
(let (ch) (gen (set ch (get-char s)) ch)))
(let ((h (make-hash nil nil t))
(s (open-file "/usr/share/dict/words" "r")))
;; Tally alphabetic characters under their upper-case form, starting at 0.
(each ((ch (lazy-char-stream s)))
(if (chr-isalpha ch)
(inc [h (chr-toupper ch) 0])))
;; Print every entry of the table.
(dohash (key value h)
(format t "~a: ~a\n" key value))))
[edit] Vala
Counts every character except new line character.
using Gee;
/**
 * Counts how often each character (byte) occurs in the file named by the
 * first command-line argument and prints one line per distinct character.
 * Newline characters are not counted because the file is read line by line.
 *
 * Prints a message to stderr and returns without counting when no file name
 * is given or the file cannot be opened.
 *
 * @param args command line; args[1] is the path of the file to analyse
 */
void main(string[] args){
    if (args.length < 2){
        stderr.printf("usage: %s FILE\n", args[0]);
        return;
    }
    string filename = args[1];
    var file = FileStream.open(filename, "r");
    // FileStream.open yields null when the file cannot be opened.
    if (file == null){
        stderr.printf("cannot open %s\n", filename);
        return;
    }
    var counter = new HashMap<char, int>();
    string? line;
    while ((line = file.read_line()) != null){
        // Take the length once per line instead of on every loop test.
        int len = line.length;
        for (int x = 0; x < len; x++){
            // A key that is not yet present reads as 0, so this also
            // creates the entry on first sight.
            counter[line[x]] = counter[line[x]] + 1;
        }
    }
    foreach (var elem in counter.entries){
        stdout.printf("%c occured %d times\n", elem.key, elem.value);
    }
}
Sample output (run on its own source code) with several lines omitted:
v occured 5 times , occured 4 times w occured 2 times occured 19 times S occured 1 times 1 occured 2 times ! occured 1 times k occured 1 times l occured 22 times
[edit] VBA
Public Sub LetterFrequency(fname)
'count number of letters in text file "fname" (ASCII-coded)
'note: we count all characters but print only the letter frequencies
'output goes to the Immediate window: one line per letter a..z with the
'combined count of its lower- and upper-case occurrences
Dim Freqs(255) As Long 'one counter per byte value; Long arrays start at 0
Dim abyte As Byte
Dim ascal As Byte 'ascii code for lowercase a
Dim ascau As Byte 'ascii code for uppercase a
Dim i As Integer 'loop index (declared so the Sub works under Option Explicit)
Dim fnum As Integer 'file number of the input file
'ask for a free file number instead of assuming #1 is unused
fnum = FreeFile
'try to open the file
On Error GoTo CantOpen
Open fname For Input As #fnum
On Error GoTo 0
'process file byte-per-byte
While Not EOF(fnum)
abyte = Asc(Input(1, #fnum))
Freqs(abyte) = Freqs(abyte) + 1
Wend
Close #fnum
'add lower and upper case together and print result
Debug.Print "Frequencies:"
ascal = Asc("a")
ascau = Asc("A")
For i = 0 To 25
Debug.Print Chr$(ascal + i), Freqs(ascal + i) + Freqs(ascau + i)
Next i
Exit Sub
CantOpen:
'the Open failed, so there is nothing of ours to close here; a bare Close
'would close every file the caller has open
Debug.Print "can't find or read the file "; fname
End Sub
Output:
LetterFrequency "d:\largetext.txt" Frequencies: a 24102 b 4985 c 4551 d 19127 e 61276 f 2734 g 10661 h 8243 i 21589 j 4904 k 7186 l 12026 m 7454 n 31963 o 19021 p 4960 q 37 r 21166 s 13403 t 21090 u 6117 v 8612 w 5017 x 168 y 299 z 4159
[edit] Vedit macro language
// Open a text file and, for each letter A..Z, display the letter followed by
// the number returned by a search for it over the file.
File_Open("c:\txt\a_text_file.txt")
Update()
for (#1='A'; #1<='Z'; #1++) {
// Write the current letter into text register 103 so it can be used both as
// the search string and as the message text.
Out_Reg(103) Char_Dump(#1,NOCR) Out_Reg(CLEAR)
// NOTE(review): with ALL the search presumably returns the number of matches,
// and lower-case letters are matched only if searches ignore case - confirm.
#2 = Search(@103, BEGIN+ALL+NOERR)
Message(@103) Num_Type(#2)
}
Example output:
A 76 B 23 C 51 D 64 E 192 F 51 G 32 H 59 I 146 J 1 K 9 L 73 M 34 N 94 O 113 P 27 Q 1 R 92 S 89 T 138 U 63 V 26 W 35 X 16 Y 16 Z 2
[edit] Whitespace
; Tally the characters read from input in the heap (slot n = count of
; character code n), then print "char count" for every code 1..127 whose
; tally is non-zero. Slot 0 doubles as the cell ichr reads into, so code 0
; is never reported.
push 127
; Initialize a slot in the heap for each ASCII character.
0:
dup
push 0
store ; heap[n] = 0
push 1
sub ; n = n - 1
dup
jn 1 ; n dropped below 0: every slot is cleared.
jump 0
; Read until EOF, incrementing the relevant heap slot.
; The -1 left over from the loop above stays at the bottom of the stack.
1:
push 0
dup
ichr ; read one character into heap[0],
load ; then fetch it back onto the stack.
dup
jn 2 ; Done reading, proceed to print.
dup
load ; current tally of this character
push 1
add
store ; heap[char] = tally + 1
jump 1
; Stack is [-1 -1], but [0] would be nice.
2:
sub
; Print characters with tallies greater than 0.
; The stack holds the code examined last; it is incremented before each test,
; so the first code checked is 1.
3:
push 1
add
dup
push 128
sub
jz 4 ; All done.
dup
load
jz 3 ; Don't print if no occurrences.
dup
ochr ; Display the character,
push 32
ochr ; a space,
dup
load
onum ; its frequency,
push 10
ochr ; and a newline.
jump 3
; Drop the loop counter and stop.
4:
pop
exit
- Output:
$ cat freq.ws | wspace freq.ws 64 55 119
[edit] XPL0
This takes advantage of DOS's ability to redirect input from a file to the keyboard. The input file must be terminated with an end-of-file character ($1A). Usage: count <filename.ext
include c:\cxpl\codes; \intrinsic 'code' declarations
\Counts every character read from device 1 up to and including the
\end-of-file character $1A, then shows the tallies for codes 0..127 as a
\table of 8 columns by 16 rows filled in columnar order.
\A holds one tally per character code; C is the current code; I counts cells.
int A(256), C, I;
[for C:= 0 to 256-1 do A(C):= 0;
repeat C:= ChIn(1); \device 1 doesn't buffer nor echo chars
A(C):= A(C)+1; \count character
\the test comes after the increment, so the $1A terminator is tallied too
until C=\EOF\$1A;
C:= 0;
for I:= 0 to 128-1 do \only show 7-bit ASCII
[ChOut(0, \tab\9);
\line feed and carriage return are shown as substitute codes on device 6
case C of
$0A: ChOut(6, $19); \line feed = down arrow
$0D: ChOut(6, $1B) \carriage return = left arrow
other ChOut(6, C); \all other characters display on device 6
ChOut(0, ^ );
IntOut(0, A(C)); \show count
\the next cell in the same row is 16 codes further on
C:= C+16; \columnar order
\after the 8th cell of a row: new line, then step back to the first column
\of the following row (undo the 8 steps of 16 and advance by one code)
if (I&7) = 7 then [CrLf(0); C:= C-8*16+1];
];
]
- Programming Tasks
- Solutions by Programming Task
- ACL2
- Ada
- Aikido
- APL
- AutoHotkey
- AutoIt
- AWK
- BBC BASIC
- Bracmat
- C
- C sharp
- C++
- Common Lisp
- Clojure
- D
- FBSL
- Forth
- Fortran
- Go
- Groovy
- Haskell
- Icon
- Unicon
- Icon Programming Library
- J
- Java
- K
- Liberty BASIC
- Lua
- Maple
- Mathematica
- NetRexx
- Objeck
- Objective-C
- OCaml
- OxygenBasic
- PARI/GP
- Pascal
- Perl
- Perl 6
- PHP
- PicoLisp
- PL/I
- Prolog
- PureBasic
- Python
- R
- Racket
- Raven
- REXX
- Ruby
- Ruby 2.0
- Run BASIC
- Scala
- Scheme
- Scheme examples needing attention
- Examples needing attention
- Seed7
- SIMPOL
- Tcl
- TUSCRIPT
- TXR
- Vala
- Gee
- VBA
- Vedit macro language
- Whitespace
- XPL0
