I'm working on modernizing Rosetta Code's infrastructure. Starting with communications. Please accept this time-limited open invite to RC's Slack. --Michael Mol (talk) 20:59, 30 May 2020 (UTC)

Letter frequency

From Rosetta Code
Task
Letter frequency
You are encouraged to solve this task according to the task description, using any language you may know.
Task

Open a text file and count the occurrences of each letter.

Some of these programs count all characters (including punctuation), but some only count letters A to Z.

8080 Assembly[edit]

This program prints the frequency of each printable ASCII character contained in the file.

;; Letter frequency for CP/M on the 8080: counts every byte value in the
;; file named on the command line, then prints the count of each
;; printable ASCII character that occurred.
;; BDOS entry point, BDOS function numbers and fixed page-zero addresses:
bdos:		equ	5	; CP/M syscalls 
putch: equ 2 ; Print a character
puts: equ 9 ; Print a string
fopen: equ 15 ; Open a file
fread: equ 20 ; Read a file
fcb: equ 5ch ; FCB for file given on command line
dma: equ 80h ; Default DMA
org 100h ; CP/M loads the program starting at page 1
;; Zero out pages two and three (to keep a 16-bit counter
;; for each possible byte in the file).
;; We can do this because this program is small enough to
;; fit in page 1 in its entirety.
;; Each pass of the loop clears two bytes, so 256 passes clear
;; the 512 bytes from 200h up to 3FFh.
xra a ; Zero A.
mov b,a ; Zero B too (make it loop 256 times)
lxi d,200h ; Start of page two
zero: stax d ; Zero out a byte (store A, which is zero)
inx d ; Next byte
stax d ; Zero out another byte
inx d ; Next byte
dcr b ; Decrement the loop counter.
jnz zero ; Continue until B comes back to zero.
;; Open the file given on the command line.
lxi d,fcb ; CP/M always tries to parse the command line,
mvi c,fopen ; and gives us a file "object" in page zero.
call bdos ; We can just call fopen on it.
inr a ; It sets A=FF on error, so if incrementing A
jz error ; rolls back over to 0, that's an error.
;; Process the file record by record.
;; In CP/M, each file consists of a number of 128-byte
;; records. An exact size is not kept.
;; If a text file is not an exact multiple of 128 bytes
;; long, the last record will contain a ^Z (26 decimal),
;; and anything after that byte should be ignored.
read: lxi d,fcb ; From the file control block (the "object"),
mvi c,fread ; read one record. By default it ends up in
call bdos ; the last half of page zero.
ana a ; Zero carry flag.
rar ; Low bit says if end reached
jc output ; If so, go print the table
ana a ; If any other bits are set, that's a
jnz error ; read error.
;; Count the characters in the current record.
;; The record sits at 80h..FFh, so E wraps round to zero (setting
;; the zero flag) immediately after the last byte has been counted.
lxi d,dma-1 ; Set DE to point just before the record
byte: inr e ; Go to the next byte.
jz read ; If end of record, go get next record.
ldax d ; Grab the current byte
cpi 26 ; If it is EOF, we're done.
jz output ; Go print the table
mov l,a ; Otherwise, increment the counter for this
mvi h,2 ; character: the low byte is kept in page 2.
inr m ; 'm' means the value in memory at HL.
jnz byte ; If no rollover, we're done; count next byte
inr h ; But we're keeping a 16-bit counter, so
inr m ; if there is rollover, increment high byte.
jmp byte ; The high byte is in page 3 -unorthodox, but
; it's easy to access here.
;; We've done the whole file. For each printable
;; ASCII character (32..126), print the character and
;; the count.
output: mvi a,32 ; Start at 32.
;; Print a character and its counter
char: mov l,a ; Load 16-bit counter into DE. Low byte
mvi h,2 ; is in page 2 at a;
mov e,m
inr h ; And the high byte is in page 3.
mov d,m
mov a,d ; Test if the counter is zero
ora e
mov a,l ; Put the character back in A
jz next ; If zero, don't print anything.
push psw ; If not, push the character,
push d ; and the counter.
mvi c,putch ; Print the current character
mov e,a
call bdos
lxi d,separator ; Then print ': '
call outs
;; Then convert the counter to ASCII
;; Digits are produced least significant first and stored backwards
;; from numend, so the finished string ends with the CR/LF/'$' there.
pop d ; Retrieve the counter
lxi h,numend ; Get pointer to end of digit string
push h ; And put it on the stack
dgtloop: xchg ; Put counter in HL (16-bit accumulator)
lxi b,-10 ; Divisor is 10 (held as -10 so DAD subtracts it)
mov d,b ; Start quotient at -1 (we'll loop once
mov e,b ; too many, this corrects for it)
divloop: inx d ; Increment the quotient,
dad b ; subtract 10 from the dividend,
jc divloop ; and keep doing it until it goes negative
lxi b,10+'0' ; Add 10 back to get the remainder,
dad b ; plus '0' to make it ASCII.
mov a,l
pop h ; Retrieve digit pointer
dcx h ; Decrement it (to point at current digit)
mov m,a ; Store the digit
push h ; And store the new pointer
mov a,d ; Check if the quotient is now zero
ora e
jnz dgtloop ; If not, do the next digit.
pop d ; Set DE to point at the first digit
call outs ; And output it as a string.
pop psw ; Restore the character
next: inr a ; Increment it
cpi 127 ; Did we just do the last character?
jnz char ; If not, go do the next character.
ret ; If so, we're done.
;; Print the error message
;; (falls through into outs, whose jump to bdos also ends the program
;; because the BDOS return then uses the caller's return address)
error: lxi d,errmsg
;; Print string
;; Expects DE to point at a '$'-terminated string.
outs: mvi c,puts
jmp bdos
;; Strings
errmsg: db '?$' ; "Error message" (if file error)
separator: db ': $' ; Goes in between character and number
number: db '00000' ; Space to keep ASCII representation of
numend: db 13,10,'$' ; a 16-bit number, plus newline.
 
 

ACL2[edit]

;; Return TBL with the count stored under KEY raised by one.  A KEY that
;; is not present yet is added at the end of the list with a count of 1.
(defun increment-alist (tbl key)
  (if (endp tbl)
      (list (cons key 1))
    (let ((entry (first tbl))
          (others (rest tbl)))
      (if (eql (car entry) key)
          (cons (cons key (1+ (cdr entry))) others)
        (cons entry (increment-alist others key))))))
 
;; Build an association list mapping each element of XS to the number of
;; times it occurs.  The tail of XS is tallied first, then its head.
(defun freq-table (xs)
  (cond ((endp xs) nil)
        (t (increment-alist (freq-table (rest xs))
                            (first xs)))))
 
;; Frequency table of the characters in the string STR.
(defun letter-freq (str)
  (let ((chars (coerce str 'list)))
    (freq-table chars)))

Ada[edit]

with Ada.Text_IO;

--  Count how often each character occurs in the file
--  "letter_frequency.adb" and print every character that occurs at
--  least once, together with its count.
procedure Letter_Frequency is
   Counters: array (Character) of Natural := (others => 0); -- initialize all Counters to 0
   C: Character;
   File: Ada.Text_IO.File_Type;

begin
   Ada.Text_IO.Open(File, Mode => Ada.Text_IO.In_File, Name => "letter_frequency.adb");
   while not Ada.Text_IO.End_Of_File(File) loop
      --  Text_IO.Get skips line terminators, so line ends are not counted.
      Ada.Text_IO.Get(File, C);
      Counters(C) := Counters(C) + 1;
   end loop;
   --  Release the file as soon as it has been read completely.
   Ada.Text_IO.Close(File);

   for I in Counters'Range loop
      if Counters(I) > 0 then
         Ada.Text_IO.Put_Line("'" & I & "':" & Integer'Image(Counters(I)));
      end if;
   end loop;
end Letter_Frequency;
Output:
(counting the characters of its own source code):
>./letter_frequency
' ': 122
'"': 6
'&': 3

... [a lot of lines omitted]

'x': 7
'y': 5
'z': 1

Aikido[edit]

// Count the letters a-z (ignoring case) in the file named by the first
// argument and print one "letter count" line per letter.
import ctype
 
// One counter per letter; index 0 stands for 'a'.
var letters = new int [26]
 
var s = openin (args[0])
while (!s.eof()) {
var ch = s.getchar()
// NOTE(review): presumably eof only becomes true after a read has failed,
// hence the second check before ch is used -- confirm
if (s.eof()) {
break
}
if (ctype.isalpha (ch)) {
var n = cast<int>(ctype.tolower(ch) - 'a')
++letters[n]
}
}
 
foreach i letters.size() {
println (cast<char>('a' + i) + " " + letters[i])
}

Aime[edit]

Letters proper:

# Tally every byte of unixdict.txt, then print one line per letter A-Z.
file f;
index x;
integer c;
 
f.affix("unixdict.txt");
 
# NOTE(review): pick presumably yields -1 at end of file, which makes the XOR zero -- confirm
while ((c = f.pick) ^ -1) {
x[c] += 1;
}
 
c = 'A';
while (c <= 'Z') {
# The nested += folds the lower-case tally into the upper-case entry before it is printed.
o_form("%c: /w5/\n", c, x[c] += x[c + 'a' - 'A'] += 0);
c += 1;
}

All chars:

# Tally every byte of unixdict.txt and print each byte that occurred with its count.
file f;
index x;
integer c, n;
 
f.affix("unixdict.txt");
 
# NOTE(review): pick presumably yields -1 at end of file, which makes the XOR zero -- confirm
while ((c = f.pick) ^ -1) {
x[c] += 1;
}
 
# Walk the index as (key, value) pairs: c is the byte, n its count.
for (c, n in x) {
o_form("%c: /w5/\n", c, n);
}

ALGOL 68[edit]

 
# count the letters a-z and A-Z in the file named by input file name and print one line per letter #
BEGIN
[0:max abs char]INT histogram;
FOR i FROM 0 TO max abs char DO histogram[i] := 0 OD;
FILE input file;
STRING input file name = "Letter_frequency.a68";
IF open (input file, input file name, stand in channel) /= 0 THEN
put (stand error, ("Cannot open ", input file name, newline));
stop
ELSE
# at end of file the handler closes the file and jumps out of the read loop #
on file end (input file, (REF FILE f) BOOL: (close (f); GOTO finished))
FI;
# this loop has no exit of its own; it is left through the end-of-file handler #
DO
STRING s;
get (input file, (s, newline));
FOR i TO UPB s DO
CHAR c = s[i];
IF "A" <= c AND c <= "Z" OR "a" <= c AND c <= "z" THEN
histogram[ABS c] PLUSAB 1
FI
OD
OD;
# note for review: this close looks unreachable because the loop above only ends via the jump #
close (input file);
finished:
FOR i FROM ABS "A" TO ABS "Z" DO printf (($a3xg(0)l$, REPR i, histogram[i])) OD;
FOR i FROM ABS "a" TO ABS "z" DO printf (($a3xg(0)l$, REPR i, histogram[i])) OD
END
 
Output:
Counting letters in its own source code:
A   11
B   9
C   2
D   13
E   11
F   14
G   4
H   3
I   10
J   0
[[ Omitted for K – Z and a – p ]]
q   1
r   15
s   19
t   24
u   10
v   0
w   3
x   4
y   1
z   2

APL[edit]

 
⍝ freq: two-column table of the distinct items of ⍵ (in order of first
⍝ appearance) and how often each occurs. (∪⍵)∘.⍷⍵ compares every distinct
⍝ item with every item, +/ sums each row, ⍪∪⍵ supplies the item column.
freq←{(⍪∪⍵),+/(∪⍵)∘.⍷⍵}
 
freq 0 1 2 3 2 3 4 3 4 4 4
0 1
1 1
2 2
3 3
4 4
 
freq 'balloon'
b 1
a 1
l 2
o 2
n 1
 

AppleScript[edit]

This is probably best handled with vanilla AppleScript and ASObjC, each doing what it does best. The test text used here is the one specified for the Word frequency task.

use AppleScript version "2.4" -- OS X 10.10 (Yosemite) or later
use framework "Foundation"
use scripting additions
 
-- letterFrequencyinFile(theFile)
-- Returns a list of {letter, |count|} records, sorted by letter, giving how
-- often each (lower-cased) letter occurs in the text file theFile.
-- Raises an error if the file's text cannot be read.
on letterFrequencyinFile(theFile)
	-- Read the file as an NSString, letting the system guess the encoding.
	set fileText to current application's class "NSString"'s stringWithContentsOfFile:(POSIX path of theFile) ¬
		usedEncoding:(missing value) |error|:(missing value)
	-- A failed read comes back as missing value; report it here rather than
	-- failing later with an unrelated message.
	if fileText is missing value then error ("letterFrequencyinFile: could not read " & (POSIX path of theFile))
	-- Get the NSString's non-letter delimited runs, lower-cased, as an AppleScript list of texts.
	-- The switch to vanilla objects is for speed and the ability to extract 'characters'.
	set nonLetterSet to current application's class "NSCharacterSet"'s letterCharacterSet()'s invertedSet()
	script o
		property letterRuns : (fileText's lowercaseString()'s componentsSeparatedByCharactersInSet:(nonLetterSet)) as list
	end script
	
	-- Extract the characters from the runs and add them to an NSCountedSet to have the occurrences of each value counted.
	-- No more than 50,000 characters are extracted in one go to avoid slowing or freezing the script.
	set countedSet to current application's class "NSCountedSet"'s new()
	repeat with i from 1 to (count o's letterRuns)
		set thisRun to item i of o's letterRuns
		set runLength to (count thisRun)
		-- The inner loop has its own variable so it no longer shadows the run counter i.
		repeat with k from 1 to runLength by 50000
			set j to k + 49999
			if (j > runLength) then set j to runLength
			tell countedSet to addObjectsFromArray:(characters k thru j of thisRun)
		end repeat
	end repeat
	
	-- Work through the counted set's contents and build a list of records showing how many of what it received.
	set output to {}
	repeat with thisLetter in countedSet's allObjects()
		set thisCount to (countedSet's countForObject:(thisLetter))
		set end of output to {letter:thisLetter, |count|:thisCount}
	end repeat
	
	-- Derive an array of dictionaries from the list and sort it on the letters.
	set output to current application's class "NSMutableArray"'s arrayWithArray:(output)
	set byLetter to current application's class "NSSortDescriptor"'s sortDescriptorWithKey:("letter") ¬
		ascending:(true) selector:("localizedStandardCompare:")
	tell output to sortUsingDescriptors:({byLetter})
	
	-- Convert back to a list of records and return the result.
	return output as list
end letterFrequencyinFile
 
-- Test with the text file for the "Word frequency" task.
set theFile to ((path to desktop as text) & "135-0.txt") as alias
return letterFrequencyinFile(theFile)
Output:
{{letter:"a", |count|:207133}, {letter:"à", |count|:63}, {letter:"â", |count|:56}, {letter:"æ", |count|:116}, {letter:"b", |count|:37506}, {letter:"c", |count|:67354}, {letter:"ç", |count|:50}, {letter:"d", |count|:108747}, {letter:"e", |count|:330738}, {letter:"é", |count|:1474}, {letter:"è", |count|:299}, {letter:"ê", |count|:74}, {letter:"ë", |count|:5}, {letter:"f", |count|:56206}, {letter:"g", |count|:48598}, {letter:"h", |count|:176839}, {letter:"i", |count|:175288}, {letter:"î", |count|:39}, {letter:"ï", |count|:18}, {letter:"j", |count|:5840}, {letter:"k", |count|:14433}, {letter:"l", |count|:99543}, {letter:"m", |count|:62219}, {letter:"n", |count|:169954}, {letter:"ñ", |count|:2}, {letter:"o", |count|:184388}, {letter:"ô", |count|:34}, {letter:"œ", |count|:38}, {letter:"p", |count|:43387}, {letter:"q", |count|:2533}, {letter:"r", |count|:148671}, {letter:"s", |count|:162047}, {letter:"t", |count|:235526}, {letter:"u", |count|:68270}, {letter:"ù", |count|:18}, {letter:"û", |count|:9}, {letter:"ü", |count|:39}, {letter:"v", |count|:26268}, {letter:"w", |count|:56513}, {letter:"x", |count|:4027}, {letter:"y", |count|:39183}, {letter:"z", |count|:1906}}


Or, if we want to get something up and running (and tabulating output) with a minimum of new code, we can click together a composition of generic functions:

use AppleScript version "2.4"
use framework "Foundation"
use scripting additions
 
 
------------- CHARACTER COUNTS FROM FILE PATH -------------
 
-- charCounts :: FilePath -> Either String [(Char, Int)]
on charCounts(fp)
	-- Either a Left carrying the file-reading error, or a Right holding
	-- (character, count) pairs ordered by descending count.
	script tally
		on |λ|(txt)
			set charRuns to groupBy(my eq, sort(characters of txt))
			set pairs to map(fanArrow(my head, my |length|), charRuns)
			|Right|(sortBy(flip(comparing(my snd)), pairs))
		end |λ|
	end script
	
	bindLR(readFileLR(fp), tally)
end charCounts
 
 
-------------------------- TEST ---------------------------
on run
	-- Tabulate the character counts of the sample file in four columns,
	-- or return the error message if the file could not be read.
	set intColumns to 4
	set counted to charCounts("~/Code/charCount/readme.txt")
	
	either(identity, frequencyTabulation(intColumns), counted)
end run
 
 
------------------------- DISPLAY -------------------------
 
-- frequencyTabulation :: Int -> [(Char, Int)] -> String
on frequencyTabulation(intCols)
	-- A script whose |λ| renders (character, count) pairs as a table
	-- of intCols columns. The count width is taken from the first pair.
	script
		on |λ|(pairs)
			set countWidth to length of (snd(item 1 of pairs) as string)
			script cellText
				on |λ|(pair)
					set label to justifyRight(5, " ", showChar(fst(pair)))
					set tally to justifyRight(countWidth, " ", snd(pair) as string)
					label & " -> " & tally
				end |λ|
			end script
			showColumns(intCols, map(cellText, pairs))
		end |λ|
	end script
end frequencyTabulation
 
 
-------------------- GENERIC FUNCTIONS --------------------
 
-- Left :: a -> Either a b
on |Left|(x)
	-- An Either record whose Left slot holds x and whose Right slot is empty.
	return {type:"Either", |Left|:x, |Right|:missing value}
end |Left|
 
 
-- Right :: b -> Either a b
on |Right|(x)
	-- An Either record whose Right slot holds x and whose Left slot is empty.
	return {type:"Either", |Left|:missing value, |Right|:x}
end |Right|
 
 
-- Tuple (,) :: a -> b -> (a, b)
on Tuple(a, b)
	-- A record pairing two values, which may be of different types;
	-- the members are stored under the labels |1| and |2|.
	return {type:"Tuple", |1|:a, |2|:b, length:2}
end Tuple
 
 
-- Magnitude of x, without its sign.
-- abs :: Num -> Num
on abs(x)
	if x < 0 then return -x
	return x
end abs
 
 
-- bindLR (>>=) :: Either a -> (a -> Either b) -> Either b
on bindLR(m, mf)
	-- Passes a Left value through untouched; otherwise applies mf
	-- to the contents of the Right slot.
	if (|Left| of m) is missing value then
		mReturn(mf)'s |λ|(|Right| of m)
	else
		m
	end if
end bindLR
 
 
-- chunksOf :: Int -> [a] -> [[a]]
on chunksOf(n, xs)
	-- xs divided into consecutive sublists of n items each;
	-- the final sublist holds whatever remains.
	set total to length of xs
	script slice
		on |λ|(acc, startAt)
			set stopAt to (startAt + n) - 1
			if stopAt < total then
				acc & {items startAt thru stopAt of xs}
			else
				acc & {items startAt thru -1 of xs}
			end if
		end |λ|
	end script
	foldl(slice, {}, enumFromThenTo(1, 1 + n, total))
end chunksOf
 
 
-- comparing :: (a -> b) -> (a -> a -> Ordering)
on comparing(f)
	-- A comparator script: applies f to both arguments and yields
	-- -1, 0 or 1 according to how the two results compare.
	script
		on |λ|(a, b)
			tell mReturn(f)
				set keyA to |λ|(a)
				set keyB to |λ|(b)
			end tell
			if keyA < keyB then return -1
			if keyA > keyB then return 1
			return 0
		end |λ|
	end script
end comparing
 
 
-- concatMap :: (a -> [b]) -> [a] -> [b]
on concatMap(f, xs)
	-- Applies f to each item of xs (passing item, position and the
	-- whole list) and concatenates the results.
	set joined to {}
	tell mReturn(f)
		repeat with k from 1 to (length of xs)
			set joined to joined & (|λ|(item k of xs, k, xs))
		end repeat
	end tell
	joined
end concatMap
 
 
-- either :: (a -> c) -> (b -> c) -> Either a b -> c
on either(lf, rf, e)
	-- Applies lf to the contents of a Left, or rf to the contents of a Right.
	if (|Left| of e) is not missing value then
		tell mReturn(lf) to |λ|(|Left| of e)
	else
		tell mReturn(rf) to |λ|(|Right| of e)
	end if
end either
 
 
-- enumFromThenTo :: Int -> Int -> Int -> [Int]
on enumFromThenTo(x1, x2, y)
	-- The integers from x1 towards y, stepping by the difference
	-- between x2 and x1.
	set gap to x2 - x1
	set stride to max(1, abs(gap)) * (signum(gap))
	set out to {}
	repeat with v from x1 to y by stride
		set end of out to v
	end repeat
	out
end enumFromThenTo
 
 
-- eq (==) :: Eq a => a -> a -> Bool
on eq(a, b)
	-- True when the two arguments are equal.
	return a = b
end eq
 
 
-- A function from a single value to a Tuple holding
-- the results of applying f and g to that value.
-- fanArrow (&&&) :: (a -> b) -> (a -> c) -> (a -> (b, c))
on fanArrow(f, g)
	script
		on |λ|(x)
			set viaF to mReturn(f)'s |λ|(x)
			set viaG to mReturn(g)'s |λ|(x)
			Tuple(viaF, viaG)
		end |λ|
	end script
end fanArrow
 
 
-- flip :: (a -> b -> c) -> b -> a -> c
on flip(f)
	-- A script whose |λ| calls f with its two arguments exchanged.
	script
		property wrapped : mReturn(f)
		on |λ|(a, b)
			wrapped's |λ|(b, a)
		end |λ|
	end script
end flip
 
 
-- foldl :: (a -> b -> a) -> a -> [b] -> a
on foldl(f, startValue, xs)
	-- Left fold: threads an accumulator through xs from first to last,
	-- passing f the accumulator, the item, its position and the list.
	set acc to startValue
	tell mReturn(f)
		repeat with k from 1 to (length of xs)
			set acc to |λ|(acc, item k of xs, k, xs)
		end repeat
	end tell
	return acc
end foldl
 
 
-- fst :: (a, b) -> a
on fst(tpl)
	-- First member of a pair, whether a Tuple record or a two-item list.
	if class of tpl is record then return |1| of tpl
	return item 1 of tpl
end fst
 
 
-- Typical usage: groupBy(on(eq, f), xs)
-- groupBy :: (a -> a -> Bool) -> [a] -> [[a]]
on groupBy(f, xs)
	-- Splits xs into runs of adjacent items; an item joins the current
	-- run when f holds between that run's first item and the item.
	set mf to mReturn(f)
	
	script enGroup
		on |λ|(a, x)
			set grp to active of a
			if length of grp > 0 then
				set h to item 1 of grp
			else
				set h to missing value
			end if
			
			if h is not missing value and mf's |λ|(h, x) then
				{active:grp & {x}, sofar:sofar of a}
			else
				{active:{x}, sofar:(sofar of a) & {grp}}
			end if
		end |λ|
	end script
	
	if length of xs < 1 then return {}
	
	set dct to foldl(enGroup, {active:{item 1 of xs}, sofar:{}}, rest of xs)
	if length of (active of dct) > 0 then
		sofar of dct & {active of dct}
	else
		sofar of dct
	end if
end groupBy
 
 
-- head :: [a] -> a
on head(xs)
	-- First item of xs, or missing value for an empty list.
	if xs = {} then return missing value
	return item 1 of xs
end head
 
 
-- identity :: a -> a
on identity(x)
	-- Hands back its argument as it was given.
	return x
end identity
 
 
-- justifyRight :: Int -> Char -> String -> String
on justifyRight(n, cFiller, strText)
	-- strText padded on the left with cFiller to a width of n;
	-- text already n characters or longer is returned as it is.
	if n > length of strText then
		set padded to (replicate(n, cFiller) as text) & strText
		return text -n thru -1 of padded
	end if
	return strText
end justifyRight
 
 
-- length :: [a] -> Int
on |length|(xs)
	-- Length of a list or string; any other class of value gets
	-- a large stand-in number instead.
	set c to class of xs
	if list is c or string is c then return length of xs
	return (2 ^ 29 - 1) -- (maxInt - simple proxy for non-finite)
end |length|
 
 
-- map :: (a -> b) -> [a] -> [b]
on map(f, xs)
	-- A new list holding the result of applying f to each item of xs;
	-- f also receives the item's position and the whole list.
	set mapped to {}
	tell mReturn(f)
		repeat with k from 1 to (length of xs)
			set end of mapped to |λ|(item k of xs, k, xs)
		end repeat
	end tell
	return mapped
end map
 
 
-- max :: Ord a => a -> a -> a
on max(x, y)
	-- The greater of the two arguments (y when they are equal).
	if x > y then return x
	return y
end max
 
-- maximum :: Ord a => [a] -> a
on maximum(xs)
	-- Largest item of xs; missing value when xs is empty.
	script larger
		on |λ|(a, b)
			if a is missing value or b > a then
				b
			else
				a
			end if
		end |λ|
	end script
	
	foldl(larger, missing value, xs)
end maximum
 
 
-- partition :: (a -> Bool) -> [a] -> ([a], [a])
on partition(f, xs)
	-- A Tuple of two lists: the items of xs for which f holds,
	-- and the items for which it does not, each in original order.
	set matched to {}
	set unmatched to {}
	tell mReturn(f)
		repeat with k from 1 to (length of xs)
			set v to item k of xs
			if |λ|(v) then
				set end of matched to v
			else
				set end of unmatched to v
			end if
		end repeat
	end tell
	Tuple(matched, unmatched)
end partition
 
 
-- mReturn :: First-class m => (a -> b) -> m (a -> b)
on mReturn(f)
	-- A script object is returned as it is; a plain handler is wrapped
	-- in a script that exposes it as |λ|.
	if class of f is script then return f
	script
		property |λ| : f
	end script
end mReturn
 
 
-- readFileLR :: FilePath -> Either String IO String
-- Reads the file at strPath as UTF-8 text.
-- Returns a Right holding the text, or a Left holding the
-- localized description of the error when nothing could be read.
on readFileLR(strPath)
set ca to current application
-- NOTE(review): 'reference' appears to act as the by-reference error slot,
-- so that the call yields both the string and the error -- confirm
set e to reference
-- The path is standardized before reading (stringByStandardizingPath).
set {s, e} to (ca's NSString's ¬
stringWithContentsOfFile:((ca's NSString's ¬
stringWithString:strPath)'s ¬
stringByStandardizingPath) ¬
encoding:(ca's NSUTF8StringEncoding) |error|:(e))
if s is missing value then
|Left|((localizedDescription of e) as string)
else
|Right|(s as string)
end if
end readFileLR
 
 
-- A list of n copies of a, built by repeated doubling: the doubled
-- list is appended to the output whenever the current bit of n is set.
-- replicate :: Int -> a -> [a]
on replicate(n, a)
	set out to {}
	if n < 1 then return out
	set doubled to {a}
	
	repeat while (n > 1)
		if (n mod 2) > 0 then set out to out & doubled
		set n to (n div 2)
		set doubled to (doubled & doubled)
	end repeat
	return out & doubled
end replicate
 
 
-- showChar :: Char -> String
on showChar(c)
	-- A printable name for space, tab and linefeed;
	-- any other character is returned as it is.
	if space is c then return "SPACE"
	if tab is c then return "TAB"
	if linefeed is c then return "LF"
	return c
end showChar
 
 
-- showColumns :: Int -> [String] -> String
on showColumns(n, xs)
	-- The strings of xs laid out in at most n columns, filled column by
	-- column, rows separated by linefeeds and cells by spaces.
	-- An empty list gives an empty string.
	set lng to length of xs
	if lng < 1 then return ""
	-- Rows per column, rounded up: flooring gave a chunk size of 0 when
	-- there were fewer items than columns, and an extra (n + 1)th column
	-- whenever the item count was not a multiple of n.
	set rowCount to (lng + n - 1) div n
	unlines(map(my unwords, ¬
		transpose(chunksOf(rowCount, xs))))
end showColumns
 
 
-- signum :: Num -> Num
on signum(x)
	-- -1 for a negative number, 0 for zero, 1 for a positive number.
	if x < 0 then return -1
	if x = 0 then return 0
	return 1
end signum
 
-- snd :: (a, b) -> b
on snd(tpl)
	-- Second member of a pair, whether a Tuple record or a two-item list.
	if class of tpl is record then return |2| of tpl
	return item 2 of tpl
end snd
 
 
-- sort :: Ord a => [a] -> [a]
on sort(xs)
	-- xs sorted by handing it to NSArray and sorting with "compare:".
	set arr to current application's NSArray's arrayWithArray:xs
	(arr's sortedArrayUsingSelector:"compare:") as list
end sort
 
 
-- Enough for small scale sorts.
-- Use instead sortOn (Ord b => (a -> b) -> [a] -> [a])
-- which is equivalent to the more flexible sortBy(comparing(f), xs)
-- and uses a much faster ObjC NSArray sort method
-- sortBy :: (a -> a -> Ordering) -> [a] -> [a]
on sortBy(f, xs)
	-- Recursive partition sort: the first item is the pivot, the rest are
	-- split into those the comparator f ranks at or before the pivot and
	-- those it ranks after, and each part is sorted the same way.
	-- f takes two items and returns a negative number, zero or a positive number.
	if length of xs > 1 then
		set h to item 1 of xs
		set f to mReturn(f)
		script
			on |λ|(x)
				-- True when x sorts at or before the pivot. The comparison
				-- operator had been lost here, which left the line uncompilable.
				f's |λ|(x, h) ≤ 0
			end |λ|
		end script
		set lessMore to partition(result, rest of xs)
		sortBy(f, |1| of lessMore) & {h} & ¬
			sortBy(f, |2| of lessMore)
	else
		xs
	end if
end sortBy
 
 
-- transpose :: [[String]] -> [[String]]
on transpose(rows)
	-- Columns of rows turned into rows. The first row decides how many
	-- columns there are; a row too short for a column contributes "".
	script column
		on |λ|(_, iCol)
			script pick
				on |λ|(row)
					if iCol > length of row then return ""
					return item iCol of row
				end |λ|
			end script
			concatMap(pick, rows)
		end |λ|
	end script
	map(column, item 1 of rows)
end transpose
 
 
-- unlines :: [String] -> String
on unlines(xs)
	-- The strings of xs joined into one string with linefeeds between
	-- them. The text item delimiters are restored afterwards.
	set saved to my text item delimiters
	set my text item delimiters to linefeed
	set joined to xs as text
	set my text item delimiters to saved
	return joined
end unlines
 
 
-- unwords :: [String] -> String
on unwords(xs)
	-- The strings of xs joined into one string with single spaces between
	-- them. The text item delimiters are restored afterwards.
	set saved to my text item delimiters
	set my text item delimiters to space
	set joined to xs as text
	set my text item delimiters to saved
	joined
end unwords
Output:
SPACE -> 1330     p ->  138     " ->   28     k ->    5
    e ->  598     | ->  132     L ->   20     U ->    5
  TAB ->  584     g ->  129     F ->   20     < ->    4
    t ->  562     x ->  121     E ->   20     W ->    3
   LF ->  509     > ->  107     I ->   18     G ->    3
    n ->  462     : ->  102     O ->   17     / ->    3
    s ->  423     , ->   98     B ->   17     V ->    2
    - ->  384     y ->   70     A ->   17     H ->    2
    i ->  372     b ->   70     & ->   17     D ->    2
    o ->  365     R ->   42     ' ->   15     4 ->    2
    r ->  316     λ ->   39     ¬ ->   13     + ->    2
    a ->  311     w ->   39     N ->   13     ≥ ->    1
    l ->  241     v ->   35     2 ->   13     ≤ ->    1
    f ->  240     ] ->   35     = ->   12     ~ ->    1
    d ->  198     [ ->   35     q ->   11     _ ->    1
    ) ->  181     C ->   35     0 ->   10     ^ ->    1
    ( ->  181     T ->   33     . ->    9     Y ->    1
    m ->  154     S ->   32     P ->    8     9 ->    1
    h ->  152     1 ->   31     M ->    8     8 ->    1
    c ->  149     } ->   30     j ->    6     5 ->    1
    u ->  141     { ->   30     z ->    5     * ->    1

AutoHotkey[edit]

; Count how often each letter a-z occurs in a file and show the
; 26 counts in a message box.
OpenFile = %A_ScriptFullPath% ; use own source code
FileRead, FileText, %OpenFile%
Loop 26
{
; Chr(96+A_Index) runs from "a" to "z". The replacement result goes into
; the throwaway variable junk; with UseErrorLevel the number of
; replacements made is left in ErrorLevel, which is the count we want.
; NOTE(review): matching is presumably case-insensitive unless
; StringCaseSense is turned on -- confirm
StringReplace, junk, FileText, % Chr(96+A_Index),, UseErrorLevel
out .= Chr(96+A_Index) ": " ErrorLevel "`n"
}
MsgBox % out
Output:
(using script's own file):
a: 6
b: 1
c: 6
d: 4
e: 24
[several lines omitted]
x: 5
y: 0
z: 0

AutoIt[edit]

This function prints the letter frequency of a given text file. You can choose whether the search is case sensitive and whether special characters are counted as well.

 
; Writes to the console how often each character occurs in a text file.
;   $Path           - file to read
;   $fcase          - True for a case sensitive count (default)
;   $fspecial_chars - True to count character codes 32..255 (default),
;                     False to count letters only
; Sets @error to 1 and returns 0 if the file cannot be opened.
Func _Letter_frequency($Path, $fcase = True, $fspecial_chars = True)
	Local $hFile, $sRead, $iupto, $iStart, $iCount
	If Not $fcase Then $fcase = False
	If Not $fspecial_chars Then
		; Letters only: codes 65..90, or 65..122 when case matters.
		$iStart = 64
		If Not $fcase Then
			$iupto = 26
		Else
			$iupto = 58
		EndIf
	Else
		$iStart = 31
		$iupto = 224
	EndIf
	$hFile = FileOpen($Path, 0)
	; FileOpen returns -1 on failure; stop instead of reading from a bad handle.
	If $hFile = -1 Then Return SetError(1, 0, 0)
	$sRead = FileRead($hFile)
	FileClose($hFile)
	For $i = 1 To $iupto
		If Not $fspecial_chars Then
			; Skip the punctuation that sits between "Z" and "a".
			If $iStart + $i > 90 And $iStart + $i < 97 Then ContinueLoop
		EndIf
		; Removing the character from the text leaves the number of
		; replacements, i.e. its count, in @extended.
		$sRead = StringReplace($sRead, Chr($iStart + $i), "", 0, $fcase)
		$iCount = @extended
		If $iCount > 0 Then ConsoleWrite(Chr($iStart + $i) & " : " & $iCount & @CRLF)
	Next
EndFunc  ;==>_Letter_frequency
Output:
A : 32
B : 2
C : 15
E : 31
F : 10
[several lines omitted]
u : 14
v : 1
w : 1
x : 14

AWK[edit]

 
# usage: awk -f letters.awk HolyBible.txt

# Count every character of every input record in m[], keyed by the
# character itself. The record is walked with substr() rather than by
# setting FS to the empty string, because splitting on an empty FS is
# not specified by POSIX and behaves differently between awk versions.
{
    n = length($0)
    for (i = 1; i <= n; i++)
        m[substr($0, i, 1)]++
}

# Print "count character" for each character seen (order is unspecified).
END { for (c in m) printf("%9d %-14s\n", m[c], c) }
 

BaCon[edit]

' Print, for each letter, how often its upper-case and lower-case
' forms occur in bible.txt.
txt$ = LOAD$("bible.txt")
 
' x runs over the codes of "a".."z"; x-32 is the matching upper-case code.
FOR x = 97 TO 122
PRINT CHR$(x-32), " ", CHR$(x), " : ", COUNT(txt$, x-32), " - ", COUNT(txt$, x)
NEXT
 
Output:
A a : 17915 - 257815
B b : 4714 - 44161
C c : 1698 - 53373
D d : 8782 - 149313
E e : 2710 - 409525
F f : 2386 - 81157
G g : 6206 - 49096
H h : 3208 - 279471
I i : 13302 - 180660
J j : 6374 - 2515
K k : 547 - 21745
L l : 9222 - 120716
M m : 3056 - 76884
N n : 1891 - 223166
O o : 8896 - 234290
P p : 1877 - 41377
Q q : 6 - 958
R r : 7568 - 162761
S s : 4906 - 185124
T t : 7763 - 309983
U u : 333 - 83140
V v : 107 - 30258
W w : 2408 - 63079
X x : 2 - 1476
Y y : 569 - 58007
Z z : 904 - 2068

BBC BASIC[edit]

      REM Count every character code in the file, then print the combined
      REM upper-case plus lower-case total for each letter A-Z.
      DIM cnt%(255)
 
file% = OPENIN("C:\unixdict.txt")
IF file%=0 ERROR 100, "Could not open file"
 
REM Read the file a string at a time and tally each character code.
REPEAT
A$ = GET$#file%
L% = LEN(A$)
IF L% THEN
FOR I% = 1 TO L%
cnt%(ASCMID$(A$,I%)) += 1
NEXT
ENDIF
UNTIL EOF#file%
CLOSE #file%
 
REM &41..&5A are the codes of "A".."Z"; adding 32 gives the lower-case code.
FOR c% = &41 TO &5A
PRINT CHR$(c%)CHR$(c%+32) ": " cnt%(c%)+cnt%(c%+32)
NEXT
Output:
Aa:      16421
Bb:       4115
Cc:       8216
Dd:       5799
Ee:      20144
Ff:       2662
Gg:       4129
Hh:       5208
Ii:      13980
Jj:        430
Kk:       1925
Ll:      10061
Mm:       5828
Nn:      12097
Oo:      12738
Pp:       5516
Qq:        378
Rr:      13436
Ss:      10210
Tt:      12836
Uu:       6489
Vv:       1902
Ww:       1968
Xx:        617
Yy:       3633
Zz:        433

Bracmat[edit]

{lc: read the file named by the argument byte by byte and return a sum
 in which every letter A-Z and a-z that was read appears as a term.
 Bytes that are not letters contribute 0.}
(lc=
counts c
. fil$(!arg,r) {open file for reading}
& 0:?counts
& whl
' ( fil$:?c {read a byte}
& ( !c:(~<A:~>Z|~<a:~>z)
| 0
)
+ !counts
 : ?counts {simply add any found letter to the sum}
)
& fil$(,SET,-1) {close the file by seeking to impossible file position.}
| !counts {return the sum}
);
 
lc$"valid.bra" {example: count letters in Bracmat's validation suite.}
 
107*A
+ 33*B
+ 37*C
+ 39*D
+ 74*E
+ 50*F
+ 27*G
+ 28*H
+ 20*I
+ 55*J
+ 32*K
+ 112*L
+ 36*M
+ 32*N
+ 621*O
+ 43*P
+ 25*R
+ 67*S
+ 62*T
+ 64*U
+ 5*V
+ 26*W
+ 353*X
+ 248*Y
+ 70*Z
+ 2173*a
+ 840*b
+ 738*c
+ 639*d
+ 1345*e
+ 472*f
+ 372*g
+ 568*h
+ 91*j
+ 142*k
+ 529*l
+ 409*m
+ 941*n
+ 840*o
+ 336*p
+ 65*q
+ 993*r
+ 1018*s
+ 2097*t
+ 978*u
+ 122*v
+ 156*w
+ 909*x
+ 685*y
+ 211*z
+ 1035*i

C[edit]

/* declare array */
int frequency[26];
int ch;
FILE* txt_file = fopen ("a_text_file.txt", "rt");
 
/* init the freq table: */
for (ch = 0; ch < 26; ch++)
frequency[ch] = 0;
 
while (1) {
ch = fgetc(txt_file);
if (ch == EOF) break; /* end of file or read error. EOF is typically -1 */
 
/* assuming ASCII; "letters" means "a to z" */
if ('a' <= ch && ch <= 'z') /* lower case */
frequency[ch-'a']++;
else if ('A' <= ch && ch <= 'Z') /* upper case */
frequency[ch-'A']++;
}

C#[edit]

using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
 
/// <summary>
/// Prints how often each character occurs in the file named by the
/// first command-line argument.
/// </summary>
class Program
{
    /// <summary>
    /// Counts the occurrences of each distinct item.
    /// </summary>
    /// <param name="items">Sequence of items to count.</param>
    /// <returns>Items mapped to their counts, ordered by item.</returns>
    static SortedDictionary<TItem, int> GetFrequencies<TItem>(IEnumerable<TItem> items)
    {
        var counts = new SortedDictionary<TItem, int>();
        foreach (var item in items)
        {
            // TryGetValue leaves 'seen' at 0 for an item not met before.
            int seen;
            counts.TryGetValue(item, out seen);
            counts[item] = seen + 1;
        }
        return counts;
    }

    /// <summary>
    /// Entry point; does nothing when no existing file is named.
    /// </summary>
    static void Main(string[] arguments)
    {
        var path = arguments.FirstOrDefault();
        if (!File.Exists(path))
        {
            return;
        }

        foreach (var entry in GetFrequencies(File.ReadAllText(path)))
        {
            Console.WriteLine("{0}: {1}", entry.Key, entry.Value);
        }
    }
}
Output:
 : 1
!: 1
,: 1
H: 1
d: 1
e: 1
l: 3
o: 2
r: 1
w: 1

Declarative approach:

 
// Query over the characters of str (defined outside this snippet):
// keep the letters, order them, group equal letters together and
// produce one "letter:count" string per group.
var freq = from c in str
where char.IsLetter(c)
orderby c
group c by c into g
select g.Key + ":" + g.Count();
 
// Print one line per distinct letter.
foreach(var g in freq)
Console.WriteLine(g);
 
C:2
I:1
K:1
L:2
W:1
a:4
...
y:2

C++[edit]

#include <cctype>
#include <cstddef>
#include <fstream>
#include <iostream>

// Counts every byte value in "filename.txt" and prints the count of each
// printable, non-space character that occurs at least once.
// Returns -1 if the file cannot be opened.
int main()
{
    std::ifstream input("filename.txt", std::ios_base::binary);
    if (!input)
    {
        std::cerr << "error: can't open file\n";
        return -1;
    }

    // One counter per possible byte value, all starting at zero.
    std::size_t count[256] = {};

    // Index through unsigned char so that bytes above 127 do not become
    // negative indices on platforms where char is signed.
    for (char c; input.get(c); )
    {
        ++count[static_cast<unsigned char>(c)];
    }

    for (std::size_t i = 0; i < 256; ++i)
    {
        // non-zero counts of printable characters
        if (count[i] && std::isgraph(static_cast<int>(i)))
        {
            std::cout << char(i) << " = " << count[i] << '\n';
        }
    }
}
Output:
when file contains "Hello, world!" (without quotes):
! = 1
, = 1
H = 1
d = 1
e = 1
l = 3
o = 2
r = 1
w = 1

Clojure[edit]

;; Read text.txt, keep only the letters, fold them to upper case, count
;; each one and print the (letter count) pairs, highest count first.
(->> (slurp "text.txt")
     (filter #(java.lang.Character/isLetter %))
     (map #(java.lang.Character/toUpperCase %))
     frequencies
     (sort-by second >)
     println)

Common Lisp[edit]

(defun letter-freq (file)
  "Print the number of occurrences of each printable ASCII character
\(codes 32 to 126) in FILE, eight entries per line.
Characters whose code is 256 or above are ignored."
  (with-open-file (stream file)
    (let* ((str (make-string (file-length stream)))
           (arr (make-array 256 :element-type 'integer :initial-element 0))
           ;; READ-SEQUENCE returns the index of the first element it did
           ;; not fill.  FILE-LENGTH may exceed the number of characters
           ;; actually read, so only that prefix of STR holds file content.
           (end (read-sequence str stream)))
      (loop for pos below end
            for code = (char-code (char str pos))
            ;; ARR has 256 slots; skip characters that would not fit.
            when (< code 256)
              do (incf (aref arr code)))
      (loop for c from 32 to 126 for i from 1 do
        (format t "~c: ~d~a"
                (code-char c) (aref arr c)
                (if (zerop (rem i 8)) #\newline #\tab))))))
 
(letter-freq "test.lisp")

Component Pascal[edit]

BlackBox Component Builder

 
(* Counts the letters A-Z (case folded with CAP) in the file
   BBTest/Mod/LetterFrecuency.odc and writes one line per letter to the log. *)
MODULE LetterFrecuency;
IMPORT Files,StdLog,Strings;
 
(* Exported command: read the file byte by byte, tally the letters, show the table. *)
PROCEDURE Do*;
VAR
loc: Files.Locator;
fd: Files.File;
rd: Files.Reader;
x: BYTE;
frecuency: ARRAY 26 OF LONGINT;
c: CHAR;
i: INTEGER;
BEGIN
loc := Files.dir.This("BBTest/Mod");
fd := Files.dir.Old(loc,"LetterFrecuency.odc",FALSE);
rd := fd.NewReader(NIL);
 
(* init the frecuency array *)
FOR i := 0 TO LEN(frecuency) - 1 DO frecuency[i] := 0 END;
 
(* collect frecuencies *)
WHILE ~rd.eof DO
rd.ReadByte(x);c := CAP(CHR(x));
(* convert vowels with diacritics *)
(* NOTE(review): the codes 193, 201, 205, 211 and 218 look like accented
   capital vowels in a Latin-1 style encoding; whether a BYTE read here
   can ever produce these ordinals should be confirmed *)
CASE ORD(c) OF
193: c := 'A';
|201: c := 'E';
|205: c := 'I';
|211: c := 'O';
|218: c := 'U';
ELSE
END;
IF (c >= 'A') & (c <= 'Z') THEN
INC(frecuency[ORD(c) - ORD('A')]);
END
END;
 
(* show data *)
FOR i := 0 TO LEN(frecuency) - 1 DO
StdLog.Char(CHR(i + ORD('A')));StdLog.String(":> ");StdLog.Int(frecuency[i]);
StdLog.Ln
END
END Do;
END LetterFrecuency.
 

Execute: ^Q LetterFrecuency.Do

Output:
A:>  28
B:>  7
C:>  100
D:>  94
E:>  168
F:>  30
G:>  10
H:>  11
I:>  49
J:>  0
K:>  1
L:>  67
M:>  25
N:>  57
O:>  81
P:>  3
Q:>  0
R:>  91
S:>  90
T:>  94
U:>  32
V:>  14
W:>  15
X:>  15
Y:>  17
Z:>  3

D[edit]

// Count the ASCII letters of unixdict.txt case-insensitively and print
// the 26 totals (a to z), ten per line.
void main() {
import std.stdio, std.ascii, std.algorithm, std.range;
 
// One counter per letter; index 0 stands for 'a'.
uint[26] frequency;
 
// Read the file in chunks of 2 ^^ 15 bytes and tally only the letters.
foreach (const buffer; "unixdict.txt".File.byChunk(2 ^^ 15))
foreach (immutable c; buffer.filter!isAlpha)
frequency[c.toLower - 'a']++;
 
// Nested format: counts separated by ", ", groups of ten separated by ",\n".
writefln("%(%(%s, %),\n%)", frequency[].chunks(10));
}
Output:
16421, 4115, 8216, 5799, 20144, 2662, 4129, 5208, 13980, 430,
1925, 10061, 5828, 12097, 12738, 5516, 378, 13436, 10210, 12836,
6489, 1902, 1968, 617, 3633, 433

EchoLisp[edit]

We use a property list - plist for short - which is a hash table, to store the pairs ( letter . count) .

 
;; bump count when letter added
;; (the count for key lives on the property list of the symbol hash;
;; a key not seen before starts from 0)
(define (hash-counter hash key )
;; (set! key (string-downcase key)) - if ignore case wanted
(putprop hash (1+ (or (getprop hash key) 0 )) key))
 
;; apply to exploded string
;; and sort result
;; hash-compare orders two plist entries by their first element
(define (hash-compare a b) ( < (first a) (first b)))
;; count-letters tallies every character of string into the plist of
;; hash and returns that plist sorted with hash-compare
(define (count-letters hash string)
(map (curry hash-counter hash) (string->list string))
(list-sort hash-compare (symbol-plist hash)))
 
Output:
 
;; Callback receiving a file name and its contents as a string: prints
;; the sorted character counts, the string length and the newline count.
(define (file-stats file string)
(set-plist! 'file-stats null) ; reset counters
(writeln (count-letters 'file-stats string))
(writeln "Total letters:" (string-length string))
(writeln "Total lines:" (getprop 'file-stats "#\\newline")))
 
; frequency for 'help.html' file
(file->string file-stats) ; browser 'open' dialog
 
➛ help.html -> string
(( 28918) (! 138) (# 1035) (#\newline 4539) (#\tab 409) ($ 7) (% 24) (& 136) (' 1643) ((3577) () 3583) (* 233)
(+ 303) (, 599) (- 3164) (. 1454) (/ 5388) (0 1567) (1 1769) (2 1258) (3 857) (4 1872) (5 453) (6 581) (7 344)
(8 337) (9 411) (: 1235) (; 647) (< 9951) (= 1834) (> 10255) (? 392) (@ 11) (A 166) (B 92) (C 144) (D 72) (E 224)
(F 52) (G 35) (H 42) (I 193) (J 31) (K 36) (L 196) (M 82) (N 94) (O 132) (P 192) (Q 27) (R 56) (S 220) (T 226) (U 37)
(V 51) (W 28) (X 6) (Y 38) (Z 2) ([ 237) (\ 12) (] 215) (^ 28) (_ 107) (` 7) (a 8420) (b 4437) (c 3879) (d 4201)
(e 11905) (f 2989) (g 2068) (h 3856) (i 11313) (j 334) (k 653) (l 5748) (m 3048) (n 7020) (o 7207) (p 3585) (q 249)
(r 8312) (s 8284) (t 8704) (u 3833) (v 1135) (w 861) (x 1172) (y 1451) (z 268) ({ 123) (| 62) (} 123) (~ 7) (§ 1) (© 1)
(« 1) (» 1) (É 2) (à 18) (â 3) (ç 3) (è 6) (é 53) (î 1) (ö 9) (û 1) (œ 1) (ε 2) (λ 12) (μ 1) (ο 2) (ς 1)
(τ 1) (а 1) (д 1) (е 1) (з 1) (л 1) (м 1) (н 1) (я 3) (1) (2) (2) (2) (184) (465) (57) (4) (1)
(2) (6) (4)(12) (2) (2) (1) (4) (1) (7) (2) (2) (6) (2) (8) (8)
(2) (1) (2) (1) (1) (1) (1) (1) (1) (1) (1) (1) (1) (1) (3) (1)
(1) (1) (900) (1) (2) ... )
➛ Total letters: 212631
➛ Total lines: 4539
 

Eiffel[edit]

-- Reads "input.txt" and prints how often each alphabetic character
-- occurs in it, one "character: count" line per character.
class
APPLICATION
 
create
make
 
feature {NONE} -- Initialization
 
make
-- Read from the file and print frequencies.
local
file: PLAIN_TEXT_FILE
do
create file.make_open_read("input.txt")
-- Read the whole file in one go; the text ends up in `last_string'.
file.read_stream(file.count)
file.close
across get_frequencies(file.last_string) as f loop
print(f.key.out + ": " + f.item.out + "%N")
end
end
 
feature -- Access
 
get_frequencies (s: STRING): HASH_TABLE[INTEGER, CHARACTER]
-- Hash table of counts for alphabetic characters in `s'.
-- Upper-case and lower-case forms are counted separately.
local
char: CHARACTER
do
create Result.make(0)
-- NOTE(review): this iterates the underlying `area' of `s'; any
-- non-alphabetic filler it may contain is skipped by the
-- `is_alpha' test -- confirm that no alphabetic filler can occur.
across s.area as st loop
char := st.item
if char.is_alpha then
if Result.has(char) then
Result.force(Result.at(char) + 1, char)
else
Result.put (1, char)
end
end
end
end
end
Output:
when file contains "Hello, Eiffel world!":
H: 1
e: 2
l: 4
o: 2
E: 1
i: 1
f: 2
w: 1
r: 1
d: 1

Elixir[edit]

# The text file to analyse is the first command-line argument.
path = hd(System.argv)

# Tally the letters A-Z of the upper-cased text.
counts =
  path
  |> File.read!()
  |> String.upcase()
  |> String.graphemes()
  |> Enum.filter(&(&1 =~ ~r/[A-Z]/))
  |> Enum.reduce(Map.new, fn letter, tally ->
    Map.update(tally, letter, 1, fn n -> n + 1 end)
  end)

# Print one "LETTER COUNT" line per letter, most frequent first.
counts
|> Enum.sort_by(fn {_letter, count} -> -count end)
|> Enum.each(fn {letter, count} -> IO.puts "#{letter} #{count}" end)
Output:
C:\Elixir>elixir letterfrequency.exs \work\unixdict.txt
E  20144
A  16421
I  13980
R  13436
T  12836
O  12738
N  12097
S  10210
L  10061
C  8216
U  6489
M  5828
D  5799
P  5516
H  5208
G  4129
B  4115
Y  3633
F  2662
W  1968
K  1925
V  1902
X  617
Z  433
J  430
Q  378

Erlang[edit]

%% Implemented by Arjun Sunel
-module(letter_frequency).
-export([main/0, letter_freq/1]).

%% Read "file.txt" and print how often each character occurs in it.
main() ->
    case file:read_file("file.txt") of
        {ok, FileData} ->
            letter_freq(binary_to_list(FileData));
        {error, Reason} ->
            %% Report the real cause (missing file, permissions, ...).
            io:format("Cannot read file.txt: ~s~n", [file:format_error(Reason)])
    end.

%% Print one "Char : Count" line for every distinct character of Data,
%% in ascending character order.  Returns ok.
%%
%% The characters are tallied in a single pass; nothing is limited to a
%% fixed code range, so bytes above 222 are reported as well.
letter_freq(Data) ->
    Counts = lists:foldl(fun(Char, Acc) ->
                                 dict:update_counter(Char, 1, Acc)
                         end, dict:new(), Data),
    lists:foreach(fun(Char) ->
                          io:format("~p : ~p~n", [[Char], dict:fetch(Char, Counts)])
                  end, lists:sort(dict:fetch_keys(Counts))).
 
Output:
"\n"    :       5
" "     :       4
","     :       1
"."     :       22
":"     :       3
"M"     :       1
"a"     :       2
"e"     :       2
"i"     :       1
"j"     :       1
"l"     :       1
"m"     :       1
"n"     :       3
"r"     :       1
"s"     :       2
"u"     :       2
"y"     :       1
"}"     :       2
ok

Alternatively letter_freq/1 above can be replaced with

 
%% Single-pass variant: fold Data into a dict that maps each character to
%% its number of occurrences, then print one "Char : Count" line per key.
%% Lines appear in the order dict:fetch_keys/1 returns the keys.
%% The result is the list of io:fwrite/2 return values.
letter_freq( Data ) ->
Dict = lists:foldl( fun (Char, Dict) -> dict:update_counter( Char, 1, Dict ) end, dict:new(), Data ),
[io:fwrite( "~p : ~p~n", [[X], dict:fetch(X, Dict)]) || X <- dict:fetch_keys(Dict)].
 

ERRE[edit]

Using ERRE help file for testing.

! Counts the characters of the ERRE help file and prints, for every
! letter A..Z, the combined number of upper- and lower-case occurrences.
PROGRAM LETTER
 
! One counter per character code 0..255.
DIM CNT[255]
 
BEGIN
 
OPEN("I",1,"f:\errev30\erre.hlp")
 
! Read the file piece by piece until end of file.
! NOTE(review): presumably GET delivers one character per call - confirm.
REPEAT
GET(#1,A$)
L%=LEN(A$)
IF L%>0 THEN
FOR I%=1 TO L% DO
! Code of the character at position I% selects the counter to bump.
A%=ASC(MID$(A$,I%))
CNT[A%]+=1
END FOR
END IF
UNTIL EOF(1)
CLOSE(1)
 
! $41..$5A are the codes of 'A'..'Z'; adding 32 gives the lower-case letter.
FOR C%=$41 TO $5A DO
PRINT(CHR$(C%);CHR$(C%+32);": ";CNT[C%]+CNT[C%+32])
END FOR
 
END PROGRAM
 

Euphoria[edit]

Works with: OpenEuphoria
 
-- LetterFrequency.ex
-- Count frequency of each letter in own source code.

include std/console.e
include std/io.e
include std/text.e

-- One tally per letter: slot 1 is 'a', slot 26 is 'z'.
sequence letters = repeat(0,26)

-- The whole source file, folded to lower case so that 'A' and 'a'
-- land in the same slot.
sequence content = read_file("LetterFrequency.ex")
content = lower(content)

for pos = 1 to length(content) do
    if content[pos] >= 'a' and content[pos] <= 'z' then
        letters[content[pos] - 'a' + 1] += 1
    end if
end for

-- Report every letter, including those that never occur.
for slot = 1 to 26 do
    printf(1,"%s:  %d\n",{'a' + slot - 1,letters[slot]})
end for

-- Wait for a key so the console window stays open.
if getc(0) then end if
 
Output:
a: 4
b: 0
c: 21
-snip
x: 3
y: 3
z: 0

F#[edit]

/// The 26 upper-case letters that are counted.
let alphabet = set ['A'..'Z']

/// Case-insensitive (letter, count) pairs for `text`, ordered by letter.
/// Characters outside A-Z are ignored.
let letterFreq (text : string) =
    text.ToUpper()
    |> Seq.filter (fun c -> Set.contains c alphabet)
    |> Seq.countBy id
    |> Seq.sort

let v = "Now is the time for all good men to come to the aid of the party"

let res = letterFreq v

res |> Seq.iter (fun (letter, freq) -> printfn "%A, %A" letter freq)

Factor[edit]

USING: hashtables locals io assocs kernel io.encodings.utf8 io.files formatting ;
IN: count-letters
 
<PRIVATE
 
! Read the current input stream one element at a time until read1
! returns f, bumping that element's counter in a fresh hashtable
! (created with an initial capacity of 52).  Leaves the hashtable on the stack.
: count-from-stream ( -- counts )
52 <hashtable>
[ read1 dup ] [ over inc-at ] while
drop ;
 
! Print every key/value pair of counts as "<char>: <count>", one per line.
: print-counts ( counts -- )
[ "%c: %d\n" printf ] assoc-each ;
 
PRIVATE>
 
! Open filename as a UTF-8 reader, count its characters and print the counts.
: count-letters ( filename -- )
utf8 [ count-from-stream ] with-file-reader
print-counts ;
 

FBSL[edit]

The result of the first evaluation of ASC() is retained in the symbol ASC for later use. This is a standard feature of FBSL functions. The ascii array is dynamic. Command(1) is the name of the script file.

#APPTYPE CONSOLE
 
'Open a text file and count the occurrences of each letter.
'countBytes reads fileName character by character in BINARY mode and
'returns the dynamic array ascii[], indexed by character code, holding
'the number of times each code was read.
FUNCTION countBytes(fileName AS STRING)
DIM c AS STRING
DIM ascii[]
DIM handle AS INTEGER = FILEOPEN(fileName, BINARY)
WHILE NOT FILEEOF(handle)
c = FILEGETC(handle)
'An empty read ends the loop even if FILEEOF has not reported the end yet.
IF c = "" THEN EXIT WHILE
'The bare symbol ASC reuses the value of the ASC(c) evaluation
'(see the note preceding this listing).
ascii[ASC] = ascii[ASC(c)] + 1
WEND
FILECLOSE(handle)
RETURN ascii
END SUB
 
'Count the characters of the file named by COMMAND(1) and list every
'index of the returned array: code, character (or the code again for
'codes up to 32), and count.
DIM counters = countBytes(COMMAND(1))
FOR DIM i = LBOUND(counters) TO UBOUND(counters)
PRINT i, TAB, IIF(i <= 32, i, CHR(i)), TAB, counters[i]
NEXT
 
PAUSE
 

Forth[edit]

\ One counter cell per letter, a..z.
create counts 26 cells allot

\ freq ( c-addr u -- )
\ Read the file whose name is the given string, tally its letters
\ case-insensitively, and print one  'x': n  line per letter.
: freq ( filename -- )
  counts 26 cells erase
  slurp-file bounds ?do              \ ?do: an empty file runs the loop zero times
    i c@ 32 or [char] a -            \ fetch byte, fold to lower case, 0..25 for letters
    dup 0 26 within if
      cells counts +
      1 swap +!
    else drop then                   \ not a letter: ignore
  loop
  26 0 do
    cr [char] ' emit  [char] a i + emit  ." ': "
    counts i cells + @ .
  loop ;

s" example.txt" freq

Fortran[edit]

Using the configuration file (which has changed since the example was documented) of the J example, compilation and output of this program on a gnu/linux system is

 
-*- mode: compilation; default-directory: "/tmp/" -*-
Compilation started at Sat May 18 18:09:46
 
a=./F && make $a && $a < configuration.file
f95 -Wall -ffree-form F.F -o F
92 21 17 24 82 19 19 22 67 0 2 27 27 57 55 31 1 61 43 60 20 6 2 0 10 0
 
Compilation finished at Sat May 18 18:09:46
 

And here's the FORTRAN90 program source. The program reads stdin and writes the result to stdout. Future enhancement: use block size records.

 
! count letters from stdin
! Reads standard input one character at a time, tallies the letters
! a-z case-insensitively and writes the 26 counts (a first) to stdout.
program LetterFrequency
  implicit none
  character (len=1) :: s
  integer, dimension(26) :: a
  integer :: ios, i, t
  data a/26*0/,i/0/
  open(unit=7, file='/dev/stdin', access='direct', form='formatted', recl=1, status='old', iostat=ios)
  if (ios .ne. 0) then
    write(0,*)'Opening stdin failed'
    stop
  endif
  do i=1, huge(i)
    read(unit=7, rec = i, fmt = '(a)', iostat = ios ) s
    if (ios .ne. 0) then
      !write(0,*)'ios on failure is ',ios
      close(unit=7)
      exit
    endif
    ! Setting bit 5 folds upper case onto lower case; t is then the
    ! 0-based offset from 'a'.
    t = ior(iachar(s(1:1)), 32) - iachar('a')
    ! Accept offsets 0..25 only.  Comparing against iachar('z') itself
    ! would let codes such as '{' index beyond a(26).
    if ((0 .le. t) .and. (t .le. iachar('z') - iachar('a'))) then
      t = t+1
      a(t) = a(t) + 1
    endif
  end do
  write(6, *) a
end program LetterFrequency
 

FreeBASIC[edit]

' FB 1.05.0 Win64

' Counts the letters A-Z (case-insensitively) of a text file and prints
' every letter that occurs at least once.

Dim a(65 to 90) As Integer ' array to hold frequency of each letter, all elements zero initially
Dim fileName As String = "input.txt"
Dim s As String
Dim i As Integer
Open fileName For Input As #1

While Not Eof(1)
  Line Input #1, s
  s = UCase(s)
  For i = 0 To Len(s) - 1
    ' Only the codes 65..90 ('A'..'Z') have a slot in a(); spaces, digits
    ' and punctuation must be skipped or they index outside the array.
    If s[i] >= 65 AndAlso s[i] <= 90 Then a(s[i]) += 1
  Next
Wend

Close #1

Print "The frequency of each letter in the file "; fileName; " is as follows:"
Print
For i = 65 To 90
  If a(i) > 0 Then
    Print Chr(i); " : "; a(i)
  End If
Next
Print
Print "Press any key to quit"
Sleep
Output:
/'
   results for input.txt which contains the single line:
   The quick brown fox jumps over the lazy dog. 
'/

The frequency of each letter in the file input.txt is as follows:

A :  1
B :  1
C :  1
D :  1
E :  3
F :  1
G :  1
H :  2
I :  1
J :  1
K :  1
L :  1
M :  1
N :  1
O :  4
P :  1
Q :  1
R :  2
S :  1
T :  2
U :  2
V :  1
W :  1
X :  1
Y :  1
Z :  1

Input:

This is the one question that most people ask. Why bother learning a completely different computing environment, when the operating 
system that ships with most desktops, laptops, and servers works just fine? To answer that question, I would pose another question. 
Does that operating system you’re currently using really work “just fine”? Or are you constantly battling viruses, malware, slow 
downs, crashes, costly repairs, and licensing fees?

If you struggle with the above, and want to free yourself from the constant fear of losing data or having to take your computer in 
for the “yearly clean up,” Linux might be the perfect platform for you. Linux has evolved into one of the most reliable computer 
ecosystems on the planet. Combine that reliability with zero cost of entry and you have the perfect solution for a desktop platform.

Gambas[edit]

' Load "data.txt" and report how many of its characters are letters (A-Z,
' either case), spaces, or anything else.
Public Sub Form_Open()
  Dim sData As String = File.Load("data.txt")
  Dim iPos, iSpaces, iLetters, iOther As Integer
  Dim sChar As String

  For iPos = 1 To Len(sData)
    sChar = Mid(sData, iPos, 1)
    If InStr("ABCDEFGHIJKLMNOPQRSTUVWXYZ", UCase(sChar)) Then
      Inc iLetters
    Else If sChar = " " Then
      Inc iSpaces
    Else
      ' Neither a letter nor a space
      Inc iOther
    End If
  Next

  Message("Text contains " & Len(sData) & " characters\n" & iLetters & " Letters\n" & iSpaces & " Spaces\n" & iOther & " Punctuation, newlines etc.")

End

Output:

Text contains 854 characters
677 Letters
135 Spaces
42 Punctuation, newlines etc.

Go[edit]

package main
 
import (
"fmt"
"io/ioutil"
"sort"
"unicode"
)
 
const file = "unixdict.txt"
 
// main counts every rune of the input file and prints the runes ordered
// by descending frequency. Non-graphic runes are shown in U+XXXX form.
func main() {
	data, err := ioutil.ReadFile(file)
	if err != nil {
		fmt.Println(err)
		return
	}
	// Tally each rune of the decoded text.
	counts := map[rune]int{}
	for _, r := range string(data) {
		counts[r]++
	}
	// Flatten the tally into a sortable list.
	entries := make(lfList, 0, len(counts))
	for r, n := range counts {
		entries = append(entries, &letterFreq{r, n})
	}
	sort.Sort(entries)
	fmt.Println("file:", file)
	fmt.Println("letter frequency")
	for _, e := range entries {
		if !unicode.IsGraphic(e.rune) {
			fmt.Printf("%U  %7d\n", e.rune, e.freq)
			continue
		}
		fmt.Printf("  %c  %7d\n", e.rune, e.freq)
	}
}
 
// letterFreq pairs a rune with the number of times it was seen.
type letterFreq struct {
	rune
	freq int
}

// lfList implements sort.Interface: highest frequency first, ties broken
// by ascending rune value.
type lfList []*letterFreq

func (lfs lfList) Len() int { return len(lfs) }

func (lfs lfList) Less(i, j int) bool {
	a, b := lfs[i], lfs[j]
	if a.freq != b.freq {
		return a.freq > b.freq
	}
	return a.rune < b.rune
}

func (lfs lfList) Swap(i, j int) {
	tmp := lfs[i]
	lfs[i] = lfs[j]
	lfs[j] = tmp
}
Output:
file: unixdict.txt
letter  frequency
U+000A    25104
   e      20144
   a      16421
   i      13980
   r      13436
   t      12836
   o      12738
   n      12097
   s      10210
   l      10061
   c       8216
   u       6489
   m       5828
   d       5799
   p       5516
   h       5208
   g       4129
   b       4115
   y       3633
   f       2662
   w       1968
   k       1925
   v       1902
   x        617
   z        433
   j        430
   q        378
   '        105
   &          6
   .          6
   1          2
   0          1
   2          1
   3          1
   4          1
   5          1
   6          1
   7          1
   8          1
   9          1

Groovy[edit]

// Map each element of the argument (for a String: each character) to the
// number of times it occurs, in order of first appearance.
def frequency = { source ->
    def tally = [:]
    source.each { item -> tally[item] = (tally[item] ?: 0) + 1 }
    tally
}
 
// Count the characters of this script's own source file and print each
// one as 'char': count.
frequency(new File('frequency.groovy').text).each { key, value ->
println "'$key': $value"
}
Output:
'd': 1
'e': 19
'f': 4
' ': 29
'r': 5
'q': 3
'u': 8
[lines omitted]
'o': 2
'x': 1
'h': 1
'k': 2
'"': 2
'$': 2

Harbour[edit]

// Count the printable characters of this program's own .prg source and
// print them, sorted by character, as 'c'=n pairs.
PROCEDURE Main()
   LOCAL s := hb_MemoRead( Left( __FILE__ , At( ".", __FILE__ )) +"prg")
   LOCAL c, n, i
   LOCAL a := {}

   // Keep printable characters only (codes above 31).
   FOR EACH c IN s
      IF Asc( c ) > 31
         AAdd( a, c )
      ENDIF
   NEXT
   // After sorting, equal characters are adjacent, so each run is one count.
   a := ASort( a )
   i := 1
   WHILE i <= Len( a )
      c := a[i] ; n := 1
      i++
      // The bounds test comes first: a run that reaches the end of the
      // array must neither read past it nor be cut short.
      WHILE i <= Len( a ) .AND. c == a[i]
         n++ ; i++
      END
      ?? "'" + c + "'" + "=" + hb_NtoS( n ) + " "
   END

   RETURN
Output:
(counting the printable characters of its own source code):
' '=190 '"'=12 ' ' '=2 '('=10 ')'=10 '+'=12 ','=5 '.'=3 '1'=3 '3'=1 ':'=6 ';'=2 '<'=2 '='=12 
'>'=1 '?'=2 'A'=10 'C'=5 'D'=6 'E'=13 'F'=7 'H'=3 'I'=9 'L'=13 'M'=2 'N'=9 'O'=5 'P'=1 
'R'=6 'S'=2 'T'=2 'U'=2 'W'=2 'X'=1 '['=3 ']'=3 '_'=10 'a'=12 'b'=2 'c'=9 'd'=3 'e'=5 
'f'=1 'g'=1 'h'=2 'i'=11 'm'=1 'n'=7 'o'=3 'p'=1 'r'=2 's'=3 't'=5 'w'=1 '{'=1 '}'=1

Haskell[edit]

Short version:

import Data.List (group,sort)
import Control.Arrow ((&&&))
-- Read all of stdin, sort its characters so that equal ones are adjacent,
-- group them, and show the list of (character, run length) pairs.
main = interact (show . map (head &&& length) . group . sort)

or, as an alternative to sorting and grouping the whole string, we could use some kind of container as the accumulator for a single fold, for example:

import qualified Data.Map.Strict as M
import Data.Ord (comparing)
import Data.List (sortBy)

-- | Number of occurrences of each character of the string.
charCounts :: String -> M.Map Char Int
charCounts = foldr bump M.empty
  where
    -- Add one to the character's count, starting from 1 when it is new.
    bump c = M.insertWith (+) c 1

-- | Print the (character, count) pairs of "miserables.txt",
-- highest count first.
main :: IO ()
main = do
  s <- readFile "miserables.txt"
  let byCountDescending = sortBy (flip (comparing snd))
  mapM_ print (byCountDescending (M.toList (charCounts s)))
Output:
(' ',516452)
('e',325769)
('t',222955)
('a',199774)
('o',180987)
('h',170234)
('n',166901)
('i',165221)
('s',157643)
('r',145136)
('d',106989)
('l',97091)
('\n',73828)
('u',67370)
('c',62760)
('m',56011)
('f',53438)
('w',53332)
(',',48784)
('g',46086)
('p',39958)
('y',37945)
('b',34313)
('.',30487)
('v',24058)
('\8364',21159)
('\226',21155)
('k',14110)
('T',12571)
('I',10067)
('A',7359)
('\339',7121)
('\157',7033)
('H',6605)
('M',6208)
(';',5885)
('E',4969)
('-',4775)
('C',4594)
('S',4404)
('x',3694)
('\8482',3633)
('!',3539)
('R',3535)
('P',3429)
('O',3401)
('j',3392)
('B',3193)
('W',3181)
('\8221',3071)
('N',3053)
('?',2976)
('F',2768)
('G',2512)
(':',2463)
('L',2452)
('J',2448)
('q',2398)
('\195',2296)
('V',2210)
('_',2068)
('z',1847)
('D',1758)
('\169',1328)
('Y',1238)
('U',900)
('1',732)
('8',412)
('X',333)
('K',323)
('\732',298)
('\168',294)
('3',254)
('2',242)
('0',212)
('5',208)
('*',179)
('(',172)
(')',172)
('4',170)
('\8240',146)
('6',143)
('7',140)
('Q',135)
('[',122)
(']',122)
('9',118)
('\166',107)
('\170',74)
('Z',59)
('\162',56)
('\167',48)
('\174',39)
('\197',38)
('"',37)
('\188',37)
('\8220',35)
('\180',34)
('|',24)
('\175',18)
('\185',18)
('/',12)
('\8224',10)
('\187',9)
('\'',8)
('+',5)
('\171',5)
('\710',5)
('\8217',3)
('$',2)
('\177',2)
('\8225',2)
('#',1)
('%',1)
('&',1)
('@',1)
('{',1)
('}',1)
('\189',1)
('\194',1)

Icon and Unicon[edit]

The example below counts (case insensitive) letters and was run on a version of this source file.

link printf
 
# Count and print the letters of every file named on the command line.
procedure main(A)
every PrintCount(CountLetters(!A))
end
 
# Open file fn and return a table that maps each lower-case letter to
# the number of times it occurs, with upper case folded to lower case.
# If the file cannot be opened a message goes to &errout and the
# procedure fails (no table is returned).
procedure CountLetters(fn) #: Return case insensitive count of letters
K := table(0)
if f := open(fn,"r") then {
# read each line, lower-case it with map(), and generate its characters
every c := !map(|read(f)) do
if any(&lcase,c) then K[c] +:= 1
close(f)
return K
}
else write(&errout,"Unable to open file ",fn)
end
 
# Print each key of table T with its count as "key - count", one per line.
procedure PrintCount(T) #: Print the letters
every c := key(T) do
printf("%s - %d\n",c,T[c])
end

printf.icn provides formatting

Output:
c - 17
k - 5
s - 10
h - 2
p - 10
e - 41
m - 2
u - 12
b - 2
r - 25
o - 16
w - 1
d - 10
l - 10
t - 27
a - 10
i - 13
y - 5
f - 12
n - 28
v - 4

IS-BASIC[edit]

100 PROGRAM "Letters.bas"
105 REM Counts the letters A-Z (case-insensitively) read from "list.txt"
110 NUMERIC LETT(65 TO 90)
115 REM LETT is indexed by the character codes 65..90; clear it first
120 FOR I=65 TO 90
130 LET LETT(I)=0
140 NEXT
150 LET EOF=0
160 OPEN #1:"list.txt"
165 REM Read until the IOERROR handler below sets the EOF flag
170 WHEN EXCEPTION USE IOERROR
180 DO
190 GET #1:A$
200 LET A$=UCASE$(A$)
210 IF A$>="A" AND A$<="Z" THEN LET LETT(ORD(A$))=LETT(ORD(A$))+1
220 LOOP UNTIL EOF
230 END WHEN
235 REM Print every letter followed by its count
240 FOR I=65 TO 90
250 PRINT CHR$(I);":";LETT(I),
260 NEXT
265 REM I/O error handler: flags the end of input and closes the file
266 REM NOTE(review): presumably raised when GET reaches end of file - confirm
270 HANDLER IOERROR
280 LET EOF=-1
290 CLOSE #1
300 CONTINUE
310 END HANDLER

J[edit]


Input is a directory-path with filename. Result is 26 integers representing counts of each letter, in alphabetic order (a's count is first).

NB. ltrfreq y : y is a file name; result is 26 letter counts, A's first,
NB. with lower case folded to upper case.
ltrfreq=: 3 : 0
letters=. u: 65 + i.26 NB. upper case letters
NB. Prepend the alphabet to the upper-cased text, drop every non-letter,
NB. count each distinct letter (#/.~), then subtract the prepended one (<:).
NB. Prepending fixes the order and makes absent letters count as 0.
<: #/.~ letters (, -. -.~) toupper fread y
)

Example use (based on a configuration file from another task):

   ltrfreq 'config.file'
88 17 17 24 79 18 19 19 66 0 2 26 26 57 54 31 1 53 43 59 19 6 2 0 8 0

Java[edit]

Works with: Java version 5+
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.util.Arrays;
 
public class LetterFreq {
public static int[] countLetters(String filename) throws IOException{
int[] freqs = new int[26];
BufferedReader in = new BufferedReader(new FileReader(filename));
String line;
while((line = in.readLine()) != null){
line = line.toUpperCase();
for(char ch:line.toCharArray()){
if(Character.isLetter(ch)){
freqs[ch - 'A']++;
}
}
}
in.close();
return freqs;
}
 
public static void main(String[] args) throws IOException{
System.out.println(Arrays.toString(countLetters("filename.txt")));
}
}
Works with: Java version 7+

In Java 7, we can use try with resources. The countLetters method would look like this:

/**
 * Counts the letters A to Z of a text file, ignoring case.
 *
 * Only the 26 basic Latin letters are counted; other characters,
 * including accented or non-Latin letters, are ignored so that they can
 * never index outside the result array.
 *
 * @param filename path of the text file to read
 * @return array of 26 counts; index 0 is A/a, index 25 is Z/z
 * @throws IOException if the file cannot be opened or read
 */
public static int[] countLetters(String filename) throws IOException{
	int[] freqs = new int[26];
	// try-with-resources closes the reader on every exit path
	try(BufferedReader in = new BufferedReader(new FileReader(filename))){
		String line;
		while((line = in.readLine()) != null){
			for(char ch:line.toCharArray()){
				if(ch >= 'a' && ch <= 'z'){
					freqs[ch - 'a']++;
				}else if(ch >= 'A' && ch <= 'Z'){
					freqs[ch - 'A']++;
				}
			}
		}
	}
	return freqs;
}
Works with: Java version 8+

In Java 8, we can use streams. This version counts every Unicode letter, not just A to Z, and keys each count by the letter's integer character code. The countLetters method would look like this:

/**
 * Counts every Unicode letter of a text file.
 *
 * Lines are split into code points (not UTF-16 code units), so letters
 * outside the Basic Multilingual Plane are counted as one character.
 * Upper and lower case are counted separately.
 *
 * @param filename path of the text file (decoded as UTF-8 by Files.lines)
 * @return map from code point to number of occurrences
 * @throws IOException if the file cannot be opened
 */
public static Map<Integer, Long> countLetters(String filename) throws IOException {
	// Files.lines keeps the file open until the stream is closed, so the
	// stream is managed with try-with-resources.
	try (java.util.stream.Stream<String> lines = Files.lines(Paths.get(filename))) {
		return lines
			.flatMapToInt(String::codePoints)
			.filter(Character::isLetter)
			.boxed()
			.collect(Collectors.groupingBy(Function.identity(), Collectors.counting()));
	}
}

JavaScript[edit]

ES5[edit]

JavaScript is no longer used only in environments which are carefully isolated from file systems, but JavaScript standards still do not specify standard file-system functions. Leaving aside the particular and variable details of how files will be opened and read in environments like Node.js and OS X JavaScript for Automation etc., we can still use core JavaScript (ES5 in the example below), to count the characters in a text once it has been read from a file system.

// Count every character of the text and return [character, count] pairs
// ordered by character.
(function (txt) {

    var counts = {};

    txt.split('').forEach(function (ch) {
        counts[ch] = (counts[ch] || 0) + 1;
    });

    return Object.keys(counts).sort().map(function (ch) {
        return [ch, counts[ch]];
    });

})("Not all that Mrs. Bennet, however, with the assistance of her five\
daughters, could ask on the subject, was sufficient to draw from her\
husband any satisfactory description of Mr. Bingley. They attacked him\
in various ways--with barefaced questions, ingenious suppositions, and\
distant surmises; but he eluded the skill of them all, and they were at\
last obliged to accept the second-hand intelligence of their neighbour,\
Lady Lucas. Her report was highly favourable. Sir William had been\
delighted with him. He was quite young, wonderfully handsome, extremely\
agreeable, and, to crown the whole, he meant to be at the next assembly\
with a large party. Nothing could be more delightful! To be fond of\
dancing was a certain step towards falling in love; and very lively\
hopes of Mr. Bingley's heart were entertained."
);
Output:
[[" ", 121], ["!", 1], ["'", 1], [",", 13], ["-", 3], [".", 9], [";", 2], 
["B", 3], ["H", 2], ["L", 2], ["M", 3], ["N", 2], ["S", 1], ["T", 2], ["W", 1],
["a", 53], ["b", 13], ["c", 17], ["d", 29], ["e", 82], ["f", 17], ["g", 16], ["h", 36],
["i", 44], ["j", 1], ["k", 3], ["l", 34], ["m", 11], ["n", 41], ["o", 40], ["p", 8],
["q", 2], ["r", 35], ["s", 39], ["t", 55], ["u", 20], ["v", 7], ["w", 17], ["x", 2], ["y", 16]]

ES6[edit]

Using the 'JavaScript for Automation' embedding of a JSContext on macOS, for access to the file system:

(() => {
'use strict';
 
 
// charCounts :: String -> [(Char, Int)]
const charCounts = s =>
sortBy(flip(comparing(snd)))(
Object.entries(
chars(s).reduce(
(a, c) => (
a[c] = 1 + (a[c] || 0),
a
), {}
)
)
);
 
// ----------------------- TEST -----------------------
// main :: IO ()
const main = () =>
either(msg => msg)(
compose(
unlines,
map(JSON.stringify),
charCounts
)
)(readFileLR('~/Code/charCount/miserables.txt'));
 
 
// -----------------GENERIC FUNCTIONS -----------------
 
// Left :: a -> Either a b
const Left = x => ({
type: 'Either',
Left: x
});
 
 
// Right :: b -> Either a b
const Right = x => ({
type: 'Either',
Right: x
});
 
 
// chars :: String -> [Char]
const chars = s =>
s.split('');
 
 
// comparing :: (a -> b) -> (a -> a -> Ordering)
const comparing = f =>
    // A curried comparator which orders two values by
    // the keys that f derives from them: -1, 0 or 1.
    x => y => {
        const [keyX, keyY] = [f(x), f(y)];
        if (keyX < keyY) {
            return -1;
        }
        return keyX > keyY ? 1 : 0;
    };
 
// compose (<<<) :: (b -> c) -> (a -> b) -> a -> c
const compose = (...fs) =>
    // Right-to-left composition: the last function
    // given is the first to be applied to x.
    x => fs.reduceRight(
        (value, f) => f(value),
        x
    );
 
// either :: (a -> c) -> (b -> c) -> Either a b -> c
const either = fl =>
fr => e => 'Either' === e.type ? (
undefined !== e.Left ? (
fl(e.Left)
) : fr(e.Right)
) : undefined;
 
 
// flip :: (a -> b -> c) -> b -> a -> c
const flip = f =>
1 < f.length ? (
(a, b) => f(b, a)
) : (x => y => f(y)(x));
 
 
// map :: (a -> b) -> [a] -> [b]
const map = f =>
// The list obtained by applying f
// to each element of xs.
// (The image of xs under f).
xs => (
Array.isArray(xs) ? (
xs
) : xs.split('')
).map(f);
 
 
// readFileLR :: FilePath -> Either String IO String
const readFileLR = fp => {
const
e = $(),
ns = $.NSString
.stringWithContentsOfFileEncodingError(
$(fp).stringByStandardizingPath,
$.NSUTF8StringEncoding,
e
);
return ns.isNil() ? (
Left(ObjC.unwrap(e.localizedDescription))
) : Right(ObjC.unwrap(ns));
};
 
 
// snd :: (a, b) -> b
const snd = tpl => tpl[1];
 
 
// sortBy :: (a -> a -> Ordering) -> [a] -> [a]
const sortBy = f =>
xs => xs.slice()
.sort((a, b) => f(a)(b));
 
 
// unlines :: [String] -> String
const unlines = xs =>
// A single string formed by the intercalation
// of a list of strings with the newline character.
xs.join('\n');
 
// MAIN ---
return main();
})();
Output:
[" ",516452]
["e",325769]
["t",222955]
["a",199774]
["o",180987]
["h",170234]
["n",166901]
["i",165221]
["s",157643]
["r",145136]
["d",106989]
["l",97091]
["\n",73828]
["u",67370]
["c",62760]
["m",56011]
["f",53438]
["w",53332]
[",",48784]
["g",46086]
["p",39958]
["y",37945]
["b",34313]
[".",30487]
["v",24058]
["€",21159]
["â",21155]
["k",14110]
["T",12571]
["I",10067]
["A",7359]
["œ",7121]
["",7033]
["H",6605]
["M",6208]
[";",5885]
["E",4969]
["-",4775]
["C",4594]
["S",4404]
["x",3694]
["™",3633]
["!",3539]
["R",3535]
["P",3429]
["O",3401]
["j",3392]
["B",3193]
["W",3181]
["”",3071]
["N",3053]
["?",2976]
["F",2768]
["G",2512]
[":",2463]
["L",2452]
["J",2448]
["q",2398]
["Ã",2296]
["V",2210]
["_",2068]
["z",1847]
["D",1758]
["©",1328]
["Y",1238]
["U",900]
["1",732]
["8",412]
["X",333]
["K",323]
["˜",298]
["¨",294]
["3",254]
["2",242]
["0",212]
["5",208]
["*",179]
["(",172]
[")",172]
["4",170]
["‰",146]
["6",143]
["7",140]
["Q",135]
["[",122]
["]",122]
["9",118]
["¦",107]
["ª",74]
["Z",59]
["¢",56]
["§",48]
["®",39]
["Å",38]
["¼",37]
["\"",37]
["“",35]
["´",34]
["|",24]
["¯",18]
["¹",18]
["/",12]
["†",10]
["»",9]
["'",8]
["ˆ",5]
["«",5]
["+",5]
["’",3]
["±",2]
["‡",2]
["$",2]
["#",1]
["&",1]
["Â",1]
["½",1]
["{",1]
["}",1]
["%",1]
["@",1]


Or, using an object as a hash-table, and the reduce method: (note that this version omits the opening of a text file which is specified in the task description):

(() => {
'use strict';
 
// letterfreq :: String -> Object
// An object whose keys are the characters of the text and whose
// values are the number of times each one occurs. Spreading the
// string iterates whole code points rather than UTF-16 code units.
const letterfreq = text => [...text]
.reduce(
(a, c) => (a[c] = (a[c] || 0) + 1, a),
{}
);
 
// The counts for a sample text, pretty-printed as JSON with
// two-space indentation.
return JSON.stringify(
letterfreq(
`remember, remember, the fifth of november
gunpowder treason and plot
I see no reason why gunpowder treason
should ever be forgot`
),
null, 2
);
})();

Using the spread operator, you get the unicode characters rather than the UTF-16 code units.

Output:
{
  "r": 12,
  "e": 19,
  "m": 5,
  "b": 4,
  ",": 2,
  " ": 56,
  "t": 6,
  "h": 4,
  "f": 4,
  "i": 1,
  "o": 12,
  "n": 8,
  "v": 2,
  "\n": 3,
  "g": 3,
  "u": 3,
  "p": 3,
  "w": 3,
  "d": 4,
  "a": 4,
  "s": 5,
  "l": 2,
  "I": 1,
  "y": 1
}

jq[edit]

The following program will report the frequency of all characters in the input file, including newlines, returns, etc, provided the file will fit in memory.
 
# Input: an array of strings.
# Output: an object with the strings as keys,
# the values of which are the corresponding frequencies.
def counter:
reduce .[] as $item ( {}; .[$item] += 1 ) ;
 
# Main program: the input is one string.  It is exploded into codepoints,
# each codepoint is turned back into a one-character string, and the
# resulting array is counted.
# For neatness we sort the keys:
# one [character, count] array is emitted per distinct character.
explode | map( [.] | implode ) | counter | . as $counter
| keys | sort[] | [., $counter[.] ]
 
 
Example:
jq -s -R -c -f Letter_frequency.jq somefile.txt
Output:
["\n",12]
[" ",124]
["#",1]
["$",8]
["(",4]
[")",4]
["+",3]
[",",4]
["-",4]
[".",9]
["0",3]
["1",7]
[":",2]
[";",2]
["=",4]
...

Julia[edit]

using DataStructures
 
# Read `file` as one String, keep the characters for which `fltr` returns
# true (by default every character), count them with DataStructures.counter,
# and return the counts as a dictionary sorted by character.
function letterfreq(file::AbstractString; fltr::Function=(_) -> true)
sort(Dict(counter(filter(fltr, read(file, String)))))
end
 
# Show the letter counts (isletter filter) of this program's own source.
display(letterfreq("src/Letter_frequency.jl"; fltr=isletter))
 
Output:
DataStructures.OrderedDict{Char,Int64} with 29 entries:
  'A' => 1
  'C' => 1
  'D' => 2
  'F' => 1
  'L' => 3
  'S' => 2
  'a' => 9
  'b' => 1
  'c' => 13
  'd' => 5
  'e' => 30
  'f' => 13
  'g' => 4
  'h' => 10
  'i' => 14
  'j' => 1
  'k' => 3
  'l' => 11
  'n' => 15
  ⋮   => ⋮

K[edit]

+(?a;#:'=a:,/0:`)

Example: The file "hello.txt" contains the string "Hello, world!"

 
c:+(?a;#:'=a:,/0:`hello.txt)
 
Output:
(("H";1)
 ("e";1)
 ("l";3)
 ("o";2)
 (",";1)
 (" ";1)
 ("w";1)
 ("r";1)
 ("d";1)
 ("!";1))

Sort on decreasing occurrences:

 
c@>c[;1]
 
Output:
(("l";3)
 ("o";2)
 ("H";1)
 ("e";1)
 (",";1)
 (" ";1)
 ("w";1)
 ("r";1)
 ("d";1)
 ("!";1))

Kotlin[edit]

// version 1.1.2

import java.io.File

/**
 * Counts the letters a-z of "input.txt" (case-insensitively), prints one
 * "letter = count" line per letter found, in alphabetical order, and
 * finally the total number of letters.
 */
fun main(args: Array<String>) {
    val text = File("input.txt").readText().toLowerCase()
    val counts = sortedMapOf<Char, Int>()
    for (ch in text) {
        if (ch in 'a'..'z') counts[ch] = (counts[ch] ?: 0) + 1
    }
    for ((letter, count) in counts) println("$letter = $count")
    val sum = counts.values.sum()
    println("\nTotal letters = $sum")
}
Output:

'input.txt' just contains two pangrams:

The quick brown fox jumps over the lazy dog. Sphinx of black quartz, judge my vow.

a = 3
b = 2
c = 2
d = 2
e = 4
f = 2
g = 2
h = 3
i = 2
j = 2
k = 2
l = 2
m = 2
n = 2
o = 6
p = 2
q = 2
r = 3
s = 2
t = 3
u = 4
v = 2
w = 2
x = 2
y = 2
z = 2

Total letters = 64

Lambdatalk[edit]

In this entry we choose to show how lambdatalk can use any existing javascript code (say the #Javascript entry in this page), and build an interface to use it as a standard lambdatalk function. So, applied to any string the W.frequency primitive returns a pair structure containing the array of chars and the corresponding array of frequencies.

 
 
{script
// W.frequency is added to the lambdatalk dictionary via the {script ...} special form
 
LAMBDATALK.DICT['W.frequency'] = function() {
 
// 1) simply copied from the rosetta.org #Javascript entry
var frequency = function(txt) {
var cs = txt.split(''),
i = cs.length,
dct = {},
c = '';
while (i--) {
c = cs[i];
dct[c] = (dct[c] || 0) + 1;
}
var keys = Object.keys(dct);
keys.sort();
return keys.map(function (c) { return [c, dct[c]]; });
};
 
// 2) then interfaced with lambdatalk
var args = arguments[0].trim().replace( /\s+/g, "␣" );
 
var output = frequency( args );
 
for (var a=[], b=[], i=0; i< output.length; i++) {
a.push( output[i][0] );
b.push( output[i][1] );
}
 
var pair = "{cons {A.new " + a.join(" ") +
"} {A.new " + b.join(" ") + "}}"
 
return LAMBDATALK.eval_forms( pair );
};
}
 
{def S3
Not all that Mrs. Bennet, however, with the assistance of her five daughters, could ask on the subject, was sufficient to draw from her husband any satisfactory description of Mr. Bingley. They attacked him in various ways--with barefaced questions, ingenious suppositions, and distant surmises; but he eluded the skill of them all, and they were at last obliged to accept the second-hand intelligence of their neighbour, Lady Lucas. Her report was highly favourable. Sir William had been delighted with him. He was quite young, wonderfully handsome, extremely agreeable, and, to crown the whole, he meant to be at the next assembly with a large party. Nothing could be more delightful! To be fond of dancing was a certain step towards falling in love; and very lively hopes of Mr. Bingley's heart were entertained.
}
-> S3
{def S3.freq {W.frequency {S3}}}
-> S3.freq
 
characters: {car {S3.freq}}
-> [!,',,,-,.,;,B,H,L,M,N,S,T,W,a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r,s,t,u,v,w,x,y,␣]
 
frequencies: {cdr {S3.freq}}
-> [1,1,13,3,9,2,3,2,2,3,2,1,2,1,53,13,17,29,82,17,16,36,44,1,3,34,11,41,40,8,2,35,39,55,20,7,17,2,16,132]
}
 
 
 

langur[edit]

Works with: langur version 0.7.0
val .countLetters = f(.s) {
for[=h{}] .s2 in split(replace(.s, RE/\P{L}/)) {
_for[.s2; 0] += 1
}
}
 
val .counts = .countLetters(readfile "./fuzz.txt")
writeln join "\n", map f(.k) $"\.k;: \.counts[.k];", keys .counts
Output:

The input contains "fuzzy furry kittens ασδ ξκλ ασδ ξα" (random Greek letters at the end) and the output is as follows.

f: 2
u: 2
z: 2
y: 2
r: 2
k: 1
i: 1
t: 2
e: 1
n: 1
s: 1
α: 3
σ: 2
δ: 2
ξ: 2
κ: 1
λ: 1

Lasso[edit]

local(
str = 'Hello world!',
freq = map
)
// as a loop. arguably quicker than query expression
loop(#str->size) => {
#freq->keys !>> #str->get(loop_count) ?
#freq->insert(#str->get(loop_count) = #str->values->find(#str->get(loop_count))->size)
}
 
// or
local(
str = 'Hello world!',
freq = map
)
// as query expression, less code
with i in #str->values where #freq->keys !>> #i do => {
#freq->insert(#i = #str->values->find(#i)->size)
}
 
// output #freq
with elem in #freq->keys do => {^
'"'+#elem+'": '+#freq->find(#elem)+'\r'
^}

Liberty BASIC[edit]

Un-rem a line to convert to all-upper-case. Letter freq'y is printed as percentages.

 
' Read the whole of "text.txt" into txt$ and remember its length.
open "text.txt" for input as #i
txt$ =input$( #i, lof( #i))
Le =len( txt$)
close #i
 
' One counter per character code 0..255.
dim LetterFreqy( 255)
 
' txt$ =upper$( txt$)
 
' Tally every character whose code is 32 or above; lower codes are skipped.
for i =1 to Le
char =asc( mid$( txt$, i, 1))
if char >=32 then LetterFreqy( char) =LetterFreqy( char) +1
next i
 
' Report each character that occurred, as a percentage of the total
' length Le (which includes the skipped characters).
for j =32 to 255
if LetterFreqy( j) <>0 then print " Character #"; j, "("; chr$( j);_
") appeared "; using( "##.##", 100 *LetterFreqy( j) /Le); "% of the time."
next j
 
end
 

Lua[edit]

This solution counts letters only, which could be changed by altering the pattern argument to 'gmatch' on line 31. It also treats upper and lower case letters as distinct, which could be changed by changing everything to upper or lower case with string.upper() or string.lower() before tallying.

-- Read the named file in one go and return its contents as a string.
-- Raises an error (via assert) if the file cannot be opened.
function readFile (filename)
    local handle = assert(io.open(filename, "r"))
    local text = handle:read("*all")
    handle:close()
    return text
end
 
-- Create a counter closure with its own private tally table.
-- Calling the closure with a value adds one to that value's count;
-- calling it with no argument (or nil/false) returns the tally table.
function tally ()
    local counts = {}

    return function (x)
        if not x then
            return counts
        end
        counts[x] = (counts[x] or 0) + 1
    end
end
 
-- Main procedure
local letterCount = tally()
for letter in readFile(arg[1]):gmatch("%a") do
letterCount(letter)
end
for k, v in pairs(letterCount()) do
print(k, v)
end

Output from running this script on itself:

i       24
g       2
h       4
e       61
f       16
c       19
d       17
R       2
o       31
p       7
m       4
n       42
k       4
l       40
y       4
w       1
x       7
u       18
v       2
s       14
t       54
a       24
C       3
M       1
A       1
F       2
r       32

M2000 Interpreter[edit]

 
document file1$={Open a text file and count the occurrences of each letter.
Some of these programs count all characters (including punctuation), but some only count letters A to Z
}
const Ansi=3, nl$=chr$(13)+chr$(10), Console=-2
save.doc file1$, "checkdoc.txt", Ansi
open "checkdoc.txt" for input as F
buffer onechar as byte
m=0
dim m(65 to 90)
while not eof(#F)
get #F, onechar
a$=chr$(eval(onechar,0))
if a$ ~ "[A-Za-z]" then
m++
m(asc(ucase$(a$)))++
end if
end while
close #F
document Export$
for i=65 to 90
if m(i)>0 then Export$=format$("{0} - {1:2:4}%",chr$(i),m(i)/m*100)+nl$
next
print #Console, Export$
clipboard Export$
 
Output:
A - 6,87%
B - 0,76%
C - 8,40%
D - 1,53%
E - 12,2%
F - 2,29%
G - 1,53%
H - 3,05%
I - 3,05%
L - 5,34%
M - 2,29%
N - 8,40%
O - 9,92%
P - 2,29%
R - 6,11%
S - 5,34%
T - 12,2%
U - 6,11%
X - 0,76%
Y - 0,76%
Z - 0,76%

Maple[edit]

StringTools:-CharacterFrequencies(readbytes("File.txt",infinity,TEXT))

Mathematica / Wolfram Language[edit]

Tally[Characters[Import["file.txt","Text"]]]

MATLAB / Octave[edit]

function u = letter_frequency(t)
  % LETTER_FREQUENCY  Count and print the characters of a text.
  %   u = letter_frequency(t) prints one  "c":n  line for every character
  %   code 0..255 that occurs in t (a char array or a vector of codes) and
  %   returns the 256-by-1 vector of counts, where u(k+1) is the number of
  %   occurrences of code k.
  if ischar(t)
    t = abs(t);
  end;
  % sparse() sums the ones that fall on the same row, i.e. the same code.
  A = sparse(t(:)+1,1,1,256,1);
  u = full(A);
  codes = find(u)-1;
  if ~isempty(codes)
    % fprintf exists in both MATLAB and Octave (printf is Octave-only).
    fprintf('"%c":%i\n',[codes,u(codes+1)]');
  end
end

Nanoquery[edit]

// Counts the letters (case-insensitively) and digits of the file named by
// the last command-line argument and prints them in order of first appearance.

// define a list to hold characters and amounts
// (amounts[k] is the count for characters[k])
characters = list()
amounts = list()

// define the alphabet as a string to check only letters and numbers
alpha = "abcdefghijklmnopqrstuvwxyz0123456789"

// get the filename as an argument
fname = args[len(args) - 1]

// read the entire file into a string
contents = new(Nanoquery.IO.File, fname).readAll()

// loop through all the characters in the array
for i in range(0, len(contents) - 1)
	// get the character to check
	toCheck = str(contents[i]).toLowerCase()

	// check if the current character is in the array
	if ((alpha .contains. toCheck) && (characters .contains. toCheck))
		// if it's there, increment its amount
		// NOTE(review): relies on characters[toCheck] yielding the position of toCheck - confirm
		index = characters[toCheck]
		amounts[index] = amounts[index] + 1
	else
		if (alpha .contains. toCheck)
			// if it's not, add it; this first sighting already counts
			// as one occurrence (starting at 0 made every total one too low)
			append characters toCheck
			append amounts 1
		end
	end if
end for

// output the amounts
println format("%-20s %s", "Character", "Amount")
println "=" * 30
for i in range(0, len(characters) - 1)
	println format("%-20s %d", characters[i], amounts[i])
end for
Output:
$ java -jar ../nanoquery-2.3_1462.jar -b letterfreq.nq sherlock-holmes.txt
Character            Amount
==============================
p                    7239
r                    25708
o                    34866
j                    544
e                    54972
c                    11118
t                    40545
g                    8311
u                    13604
n                    29701
b                    6645
s                    27941
h                    29588
a                    36146
d                    19064
v                    4567
f                    9362
l                    17633
k                    3684
m                    12150
y                    9776
i                    31240
w                    11554
2                    45
9                    23
0                    104
1                    127
6                    30
8                    46
z                    152
x                    578
q                    437
5                    27
4                    29
7                    25
3                    25

NetRexx[edit]

Translation of: REXX
/* NetRexx ************************************************************
* 22.05.2013 Walter Pachl translated from REXX
**********************************************************************/

options replace format comments java crossref symbols nobinary
 
parse arg dsn .
if dsn = '' then
dsn = 'test.txt'
cnt=0
totChars=0 /*count of the total num of chars*/
totLetters=0 /*count of the total num letters.*/
indent=' '.left(20) /*used for indentation of output.*/
lines = scanFile(dsn)
loop l_ = 1 to lines[0]
line = lines[l_]
 
Say '>'line'<' line.length /* that's in test.txt */
/*
lrx=left_right(line)
Parse lrx leftx rightx
Say ' 'leftx
Say ' 'rightx
*/

loop k=1 for line.length() /*loop over characters */
totChars=totChars+1 /*Increment total number of chars*/
c=line.substr(k,1) /*get character number k */
cnt[c]=cnt[c]+1 /*increment the character's count*/
End
end l_
 
w=totChars.length /*used for right-aligning counts.*/
say 'file -----' dsn "----- has" lines[0] 'records.'
say 'file -----' dsn "----- has" totChars 'characters.'
Loop L=0 to 255 /* display nonzero letter counts */
c=l.d2c /* the character in question */
if cnt[c]>0 & c.datatype('M')>0 Then Do /* was found in the file */
/* and is a latin letter */
say indent "(Latin) letter " c 'count:' cnt[c].right(w) /* tell */
totLetters=totLetters+cnt[c] /* increment number of letters */
End
End
 
say 'file -----' dsn "----- has" totLetters '(Latin) letters.'
say ' other charactes follow'
other=0
loop m=0 to 255 /* now for non-letters */
c=m.d2c /* the character in question */
y=c.c2x /* the hex representation */
if cnt[c]>0 & c.datatype('M')=0 Then Do /* was found in the file */
/* and is not a latin letter */
other=other+cnt[c] /* increment count */
_=cnt[c].right(w) /* prepare output of count */
select /*make the character viewable. */
when c<<' ' | m==255 then say indent "'"y"'x character count:" _
when c==' ' then say indent "blank character count:" _
otherwise say indent " " c 'character count:' _
end
end
end
say 'file -----' dsn "----- has" other 'other characters.'
say 'file -----' dsn "----- has" totLetters 'letters.'
 
-- Read a file and return its contents as a Rexx indexed string:
-- element [0] holds the number of lines, elements [1]..[n] hold the lines.
-- If the file cannot be found, the stack trace is printed and a result with
-- a line count of 0 is returned.
method scanFile(dsn) public static returns Rexx

  fileLines = ''
  fileLines[0] = 0 -- callers loop "1 to lines[0]", so this must always be numeric
  do
    inFile = File(dsn)
    inFileScanner = Scanner(inFile)
    -- hasNextLine() is the test that pairs with nextLine(); hasNext() looks
    -- for a token and therefore ignores trailing blank lines
    loop l_ = 1 while inFileScanner.hasNextLine()
      fileLines[0] = l_
      fileLines[l_] = inFileScanner.nextLine()
      end l_
    inFileScanner.close()

  catch ex = FileNotFoundException
    ex.printStackTrace
  end

  return fileLines

Nim[edit]

import tables, os

# Tally every character of the file named by the first command-line argument.
# Line terminators are not counted because line iteration strips them.
var t = initCountTable[char]()
# The `lines(filename)` iterator opens the file and closes it again when the
# loop ends, so no file handle is left open.
for l in lines(paramStr(1)):
  for c in l:
    t.inc(c)
echo t

Objeck[edit]

 
use IO;

bundle Default {
  class Test {
    # Prints the number of occurrences of each letter A-Z in "filename.txt".
    function : Main(args : String[]) ~ Nil {
      freqs := CountLetters("filename.txt");
      # the upper bound is inclusive so that 'Z' is reported as well
      for(i := 'A'; i <= 'Z'; i += 1;) {
        Console->Print(i->As(Char))->Print("=>")->PrintLine(freqs[i - 'A']);
      };
    }

    # Reads the file line by line and tallies the letters A-Z, ignoring case.
    # Returns a 26-element array; index 0 holds the count for 'A'.
    function : CountLetters(filename : String) ~ Int[] {
      freqs := Int->New[26];
      reader := FileReader->New(filename);
      while(reader->IsEOF() <> true) {
        line := reader->ReadString()->ToUpper();
        each(i : line) {
          ch := line->Get(i);
          # explicit range check: any other character would index outside
          # the 26-element array
          if(ch >= 'A' & ch <= 'Z') {
            index := ch - 'A';
            freqs[index] := freqs[index] + 1;
          };
        };
      };
      reader->Close();

      return freqs;
    }
  }
}
 

Objective-C[edit]

#import <Foundation/Foundation.h>
 
// Counts the occurrences of every letter (as defined by
// +[NSCharacterSet letterCharacterSet]) in the UTF-8 text file named by the
// first command-line argument and logs one "letter => count" line per letter.
// Returns 1 if no file name was given or the file could not be read as UTF-8.
int main (int argc, const char *argv[]) {
  @autoreleasepool {

    if (argc < 2) {
      NSLog(@"usage: %s <file>", argv[0]);
      return 1;
    }

    NSData *data = [NSData dataWithContentsOfFile:@(argv[1])];
    NSString *string = data ? [[NSString alloc] initWithData:data encoding:NSUTF8StringEncoding] : nil;
    if (string == nil) {
      NSLog(@"cannot read %s as UTF-8 text", argv[1]);
      return 1;
    }

    // Fetch the character set once instead of on every loop iteration.
    NSCharacterSet *letters = [NSCharacterSet letterCharacterSet];
    NSCountedSet *countedSet = [[NSCountedSet alloc] init];
    NSUInteger len = [string length];
    for (NSUInteger i = 0; i < len; i++) {
      unichar c = [string characterAtIndex:i];
      if ([letters characterIsMember:c])
        [countedSet addObject:@(c)];
    }
    for (NSNumber *chr in countedSet) {
      // NSUInteger is not unsigned long on every platform; cast to match %lu.
      NSLog(@"%C => %lu", (unichar)[chr integerValue], (unsigned long)[countedSet countForObject:chr]);
    }

  }
  return 0;
}

OCaml[edit]

We open a text file and compute the letter frequency. Characters other than [a-z] and [A-Z] are ignored, and upper-case letters are converted to lower case before being counted.

(* Counts the letters a-z (case-insensitively) in the file named by the first
   command-line argument and prints one "letter -> count" line per letter. *)
let () =
  let ic = open_in Sys.argv.(1) in
  let base = int_of_char 'a' in
  let arr = Array.make 26 0 in
  try while true do
    (* Char.lowercase is deprecated and absent from OCaml 5;
       lowercase_ascii is sufficient because only a-z are tallied *)
    let c = Char.lowercase_ascii (input_char ic) in
    let ndx = int_of_char c - base in
    if ndx < 26 && ndx >= 0 then
      arr.(ndx) <- succ arr.(ndx)
  done
  with End_of_file ->
    (* input_char signals the end of the file with this exception *)
    close_in ic;
    for i=0 to 25 do
      Printf.printf "%c -> %d\n" (char_of_int(i + base)) arr.(i)
    done

If we want to count all the characters in a UTF-8 file, we must use an external library, for example Batteries. The following function takes as input a string containing the path to the file and prints, on standard output, every character together with its frequency, ordered by increasing frequency.

 
open Batteries
 
(* Prints every character of the UTF-8 file [file] followed by its number of
   occurrences, least frequent first. *)
let frequency file =
  let freq = Hashtbl.create 52 in
  File.with_file_in file
    (* [modify_def v0 k f tbl] stores [f v0] for a key that is not yet bound,
       so the default must be 0 for a first occurrence to be recorded as 1 *)
    (Enum.iter (fun c -> Hashtbl.modify_def 0 c succ freq) % Text.chars_of);
  List.iter (fun (k,v) -> Text.write_text stdout k;
                          Printf.printf " %d\n" v)
  @@ List.sort (fun (_,v) (_,v') -> compare v v')
  @@ Hashtbl.fold (fun k v l -> (Text.of_uchar k,v) :: l) freq []
 

OxygenBasic[edit]

 
' Counts how often each character code occurs in "t.txt" and prints a
' table of the counts for codes 32 to 255.
indexbase 0
 
' NOTE(review): with indexbase 0, c[255] may only provide indices 0..254,
' while the loops below use index 255 - confirm the array is large enough.
sys a,e,i,c[255]
 
string s=getfile "t.txt"
 
e=len s
 
' tally the character code found at each position of the file contents
for i=1 to e
a=asc(s,i)
++c(a)
next
 
' build the report: one "character <tab> count" line per code
cr=chr(13)+chr(10)
pr="Char Frequencies" cr cr
for i=32 to 255
pr+=chr(i) chr(9) c(i) cr
next
 
print pr
'putfile "CharCount.txt",pr
 

PARI/GP[edit]

\\ Count ASCII letters case-insensitively: v[1] is A/a, ..., v[26] is Z/z.
v=vector(26);
\\ readstr returns the lines of the file as strings; readvec would instead
\\ evaluate every line as a GP expression, which is wrong for arbitrary text.
U=readstr("foo.txt");
for(i=1,#U,u=Vecsmall(U[i]);for(j=1,#u,if(u[j]>64&&u[j]<91,v[u[j]-64]++,u[j]>96&&u[j]<123,v[u[j]-96]++)));
v

Pascal[edit]

program letterFrequency(input, output, stdErr);
var
	chart: array[char] of integer;
	c: char;
begin
	// every tally starts at zero
	for c := low(chart) to high(chart) do
		chart[c] := 0;

	// consume standard input one character at a time
	while not EOF(input) do
	begin
		read(input, c);
		inc(chart[c]);
	end;

	// now, chart[someLetter] gives you the letter's frequency
end.

Perl[edit]

Counts letters in files given on command line or piped to stdin. Case insensitive.

# Tally every character (lower-cased) of each input line, then report a..z.
while (my $line = <>) {
    $cnt{lc $_}++ for split //, $line;
}
print "$_: ", $cnt{$_}//0, "\n" for 'a' .. 'z';

Phix[edit]

Counts own source or supplied filename

sequence lc = repeat(0,#7E)
integer fn = open(command_line()[$],"rb"), ch
while 1 do
ch = getc(fn)
if ch=-1 then exit end if
if ch>=' ' and ch<#7F then
lc[ch] += 1
end if
end while
close(fn)
 
for i=' ' to #7E do
if lc[i]!=0 then
printf(1,"'%c': %d%s",{i,lc[i],iff(mod(i,32)=31?'\n':'\t')})
end if
end for
{} = wait_key()
Output:
' ': 77 '!': 1  '"': 4  '#': 3  '$': 1  '%': 3  ''': 10 '(': 9  ')': 9  '+': 1  ',': 8  '-': 1  '0': 2  '1': 5  '2': 1  '3': 2  '7': 3  ':': 2  '<': 1  '=': 10 '>': 1  '?': 1
'E': 2  'F': 1  '[': 4  '\': 2  ']': 4  '_': 2
'a': 4  'b': 1  'c': 15 'd': 11 'e': 23 'f': 14 'g': 2  'h': 11 'i': 19 'k': 1  'l': 8  'm': 3  'n': 19 'o': 9  'p': 3  'q': 1  'r': 6  's': 3  't': 11 'u': 1  'w': 3  'x': 1    'y': 1  '{': 2  '}': 2

Phixmonti[edit]

0 255 repeat var ascCodes
 
"unixdict.txt" "r" fopen var file
 
file 0 < not
if
true
while
file fgets
dup 0 < not
if
len 1 swap 2 tolist
for
var i
i get ascCodes over get 1 + rot set var ascCodes
endfor
drop
true
else
drop
false
endif
endwhile
 
ascCodes len
for
var i
i get
if
i tochar print " = " print i get print nl
endif
endfor
file fclose
endif

PHP[edit]

<?php
print_r(array_count_values(str_split(file_get_contents($argv[1]))));
?>

PicoLisp[edit]

(let Freq NIL
(in "file.txt"
(while (char) (accu 'Freq @ 1)) )
(sort Freq) )

For a "file.txt":

abcd
cdef
Output:
-> (("^J" . 2) ("a" . 1) ("b" . 1) ("c" . 2) ("d" . 2) ("e" . 1) ("f" . 1))

Pike[edit]

 
string all = Stdio.read_file("README.md");
mapping res = ([]);
foreach(all/1, string char)
res[char]++;
write("%O\n", res);
 
Output:
([ /* 26 elements */
  "\n": 2,
  " ": 12,
  ".": 2,
  "/": 3,
  ":": 1,
  "P": 1,
  "T": 1,
  "a": 5,
  "c": 1,
  "d": 2,
  "e": 10,
  "f": 3,
  "g": 1,
  "h": 2,
  "i": 5,
  "k": 1,
  "l": 4,
  "m": 3,
  "n": 1,
  "o": 7,
  "p": 4,
  "r": 4,
  "s": 10,
  "t": 5,
  "u": 2,
  "x": 2
])

PL/I[edit]

 
frequencies: procedure options (main);
declare tallies(26) fixed binary static initial ((26) 0);
declare alphabet character (26) static initial
('ABCDEFGHIJKLMNOPQRSTUVWXYZ');
declare c character (1), i fixed binary;
declare in file;
 
open file (in) title ('/LETTER.DAT,type(text),recsize(200)') input;
 
on endfile (in) go to prepare_list;
 
do while('1'b);
get file (in) edit (c) (a(1)); put edit (c) (a);
i = index(alphabet, c);
if i > 0 then tallies(i) = tallies(i) + 1;
end;
 
prepare_list:
put skip list('Letter', 'Frequency');
do i = 1 to 26;
if tallies(i) > 0 then
put skip list (substr(alphabet, i, 1), tallies(i));
end;
end frequencies;

Data:

THEQUICKBROWNFOX
JUMPSOVERTHELAZYDOG
Output:
Letter                  Frequency 
A                               1 
B                               1 
C                               1 
D                               1 
E                               3 
F                               1 
G                               1 
H                               2 
I                               1 
J                               1 
K                               1 
L                               1 
M                               1 
N                               1 
O                               4 
P                               1 
Q                               1 
R                               2 
S                               1 
T                               2 
U                               2 
V                               1 
W                               1 
X                               1 
Y                               1 
Z                               1

PowerShell[edit]

 
function frequency ($string) {
$arr = $string.ToUpper().ToCharArray() |where{$_ -match '[A-KL-Z]'}
$n = $arr.count
$arr | group | foreach{
[pscustomobject]@{letter = "$($_.name)"; frequency = "$([math]::round($($_.Count/$n),5))"; count = "$($_.count)"}
} | sort letter
}
$file = "$($MyInvocation.MyCommand.Name )" #Put the name of your file here
frequency $(get-content $file -Raw)
 

Output:

letter frequency count
------ --------- -----
A      0.06809   16   
B      0.00426   1    
C      0.06809   16   
D      0.00851   2    
E      0.11064   26   
F      0.0383    9    
G      0.01702   4    
H      0.02979   7    
I      0.03404   8    
J      0.00426   1    
K      0.00426   1    
L      0.02553   6    
M      0.04255   10   
N      0.09362   22   
O      0.08085   19   
P      0.02128   5    
Q      0.01277   3    
R      0.10638   25   
S      0.02128   5    
T      0.10213   24   
U      0.05957   14   
V      0.00426   1    
W      0.00851   2    
Y      0.02979   7    
Z      0.00426   1 

Prolog[edit]

Works with SWI-Prolog.
Only alphabetic characters are counted, after conversion to uppercase.
Uses packList/2, defined in Run-length encoding#Prolog (reproduced below).

frequency(File) :-
read_file_to_codes(File, Code, []),
 
% we only keep alphabetic codes
include(my_code_type, Code, LstCharCode),
 
% we translate char_codes into uppercase atoms.
maplist(my_upcase, LstCharCode, LstChar),
 
% sort and pack the list
msort(LstChar, SortLstChar),
packList(SortLstChar, Freq),
maplist(my_write, Freq).
 
 
my_write([Num, Atom]) :-
swritef(A, '%3r', [Num]),
writef('Number of %w :%w\n', [Atom, A]).
 
 
my_code_type(Code) :-
code_type(Code, alpha).
 
my_upcase(CharCode, UpChar) :-
char_code(Atom, CharCode),
upcase_atom(Atom, UpChar).
 
:- use_module(library(clpfd)).
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% ?- packList([a,a,a,b,c,c,c,d,d,e], L).
% L = [[3,a],[1,b],[3,c],[2,d],[1,e]] .
%
% ?- packList(R, [[3,a],[1,b],[3,c],[2,d],[1,e]]).
% R = [a,a,a,b,c,c,c,d,d,e] .
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
packList([],[]).
 
packList([X],[[1,X]]) :-
!.
 
packList([X|Rest],[XRun|Packed]):-
run(X,Rest, XRun,RRest),
packList(RRest,Packed).
 
run(Var,[],[1,Var],[]).
 
run(Var,[Var|LRest],[N1, Var],RRest):-
N #> 0,
N1 #= N + 1,
run(Var,LRest,[N, Var],RRest).
 
run(Var,[Other|RRest], [1,Var],[Other|RRest]):-
dif(Var,Other).
 
Output:
for this file
Number of A : 63
Number of B :  7
Number of C : 53
Number of D : 29
Number of E : 65
...
Number of T : 52
Number of U : 20
Number of V : 10
Number of W :  8
Number of X :  6
Number of Y : 12
true .

PureBasic[edit]

Alphabetic codes are converted to uppercase before being used and no other codes are used as part of the calculations.

Procedure countLetters(Array letterCounts(1), textLine.s)
;counts only letters A -> Z, uses index 0 of letterCounts() to keep a total of all counts
Protected i, lineLength = Len(textLine), letter
 
textLine = UCase(textLine)
For i = 1 To lineLength
letter = Asc(Mid(textLine, i, 1)) - 'A' + 1
If letter >= 1 And letter <= 26
letterCounts(letter) + 1 ;tally individual letter count
letterCounts(0) + 1 ;increment total letter count
EndIf
Next
EndProcedure
 
If OpenConsole()
Define filename.s, fileID, i
filename = OpenFileRequester("Select text file to examine", "*.txt", "Text (*.txt)|*.txt;|All files (*.*)|*.*", 0)
fileID = 0
If ReadFile(fileID, filename)
Dim letterCounts(26) ;A - Z only, index 0 contains the total of all letter counts
 
Define textLine.s
While Not Eof(fileID)
textLine = ReadString(fileID)
countLetters(letterCounts(), textLine)
Wend
CloseFile(fileID)
 
PrintN("File: " + filename + #CRLF$)
PrintN("Letter  %Freq Count")
For i = 1 To 26
Print(" " + Chr(64 + i) + " ")
Print(RSet(StrF(100 * letterCounts(i) / letterCounts(0), 1), 5, " ") + " ")
PrintN(Str(letterCounts(i)))
Next
PrintN(#CRLF$ + "Total letter count in file: " + Str(letterCounts(0)))
EndIf
 
Print(#CRLF$ + #CRLF$ + "Press ENTER to exit"): Input()
CloseConsole()
EndIf
Output:
File: D:\_T\Text\dictionary.txt

Letter  %Freq  Count
  A       7.6  27743
  B       2.0  7248
  C       4.3  15433
  D       3.8  13798
  E      11.8  42917
  F       1.4  5030
  G       2.8  10336
  H       2.1  7720
  I       8.6  31141
  J       0.2  588
  K       0.8  2964
  L       5.3  19399
  M       2.7  9821
  N       7.1  25682
  O       6.1  22084
  P       2.9  10696
  Q       0.2  714
  R       7.5  27055
  S       8.0  28898
  T       7.1  25773
  U       3.3  12032
  V       1.1  4019
  W       0.9  3348
  X       0.3  1096
  Y       1.7  6251
  Z       0.3  1177

Total letter count in file: 362963

Python[edit]

Functional[edit]

Using collections.Counter[edit]

Works with: Python version 2.7+ and 3.1+
import collections, sys
 
def filecharcount(openfile):
    """Return a sorted list of (character, count) pairs for all the
    characters found in the lines yielded by openfile."""
    tally = collections.Counter()
    for line in openfile:
        tally.update(line)
    return sorted(tally.items())
 
# The with-statement closes the file as soon as counting is done,
# even if an error occurs while reading.
with open(sys.argv[1]) as f:
    print(filecharcount(f))

As a fold[edit]

Character counting can be conveniently expressed in terms of fold/reduce. See the example below, which also generates column-wrapped output:

Works with: Python version 3
'''Character counting as a fold'''
 
from functools import reduce
from itertools import repeat
from os.path import expanduser
 
 
# charCounts :: String -> Dict Char Int
def charCounts(s):
    '''A dictionary mapping each distinct character
       of s to the number of times it occurs.
    '''
    def tally(dct, c):
        dct[c] = dct.get(c, 0) + 1
        return dct
    return reduce(tally, s, {})
 
 
# TEST ----------------------------------------------------
# main :: IO ()
def main():
'''Listing in descending order of frequency.'''
 
print(
tabulated(
'Descending order of frequency:\n'
)(compose(repr)(fst))(compose(str)(snd))(
5
)(stet)(
sorted(
charCounts(
readFile('~/Code/charCount/readme.txt')
).items(),
key=swap,
reverse=True
)
)
)
 
 
# GENERIC -------------------------------------------------
 
# chunksOf :: Int -> [a] -> [[a]]
def chunksOf(n):
    '''A function which splits xs into consecutive
       slices of length n. If len(xs) is not evenly
       divisible by n, the final slice is shorter.
       For n < 1 the result is an empty list.
    '''
    def go(xs):
        if 0 < n:
            return [xs[i:i + n] for i in range(0, len(xs), n)]
        return []
    return go
 
 
# compose (<<<) :: (b -> c) -> (a -> b) -> a -> c
def compose(g):
    '''Curried right-to-left composition:
       compose(g)(f)(x) is g(f(x)).
    '''
    def after(f):
        def composed(x):
            return g(f(x))
        return composed
    return after
 
 
# fst :: (a, b) -> a
def fst(tpl):
'''First member of a pair.'''
return tpl[0]
 
 
# readFile :: FilePath -> IO String
def readFile(fp):
'''The contents of any file at the path
derived by expanding any ~ in fp.'''

with open(expanduser(fp), 'r', encoding='utf-8') as f:
return f.read()
 
 
# paddedMatrix :: a -> [[a]] -> [[a]]
def paddedMatrix(v):
    '''A function which pads every row (where needed)
       with copies of v up to the length of the longest row.
       An empty list of rows yields an empty list.
    '''
    def go(rows):
        width = len(max(rows, key=len))
        return paddedRows(width)(v)(rows)
    return lambda rows: go(rows) if rows else []
 
 
# paddedRows :: Int -> a -> [[a]] -> [[a]]
def paddedRows(n):
    '''A curried function: given a fill value v and then a
       list of rows, each row shorter than n is extended to
       length n with copies of v. Longer rows are returned
       unchanged (never truncated).
    '''
    def go(v, xs):
        def pad(x):
            shortfall = n - len(x)
            if 0 < shortfall:
                return x + list(repeat(v, shortfall))
            return x
        return [pad(x) for x in xs]
    return lambda v: lambda xs: go(v, xs) if xs else []
 
 
# showColumns :: Int -> [String] -> String
def showColumns(n):
'''A column-wrapped string
derived from a list of rows.'''

def go(xs):
def fit(col):
w = len(max(col, key=len))
 
def pad(x):
return x.ljust(4 + w, ' ')
return ''.join(map(pad, col)).rstrip()
 
q, r = divmod(len(xs), n)
return '\n'.join(map(
fit,
zip(*paddedMatrix('')(
chunksOf(q + int(bool(r)))(xs)
))
))
return lambda xs: go(xs)
 
 
# snd :: (a, b) -> b
def snd(tpl):
'''Second member of a pair.'''
return tpl[1]
 
 
# stet :: a -> a
def stet(x):
'''The identity function.
The usual 'id' is reserved in Python.'''

return x
 
 
# swap :: (a, b) -> (b, a)
def swap(tpl):
'''The swapped components of a pair.'''
return (tpl[1], tpl[0])
 
 
# tabulated :: String -> (a -> String) ->
# (b -> String) ->
# Int ->
# (a -> b) -> [a] -> String
def tabulated(s):
'''Heading -> x display function -> fx display function ->
number of columns -> f -> value list -> tabular string.'''

def go(xShow, fxShow, intCols, f, xs):
def mxw(fshow, g):
return max(map(compose(len)(fshow), map(g, xs)))
w = mxw(xShow, lambda x: x)
fw = mxw(fxShow, f)
return s + '\n' + showColumns(intCols)([
xShow(x).rjust(w, ' ') + ' -> ' + (
fxShow(f(x)).rjust(fw, ' ')
)
for x in xs
])
return lambda xShow: lambda fxShow: lambda nCols: (
lambda f: lambda xs: go(
xShow, fxShow, nCols, f, xs
)
)
 
 
# MAIN ---
if __name__ == '__main__':
main()
Output:
Descending order of frequency:

 ' ' -> 568     ')' ->  62     'v' ->  25     'w' ->   7     '5' ->   3
'\t' -> 382     '(' ->  62     '1' ->  24     'k' ->   7     '4' ->   3
 'e' -> 274     'd' ->  60     'G' ->  22     '9' ->   6     '+' ->   3
 'n' -> 233     'g' ->  59     ']' ->  17     'S' ->   5     '¬' ->   2
'\n' -> 228     'u' ->  58     '[' ->  17     'R' ->   5     '=' ->   2
 't' -> 204     '|' ->  54     'λ' ->  16     'M' ->   5     '.' ->   2
 's' -> 198     'x' ->  53     '2' ->  15     'F' ->   5     'L' ->   1
 '-' -> 178     'm' ->  52     'N' ->  11     '<' ->   5     'C' ->   1
 'i' -> 145     'c' ->  52     '}' ->  10     '6' ->   5     'A' ->   1
 'o' -> 126     'h' ->  47     '{' ->  10     'z' ->   4     '3' ->   1
 'f' -> 100     ':' ->  47     'T' ->  10     "'" ->   4     '&' ->   1
 'r' ->  96     ',' ->  38     'I' ->  10     '^' ->   3     '$' ->   1
 'a' ->  86     'b' ->  32     '0' ->  10     'E' ->   3
 'l' ->  70     'y' ->  31     '"' ->  10     '8' ->   3
 'p' ->  68     '>' ->  28     'J' ->   9     '7' ->   3

Procedural[edit]

Without using collections.Counter[edit]

import string
if hasattr(string, 'ascii_lowercase'):
letters = string.ascii_lowercase # Python 2.2 and later
else:
letters = string.lowercase # Earlier versions
offset = ord('a')
 
def countletters(file_handle):
    """Traverse a file and count the occurrences of each letter,
    ignoring case. The result is a list with one integer per entry
    of `letters`; index 0 holds the count for 'a'."""
    tallies = [0] * len(letters)
    for line in file_handle:
        for raw in line:
            lowered = raw.lower()
            if lowered not in letters:
                continue
            # Ordinal minus ordinal of 'a' maps a lowercase ASCII letter to 0..25
            tallies[ord(lowered) - offset] += 1
    return tallies
 
if __name__ == "__main__":
sourcedata = open(sys.argv[1])
lettercounts = countletters(sourcedata)
for i in xrange(len(lettercounts)):
print "%s=%d" % (chr(i + ord('a')), lettercounts[i]),

This example defines the function and provides a sample usage. The if ... __main__... line allows it to be cleanly imported into any other Python code while also allowing it to function as a standalone script. (A very common Python idiom).

Using a numerically indexed array (list) for this is artificial and clutters the code somewhat.

Using defaultdict[edit]

Works with: Python version 2.5+ and 3.x
...
from collections import defaultdict
def countletters(file_handle):
    """Count the occurrences of each letter, ignoring case, and
    return them as a dictionary keyed by lowercase letter.
    """
    results = defaultdict(int)
    for line in file_handle:
        for char in line:
            lowered = char.lower()
            if lowered in letters:
                results[lowered] += 1
    return results

Which eliminates the ungainly fiddling with ordinal values and offsets in function countletters of a previous example above. More importantly it allows the results to be more simply printed using:

lettercounts = countletters(sourcedata)
# dict.items() and a single print() call of one string behave the same on
# Python 2.5+ and 3.x (iteritems() and the print statement are Python 2 only).
print(" ".join("%s=%s" % (letter, count) for letter, count in lettercounts.items()))

Again eliminating all fussing with the details of converting letters into list indices.

Quick Basic/QBASIC/PDS 7.1/VB-DOS[edit]

This version counts either the valid letters from A to Z (including Ñ, from the Spanish alphabet) or all the characters in a file. When counting letters, accented vowels are counted as their unaccented letter. It runs as is in QB, QBASIC, PDS 7.1 and VB-DOS.

 
' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' '
' Program CountCar '
' '
' This program counts how many distinct characters '
' have a text file specified by the user. '
' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' '
' OPTION EXPLICIT ' Remove comment in VB-DOS

' Register
TYPE regChar
Character AS STRING * 3
Count AS LONG
END TYPE
 
' Var
DIM iChar AS INTEGER
DIM iCL AS INTEGER
DIM iCountChars AS INTEGER
DIM iFile AS INTEGER
DIM i AS INTEGER
DIM lMUC AS LONG
DIM iMUI AS INTEGER
DIM lLUC AS LONG
DIM iLUI AS INTEGER
DIM iMaxIdx AS INTEGER
DIM iP AS INTEGER
DIM iPause AS INTEGER
DIM iPMI AS INTEGER
DIM iPrint AS INTEGER
DIM lHowMany AS LONG
DIM lTotChars AS LONG
DIM sTime AS SINGLE
DIM strFile AS STRING
DIM strTxt AS STRING
DIM strDate AS STRING
DIM strTime AS STRING
DIM strKey AS STRING
CONST LngReg = 256
CONST Letters = 1
CONST FALSE = 0
CONST TRUE = NOT FALSE
 
'------Main program cycle

' Initialize variables
strDate = DATE$
strTime = TIME$
iFile = FREEFILE
 
 
DO
CLS
PRINT "This program counts letters or characters in a text file."
PRINT
INPUT "File to open: ", strFile
OPEN strFile FOR BINARY AS #iFile
IF LOF(iFile) > 0 THEN
PRINT "Count: 1) Letters 2) Characters (1 or 2)";
DO
strKey = INKEY$
LOOP UNTIL strKey = "1" OR strKey = "2"
PRINT ". Option selected: "; strKey
iCL = VAL(strKey)
sTime = TIMER
iP = POS(0)
lHowMany = LOF(iFile)
strTxt = SPACE$(LngReg)
 
IF iCL = Letters THEN
iMaxIdx = 26
ELSE
iMaxIdx = 255
END IF
 
IF iMaxIdx <> iPMI THEN
iPMI = iMaxIdx
REDIM rChar(0 TO iMaxIdx) AS regChar
 
FOR i = 0 TO iMaxIdx
IF iCL = Letters THEN
strTxt = CHR$(i + 65)
IF i = 26 THEN strTxt = CHR$(165)
ELSE
SELECT CASE i
CASE 0: strTxt = "nul"
CASE 7: strTxt = "bel"
CASE 9: strTxt = "tab"
CASE 10: strTxt = "lf"
CASE 11: strTxt = "vt"
CASE 12: strTxt = "ff"
CASE 13: strTxt = "cr"
CASE 28: strTxt = "fs"
CASE 29: strTxt = "gs"
CASE 30: strTxt = "rs"
CASE 31: strTxt = "us"
CASE 32: strTxt = "sp"
CASE ELSE: strTxt = CHR$(i)
END SELECT
END IF
rChar(i).Character = strTxt
NEXT i
ELSE
FOR i = 0 TO iMaxIdx
rChar(i).Count = 0
NEXT i
END IF
 
PRINT "Looking for ";
IF iCL = Letters THEN PRINT "letters."; ELSE PRINT "characters.";
PRINT " File is"; STR$(lHowMany); " in size. Working"; : COLOR 23: PRINT "..."; : COLOR (7)
DO WHILE LOC(iFile) < LOF(iFile)
IF LOC(iFile) + LngReg > LOF(iFile) THEN
strTxt = SPACE$(LOF(iFile) - LOC(iFile))
END IF
GET #iFile, , strTxt
FOR i = 1 TO LEN(strTxt)
IF iCL = Letters THEN
iChar = ASC(UCASE$(MID$(strTxt, i, 1)))
SELECT CASE iChar
CASE 164: iChar = 165
CASE 160: iChar = 65
CASE 130, 144: iChar = 69
CASE 161: iChar = 73
CASE 162: iChar = 79
CASE 163, 129: iChar = 85
END SELECT
iChar = iChar - 65
 
' Validates if iChar is a letter
IF iChar >= 0 AND iChar <= 25 THEN
rChar(iChar).Count = rChar(iChar).Count + 1
ELSEIF iChar = 100 THEN ' CHR$(165)
rChar(iMaxIdx).Count = rChar(iMaxIdx).Count + 1
END IF
ELSE
iChar = ASC(MID$(strTxt, i, 1))
rChar(iChar).Count = rChar(iChar).Count + 1
END IF
NEXT i
LOOP
CLOSE #iFile
 
' Show the characters found
lMUC = 0
iMUI = 0
lLUC = 2147483647
iLUI = 0
iPrint = FALSE
lTotChars = 0
iCountChars = 0
iPause = FALSE
CLS
IF iCL = Letters THEN PRINT "Letters found: "; ELSE PRINT "Characters found: ";
FOR i = 0 TO iMaxIdx
' Most Used Character
IF lMUC < rChar(i).Count THEN
lMUC = rChar(i).Count
iMUI = i
END IF
 
' Print character
IF rChar(i).Count > 0 THEN
strTxt = ""
IF iPrint THEN strTxt = ", " ELSE iPrint = TRUE
strTxt = strTxt + LTRIM$(RTRIM$(rChar(i).Character))
strTxt = strTxt + "=" + LTRIM$(STR$(rChar(i).Count))
iP = POS(0)
IF iP + LEN(strTxt) + 1 >= 80 AND iPrint THEN
PRINT ","
IF CSRLIN >= 23 AND NOT iPause THEN
iPause = TRUE
PRINT "Press a key to continue..."
DO
strKey = INKEY$
LOOP UNTIL strKey <> ""
END IF
strTxt = MID$(strTxt, 3)
END IF
PRINT strTxt;
lTotChars = lTotChars + rChar(i).Count
iCountChars = iCountChars + 1
 
' Least Used Character
IF lLUC > rChar(i).Count THEN
lLUC = rChar(i).Count
iLUI = i
END IF
END IF
NEXT i
 
PRINT "."
 
' Shows the summary
PRINT
PRINT "File analyzed....................: "; strFile
PRINT "Looked for.......................: "; : IF iCL = Letters THEN PRINT "Letters" ELSE PRINT "Characters"
PRINT "Total characters in file.........:"; lHowMany
PRINT "Total characters counted.........:"; lTotChars
IF iCL = Letters THEN PRINT "Characters discarded on count....:"; lHowMany - lTotChars
PRINT "Distinct characters found in file:"; iCountChars; "of"; iMaxIdx + 1
PRINT "Most used character was..........: ";
iPrint = FALSE
FOR i = 0 TO iMaxIdx
IF rChar(i).Count = lMUC THEN
IF iPrint THEN PRINT ", "; ELSE iPrint = TRUE
PRINT RTRIM$(LTRIM$(rChar(i).Character));
END IF
NEXT i
PRINT " ("; LTRIM$(STR$(rChar(iMUI).Count)); " times)"
PRINT "Least used character was.........: ";
iPrint = FALSE
FOR i = 0 TO iMaxIdx
IF rChar(i).Count = lLUC THEN
IF iPrint THEN PRINT ", "; ELSE iPrint = TRUE
PRINT RTRIM$(LTRIM$(rChar(i).Character));
END IF
NEXT i
PRINT " ("; LTRIM$(STR$(rChar(iLUI).Count)); " times)"
PRINT "Time spent in the process........:"; TIMER - sTime; "seconds"
ELSE
' File does not exist
CLOSE #iFile
KILL strFile
PRINT
PRINT "File does not exist."
END IF
 
' Again?
PRINT
PRINT "Again? (Y/n)"
DO
strTxt = UCASE$(INKEY$)
LOOP UNTIL strTxt = "N" OR strTxt = "Y" OR strTxt = CHR$(13) OR strTxt = CHR$(27)
LOOP UNTIL strTxt = "N" OR strTxt = CHR$(27)
 
CLS
PRINT "End of execution."
PRINT "Start time: "; strDate; " "; strTime; ", end time: "; DATE$; " "; TIME$; "."
END
' ---End of main program cycle

 

Output:

This program counts letters or characters in a text file.

File to open: readme.txt
Count: 1) Letters 2) Characters (1 or 2). Option selected: 1
Looking for letters. File is 23769 in size. Working...

Letters found: A=1427, B=306, C=583, D=530, E=2098, F=279, G=183, H=501,
I=1177, J=15, K=34, L=741, M=379, N=1219, O=1183, P=312, Q=32, R=1105, S=1079,
T=1309, U=660, V=346, W=147, X=190, Y=242, Z=70, Ñ=5.

File analyzed....................: readme.txt
Looked for.......................: Letters
Total characters in file.........: 23769
Total characters counted.........: 16152
Characters discarded on count....: 7617
Distinct characters found in file: 27 of 27
Most used character was..........: E (2098 times)
Least used character was.........: Ñ (5 times)
Time spent in the process........: .3789063 seconds

Again? (Y/n)

R[edit]

Using summary[edit]

# Returns a named integer vector giving the number of occurrences of every
# distinct character in the file `filename`. Line terminators are not counted
# because readLines() strips them.
letter.frequency <- function(filename)
{
    file <- paste(readLines(filename), collapse = '')
    chars <- strsplit(file, NULL)[[1]]
    f <- factor(chars)
    # summary() on a factor lumps all levels beyond `maxsum` (default 100)
    # into "(Other)"; raise the limit so every character keeps its own count.
    summary(f, maxsum = nlevels(f))
}

Usage on itself:

> source('letter.frequency.r')
> letter.frequency('letter.frequency.r')
- , . ' ( ) [ ] { } < = 1 a c d e f h i l L m n N o p q r s t u U y
22 3 2 1 2 6 6 2 2 1 1 3 1 1 9 6 1 14 7 2 7 8 3 4 6 1 3 3 1 8 8 7 3 1 2

Using table[edit]

R's table function is more idiomatic. For variety, we will use read.delim rather than readLines and show how to only count letters. It is worth noting that readLines is prone to counting empty lines. This may be undesirable.

letterFreq<-function(filename,lettersOnly)
{
txt<-read.delim(filename,header = FALSE,stringsAsFactors = FALSE,allowEscapes = FALSE,quote = "")
count<-table(strsplit(paste0(txt[,],collapse = ""),""))
if(lettersOnly){count[names(count)%in%c(LETTERS,letters)]}
else{count}
}
Output:

For fun, we'll use this page for input. However, HTML rarely parses well and the variety of text here is so large that I suspect inaccurate output.

file<-'https://rosettacode.org/wiki/Letter_frequency'
> letterFreq(file,TRUE)

    a     A     b     B     c     C     d     D     e     E     f     F     g     G     h     H     i     I 
38186   666  8008   350 16585  1263  4151   277 15020   713  3172   529  3079   149  4549   161  9397   690 
    j     J     k     K     l     L     m     M     n     N     o     O     p     P     q     Q     r     R 
  311   113  3294    76 15906   928  3333   322 26795   355  8926   456 22702   497  1877    39 15055   591 
    s     S     t     T     u     U     v     V     w     W     x     X     y     Y     z     Z 
46527   695 15549   597  5268   269  1003   128  4134   148  1239   144  3037    55   127    77 

Racket[edit]

 
#lang racket
(require math)
 
;; letter-frequencies : input-port -> (values (listof char) (listof count))
;; Reads every character from ip and hands them to count-samples, which
;; returns the distinct characters and their counts as two parallel lists.
(define (letter-frequencies ip)
  (let ([chars (for/list ([ch (in-port read-char ip)]) ch)])
    (count-samples chars)))
 
(letter-frequencies (open-input-string "abaabdc"))
 
Output:
'(#\a #\b #\d #\c)
'(3 2 1 1)

Using input from a text file:

 
(letter-frequencies (open-input-file "somefile.txt"))
 

Raku[edit]

(formerly Perl 6) In Raku, whenever you want to count things in a collection, the rule of thumb is to use the Bag structure.

# Slurp the input, split it into characters, count them in a Bag, and print
# the pairs in descending order of count; ws() renames whitespace keys first.
.&ws.say for slurp.comb.Bag.sort: -*.value;
 
# Make whitespace keys readable: a newline becomes 'NEW LINE', any other
# whitespace character is replaced by its Unicode name, and every other
# pair is passed through untouched.
sub ws ($pair) {
    return ('NEW LINE' => $pair.value)        if $pair.key ~~ /\n/;
    return ($pair.key.uniname => $pair.value) if $pair.key ~~ /\s/;
    $pair
}
Output when fed the same Les Misérables text file as used in the Word frequency task:
SPACE => 522095
e => 325692
t => 222916
a => 199790
o => 180974
h => 170210
n => 167006
i => 165201
s => 157585
r => 145118
d => 106987
l => 97131
NEW LINE => 67662
u => 67340
c => 62717
m => 56021
f => 53494
w => 53301
, => 48784
g => 46060
p => 39932
y => 37985
b => 34276
. => 30589
v => 24045
" => 14340
k => 14169
T => 12547
- => 11037
I => 10067
A => 7355
H => 6600
M => 6206
; => 5885
E => 4968
C => 4583
S => 4392
' => 3938
x => 3692
! => 3539
R => 3531
P => 3424
O => 3401
j => 3390
B => 3185
W => 3180
N => 3053
? => 2976
F => 2754
G => 2508
: => 2468
J => 2448
L => 2444
q => 2398
V => 2200
_ => 2070
z => 1847
D => 1756
é => 1326
Y => 1238
U => 895
1 => 716
8 => 412
X => 333
K => 321
è => 292
3 => 259
2 => 248
5 => 220
0 => 218
* => 181
4 => 181
) => 173
( => 173
6 => 167
É => 146
7 => 143
Q => 135
] => 122
[ => 122
9 => 117
æ => 106
= => 75
ê => 74
Z => 59
à => 59
â => 56
> => 50
< => 50
/ => 50
ç => 48
NO-BREAK SPACE => 45
î => 39
ü => 37
| => 36
ô => 34
# => 26
ù => 18
ï => 18
Æ => 10
û => 9
+ => 5
È => 5
ë => 5
À => 4
@ => 2
ñ => 2
Ç => 2
$ => 2
% => 1
& => 1
{ => 1
} => 1
½ => 1

Raven[edit]

# count_letters: tally the characters of the string bound to $words in
# $wordHash, then print the sorted characters combined with their counts.
define count_letters use $words
{ } as $wordHash [ ] as $keys [ ] as $vals
# First pass: walk $words character by character and bump that character's
# entry in $wordHash ("0 prefer" presumably supplies 0 for a missing key).
$words each chr
dup $wordHash swap get 0 prefer 1 + # stack: chr cnt
swap $wordHash swap set
# Second pass: visit the keys in sorted order, collecting each key into
# $keys and its count into $vals so the two lists stay parallel.
$wordHash keys copy sort each
dup $keys push
$wordHash swap get $vals push
$keys $vals combine print "\n" print
 
# Driver: read the whole of test.dat and count its characters.
"test.dat" as $file
$file read as $all_data
$all_data count_letters

REXX[edit]

version 1[edit]

It should be noted that the file is read one line at a time, so the line-end characters (presumably the
line-feed, carriage return, new-line, or whatever control characters are being used) are not reported.

These characters could be read and reported if the   charin   BIF would be used instead of the   linein   BIF.

Also note that this REXX program is ASCII or EBCDIC independent, but what constitutes a letter is restricted to
the Latin (Roman) alphabet (that is, which characters are considered to be letters depends on the particular language).

The version of REXX that was used was the English version of Regina REXX.   It should be noted that almost all
REXX interpreters assume the English language for such things as determining what characters are considered
letters unless another language is specified   (Regina REXX uses an environmental variable for this purpose).

All characters are still counted, whether a letter or not, including non-displayable characters.

/*REXX program counts the occurrences of all characters in a file,  and note that all   */
/* Latin alphabet letters are uppercased for also counting {Latin} letters (both cases).*/
/*════════════════════════════════════~~~~~~~~~~════════════════════════════════════════*/
abc = 'abcdefghijklmnopqrstuvwxyz' /*define an (Latin or English) alphabet*/
abcU= 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' /*define an uppercase version of [↑]. */
parse arg fileID . /*this last char isn't a middle dot: · */
if fileID=='' then fileID= 'JUNK.TXT' /*¿none specified? Then use the default*/
totChars= 0; totLetters= 0 /*count of all chars and of all letters*/
pad= left('',18); pad9= left('', 18%2) /*used for the indentations of output. */
@.= 0 /*wouldn't it be neat to use Θ instead?*/
do j=1 while lines(fileID)\==0 /*read the file 'til the cows come home*/
rec= linein(fileID) /*get a line/record from the input file*/
/* [↓] process all characters in REC.*/
do k=1 for length(rec) /*examine/count each of the characters.*/
totChars= totChars + 1 /*bump count of number of characters. */
c= substr(rec, k, 1); @.c= @.c + 1 /*Peel off a character; bump its count.*/
if \datatype(c, 'M') then iterate /*Not a Latin letter? Get next char.⌠*/
totLetters= totLetters + 1 /*bump the count for [Latin] letters. ⌡*/
upper c /* ◄─────◄ uppercase a Latin character.*/
@..c= @..c + 1 /*bump the (Latin) letter's count. */
end /*k*/ /*no Greek glyphs: αßΓπΣσµτΦΘΩδφε ··· */
end /*j*/ /*maybe we're ½ done by now, or mäÿbé ¬*/
LL= '(Latin) letter' /*literal used for a "SAY" (below). */
w= length(totChars) /*used for right─aligning the counts. */
say 'file ─────' fileId "───── has" j-1 'records and has' totLetters LL"s."; say
do L=0 for 256; c= d2c(L) /*display all none─zero letter counts. */
if @..c==0 then iterate /*Has a zero count? Then skip character*/
say pad9 LL' ' c " (also" translate(c,abc,abcU)') count:' right(@..c, w)
end /*L*/ /*we may be in a rut, but not a cañyon.*/
say /*¡The old name for Eygpt was Æygpt! _*/
say 'file ─────' fileId "───── has" totChars 'characters.' /* √ */
say /*The name for « » chars is guillemets.*/
do #=0 for 256; y= d2c(#) /*display all none─zero char counts. */
if @.y==0 then iterate /*¿Å zero count? Then ignore character*/
c= d2c(#); ch= c /*C is the character glyph of a char. */
if c<<' ' | #==255 then ch= /*don't show some control characters. */
if c==' ' then ch= 'blank' /*show a blank's {true} name. */
say pad right(ch, 5) " ('"d2x(#,2)"'x character count:" right(@.c, w)
end /*#*/ /*255 isn't quite ∞, but sometimes ∙∙∙ */
say /*not a good place for dithering: ░▒▓█ */
say pad pad9 '☼ end─of─list ☼' /*show we are at the end of the list. */
/*§§§§ Talk about a mishmash of 2¢ comments. ▬▬^▬▬ stick a fork in it, we're all done. ☻*/

output   when using the (above) REXX program for the input file:

Note that this REXX program works with ASCII or EBCDIC, but the order of the output will
be different because of the order in which EBCDIC and ASCII store characters.

file ───── JUNK.TXT ───── has 42 records and has 1652 (Latin) letters.

          (Latin) letter  A  (also a)  count:  146
          (Latin) letter  B  (also b)  count:   26
          (Latin) letter  C  (also c)  count:  104
          (Latin) letter  D  (also d)  count:   58
          (Latin) letter  E  (also e)  count:  187
          (Latin) letter  F  (also f)  count:   53
          (Latin) letter  G  (also g)  count:   25
          (Latin) letter  H  (also h)  count:   80
          (Latin) letter  I  (also i)  count:   89
          (Latin) letter  J  (also j)  count:    6
          (Latin) letter  K  (also k)  count:   13
          (Latin) letter  L  (also l)  count:   97
          (Latin) letter  M  (also m)  count:   28
          (Latin) letter  N  (also n)  count:  102
          (Latin) letter  O  (also o)  count:  106
          (Latin) letter  P  (also p)  count:   38
          (Latin) letter  Q  (also q)  count:    3
          (Latin) letter  R  (also r)  count:  111
          (Latin) letter  S  (also s)  count:   96
          (Latin) letter  T  (also t)  count:  175
          (Latin) letter  U  (also u)  count:   48
          (Latin) letter  V  (also v)  count:    3
          (Latin) letter  W  (also w)  count:   18
          (Latin) letter  X  (also x)  count:    9
          (Latin) letter  Y  (also y)  count:   25
          (Latin) letter  Z  (also z)  count:    6

file ───── JUNK.TXT ───── has 3778 characters.

                          ('02'x  character count:    1
                          ('0F'x  character count:    2
                          ('11'x  character count:    2
                          ('15'x  character count:    4
                          ('16'x  character count:    4
                          ('18'x  character count:    1
                          ('19'x  character count:    1
                   blank  ('20'x  character count: 1477
                       !  ('21'x  character count:    1
                       "  ('22'x  character count:   14
                       #  ('23'x  character count:    6
                       %  ('25'x  character count:    1
                       '  ('27'x  character count:   47
                       (  ('28'x  character count:   23
                       )  ('29'x  character count:   22
                       *  ('2A'x  character count:   86
                       +  ('2B'x  character count:    4
                       ,  ('2C'x  character count:   16
                       -  ('2D'x  character count:    1
                       .  ('2E'x  character count:   40
                       /  ('2F'x  character count:   88
                       0  ('30'x  character count:    8
                       1  ('31'x  character count:   10
                       2  ('32'x  character count:   11
                       5  ('35'x  character count:    7
                       6  ('36'x  character count:    2
                       8  ('38'x  character count:    2
                       9  ('39'x  character count:    3
                       :  ('3A'x  character count:    5
                       ;  ('3B'x  character count:    8
                       <  ('3C'x  character count:    2
                       =  ('3D'x  character count:   38
                       ?  ('3F'x  character count:    5
                       @  ('40'x  character count:    9
                       A  ('41'x  character count:    2
                       B  ('42'x  character count:    1
                       C  ('43'x  character count:    8
                       D  ('44'x  character count:    6
                       E  ('45'x  character count:    5
                       F  ('46'x  character count:    1
                       G  ('47'x  character count:    3
                       H  ('48'x  character count:    2
                       I  ('49'x  character count:    8
                       J  ('4A'x  character count:    2
                       K  ('4B'x  character count:    2
                       L  ('4C'x  character count:   22
                       M  ('4D'x  character count:    2
                       N  ('4E'x  character count:    3
                       O  ('4F'x  character count:    1
                       P  ('50'x  character count:    2
                       Q  ('51'x  character count:    1
                       R  ('52'x  character count:    3
                       S  ('53'x  character count:    2
                       T  ('54'x  character count:    9
                       U  ('55'x  character count:    4
                       V  ('56'x  character count:    1
                       W  ('57'x  character count:    1
                       X  ('58'x  character count:    4
                       Y  ('59'x  character count:    2
                       Z  ('5A'x  character count:    1
                       [  ('5B'x  character count:    3
                       \  ('5C'x  character count:    2
                       ]  ('5D'x  character count:    3
                       ^  ('5E'x  character count:    1
                       _  ('5F'x  character count:    1
                       a  ('61'x  character count:  144
                       b  ('62'x  character count:   25
                       c  ('63'x  character count:   96
                       d  ('64'x  character count:   52
                       e  ('65'x  character count:  182
                       f  ('66'x  character count:   52
                       g  ('67'x  character count:   22
                       h  ('68'x  character count:   78
                       i  ('69'x  character count:   81
                       j  ('6A'x  character count:    4
                       k  ('6B'x  character count:   11
                       l  ('6C'x  character count:   75
                       m  ('6D'x  character count:   26
                       n  ('6E'x  character count:   99
                       o  ('6F'x  character count:  105
                       p  ('70'x  character count:   36
                       q  ('71'x  character count:    2
                       r  ('72'x  character count:  108
                       s  ('73'x  character count:   94
                       t  ('74'x  character count:  166
                       u  ('75'x  character count:   44
                       v  ('76'x  character count:    2
                       w  ('77'x  character count:   17
                       x  ('78'x  character count:    5
                       y  ('79'x  character count:   23
                       z  ('7A'x  character count:    5
                       {  ('7B'x  character count:    2
                       |  ('7C'x  character count:    1
                       }  ('7D'x  character count:    2
                       ~  ('7E'x  character count:   10
                       é  ('82'x  character count:    1
                       ä  ('84'x  character count:    1
                       Å  ('8F'x  character count:    1
                       Æ  ('92'x  character count:    1
                       ÿ  ('98'x  character count:    1
                       ¢  ('9B'x  character count:    1
                       ñ  ('A4'x  character count:    1
                       ¿  ('A8'x  character count:    2
                       ¬  ('AA'x  character count:    1
                       ½  ('AB'x  character count:    1
                       ¡  ('AD'x  character count:    1
                       «  ('AE'x  character count:    1
                       »  ('AF'x  character count:    1
                       ░  ('B0'x  character count:    1
                       ▒  ('B1'x  character count:    1
                       ▓  ('B2'x  character count:    1
                       ─  ('C4'x  character count:   30
                       ═  ('CD'x  character count:   76
                       █  ('DB'x  character count:    1
                       α  ('E0'x  character count:    1
                       ß  ('E1'x  character count:    1
                       Γ  ('E2'x  character count:    1
                       π  ('E3'x  character count:    1
                       Σ  ('E4'x  character count:    1
                       σ  ('E5'x  character count:    1
                       µ  ('E6'x  character count:    1
                       τ  ('E7'x  character count:    1
                       Φ  ('E8'x  character count:    1
                       Θ  ('E9'x  character count:    2
                       Ω  ('EA'x  character count:    1
                       δ  ('EB'x  character count:    1
                       ∞  ('EC'x  character count:    1
                       φ  ('ED'x  character count:    1
                       ε  ('EE'x  character count:    1
                       ⌠  ('F4'x  character count:    1
                       ⌡  ('F5'x  character count:    1
                       ∙  ('F9'x  character count:    3
                       ·  ('FA'x  character count:    4
                       √  ('FB'x  character count:    1

                             ☼ end─of─list ☼

Version 2 (for TSO)[edit]

/*REXX program counts the occurrences of all characters in a file
* Adapted version 1 for TSO (EXECIO instead of linein)
* No translation to uppercase takes place
* There is no need for tails being hex
* 25.07.2012 Walter Pachl
* The two hex rows shown under each record are derived from the record
* itself (high nibbles, then low nibbles) instead of being hard-coded
* for one particular sample record.
***********************************************************************/

Parse arg dsn .                        /*Data set to be processed       */
if dsn='' Then                         /*none specified?                */
  dsn='PRIV.V100(TEST)'                /* Use default.                  */
c.=0                                   /* Character counts              */
"ALLOC FI(IN) DA("dsn") SHR REUSE"
if rc<>0 Then Do                       /* without the allocation L.0    */
  say 'Unable to allocate' dsn '(rc='rc')'  /* is never set: stop here  */
  exit rc
  End
'EXECIO * DISKR IN (STEM L. FINIS'
'FREE FI(IN)'
totChars=0                             /*count of the total num of chars*/
totLetters=0                           /*count of the total num letters.*/
indent=left('',20)                     /*used for indentation of output.*/

do j=1 to l.0                          /*process all lines              */
  rec=l.j                              /*take line number j             */
  Say '>'rec'<' length(rec)            /*show the record and its length */
  hex=c2x(rec)                         /*two hex digits per character   */
  hi=''                                /*row of high-order nibbles      */
  lo=''                                /*row of low-order nibbles       */
  do p=1 to length(hex) by 2           /*split the digits into two rows */
    hi=hi||substr(hex,p,1)
    lo=lo||substr(hex,p+1,1)
    End
  Say ' 'hi                            /*vertical hex dump, aligned     */
  Say ' 'lo                            /* under the record shown above  */
  do k=1 for length(rec)               /*loop over characters           */
    totChars=totChars+1                /*Increment total number of chars*/
    c=substr(rec,k,1)                  /*get character number k         */
    c.c=c.c+1                          /*increment the character's count*/
    End
  End                                  /*maybe we're ½ done by now, or ¬*/

w=length(totChars)                     /*used for right-aligning counts.*/
say 'file -----' dsn "----- has" j-1 'records.'
say 'file -----' dsn "----- has" totChars 'characters.'

do L=0 to 255                          /* display nonzero letter counts */
  c=d2c(l)                             /* the character in question     */
  if c.c>0 &,                          /* was found in the file         */
     datatype(c,'M')>0 Then Do         /* and is a Latin letter         */
    say indent "(Latin) letter " c 'count:' right(c.c,w) /* tell        */
    totLetters=totLetters+c.c          /* increment number of letters   */
    End
  End

say 'file -----' dsn "----- has" totLetters '(Latin) letters.'
say ' other characters follow'
other=0
do m=0 to 255                          /* now for non-letters           */
  c=d2c(m)                             /* the character in question     */
  y=c2x(c)                             /* the hex representation        */
  if c.c>0 &,                          /* was found in the file         */
     datatype(c,'M')=0 Then Do         /* and is not a Latin letter     */
    other=other+c.c                    /* increment count               */
    _=right(c.c,w)                     /* prepare output of count       */
    select                             /*make the character viewable.   */
      when c<<' ' | m==255 then say indent "'"y"'x character count:" _
      when c==' ' then say indent "blank character count:" _
      otherwise say indent " " c 'character count:' _
      end
    end
  end
say 'file -----' dsn "----- has" other 'other characters.'

Output:

>WaA  Pa12 :&-: :äüÖ2< 20                                
 E8C44D8FF015674BCDEF                                    
 61100711200000000002                                    
file ----- PRIV.V100(TEST) ----- has 1 records.          
file ----- PRIV.V100(TEST) ----- has 20 characters.      
                     (Latin) letter  a count:  2         
                     (Latin) letter  A count:  1         
                     (Latin) letter  P count:  1         
                     (Latin) letter  W count:  1         
file ----- PRIV.V100(TEST) ----- has 5 (Latin) letters.  
                           other characters follow        
                     '00'x character count:  1           
                     '10'x character count:  1           
                     blank character count:  3           
                         & character count:  1           
                         - character count:  1           
                         : character count:  1           
                         : character count:  1           
                         ä character count:  1           
                         ü character count:  1           
                         Ö character count:  1           
                         1 character count:  1           
                         2 character count:  2           
file ----- PRIV.V100(TEST) ----- has 15 other characters.

Ring[edit]

 
# Count how often each character with code 32..255 occurs in a text file
# and print every character that was seen with its count and its share of
# all counted characters.
textData = read("C:\Ring\ReadMe.txt")
ln = len(textData)
charCount = list(255)
totCount = 0

for i = 1 to ln
	char = ascii(substr(textData,i,1))
	# Only codes above 31 are ever reported, so only those are tallied.
	# This also keeps a NUL byte (code 0) from indexing charCount[0],
	# which does not exist because Ring lists start at index 1.
	if char > 31
		charCount[char] = charCount[char] + 1
		totCount = totCount + 1
	ok
next

for i = 32 to 255
	if charCount[i] > 0 see char(i) + " = " + charCount[i] + " " + (charCount[i]/totCount)*100 + " %" + nl ok
next
 

Ruby[edit]

# Counts the letters a..z in the named file, ignoring case.
#
# file - path of the file to read.
#
# Returns an Array of [letter, count] pairs (letters are lower case), in the
# order in which each letter first appears in the file.
def letter_frequency(file)
  alphabet = 'a' .. 'z'
  tallies = File.read(file).split(//).each_with_object(Hash.new(0)) do |ch, seen|
    lower = ch.downcase
    seen[lower] += 1 if alphabet.include?(lower)
  end
  tallies.to_a
end
 
# Count the letters of the file named by the first command-line argument and
# print the [letter, count] pairs, most frequent first.
letter_frequency(ARGV[0]).sort_by {|key, val| -val}.each {|pair| p pair}

example output, using the program file as input:

$ ruby letterFrequency.rb letterFrequency.rb
["e", 34]
["l", 20]
["t", 17]
["r", 14]
["a", 12]
["y", 9]
["c", 8]
["i", 7]
["v", 6]
["n", 6]
["f", 6]
["s", 6]
["d", 5]
["p", 5]
["k", 5]
["u", 4]
["o", 4]
["g", 3]
["b", 2]
["h", 2]
["q", 2]
["z", 1]
["w", 1]

Ruby 2.0[edit]

# Counts the alphabetic characters of an IO-like object, ignoring case.
#
# file - any object responding to each_char (e.g. ARGF, a File, a StringIO).
#
# Characters are streamed through a lazy enumerator, so the input is never
# held in memory as a whole. Returns a Hash mapping each upper-cased letter
# to its count (missing keys default to 0).
def letter_frequency(file)
  tally = Hash.new(0)
  letters = file.each_char.lazy.select { |ch| /[[:alpha:]]/ === ch }
  letters.each { |ch| tally[ch.upcase] += 1 }
  tally
end
 
# Count the letters read through ARGF and print them as "LETTER: count",
# sorted by letter.
letter_frequency(ARGF).sort.each do |letter, frequency|
puts "#{letter}: #{frequency}"
end

Note that this version *should* use less memory, even on a gigantic file. This is done by using lazy enumerables, which Ruby 2.0 introduced.

Example output, using the (somewhat large) dictionary file as the input. Also note that this version works on Unicode text.

$ ruby letter_frequency.rb /usr/share/dict/words
A: 64439
B: 15526
C: 31872
D: 28531
E: 88833
F: 10675
G: 22712
H: 19320
I: 66986
J: 1948
K: 8409
L: 41107
M: 22508
N: 57144
O: 48944
P: 22274
Q: 1524
R: 57347
S: 90113
T: 53006
U: 26118
V: 7989
W: 7530
X: 2124
Y: 12652
Z: 3281
Å: 1
á: 10
â: 6
ä: 7
å: 3
ç: 5
è: 28
é: 144
ê: 6
í: 2
ñ: 8
ó: 8
ô: 2
ö: 16
û: 3
ü: 12

Run BASIC[edit]

' Count every byte of a text file and report, for each character code
' 32..255 that occurs, its code, glyph, count and percentage of all
' counted (code > 31) characters.
dim charCount( 255)

open "c:\rbp101\public\textFile.txt" for input as #f
textData$ = input$(#f, lof( #f))
close #f
ln = len(textData$)

' Tally every byte; only codes above 31 contribute to the total.
for idx = 1 to ln
  byteVal = asc(mid$(textData$,idx,1))
  charCount(byteVal) = charCount(byteVal) + 1
  if byteVal > 31 then totCount = totCount + 1
next idx

' Report the characters that were seen at least once.
for idx = 32 to 255
  if charCount(idx) > 0 then print "Ascii:";using("###",idx);" char:";chr$(idx);" Count:";using("#######",charCount(idx));" ";using("##.#",(charCount(idx) / totCount) * 100);"%"
next idx

Output uses this program to count itself:

Ascii: 32 char:  Count:     76 16.1%
Ascii: 34 char:" Count:     18  3.8%
Ascii: 35 char:# Count:     17  3.6%
Ascii: 36 char:$ Count:      6  1.3%
Ascii: 37 char:% Count:      1  0.2%
Ascii: 40 char:( Count:     16  3.4%
Ascii: 41 char:) Count:     16  3.4%
Ascii: 42 char:* Count:      1  0.2%
Ascii: 43 char:+ Count:      2  0.4%
Ascii: 44 char:, Count:      6  1.3%
Ascii: 46 char:. Count:      2  0.4%
Ascii: 47 char:/ Count:      1  0.2%
Ascii: 48 char:0 Count:      4  0.8%
Ascii: 49 char:1 Count:      8  1.7%
Ascii: 50 char:2 Count:      3  0.6%
Ascii: 51 char:3 Count:      2  0.4%
Ascii: 53 char:5 Count:      4  0.8%
Ascii: 58 char:: Count:      4  0.8%
Ascii: 59 char:; Count:      8  1.7%
Ascii: 61 char:= Count:      7  1.5%
Ascii: 62 char:> Count:      2  0.4%
Ascii: 65 char:A Count:      1  0.2%
Ascii: 67 char:C Count:     10  2.1%
Ascii: 68 char:D Count:      3  0.6%
Ascii: 70 char:F Count:      1  0.2%
Ascii: 92 char:\ Count:      3  0.6%
Ascii: 97 char:a Count:     19  4.0%
Ascii: 98 char:b Count:      2  0.4%
Ascii: 99 char:c Count:     17  3.6%
Ascii:100 char:d Count:      3  0.6%
Ascii:101 char:e Count:     13  2.7%
Ascii:102 char:f Count:     10  2.1%
Ascii:103 char:g Count:      3  0.6%
Ascii:104 char:h Count:     14  3.0%
Ascii:105 char:i Count:     24  5.1%
Ascii:108 char:l Count:      7  1.5%
Ascii:109 char:m Count:      2  0.4%
Ascii:110 char:n Count:     25  5.3%
Ascii:111 char:o Count:     21  4.4%
Ascii:112 char:p Count:      6  1.3%
Ascii:114 char:r Count:     17  3.6%
Ascii:115 char:s Count:      7  1.5%
Ascii:116 char:t Count:     38  8.0%
Ascii:117 char:u Count:     16  3.4%
Ascii:120 char:x Count:      7  1.5%

Rust[edit]

Works with all UTF-8 characters

use std::collections::btree_map::BTreeMap;
use std::{env, process};
use std::io::{self, Read, Write};
use std::fmt::Display;
use std::fs::File;
 
/// Reads the file named by the first command-line argument as UTF-8 and
/// prints how often each character occurs, in code-point order.
///
/// Exits with status 1 if no file name was given, 2 if the file cannot be
/// opened and 3 if it cannot be read as a string.
fn main() {
    let filename = match env::args().nth(1) {
        Some(name) => name,
        None => exit_err("Please supply a file name", 1),
    };

    let mut file = File::open(&filename).unwrap_or_else(|e| exit_err(e, 2));
    let mut buf = String::new();
    if let Err(e) = file.read_to_string(&mut buf) {
        exit_err(e, 3);
    }

    // A BTreeMap keeps the characters sorted, so the report needs no extra sort.
    let mut count = BTreeMap::new();
    for c in buf.chars() {
        *count.entry(c).or_insert(0) += 1;
    }

    println!("Number of occurences per character");
    for (ch, n) in &count {
        println!("{:?}: {}", ch, n);
    }
}
 
/// Writes `msg` followed by a newline to standard error and terminates the
/// process with exit status `code`. Never returns.
///
/// Panics with "Could not write to stderr" if the message cannot be written.
#[inline]
fn exit_err<T: Display>(msg: T, code: i32) -> ! {
    let mut err = io::stderr();
    writeln!(err, "{}", msg).expect("Could not write to stderr");
    process::exit(code)
}

Output when run on source file:

Number of occurences per character
'\n': 35
' ': 167
'!': 4
'\"': 10
'#': 1
'&': 4
'(': 25
')': 25
'*': 1
'+': 1
',': 12
'-': 1
'.': 10
'0': 1
'1': 3
'2': 2
'3': 2
':': 37
';': 13
'<': 1
'=': 4
'>': 2
'?': 1
'B': 2
'C': 1
'D': 2
'F': 2
'M': 2
'N': 1
'P': 1
'R': 1
'S': 1
'T': 5
'W': 1
'[': 1
']': 1
'_': 15
'a': 20
'b': 5
'c': 22
'd': 12
'e': 75
'f': 14
'g': 5
'h': 6
'i': 29
'k': 1
'l': 23
'm': 13
'n': 36
'o': 28
'p': 17
'r': 45
's': 33
't': 42
'u': 24
'v': 2
'w': 8
'x': 6
'y': 4
'{': 9
'|': 6
'}': 9

Scala[edit]

import io.Source.fromFile
 
/** Counts how often each character occurs in the named file.
  *
  * The whole file is read into a string, its characters are grouped by
  * identity and each group is replaced by its size. The underlying Source
  * is closed once the text has been read, even if reading fails.
  *
  * @param filename path of the file to read
  * @return a map from each character found to its number of occurrences
  */
def letterFrequencies(filename: String) = {
  val source = fromFile(filename)
  try source.mkString.groupBy(c => c).mapValues(_.length)
  finally source.close()
}

Scheme[edit]

Using guile scheme 2.0.11.

Note that this prints the scheme representations of characters in no particular order.

(use-modules (ice-9 format))
 
;; Read characters from PORT until end of file, folding each one into the
;; frequency table TABLE with add-char. Returns the final table.
(define (char-freq port table)
  (let loop ((acc table))
    (let ((c (read-char port)))
      (if (eof-object? c)
          acc
          (loop (add-char c acc))))))
 
;; Return TABLE with the count for CHAR incremented.
;; TABLE is a list of (character count) entries; a character that is not
;; yet present is appended at the end with a count of 1, so entries keep
;; the order in which characters were first seen.
(define (add-char char table)
  (cond
    ((null? table) (list (list char 1)))
    ;; char=? is the portable way to compare characters; the result of eq?
    ;; on characters is left unspecified by the Scheme standards.
    ((char=? (caar table) char)
     (cons (list char (+ (cadar table) 1)) (cdr table)))
    (else (cons (car table) (add-char char (cdr table))))))
 
;; Print one line per (character count) entry of TABLE: the character in
;; its written form, left-aligned in 10 columns, followed by the count
;; right-aligned in 10 columns.
(define (format-table table)
  (for-each
    (lambda (entry)
      (let ((ch (car entry))
            (n (cadr entry)))
        (format #t "~10s~10d~%" ch n)))
    table))
 
;; Print the character frequencies of the file named FILENAME.
;; call-with-input-file closes the port once counting has finished, so the
;; file handle is not left open after the table has been built.
(define (print-freq filename)
  (format-table
    (call-with-input-file filename
      (lambda (port) (char-freq port '())))))
 
(print-freq "letter-frequency.scm")

Output when reading own source:

#\(               45
#\u                5
#\s                9
#\e               47
#\-               19
#\m                9
#\o               16
#\d               19
#\l               25
#\space           83
#\i               15
#\c               28
#\9                1
#\f               20
#\r               39
#\a               47
#\t               36
#\)               45
#\newline         21
#\n               15
#\h               14
#\q                7
#\p                9
#\b               16
#\j                1
#\?                3
#\k                1
#\1                4
#\+                1
#\#                2
#\"                4
#\~                3
#\0                2
#\%                1
#\'                1
#\y                1
#\.                1

An implementation for CHICKEN scheme:

 
;; Count the characters of the string "foobar" into an association list.
;; port-fold calls read-char repeatedly (here reading from the string) and
;; threads the accumulator s, which starts as '(), through the lambda.
(with-input-from-string "foobar"
(lambda ()
(port-fold (lambda (x s)
;; Store x with its previous count plus one; alist-ref yields 0 when x has
;; not been seen yet.
(alist-update x
(add1 (alist-ref x s eq? 0))
s))
'()
read-char)))
 

which shows: ((#\f . 1) (#\o . 2) (#\b . 1) (#\a . 1) (#\r . 1))

Seed7[edit]

$ include "seed7_05.s7i";
 
const type: charHash is hash [char] integer;
 
# Read characters from standard input until EOF, count each one in a hash,
# then write the characters in sorted order, each followed by its count.
const proc: main is func
  local
    var charHash: occurrences is charHash.EMPTY_HASH;
    var char: current is ' ';
  begin
    current := getc(IN);
    while current <> EOF do
      # Make sure an entry exists before bumping it.
      if current not in occurrences then
        occurrences @:= [current] 0;
      end if;
      incr(occurrences[current]);
      current := getc(IN);
    end while;
    for current range sort(keys(occurrences)) do
      writeln(current <& " " <& occurrences[current]);
    end for;
  end func;

Output when the program uses itself as input:

 22
  129
" 4
$ 1
& 2
' 2
( 6
) 6
. 2
0 1
...
s 21
t 9
u 9
v 2
w 3
y 2

SenseTalk[edit]

 
-- Load the contents of the file into the variable source.
put file "~/Documents/addresses.csv" into source
 
-- Tally every non-control character of source in the counts property list.
-- A "." is appended to lowercase letters so that they are stored under a
-- different key than the matching uppercase letter.
repeat with each character of source
if it is a controlChar then next repeat -- skip control characters
if it is a lowercase then put "." after it -- make keys distinct
add 1 to counts.(it)
end repeat
 
-- Print each key with its count; only the first character of the key is
-- shown, which drops the "." marker added to lowercase letters.
repeat with each (theChar, count) in counts
put char 1 of theChar & " —> " & count
end repeat
 

Output:

  —> 2862
" —> 11180
# —> 109
& —> 58
, —> 5646
- —> 2009
. —> 1629
/ —> 1000
0 —> 1496
1 —> 1665
2 —> 1487
3 —> 1481
4 —> 1405
5 —> 1416
6 —> 1260
7 —> 1499
8 —> 1323
9 —> 1349
: —> 500
@ —> 500
_ —> 127
A —> 558
a —> 4082
B —> 290
b —> 455
C —> 572
c —> 2387
D —> 273
d —> 1230
E —> 265
e —> 4493
F —> 177
f —> 392
G —> 146
g —> 911
H —> 239
h —> 1699
I —> 235
i —> 2935
J —> 212
j —> 147
K —> 131
k —> 646
L —> 302
l —> 2602
M —> 428
m —> 1912
N —> 319
n —> 3237
O —> 136
o —> 4018
P —> 294
p —> 1141
Q —> 6
q —> 228
R —> 288
r —> 3124
S —> 600
s —> 2229
T —> 196
t —> 3328
U —> 21
u —> 899
V —> 65
v —> 508
W —> 222
w —> 1937
X —> 34
x —> 153
Y —> 99
y —> 858
Z —> 14
z —> 145

Sidef[edit]

# Return the letter frequencies of `file`: read it, split it into
# characters, keep those matching [[:alpha:]], group them by their
# lower-case form, replace each group by its size and sort the result by
# descending count.
func letter_frequency(File file) {
file.read.chars.grep{.match(/[[:alpha:]]/)} \
.group_by {|letter| letter.downcase} \
.map_val {|_, val| val.len} \
.sort_by {|_, val| -val}
}
 
# Run the counter on this very source file and print "letter: count" lines.
var top = letter_frequency(File(__FILE__))
top.each{|pair| say "#{pair[0]}: #{pair[1]}"}
Output:
e: 22
l: 17
a: 16
t: 14
r: 14
p: 12
f: 8
i: 8
n: 7
c: 6
u: 6
o: 6
v: 6
y: 5
s: 5
h: 3
w: 2
q: 2
b: 2
m: 2
g: 2
d: 1

SIMPOL[edit]

Example: open a text file and compute letter frequency.

constant iBUFSIZE 500
 
function main(string filename)
  // Count the letters a-z (case-insensitively) in the named file using a
  // 26-element array, and return a report with one "letter: count" line
  // per letter, or an error message if the file could not be opened.
  // The reference operators =@ (assign) and =@= (compare) are restored
  // here; they had been garbled into "[email protected]".
  fsfileinputstream fpi
  integer e, i, aval, zval, cval
  string s, buf, c
  array chars

  e = 0
  fpi =@ fsfileinputstream.new(filename, error=e)
  if fpi =@= .nul
    s = "Error, file """ + filename + """ not found{d}{a}"
  else
    chars =@ array.new()
    aval = .charval("a")
    zval = .charval("z")
    // Slots 1..26 hold the counts for a..z.
    i = 1
    while i <= 26
      chars[i] = 0
      i = i + 1
    end while
    // Read the file in lower-cased chunks of up to iBUFSIZE characters.
    buf = .lcase(fpi.getstring(iBUFSIZE, 1))
    while not fpi.endofdata and buf > ""
      i = 1
      while i <= .len(buf)
        c = .substr(buf, i, 1)
        cval = .charval(c)
        if cval >= aval and cval <= zval
          chars[cval - aval + 1] = chars[cval - aval + 1] + 1
        end if
        i = i + 1
      end while
      buf = .lcase(fpi.getstring(iBUFSIZE, 1))
    end while

    s = "Character counts for """ + filename + """{d}{a}"
    i = 1
    while i <= chars.count()
      s = s + .char(aval + i - 1) + ": " + .tostr(chars[i], 10) + "{d}{a}"
      i = i + 1
    end while
  end if
end function s

As this was being created I realized that in [SIMPOL] I wouldn't have done it this way (in fact, I wrote it differently the first time and had to go back and change it to use an array afterward). In [SIMPOL] we would have used the set object. It acts similarly to a single-dimensional array, but can also use various set operations, such as difference, unite, intersect, etc. One of the interesting things is that each unique value is stored only once, and the number of duplicates is stored with it. The sample then looks a little cleaner:

constant iBUFSIZE 500
 
function main(string filename)
  // Count the letters a-z (case-insensitively) in the named file using a
  // set, which stores each distinct value once together with its number
  // of duplicates. Returns a report with one "letter: count" line per
  // letter that occurs, or an error message if the file could not be
  // opened. The reference operators =@ (assign) and =@= (compare) are
  // restored here; they had been garbled into "[email protected]".
  fsfileinputstream fpi
  integer e, i, aval, zval
  string s, buf, c
  set chars

  e = 0
  fpi =@ fsfileinputstream.new(filename, error=e)
  if fpi =@= .nul
    s = "Error, file """ + filename + """ not found{d}{a}"
  else
    chars =@ set.new()
    aval = .charval("a")
    zval = .charval("z")
    // Read the file in lower-cased chunks of up to iBUFSIZE characters.
    buf = .lcase(fpi.getstring(iBUFSIZE, 1))
    while not fpi.endofdata and buf > ""
      i = 1
      while i <= .len(buf)
        c = .substr(buf, i, 1)
        if .charval(c) >= aval and .charval(c) <= zval
          chars.addvalue(c)
        end if
        i = i + 1
      end while
      buf = .lcase(fpi.getstring(iBUFSIZE, 1))
    end while

    s = "Character counts for """ + filename + """{d}{a}"
    i = 1
    while i <= chars.count()
      s = s + chars[i] + ": " + .tostr(chars.valuecount(chars[i]), 10) + "{d}{a}"
      i = i + 1
    end while
  end if
end function s

The final stage simply reads the totals for each character. One caveat: if a character does not occur in the file, it will not show up at all in this second implementation.

Swift[edit]

import Foundation
 
// Path of the file to analyse: the single command-line argument if exactly
// one was given, otherwise the system word list.
let dictPath: String

switch CommandLine.arguments.count {
case 2:
  dictPath = CommandLine.arguments[1]
default:
  dictPath = "/usr/share/dict/words"
}

// Report an unreadable or non-UTF-8 file on stderr and exit with a failure
// status instead of crashing on a force-unwrap.
guard let wordsData = FileManager.default.contents(atPath: dictPath),
      let allWords = String(data: wordsData, encoding: .utf8) else {
  FileHandle.standardError.write(Data("Could not read \(dictPath) as UTF-8 text\n".utf8))
  exit(1)
}

// Split on newlines so line breaks are not counted, then tally every
// remaining character.
let words = allWords.components(separatedBy: "\n")
let counts = words.flatMap({ $0.map({ ($0, 1) }) }).reduce(into: [:], { $0[$1.0, default: 0] += $1.1 })

for (char, count) in counts {
  print("\(char): \(count)")
}

Tcl[edit]

proc letterHistogram {fileName} {
    # Seed every lowercase ASCII letter with a zero count so that letters
    # absent from a short text still appear in the report.
    foreach letter [split abcdefghijklmnopqrstuvwxyz ""] {
        set frequency($letter) 0
    }

    # Slurp the whole file, then release the channel before counting.
    set chan [open $fileName]
    set text [read $chan]
    close $chan

    # Tally alphabetic characters case-insensitively; skip everything else.
    foreach ch [split $text ""] {
        if {![string is alpha $ch]} {
            continue
        }
        incr frequency([string tolower $ch])
    }

    # Print the histogram
    parray frequency
}

letterHistogram the/sample.txt

TUSCRIPT[edit]

 
$$ MODE TUSCRIPT
- Letter frequency: fetch a word list, split every word into single
- characters, tally them in the dictionary "letters", then list the
- characters with their counts ordered by descending count.
- NOTE(review): punctuation and digits in the word list are tallied too
- (see the sample output) - confirm this is intended.
words = REQUEST ("http://www.puzzlers.org/pub/wordlists/unixdict.txt")
 
DICT letters create
MODE {}
COMPILE
LOOP word=words
letters=SPLIT (word,|":?:")
LOOP letter=letters
DICT letters ADD/QUIET/COUNT letter
ENDLOOP
ENDLOOP
ENDCOMPILE
DICT letters unload letter,size,cnt
 
index =DIGIT_INDEX (cnt)
index =REVERSE (index)
letter =INDEX_SORT (letter,index)
cnt =INDEX_SORT (cnt,index)
frequency=JOIN (letter," --- ",cnt)
 
*{frequency}
 

Output:

e --- 20144
a --- 16421
i --- 13980
r --- 13436
t --- 12836
o --- 12738
n --- 12097
s --- 10210
l --- 10061
c --- 8216
u --- 6489
m --- 5828
d --- 5799
p --- 5516
h --- 5208
g --- 4129
b --- 4115
y --- 3633
f --- 2662
w --- 1968
k --- 1925
v --- 1902
x --- 617
z --- 433
j --- 430
q --- 378
' --- 105
. --- 6
& --- 6
1 --- 2
9 --- 1
8 --- 1
7 --- 1
6 --- 1
5 --- 1
4 --- 1
3 --- 1
2 --- 1
0 --- 1

TXR[edit]

TXR Extraction Language plus TXR Lisp[edit]

@; Letter frequency: collect every character matching [A-Za-z] on every
@; input line, fold it to upper case, and count it in the hash h.
@; After all lines are processed, print one "LETTER: count" line per key.
@(do (defvar h (hash :equal-based)))
@(repeat)
@(coll :vars ())@\
@{letter /[A-Za-z]/}@(filter :upcase letter)@\
@(do (inc [h letter 0]))@\
@(end)
@(end)
@(do (dohash (key value h)
(format t "~a: ~a\n" key value)))
Output:
$ ./txr letterfreq.txr /usr/share/dict/words
A: 64123
B: 15524
C: 31569
[ ... abridged ... ]
X: 2124
Y: 12507
Z: 3238

TXR Lisp[edit]

;; Count alphabetic characters in /usr/share/dict/words, keyed by their
;; upper-case form, and print one "key: value" line per distinct letter.
;; NOTE(review): the stream s is never closed explicitly - confirm that is
;; acceptable for this one-shot script.
(let* ((s (open-file "/usr/share/dict/words" "r"))
;; characters obtained by repeated get-char calls on s, keeping only those
;; for which chr-isalpha holds
(chrs [keep-if* chr-isalpha (gun (get-char s))])
;; hash keyed by (chr-toupper ch); each entry starts from 0 and is advanced
;; with succ once per occurrence
(h [group-reduce (hash) chr-toupper (op succ @1) chrs 0]))
(dohash (key value h)
(put-line `@key: @value`)))

Vala[edit]

Library: Gee

Counts every character except the newline character.

 
using Gee;
 
/**
 * Counts every character (byte) of the file named by the first command-line
 * argument, excluding line terminators, and prints one line per distinct
 * character. Reports an error on stderr when no file name is given or the
 * file cannot be opened, instead of dereferencing a null value.
 */
void main(string[] args){
    if (args.length < 2){
        stderr.printf("Usage: %s <filename>\n", args[0]);
        return;
    }

    string filename = args[1];
    var file = FileStream.open(filename, "r");
    // FileStream.open yields null when the file cannot be opened.
    if (file == null){
        stderr.printf("Cannot open file '%s'\n", filename);
        return;
    }

    var counter = new HashMap<char, int>();

    // read_line() strips the line terminator and returns null at end of file.
    string? line = file.read_line();
    while (line != null){
        for (int x = 0; x < line.length; x++){
            // A missing key reads as 0, so the first occurrence stores 1.
            counter[line[x]] = counter[line[x]] + 1;
        }
        line = file.read_line();
    }

    foreach (var elem in counter.entries){
        stdout.printf("%c occured %d times\n", elem.key, elem.value);
    }
}
 

Sample output (run on its own source code) with several lines omitted:

v occured 5 times
, occured 4 times
w occured 2 times
	 occured 19 times
S occured 1 times
1 occured 2 times
! occured 1 times
k occured 1 times
l occured 22 times

VBA[edit]

 
Public Sub LetterFrequency(fname)
'count number of letters in text file "fname" (ASCII-coded)
'note: we count all characters but print only the letter frequencies
'
'fname: path of the text file to analyse
'output: one line per letter a..z in the Immediate window, with upper and
'        lower case occurrences added together

Dim Freqs(255) As Long 'one counter per byte value; numeric arrays start zeroed
Dim abyte As Byte
Dim ascal As Byte 'ascii code for lowercase a
Dim ascau As Byte 'ascii code for uppercase a
Dim fnum As Integer 'file number obtained from FreeFile
Dim i As Long

'try to open the file
'use a free file number rather than a hard-coded #1, which fails with
'"File already open" when the host already uses #1
fnum = FreeFile
On Error GoTo CantOpen
Open fname For Input As #fnum
On Error GoTo 0

'process file byte-per-byte
While Not EOF(fnum)
  abyte = Asc(Input(1, #fnum))
  Freqs(abyte) = Freqs(abyte) + 1
Wend
Close #fnum

'add lower and upper case together and print result
Debug.Print "Frequencies:"
ascal = Asc("a")
ascau = Asc("A")
For i = 0 To 25
  Debug.Print Chr$(ascal + i), Freqs(ascal + i) + Freqs(ascau + i)
Next i
Exit Sub

CantOpen:
  'only reached when Open failed, so there is nothing of ours to close;
  'a bare Close here would close every file opened elsewhere in the host
  Debug.Print "can't find or read the file "; fname
End Sub
 

Output:

LetterFrequency "d:\largetext.txt"
Frequencies:
a              24102 
b              4985 
c              4551 
d              19127 
e              61276 
f              2734 
g              10661 
h              8243 
i              21589 
j              4904 
k              7186 
l              12026 
m              7454 
n              31963 
o              19021 
p              4960 
q              37 
r              21166 
s              13403 
t              21090 
u              6117 
v              8612 
w              5017 
x              168 
y              299 
z              4159 

VBScript[edit]

 
' Counts every character of a text file and writes "char = count" lines
' to standard output (run with cscript).
filepath = "SPECIFY FILE PATH HERE"

Set objfso = CreateObject("Scripting.FileSystemObject")
Set objdict = CreateObject("Scripting.Dictionary")
Set objfile = objfso.OpenTextFile(filepath,1)

' ReadAll raises "Input past end of file" on an empty file, so only call it
' when the stream actually holds data.
If objfile.AtEndOfStream Then
	txt = ""
Else
	txt = objfile.ReadAll
End If
' The whole text is in memory now; release the file straight away.
objfile.Close

For i = 1 To Len(txt)
	char = Mid(txt,i,1)
	If objdict.Exists(char) Then
		objdict.Item(char) = objdict.Item(char) + 1
	Else
		objdict.Add char,1
	End If
Next

For Each key In objdict.Keys
	WScript.StdOut.WriteLine key & " = " & objdict.Item(key)
Next

Set objfile = Nothing
Set objfso = Nothing
Set objdict = Nothing
 

Vedit macro language[edit]

// Letter frequency: for each letter 'A'..'Z', count its occurrences in the
// opened file and print the letter followed by the count.
File_Open("c:\txt\a_text_file.txt")
Update()
 
for (#1='A'; #1<='Z'; #1++) {
// Build a one-character search string: output is redirected into text
// register 103 while the current letter is dumped, then redirection ends.
Out_Reg(103) Char_Dump(#1,NOCR) Out_Reg(CLEAR)
// #2 receives the result of searching the file from the beginning for that
// string. NOTE(review): with ALL this is presumably the number of matches,
// and case sensitivity depends on the editor's search settings - confirm.
#2 = Search(@103, BEGIN+ALL+NOERR)
// Print the letter and its count on one line.
Message(@103) Num_Type(#2)
}

Example output:

A   76
B   23
C   51
D   64
E  192
F   51
G   32
H   59
I  146
J    1
K    9
L   73
M   34
N   94
O  113
P   27
Q    1
R   92
S   89
T  138
U   63
V   26
W   35
X   16
Y   16
Z    2

Whitespace[edit]

   							
 







 


 












 






















 


 
 
 
; Tally each character read with ichr and print "<char> <count>" for every
; character code 1..127 that occurred at least once.
; Heap slots 0..127 hold the tallies; slot 0 doubles as the ichr read buffer,
; so it is never printed.
push 127
; Initialize a slot in the heap for each ASCII character.
0:
dup
push 0
store ; heap[n] = 0
push 1
sub ; n = n - 1
dup
jn 1 ; Counter went negative: slots 127..0 are all cleared.
jump 0
; Read until EOF, incrementing the relevant heap slot.
1:
push 0
dup
ichr ; Read one character into heap[0],
load ; then fetch it back onto the stack.
dup
jn 2 ; Done reading, proceed to print.
dup
load
push 1
add
store ; heap[c] = heap[c] + 1
jump 1
; Stack is [-1 -1], but [0] would be nice.
2:
sub ; -1 - -1 = 0, the starting value for the print loop's counter.
; Print characters with tallies greater than 0.
3:
push 1
add ; Advance to the next character code (so the first one examined is 1).
dup
push 128
sub
jz 4 ; All done.
dup
load
jz 3 ; Don't print if no occurrences.
dup
ochr ; Display the character,
push 32
ochr ; a space,
dup
load
onum ; its frequency,
push 10
ochr ; and a newline.
jump 3
4:
pop ; Discard the leftover counter before exiting.
exit
Output:
$ cat freq.ws | wspace freq.ws
	 64

 55
  119

Wren[edit]

Library: Fmt

As we have a copy to hand, we count the number of letters in the MIT 10000 word list which apparently contains nothing other than lower case letters.

import "io" for File
import "/fmt" for Fmt
 
// Load the word list and tally each lower-case ASCII letter
// (code points 97..122) into a 26-entry table.
var text = File.read("mit10000.txt")
var freqs = List.filled(26, 0)
for (cp in text.codePoints) {
    var slot = cp - 97
    if (slot >= 0 && slot <= 25) freqs[slot] = freqs[slot] + 1
}

// Grand total of all counted letters.
var totalFreq = 0
for (f in freqs) totalFreq = totalFreq + f

System.print("Frequencies of letters in mit10000.txt:")
System.print("\n freq \%")
System.print("----------------")
 
// One row per letter: the letter, its count and its share of the total.
var i = 0
while (i < 26) {
    var share = freqs[i]/totalFreq * 100
    Fmt.print("$c $5d $6.2f", i+97, freqs[i], share)
    i = i + 1
}
System.print(" ----- ------")
Fmt.print(" $5d 100.00", totalFreq)
 
Fmt.print("\nTotal characters in text file = $d minus 10000 \\n's = $d", text.count, totalFreq)
Output:
Frequencies of letters in mit10000.txt:

    freq     %
----------------
a   5378    8.16
b   1141    1.73
c   3025    4.59
d   2507    3.81
e   7601   11.54
f    927    1.41
g   1717    2.61
h   1429    2.17
i   5461    8.29
j    183    0.28
k    592    0.90
l   3231    4.90
m   1912    2.90
n   4822    7.32
o   4252    6.45
p   2027    3.08
q    123    0.19
r   4860    7.38
s   5085    7.72
t   4760    7.23
u   1939    2.94
v    849    1.29
w    632    0.96
x    264    0.40
y   1027    1.56
z    136    0.21
   -----  ------
   65880  100.00

Total characters in text file = 75880 minus 10000 \n's = 65880


XPL0[edit]

This takes advantage of DOS's ability to redirect input from a file to the keyboard. The input file must be terminated with an end-of-file character ($1A). Usage: count <filename.ext

include c:\cxpl\codes;          \intrinsic 'code' declarations
\Tally how often each byte value occurs in the input, then print the counts
\for the 128 seven-bit codes as a table of 16 rows by 8 columns.
\A = one counter per byte value, C = character code, I = table cell index
int A(256), C, I;
[for C:= 0 to 256-1 do A(C):= 0;
\read until the end-of-file character; that terminating $1A is counted too
repeat C:= ChIn(1); \device 1 doesn't buffer nor echo chars
A(C):= A(C)+1; \count character
until C=\EOF\$1A;
C:= 0;
for I:= 0 to 128-1 do \only show 7-bit ASCII
[ChOut(0, \tab\9);
\show the character itself, substituting arrow glyphs for LF and CR
case C of
$0A: ChOut(6, $19); \line feed = down arrow
$0D: ChOut(6, $1B) \carriage return = left arrow
other ChOut(6, C); \all other characters display on device 6
ChOut(0, ^ );
IntOut(0, A(C)); \show count
C:= C+16; \columnar order
\after 8 cells: start a new row, stepping back 8 columns and down one code
if (I&7) = 7 then [CrLf(0); C:= C-8*16+1];
];
]

Example output of count.xpl counting itself: Count2XPL0.gif

Yabasic[edit]

// Tally every character of unixdict.txt by its character code and print
// each code that occurred at least once as "<char> = <count>".
dim ascCodes(255)
 
f = open("unixdict.txt", "r")
 
if f <> 0 then
    // Read the file line by line and count each character of the line.
    while(not eof(#f))
        line input #f text$
        for position = 1 to len(text$)
            code = asc(mid$(text$, position, 1))
            ascCodes(code) = ascCodes(code) + 1
        next position
    wend
    close #f

    // Report only the codes 1..255 that were actually seen.
    for code = 1 to 255
        if ascCodes(code) <> 0 then
            print chr$(code), " = ", ascCodes(code)
        endif
    next code
end if

zkl[edit]

// Count the letters A-Z (upper and lower case together) in the given data
// and print the total followed by a comma-separated "X(count:percent%)" list.
// NOTE(review): if the data holds no letters, sum is 0 and the percentage
// expression n*100/sum divides by zero - confirm how callers avoid that.
fcn ccnt(textInBitBucket){
letters:=["a".."z"].pump(List().write,0); // array of 26 zeros
textInBitBucket.howza(0).pump(Void,'wrap(c){ // pump text as ints
if(97<=c<=122) c-=97; // 'a'..'z' -> index 0..25
else if(65<=c<=90) c-=65; // 'A'..'Z' -> index 0..25
else return(Void.Skip); // anything else is not counted
letters[c]+=1
});
sum:=letters.sum(); println(sum," letters");
letters.enumerate().pump(List,'wrap([(c,n)]){ // (index,count) pairs
"%s(%d:%d%)".fmt((c+65).toChar(),n,n*100/sum)}) // letter(count:percent%)
.concat(",").println();
}
 
// Demonstrate on an in-memory string, then on a dictionary file.
ccnt(Data(0,Int,"This is a test"));
ccnt(File("dict.txt").read());
Output:
11 letters
A(1:9%),B(0:0%),C(0:0%),D(0:0%),E(1:9%),F(0:0%),G(0:0%),H(1:9%),I(2:18%),J(0:0%),K(0:0%),L(0:0%),M(0:0%),N(0:0%),O(0:0%),P(0:0%),Q(0:0%),R(0:0%),S(3:27%),T(3:27%),U(0:0%),V(0:0%),W(0:0%),X(0:0%),Y(0:0%),Z(0:0%)

181171 letters
A(16421:9%),B(4115:2%),C(8216:4%),D(5799:3%),E(20144:11%),F(2662:1%),G(4129:2%),H(5208:2%),I(13980:7%),J(430:0%),K(1925:1%),L(10061:5%),M(5828:3%),N(12097:6%),O(12738:7%),P(5516:3%),Q(378:0%),R(13436:7%),S(10210:5%),T(12836:7%),U(6489:3%),V(1902:1%),W(1968:1%),X(617:0%),Y(3633:2%),Z(433:0%)

Zoea[edit]

 
program: letter_frequency
input: 'cbcacb' # can be literal value, stdin or file url at runtime
derive: [[a,1],[b,2],[c,3]] # example intermediate value: each letter of the input paired with its count
output: 'a : 1\nb : 2\nc : 3\n' # example result: one "letter : count" line per letter