I before E except after C: Difference between revisions

Content deleted Content added
Petelomax (talk | contribs)
m →‎{{header|Phix}}: improved comment
Not a robot (talk | contribs)
Add 8080 assembly
Line 29: Line 29:
* [http://ucrel.lancs.ac.uk/bncfreq/ Companion website] for the book: "Word Frequencies in Written and Spoken English: based on the British National Corpus".
* [http://ucrel.lancs.ac.uk/bncfreq/ Companion website] for the book: "Word Frequencies in Written and Spoken English: based on the British National Corpus".

=={{header|8080 Assembly}}==

This program is written to run under CP/M. It takes the filename on the command line.
The file can be as large as you like; it does not need to fit in memory all at once.
(Indeed, <code>unixdict.txt</code> is 206k.)

<lang 8080asm> ;;; I before E, except after C
;;; CP/M system addresses
bdos:	equ	5	; System call entry point
fcb1:	equ	5Ch	; First FCB (filled in from the command line)
dma:	equ	80h	; Standard DMA location (file blocks arrive here)
;;; CP/M function numbers, passed to bdos in register C
puts:	equ	9	; Write a '$'-terminated string to the console
fopen:	equ	0Fh	; Open a file
fread:	equ	14h	; Read from a file
;;; ASCII control characters
LF:	equ	10	; Line feed (marks the end of a word in the input)
CR:	equ	13	; Carriage return
EOF:	equ	26	; End-of-file marker character
;;; Program is assembled to load at 100h
	org	100h
;;; Program entry: open the file named on the command line
	mvi	c,fopen
	lxi	d,fcb1
	call	bdos
	inr	a		; An FF result wraps to 0: the open failed
	jz	die
;;; The file is read one 128-byte block at a time and is not
;;; expected to fit in memory (max 64 k). Two things are
;;; interleaved here: characters are copied from the block into
;;; the word buffer until a line ends, at which point the word is
;;; processed; and whenever the block is exhausted during that
;;; copy, the next block is read.
;;; Registers: BC = write position in the word buffer,
;;;            HL = read position in the DMA block.
	lxi	b,curwrd	; Word buffer starts out empty
rdblk:	push	b		; Save word pointer across the read call
	mvi	c,fread
	lxi	d,fcb1
	call	bdos		; Fetch the next block into the DMA area
	pop	b		; Word pointer back in BC
	dcr	a		; Result 1 means end of file
	jz	done
	inr	a		; Original result back; any other nonzero = error
	jnz	die
	lxi	h,dma		; Begin at the first byte of the block
nxtch:	mov	a,m		; A = current character
	cpi	EOF		; An EOF character ends the input
	jz	done
	stax	b		; Append character to the current word
	inx	b
	cpi	LF		; LF completes a word,
	cz	word		; so process it (resets BC, preserves HL)
	inr	l		; Advance within the block
	jnz	nxtch		; L not yet wrapped: more characters here
	jmp	rdblk		; L wrapped from FFh to 0: block used up
;;; When done, report the statistics: each label string is printed
;;; with sout, followed by its 16-bit counter printed with puthl.
done:	lxi	d,scie		; CIE
	call	sout
	lhld	cie
	call	puthl
	lxi	d,sxie		; xIE
	call	sout
	lhld	xie
	call	puthl
	lxi	d,scei		; CEI
	call	sout
	lhld	cei
	call	puthl
	lxi	d,sxei		; xEI
	call	sout
	lhld	xei
	call	puthl
;;; Then say what is and isn't plausible.
;;; pplaus expects HL = count with the feature and DE = count with
;;; the opposite feature, so the opposite count is loaded first and
;;; moved into DE with xchg before HL is loaded.
;;; NOTE(review): the criteria below follow the original comments
;;; (2*xIE>CIE, 2*CEI>xEI); confirm they match the intended rule.
	lxi	d,s_ienc	; I before E when not preceded by C
	call	sout		; plausible if 2*xIE>CIE
	lhld	cie
	xchg			; DE = CIE (opposite feature)
	lhld	xie		; HL = xIE (feature)
	call	pplaus
	lxi	d,s_eic		; E before I when preceded by C
	call	sout		; plausible if 2*CEI>xEI
	lhld	xei
	xchg			; DE = xEI (opposite feature)
	lhld	cei		; HL = CEI (feature)
;;; Execution continues straight into pplaus below; its closing
;;; jump to sout is the last thing the program does.
;;; If HL = amount of words with feature, and
;;; DE = amount of words with opposite feature, then print
;;; '(not) plausible', as appropriate.
;;; Plausible means 2*HL > DE, compared as unsigned 16-bit values.
;;; In:    HL = feature count, DE = opposite count
;;; Out:   ' plausible' or ' not plausible' (plus CR,LF) is printed
;;; Clobb: A, C, DE, HL, flags (and whatever the print call changes)
pplaus:	dad	h		; HL = 2 * feature
	jc	plaus		; Doubling overflowed 16 bits: exceeds any DE
	mov	a,d		; Compare high bytes first
	cmp	h
	jc	plaus		; D < H: 2*feature is larger, plausible
	jnz	nplaus		; D > H: 2*feature is smaller, low bytes irrelevant
	mov	a,e		; High bytes equal: low bytes decide
	cmp	l
	jc	plaus		; E < L: 2*feature is larger, plausible
nplaus:	lxi	d,snop		; Otherwise, not plausible
	jmp	sout
plaus:	lxi	d,splau
	jmp	sout
;;; Process a word: scan the buffered word at curwrd for the
;;; letters 'c', 'e' and 'i' and dispatch to the matching handler.
;;; In:  BC = position just past the last character stored
;;; HL (the caller's file read address) is kept on the stack while
;;; HL serves as the scan pointer; w_end takes it off again.
word:	push	h		; File read address goes on the stack
	mvi	a,0		; Terminate the word with a zero byte
	stax	b
	dcx	b
	lxi	h,curwrd	; Scanning begins at the first character
start:	mov	a,m		; A = character under the scan pointer
	inx	h		; HL now points at the following character
	ora	a		; Reached the zero terminator?
	jz	w_end		; Then the word is finished
	cpi	'i'		; An 'i' may begin 'ie'
	jz	findi
	cpi	'e'		; An 'e' may begin 'ei'
	jz	finde
	cpi	'c'		; A 'c' may begin 'cie' or 'cei'
	jnz	start		; Any other character: keep scanning
	jmp	findc
;;; We found an 'e' (one that is not part of 'cei', which findc
;;; handles). HL points at the character after the 'e'.
;;; If that character is 'i', count one 'ei' not preceded by 'c'.
finde:	mov	a,m		; Get following character
	cpi	'i'		; Is it 'i'?
	jnz	start		; If not, keep going
	inx	h		; Otherwise, move past it,
	xchg			; keep pointer in DE,
	lhld	xei		; We found ei without c
	inx	h
	shld	xei
	xchg			; Scan pointer back into HL
	jmp	start
;;; We found an 'i' (one that is not part of 'cie', which findc
;;; handles). HL points at the character after the 'i'.
;;; If that character is 'e', count one 'ie' not preceded by 'c'.
findi:	mov	a,m		; Get following character
	cpi	'e'		; Is it 'e'?
	jnz	start		; If not, keep going
	inx	h		; Otherwise, move past it,
	xchg			; keep pointer in DE,
	lhld	xie		; We found ie without c
	inx	h
	shld	xie
	xchg			; Scan pointer back into HL
	jmp	start
;;; We found a 'c'. HL points at the character after it; that
;;; character is only peeked at, HL is left where it is.
findc:	mov	a,m		; Peek at the following character
	cpi	'i'		; 'ci' may turn out to be 'cie'
	jz	findci
	cpi	'e'		; 'ce' may turn out to be 'cei'
	jnz	start		; Neither: resume the normal scan
	jmp	findce
;;; We have 'ce', HL points at the 'e'. If an 'i' follows, count
;;; one 'cei' and resume scanning after it; otherwise resume
;;; scanning at the 'e' with nothing counted.
findce:	inx	h		; Look at the character after the 'e'
	mov	a,m
	dcx	h		; HL back on the 'e'
	cpi	'i'		; Is it 'i'?
	jnz	start		; If not, do nothing
	xchg			; Park the scan pointer in DE
	lhld	cei		; We found 'cei'
	inx	h		; Increment the counter
	shld	cei
	xchg			; Scan pointer back in HL (at the 'e')
	inx	h		; Step over the 'e'
	inx	h		; and over the 'i'
	jmp	start
;;; We have 'ci', HL points at the 'i'. If an 'e' follows, count
;;; one 'cie' and resume scanning after it; otherwise resume
;;; scanning at the 'i' with nothing counted.
findci:	inx	h		; Look at the character after the 'i'
	mov	a,m
	dcx	h		; HL back on the 'i'
	cpi	'e'		; Is it 'e'?
	jnz	start		; If not, do nothing
	xchg			; Park the scan pointer in DE
	lhld	cie		; We found 'cie'
	inx	h		; Increment the counter
	shld	cie
	xchg			; Scan pointer back in HL (at the 'i')
	inx	h		; Step over the 'i'
	inx	h		; and over the 'e'
	jmp	start
;;; End of the word routine: reset the word buffer for the next
;;; word and return to the caller.
;;; Out: BC = curwrd (empty word buffer), HL = file read address
w_end:	lxi	b,curwrd	; Set word pointer to beginning
	pop	h		; Restore file read address
	ret			; Return to the caller; must not run on into die
;;; Error exit: print the error message, then stop the program
die:	lxi	d,errmsg
	call	sout		; sout performs the console-string call
	rst	0		; Restart at address 0 to end the program
;;; Print string
;;; In: DE = address of a '$'-terminated string
sout:	mvi	c,puts		; Select the console string function
	call	bdos
	ret
;;; Print HL to the console as a decimal number, followed by CR,LF.
;;; Digits are produced least significant first by repeated
;;; division by 10 and stored backwards into the five bytes that
;;; precede num, so the finished string ends with num's CR,LF,'$'.
;;; In:    HL = unsigned 16-bit number
;;; Clobb: A, BC, DE, HL, flags (and whatever the print call changes)
;;; While converting, the top of the stack holds the digit write
;;; pointer; xthl swaps it with HL whenever a digit is stored.
puthl:	push	h		; Number onto the stack
	lxi	h,num		; HL = end of the digit area
	xthl			; HL = number, stack top = write pointer
	lxi	b,-10		; Adding -10 subtracts 10
dgt:	lxi	d,-1		; DE = quotient, pre-decremented
clcdgt:	inx	d		; One more 10 taken out
	dad	b		; HL = HL - 10
	jc	clcdgt		; Carry set: HL was >= 10, go again
	mov	a,l		; L = remainder - 10
	adi	10+'0'		; Undo the extra -10 and make it ASCII
	xthl			; HL = write pointer
	dcx	h		; Step back one position
	mov	m,a		; Store the digit
	xthl			; Write pointer back onto the stack
	xchg			; HL = quotient
	mov	a,h		; Quotient zero?
	ora	l
	jnz	dgt		; If not, produce the next digit
	pop	d		; DE = address of the first digit
	mvi	c,puts		; Print digits plus CR,LF
	jmp	bdos
;;; Message strings. Each printable string ends with '$', the
;;; terminator for the console string call selected by puts.
errmsg: db 'Error$' ; Good enough
s_ienc: db 'I before E when not preceded by C:$'
s_eic: db 'E before I when preceded by C:$'
;;; snop has no '$' of its own: printing from snop runs on into
;;; splau and yields ' not plausible'. Keep the two adjacent.
snop: db ' not'
splau: db ' plausible',CR,LF,'$'
scie: db 'CIE: $' ; Report strings
sxie: db 'xIE: $'
scei: db 'CEI: $'
sxei: db 'xEI: $'
;;; Five bytes directly in front of num; puthl loads the address
;;; of num as its starting point. Presumably room for up to five
;;; decimal digits written backwards from num - confirm in puthl.
db '00000'
num: db CR,LF,'$' ; Space for number
;;; Counters
;;; Four 16-bit counts, all starting at zero. They are read with
;;; lhld and written back with shld.
xie: dw 0 ; I before E when not preceded by C
cie: dw 0 ; I before E when preceded by C
cei: dw 0 ; E before I when preceded by C
xei: dw 0 ; E before I when not preceded by C
;;; curwrd is the address right after the counters; no storage is
;;; reserved for it here, the word buffer simply starts at that point.
curwrd: equ $ ; Current word stored here</lang>


<pre>A>iec unixdict.txt
CIE: 24
xIE: 217
CEI: 13
xEI: 464
I before E when not preceded by C: plausible
E before I when preceded by C: not plausible</pre>

=={{header|ALGOL 68}}==
=={{header|ALGOL 68}}==