Letter frequency: Difference between revisions

Content added Content deleted
Line 4,020: Line 4,020:
Again eliminating all fussing with the details of converting letters into list indices.
Again eliminating all fussing with the details of converting letters into list indices.


=={{header|Quick Basic PDS 7.1/VB-DOS}}==
=={{header|Quick Basic/QBASIC/PDS 7.1/VB-DOS}}==
This version only counts valid letters from A to Z (including Ñ in Spanish alphabet). Takes in account accented vowels. It runs in QB PDS 7.1 and VB_DOS. If you want to run it in QBASIC or Quick BASIC 4.x, you need to modify the error handler.
This version counts valid letters from A to Z (including Ñ in Spanish alphabet) or characters in a file. Takes in account accented vowels. It runs in QB, QBASIC, PDS 7.1 and VB_DOS as is.
<lang VB>
<lang VB>
' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' '
' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' '
' Program CountLet '
' Program CountCar '
' '
' '
' This program counts how many letters have a text '
' This program counts how many distinct characters '
' file specified by the user. Also, returns how many '
' have a text file specified by the user. '
' total characters has the file. '
' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' '
' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' '
' OPTION EXPLICIT ' Remove comment in VB-DOS
' OPTION EXPLICIT ' Remove comment in VB-DOS

' Register
TYPE regChar
Character AS STRING * 3
Count AS LONG
END TYPE


' Var
' Var
DIM iChar AS INTEGER
DIM iCL AS INTEGER
DIM iCountChars AS INTEGER
DIM iFile AS INTEGER
DIM iFile AS INTEGER
DIM i AS INTEGER
DIM i AS INTEGER
DIM iLetter AS INTEGER
DIM lMUC AS LONG
DIM iMUL AS INTEGER
DIM iMUI AS INTEGER
DIM iMUI AS INTEGER
DIM iLUL AS INTEGER
DIM lLUC AS LONG
DIM iLUI AS INTEGER
DIM iLUI AS INTEGER
DIM iMaxIdx AS INTEGER
DIM iP AS INTEGER
DIM iP AS INTEGER
DIM iPause AS INTEGER
DIM iPMI AS INTEGER
DIM iPrint AS INTEGER
DIM lHowMany AS LONG
DIM lHowMany AS LONG
DIM lTotLetters AS LONG
DIM lTotChars AS LONG
DIM strChar AS STRING
DIM sTime AS SINGLE
DIM strFile AS STRING
DIM strFile AS STRING
DIM strTxt AS STRING
DIM strTxt AS STRING
DIM strDate AS STRING
DIM strTime AS STRING
DIM strTime AS STRING
DIM strKey AS STRING
CONST LngReg = 256
CONST Letters = 1
CONST FALSE = 0
CONST TRUE = NOT FALSE


'------Main program cycle
'------Main program cycle

' Initialize variables
strDate = DATE$
strTime = TIME$
strTime = TIME$
iFile = FREEFILE
iFile = FREEFILE


DO
DO
REDIM iL(0 TO 25) AS INTEGER ' 25 is ¤
iMUL = 0
iMUI = 0
CLS
CLS
PRINT "This program counts letters or characters in a text file."
INPUT ; "File to open: ", strFile
PRINT
ON ERROR RESUME NEXT
INPUT "File to open: ", strFile
OPEN strFile FOR INPUT AS #iFile
OPEN strFile FOR BINARY AS #iFile
IF ERR <> 53 THEN
IF LOF(iFile) > 0 THEN
PRINT "Count: 1) Letters 2) Characters (1 or 2)";
DO
strKey = INKEY$
LOOP UNTIL strKey = "1" OR strKey = "2"
PRINT ". Option selected: "; strKey
iCL = VAL(strKey)
sTime = TIMER
iP = POS(0)
iP = POS(0)
lHowMany = LOF(iFile)
lHowMany = LOF(iFile)
strTxt = SPACE$(LngReg)

IF iCL = Letters THEN
iMaxIdx = 26
ELSE
iMaxIdx = 255
END IF

IF iMaxIdx <> iPMI THEN
iPMI = iMaxIdx
REDIM rChar(0 TO iMaxIdx) AS regChar

FOR i = 0 TO iMaxIdx
IF iCL = Letters THEN
strTxt = CHR$(i + 65)
IF i = 26 THEN strTxt = CHR$(165)
ELSE
SELECT CASE i
CASE 0: strTxt = "nul"
CASE 7: strTxt = "bel"
CASE 9: strTxt = "tab"
CASE 10: strTxt = "lf"
CASE 11: strTxt = "vt"
CASE 12: strTxt = "ff"
CASE 13: strTxt = "cr"
CASE 28: strTxt = "fs"
CASE 29: strTxt = "gs"
CASE 30: strTxt = "rs"
CASE 31: strTxt = "us"
CASE 32: strTxt = "sp"
CASE ELSE: strTxt = CHR$(i)
END SELECT
END IF
rChar(i).Character = strTxt
NEXT i
ELSE
FOR i = 0 TO iMaxIdx
rChar(i).Count = 0
NEXT i
END IF

PRINT "Looking for ";
IF iCL = Letters THEN PRINT "letters."; ELSE PRINT "characters.";
PRINT " File is"; STR$(lHowMany); " in size. Working"; : COLOR 23: PRINT "..."; : COLOR (7)
PRINT " File is"; STR$(lHowMany); " in size. Working"; : COLOR 23: PRINT "..."; : COLOR (7)
DO WHILE NOT EOF(iFile)
DO WHILE LOC(iFile) < LOF(iFile)
LINE INPUT #iFile, strTxt
IF LOC(iFile) + LngReg > LOF(iFile) THEN
strTxt = SPACE$(LOF(iFile) - LOC(iFile))
END IF
GET #iFile, , strTxt
FOR i = 1 TO LEN(strTxt)
FOR i = 1 TO LEN(strTxt)
strChar = UCASE$(MID$(strTxt, i, 1))
IF iCL = Letters THEN
iChar = ASC(UCASE$(MID$(strTxt, i, 1)))
SELECT CASE strChar
CASE CHR$(164): strChar = CHR$(165) ' Ñ
SELECT CASE iChar
CASE CHR$(160): strChar = "A" ' á
CASE 164: iChar = 165
CASE CHR$(130), CHR$(144): strChar = "E" ' é, É
CASE 160: iChar = 65
CASE CHR$(161): strChar = "I" ' í
CASE 130, 144: iChar = 69
CASE CHR$(162): strChar = "O" ' ó
CASE 161: iChar = 73
CASE CHR$(163), CHR$(129): strChar = "U" ' ú, ü
CASE 162: iChar = 79
END SELECT
CASE 163, 129: iChar = 85
iLetter = ASC(strChar) - 65
END SELECT
IF iLetter >= 0 AND iLetter <= 25 THEN
iChar = iChar - 65
iL(iLetter) = iL(iLetter) + 1
ELSEIF iLetter = 100 THEN ' Ñ
' Validates if iChar is a letter
iL(25) = iL(25) + 1
IF iChar >= 0 AND iChar <= 25 THEN
rChar(iChar).Count = rChar(iChar).Count + 1
ELSEIF iChar = 100 THEN ' CHR$(165)
rChar(iMaxIdx).Count = rChar(iMaxIdx).Count + 1
END IF
ELSE
iChar = ASC(MID$(strTxt, i, 1))
rChar(iChar).Count = rChar(iChar).Count + 1
END IF
END IF
NEXT i
NEXT i
LOOP
LOOP
CLOSE #iFile


' Show the letters found
' Show the characters found
iMUL = 0
lMUC = 0
iMUI = 0
iMUI = 0
iLUL = 32767
lLUC = 2147483647
iLUI = 0
iLUI = 0
lTotLetters = 0
iPrint = FALSE
lTotChars = 0
LOCATE , iP: PRINT SPACE$(40): PRINT
iCountChars = 0
PRINT "Letters found: ";
FOR i = 0 TO 24
iPause = FALSE
CLS
' Most Used Letter
IF iCL = Letters THEN PRINT "Letters found: "; ELSE PRINT "Characters found: ";
IF iMUL < iL(i) THEN
iMUL = iL(i)
FOR i = 0 TO iMaxIdx
' Most Used Character
IF lMUC < rChar(i).Count THEN
lMUC = rChar(i).Count
iMUI = i
iMUI = i
END IF
END IF


' Print letter
' Print character
IF iL(i) > 0 THEN
IF rChar(i).Count > 0 THEN
PRINT CHR$(i + 65); "="; LTRIM$(STR$(iL(i)));
strTxt = ""
IF i < 24 THEN PRINT ", ";
IF iPrint THEN strTxt = ", " ELSE iPrint = TRUE
strTxt = strTxt + LTRIM$(RTRIM$(rChar(i).Character))
IF POS(0) > 73 THEN PRINT
lTotLetters = lTotLetters + iL(i)
strTxt = strTxt + "=" + LTRIM$(STR$(rChar(i).Count))
iP = POS(0)
IF iP + LEN(strTxt) + 1 >= 80 AND iPrint THEN
PRINT ","
IF CSRLIN >= 23 AND NOT iPause THEN
iPause = TRUE
PRINT "Press a key to continue..."
DO
strKey = INKEY$
LOOP UNTIL strKey <> ""
END IF
strTxt = MID$(strTxt, 3)
END IF
PRINT strTxt;
lTotChars = lTotChars + rChar(i).Count
iCountChars = iCountChars + 1


' Least Used Letter
' Least Used Character
IF iLUL > iL(i) THEN
IF lLUC > rChar(i).Count THEN
iLUL = iL(i)
lLUC = rChar(i).Count
iLUI = i
iLUI = i
END IF
END IF
Line 4,114: Line 4,213:
NEXT i
NEXT i


IF iL(25) > 0 THEN
' Are there more ¤s?
IF iMUL < iL(25) THEN
iMUL = iL(25)
iMUI = 25
ELSEIF iLUL > iL(25) THEN
iLUL = iL(25)
iLUI = 25
END IF
PRINT ", "; CHR$(165); " ="; STR$(iL(25));
END IF
PRINT "."
PRINT "."


' Shows the most used letter
' Shows the summary
IF iMUI < 25 THEN strChar = CHR$(iMUI + 65) ELSE strChar = CHR$(165)
IF iLUI < 25 THEN strTxt = CHR$(iLUI + 65) ELSE strTxt = CHR$(165)
PRINT
PRINT
PRINT "Total characters in document:"; lHowMany
PRINT "File analyzed....................: "; strFile
PRINT "Looked for.......................: "; : IF iCL = Letters THEN PRINT "Letters" ELSE PRINT "Characters"
PRINT "Total letters in document...:"; lTotLetters
PRINT "Other characters in document:"; lHowMany - lTotLetters
PRINT "Total characters in file.........:"; lHowMany
PRINT "Most used letter was........: "; strChar
PRINT "Total characters counted.........:"; lTotChars
PRINT "Least used letter was.......: "; strTxt
IF iCL = Letters THEN PRINT "Characters discarded on count....:"; lHowMany - lTotChars
PRINT "Distinct characters found in file:"; iCountChars; "of"; iMaxIdx + 1
PRINT "Most used character was..........: ";
iPrint = FALSE
FOR i = 0 TO iMaxIdx
IF rChar(i).Count = lMUC THEN
IF iPrint THEN PRINT ", "; ELSE iPrint = TRUE
PRINT RTRIM$(LTRIM$(rChar(i).Character));
END IF
NEXT i
PRINT " ("; LTRIM$(STR$(rChar(iMUI).Count)); " times)"
PRINT "Least used character was.........: ";
iPrint = FALSE
FOR i = 0 TO iMaxIdx
IF rChar(i).Count = lLUC THEN
IF iPrint THEN PRINT ", "; ELSE iPrint = TRUE
PRINT RTRIM$(LTRIM$(rChar(i).Character));
END IF
NEXT i
PRINT " ("; LTRIM$(STR$(rChar(iLUI).Count)); " times)"
PRINT "Time spent in the process........:"; TIMER - sTime; "seconds"
ELSE
ELSE
' File does not exist
' File does not exist
ON ERROR GOTO 0
CLOSE #iFile
KILL strFile
PRINT
PRINT
PRINT "File does not exist."
PRINT "File does not exist."
END IF
END IF
CLOSE #iFile


' Again?
' Again?
Line 4,149: Line 4,254:
PRINT "Again? (Y/n)"
PRINT "Again? (Y/n)"
DO
DO
strChar = UCASE$(INKEY$)
strTxt = UCASE$(INKEY$)
LOOP UNTIL strChar = "N" OR strChar = "Y" OR strChar = CHR$(13) OR strChar = CHR$(27)
LOOP UNTIL strTxt = "N" OR strTxt = "Y" OR strTxt = CHR$(13) OR strTxt = CHR$(27)
LOOP UNTIL strChar = "N" OR strChar = CHR$(27)
LOOP UNTIL strTxt = "N" OR strTxt = CHR$(27)


CLS
CLS
PRINT "End of execution. Start time: "; strTime; ", end time: "; TIME$; "."
PRINT "End of execution."
PRINT "Start time: "; strDate; " "; strTime; ", end time: "; DATE$; " "; TIME$; "."
END
END
' ---End of main program cycle
' ---End of main program cycle

</lang>
</lang>


Output:
<pre>
This program counts letters or characters in a text file.

File to open: readme.txt
Count: 1) Letters 2) Characters (1 or 2). Option selected: 1
Looking for letters. File is 23769 in size. Working...

Letters found: A=1427, B=306, C=583, D=530, E=2098, F=279, G=183, H=501,
I=1177, J=15, K=34, L=741, M=379, N=1219, O=1183, P=312, Q=32, R=1105, S=1079,
T=1309, U=660, V=346, W=147, X=190, Y=242, Z=70, Ñ=5.

File analyzed....................: readme.txt
Looked for.......................: Letters
Total characters in file.........: 23769
Total characters counted.........: 16152
Characters discarded on count....: 7617
Distinct characters found in file: 27 of 27
Most used character was..........: E (2098 times)
Least used character was.........: Ñ (5 times)
Time spent in the process........: .3789063 seconds

Again? (Y/n)
</pre>


=={{header|R}}==
=={{header|R}}==