Letter frequency: Difference between revisions

Line 4,020:
Again eliminating all fussing with the details of converting letters into list indices.
 
=={{header|Quick Basic /QBASIC/PDS 7.1/VB-DOS}}==
This version only counts valid letters from A to Z (including Ñ in Spanish alphabet) or characters in a file. Takes in account accented vowels. It runs in QB, QBASIC, PDS 7.1 and VB_DOS. If you want to run it in QBASIC or Quick BASIC 4.x, you need to modify the erroras handleris.
<lang VB>
' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' '
' Program CountLetCountCar '
' '
' This program counts how many lettersdistinct have a textcharacters '
' have a text file specified by the user. Also, returns how many '
' total characters has the file. '
' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' '
' OPTION EXPLICIT ' Remove comment in VB-DOS
 
' Register
TYPE regChar
Character AS STRING * 3
Count AS LONG
END TYPE
 
' Var
DIM iChar AS INTEGER
DIM iCL AS INTEGER
DIM iCountChars AS INTEGER
DIM iFile AS INTEGER
DIM i AS INTEGER
DIM iLetterlMUC AS INTEGERLONG
DIM iMUL AS INTEGER
DIM iMUI AS INTEGER
DIM iLULlLUC AS INTEGERLONG
DIM iLUI AS INTEGER
DIM iMaxIdx AS INTEGER
DIM iP AS INTEGER
DIM iPause AS INTEGER
DIM iPMI AS INTEGER
DIM iPrint AS INTEGER
DIM lHowMany AS LONG
DIM lTotLetterslTotChars AS LONG
DIM strCharsTime AS STRINGSINGLE
DIM strFile AS STRING
DIM strTxt AS STRING
DIM strDate AS STRING
DIM strTime AS STRING
DIM strKey AS STRING
CONST LngReg = 256
CONST Letters = 1
CONST FALSE = 0
CONST TRUE = NOT FALSE
 
'------Main program cycle
 
' Initialize variables
strDate = DATE$
strTime = TIME$
iFile = FREEFILE
 
 
DO
REDIM iL(0 TO 25) AS INTEGER ' 25 is ¤
iMUL = 0
iMUI = 0
CLS
PRINT "This program counts letters or characters in a text file."
INPUT ; "File to open: ", strFile
PRINT
ON ERROR RESUME NEXT
INPUT "File to open: ", strFile
OPEN strFile FOR INPUT AS #iFile
OPEN strFile FOR BINARY AS #iFile
IF ERR <> 53 THEN
IF LOF(iFile) > 0 THEN
PRINT "Count: 1) Letters 2) Characters (1 or 2)";
DO
strKey = INKEY$
LOOP UNTIL strKey = "1" OR strKey = "2"
PRINT ". Option selected: "; strKey
iCL = VAL(strKey)
sTime = TIMER
iP = POS(0)
lHowMany = LOF(iFile)
strTxt = SPACE$(LngReg)
 
IF iCL = Letters THEN
iMaxIdx = 26
ELSE
iMaxIdx = 255
END IF
 
IF iMaxIdx <> iPMI THEN
iPMI = iMaxIdx
REDIM rChar(0 TO iMaxIdx) AS regChar
 
FOR i = 0 TO iMaxIdx
IF iCL = Letters THEN
strTxt = CHR$(i + 65)
IF i = 26 THEN strTxt = CHR$(165)
ELSE
SELECT CASE i
CASE 0: strTxt = "nul"
CASE 7: strTxt = "bel"
CASE 9: strTxt = "tab"
CASE 10: strTxt = "lf"
CASE 11: strTxt = "vt"
CASE 12: strTxt = "ff"
CASE 13: strTxt = "cr"
CASE 28: strTxt = "fs"
CASE 29: strTxt = "gs"
CASE 30: strTxt = "rs"
CASE 31: strTxt = "us"
CASE 32: strTxt = "sp"
CASE ELSE: strTxt = CHR$(i)
END SELECT
END IF
rChar(i).Character = strTxt
NEXT i
ELSE
FOR i = 0 TO iMaxIdx
rChar(i).Count = 0
NEXT i
END IF
 
PRINT "Looking for ";
IF iCL = Letters THEN PRINT "letters."; ELSE PRINT "characters.";
PRINT " File is"; STR$(lHowMany); " in size. Working"; : COLOR 23: PRINT "..."; : COLOR (7)
DO WHILE NOTLOC(iFile) EOF< LOF(iFile)
LINEIF INPUTLOC(iFile) + LngReg > #LOF(iFile,) strTxtTHEN
strTxt = SPACE$(LOF(iFile) - LOC(iFile))
END IF
GET #iFile, , strTxt
FOR i = 1 TO LEN(strTxt)
strCharIF =iCL UCASE$(MID$(strTxt,= i,Letters 1))THEN
iChar = ASC(UCASE$(MID$(strTxt, i, 1)))
SELECT CASE strChar
SELECT CASE CHR$(164): strChar = CHR$(165) ' ÑiChar
CASE CHR$(160)164: strChariChar = "A" ' á165
CASE CHR$(130), CHR$(144)160: strChariChar = "E" ' é, É65
CASE CHR$(161)130, 144: strChariChar = "I" ' í69
CASE CHR$(162)161: strChariChar = "O" ' ó73
CASE CHR$(163), CHR$(129)162: strChariChar = "U" ' ú, ü79
END SELECT CASE 163, 129: iChar = 85
iLetter = ASC(strChar)END - 65SELECT
IF iLetter >iChar = 0 AND iLetter <=iChar 25- THEN65
iL(iLetter) = iL(iLetter) + 1
ELSEIF iLetter =' 100Validates THENif 'iChar is a Ñletter
iL(25)IF iChar >= iL(25)0 +AND iChar <= 25 1THEN
rChar(iChar).Count = rChar(iChar).Count + 1
ELSEIF iChar = 100 THEN ' CHR$(165)
rChar(iMaxIdx).Count = rChar(iMaxIdx).Count + 1
END IF
ELSE
iChar = ASC(MID$(strTxt, i, 1))
rChar(iChar).Count = rChar(iChar).Count + 1
END IF
NEXT i
LOOP
CLOSE #iFile
 
' Show the letterscharacters found
iMULlMUC = 0
iMUI = 0
iLULlLUC = 327672147483647
iLUI = 0
lTotLettersiPrint = 0FALSE
lTotChars = 0
LOCATE , iP: PRINT SPACE$(40): PRINT
iCountChars = 0
PRINT "Letters found: ";
FOR iiPause = 0 TO 24FALSE
CLS
' Most Used Letter
IF iCL = Letters THEN PRINT "Letters found: "; ELSE PRINT "Characters found: ";
IF iMUL < iL(i) THEN
FOR i = 0 iMULTO = iL(i)iMaxIdx
' Most Used Character
IF lMUC < rChar(i).Count THEN
lMUC = rChar(i).Count
iMUI = i
END IF
 
' Print lettercharacter
IF iLrChar(i).Count > 0 THEN
PRINTstrTxt CHR$(i + 65);= "="; LTRIM$(STR$(iL(i)));
IF i < 24iPrint THEN PRINTstrTxt = ", "; ELSE iPrint = TRUE
strTxt = strTxt + LTRIM$(RTRIM$(rChar(i).Character))
IF POS(0) > 73 THEN PRINT
lTotLettersstrTxt = lTotLettersstrTxt + iL"=" + LTRIM$(STR$(rChar(i).Count))
iP = POS(0)
IF iP + LEN(strTxt) + 1 >= 80 AND iPrint THEN
PRINT ","
IF CSRLIN >= 23 AND NOT iPause THEN
iPause = TRUE
PRINT "Press a key to continue..."
DO
strKey = INKEY$
LOOP UNTIL strKey <> ""
END IF
strTxt = MID$(strTxt, 3)
END IF
PRINT strTxt;
lTotChars = lTotChars + rChar(i).Count
iCountChars = iCountChars + 1
 
' Least Used LetterCharacter
IF iLULlLUC > iLrChar(i).Count THEN
iLULlLUC = iLrChar(i).Count
iLUI = i
END IF
Line 4,114 ⟶ 4,213:
NEXT i
 
IF iL(25) > 0 THEN
' Are there more ¤s?
IF iMUL < iL(25) THEN
iMUL = iL(25)
iMUI = 25
ELSEIF iLUL > iL(25) THEN
iLUL = iL(25)
iLUI = 25
END IF
PRINT ", "; CHR$(165); " ="; STR$(iL(25));
END IF
PRINT "."
 
' Shows the most used lettersummary
IF iMUI < 25 THEN strChar = CHR$(iMUI + 65) ELSE strChar = CHR$(165)
IF iLUI < 25 THEN strTxt = CHR$(iLUI + 65) ELSE strTxt = CHR$(165)
PRINT
PRINT "TotalFile charactersanalyzed....................: in document:"; lHowManystrFile
PRINT "Looked for.......................: "; : IF iCL = Letters THEN PRINT "Letters" ELSE PRINT "Characters"
PRINT "Total letters in document...:"; lTotLetters
PRINT "OtherTotal characters in documentfile.........:"; lHowMany - lTotLetters
PRINT "MostTotal usedcharacters letter wascounted.........: "; strCharlTotChars
IF iCL = Letters THEN PRINT "LeastCharacters useddiscarded letteron was...count....: "; strTxtlHowMany - lTotChars
PRINT "Distinct characters found in file:"; iCountChars; "of"; iMaxIdx + 1
PRINT "Most used character was..........: ";
iPrint = FALSE
FOR i = 0 TO iMaxIdx
IF rChar(i).Count = lMUC THEN
IF iPrint THEN PRINT ", "; ELSE iPrint = TRUE
PRINT RTRIM$(LTRIM$(rChar(i).Character));
END IF
NEXT i
PRINT " ("; LTRIM$(STR$(rChar(iMUI).Count)); " times)"
PRINT "Least used character was.........: ";
iPrint = FALSE
FOR i = 0 TO iMaxIdx
IF rChar(i).Count = lLUC THEN
IF iPrint THEN PRINT ", "; ELSE iPrint = TRUE
PRINT RTRIM$(LTRIM$(rChar(i).Character));
END IF
NEXT i
PRINT " ("; LTRIM$(STR$(rChar(iLUI).Count)); " times)"
PRINT "Time spent in the process........:"; TIMER - sTime; "seconds"
ELSE
' File does not exist
ONCLOSE ERROR GOTO 0#iFile
KILL strFile
PRINT
PRINT "File does not exist."
END IF
CLOSE #iFile
 
' Again?
Line 4,149 ⟶ 4,254:
PRINT "Again? (Y/n)"
DO
strCharstrTxt = UCASE$(INKEY$)
LOOP UNTIL strCharstrTxt = "N" OR strCharstrTxt = "Y" OR strCharstrTxt = CHR$(13) OR strCharstrTxt = CHR$(27)
LOOP UNTIL strCharstrTxt = "N" OR strCharstrTxt = CHR$(27)
 
CLS
PRINT "End of execution. Start time: "; strTime; ", end time: "; TIME$; "."
PRINT "Start time: "; strDate; " "; strTime; ", end time: "; DATE$; " "; TIME$; "."
END
' ---End of main program cycle
 
</lang>
 
Output:
<pre>
This program counts letters or characters in a text file.
 
File to open: readme.txt
Count: 1) Letters 2) Characters (1 or 2). Option selected: 1
Looking for letters. File is 23769 in size. Working...
 
Letters found: A=1427, B=306, C=583, D=530, E=2098, F=279, G=183, H=501,
I=1177, J=15, K=34, L=741, M=379, N=1219, O=1183, P=312, Q=32, R=1105, S=1079,
T=1309, U=660, V=346, W=147, X=190, Y=242, Z=70, Ñ=5.
 
File analyzed....................: readme.txt
Looked for.......................: Letters
Total characters in file.........: 23769
Total characters counted.........: 16152
Characters discarded on count....: 7617
Distinct characters found in file: 27 of 27
Most used character was..........: E (2098 times)
Least used character was.........: Ñ (5 times)
Time spent in the process........: .3789063 seconds
 
Again? (Y/n)
</pre>
 
=={{header|R}}==
58

edits