Letter frequency: Difference between revisions
Content added Content deleted
Line 4,020: | Line 4,020: | ||
Again eliminating all fussing with the details of converting letters into list indices. |
Again eliminating all fussing with the details of converting letters into list indices. |
||
=={{header|Quick Basic |
=={{header|Quick Basic/QBASIC/PDS 7.1/VB-DOS}}== |
||
This version |
This version counts valid letters from A to Z (including Ñ in Spanish alphabet) or characters in a file. Takes in account accented vowels. It runs in QB, QBASIC, PDS 7.1 and VB_DOS as is. |
||
<lang VB> |
<lang VB> |
||
' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' |
' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' |
||
' Program |
' Program CountCar ' |
||
' ' |
' ' |
||
' This program counts how many |
' This program counts how many distinct characters ' |
||
' file specified by the user. |
' have a text file specified by the user. ' |
||
' total characters has the file. ' |
|||
' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' |
' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' |
||
' OPTION EXPLICIT ' Remove comment in VB-DOS |
' OPTION EXPLICIT ' Remove comment in VB-DOS |
||
' Register |
|||
TYPE regChar |
|||
Character AS STRING * 3 |
|||
Count AS LONG |
|||
END TYPE |
|||
' Var |
' Var |
||
DIM iChar AS INTEGER |
|||
DIM iCL AS INTEGER |
|||
DIM iCountChars AS INTEGER |
|||
DIM iFile AS INTEGER |
DIM iFile AS INTEGER |
||
DIM i AS INTEGER |
DIM i AS INTEGER |
||
DIM |
DIM lMUC AS LONG |
||
DIM iMUL AS INTEGER |
|||
DIM iMUI AS INTEGER |
DIM iMUI AS INTEGER |
||
DIM |
DIM lLUC AS LONG |
||
DIM iLUI AS INTEGER |
DIM iLUI AS INTEGER |
||
DIM iMaxIdx AS INTEGER |
|||
DIM iP AS INTEGER |
DIM iP AS INTEGER |
||
DIM iPause AS INTEGER |
|||
DIM iPMI AS INTEGER |
|||
DIM iPrint AS INTEGER |
|||
DIM lHowMany AS LONG |
DIM lHowMany AS LONG |
||
DIM |
DIM lTotChars AS LONG |
||
DIM |
DIM sTime AS SINGLE |
||
DIM strFile AS STRING |
DIM strFile AS STRING |
||
DIM strTxt AS STRING |
DIM strTxt AS STRING |
||
DIM strDate AS STRING |
|||
DIM strTime AS STRING |
DIM strTime AS STRING |
||
DIM strKey AS STRING |
|||
CONST LngReg = 256 |
|||
CONST Letters = 1 |
|||
CONST FALSE = 0 |
|||
CONST TRUE = NOT FALSE |
|||
'------Main program cycle |
'------Main program cycle |
||
' Initialize variables |
|||
strDate = DATE$ |
|||
strTime = TIME$ |
strTime = TIME$ |
||
iFile = FREEFILE |
iFile = FREEFILE |
||
DO |
DO |
||
REDIM iL(0 TO 25) AS INTEGER ' 25 is ¤ |
|||
iMUL = 0 |
|||
iMUI = 0 |
|||
CLS |
CLS |
||
PRINT "This program counts letters or characters in a text file." |
|||
INPUT ; "File to open: ", strFile |
|||
PRINT |
|||
ON ERROR RESUME NEXT |
|||
INPUT "File to open: ", strFile |
|||
OPEN strFile FOR INPUT AS #iFile |
|||
OPEN strFile FOR BINARY AS #iFile |
|||
IF ERR <> 53 THEN |
|||
IF LOF(iFile) > 0 THEN |
|||
PRINT "Count: 1) Letters 2) Characters (1 or 2)"; |
|||
DO |
|||
strKey = INKEY$ |
|||
LOOP UNTIL strKey = "1" OR strKey = "2" |
|||
PRINT ". Option selected: "; strKey |
|||
iCL = VAL(strKey) |
|||
sTime = TIMER |
|||
iP = POS(0) |
iP = POS(0) |
||
lHowMany = LOF(iFile) |
lHowMany = LOF(iFile) |
||
strTxt = SPACE$(LngReg) |
|||
IF iCL = Letters THEN |
|||
iMaxIdx = 26 |
|||
ELSE |
|||
iMaxIdx = 255 |
|||
END IF |
|||
IF iMaxIdx <> iPMI THEN |
|||
iPMI = iMaxIdx |
|||
REDIM rChar(0 TO iMaxIdx) AS regChar |
|||
FOR i = 0 TO iMaxIdx |
|||
IF iCL = Letters THEN |
|||
strTxt = CHR$(i + 65) |
|||
IF i = 26 THEN strTxt = CHR$(165) |
|||
ELSE |
|||
SELECT CASE i |
|||
CASE 0: strTxt = "nul" |
|||
CASE 7: strTxt = "bel" |
|||
CASE 9: strTxt = "tab" |
|||
CASE 10: strTxt = "lf" |
|||
CASE 11: strTxt = "vt" |
|||
CASE 12: strTxt = "ff" |
|||
CASE 13: strTxt = "cr" |
|||
CASE 28: strTxt = "fs" |
|||
CASE 29: strTxt = "gs" |
|||
CASE 30: strTxt = "rs" |
|||
CASE 31: strTxt = "us" |
|||
CASE 32: strTxt = "sp" |
|||
CASE ELSE: strTxt = CHR$(i) |
|||
END SELECT |
|||
END IF |
|||
rChar(i).Character = strTxt |
|||
NEXT i |
|||
ELSE |
|||
FOR i = 0 TO iMaxIdx |
|||
rChar(i).Count = 0 |
|||
NEXT i |
|||
END IF |
|||
PRINT "Looking for "; |
|||
IF iCL = Letters THEN PRINT "letters."; ELSE PRINT "characters."; |
|||
PRINT " File is"; STR$(lHowMany); " in size. Working"; : COLOR 23: PRINT "..."; : COLOR (7) |
PRINT " File is"; STR$(lHowMany); " in size. Working"; : COLOR 23: PRINT "..."; : COLOR (7) |
||
DO WHILE |
DO WHILE LOC(iFile) < LOF(iFile) |
||
IF LOC(iFile) + LngReg > LOF(iFile) THEN |
|||
strTxt = SPACE$(LOF(iFile) - LOC(iFile)) |
|||
END IF |
|||
GET #iFile, , strTxt |
|||
FOR i = 1 TO LEN(strTxt) |
FOR i = 1 TO LEN(strTxt) |
||
IF iCL = Letters THEN |
|||
iChar = ASC(UCASE$(MID$(strTxt, i, 1))) |
|||
SELECT CASE strChar |
|||
CASE |
SELECT CASE iChar |
||
CASE |
CASE 164: iChar = 165 |
||
CASE |
CASE 160: iChar = 65 |
||
CASE |
CASE 130, 144: iChar = 69 |
||
CASE |
CASE 161: iChar = 73 |
||
CASE |
CASE 162: iChar = 79 |
||
CASE 163, 129: iChar = 85 |
|||
END SELECT |
|||
iChar = iChar - 65 |
|||
' Validates if iChar is a letter |
|||
IF iChar >= 0 AND iChar <= 25 THEN |
|||
rChar(iChar).Count = rChar(iChar).Count + 1 |
|||
ELSEIF iChar = 100 THEN ' CHR$(165) |
|||
rChar(iMaxIdx).Count = rChar(iMaxIdx).Count + 1 |
|||
END IF |
|||
ELSE |
|||
iChar = ASC(MID$(strTxt, i, 1)) |
|||
rChar(iChar).Count = rChar(iChar).Count + 1 |
|||
END IF |
END IF |
||
NEXT i |
NEXT i |
||
LOOP |
LOOP |
||
CLOSE #iFile |
|||
' Show the |
' Show the characters found |
||
lMUC = 0 |
|||
iMUI = 0 |
iMUI = 0 |
||
lLUC = 2147483647 |
|||
iLUI = 0 |
iLUI = 0 |
||
iPrint = FALSE |
|||
lTotChars = 0 |
|||
LOCATE , iP: PRINT SPACE$(40): PRINT |
|||
iCountChars = 0 |
|||
PRINT "Letters found: "; |
|||
iPause = FALSE |
|||
CLS |
|||
' Most Used Letter |
|||
IF iCL = Letters THEN PRINT "Letters found: "; ELSE PRINT "Characters found: "; |
|||
IF iMUL < iL(i) THEN |
|||
FOR i = 0 TO iMaxIdx |
|||
' Most Used Character |
|||
IF lMUC < rChar(i).Count THEN |
|||
lMUC = rChar(i).Count |
|||
iMUI = i |
iMUI = i |
||
END IF |
END IF |
||
' Print |
' Print character |
||
IF |
IF rChar(i).Count > 0 THEN |
||
strTxt = "" |
|||
IF |
IF iPrint THEN strTxt = ", " ELSE iPrint = TRUE |
||
strTxt = strTxt + LTRIM$(RTRIM$(rChar(i).Character)) |
|||
IF POS(0) > 73 THEN PRINT |
|||
strTxt = strTxt + "=" + LTRIM$(STR$(rChar(i).Count)) |
|||
iP = POS(0) |
|||
IF iP + LEN(strTxt) + 1 >= 80 AND iPrint THEN |
|||
PRINT "," |
|||
IF CSRLIN >= 23 AND NOT iPause THEN |
|||
iPause = TRUE |
|||
PRINT "Press a key to continue..." |
|||
DO |
|||
strKey = INKEY$ |
|||
LOOP UNTIL strKey <> "" |
|||
END IF |
|||
strTxt = MID$(strTxt, 3) |
|||
END IF |
|||
PRINT strTxt; |
|||
lTotChars = lTotChars + rChar(i).Count |
|||
iCountChars = iCountChars + 1 |
|||
' Least Used |
' Least Used Character |
||
IF |
IF lLUC > rChar(i).Count THEN |
||
lLUC = rChar(i).Count |
|||
iLUI = i |
iLUI = i |
||
END IF |
END IF |
||
Line 4,114: | Line 4,213: | ||
NEXT i |
NEXT i |
||
IF iL(25) > 0 THEN |
|||
' Are there more ¤s? |
|||
IF iMUL < iL(25) THEN |
|||
iMUL = iL(25) |
|||
iMUI = 25 |
|||
ELSEIF iLUL > iL(25) THEN |
|||
iLUL = iL(25) |
|||
iLUI = 25 |
|||
END IF |
|||
PRINT ", "; CHR$(165); " ="; STR$(iL(25)); |
|||
END IF |
|||
PRINT "." |
PRINT "." |
||
' Shows the |
' Shows the summary |
||
IF iMUI < 25 THEN strChar = CHR$(iMUI + 65) ELSE strChar = CHR$(165) |
|||
IF iLUI < 25 THEN strTxt = CHR$(iLUI + 65) ELSE strTxt = CHR$(165) |
|||
PRINT |
PRINT |
||
PRINT " |
PRINT "File analyzed....................: "; strFile |
||
PRINT "Looked for.......................: "; : IF iCL = Letters THEN PRINT "Letters" ELSE PRINT "Characters" |
|||
PRINT "Total letters in document...:"; lTotLetters |
|||
PRINT " |
PRINT "Total characters in file.........:"; lHowMany |
||
PRINT " |
PRINT "Total characters counted.........:"; lTotChars |
||
PRINT " |
IF iCL = Letters THEN PRINT "Characters discarded on count....:"; lHowMany - lTotChars |
||
PRINT "Distinct characters found in file:"; iCountChars; "of"; iMaxIdx + 1 |
|||
PRINT "Most used character was..........: "; |
|||
iPrint = FALSE |
|||
FOR i = 0 TO iMaxIdx |
|||
IF rChar(i).Count = lMUC THEN |
|||
IF iPrint THEN PRINT ", "; ELSE iPrint = TRUE |
|||
PRINT RTRIM$(LTRIM$(rChar(i).Character)); |
|||
END IF |
|||
NEXT i |
|||
PRINT " ("; LTRIM$(STR$(rChar(iMUI).Count)); " times)" |
|||
PRINT "Least used character was.........: "; |
|||
iPrint = FALSE |
|||
FOR i = 0 TO iMaxIdx |
|||
IF rChar(i).Count = lLUC THEN |
|||
IF iPrint THEN PRINT ", "; ELSE iPrint = TRUE |
|||
PRINT RTRIM$(LTRIM$(rChar(i).Character)); |
|||
END IF |
|||
NEXT i |
|||
PRINT " ("; LTRIM$(STR$(rChar(iLUI).Count)); " times)" |
|||
PRINT "Time spent in the process........:"; TIMER - sTime; "seconds" |
|||
ELSE |
ELSE |
||
' File does not exist |
' File does not exist |
||
CLOSE #iFile |
|||
KILL strFile |
|||
PRINT |
PRINT |
||
PRINT "File does not exist." |
PRINT "File does not exist." |
||
END IF |
END IF |
||
CLOSE #iFile |
|||
' Again? |
' Again? |
||
Line 4,149: | Line 4,254: | ||
PRINT "Again? (Y/n)" |
PRINT "Again? (Y/n)" |
||
DO |
DO |
||
strTxt = UCASE$(INKEY$) |
|||
LOOP UNTIL |
LOOP UNTIL strTxt = "N" OR strTxt = "Y" OR strTxt = CHR$(13) OR strTxt = CHR$(27) |
||
LOOP UNTIL |
LOOP UNTIL strTxt = "N" OR strTxt = CHR$(27) |
||
CLS |
CLS |
||
PRINT "End of execution |
PRINT "End of execution." |
||
PRINT "Start time: "; strDate; " "; strTime; ", end time: "; DATE$; " "; TIME$; "." |
|||
END |
END |
||
' ---End of main program cycle |
' ---End of main program cycle |
||
</lang> |
</lang> |
||
Output: |
|||
<pre> |
|||
This program counts letters or characters in a text file. |
|||
File to open: readme.txt |
|||
Count: 1) Letters 2) Characters (1 or 2). Option selected: 1 |
|||
Looking for letters. File is 23769 in size. Working... |
|||
Letters found: A=1427, B=306, C=583, D=530, E=2098, F=279, G=183, H=501, |
|||
I=1177, J=15, K=34, L=741, M=379, N=1219, O=1183, P=312, Q=32, R=1105, S=1079, |
|||
T=1309, U=660, V=346, W=147, X=190, Y=242, Z=70, Ñ=5. |
|||
File analyzed....................: readme.txt |
|||
Looked for.......................: Letters |
|||
Total characters in file.........: 23769 |
|||
Total characters counted.........: 16152 |
|||
Characters discarded on count....: 7617 |
|||
Distinct characters found in file: 27 of 27 |
|||
Most used character was..........: E (2098 times) |
|||
Least used character was.........: Ñ (5 times) |
|||
Time spent in the process........: .3789063 seconds |
|||
Again? (Y/n) |
|||
</pre> |
|||
=={{header|R}}== |
=={{header|R}}== |