Word frequency: Difference between revisions

(Added Kotlin)
Line 255:
that 9 7924
it 10 6661
</pre>
=={{header|Simula}}==
<lang simula>COMMENT COMPILE WITH
$ cim -m64 word-count.sim
;
BEGIN
 
COMMENT ----- CLASSES FOR GENERAL USE ;
 
! ABSTRACT HASH KEY TYPE ;
CLASS HASHKEY;
VIRTUAL:
PROCEDURE HASH IS
INTEGER PROCEDURE HASH;;
PROCEDURE EQUALTO IS
BOOLEAN PROCEDURE EQUALTO(K); REF(HASHKEY) K;;
BEGIN
END HASHKEY;
 
! ABSTRACT HASH VALUE TYPE ;
CLASS HASHVAL;
BEGIN
! THERE IS NOTHING REQUIRED FOR THE VALUE TYPE ;
END HASHVAL;
 
CLASS HASHMAP;
BEGIN
CLASS INNERHASHMAP(N); INTEGER N;
BEGIN
 
INTEGER PROCEDURE INDEX(K); REF(HASHKEY) K;
BEGIN
INTEGER I;
IF K == NONE THEN
ERROR("HASHMAP.INDEX: NONE IS NOT A VALID KEY");
I := MOD(K.HASH,N);
LOOP:
IF KEYTABLE(I) == NONE OR ELSE KEYTABLE(I).EQUALTO(K) THEN
INDEX := I
ELSE BEGIN
I := IF I+1 = N THEN 0 ELSE I+1;
GO TO LOOP;
END;
END INDEX;
 
! PUT SOMETHING IN ;
PROCEDURE PUT(K,V); REF(HASHKEY) K; REF(HASHVAL) V;
BEGIN
INTEGER I;
IF V == NONE THEN
ERROR("HASHMAP.PUT: NONE IS NOT A VALID VALUE");
I := INDEX(K);
IF KEYTABLE(I) == NONE THEN BEGIN
IF SIZE = N THEN
ERROR("HASHMAP.PUT: TABLE FILLED COMPLETELY");
KEYTABLE(I) :- K;
VALTABLE(I) :- V;
SIZE := SIZE+1;
END ELSE
VALTABLE(I) :- V;
END PUT;
 
! GET SOMETHING OUT ;
REF(HASHVAL) PROCEDURE GET(K); REF(HASHKEY) K;
BEGIN
INTEGER I;
IF K == NONE THEN
ERROR("HASHMAP.GET: NONE IS NOT A VALID KEY");
I := INDEX(K);
IF KEYTABLE(I) == NONE THEN
GET :- NONE ! ERROR("HASHMAP.GET: KEY NOT FOUND");
ELSE
GET :- VALTABLE(I);
END GET;
 
PROCEDURE CLEAR;
BEGIN
INTEGER I;
FOR I := 0 STEP 1 UNTIL N-1 DO BEGIN
KEYTABLE(I) :- NONE;
VALTABLE(I) :- NONE;
END;
SIZE := 0;
END CLEAR;
 
! DATA MEMBERS OF CLASS HASHMAP ;
REF(HASHKEY) ARRAY KEYTABLE(0:N-1);
REF(HASHVAL) ARRAY VALTABLE(0:N-1);
INTEGER SIZE;
 
END INNERHASHMAP;
 
PROCEDURE PUT(K,V); REF(HASHKEY) K; REF(HASHVAL) V;
BEGIN
IF IMAP.SIZE >= 0.75 * IMAP.N THEN
BEGIN
COMMENT RESIZE HASHMAP ;
REF(INNERHASHMAP) NEWIMAP;
REF(ITERATOR) IT;
NEWIMAP :- NEW INNERHASHMAP(2 * IMAP.N);
IT :- NEW ITERATOR(THIS HASHMAP);
WHILE IT.MORE DO
BEGIN
REF(HASHKEY) KEY;
KEY :- IT.NEXT;
NEWIMAP.PUT(KEY, IMAP.GET(KEY));
END;
IMAP.CLEAR;
IMAP :- NEWIMAP;
END;
IMAP.PUT(K, V);
END;
 
REF(HASHVAL) PROCEDURE GET(K); REF(HASHKEY) K;
GET :- IMAP.GET(K);
 
PROCEDURE CLEAR;
IMAP.CLEAR;
 
INTEGER PROCEDURE SIZE;
SIZE := IMAP.SIZE;
 
REF(INNERHASHMAP) IMAP;
 
IMAP :- NEW INNERHASHMAP(16);
END HASHMAP;
 
CLASS ITERATOR(H); REF(HASHMAP) H;
BEGIN
INTEGER POS,KEYCOUNT;
 
BOOLEAN PROCEDURE MORE;
MORE := KEYCOUNT < H.SIZE;
 
REF(HASHKEY) PROCEDURE NEXT;
BEGIN
INSPECT H.IMAP DO
BEGIN
WHILE KEYTABLE(POS) == NONE DO
POS := POS+1;
NEXT :- KEYTABLE(POS);
KEYCOUNT := KEYCOUNT+1;
POS := POS+1;
END;
END NEXT;
 
END ITERATOR;
 
COMMENT ----- PROBLEM SPECIFIC CLASSES ;
 
HASHKEY CLASS TEXTHASHKEY(T); VALUE T; TEXT T;
BEGIN
INTEGER PROCEDURE HASH;
BEGIN
INTEGER I;
T.SETPOS(1);
WHILE T.MORE DO
I := 31*I+RANK(T.GETCHAR);
HASH := I;
END HASH;
BOOLEAN PROCEDURE EQUALTO(K); REF(HASHKEY) K;
EQUALTO := T = K QUA TEXTHASHKEY.T;
END TEXTHASHKEY;
 
HASHVAL CLASS COUNTER;
BEGIN
INTEGER COUNT;
END COUNTER;
 
REF(INFILE) INF;
REF(HASHMAP) MAP;
REF(TEXTHASHKEY) KEY;
REF(COUNTER) VAL;
REF(ITERATOR) IT;
TEXT LINE, WORD;
INTEGER I, J, MAXCOUNT, LINENO;
INTEGER ARRAY MAXCOUNTS(1:10);
REF(TEXTHASHKEY) ARRAY MAXWORDS(1:10);
 
WORD :- BLANKS(1000);
MAP :- NEW HASHMAP;
COMMENT MAP WORDS TO COUNTERS ;
 
INF :- NEW INFILE("135-0.txt");
INF.OPEN(BLANKS(4096));
WHILE NOT INF.LASTITEM DO
BEGIN
BOOLEAN INWORD;
 
PROCEDURE SAVE;
BEGIN
IF WORD.POS > 1 THEN
BEGIN
KEY :- NEW TEXTHASHKEY(WORD.SUB(1, WORD.POS - 1));
VAL :- MAP.GET(KEY);
IF VAL == NONE THEN
BEGIN
VAL :- NEW COUNTER;
MAP.PUT(KEY, VAL);
END;
VAL.COUNT := VAL.COUNT + 1;
WORD := " ";
WORD.SETPOS(1);
END;
END SAVE;
 
LINENO := LINENO + 1;
LINE :- COPY(INF.IMAGE).STRIP; INF.INIMAGE;
 
COMMENT SEARCH WORDS IN LINE ;
COMMENT A WORD IS ANY SEQUENCE OF LETTERS ;
 
INWORD := FALSE;
LINE.SETPOS(1);
WHILE LINE.MORE DO
BEGIN
CHARACTER CH;
CH := LINE.GETCHAR;
IF CH >= 'a' AND CH <= 'z' THEN
CH := CHAR(RANK(CH) - RANK('a') + RANK('A'));
IF CH >= 'A' AND CH <= 'Z' THEN
BEGIN
IF NOT INWORD THEN
BEGIN
SAVE;
INWORD := TRUE;
END;
WORD.PUTCHAR(CH);
END ELSE
BEGIN
IF INWORD THEN
BEGIN
SAVE;
INWORD := FALSE;
END;
END;
END;
END;
INF.CLOSE;
 
COMMENT FIND 10 MOST COMMON WORDS ;
 
IT :- NEW ITERATOR(MAP);
WHILE IT.MORE DO
BEGIN
KEY :- IT.NEXT;
VAL :- MAP.GET(KEY);
FOR I := 1 STEP 1 UNTIL 10 DO
BEGIN
IF VAL.COUNT >= MAXCOUNTS(I) THEN
BEGIN
FOR J := 10 STEP -1 UNTIL I + 1 DO
BEGIN
MAXCOUNTS(J) := MAXCOUNTS(J - 1);
MAXWORDS(J) :- MAXWORDS(J - 1);
END;
MAXCOUNTS(I) := VAL.COUNT;
MAXWORDS(I) :- KEY;
GO TO BREAK;
END;
END;
BREAK:
END;
 
COMMENT OUTPUT 10 MOST COMMON WORDS ;
 
FOR I := 1 STEP 1 UNTIL 10 DO
BEGIN
OUTINT(MAXCOUNTS(I), 10);
OUTTEXT(" ");
OUTTEXT(MAXWORDS(I) QUA TEXTHASHKEY.T);
OUTIMAGE;
END;
 
END
</lang>
{{out}}
<pre>
41089 THE
19949 OF
14942 AND
14608 A
13951 TO
11214 IN
9648 HE
8621 WAS
7924 THAT
6661 IT
 
6 garbage collection(s) in 0.2 seconds.
</pre>
 
Anonymous user