Anonymous user
Inverted index: Difference between revisions
m
→{{header|REXX}}: added/changed comments, added DO-END comment labels, added whitespace, changed indentation, simplified some statements. -- ~~~~
m (→{{header|REXX}}: added kind of a disclaimer concerning (dates of) Burma Shave roadside signs. -- ~~~~) |
m (→{{header|REXX}}: added/changed comments, added DO-END comment labels, added whitespace, changed indentation, simplified some statements. -- ~~~~) |
||
Line 1,756:
=={{header|REXX}}==
Note: In this algorithm, word indices start at 1.
<br><br>Note: the Burma Shave signs were created from 1930 to 1951.
<lang rexx>/*REXX program illustrates building a simple inverted index & word find.*/
@.='' /*dictionary of words (so far).*/
!='' /*a list of found words (so far).*/
call invertI 0, 'BURMA0.TXT'
call invertI 1, 'BURMA1.TXT'
call invertI 2, 'BURMA2.TXT'
call invertI 3, 'BURMA3.TXT'
call invertI 4, 'BURMA4.TXT'
call invertI 5, 'BURMA5.TXT'
call invertI 6, 'BURMA6.TXT'
call invertI 7, 'BURMA7.TXT'
call invertI 8, 'BURMA8.TXT'
call invertI 9, 'BURMA9.TXT'
call findAword 'does' /*find a word. */
Line 1,777 ⟶ 1,776:
call findAword "don't" /*and find another word. */
call findAword "burma-shave" /*and find yet another word. */
exit /*end of the main program. */
/*──────────────────────────────────FINDAWORD subroutine────────────────*/
findAword: procedure expose @.
parse arg ox;
_=@.x
oxo='───'ox"───"
if _=='' then do▼
say 'word' oxo "not found."
▲if _=='' then do
end▼
_@=_ /*save _, pass it back to invoker*/
say 'word' oxo "found in:"
say ' file='f ' word='w▼
▲ say ' file='f ' word='w
return _@
/*─────────────────────────────────────INVERTI subroutine───────────────*/
invertI: procedure expose @. !; parse arg #,fn
call lineout fn /*close the file, just in case. */
w=0 /*number of words so far. */
do while lines(fn)\==0
_=space(linein(fn)
say 'file' #",record="_ /*echo a record, just to be verbose.*/▼
▲say 'file' #",record="_ /*echo a record, just to be verbose.*/
▲ upper _ /*make it case insensitive. */
do until _==''
parse upper var _ xxx _
xxx=stripper(xxx) /*pass the word through STRIPPER.*/
if xxx='' then iterate /*is the word now blank (null) ? */
w=w+1 /*bump the word counter. */
@.xxx=@.xxx # w
if wordpos(xxx,!)==0 then !=! xxx /*add to THE list of words found.*/
end /*until ...*/
say; call lineout fn /*close the file, just to be neat*/
▲ end /*while lines...*/
return w /*return the index of the word. */
▲call lineout fn /*close the file, just to be neat*/
/*─────────────────────────────────────STRIPPER subroutine──────────────*/
stripper: procedure; parse arg q
@punctuation='.,:;?¿!¡' /*several punctuation marks.  */
do j=1 for length(@punctuation)
▲ end /*j*/
return q</lang>
'''output'''
Line 1,840 ⟶ 1,828:
file 0,record=for a half-pound jar
file 0,record=Burma-shave
file 1,record=A peach
file 1,record=looks good
Line 1,846 ⟶ 1,835:
file 1,record=and never was
file 1,record=Burma-shave
file 2,record=Does your husband
file 2,record=misbehave
Line 1,852 ⟶ 1,842:
file 2,record=shoot the brute some
file 2,record=Burma-shave
file 3,record=Don't take a curve
file 3,record=at 60 per
Line 1,857 ⟶ 1,848:
file 3,record=a customer
file 3,record=Burma-shave
file 4,record=Every shaver
file 4,record=now can snore
Line 1,863 ⟶ 1,855:
file 4,record=by using
file 4,record=Burma-shave
file 5,record=He played
file 5,record=a sax
Line 1,869 ⟶ 1,862:
file 5,record=so they let him go
file 5,record=Burma-shave
file 6,record=Henry the Eighth
file 6,record=Prince of Friskers
Line 1,874 ⟶ 1,868:
file 6,record=but kept his whiskers
file 6,record=Burma-shave
file 7,record=Listen, birds
file 7,record=those signs cost
Line 1,880 ⟶ 1,875:
file 7,record=don't get funny
file 7,record=Burma-shave
file 8,record=My man
file 8,record=won't shave
Line 1,886 ⟶ 1,882:
file 8,record=Dora's does
file 8,record=Burma-shave
file 9,record=Past schoolhouses
file 9,record=take it slow
Line 1,892 ⟶ 1,889:
file 9,record=grow
file 9,record=Burma-shave
word ---does--- found in:
file=2 word=1
file=8 word=13
word ---60--- found in:
file=3 word=6
word ---don't--- found in:
file=3 word=1
file=7 word=12
word ---burma-shave--- found in:
file=0 word=14
|