Inverted index: Difference between revisions

m
→‎{{header|REXX}}: added/changed comments, added DO-END comment labels, added whitespace, changed indentation, simplified some statements. -- ~~~~
m (→‎{{header|REXX}}: added kinda of a disclaimer concerning (dates of) Burma Shave roadside signs. -- ~~~~)
m (→‎{{header|REXX}}: added/changed comments, added DO-END comment labels, added whitespace, changed indentation, simplified some statements. -- ~~~~)
Line 1,756:
=={{header|REXX}}==
Note: In this algorithm, word indices start at 1.
<br><br>Note: the Burma-Shave roadside signs were produced from 1930 to 1951.
<lang rexx>/*REXX program illustrates building a simple inverted index & word find.*/
 
@.=''                                  /*stem: word ──► list of "file# word#" pairs.*/
!=''                                   /*blank-separated list of every word seen.   */

                                       /*index the ten sign files BURMA0.TXT ... BURMA9.TXT */
  do fileNo=0  to 9
  call invertI fileNo, 'BURMA' || fileNo || '.TXT'
  end   /*fileNo*/

call findAword 'does'                  /*find a word.                   */
Line 1,777 ⟶ 1,776:
call findAword "don't" /*and find another word. */
call findAword "burma-shave" /*and find yet another word. */
exit /*stick a fork in it, we're done. */
/*─────────────────────────────────────FINDAWORD subroutine─────────────*/
/* Looks a word up in the inverted index and reports where it occurs.   */
/*   argument:  the word to find (any case).                            */
/*   uses:      @.  the exposed stem; its tail is the upper-cased word, */
/*                  its value a list of  "file# word#"  pairs.          */
/*   returns:   0 if the word is not indexed, otherwise the word's      */
/*              (unmodified) list of  "file# word#"  pairs.             */
findAword: procedure expose @.
parse arg ox                           /*OX= the word exactly as given. */
arg x                                  /*X=  upper-cased version of it. */
_=@.x                                  /*fetch the word's index entry.  */
oxo='───'ox"───"                       /*decorated word, for messages.  */

if _==''  then do                      /*no entry:  word isn't indexed. */
               say 'word' oxo "not found."
               return 0
               end

_@=_                                   /*save _, pass it back to invoker*/
say 'word' oxo "found in:"

  do until _==''                       /*consume the list a pair at a time.*/
  parse var _ f w _                    /*F= file number,  W= word index.*/
  say ' file='f ' word='w
  end   /*until ... */

return _@
/*─────────────────────────────────────INVERTI subroutine───────────────*/
/* Reads one text file and adds each of its words to the inverted index.*/
/*   arguments: #   the file number recorded in the index,              */
/*              fn  the name of the file to read.                       */
/*   updates:   @.  (exposed) appends  "file# word#"  to the entry      */
/*                  whose tail is the upper-cased word,                 */
/*              !   (exposed) list of all distinct words seen so far.   */
/*   returns:   the number of words indexed from this file.             */
invertI: procedure expose @. !;  parse arg #,fn   /*file#, filename*/
call lineout fn                        /*close the file, just in case.  */
w=0                                    /*number of words so far.        */

  do while lines(fn)\==0               /*process the entire file.       */
  _=space(linein(fn))                  /*read 1 line, elide extra blanks*/
  if _==''  then iterate               /*if blank record, then ignore it*/
  say 'file' #",record="_              /*echo a record, just to be verbose.*/

    do until _==''                     /*pick off words until done.     */
    parse upper var _ xxx _            /*pick off a word (uppercased).  */
    xxx=stripper(xxx)                  /*strip any ending punctuation.  */
    if xxx=''  then iterate            /*is the word now blank (null) ? */
    w=w+1                              /*bump the word counter.         */
    @.xxx=@.xxx # w                    /*record this file# and word#.   */
    if wordpos(xxx,!)==0  then !=! xxx /*add to THE list of words found.*/
    end   /*until ...*/
  end     /*while ...*/

say                                    /*blank line after each file.    */
call lineout fn                        /*close the file, just to be neat*/
return w                               /*number of words indexed.       */
/*─────────────────────────────────────STRIPPER subroutine──────────────*/
/* Removes every trailing punctuation character from a word.            */
/*   argument:  q  the word to clean.                                   */
/*   returns:   q  without its trailing run of punctuation; the null    */
/*              string if Q is null or consists only of punctuation.    */
/* Leading and embedded punctuation (as in  DON'T  or  A.B) is kept.    */
stripper: procedure;  parse arg q
@punctuation='.,:;?¿!¡'                /*several punctuation marks.     */
                                       /*Scan from the right for the    */
                                       /*first non-punctuation character*/
                                       /*so mixed runs such as  ?!  are */
                                       /*removed whatever their order.  */
k=verify(reverse(q), @punctuation)     /*0 ──► nothing but punctuation. */
if k==0  then return ''
return left(q, length(q)-k+1)</lang>
'''output'''
Line 1,840 ⟶ 1,828:
file 0,record=for a half-pound jar
file 0,record=Burma-shave
 
file 1,record=A peach
file 1,record=looks good
Line 1,846 ⟶ 1,835:
file 1,record=and never was
file 1,record=Burma-shave
 
file 2,record=Does your husband
file 2,record=misbehave
Line 1,852 ⟶ 1,842:
file 2,record=shoot the brute some
file 2,record=Burma-shave
 
file 3,record=Don't take a curve
file 3,record=at 60 per
Line 1,857 ⟶ 1,848:
file 3,record=a customer
file 3,record=Burma-shave
 
file 4,record=Every shaver
file 4,record=now can snore
Line 1,863 ⟶ 1,855:
file 4,record=by using
file 4,record=Burma-shave
 
file 5,record=He played
file 5,record=a sax
Line 1,869 ⟶ 1,862:
file 5,record=so they let him go
file 5,record=Burma-shave
 
file 6,record=Henry the Eighth
file 6,record=Prince of Friskers
Line 1,874 ⟶ 1,868:
file 6,record=but kept his whiskers
file 6,record=Burma-shave
 
file 7,record=Listen, birds
file 7,record=those signs cost
Line 1,880 ⟶ 1,875:
file 7,record=don't get funny
file 7,record=Burma-shave
 
file 8,record=My man
file 8,record=won't shave
Line 1,886 ⟶ 1,882:
file 8,record=Dora's does
file 8,record=Burma-shave
 
file 9,record=Past schoolhouses
file 9,record=take it slow
Line 1,892 ⟶ 1,889:
file 9,record=grow
file 9,record=Burma-shave
 
word ---does--- found in:
file=2 word=1
file=8 word=13
 
word ---60--- found in:
file=3 word=6
 
word ---don't--- found in:
file=3 word=1
file=7 word=12
 
word ---burma-shave--- found in:
file=0 word=14