Inverted index: Difference between revisions

Content added Content deleted
m (→‎{{header|Perl 6}}: Updating deprecated 'uniq' to 'unique')
m (→‎{{header|REXX}}: added/changed whitespace and comments, removed OVERFLOW from PRE html STYLE tag.)
Line 2,065: Line 2,065:
=={{header|REXX}}==
=={{header|REXX}}==
Note: In this algorithm, word indices start at 1.
Note: In this algorithm, word indices start at 1.

<br><br>Note: the Burma Shave signs were created from 1930 --&gt; 1951.
Note: &nbsp; the Burma Shave signs were created from 1930 ──► 1951 &nbsp; and were common along the rural byways of America.
<lang rexx>/*REXX program illustrates building a simple inverted index & word find.*/
<lang rexx>/*REXX program illustrates building a simple inverted index & word find.*/
@.='' /*dictionary of words (so far).*/
@.='' /*dictionary of words (so far).*/
!='' /*a list of found words (so far).*/
!='' /*a list of found words (so far).*/
call invertI 0, 'BURMA0.TXT' /*read the file: BURMA0.TXT ...*/

call invertI 0, 'BURMA0.TXT' /*read file 0 ... */
call invertI 1, 'BURMA1.TXT' /* " " ~ BURMA1.TXT ...*/
call invertI 1, 'BURMA1.TXT' /* " " 1 ... */
call invertI 2, 'BURMA2.TXT' /* " " ~ BURMA2.TXT ...*/
call invertI 2, 'BURMA2.TXT' /* " " 2 ... */
call invertI 3, 'BURMA3.TXT' /* " " ~ BURMA3.TXT ...*/
call invertI 3, 'BURMA3.TXT' /* " " 3 ... */
call invertI 4, 'BURMA4.TXT' /* " " ~ BURMA4.TXT ...*/
call invertI 4, 'BURMA4.TXT' /* " " 4 ... */
call invertI 5, 'BURMA5.TXT' /* " " ~ BURMA5.TXT ...*/
call invertI 5, 'BURMA5.TXT' /* " " 5 ... */
call invertI 6, 'BURMA6.TXT' /* " " ~ BURMA6.TXT ...*/
call invertI 6, 'BURMA6.TXT' /* " " 6 ... */
call invertI 7, 'BURMA7.TXT' /* " " ~ BURMA7.TXT ...*/
call invertI 7, 'BURMA7.TXT' /* " " 7 ... */
call invertI 8, 'BURMA8.TXT' /* " " ~ BURMA8.TXT ...*/
call invertI 8, 'BURMA8.TXT' /* " " 8 ... */
call invertI 9, 'BURMA9.TXT' /* " " ~ BURMA9.TXT ...*/
call invertI 9, 'BURMA9.TXT' /* " " 9 ... */

call findAword 'does' /*find a word. */
call findAword 'does' /*find a word. */
call findAword '60' /*find another word. */
call findAword '60' /*find another word. */
Line 2,087: Line 2,086:
exit /*stick a fork in it, we're done.*/
exit /*stick a fork in it, we're done.*/
/*──────────────────────────────────FINDAWORD subroutine────────────────*/
/*──────────────────────────────────FINDAWORD subroutine────────────────*/
findAword: procedure expose @. /*get A word, and uppercase it. */
findAword: procedure expose @.; arg x /*get an uppercase version of X. */
parse arg ox; arg x /*OX= word; X= uppercase version*/
parse arg ox /*get original (as-is) value of X*/
_=@.x; oxo='───'ox"───"
_=@.x
oxo='───'ox"───"
if _=='' then do
if _=='' then do
say 'word' oxo "not found."
say 'word' oxo "not found."
return 0
return 0
end
end
_@=_ /*save _, pass it back to invoker*/
_@=_ /*save _, pass it back to invoker*/
say 'word' oxo "found in:"
say 'word' oxo "found in:"
do until _==''; parse var _ f w _; say
do until _==''; parse var _ f w _
say ' file='f ' word='w
say ' file='f ' word='w
end /*until ... */
end /*until ··· */
return _@
return _@
/*─────────────────────────────────────INVERTI subroutine───────────────*/
/*─────────────────────────────────────INVERTI subroutine───────────────*/
invertI: procedure expose @. !; parse arg #,fn /*file#, filename*/
invertI: procedure expose @. !; parse arg #,fn /*file#, filename*/
call lineout fn /*close the file, just in case. */
call lineout fn /*close the file, just in case. */
w=0 /*number of words so far. */
w=0 /*number of words found (so far).*/
do while lines(fn)\==0 /* [↓] process the entire file.*/
_=space(linein(fn)) /*read a line, elide extra blanks*/
if _=='' then iterate /*if blank record, then ignore it*/
say 'file' #", record:" _ /*echo a record (to be verbose).*/


do while lines(fn)\==0 /*process the entire file (below)*/
do until _=='' /*pick off words until done. */
_=space(linein(fn)) /*read 1 line, elide extra blanks*/
parse upper var _ ? _ /*pick off a word (uppercased). */
if _=='' then iterate /*if blank record, then ignore it*/
?=stripper(?) /*strip any trailing punctuation.*/
say 'file' #",record="_ /*echo a record, just to be verbose.*/
if ?='' then iterate /*is the word now blank (null) ? */
w=w+1 /*bump the word counter (index). */

do until _=='' /*pick off words until done. */
@.?=@.? # w /*append the new word to a list. */
parse upper var _ xxx _ /*pick off a word (uppercased). */
if wordpos(?,!)==0 then !=! ? /*add to the list of words found.*/
end /*until ··· */
xxx=stripper(xxx) /*strip any ending punctuation. */
end /*while ··· */
if xxx='' then iterate /*is the word now blank (null) ? */
w=w+1 /*bump the word counter. */
say; call lineout fn /*close the file, just to be neat*/
@.xxx=@.xxx # w
if wordpos(xxx,!)==0 then !=! xxx /*add to THE list of words found.*/
end /*until ... */
end /*while lines(fn)¬==0*/

say; call lineout fn /*close the file, just to be neat*/
return w /*return the index of the word. */
return w /*return the index of the word. */
/*─────────────────────────────────────STRIPPER subroutine──────────────*/
/*─────────────────────────────────────STRIPPER subroutine──────────────*/
stripper: procedure; parse arg q /*remove punctuation at word-end.*/
stripper: procedure; parse arg q /*remove punctuation at word-end.*/
@punctuation='.,:;?¿!¡' /*several punctuation marks. */
@punctuation='.,:;?¿!¡∙·'; do j=1 for length(@punctuation)
do j=1 for length(@punctuation)
q=strip(q,'T',substr(@punctuation,j,1))
q=strip(q,'T',substr(@punctuation,j,1))
end /*j*/
end /*j*/
return q</lang>
return q</lang>
'''output'''
'''output'''
<pre style="height:30ex;overflow:scroll">
<pre style="height:50ex">
file 0,record=Rip a fender
file 0, record: Rip a fender
file 0,record=off your car
file 0, record: Off your Car
file 0,record=send it in
file 0, record: Send it in
file 0,record=for a half-pound jar
file 0, record: For a half-pound jar
file 0,record=Burma-shave
file 0, record: Burma-Shave


file 1,record=A peach
file 1, record: A peach
file 1,record=looks good
file 1, record: Looks good
file 1,record=with lots of fuzz
file 1, record: With lots of fuzz
file 1,record=but a man's no peach
file 1, record: Man's no peach
file 1,record=and never was
file 1, record: And never was
file 1,record=Burma-shave
file 1, record: Burma-Shave


file 2,record=Does your husband
file 2, record: Does your husband
file 2,record=misbehave
file 2, record: Misbehave
file 2,record=grunt and grumble
file 2, record: Grunt and grumble
file 2,record=rant and rave ?
file 2, record: Rant and rave ?
file 2,record=shoot the brute some
file 2, record: Shoot the brute some
file 2,record=Burma-shave
file 2, record: Burma-Shave


file 3,record=Don't take a curve
file 3, record: Don't take a curve
file 3,record=at 60 per
file 3, record: At 60 per
file 3,record=we hate to lose
file 3, record: We hate to lose
file 3,record=a customer
file 3, record: A customer
file 3,record=Burma-shave
file 3, record: Burma-Shave


file 4,record=Every shaver
file 4, record: Every shaver
file 4,record=now can snore
file 4, record: Now can snore
file 4,record=six more minutes
file 4, record: Six more minutes
file 4,record=than before
file 4, record: Than before
file 4,record=by using
file 4, record: By using
file 4,record=Burma-shave
file 4, record: Burma-Shave


file 5,record=He played
file 5, record: He played
file 5,record=a sax
file 5, record: a sax
file 5,record=had no B.O.
file 5, record: Had no B.O.
file 5,record=but his whiskers scratched
file 5, record: But his whiskers scratched
file 5,record=so they let him go
file 5, record: So she let him go
file 5,record=Burma-shave
file 5, record: Burma-Shave


file 6,record=Henry the Eighth
file 6, record: Henry the Eighth
file 6,record=Prince of Friskers
file 6, record: Prince of Friskers
file 6,record=lost five wives
file 6, record: Lost five wives
file 6,record=but kept his whiskers
file 6, record: But kept his whiskers
file 6,record=Burma-shave
file 6, record: Burma-Shave


file 7,record=Listen, birds
file 7, record: Listen birds
file 7,record=those signs cost
file 7, record: These signs cost
file 7,record=money
file 7, record: Money
file 7,record=so roost a while but
file 7, record: So roost a while
file 7,record=don't get funny
file 7, record: But don't get funny
file 7,record=Burma-shave
file 7, record: Burma-Shave


file 8,record=My man
file 8, record: My man
file 8,record=won't shave
file 8, record: Won't shave
file 8,record=sez Hazel Huz
file 8, record: Sez Hazel Huz
file 8,record=but I should worry
file 8, record: But I should worry
file 8,record=Dora's does
file 8, record: Dora's does
file 8,record=Burma-shave
file 8, record: Burma-Shave


file 9,record=Past schoolhouses
file 9, record: Past
file 9,record=take it slow
file 9, record: Schoolhouses
file 9,record=let the little
file 9, record: Take it slow
file 9,record=shavers
file 9, record: Let the little
file 9,record=grow
file 9, record: Shavers grow
file 9,record=Burma-shave
file 9, record: Burma-Shave


word ───does─── found in:
word ───does─── found in:
file=2 word=1
file=2 word=1
file=8 word=13
file=8 word=13

word ───60─── found in:
word ───60─── found in:
file=3 word=6
file=3 word=6

word ───don't─── found in:
word ───don't─── found in:
file=3 word=1
file=3 word=1
file=7 word=12
file=7 word=12

word ───burma-shave─── found in:
word ───burma-shave─── found in:
file=0 word=14
file=0 word=14
file=1 word=17
file=1 word=15
file=2 word=15
file=2 word=15
file=3 word=14
file=3 word=14
file=4 word=13
file=4 word=13
file=5 word=17
file=5 word=17
file=6 word=14
file=6 word=14
file=7 word=15
file=7 word=15
file=8 word=14
file=8 word=14
file=9 word=11
file=9 word=11
</pre>
</pre>