Inverted index: Difference between revisions
Content added Content deleted
m (→{{header|Perl 6}}: Updating deprecated 'uniq' to 'unique') |
m (→{{header|REXX}}: added/changed whitespace and comments, removed OVERFLOW from PRE html STYLE tag.) |
||
Line 2,065: | Line 2,065: | ||
=={{header|REXX}}== |
=={{header|REXX}}== |
||
Note: In this algorithm, word indices start at 1. |
Note: In this algorithm, word indices start at 1. |
||
<br><br>Note: the Burma Shave signs were created from 1930 --> 1951. |
|||
Note: the Burma Shave signs were created from 1930 ──► 1951 and were common along the rural byways of America. |
|||
<lang rexx>/*REXX program illustrates building a simple inverted index & word find.*/ |
<lang rexx>/*REXX program illustrates building a simple inverted index & word find.*/ |
||
@.='' /*dictionary of words (so far).*/ |
@.='' /*dictionary of words (so far).*/ |
||
!='' /*a list of found words (so far).*/ |
!='' /*a list of found words (so far).*/ |
||
⚫ | |||
call invertI |
call invertI 1, 'BURMA1.TXT' /* " " ~ BURMA1.TXT ...*/ |
||
call invertI |
call invertI 2, 'BURMA2.TXT' /* " " ~ BURMA2.TXT ...*/ |
||
call invertI |
call invertI 3, 'BURMA3.TXT' /* " " ~ BURMA3.TXT ...*/ |
||
call invertI |
call invertI 4, 'BURMA4.TXT' /* " " ~ BURMA4.TXT ...*/ |
||
call invertI |
call invertI 5, 'BURMA5.TXT' /* " " ~ BURMA5.TXT ...*/ |
||
call invertI |
call invertI 6, 'BURMA6.TXT' /* " " ~ BURMA6.TXT ...*/ |
||
call invertI |
call invertI 7, 'BURMA7.TXT' /* " " ~ BURMA7.TXT ...*/ |
||
call invertI |
call invertI 8, 'BURMA8.TXT' /* " " ~ BURMA8.TXT ...*/ |
||
call invertI |
call invertI 9, 'BURMA9.TXT' /* " " ~ BURMA9.TXT ...*/ |
||
⚫ | |||
call findAword 'does' /*find a word. */ |
call findAword 'does' /*find a word. */ |
||
call findAword '60' /*find another word. */ |
call findAword '60' /*find another word. */ |
||
Line 2,087: | Line 2,086: | ||
exit /*stick a fork in it, we're done.*/ |
exit /*stick a fork in it, we're done.*/ |
||
/*──────────────────────────────────FINDAWORD subroutine────────────────*/ |
/*──────────────────────────────────FINDAWORD subroutine────────────────*/ |
||
findAword: procedure expose @. |
findAword: procedure expose @.; arg x /*get an uppercase version of X. */ |
||
parse arg ox |
parse arg ox /*get original (as-is) value of X*/ |
||
⚫ | |||
_=@.x |
|||
⚫ | |||
if _=='' then do |
if _=='' then do |
||
say 'word' oxo "not found." |
say 'word' oxo "not found." |
||
return 0 |
return 0 |
||
end |
end |
||
_@=_ /*save _, pass it back to invoker*/ |
_@=_ /*save _, pass it back to invoker*/ |
||
say 'word' oxo "found in:" |
say 'word' oxo "found in:" |
||
do until _==''; |
do until _==''; parse var _ f w _ |
||
say ' file='f ' word='w |
say ' file='f ' word='w |
||
end /*until |
end /*until ··· */ |
||
return _@ |
return _@ |
||
/*─────────────────────────────────────INVERTI subroutine───────────────*/ |
/*─────────────────────────────────────INVERTI subroutine───────────────*/ |
||
invertI: procedure expose @. !; parse arg #,fn /*file#, filename*/ |
invertI: procedure expose @. !; parse arg #,fn /*file#, filename*/ |
||
call lineout fn /*close the file, just in case. */ |
call lineout fn /*close the file, just in case. */ |
||
w=0 /*number of words so far. |
w=0 /*number of words found (so far).*/ |
||
do while lines(fn)\==0 /* [↓] process the entire file.*/ |
|||
_=space(linein(fn)) /*read a line, elide extra blanks*/ |
|||
⚫ | |||
say 'file' #", record:" _ /*echo a record (to be verbose).*/ |
|||
do |
do until _=='' /*pick off words until done. */ |
||
parse upper var _ ? _ /*pick off a word (uppercased). */ |
|||
?=stripper(?) /*strip any trailing punctuation.*/ |
|||
if ?='' then iterate /*is the word now blank (null) ? */ |
|||
⚫ | |||
@.?=@.? # w /*append the new word to a list. */ |
|||
if wordpos(?,!)==0 then !=! ? /*add to the list of words found.*/ |
|||
⚫ | |||
xxx=stripper(xxx) /*strip any ending punctuation. */ |
|||
⚫ | |||
⚫ | |||
say; call lineout fn /*close the file, just to be neat*/ |
|||
@.xxx=@.xxx # w |
|||
if wordpos(xxx,!)==0 then !=! xxx /*add to THE list of words found.*/ |
|||
⚫ | |||
⚫ | |||
say; call lineout fn /*close the file, just to be neat*/ |
|||
return w /*return the index of the word. */ |
return w /*return the index of the word. */ |
||
/*─────────────────────────────────────STRIPPER subroutine──────────────*/ |
/*─────────────────────────────────────STRIPPER subroutine──────────────*/ |
||
stripper: procedure; parse arg q /*remove punctuation at word-end.*/ |
stripper: procedure; parse arg q /*remove punctuation at word-end.*/ |
||
@punctuation='.,:;?¿!¡' |
@punctuation='.,:;?¿!¡∙·'; do j=1 for length(@punctuation) |
||
q=strip(q,'T',substr(@punctuation,j,1)) |
|||
end /*j*/ |
|||
⚫ | |||
return q</lang> |
return q</lang> |
||
'''output''' |
'''output''' |
||
<pre style="height: |
<pre style="height:50ex"> |
||
file 0,record |
file 0, record: Rip a fender |
||
file 0,record |
file 0, record: Off your Car |
||
file 0,record |
file 0, record: Send it in |
||
file 0,record |
file 0, record: For a half-pound jar |
||
file 0,record |
file 0, record: Burma-Shave |
||
file 1,record |
file 1, record: A peach |
||
file 1,record |
file 1, record: Looks good |
||
file 1,record |
file 1, record: With lots of fuzz |
||
file 1, |
file 1, record: Man's no peach |
||
file 1,record |
file 1, record: And never was |
||
file 1,record |
file 1, record: Burma-Shave |
||
file 2,record |
file 2, record: Does your husband |
||
file 2,record |
file 2, record: Misbehave |
||
file 2,record |
file 2, record: Grunt and grumble |
||
file 2,record |
file 2, record: Rant and rave ? |
||
file 2,record |
file 2, record: Shoot the brute some |
||
file 2,record |
file 2, record: Burma-Shave |
||
file 3,record |
file 3, record: Don't take a curve |
||
file 3,record |
file 3, record: At 60 per |
||
file 3,record |
file 3, record: We hate to lose |
||
file 3,record |
file 3, record: A customer |
||
file 3,record |
file 3, record: Burma-Shave |
||
file 4,record |
file 4, record: Every shaver |
||
file 4,record |
file 4, record: Now can snore |
||
file 4,record |
file 4, record: Six more minutes |
||
file 4,record |
file 4, record: Than before |
||
file 4,record |
file 4, record: By using |
||
file 4,record |
file 4, record: Burma-Shave |
||
file 5,record |
file 5, record: He played |
||
file 5,record |
file 5, record: a sax |
||
file 5,record |
file 5, record: Had no B.O. |
||
file 5,record |
file 5, record: But his whiskers scratched |
||
file 5,record |
file 5, record: So she let him go |
||
file 5,record |
file 5, record: Burma-Shave |
||
file 6,record |
file 6, record: Henry the Eighth |
||
file 6,record |
file 6, record: Prince of Friskers |
||
file 6,record |
file 6, record: Lost five wives |
||
file 6,record |
file 6, record: But kept his whiskers |
||
file 6,record |
file 6, record: Burma-Shave |
||
file 7,record |
file 7, record: Listen birds |
||
file 7,record |
file 7, record: These signs cost |
||
file 7,record |
file 7, record: Money |
||
file 7,record |
file 7, record: So roost a while |
||
file 7,record |
file 7, record: But don't get funny |
||
file 7,record |
file 7, record: Burma-Shave |
||
file 8,record |
file 8, record: My man |
||
file 8,record |
file 8, record: Won't shave |
||
file 8,record |
file 8, record: Sez Hazel Huz |
||
file 8,record |
file 8, record: But I should worry |
||
file 8,record |
file 8, record: Dora's does |
||
file 8,record |
file 8, record: Burma-Shave |
||
file 9,record |
file 9, record: Past |
||
file 9, |
file 9, record: Schoolhouses |
||
file 9,record |
file 9, record: Take it slow |
||
file 9,record |
file 9, record: Let the little |
||
file 9,record |
file 9, record: Shavers grow |
||
file 9,record |
file 9, record: Burma-Shave |
||
word ───does─── found in: |
word ───does─── found in: |
||
file=2 word=1 |
file=2 word=1 |
||
file=8 word=13 |
file=8 word=13 |
||
word ───60─── found in: |
word ───60─── found in: |
||
file=3 word=6 |
file=3 word=6 |
||
word ───don't─── found in: |
word ───don't─── found in: |
||
file=3 word=1 |
file=3 word=1 |
||
file=7 word=12 |
file=7 word=12 |
||
word ───burma-shave─── found in: |
word ───burma-shave─── found in: |
||
file=0 word=14 |
file=0 word=14 |
||
file=1 word= |
file=1 word=15 |
||
file=2 word=15 |
file=2 word=15 |
||
file=3 word=14 |
file=3 word=14 |
||
file=4 word=13 |
file=4 word=13 |
||
file=5 word=17 |
file=5 word=17 |
||
file=6 word=14 |
file=6 word=14 |
||
file=7 word=15 |
file=7 word=15 |
||
file=8 word=14 |
file=8 word=14 |
||
file=9 word=11 |
file=9 word=11 |
||
</pre> |
</pre> |
||