Inverted index: Difference between revisions

Content added Content deleted
m (added whitespace before the TOC (table of contents), added a ;Task: (bold) header.)
m (→‎{{header|REXX}}: changed/added comments and whitespace, changed indentations, changed a word (for finding).)
Line 2,325: Line 2,325:


For more about Burma-Shave signs, see the Wikipedia entry:   [http://en.wikipedia.org/wiki/Burma-Shave Burma-Shave signs].
For more about Burma-Shave signs, see the Wikipedia entry:   [http://en.wikipedia.org/wiki/Burma-Shave Burma-Shave signs].
<lang rexx>/*REXX program illustrates building a simple inverted index & word find.*/
<lang rexx>/*REXX program illustrates building a simple inverted index and a method of word find.*/
@.='' /*dictionary of words (so far).*/
@.= /*a dictionary of words (so far). */
!='' /*a list of found words (so far).*/
!= /*a list of found words (so far). */
call invertI 0, 'BURMA0.TXT' /*read the file: BURMA0.TXT ...*/
call invertI 0, 'BURMA0.TXT' /*read the file: BURMA0.TXT ··· */
call invertI 1, 'BURMA1.TXT' /* " " ~ BURMA1.TXT ...*/
call invertI 1, 'BURMA1.TXT' /* " " " BURMA1.TXT ··· */
call invertI 2, 'BURMA2.TXT' /* " " ~ BURMA2.TXT ...*/
call invertI 2, 'BURMA2.TXT' /* " " " BURMA2.TXT ··· */
call invertI 3, 'BURMA3.TXT' /* " " ~ BURMA3.TXT ...*/
call invertI 3, 'BURMA3.TXT' /* " " " BURMA3.TXT ··· */
call invertI 4, 'BURMA4.TXT' /* " " ~ BURMA4.TXT ...*/
call invertI 4, 'BURMA4.TXT' /* " " " BURMA4.TXT ··· */
call invertI 5, 'BURMA5.TXT' /* " " ~ BURMA5.TXT ...*/
call invertI 5, 'BURMA5.TXT' /* " " " BURMA5.TXT ··· */
call invertI 6, 'BURMA6.TXT' /* " " ~ BURMA6.TXT ...*/
call invertI 6, 'BURMA6.TXT' /* " " " BURMA6.TXT ··· */
call invertI 7, 'BURMA7.TXT' /* " " ~ BURMA7.TXT ...*/
call invertI 7, 'BURMA7.TXT' /* " " " BURMA7.TXT ··· */
call invertI 8, 'BURMA8.TXT' /* " " ~ BURMA8.TXT ...*/
call invertI 8, 'BURMA8.TXT' /* " " " BURMA8.TXT ··· */
call invertI 9, 'BURMA9.TXT' /* " " ~ BURMA9.TXT ...*/
call invertI 9, 'BURMA9.TXT' /* " " " BURMA9.TXT ··· */
call findAword 'does' /*find a word. */
call findAword "huz" /*find a word. */
call findAword '60' /*find another word. */
call findAword "60" /*find another word. */
call findAword "don't" /*and find another word. */
call findAword "don't" /*and find another word. */
call findAword "burma-shave" /*and find yet another word. */
call findAword "burma-shave" /*and find yet another word. */
exit /*stick a fork in it, we're done.*/
exit /*stick a fork in it, we're all done. */
/*──────────────────────────────────────────────────────────────────────────────────────*/
/*──────────────────────────────────FINDAWORD subroutine────────────────*/
findAword: procedure expose @.; arg x /*get an uppercase version of X. */
findAword: procedure expose @.; arg x /*get an uppercase version of the X arg*/
parse arg ox /*get original (as-is) value of X*/
parse arg ox /*get original (as-is) value of X arg.*/
_=@.x; oxo='───'ox"───"
_=@.x; oxo='───'ox"───"
if _=='' then do
if _=='' then do
say 'word' oxo "not found."
say 'word' oxo "not found."
return 0
return 0
end
end
_@=_ /*save _, pass it back to invoker*/
_@=_ /*save _ text, pass it back to invoker.*/
say 'word' oxo "found in:"
say 'word' oxo "found in:"
do until _==''; parse var _ f w _
do until _==''; parse var _ f w _
say ' file='f ' word='w
say ' file='f " word="w
end /*until ··· */
end /*until ··· */
return _@
return _@
/*──────────────────────────────────────────────────────────────────────────────────────*/
/*─────────────────────────────────────INVERTI subroutine───────────────*/
invertI: procedure expose @. !; parse arg #,fn /*file#, filename*/
invertI: procedure expose @. !; parse arg #,fn /*the file number and the filename. */
call lineout fn /*close the file, just in case. */
call lineout fn /*close the file, ··· just in case. */
w=0 /*number of words found (so far).*/
w=0 /*the number of words found (so far). */
do while lines(fn)\==0 /* [↓] process the entire file.*/
do while lines(fn)\==0 /* [↓] process the entire file. */
_=space(linein(fn)) /*read a line, elide extra blanks*/
_=space( linein(fn) ) /*read a line, elide superfluous blanks*/
if _=='' then iterate /*if blank record, then ignore it*/
if _=='' then iterate /*if a blank record, then ignore it. */
say 'file' #", record:" _ /*echo a record (to be verbose).*/
say 'file' #", record:" _ /*display the record ──► terminal. */


do until _=='' /*pick off words until done. */
do until _=='' /*pick off words from record until done*/
parse upper var _ ? _ /*pick off a word (uppercased). */
parse upper var _ ? _ /*pick off a word (it's in uppercase).*/
?=stripper(?) /*strip any trailing punctuation.*/
?=stripper(?) /*strip any trailing punctuation. */
if ?='' then iterate /*is the word now blank (null) ? */
if ?='' then iterate /*is the word now all blank (or null)? */
w=w+1 /*bump the word counter (index). */
w=w+1 /*bump the word counter (index). */
@.?=@.? # w /*append the new word to a list. */
@.?=@.? # w /*append the new word to a list. */
if wordpos(?,!)==0 then !=! ? /*add to the list of words found.*/
if wordpos(?,!)==0 then !=! ? /*add it to the list of words found. */
end /*until ··· */
end /*until ··· */
end /*while ··· */
end /*while ··· */
say; call lineout fn /*close the file, just to be neat*/
say; call lineout fn /*close the file, just to be neat&safe.*/
return w /*return the index of the word. */
return w /*return the index of word in record. */
/*──────────────────────────────────────────────────────────────────────────────────────*/
/*─────────────────────────────────────STRIPPER subroutine──────────────*/
stripper: procedure; parse arg q /*remove punctuation at word-end.*/
stripper: procedure; parse arg q /*remove punctuation at the end of word*/
@punctuation='.,:;?¿!¡∙·'; do j=1 for length(@punctuation)
@punctuation= '.,:;?¿!¡∙·'; do j=1 for length(@punctuation)
q=strip(q,'T',substr(@punctuation,j,1))
q=strip(q, 'T', substr(@punctuation, j, 1) )
end /*j*/
end /*j*/
return q</lang>
return q</lang>
'''output'''
'''output'''
<pre style="height:50ex">
<pre style="height:50ex">
Line 2,452: Line 2,452:
file 9, record: Burma-Shave
file 9, record: Burma-Shave


word ───does─── found in:
word ───huz─── found in:
file=2 word=1
file=8 word=7
file=8 word=13
word ───60─── found in:
word ───60─── found in:
file=3 word=6
file=3 word=6