Inverted index: Difference between revisions
Content added Content deleted
m (added whitespace before the TOC (table of contents), added a ;Task: (bold) header.) |
m (→{{header|REXX}}: changed/added comments and whitespace, changed indentations, changed a word (for finding).) |
||
Line 2,325: | Line 2,325: | ||
To see more about Burma Shave signs, see the Wikipedia entry: [http://en.wikipedia.org/wiki/Burma-Shave Burma Shave signs.] |
To see more about Burma Shave signs, see the Wikipedia entry: [http://en.wikipedia.org/wiki/Burma-Shave Burma Shave signs.] |
||
<lang rexx>/*REXX program illustrates building a simple inverted index |
<lang rexx>/*REXX program illustrates building a simple inverted index and a method of word find.*/ |
||
@.= |
@.= /*a dictionary of words (so far). */ |
||
!= |
!= /*a list of found words (so far). */ |
||
call invertI 0, 'BURMA0.TXT' /*read the file: BURMA0.TXT |
call invertI 0, 'BURMA0.TXT' /*read the file: BURMA0.TXT ··· */ |
||
call invertI 1, 'BURMA1.TXT' /* " " |
call invertI 1, 'BURMA1.TXT' /* " " " BURMA1.TXT ··· */ |
||
call invertI 2, 'BURMA2.TXT' /* " " |
call invertI 2, 'BURMA2.TXT' /* " " " BURMA2.TXT ··· */ |
||
call invertI 3, 'BURMA3.TXT' /* " " |
call invertI 3, 'BURMA3.TXT' /* " " " BURMA3.TXT ··· */ |
||
call invertI 4, 'BURMA4.TXT' /* " " |
call invertI 4, 'BURMA4.TXT' /* " " " BURMA4.TXT ··· */ |
||
call invertI 5, 'BURMA5.TXT' /* " " |
call invertI 5, 'BURMA5.TXT' /* " " " BURMA5.TXT ··· */ |
||
call invertI 6, 'BURMA6.TXT' /* " " |
call invertI 6, 'BURMA6.TXT' /* " " " BURMA6.TXT ··· */ |
||
call invertI 7, 'BURMA7.TXT' /* " " |
call invertI 7, 'BURMA7.TXT' /* " " " BURMA7.TXT ··· */ |
||
call invertI 8, 'BURMA8.TXT' /* " " |
call invertI 8, 'BURMA8.TXT' /* " " " BURMA8.TXT ··· */ |
||
call invertI 9, 'BURMA9.TXT' /* " " |
call invertI 9, 'BURMA9.TXT' /* " " " BURMA9.TXT ··· */ |
||
call findAword |
call findAword "huz" /*find a word. */ |
||
call findAword |
call findAword "60" /*find another word. */ |
||
call findAword "don't" /*and find another word. */ |
call findAword "don't" /*and find another word. */ |
||
call findAword "burma-shave" /*and find yet another word. */ |
call findAword "burma-shave" /*and find yet another word. */ |
||
exit /*stick a fork in it, we're done.*/ |
exit /*stick a fork in it, we're all done. */ |
||
/*──────────────────────────────────────────────────────────────────────────────────────*/ |
|||
/*──────────────────────────────────FINDAWORD subroutine────────────────*/ |
|||
findAword: |
findAword: procedure expose @.; arg x /*get an uppercase version of the X arg*/ |
||
parse arg ox |
parse arg ox /*get original (as-is) value of X arg.*/ |
||
_=@.x; oxo='───'ox"───" |
_=@.x; oxo='───'ox"───" |
||
if _=='' then do |
if _=='' then do |
||
say 'word' oxo "not found." |
say 'word' oxo "not found." |
||
return 0 |
return 0 |
||
end |
end |
||
_@=_ |
_@=_ /*save _ text, pass it back to invoker.*/ |
||
say 'word' oxo "found in:" |
say 'word' oxo "found in:" |
||
do until _==''; parse var _ f w _ |
|||
say ' file='f " word="w |
|||
end /*until ··· */ |
|||
return _@ |
return _@ |
||
/*──────────────────────────────────────────────────────────────────────────────────────*/ |
|||
/*─────────────────────────────────────INVERTI subroutine───────────────*/ |
|||
invertI: procedure expose @. !; |
invertI: procedure expose @. !; parse arg #,fn /*the file number and the filename. */ |
||
call lineout fn |
call lineout fn /*close the file, ··· just in case. */ |
||
w=0 |
w=0 /*the number of words found (so far). */ |
||
do while lines(fn)\==0 |
do while lines(fn)\==0 /* [↓] process the entire file. */ |
||
_=space(linein(fn |
_=space( linein(fn) ) /*read a line, elide superfluous blanks*/ |
||
if _=='' then iterate |
if _=='' then iterate /*if a blank record, then ignore it. */ |
||
say 'file' #", record:" _ |
say 'file' #", record:" _ /*display the record ──► terminal. */ |
||
do until _=='' |
do until _=='' /*pick off words from record until done*/ |
||
parse upper var _ ? _ |
parse upper var _ ? _ /*pick off a word (it's in uppercase).*/ |
||
?=stripper(?) |
?=stripper(?) /*strip any trailing punctuation. */ |
||
if ?='' then iterate |
if ?='' then iterate /*is the word now all blank (or null)? */ |
||
w=w+1 |
w=w+1 /*bump the word counter (index). */ |
||
@.?=@.? # w |
@.?=@.? # w /*append file# & word# to word's entry.*/ |
||
if wordpos(?,!)==0 then !=! ? |
if wordpos(?,!)==0 then !=! ? /*add it to the list of words found. */ |
||
end /*until ··· */ |
end /*until ··· */ |
||
end /*while ··· */ |
end /*while ··· */ |
||
say; call lineout fn /*close the file, just to be neat&safe.*/ |
|||
return w |
return w /*return the # of words found in file. */ |
||
/*──────────────────────────────────────────────────────────────────────────────────────*/ |
|||
/*─────────────────────────────────────STRIPPER subroutine──────────────*/ |
|||
stripper: procedure; parse arg q /*remove punctuation at |
stripper: procedure; parse arg q /*remove punctuation at the end of word*/ |
||
@punctuation='.,:;?¿!¡∙·'; do j=1 for length(@punctuation) |
@punctuation= '.,:;?¿!¡∙·'; do j=1 for length(@punctuation) |
||
q=strip(q,'T',substr(@punctuation,j,1)) |
q=strip(q, 'T', substr(@punctuation, j, 1) ) |
||
end /*j*/ |
end /*j*/ |
||
return q</lang> |
return q</lang> |
||
'''output''' |
'''output''' |
||
<pre style="height:50ex"> |
<pre style="height:50ex"> |
||
Line 2,452: | Line 2,452: | ||
file 9, record: Burma-Shave |
file 9, record: Burma-Shave |
||
word |
word ───huz─── found in: |
||
file= |
file=8 word=7 |
||
file=8 word=13 |
|||
word ───60─── found in: |
word ───60─── found in: |
||
file=3 word=6 |
file=3 word=6 |