Inverted index: Difference between revisions

Content added Content deleted
(Shorter D entry, used text files from Wikipedia)
(→‎{{header|D}}: less verbose; case-insensitive)
Line 967:
=={{header|D}}==
<lang d>import std.stdio, std.algorithm, std.string, std.file, std.regex;
 
/**
 * Adds every word of the text file `fn` to the inverted index `idx`.
 *
 * The file is split on runs of non-word characters; each resulting word is
 * lower-cased so that look-ups are case-insensitive, and `fn` is appended to
 * that word's file list unless it is already present.
 *
 * Params:
 *   fn  = path of the text file to index
 *   idx = inverted index mapping a lower-cased word to the files containing it
 *
 * Throws: Exception if `fn` does not exist or is not a regular file.
 */
void parseFile(in string fn, ref string[][string] idx) {
    if (!exists(fn) || !isFile(fn))
        throw new Exception("File not found");

    // `\W+` treats a run of separators (", ", "!\n", ...) as one delimiter.
    foreach (immutable token; readText(fn).splitter(regex(r"\W+"))) {
        // A leading or trailing separator still produces an empty token;
        // never index the empty string as a word.
        if (token.length == 0)
            continue;

        immutable word = token.toLower();
        if (!idx.get(word, null).canFind(fn))
            idx[word] ~= fn;
    }
}
 
void main() {
string[][string] index;
 
void parseFile(in string fn, ref string[][string] idx) {
foreach (immutable fileName; ["inverted_index0.txt",
if (!exists(fn) || !isFile(fn))
"inverted_index1.txt",
throw new Exception("File not "inverted_index2.txtfound"]);
 
parseFile(fileName, index);
foreach (immutable word; readText(fn).splitter(regex(r"\W"))) {
word = word.toLower();
if (!idxindex.get(word, null).canFind(fn))
idx index[word] ~= fn;
}
}
 
immutable filenames = ["a.txt", "b.txt", "c.txt"];
foreach (fname; filenames)
parseFile(fileName, indexfname);
 
while (true) {
writef("\nEnter a word to search for: (q to quit): ");
immutableauto w = readln().strip().toLower();
if (w.toLower() == "q") {
writeln("quitting.");
break;
Line 999 ⟶ 1,000:
}</lang>
Both the demo text files and the queries are taken from the Wikipedia page. The files contain:
It is what it is.
 
What is it?
 
It is a banana!
{{out}}
<pre>Enter a word to search for: (q to quit): cat
'cat' not found.
 
Enter a word to search for: (q to quit): banana
'banana' found in "inverted_index2.txt".
 
Enter a word to search for: (q to quit): is
'is' found in "a.txt" "b.txt" "c.txt".
 
Enter a word to search for: (q to quit): banana
'banana' found in "c.txt".
 
Enter a word to search for: (q to quit): it
'it' found in "a.txt" "b.txt" "c.txt".
 
Enter a word to search for: (q to quit): what
'what' found in "a.txt" "b.txt".
 
Enter a word to search for: (q to quit): q