Inverted index: Difference between revisions
Content added Content deleted
Thundergnat (talk | contribs) (Rename Perl 6 -> Raku, alphabetize, minor clean-up) |
|||
Line 842: | Line 842: | ||
Search for "banana": other_file |
Search for "banana": other_file |
||
Search for "boo": not found</lang> |
Search for "boo": not found</lang> |
||
=={{header|C sharp|C#}}== |
|||
<lang csharp>using System; |
|||
using System.Collections.Generic; |
|||
using System.IO; |
|||
using System.Linq; |
|||
class InvertedIndex |
|||
{ |
|||
/// <summary>
/// Inverts a key-to-items mapping into an item-to-keys mapping
/// (for example file name -> words becomes word -> file names).
/// </summary>
/// <typeparam name="TKey">Type of the keys of the input mapping.</typeparam>
/// <typeparam name="TItem">Type of the items listed under each key.</typeparam>
/// <param name="dictionary">Mapping from each key to the items it contains.</param>
/// <returns>
/// A dictionary that maps every item to the keys under which it appears.
/// Each key is listed at most once per item, and every key list is already materialized.
/// </returns>
static Dictionary<TItem, IEnumerable<TKey>> Invert<TKey, TItem>(Dictionary<TKey, IEnumerable<TItem>> dictionary)
{
    return dictionary
        .SelectMany(entry => entry.Value.Select(item => new KeyValuePair<TItem, TKey>(item, entry.Key)))
        .GroupBy(pair => pair.Key, pair => pair.Value)
        .ToDictionary(
            group => group.Key,
            // Distinct: an item repeated under one key must list that key only once.
            // ToList: materialize now so callers do not re-run the query on every enumeration.
            group => (IEnumerable<TKey>)group.Distinct().ToList());
}
|||
/// <summary>
/// Prompts for a whitespace-separated list of file names and for a search word,
/// builds an inverted index over the words of those files, and prints the files
/// that contain the word (or a "not found" message when none does).
/// </summary>
static void Main()
{
    Console.Write("files: ");
    // ReadLine returns null at end of input; treat that like an empty line.
    var files = Console.ReadLine() ?? string.Empty;
    Console.Write("find: ");
    var find = Console.ReadLine() ?? string.Empty;

    // A null separator array splits on any whitespace. Empty entries are removed so that
    // repeated spaces do not produce an empty file name, and Distinct keeps a file name
    // typed twice from causing a duplicate-key failure in ToDictionary.
    var fileNames = files
        .Split((char[])null, StringSplitOptions.RemoveEmptyEntries)
        .Distinct();

    var dictionary = fileNames.ToDictionary(
        file => file,
        file => File.ReadAllText(file)
            .Split((char[])null, StringSplitOptions.RemoveEmptyEntries)
            .AsEnumerable());

    // TryGetValue instead of the indexer: a word that occurs in no file is an expected
    // outcome, not an error.
    IEnumerable<string> matches;
    if (Invert(dictionary).TryGetValue(find, out matches))
    {
        Console.WriteLine("{0} found in: {1}", find, string.Join(" ", matches));
    }
    else
    {
        Console.WriteLine("{0} not found", find);
    }
}
|||
}</lang> |
|||
Sample output: |
|||
<lang>files: file1 file2 file3 |
|||
find: what |
|||
what found in: file1 file2</lang> |
|||
=={{header|C++}}== |
=={{header|C++}}== |
||
Same idea as the C implementation - trie to store the words |
Same idea as the C implementation - trie to store the words |
||
Line 980: | Line 1,012: | ||
fat was not found! |
fat was not found! |
||
</pre> |
</pre> |
||
=={{header|C sharp|C#}}== |
|||
<lang csharp>using System; |
|||
using System.Collections.Generic; |
|||
using System.IO; |
|||
using System.Linq; |
|||
class InvertedIndex |
|||
{ |
|||
/// <summary>
/// Inverts a key-to-items mapping into an item-to-keys mapping
/// (for example file name -> words becomes word -> file names).
/// </summary>
/// <typeparam name="TKey">Type of the keys of the input mapping.</typeparam>
/// <typeparam name="TItem">Type of the items listed under each key.</typeparam>
/// <param name="dictionary">Mapping from each key to the items it contains.</param>
/// <returns>
/// A dictionary that maps every item to the keys under which it appears.
/// Each key is listed at most once per item, and every key list is already materialized.
/// </returns>
static Dictionary<TItem, IEnumerable<TKey>> Invert<TKey, TItem>(Dictionary<TKey, IEnumerable<TItem>> dictionary)
{
    return dictionary
        .SelectMany(entry => entry.Value.Select(item => new KeyValuePair<TItem, TKey>(item, entry.Key)))
        .GroupBy(pair => pair.Key, pair => pair.Value)
        .ToDictionary(
            group => group.Key,
            // Distinct: an item repeated under one key must list that key only once.
            // ToList: materialize now so callers do not re-run the query on every enumeration.
            group => (IEnumerable<TKey>)group.Distinct().ToList());
}
|||
/// <summary>
/// Prompts for a whitespace-separated list of file names and for a search word,
/// builds an inverted index over the words of those files, and prints the files
/// that contain the word (or a "not found" message when none does).
/// </summary>
static void Main()
{
    Console.Write("files: ");
    // ReadLine returns null at end of input; treat that like an empty line.
    var files = Console.ReadLine() ?? string.Empty;
    Console.Write("find: ");
    var find = Console.ReadLine() ?? string.Empty;

    // A null separator array splits on any whitespace. Empty entries are removed so that
    // repeated spaces do not produce an empty file name, and Distinct keeps a file name
    // typed twice from causing a duplicate-key failure in ToDictionary.
    var fileNames = files
        .Split((char[])null, StringSplitOptions.RemoveEmptyEntries)
        .Distinct();

    var dictionary = fileNames.ToDictionary(
        file => file,
        file => File.ReadAllText(file)
            .Split((char[])null, StringSplitOptions.RemoveEmptyEntries)
            .AsEnumerable());

    // TryGetValue instead of the indexer: a word that occurs in no file is an expected
    // outcome, not an error.
    IEnumerable<string> matches;
    if (Invert(dictionary).TryGetValue(find, out matches))
    {
        Console.WriteLine("{0} found in: {1}", find, string.Join(" ", matches));
    }
    else
    {
        Console.WriteLine("{0} not found", find);
    }
}
|||
}</lang> |
|||
Sample output: |
|||
<lang>files: file1 file2 file3 |
|||
find: what |
|||
what found in: file1 file2</lang> |
|||
=={{header|Clojure}}== |
=={{header|Clojure}}== |
||
Line 1,096: | Line 1,097: | ||
knuth_sample.coffee:12 |
knuth_sample.coffee:12 |
||
</lang> |
</lang> |
||
=={{header|Common Lisp}}== |
=={{header|Common Lisp}}== |
||
Line 1,258: | Line 1,257: | ||
[3]→ null |
[3]→ null |
||
</lang> |
</lang> |
||
=={{header|Erlang}}== |
=={{header|Erlang}}== |
||
Line 1,300: | Line 1,298: | ||
search_common( Files, Acc ) -> [X || X <- Acc, lists:member(X, Files)]. |
search_common( Files, Acc ) -> [X || X <- Acc, lists:member(X, Files)]. |
||
</lang> |
|||
=={{header|Factor}}== |
|||
<lang factor>USING: assocs fry io.encodings.utf8 io.files kernel sequences |
|||
sets splitting vectors ; |
|||
IN: rosettacode.inverted-index |
|||
! Reads the file's contents as UTF-8 text, splits them on the listed
! punctuation/whitespace characters and drops the empty pieces (harvest).
! NOTE(review): the declared output name `assoc` looks misleading; split/harvest
! presumably leave a sequence of word strings - confirm.
: file-words ( file -- assoc ) |
|||
utf8 file-contents " ,;:!?.()[]{}\n\r" split harvest ; |
|||
! Adds file to an existing collection of files (adjoin), or starts a new
! one-element vector when there is no collection yet (files is f).
: add-to-file-list ( files file -- files ) |
|||
over [ swap [ adjoin ] keep ] [ nip 1vector ] if ; |
|||
! For each word, updates that word's entry in index (change-at) so that the
! entry's file list includes file.
: add-to-index ( words index file -- ) |
|||
'[ _ [ _ add-to-file-list ] change-at ] each ; |
|||
! For each file, extracts its words with file-words and records them in index.
: (index-files) ( files index -- ) |
|||
[ [ [ file-words ] keep ] dip swap add-to-index ] curry each ; |
|||
! Creates a fresh hashtable, fills it from files and leaves it on the stack.
: index-files ( files -- index ) |
|||
H{ } clone [ (index-files) ] keep ; |
|||
! Looks every term up in index and intersects the resulting file collections,
! i.e. keeps the files that were found for all terms.
! NOTE(review): map-reduce presumably fails on an empty terms sequence - confirm.
: query ( terms index -- files ) |
|||
[ at ] curry map [ ] [ intersect ] map-reduce ; |
|||
</lang> |
|||
Example use: |
|||
<lang>( scratchpad ) { "f1" "f2" "f3" } index-files |
|||
--- Data stack: |
|||
H{ { "a" ~vector~ } { "is" ~vector~ } { "what" ~vector~ } { ... |
|||
( scratchpad ) { "what" "is" "it" } swap query . |
|||
V{ "f1" "f2" } |
|||
</lang> |
</lang> |
||
Line 1,373: | Line 1,343: | ||
Find: what is |
Find: what is |
||
Found in: file1.txt file2.txt</pre> |
Found in: file1.txt file2.txt</pre> |
||
=={{header|Factor}}== |
|||
<lang factor>USING: assocs fry io.encodings.utf8 io.files kernel sequences |
|||
sets splitting vectors ; |
|||
IN: rosettacode.inverted-index |
|||
! Reads the file's contents as UTF-8 text, splits them on the listed
! punctuation/whitespace characters and drops the empty pieces (harvest).
! NOTE(review): the declared output name `assoc` looks misleading; split/harvest
! presumably leave a sequence of word strings - confirm.
: file-words ( file -- assoc ) |
|||
utf8 file-contents " ,;:!?.()[]{}\n\r" split harvest ; |
|||
! Adds file to an existing collection of files (adjoin), or starts a new
! one-element vector when there is no collection yet (files is f).
: add-to-file-list ( files file -- files ) |
|||
over [ swap [ adjoin ] keep ] [ nip 1vector ] if ; |
|||
! For each word, updates that word's entry in index (change-at) so that the
! entry's file list includes file.
: add-to-index ( words index file -- ) |
|||
'[ _ [ _ add-to-file-list ] change-at ] each ; |
|||
! For each file, extracts its words with file-words and records them in index.
: (index-files) ( files index -- ) |
|||
[ [ [ file-words ] keep ] dip swap add-to-index ] curry each ; |
|||
! Creates a fresh hashtable, fills it from files and leaves it on the stack.
: index-files ( files -- index ) |
|||
H{ } clone [ (index-files) ] keep ; |
|||
! Looks every term up in index and intersects the resulting file collections,
! i.e. keeps the files that were found for all terms.
! NOTE(review): map-reduce presumably fails on an empty terms sequence - confirm.
: query ( terms index -- files ) |
|||
[ at ] curry map [ ] [ intersect ] map-reduce ; |
|||
</lang> |
|||
Example use: |
|||
<lang>( scratchpad ) { "f1" "f2" "f3" } index-files |
|||
--- Data stack: |
|||
H{ { "a" ~vector~ } { "is" ~vector~ } { "what" ~vector~ } { ... |
|||
( scratchpad ) { "what" "is" "it" } swap query . |
|||
V{ "f1" "f2" } |
|||
</lang> |
|||
=={{header|Go}}== |
=={{header|Go}}== |
||
Line 1,704: | Line 1,702: | ||
>search 'around' |
>search 'around' |
||
~help/primer/gui.htm</lang> |
~help/primer/gui.htm</lang> |
||
=={{header|Java}}== |
=={{header|Java}}== |
||
<lang Java> |
<lang Java> |
||
Line 1,883: | Line 1,882: | ||
0 |
0 |
||
$</lang> |
$</lang> |
||
=={{header|Julia}}== |
=={{header|Julia}}== |
||
Line 2,188: | Line 2,186: | ||
print "$_\n" |
print "$_\n" |
||
foreach search_words_with_index({createindex(@ARGV)}, @searchwords);</lang> |
foreach search_words_with_index({createindex(@ARGV)}, @searchwords);</lang> |
||
=={{header|Perl 6}}== |
|||
{{works with|rakudo|2015-09-16}} |
|||
<lang perl6>sub MAIN (*@files) { |
|||
# Command-line entry point: @files collects the file names given as arguments.
# %norm accumulates, per file name, the lowercased words read from that file.
my %norm; |
|||
do for @files -> $file { |
|||
# Pair the file name with every word of the file (X=>) and push the pairs into %norm.
%norm.push: $file X=> slurp($file).lc.words; |
|||
} |
|||
# Build the inverted index: flip the file/word data, drop duplicate pairs and push
# the result into %inv (presumably giving word => file(s); confirm .invert semantics).
(my %inv).push: %norm.invert.unique; |
|||
# Keep prompting for search terms; the loop condition is the list of words entered,
# so the loop ends when that list is empty.
while prompt("Search terms: ").words -> @words { |
|||
for @words -> $word { |
|||
# Look the lowercased word up in %inv; print '(not found)' when the entry is undefined.
say "$word => {%inv.{$word.lc}//'(not found)'}"; |
|||
} |
|||
} |
|||
}</lang> |
|||
=={{header|Phix}}== |
=={{header|Phix}}== |
||
Line 2,640: | Line 2,623: | ||
Terms found at: F1, F2. |
Terms found at: F1, F2. |
||
</pre> |
</pre> |
||
=={{header|Raku}}== |
|||
(formerly Perl 6) |
|||
{{works with|rakudo|2015-09-16}} |
|||
<lang perl6>sub MAIN (*@files) { |
|||
# Command-line entry point: @files collects the file names given as arguments.
# %norm accumulates, per file name, the lowercased words read from that file.
my %norm; |
|||
do for @files -> $file { |
|||
# Pair the file name with every word of the file (X=>) and push the pairs into %norm.
%norm.push: $file X=> slurp($file).lc.words; |
|||
} |
|||
# Build the inverted index: flip the file/word data, drop duplicate pairs and push
# the result into %inv (presumably giving word => file(s); confirm .invert semantics).
(my %inv).push: %norm.invert.unique; |
|||
# Keep prompting for search terms; the loop condition is the list of words entered,
# so the loop ends when that list is empty.
while prompt("Search terms: ").words -> @words { |
|||
for @words -> $word { |
|||
# Look the lowercased word up in %inv; print '(not found)' when the entry is undefined.
say "$word => {%inv.{$word.lc}//'(not found)'}"; |
|||
} |
|||
} |
|||
}</lang> |
|||
=={{header|REXX}}== |
=={{header|REXX}}== |
||
Line 2,862: | Line 2,862: | ||
> ./indexsearch.rb It iS\! |
> ./indexsearch.rb It iS\! |
||
["file1", "file2", "file3"]</pre> |
["file1", "file2", "file3"]</pre> |
||
=={{header|Scala}}== |
=={{header|Scala}}== |
||
<lang Scala>object InvertedIndex extends App { |
<lang Scala>object InvertedIndex extends App { |