Hash join: Difference between revisions
Content added Content deleted
(added perl) |
m (add index arguments) |
||
Line 187: | Line 187: | ||
=={{header|OCaml}}== |
=={{header|OCaml}}== |
||
This exploits the fact that Hashtbl implements a hash table that can store multiple values for a key, for an especially simple solution: |
This exploits the fact that Hashtbl implements a hash table that can store multiple values for a key, for an especially simple solution: |
||
<lang ocaml>let hash_join table1 table2 = |
<lang ocaml>let hash_join table1 f1 table2 f2 = |
||
let h = Hashtbl.create 42 in |
let h = Hashtbl.create 42 in |
||
(* hash phase *) |
(* hash phase *) |
||
List.iter (fun |
List.iter (fun s -> |
||
Hashtbl.add h |
Hashtbl.add h (f1 s) s) table1; |
||
(* join phase *) |
(* join phase *) |
||
List.concat (List.map (fun |
List.concat (List.map (fun r -> |
||
List.map (fun s -> s, r) (Hashtbl.find_all h |
List.map (fun s -> s, r) (Hashtbl.find_all h (f2 r))) table2)</lang> |
||
Sample run: |
Sample run: |
||
<pre> |
<pre> |
||
Line 207: | Line 207: | ||
"Alan", "Zombies"; |
"Alan", "Zombies"; |
||
"Glory", "Buffy"];; |
"Glory", "Buffy"];; |
||
# hash_join table1 table2;; |
# hash_join table1 snd table2 fst;; |
||
- : ((int * string) * (string * string)) list = |
- : ((int * string) * (string * string)) list = |
||
[((27, "Jonah"), ("Jonah", "Whales")); ((27, "Jonah"), ("Jonah", "Spiders")); |
[((27, "Jonah"), ("Jonah", "Whales")); ((27, "Jonah"), ("Jonah", "Spiders")); |
||
Line 219: | Line 219: | ||
sub hashJoin { |
sub hashJoin { |
||
my ($table1, $table2) = @_; |
my ($table1, $index1, $table2, $index2) = @_; |
||
my %h; |
my %h; |
||
# hash phase |
# hash phase |
||
foreach my $s (@$table1) { |
foreach my $s (@$table1) { |
||
push @{ $h{$s->[$index1]} }, $s; |
|||
push @{ $h{$k} }, $s; |
|||
} |
} |
||
# join phase |
# join phase |
||
map { my $r = $_; |
map { my $r = $_; |
||
map [$_, $r], @{ $h{$r->[$index2]} } |
|||
map [$_, $r], @{ $h{$k} } |
|||
} @$table2; |
} @$table2; |
||
} |
} |
||
Line 245: | Line 243: | ||
$Data::Dumper::Indent = 0; |
$Data::Dumper::Indent = 0; |
||
foreach my $row (hashJoin(\@table1, \@table2)) { |
foreach my $row (hashJoin(\@table1, 1, \@table2, 0)) { |
||
print Dumper($row), "\n"; |
print Dumper($row), "\n"; |
||
}</lang> |
}</lang> |
||
Line 288: | Line 286: | ||
<lang python>from collections import defaultdict |
<lang python>from collections import defaultdict |
||
def hashJoin(table1, table2): |
def hashJoin(table1, index1, table2, index2): |
||
h = defaultdict(list) |
h = defaultdict(list) |
||
# hash phase |
# hash phase |
||
for s in table1: |
for s in table1: |
||
h[s[index1]].append(s) |
|||
h[k].append(s) |
|||
# join phase |
# join phase |
||
return [(s, r) for r in table2 for s in h[r[index2]]] |
|||
result = [] |
|||
for r in table2: |
|||
k = r[0] |
|||
for s in h[k]: |
|||
result.append((s, r)) |
|||
return result |
|||
table1 = [(27, "Jonah"), |
table1 = [(27, "Jonah"), |
||
Line 313: | Line 305: | ||
("Glory", "Buffy")] |
("Glory", "Buffy")] |
||
for row in hashJoin(table1, table2): |
for row in hashJoin(table1, 1, table2, 0): |
||
print(row)</lang> |
print(row)</lang> |
||
{{out}} |
{{out}} |
||
Line 327: | Line 319: | ||
=={{header|Ruby}}== |
=={{header|Ruby}}== |
||
<lang ruby>def hashJoin(table1, table2) |
<lang ruby>def hashJoin(table1, index1, table2, index2) |
||
# hash phase |
# hash phase |
||
h = table1.group_by {|s| s[ |
h = table1.group_by {|s| s[index1]} |
||
# join phase |
# join phase |
||
table2.collect {|r| |
table2.collect {|r| |
||
k = r[0] |
k = r[0] |
||
h[ |
h[r[index2]].collect {|s| [s, r]} |
||
}.flatten(1) |
}.flatten(1) |
||
end |
end |
||
Line 348: | Line 340: | ||
["Glory", "Buffy"]] |
["Glory", "Buffy"]] |
||
hashJoin(table1, table2).each { |row| p row }</lang> |
hashJoin(table1, 1, table2, 0).each { |row| p row }</lang> |
||
{{out}} |
{{out}} |
||
<pre> |
<pre> |