Hash join: Difference between revisions

Content added Content deleted
(added perl)
m (add index arguments)
Line 187: Line 187:
=={{header|OCaml}}==
=={{header|OCaml}}==
This exploits the fact that Hashtbl implements a hash table that can store multiple values for a key, for an especially simple solution:
This exploits the fact that Hashtbl implements a hash table that can store multiple values for a key, for an especially simple solution:
<lang ocaml>let hash_join table1 table2 =
<lang ocaml>let hash_join table1 f1 table2 f2 =
let h = Hashtbl.create 42 in
let h = Hashtbl.create 42 in
(* hash phase *)
(* hash phase *)
List.iter (fun (_,k as s) ->
List.iter (fun s ->
Hashtbl.add h k s) table1;
Hashtbl.add h (f1 s) s) table1;
(* join phase *)
(* join phase *)
List.concat (List.map (fun (k,_ as r) ->
List.concat (List.map (fun r ->
List.map (fun s -> s, r) (Hashtbl.find_all h k)) table2)</lang>
List.map (fun s -> s, r) (Hashtbl.find_all h (f2 r))) table2)</lang>
Sample run:
Sample run:
<pre>
<pre>
Line 207: Line 207:
"Alan", "Zombies";
"Alan", "Zombies";
"Glory", "Buffy"];;
"Glory", "Buffy"];;
# hash_join table1 table2;;
# hash_join table1 snd table2 fst;;
- : ((int * string) * (string * string)) list =
- : ((int * string) * (string * string)) list =
[((27, "Jonah"), ("Jonah", "Whales")); ((27, "Jonah"), ("Jonah", "Spiders"));
[((27, "Jonah"), ("Jonah", "Whales")); ((27, "Jonah"), ("Jonah", "Spiders"));
Line 219: Line 219:


sub hashJoin {
sub hashJoin {
my ($table1, $table2) = @_;
my ($table1, $index1, $table2, $index2) = @_;
my %h;
my %h;
# hash phase
# hash phase
foreach my $s (@$table1) {
foreach my $s (@$table1) {
my $k = $s->[1];
push @{ $h{$s->[$index1]} }, $s;
push @{ $h{$k} }, $s;
}
}
# join phase
# join phase
map { my $r = $_;
map { my $r = $_;
my $k = $r->[0];
map [$_, $r], @{ $h{$r->[$index2]} }
map [$_, $r], @{ $h{$k} }
} @$table2;
} @$table2;
}
}
Line 245: Line 243:


$Data::Dumper::Indent = 0;
$Data::Dumper::Indent = 0;
foreach my $row (hashJoin(\@table1, \@table2)) {
foreach my $row (hashJoin(\@table1, 1, \@table2, 0)) {
print Dumper($row), "\n";
print Dumper($row), "\n";
}</lang>
}</lang>
Line 288: Line 286:
<lang python>from collections import defaultdict
<lang python>from collections import defaultdict


def hashJoin(table1, table2):
def hashJoin(table1, index1, table2, index2):
h = defaultdict(list)
h = defaultdict(list)
# hash phase
# hash phase
for s in table1:
for s in table1:
k = s[1]
h[s[index1]].append(s)
h[k].append(s)
# join phase
# join phase
return [(s, r) for r in table2 for s in h[r[index2]]]
result = []
for r in table2:
k = r[0]
for s in h[k]:
result.append((s, r))
return result


table1 = [(27, "Jonah"),
table1 = [(27, "Jonah"),
Line 313: Line 305:
("Glory", "Buffy")]
("Glory", "Buffy")]


for row in hashJoin(table1, table2):
for row in hashJoin(table1, 1, table2, 0):
print(row)</lang>
print(row)</lang>
{{out}}
{{out}}
Line 327: Line 319:


=={{header|Ruby}}==
=={{header|Ruby}}==
<lang ruby>def hashJoin(table1, table2)
<lang ruby>def hashJoin(table1, index1, table2, index2)
# hash phase
# hash phase
h = table1.group_by {|s| s[1]}
h = table1.group_by {|s| s[index1]}
# join phase
# join phase
table2.collect {|r|
table2.collect {|r|
k = r[0]
k = r[0]
h[k].collect {|s| [s, r]}
h[r[index2]].collect {|s| [s, r]}
}.flatten(1)
}.flatten(1)
end
end
Line 348: Line 340:
["Glory", "Buffy"]]
["Glory", "Buffy"]]


hashJoin(table1, table2).each { |row| p row }</lang>
hashJoin(table1, 1, table2, 0).each { |row| p row }</lang>
{{out}}
{{out}}
<pre>
<pre>