Hash join

The classic hash join algorithm for an inner join of two relations has the following steps:

Hash phase: Create a hash table for one of the two relations by applying a hash function to the join attribute of each row. Ideally we should create a hash table for the smaller relation, thus optimizing for creation time and memory size of the hash table.
Join phase: Scan the larger relation and find the relevant rows by looking in the hash table created before.

The algorithm is as follows:

for each tuple s in S do
   let h = hash on join attributes s(b)
   place s in hash table S_h in bucket keyed by hash value h
for each tuple r in R do
   let h = hash on join attributes r(a)
   if h indicates a nonempty bucket (B) of hash table S_h
      if h matches any s in B
         concatenate r and s
      place relation in Q

Task: implement the Hash Join algorithm and show the result of joining two tables with it. You should use your implementation to show the joining of these tables:

Age	Name
27	Jonah
18	Alan
28	Glory
18	Popeye
28	Alan

Name	Nemesis
Jonah	Whales
Jonah	Spiders
Alan	Ghosts
Alan	Zombies
Glory	Buffy

Bracmat

This solution creates a hash table for the smaller relation in the function join. This function takes as arguments the smallest table, the biggest table and then three pieces of code: two patterns that describe each table's field order and code that generates one row of output. These pieces of code are inserted in a fixed skeleton of code using macro substitution. <lang bracmat>( (27.Jonah)

     (18.Alan)
     (28.Glory)
     (18.Popeye)
     (28.Alan)
 : ?table-A

& (Jonah.Whales)

     (Jonah.Spiders)
     (Alan.Ghosts)
     (Alan.Zombies)
     (Glory.Buffy)
 : ?table-B

& new$hash:?H & !table-A:? [?lenA & !table-B:? [?lenB & ( join

 =     smalltab bigtab smallschema bigschema joinschema
     , key val val2 keyval2
   .     !arg
       : (?smalltab.?bigtab.(=?smallschema.?bigschema.?joinschema))
     & :?rel
     & !(
        ' (   whl
            ' ( !smalltab:$smallschema ?smalltab
              & (H..insert)$(!key.!val)
              )
          &   whl
            ' ( !bigtab:$bigschema ?bigtab
              & (   (H..find)$!key:?keyval2
                  &   whl
                    ' ( !keyval2:(?key.?val2) ?keyval2
                      & $joinschema !rel:?rel
                      )
                |
                )
              )
          )
        )
     & !rel
 )

& out

 $ ( join
   $ (   !lenA:~<!lenB
       & ( !table-B
         . !table-A
         . (
           = (?key.?val).(?val.?key).!val.!key.!val2
           )
         )
     | ( !table-A
       . !table-B
       . (=(?val.?key).(?key.?val).!val2.!key.!val)
       )
     )
   )

& );</lang> Output:

  (28.Alan.Ghosts)
  (28.Alan.Zombies)
  (28.Glory.Buffy)
  (18.Alan.Ghosts)
  (18.Alan.Zombies)
  (27.Jonah.Whales)
  (27.Jonah.Spiders)

C#

using LINQ to Objects

<lang csharp>using System; using System.Collections.Generic; using System.Linq;

namespace HashJoin {

   public class AgeName
   {
       public AgeName(byte age, string name)
       {
           Age = age;
           Name = name;
       }
       public byte Age { get; private set; }
       public string Name { get; private set; }
   }

   public class NameNemesis
   {
       public NameNemesis(string name, string nemesis)
       {
           Name = name;
           Nemesis = nemesis;
       }
       public string Name { get; private set; }
       public string Nemesis { get; private set; }
   }

   public class DataContext
   {
       public DataContext()
       {
           AgeName = new List<AgeName>();
           NameNemesis = new List<NameNemesis>();
       }
       public List<AgeName> AgeName { get; set; }
       public List<NameNemesis> NameNemesis { get; set; }
   }

   public class AgeNameNemesis
   {
       public AgeNameNemesis(byte age, string name, string nemesis)
       {
           Age = age;
           Name = name;
           Nemesis = nemesis;
       }
       public byte Age { get; private set; }
       public string Name { get; private set; }
       public string Nemesis { get; private set; }
   }

   class Program
   {
       public static void Main()
       {
           var data = GetData();
           var result = ExecuteHashJoin(data);
           WriteResultToConsole(result);
       }

       private static void WriteResultToConsole(List<AgeNameNemesis> result)
       {
           result.ForEach(ageNameNemesis => Console.WriteLine("Age: {0}, Name: {1}, Nemesis: {2}",
               ageNameNemesis.Age, ageNameNemesis.Name, ageNameNemesis.Nemesis));
       }

       private static List<AgeNameNemesis> ExecuteHashJoin(DataContext data)
       {
           return (data.AgeName.Join(data.NameNemesis, 
               ageName => ageName.Name, nameNemesis => nameNemesis.Name,
               (ageName, nameNemesis) => new AgeNameNemesis(ageName.Age, ageName.Name, nameNemesis.Nemesis)))
               .ToList();
       }

       private static DataContext GetData()
       {
           var context = new DataContext();

           context.AgeName.AddRange(new [] {
                   new AgeName(27, "Jonah"), 
                   new AgeName(18, "Alan"), 
                   new AgeName(28, "Glory"), 
                   new AgeName(18, "Popeye"), 
                   new AgeName(28, "Alan")
               });

           context.NameNemesis.AddRange(new[]
           {
               new NameNemesis("Jonah", "Whales"),
               new NameNemesis("Jonah", "Spiders"),
               new NameNemesis("Alan", "Ghosts"),
               new NameNemesis("Alan", "Zombies"),
               new NameNemesis("Glory", "Buffy")
           });

           return context;
       }
   }

}</lang>

Output:

Age: 27, Name: Jonah, Nemesis: Whales
Age: 27, Name: Jonah, Nemesis: Spiders
Age: 18, Name: Alan, Nemesis: Ghosts
Age: 18, Name: Alan, Nemesis: Zombies
Age: 28, Name: Glory, Nemesis: Buffy
Age: 28, Name: Alan, Nemesis: Ghosts
Age: 28, Name: Alan, Nemesis: Zombies

Common Lisp

<lang lisp>(defparameter *table-A* '((27 "Jonah") (18 "Alan") (28 "Glory") (18 "Popeye") (28 "Alan")))

(defparameter *table-B* '(("Jonah" "Whales") ("Jonah" "Spiders") ("Alan" "Ghosts") ("Alan" "Zombies") ("Glory" "Buffy")))

Hash phase

(defparameter *hash-table* (make-hash-table :test #'equal))

(loop for (i r) in *table-A*

  for value = (gethash r *hash-table* (list nil))  do
  (setf (gethash r *hash-table*) value)
  (push (list i r) (first value)))

Join phase

(loop for (i r) in *table-B* do

    (let ((val (car (gethash i *hash-table*))))
      (loop for (a b) in val do

(format t "{~a ~a} {~a ~a}~%" a b i r))))</lang>

Output:

{27 Jonah} {Jonah Whales}
{27 Jonah} {Jonah Spiders}
{28 Alan} {Alan Ghosts}
{18 Alan} {Alan Ghosts}
{28 Alan} {Alan Zombies}
{18 Alan} {Alan Zombies}
{28 Glory} {Glory Buffy}

D

Translation of: Python

<lang d>import std.stdio, std.typecons;

auto hashJoin(size_t index1, size_t index2, T1, T2)

            (in T1[] table1, in T2[] table2) pure /*nothrow*/

if (is(typeof(T1[index1]) == typeof(T2[index2]))) {

   T1[][typeof(T1[index1])] h;
   // Hash phase.
   foreach (s; table1)
       h[s[index1]] ~= s;
   // Join phase.
   Tuple!(T1, const T2)[] result;
   foreach (r; table2)
       foreach (s; h.get(r[index2], []))
           result ~= tuple(s, r);
   return result;

}

void main() {

   alias T = tuple;
   immutable table1 = [T(27, "Jonah"),
                       T(18, "Alan"),
                       T(28, "Glory"),
                       T(18, "Popeye"),
                       T(28, "Alan")];
   immutable table2 = [T("Jonah", "Whales"),
                       T("Jonah", "Spiders"),
                       T("Alan",  "Ghosts"),
                       T("Alan",  "Zombies"),
                       T("Glory", "Buffy")];

   foreach (row; hashJoin!(1, 0)(table1, table2))
       writefln("(%s, %5s) (%5s, %7s)", row[0][], row[1][]);

}</lang>

Output:

(27, Jonah) (Jonah,  Whales)
(27, Jonah) (Jonah, Spiders)
(18,  Alan) ( Alan,  Ghosts)
(28,  Alan) ( Alan,  Ghosts)
(18,  Alan) ( Alan, Zombies)
(28,  Alan) ( Alan, Zombies)
(28, Glory) (Glory,   Buffy)

Erlang

<lang Erlang> -module( hash_join ).

-export( [task/0] ).

task() ->

   Table_1 = [{27, "Jonah"}, {18, "Alan"}, {28, "Glory"}, {18, "Popeye"}, {28, "Alan"}],
   Table_2 = [{"Jonah", "Whales"}, {"Jonah", "Spiders"}, {"Alan", "Ghosts"}, {"Alan", "Zombies"}, {"Glory", "Buffy"}],
   Dict = lists:foldl( fun dict_append/2, dict:new(), Table_1 ),
   lists:flatten( [dict_find( X, Dict ) || X <- Table_2] ).

dict_append( {Key, Value}, Acc ) -> dict:append( Value, {Key, Value}, Acc ).

dict_find( {Key, Value}, Dict ) -> dict_find( dict:find(Key, Dict), Key, Value ).

dict_find( error, _Key, _Value ) -> []; dict_find( {ok, Values}, Key, Value ) -> [{X, {Key, Value}} || X <- Values]. </lang>

Output:

15> hash_join:task().
[{{27,"Jonah"},{"Jonah","Whales"}},
 {{27,"Jonah"},{"Jonah","Spiders"}},
 {{18,"Alan"},{"Alan","Ghosts"}},
 {{28,"Alan"},{"Alan","Ghosts"}},
 {{18,"Alan"},{"Alan","Zombies"}},
 {{28,"Alan"},{"Alan","Zombies"}},
 {{28,"Glory"},{"Glory","Buffy"}}]

Go

<lang go>package main

import "fmt"

func main() {

   tableA := []struct {
       value int
       key   string
   }{
       {27, "Jonah"}, {18, "Alan"}, {28, "Glory"}, {18, "Popeye"},
       {28, "Alan"},
   }
   tableB := []struct {
       key   string
       value string
   }{
       {"Jonah", "Whales"}, {"Jonah", "Spiders"},
       {"Alan", "Ghosts"}, {"Alan", "Zombies"}, {"Glory", "Buffy"},
   }
   // hash phase
   h := map[string][]int{}
   for i, r := range tableA {
       h[r.key] = append(h[r.key], i)
   }
   // join phase
   for _, x := range tableB {
       for _, a := range h[x.key] {
           fmt.Println(tableA[a], x)
       }
   }

}</lang>

Output:

{27 Jonah} {Jonah Whales}
{27 Jonah} {Jonah Spiders}
{18 Alan} {Alan Ghosts}
{28 Alan} {Alan Ghosts}
{18 Alan} {Alan Zombies}
{28 Alan} {Alan Zombies}
{28 Glory} {Glory Buffy}

Haskell

The ST monad allows us to utilise mutable memory behind a referentially transparent interface, allowing us to use hashtables (efficiently).

Our hashJoin function takes two lists and two selector functions.

Placing all relations with the same selector value in a list in the hashtable allows us to join many to one/many relations. <lang Haskell>{-# LANGUAGE LambdaCase, TupleSections #-} import qualified Data.HashTable.ST.Basic as H import Data.Hashable import Control.Monad.ST import Control.Monad import Data.STRef

hashJoin :: (Eq k, Hashable k) =>

           [t] -> (t -> k) -> [a] -> (a -> k) -> [(t, a)]

hashJoin xs fx ys fy = runST $ do

 l <- newSTRef []
 ht <- H.new
 forM_ ys $ \y -> H.insert ht (fy y) =<< 
   (H.lookup ht (fy y) >>= \case
     Nothing -> return [y]
     Just v -> return (y:v))
 forM_ xs $ \x -> do
   H.lookup ht (fx x) >>= \case
     Nothing -> return ()
     Just v -> modifySTRef' l ((map (x,)  v) ++) 
 readSTRef l

test = mapM_ print $ hashJoin

   [(1, "Jonah"), (2, "Alan"), (3, "Glory"), (4, "Popeye")]
       snd
   [("Jonah", "Whales"), ("Jonah", "Spiders"), 
     ("Alan", "Ghosts"), ("Alan", "Zombies"), ("Glory", "Buffy")]
       fst

</lang>

λ> test
((3,"Glory"),("Glory","Buffy"))
((2,"Alan"),("Alan","Zombies"))
((2,"Alan"),("Alan","Ghosts"))
((1,"Jonah"),("Jonah","Spiders"))
((1,"Jonah"),("Jonah","Whales"))

The task require hashtables; however, a cleaner and more functional solution would be to use Data.Map (based on binary trees): <lang Haskell>{-# LANGUAGE TupleSections #-} import qualified Data.Map as M import Data.List import Data.Maybe import Control.Applicative

mapJoin xs fx ys fy = joined

 where yMap = foldl' f M.empty ys
       f m y = M.insertWith (++) (fy y) [y] m
       joined = concat . catMaybes . 
                map (\x -> map (x,) <$> M.lookup (fx x) yMap) $ xs

test = mapM_ print $ mapJoin

   [(1, "Jonah"), (2, "Alan"), (3, "Glory"), (4, "Popeye")]
       snd
   [("Jonah", "Whales"), ("Jonah", "Spiders"), 
    ("Alan", "Ghosts"), ("Alan", "Zombies"), ("Glory", "Buffy")]
       fst

</lang>

λ> test
((1,"Jonah"),("Jonah","Spiders"))
((1,"Jonah"),("Jonah","Whales"))
((2,"Alan"),("Alan","Zombies"))
((2,"Alan"),("Alan","Ghosts"))
((3,"Glory"),("Glory","Buffy"))

J

Data:

<lang J>table1=: ;:;._2(0 :0)

 27 Jonah
 18 Alan
 28 Glory
 18 Popeye
 28 Alan

)

table2=: ;:;._2(0 :0)

 Jonah Whales
 Jonah Spiders
 Alan Ghosts
 Alan Zombies
 Glory Buffy

)</lang>

The task does not specify the hash function to use, so we'll use an identity function. But SHA-1 could be used instead, with a little more work (you'd need to convert the name into the bit vector needed by the SHA-1 interface). Practically speaking, though, the only benefit of SHA-1 in this context would be to slow down the join.

Implementation:

<lang J>hash=: ] dojoin=:3 :0

 c1=. {.{.y
 c2=. (1 {"1 y) -. a:
 c3=. (2 {"1 y) -. a:
 >{c1;c2;<c3

)

JOIN=: ; -.&a: ,/each(hash@{."1 <@dojoin/. ]) (1 1 0&#inv@|."1 table1), 1 0 1#inv"1 table2</lang>

Result:

<lang J> JOIN ┌─────┬──┬───────┐ │Jonah│27│Whales │ ├─────┼──┼───────┤ │Jonah│27│Spiders│ ├─────┼──┼───────┤ │Alan │18│Ghosts │ ├─────┼──┼───────┤ │Alan │18│Zombies│ ├─────┼──┼───────┤ │Alan │28│Ghosts │ ├─────┼──┼───────┤ │Alan │28│Zombies│ ├─────┼──┼───────┤ │Glory│28│Buffy │ └─────┴──┴───────┘</lang>

OCaml

This exploits the fact that Hashtbl implements a hash table that can store multiple values for a key, for an especially simple solution: <lang ocaml>let hash_join table1 f1 table2 f2 =

 let h = Hashtbl.create 42 in
 (* hash phase *)
 List.iter (fun s ->
   Hashtbl.add h (f1 s) s) table1;
 (* join phase *)
 List.concat (List.map (fun r ->
   List.map (fun s -> s, r) (Hashtbl.find_all h (f2 r))) table2)</lang>

Sample run:

# let table1 = [27, "Jonah";
                18, "Alan";
                28, "Glory";
                18, "Popeye";
                28, "Alan"];;
# let table2 = ["Jonah", "Whales";
                "Jonah", "Spiders";
                "Alan", "Ghosts";
                "Alan", "Zombies";
                "Glory", "Buffy"];;
# hash_join table1 snd table2 fst;;
- : ((int * string) * (string * string)) list =
[((27, "Jonah"), ("Jonah", "Whales")); ((27, "Jonah"), ("Jonah", "Spiders"));
 ((28, "Alan"), ("Alan", "Ghosts")); ((18, "Alan"), ("Alan", "Ghosts"));
 ((28, "Alan"), ("Alan", "Zombies")); ((18, "Alan"), ("Alan", "Zombies"));
 ((28, "Glory"), ("Glory", "Buffy"))]

Perl

<lang perl>use Data::Dumper qw(Dumper);

sub hashJoin {

   my ($table1, $index1, $table2, $index2) = @_;
   my %h;
   # hash phase
   foreach my $s (@$table1) {

push @{ $h{$s->[$index1]} }, $s;

   }
   # join phase
   map { my $r = $_;

map [$_, $r], @{ $h{$r->[$index2]} }

   } @$table2;

}

@table1 = ([27, "Jonah"],

          [18, "Alan"],
          [28, "Glory"],
          [18, "Popeye"],
          [28, "Alan"]);

@table2 = (["Jonah", "Whales"],

          ["Jonah", "Spiders"],
          ["Alan", "Ghosts"],
          ["Alan", "Zombies"],
          ["Glory", "Buffy"]);

$Data::Dumper::Indent = 0; foreach my $row (hashJoin(\@table1, 1, \@table2, 0)) {

   print Dumper($row), "\n";

}</lang>

Output:

$VAR1 = [[27,'Jonah'],['Jonah','Whales']];
$VAR1 = [[27,'Jonah'],['Jonah','Spiders']];
$VAR1 = [[18,'Alan'],['Alan','Ghosts']];
$VAR1 = [[28,'Alan'],['Alan','Ghosts']];
$VAR1 = [[18,'Alan'],['Alan','Zombies']];
$VAR1 = [[28,'Alan'],['Alan','Zombies']];
$VAR1 = [[28,'Glory'],['Glory','Buffy']];

Perl 6

<lang perl6>my @A = [1, "Jonah"],

       [2, "Alan"],
       [3, "Glory"],
       [4, "Popeye"];

my @B = ["Jonah", "Whales"],

       ["Jonah", "Spiders"],
       ["Alan", "Ghosts"],
       ["Alan", "Zombies"],
       ["Glory", "Buffy"];

sub hash-join(@a, &a, @b, &b) {

   my %hash{Any};
   %hash{.&a} = $_ for @a;
   ([%hash{.&b} // next, $_] for @b);

}

.perl.say for hash-join @A, *.[1], @B, *.[0];</lang>

Output:

[[1, "Jonah"], ["Jonah", "Whales"]]
[[1, "Jonah"], ["Jonah", "Spiders"]]
[[2, "Alan"], ["Alan", "Ghosts"]]
[[2, "Alan"], ["Alan", "Zombies"]]
[[3, "Glory"], ["Glory", "Buffy"]]

PHP

<lang php><?php function hashJoin($table1, $index1, $table2, $index2) {

   // hash phase
   foreach ($table1 as $s)
       $h[$s[$index1]][] = $s;
   // join phase
   foreach ($table2 as $r)
   	foreach ($h[$r[$index2]] as $s)

$result[] = array($s, $r);

   return $result;

}

$table1 = array(array(27, "Jonah"),

          array(18, "Alan"),
          array(28, "Glory"),
          array(18, "Popeye"),
          array(28, "Alan"));

$table2 = array(array("Jonah", "Whales"),

          array("Jonah", "Spiders"),
          array("Alan", "Ghosts"),
          array("Alan", "Zombies"),
          array("Glory", "Buffy"),
          array("Bob", "foo"));

foreach (hashJoin($table1, 1, $table2, 0) as $row)

   print_r($row);

?></lang>

Output:

Array
(
    [0] => Array
        (
            [0] => 27
            [1] => Jonah
        )

    [1] => Array
        (
            [0] => Jonah
            [1] => Whales
        )

)
Array
(
    [0] => Array
        (
            [0] => 27
            [1] => Jonah
        )

    [1] => Array
        (
            [0] => Jonah
            [1] => Spiders
        )

)
Array
(
    [0] => Array
        (
            [0] => 18
            [1] => Alan
        )

    [1] => Array
        (
            [0] => Alan
            [1] => Ghosts
        )

)
Array
(
    [0] => Array
        (
            [0] => 28
            [1] => Alan
        )

    [1] => Array
        (
            [0] => Alan
            [1] => Ghosts
        )

)
Array
(
    [0] => Array
        (
            [0] => 18
            [1] => Alan
        )

    [1] => Array
        (
            [0] => Alan
            [1] => Zombies
        )

)
Array
(
    [0] => Array
        (
            [0] => 28
            [1] => Alan
        )

    [1] => Array
        (
            [0] => Alan
            [1] => Zombies
        )

)
Array
(
    [0] => Array
        (
            [0] => 28
            [1] => Glory
        )

    [1] => Array
        (
            [0] => Glory
            [1] => Buffy
        )

)

Python

<lang python>from collections import defaultdict

def hashJoin(table1, index1, table2, index2):

   h = defaultdict(list)
   # hash phase
   for s in table1:
       h[s[index1]].append(s)
   # join phase
   return [(s, r) for r in table2 for s in h[r[index2]]]

table1 = [(27, "Jonah"),

         (18, "Alan"),
         (28, "Glory"),
         (18, "Popeye"),
         (28, "Alan")]

table2 = [("Jonah", "Whales"),

         ("Jonah", "Spiders"),
         ("Alan", "Ghosts"),
         ("Alan", "Zombies"),
         ("Glory", "Buffy")]

for row in hashJoin(table1, 1, table2, 0):

   print(row)</lang>

Output:

((27, 'Jonah'), ('Jonah', 'Whales'))
((27, 'Jonah'), ('Jonah', 'Spiders'))
((18, 'Alan'), ('Alan', 'Ghosts'))
((28, 'Alan'), ('Alan', 'Ghosts'))
((18, 'Alan'), ('Alan', 'Zombies'))
((28, 'Alan'), ('Alan', 'Zombies'))
((28, 'Glory'), ('Glory', 'Buffy'))

Racket

<lang racket>#lang racket (struct A (age name)) (struct B (name nemesis)) (struct AB (name age nemesis) #:transparent)

(define Ages-table

 (list (A 27 "Jonah") (A 18 "Alan")
       (A 28 "Glory") (A 18 "Popeye")
       (A 28 "Alan")))

(define Nemeses-table

 (list
  (B "Jonah" "Whales") (B "Jonah" "Spiders")
  (B "Alan" "Ghosts") (B "Alan" "Zombies")
  (B "Glory" "Buffy")))

Hash phase

(define name->ages#

 (for/fold ((rv (hash)))
   ((a (in-list Ages-table)))
   (match-define (A age name) a)
   (hash-update rv name (λ (ages) (append ages (list age))) null)))

Join phase

(for*/list

   ((b (in-list Nemeses-table))
    (key (in-value (B-name b)))
    (age (in-list (hash-ref name->ages# key))))
 (AB key age (B-nemesis b)))</lang>

Output:

(#(struct:AB "Jonah" 27 "Whales")
 #(struct:AB "Jonah" 27 "Spiders")
 #(struct:AB "Alan" 18 "Ghosts")
 #(struct:AB "Alan" 28 "Ghosts")
 #(struct:AB "Alan" 18 "Zombies")
 #(struct:AB "Alan" 28 "Zombies")
 #(struct:AB "Glory" 28 "Buffy"))

REXX

<lang rexx>/*REXX pgm demonstrates the classic hash join algorithm for 2 relations.*/

S.  =                       ;     R.  =
S.1 = 27 'Jonah'            ;     R.1 = 'Jonah Whales'
S.2 = 18 'Alan'             ;     R.2 = 'Jonah Spiders'
S.3 = 28 'Glory'            ;     R.3 = 'Alan Ghosts'
S.4 = 18 'Popeye'           ;     R.4 = 'Alan Zombies'
S.5 = 28 'Alan'             ;     R.5 = 'Glory Buffy'

hash.= /*initialize the hash table. */

      do #=1  while S.#\==;     parse var S.# age name /*extract info*/
      hash.name=hash.name #           /*build a hash table entry.      */
      end   /*#*/                     /* [↑]  REXX does the heavy work.*/

=#-1 /*adjust for DO loop (#) overage.*/

      do j=1  while R.j\==          /*process a nemesis for a name.  */
      parse var R.j x nemesis         /*extract name and it's nemesis. */
      if hash.x==  then do          /*Not in hash?   Then a new name.*/
                          #=#+1       /*bump the number of  S entries. */
                          S.#=',' x   /*add new name to the S table.   */
                          hash.x=#    /*add new name to the hash table.*/
                          end         /* [↑]  this DO isn't used today.*/
           do k=1  for words(hash.x);  _=word(hash.x,k)  /*get pointer.*/
           S._=S._  nemesis           /*add nemesis──► applicable hash.*/
           end   /*k*/
      end        /*j*/

_='─' /*character used for separater. */ pad=left(,6-2) /*spacing used in hdr/sep/output.*/ say pad center('age',3) pad center('name',20) pad center('nemesis',30) say pad center('───',3) pad center( ,20,_) pad center( ,30,_)

    do n=1  for #;     parse  var  S.n    age  name  nems  /*get info. */
    if nems==  then iterate         /*if no nemesis, then don't show.*/
    say pad  right(age,3)  pad  center(name,20)  pad nems  /*show an S.*/
    end   /*n*/
                                      /*stick a fork in it, we're done.*/</lang>

output using the in-code relations (data):

     age              name                         nemesis
     ───      ────────────────────      ──────────────────────────────
      27             Jonah              Whales Spiders
      18              Alan              Ghosts Zombies
      28             Glory              Buffy
      28              Alan              Ghosts Zombies

Ruby

<lang ruby>def hashJoin(table1, index1, table2, index2)

 # hash phase
 h = table1.group_by {|s| s[index1]}
 h.default = []
 # join phase
 table2.collect {|r|
   h[r[index2]].collect {|s| [s, r]}
 }.flatten(1)

end

table1 = [[27, "Jonah"],

         [18, "Alan"],
         [28, "Glory"],
         [18, "Popeye"],
         [28, "Alan"]]

table2 = [["Jonah", "Whales"],

         ["Jonah", "Spiders"],
         ["Alan", "Ghosts"],
         ["Alan", "Zombies"],
         ["Glory", "Buffy"]]

hashJoin(table1, 1, table2, 0).each { |row| p row }</lang>

Output:

[[27, "Jonah"], ["Jonah", "Whales"]]
[[27, "Jonah"], ["Jonah", "Spiders"]]
[[18, "Alan"], ["Alan", "Ghosts"]]
[[28, "Alan"], ["Alan", "Ghosts"]]
[[18, "Alan"], ["Alan", "Zombies"]]
[[28, "Alan"], ["Alan", "Zombies"]]
[[28, "Glory"], ["Glory", "Buffy"]]

SQL

Setting up the data is a bit verbose: <lang sql>create table people (age decimal(3), name varchar(16)); insert into people (age, name) values (27, 'Jonah'); insert into people (age, name) values (18, 'Alan'); insert into people (age, name) values (28, 'Glory'); insert into people (age, name) values (18, 'Popeye'); insert into people (age, name) values (28, 'Alan');

create table nemesises (name varchar(16), nemesis varchar(16)); insert into nemesises (name, nemesis) values ('Jonah', 'Whales'); insert into nemesises (name, nemesis) values ('Jonah', 'Spiders'); insert into nemesises (name, nemesis) values ('Alan', 'Ghosts'); insert into nemesises (name, nemesis) values ('Alan', 'Zombies'); insert into nemesises (name, nemesis) values ('Glory', 'Buffy');</lang>

Doing the join is concise. But we don't actually have control over how the join is implemented, so this might not actually be a hash join... <lang sql>select * from people p join nemesises n on p.name = n.name</lang>

Output:

       AGE NAME             NAME             NEMESIS
---------- ---------------- ---------------- ----------------
        27 Jonah            Jonah            Whales
        27 Jonah            Jonah            Spiders
        28 Alan             Alan             Ghosts
        18 Alan             Alan             Ghosts
        28 Alan             Alan             Zombies
        18 Alan             Alan             Zombies
        28 Glory            Glory            Buffy

Tcl

Tcl uses hash tables to implement both its associative arrays and its dictionaries. <lang tcl>package require Tcl 8.6

Only for lmap, which can be replaced with foreach

proc joinTables {tableA a tableB b} {

   # Optimisation: if the first table is longer, do in reverse order
   if {[llength $tableB] < [llength $tableA]} {

return [lmap pair [joinTables $tableB $b $tableA $a] { lreverse $pair }]

   foreach value $tableA {

lappend hashmap([lindex $value $a]) [lreplace $value $a $a] #dict version# dict lappend hashmap [lindex $value $a] [lreplace $value $a $a]

   }
   set result {}
   foreach value $tableB {

set key [lindex $value $b] if {![info exists hashmap($key)]} continue #dict version# if {![dict exists $hashmap $key]} continue foreach first $hashmap($key) { #dict version# foreach first [dict get $hashmap $key] lappend result [list {*}$first $key {*}[lreplace $value $b $b]] }

   }
   return $result

}

set tableA {

   {27 "Jonah"} {18 "Alan"} {28 "Glory"} {18 "Popeye"} {28 "Alan"}

} set tableB {

   {"Jonah" "Whales"} {"Jonah" "Spiders"} {"Alan" "Ghosts"} {"Alan" "Zombies"}
   {"Glory" "Buffy"}

} set joined [joinTables $tableA 1 $tableB 0] foreach row $joined {

   puts $row

}</lang>

Output:

27 Jonah Whales
27 Jonah Spiders
18 Alan Ghosts
28 Alan Ghosts
18 Alan Zombies
28 Alan Zombies
28 Glory Buffy

TXR

Generic hash join. Arguments left-key and right-key are functions applied to the elements of the left and right sequences to retrieve the join key.

<lang txr>@(do

  (defvar age-name '((27 Jonah)
                     (18 Alan)
                     (28 Glory)
                     (18 Popeye)
                     (28 Alan)))

  (defvar nemesis-name '((Jonah Whales)
                         (Jonah Spiders)
                         (Alan Ghosts)
                         (Alan Zombies)
                         (Glory Buffy)))

  (defun hash-join (left left-key right right-key)
    (let ((join-hash [group-by left-key left])) ;; hash phase
      (append-each ((r-entry right))            ;; join phase
        (collect-each ((l-entry [join-hash [right-key r-entry]]))
          ^(,l-entry ,r-entry)))))

  (format t "~s\n" [hash-join age-name second nemesis-name first]))

</lang>

Output:

$ txr hash-join.txr
(((27 Jonah) (Jonah Whales)) ((27 Jonah) (Jonah Spiders)) ((18 Alan) (Alan Ghosts)) ((28 Alan) (Alan Ghosts)) ((18 Alan) (Alan Zombies)) ((28 Alan) (Alan Zombies)) ((28 Glory) (Glory Buffy)))

zkl

Join two tables by hashing on the common key (name). The resulting join is the intersection of the two tables. <lang zkl>ageName:=T(27,"Jonah", 18,"Alan", 28,"Glory", 18,"Popeye", 28,"Alan"); nameNemesis:=T("Jonah","Whales", "Jonah","Spiders", "Alan","Ghosts",

     "Alan","Zombies", "Glory","Buffy");

fcn addAN(age,name,d){ // keys are names, values are ( (age,...),() )

  if (r:=d.find(name)) d[name] = T(r[0].append(age),r[1]);
  else d.add(name,T(T(age),T));
  d	// return d so pump will use that as result for assignment

} fcn addNN(name,nemesis,d){ // values-->( (age,age...), (nemesis,...) )

  if (r:=d.find(name)){
     ages,nemesises := r;
     d[name] = T(ages,nemesises.append(nemesis));
  }

}

   // (Void.Read,1) --> take existing i, read next one, pass both to next function

var d=ageName.pump(Void,T.fp(Void.Read,1),T(addAN,Dictionary())); nameNemesis.pump(Void,T.fp(Void.Read,1),T(addNN,d));

d.println(); // the union of the two tables d.keys.sort().pump(Console.println,'wrap(name){ //pretty print the join

  val:=d[name]; if (not val[1])return(Void.Skip);
  String(name,":",d[name][1].concat(","));

}) </lang> zkl Dictionaries only have one key

D(Popeye:L(L(18),L()),Glory:L(L(28),L("Buffy")),
  Jonah:L(L(27),L("Whales","Spiders")),Alan:L(L(18,28),L("Ghosts","Zombies")))
Alan:Ghosts,Zombies
Glory:Buffy
Jonah:Whales,Spiders