Hash join: Difference between revisions
Content added Content deleted
m (→{{header|LFE}}: added missing space) |
(jq) |
||
Line 664: | Line 664: | ||
│Glory│28│Buffy │ |
│Glory│28│Buffy │ |
||
└─────┴──┴───────┘</lang> |
└─────┴──┴───────┘</lang> |
||
=={{header|jq}}== |
|||
{{ works with | jq | 1.4}} |
|||
Relational tables can be represented in several ways in JSON, and so |
|||
in this section we present two distinct "hash join" functions in jq: |
|||
* "hashJoin" can be used if the tables are represented as arrays of JSON objects, or as arrays of arrays, but the result may include the join-column twice; |
|||
* "hashJoinArrays" is intended for use if the tables are represented as arrays of arrays, and avoids the duplication mentioned above. |
|||
Both versions are relationally symmetric, and both versions allow the |
|||
join columns to contain any JSON value. To achieve this generality, |
|||
the collision-free hash function, h, is used. |
|||
===hashJoin=== |
|||
<lang jq># hashJoin(table1; key1; table2; key2) expects the two tables to be |
|||
# arrays, either of JSON objects, or of arrays. |
|||
# In the first case, that is, if the table's rows are represented as |
|||
# objects, then key1 should be the key of the join column of table1, |
|||
# and similarly for key2; if the join columns have different names, |
|||
# then they will both be included in the resultant objects. |
|||
# In the second case, that is, if the rows are arrays, then the |
|||
# 0-based indices of the join columns should be specified, and the |
|||
# rows are simply pasted together, resulting in duplication of the |
|||
# join columns. |
|||
# |
|||
def hashJoin(table1; key1; table2; key2): |
|||
# collision-free hash function: |
|||
def h: |
|||
if type == "object" then with_entries(.value = (.value|h)) | tostring |
|||
elif type == "array" then map(h)|tostring |
|||
else (type[0:1]+tostring) |
|||
end; |
|||
# hash phase: |
|||
reduce table1[] as $row |
|||
({}; |
|||
($row[key1]|h) as $key |
|||
| . + { ($key): (.[$key] + [$row]) } ) |
|||
| . as $hash |
|||
# join phase |
|||
| reduce table2[] as $row |
|||
([]; |
|||
($row[key2]|h) as $key |
|||
| if $hash|has($key) then |
|||
reduce $hash[$key][] as $r (.; . + [ $row + $r ] ) |
|||
else . end) |
|||
;</lang> |
|||
'''Example''' |
|||
<lang jq>def table1: |
|||
[ {"age": 27, "name": "Jonah"}, |
|||
{"age": 18, "name": "Alan"}, |
|||
{"age": 28, "name": "Glory"}, |
|||
{"age": 18, "name": "Popeye"}, |
|||
{"age": 28, "name": "Alan"} ] |
|||
; |
|||
def table2: |
|||
[ {"name": "Jonah", "nemesis": "Whales"}, |
|||
{"name": "Jonah", "nemesis": "Spiders"}, |
|||
{"name": "Alan", "nemesis": "Ghosts"}, |
|||
{"name": "Alan", "nemesis": "Zombies"}, |
|||
{"name": "Glory", "nemesis": "Buffy"} ] |
|||
; |
|||
def table1a: |
|||
[[27, "Jonah"], |
|||
[18, "Alan"], |
|||
[28, "Glory"], |
|||
[18, "Popeye"], |
|||
[28, "Alan"] ] |
|||
; |
|||
def table2a: |
|||
[["Jonah", "Whales"], |
|||
["Jonah", "Spiders"], |
|||
["Alan", "Ghosts"], |
|||
["Alan", "Zombies"], |
|||
["Glory", "Buffy"], |
|||
["Holmes", "Moriarty"] ] |
|||
; |
|||
def pp: |
|||
reduce .[] as $row (""; . + "\n" + ($row|tostring)); |
|||
( hashJoin(table1; "name"; table2; "name"), |
|||
hashJoin(table1a; 1; table2a; 0) |
|||
) | pp</lang> |
|||
{{out}} |
|||
<lang sh>$ jq -c -r -n -f HashJoin.jq |
|||
{"age":27,"name":"Jonah","nemesis":"Whales"} |
|||
{"age":27,"name":"Jonah","nemesis":"Spiders"} |
|||
{"age":28,"name":"Alan","nemesis":"Ghosts"} |
|||
{"age":28,"name":"Alan","nemesis":"Zombies"} |
|||
{"age":28,"name":"Glory","nemesis":"Buffy"} |
|||
[27,"Jonah","Jonah","Whales"] |
|||
[27,"Jonah","Jonah","Spiders"] |
|||
[28,"Alan","Alan","Ghosts"] |
|||
[28,"Alan","Alan","Zombies"] |
|||
[28,"Glory","Glory","Buffy"]</lang> |
|||
===hashJoinArrays=== |
|||
<lang jq># The tables should be arrays of arrays; |
|||
# index1 and index2 should be the 0-based indices of the join columns. |
|||
# |
|||
def hashJoinArrays(table1; index1; table2; index2): |
|||
# collision-free hash function: |
|||
def h: |
|||
if type == "object" then with_entries(.value = (.value|h)) | tostring |
|||
elif type == "array" then map(h)|tostring |
|||
else (type[0:1]+tostring) |
|||
end; |
|||
# hash phase: |
|||
reduce table1[] as $row |
|||
({}; |
|||
($row[index1]|h) as $key |
|||
| . + (.[$key] += [ $row ]) ) |
|||
| . as $hash |
|||
# join phase |
|||
| reduce table2[] as $row |
|||
([]; |
|||
($row[index2]|h) as $key |
|||
| if $hash|has($key) then |
|||
reduce $hash[$key][] as $r |
|||
(.; |
|||
. + [ $r + $row[0:index2] + $row[index2+1:] ] ) |
|||
else . end) |
|||
;</lang> |
|||
'''Example''' |
|||
In the following example, the previously defined pretty-print function (pp) and tables (table1 and table2) |
|||
are used, so their definitions are not repeated here. |
|||
<lang jq>hashJoinArrays(table1; 1; table2; 0) | pp</lang> |
|||
{{out}} |
|||
<lang sh>$ jq -c -r -n -f HashJoinArrays.jq |
|||
[27,"Jonah","Whales"] |
|||
[27,"Jonah","Spiders"] |
|||
[28,"Alan","Ghosts"] |
|||
[28,"Alan","Zombies"] |
|||
[28,"Glory","Buffy"]</lang> |
|||
=={{header|LFE}}== |
=={{header|LFE}}== |