Jaccard index: Difference between revisions
JsfasdF256 (talk | contribs) No edit summary |
m (→{{header|Wren}}: Changed to Wren S/H) |
||
(25 intermediate revisions by 17 users not shown) | |||
Line 16: | Line 16: | ||
Write a program that computes the Jaccard index for every ordered pairing (to show that J(A, B) and J(B, A) are the same) of these sets, including self-pairings. |
Write a program that computes the Jaccard index for every ordered pairing (to show that J(A, B) and J(B, A) are the same) of these sets, including self-pairings. |
||
<br><br> |
<br><br> |
||
=={{header|APL}}== |
|||
<syntaxhighlight lang="apl">task←{ |
|||
jaccard ← (≢∩)÷(≢∪) |
|||
A ← ⍬ |
|||
B ← 1 2 3 4 5 |
|||
C ← 1 3 5 7 9 |
|||
D ← 2 4 6 8 10 |
|||
E ← 2 3 5 7 |
|||
F ← ,8 |
|||
'.ABCDEF' ⍪ 'ABCDEF' , ∘.jaccard⍨ A B C D E F |
|||
}</syntaxhighlight> |
|||
{{out}} |
|||
<pre>. A B C D E F |
|||
A 1 0 0 0 0 0 |
|||
B 0 1 0.4285714286 0.25 0.5 0 |
|||
C 0 0.4285714286 1 0 0.5 0 |
|||
D 0 0.25 0 1 0.125 0.2 |
|||
E 0 0.5 0.5 0.125 1 0 |
|||
F 0 0 0 0.2 0 1 </pre> |
|||
=={{header|Arturo}}== |
|||
<syntaxhighlight lang="arturo">jaccard: function [a b][ |
|||
if and? empty? a empty? b -> return to :rational 1 |
|||
x: size intersection a b |
|||
y: size union a b |
|||
fdiv to :rational x to :rational y |
|||
] |
|||
sets: [ |
|||
[] |
|||
[1 2 3 4 5] |
|||
[1 3 5 7 9] |
|||
[2 4 6 8 10] |
|||
[2 3 5 7] |
|||
[8] |
|||
] |
|||
loop combine.repeated.by: 2 sets 'p -> |
|||
print [pad ~"|p\0|" 12 pad ~"|p\1|" 12 "->" jaccard p\0 p\1]</syntaxhighlight> |
|||
{{out}} |
|||
<pre> [] [] -> 1/1 |
|||
[] [1 2 3 4 5] -> 0/1 |
|||
[] [1 3 5 7 9] -> 0/1 |
|||
[] [2 4 6 8 10] -> 0/1 |
|||
[] [2 3 5 7] -> 0/1 |
|||
[] [8] -> 0/1 |
|||
[1 2 3 4 5] [1 2 3 4 5] -> 1/1 |
|||
[1 2 3 4 5] [1 3 5 7 9] -> 3/7 |
|||
[1 2 3 4 5] [2 4 6 8 10] -> 1/4 |
|||
[1 2 3 4 5] [2 3 5 7] -> 1/2 |
|||
[1 2 3 4 5] [8] -> 0/1 |
|||
[1 3 5 7 9] [1 3 5 7 9] -> 1/1 |
|||
[1 3 5 7 9] [2 4 6 8 10] -> 0/1 |
|||
[1 3 5 7 9] [2 3 5 7] -> 1/2 |
|||
[1 3 5 7 9] [8] -> 0/1 |
|||
[2 4 6 8 10] [2 4 6 8 10] -> 1/1 |
|||
[2 4 6 8 10] [2 3 5 7] -> 1/8 |
|||
[2 4 6 8 10] [8] -> 1/5 |
|||
[2 3 5 7] [2 3 5 7] -> 1/1 |
|||
[2 3 5 7] [8] -> 0/1 |
|||
[8] [8] -> 1/1</pre> |
|||
=={{header|BQN}}== |
|||
<syntaxhighlight lang="bqn">Jaccard ← ≡◶⟨∊ ÷○(+´) ∊∘∾, 1⟩ |
|||
a ← ⟨⟩ |
|||
b ← ⟨1,2,3,4,5⟩ |
|||
c ← ⟨1,3,5,7,9⟩ |
|||
d ← ⟨2,4,6,8,10⟩ |
|||
e ← ⟨2,3,5,7⟩ |
|||
f ← ⟨8⟩ |
|||
Jaccard⌜˜ ⟨a,b,c,d,e,f⟩</syntaxhighlight> |
|||
{{out}} |
|||
<pre>┌─ |
|||
╵ 1 0 0 0 0 0 |
|||
0 1 0.42857142857142855 0.25 0.5 0 |
|||
0 0.42857142857142855 1 0 0.5 0 |
|||
0 0.25 0 1 0.125 0.2 |
|||
0 0.5 0.5 0.125 1 0 |
|||
0 0 0 0.2 0 1 |
|||
┘</pre> |
|||
=={{header|Emacs Lisp}}== |
|||
<syntaxhighlight lang="lisp"> |
|||
(let* ((v1 '(A () |
|||
B (1 2 3 4 5) |
|||
C (1 3 5 7 9) |
|||
D (2 4 6 8 10) |
|||
E (2 3 5 7) |
|||
F (8))) |
|||
(keys1 (seq-filter (lambda (x) (not (null x))) |
|||
(cl-loop for s1 being the elements of v1 |
|||
using (index idx) |
|||
collect (if (= (% idx 2) 0) s1 nil))))) |
|||
(switch-to-buffer-other-window "*similarity result*") |
|||
(erase-buffer) |
|||
(defun similarity (p1 p2) |
|||
(if (and (null p1) (null p2)) 1 |
|||
(/ (float (seq-length (seq-intersection p1 p2))) |
|||
(float (seq-length (seq-uniq (seq-union p1 p2))))) ) ) |
|||
(insert (format " %s\n" |
|||
(cl-loop for s1 being the elements of keys1 concat |
|||
(format " %s" s1)))) |
|||
(cl-loop for s1 in keys1 do |
|||
(insert (format "%s %s\n" s1 |
|||
(cl-loop for s2 in keys1 concat |
|||
(format " %3.3f" (similarity (plist-get v1 s1) (plist-get v1 s2) )))))) |
|||
) |
|||
</syntaxhighlight> |
|||
{{out}} |
|||
<pre> |
|||
A B C D E F |
|||
A 1.000 0.000 0.000 0.000 0.000 0.000 |
|||
B 0.000 1.000 0.429 0.250 0.500 0.000 |
|||
C 0.000 0.429 1.000 0.000 0.500 0.000 |
|||
D 0.000 0.250 0.000 1.000 0.125 0.200 |
|||
E 0.000 0.500 0.500 0.125 1.000 0.000 |
|||
F 0.000 0.000 0.000 0.200 0.000 1.000 |
|||
</pre> |
|||
=={{header|Factor}}== |
=={{header|Factor}}== |
||
{{works with|Factor|0.99 2021-06-02}} |
{{works with|Factor|0.99 2021-06-02}} |
||
< |
<syntaxhighlight lang="factor">USING: assocs formatting grouping kernel math math.combinatorics |
||
prettyprint sequences sequences.repeating sets ; |
prettyprint sequences sequences.repeating sets ; |
||
Line 28: | Line 158: | ||
{ { } { 1 2 3 4 5 } { 1 3 5 7 9 } { 2 4 6 8 10 } { 2 3 5 7 } { 8 } } |
{ { } { 1 2 3 4 5 } { 1 3 5 7 9 } { 2 4 6 8 10 } { 2 3 5 7 } { 8 } } |
||
[ 2 <combinations> ] [ 2 repeat 2 group append ] bi |
[ 2 <combinations> ] [ 2 repeat 2 group append ] bi |
||
[ 2dup jaccard "%u %u -> %u\n" printf ] assoc-each</ |
[ 2dup jaccard "%u %u -> %u\n" printf ] assoc-each</syntaxhighlight> |
||
{{out}} |
{{out}} |
||
<pre> |
<pre> |
||
Line 52: | Line 182: | ||
{ 2 3 5 7 } { 2 3 5 7 } -> 1 |
{ 2 3 5 7 } { 2 3 5 7 } -> 1 |
||
{ 8 } { 8 } -> 1 |
{ 8 } { 8 } -> 1 |
||
</pre> |
|||
=={{header|Haskell}}== |
|||
<syntaxhighlight lang="haskell">import Control.Applicative (liftA2) |
|||
import Data.List (genericLength, intersect, nub, union) |
|||
import Data.List.Split (chunksOf) |
|||
import Data.Ratio (denominator, numerator) |
|||
import Text.Tabular (Header(..), Properties(..), Table(..)) |
|||
import Text.Tabular.AsciiArt (render) |
|||
-- The Jaccard index of two sets. If both sets are empty we define the index to |
|||
-- be 1. |
|||
jaccard :: (Eq a, Fractional b) => [a] -> [a] -> b |
|||
jaccard [] [] = 1 |
|||
jaccard xs ys = let uxs = nub xs -- unique xs |
|||
isz = genericLength $ intersect uxs ys |
|||
usz = genericLength $ union uxs ys |
|||
in isz / usz |
|||
-- A table of Jaccard indexes for all pairs of sets given in the argument. |
|||
-- Associated with each set is its "name", which is only used for display |
|||
-- purposes. |
|||
jaccardTable :: Eq a => [(String, [a])] -> String |
|||
jaccardTable xs = render id id showRat |
|||
$ Table (Group SingleLine $ map Header names) |
|||
(Group SingleLine $ map Header names) |
|||
$ chunksOf (length xs) |
|||
$ map (uncurry jaccard) |
|||
$ allPairs sets |
|||
where names = map fst xs |
|||
sets = map snd xs |
|||
-- Show a rational number as numerator/denominator. If the denominator is 1 |
|||
-- then just show the numerator. |
|||
showRat :: Rational -> String |
|||
showRat r = case (numerator r, denominator r) of |
|||
(n, 1) -> show n |
|||
(n, d) -> show n ++ "/" ++ show d |
|||
-- All pairs of elements from the list. For example: |
|||
-- |
|||
-- allPairs [1,2] == [(1,1),(1,2),(2,1),(2,2)] |
|||
allPairs :: [a] -> [(a,a)] |
|||
allPairs xs = liftA2 (,) xs xs |
|||
main :: IO () |
|||
main = putStrLn $ jaccardTable [ ("A", [] :: [Int]) |
|||
, ("B", [1, 2, 3, 4, 5]) |
|||
, ("C", [1, 3, 5, 7, 9]) |
|||
, ("D", [2, 4, 6, 8, 10]) |
|||
, ("E", [2, 3, 5, 7]) |
|||
, ("F", [8])]</syntaxhighlight> |
|||
{{out}} |
|||
<pre> |
|||
+---++---+-----+-----+-----+-----+-----+ |
|||
| || A | B | C | D | E | F | |
|||
+===++===+=====+=====+=====+=====+=====+ |
|||
| A || 1 | 0 | 0 | 0 | 0 | 0 | |
|||
+---++---+-----+-----+-----+-----+-----+ |
|||
| B || 0 | 1 | 3/7 | 1/4 | 1/2 | 0 | |
|||
+---++---+-----+-----+-----+-----+-----+ |
|||
| C || 0 | 3/7 | 1 | 0 | 1/2 | 0 | |
|||
+---++---+-----+-----+-----+-----+-----+ |
|||
| D || 0 | 1/4 | 0 | 1 | 1/8 | 1/5 | |
|||
+---++---+-----+-----+-----+-----+-----+ |
|||
| E || 0 | 1/2 | 1/2 | 1/8 | 1 | 0 | |
|||
+---++---+-----+-----+-----+-----+-----+ |
|||
| F || 0 | 0 | 0 | 1/5 | 0 | 1 | |
|||
+---++---+-----+-----+-----+-----+-----+ |
|||
</pre> |
|||
=={{header|J}}== |
|||
<syntaxhighlight lang="j">jaccard=. +&# (] %&x: -) [ -&# -. |
|||
a=. $~ 0 |
|||
b=. 1 2 3 4 5 |
|||
c=. 1 3 5 7 9 |
|||
d=. 2 4 6 8 10 |
|||
e=. 2 3 5 7 |
|||
f=. , 8 |
|||
jaccard&.>/~ a ; b ; c ; d ; e ; f</syntaxhighlight> |
|||
{{out}} |
|||
<pre>┌─┬───┬───┬───┬───┬───┐ |
|||
│0│0 │0 │0 │0 │0 │ |
|||
├─┼───┼───┼───┼───┼───┤ |
|||
│0│1 │3r7│1r4│1r2│0 │ |
|||
├─┼───┼───┼───┼───┼───┤ |
|||
│0│3r7│1 │0 │1r2│0 │ |
|||
├─┼───┼───┼───┼───┼───┤ |
|||
│0│1r4│0 │1 │1r8│1r5│ |
|||
├─┼───┼───┼───┼───┼───┤ |
|||
│0│1r2│1r2│1r8│1 │0 │ |
|||
├─┼───┼───┼───┼───┼───┤ |
|||
│0│0 │0 │1r5│0 │1 │ |
|||
└─┴───┴───┴───┴───┴───┘</pre> |
|||
=={{header|jq}}== |
|||
{{works with|jq}} |
|||
'''Works with gojq, the Go implementation of jq''' |
|||
In the following: |
|||
* the Jaccard index is presented as a string representing a reduced fraction, e.g. "0" or "1/7". |
|||
* sets are represented by sorted arrays with distinct elements. |
|||
<br> |
|||
'''Preliminaries''' |
|||
<syntaxhighlight lang="jq">def lpad($len): tostring | ($len - length) as $l | (" " * $l)[:$l] + .; |
|||
def gcd(a; b): |
|||
# subfunction expects [a,b] as input |
|||
# i.e. a ~ .[0] and b ~ .[1] |
|||
def rgcd: if .[1] == 0 then .[0] |
|||
else [.[1], .[0] % .[1]] | rgcd |
|||
end; |
|||
[a,b] | rgcd;</syntaxhighlight> |
|||
<br> |
|||
'''The Task''' |
|||
<syntaxhighlight lang="jq">def rjaccardIndex(x; y): |
|||
def i(a;b): a - (a-b); |
|||
def u(a;b): a + (b - i(a;b)) | unique; |
|||
def idivide($i; $j): |
|||
if $i == 0 then "0" |
|||
else gcd($i;$j) as $d |
|||
| if $j == $d then "\($i/$d)" |
|||
else "\($i/$d)/\($j/$d)" |
|||
end |
|||
end; |
|||
if (x|length) == 0 and (y|length) == 0 then "1" |
|||
else idivide( i(x;y)|length; u(x;y)|length ) |
|||
end; |
|||
def a : []; |
|||
def b : [1, 2, 3, 4, 5]; |
|||
def c : [1, 3, 5, 7, 9]; |
|||
def d : [2, 4, 6, 8, 10]; |
|||
def e : [2, 3, 5, 7]; |
|||
def f : [8]; |
|||
def task: |
|||
def tidy: map(lpad(4))|join(" "); |
|||
[a,b,c,d,e,f] as $sets |
|||
| [range(0;$sets|length) | [. + 97] | implode] as $names |
|||
| ([""] + $names | tidy), |
|||
(range(0; $sets|length) as $i |
|||
| ([$i + 97] | implode) as $name |
|||
| $sets[$i] as $x |
|||
| $sets | map(rjaccardIndex($x; .)) | tidy |
|||
| " \($name): \(.)" ) ; |
|||
task</syntaxhighlight> |
|||
{{out}} |
|||
<pre> |
|||
a b c d e f |
|||
a: 1 0 0 0 0 0 |
|||
b: 0 1 3/7 1/4 1/2 0 |
|||
c: 0 3/7 1 0 1/2 0 |
|||
d: 0 1/4 0 1 1/8 1/5 |
|||
e: 0 1/2 1/2 1/8 1 0 |
|||
f: 0 0 0 1/5 0 1 |
|||
</pre> |
|||
=={{header|Julia}}== |
|||
<syntaxhighlight lang="julia">J(A, B) = begin i, u = length(A ∩ B), length(A ∪ B); u == 0 ? 1//1 : i // u end |
|||
A = Int[] |
|||
B = [1, 2, 3, 4, 5] |
|||
C = [1, 3, 5, 7, 9] |
|||
D = [2, 4, 6, 8, 10] |
|||
E = [2, 3, 5, 7] |
|||
F = [8] |
|||
testsets = [A, B, C, D, E, F] |
|||
println("Set A Set B J(A, B)\n", "-"^44) |
|||
for a in testsets, b in testsets |
|||
println(rpad(isempty(a) ? "[]" : a, 18), rpad(isempty(b) ? "[]" : b, 18), |
|||
replace(string(J(a, b)), "//" => "/")) |
|||
end |
|||
</syntaxhighlight>{{out}} |
|||
<pre> |
|||
Set A Set B J(A, B) |
|||
-------------------------------------------- |
|||
[] [] 1/1 |
|||
[] [1, 2, 3, 4, 5] 0/1 |
|||
[] [1, 3, 5, 7, 9] 0/1 |
|||
[] [2, 4, 6, 8, 10] 0/1 |
|||
[] [2, 3, 5, 7] 0/1 |
|||
[] [8] 0/1 |
|||
[1, 2, 3, 4, 5] [] 0/1 |
|||
[1, 2, 3, 4, 5] [1, 2, 3, 4, 5] 1/1 |
|||
[1, 2, 3, 4, 5] [1, 3, 5, 7, 9] 3/7 |
|||
[1, 2, 3, 4, 5] [2, 4, 6, 8, 10] 1/4 |
|||
[1, 2, 3, 4, 5] [2, 3, 5, 7] 1/2 |
|||
[1, 2, 3, 4, 5] [8] 0/1 |
|||
[1, 3, 5, 7, 9] [] 0/1 |
|||
[1, 3, 5, 7, 9] [1, 2, 3, 4, 5] 3/7 |
|||
[1, 3, 5, 7, 9] [1, 3, 5, 7, 9] 1/1 |
|||
[1, 3, 5, 7, 9] [2, 4, 6, 8, 10] 0/1 |
|||
[1, 3, 5, 7, 9] [2, 3, 5, 7] 1/2 |
|||
[1, 3, 5, 7, 9] [8] 0/1 |
|||
[2, 4, 6, 8, 10] [] 0/1 |
|||
[2, 4, 6, 8, 10] [1, 2, 3, 4, 5] 1/4 |
|||
[2, 4, 6, 8, 10] [1, 3, 5, 7, 9] 0/1 |
|||
[2, 4, 6, 8, 10] [2, 4, 6, 8, 10] 1/1 |
|||
[2, 4, 6, 8, 10] [2, 3, 5, 7] 1/8 |
|||
[2, 4, 6, 8, 10] [8] 1/5 |
|||
[2, 3, 5, 7] [] 0/1 |
|||
[2, 3, 5, 7] [1, 2, 3, 4, 5] 1/2 |
|||
[2, 3, 5, 7] [1, 3, 5, 7, 9] 1/2 |
|||
[2, 3, 5, 7] [2, 4, 6, 8, 10] 1/8 |
|||
[2, 3, 5, 7] [2, 3, 5, 7] 1/1 |
|||
[2, 3, 5, 7] [8] 0/1 |
|||
[8] [] 0/1 |
|||
[8] [1, 2, 3, 4, 5] 0/1 |
|||
[8] [1, 3, 5, 7, 9] 0/1 |
|||
[8] [2, 4, 6, 8, 10] 1/5 |
|||
[8] [2, 3, 5, 7] 0/1 |
|||
[8] [8] 1/1 |
|||
</pre> |
|||
=={{header|Nim}}== |
|||
<syntaxhighlight lang="Nim">import std/[rationals, strformat] |
|||
type Set8 = set[int8] |
|||
const |
|||
A: Set8 = {} |
|||
B: Set8 = {1, 2, 3, 4, 5} |
|||
C: Set8 = {1, 3, 5, 7, 9} |
|||
D: Set8 = {2, 4, 6, 8, 10} |
|||
E: Set8 = {2, 3, 5, 7} |
|||
F: Set8 = {8} |
|||
List = [('A', A), ('B', B), ('C', C), ('D', D), ('E', E), ('F', F)] |
|||
func J(a, b: Set8): Rational[int] = |
|||
## Return the Jaccard index. |
|||
## Return 1 if both sets are empty. |
|||
let card1 = card(a * b) |
|||
let card2 = card(a + b) |
|||
result = if card1 == card2: 1 // 1 else: card1 // card2 |
|||
for i in 0..List.high: |
|||
let (name1, set1) = List[i] |
|||
for j in i..List.high: |
|||
let (name2, set2) = List[j] |
|||
echo &"J({name1}, {name2}) = {J(set1, set2)}" |
|||
if i != j: |
|||
echo &"J({name2}, {name1}) = {J(set2, set1)}" |
|||
</syntaxhighlight> |
|||
{{out}} |
|||
<pre>J(A, A) = 1/1 |
|||
J(A, B) = 0/1 |
|||
J(B, A) = 0/1 |
|||
J(A, C) = 0/1 |
|||
J(C, A) = 0/1 |
|||
J(A, D) = 0/1 |
|||
J(D, A) = 0/1 |
|||
J(A, E) = 0/1 |
|||
J(E, A) = 0/1 |
|||
J(A, F) = 0/1 |
|||
J(F, A) = 0/1 |
|||
J(B, B) = 1/1 |
|||
J(B, C) = 3/7 |
|||
J(C, B) = 3/7 |
|||
J(B, D) = 1/4 |
|||
J(D, B) = 1/4 |
|||
J(B, E) = 1/2 |
|||
J(E, B) = 1/2 |
|||
J(B, F) = 0/1 |
|||
J(F, B) = 0/1 |
|||
J(C, C) = 1/1 |
|||
J(C, D) = 0/1 |
|||
J(D, C) = 0/1 |
|||
J(C, E) = 1/2 |
|||
J(E, C) = 1/2 |
|||
J(C, F) = 0/1 |
|||
J(F, C) = 0/1 |
|||
J(D, D) = 1/1 |
|||
J(D, E) = 1/8 |
|||
J(E, D) = 1/8 |
|||
J(D, F) = 1/5 |
|||
J(F, D) = 1/5 |
|||
J(E, E) = 1/1 |
|||
J(E, F) = 0/1 |
|||
J(F, E) = 0/1 |
|||
J(F, F) = 1/1 |
|||
</pre> |
|||
=={{header|Phix}}== |
|||
<!--<syntaxhighlight lang="phix">(phixonline)--> |
|||
<span style="color: #008080;">with</span> <span style="color: #008080;">javascript_semantics</span> |
|||
<span style="color: #008080;">include</span> <span style="color: #000000;">sets</span><span style="color: #0000FF;">.</span><span style="color: #000000;">e</span> |
|||
<span style="color: #008080;">function</span> <span style="color: #000000;">jaccard</span><span style="color: #0000FF;">(</span><span style="color: #004080;">sequence</span> <span style="color: #000000;">a</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">b</span><span style="color: #0000FF;">)</span> |
|||
<span style="color: #004080;">integer</span> <span style="color: #000000;">i</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">intersection</span><span style="color: #0000FF;">(</span><span style="color: #000000;">a</span><span style="color: #0000FF;">,</span><span style="color: #000000;">b</span><span style="color: #0000FF;">)),</span> |
|||
<span style="color: #000000;">u</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">union</span><span style="color: #0000FF;">(</span><span style="color: #000000;">a</span><span style="color: #0000FF;">,</span><span style="color: #000000;">b</span><span style="color: #0000FF;">))</span> |
|||
<span style="color: #008080;">return</span> <span style="color: #008080;">iff</span><span style="color: #0000FF;">(</span><span style="color: #000000;">u</span><span style="color: #0000FF;">=</span><span style="color: #000000;">0</span><span style="color: #0000FF;">?</span><span style="color: #000000;">1</span><span style="color: #0000FF;">:</span><span style="color: #000000;">i</span><span style="color: #0000FF;">/</span><span style="color: #000000;">u</span><span style="color: #0000FF;">)</span> |
|||
<span style="color: #008080;">end</span> <span style="color: #008080;">function</span> |
|||
<span style="color: #008080;">constant</span> <span style="color: #000000;">tests</span> <span style="color: #0000FF;">=</span> <span style="color: #0000FF;">{{},</span> <span style="color: #000080;font-style:italic;">-- A</span> |
|||
<span style="color: #0000FF;">{</span><span style="color: #000000;">1</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">2</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">3</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">4</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">5</span><span style="color: #0000FF;">},</span> <span style="color: #000080;font-style:italic;">-- B</span> |
|||
<span style="color: #0000FF;">{</span><span style="color: #000000;">1</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">3</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">5</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">7</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">9</span><span style="color: #0000FF;">},</span> <span style="color: #000080;font-style:italic;">-- C</span> |
|||
<span style="color: #0000FF;">{</span><span style="color: #000000;">2</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">4</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">6</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">8</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">10</span><span style="color: #0000FF;">},</span> <span style="color: #000080;font-style:italic;">-- D</span> |
|||
<span style="color: #0000FF;">{</span><span style="color: #000000;">2</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">3</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">5</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">7</span><span style="color: #0000FF;">},</span> <span style="color: #000080;font-style:italic;">-- E</span> |
|||
<span style="color: #0000FF;">{</span><span style="color: #000000;">8</span><span style="color: #0000FF;">}}</span> <span style="color: #000080;font-style:italic;">-- F</span> |
|||
<span style="color: #008080;">for</span> <span style="color: #000000;">i</span><span style="color: #0000FF;">=</span><span style="color: #000000;">1</span> <span style="color: #008080;">to</span> <span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">tests</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">do</span> |
|||
<span style="color: #008080;">for</span> <span style="color: #000000;">j</span><span style="color: #0000FF;">=</span><span style="color: #000000;">i</span> <span style="color: #008080;">to</span> <span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">tests</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">do</span> |
|||
<span style="color: #004080;">string</span> <span style="color: #000000;">s</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">sprintf</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"J(%c,%c)"</span><span style="color: #0000FF;">,{</span><span style="color: #008000;">'A'</span><span style="color: #0000FF;">+</span><span style="color: #000000;">i</span><span style="color: #0000FF;">-</span><span style="color: #000000;">1</span><span style="color: #0000FF;">,</span><span style="color: #008000;">'A'</span><span style="color: #0000FF;">+</span><span style="color: #000000;">j</span><span style="color: #0000FF;">-</span><span style="color: #000000;">1</span><span style="color: #0000FF;">})</span> |
|||
<span style="color: #004080;">atom</span> <span style="color: #000000;">jij</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">jaccard</span><span style="color: #0000FF;">(</span><span style="color: #000000;">tests</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">],</span><span style="color: #000000;">tests</span><span style="color: #0000FF;">[</span><span style="color: #000000;">j</span><span style="color: #0000FF;">])</span> |
|||
<span style="color: #008080;">if</span> <span style="color: #000000;">i</span><span style="color: #0000FF;">!=</span><span style="color: #000000;">j</span> <span style="color: #008080;">then</span> |
|||
<span style="color: #004080;">atom</span> <span style="color: #000000;">jji</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">jaccard</span><span style="color: #0000FF;">(</span><span style="color: #000000;">tests</span><span style="color: #0000FF;">[</span><span style="color: #000000;">j</span><span style="color: #0000FF;">],</span><span style="color: #000000;">tests</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">])</span> |
|||
<span style="color: #7060A8;">assert</span><span style="color: #0000FF;">(</span><span style="color: #000000;">jji</span><span style="color: #0000FF;">==</span><span style="color: #000000;">jij</span><span style="color: #0000FF;">)</span> |
|||
<span style="color: #000000;">s</span> <span style="color: #0000FF;">&=</span> <span style="color: #7060A8;">sprintf</span><span style="color: #0000FF;">(</span><span style="color: #008000;">" = J(%c,%c)"</span><span style="color: #0000FF;">,{</span><span style="color: #008000;">'A'</span><span style="color: #0000FF;">+</span><span style="color: #000000;">j</span><span style="color: #0000FF;">-</span><span style="color: #000000;">1</span><span style="color: #0000FF;">,</span><span style="color: #008000;">'A'</span><span style="color: #0000FF;">+</span><span style="color: #000000;">i</span><span style="color: #0000FF;">-</span><span style="color: #000000;">1</span><span style="color: #0000FF;">})</span> |
|||
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span> |
|||
<span style="color: #7060A8;">printf</span><span style="color: #0000FF;">(</span><span style="color: #000000;">1</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"%s = %g\n"</span><span style="color: #0000FF;">,{</span><span style="color: #000000;">s</span><span style="color: #0000FF;">,</span><span style="color: #000000;">jij</span><span style="color: #0000FF;">})</span> |
|||
<span style="color: #008080;">end</span> <span style="color: #008080;">for</span> |
|||
<span style="color: #008080;">end</span> <span style="color: #008080;">for</span> |
|||
<!--</syntaxhighlight>--> |
|||
{{out}} |
|||
<pre> |
|||
J(A,A) = 1 |
|||
J(A,B) = J(B,A) = 0 |
|||
J(A,C) = J(C,A) = 0 |
|||
J(A,D) = J(D,A) = 0 |
|||
J(A,E) = J(E,A) = 0 |
|||
J(A,F) = J(F,A) = 0 |
|||
J(B,B) = 1 |
|||
J(B,C) = J(C,B) = 0.428571 |
|||
J(B,D) = J(D,B) = 0.25 |
|||
J(B,E) = J(E,B) = 0.5 |
|||
J(B,F) = J(F,B) = 0 |
|||
J(C,C) = 1 |
|||
J(C,D) = J(D,C) = 0 |
|||
J(C,E) = J(E,C) = 0.5 |
|||
J(C,F) = J(F,C) = 0 |
|||
J(D,D) = 1 |
|||
J(D,E) = J(E,D) = 0.125 |
|||
J(D,F) = J(F,D) = 0.2 |
|||
J(E,E) = 1 |
|||
J(E,F) = J(F,E) = 0 |
|||
J(F,F) = 1 |
|||
</pre> |
|||
=={{header|Perl}}== |
|||
<syntaxhighlight lang="perl">#!/usr/bin/perl |
|||
use strict; |
|||
use warnings; |
|||
my %sets = ( |
|||
A => [], |
|||
B => [1, 2, 3, 4, 5], |
|||
C => [1, 3, 5, 7, 9], |
|||
D => [2, 4, 6, 8, 10], |
|||
E => [2, 3, 5, 7], |
|||
F => [8], |
|||
); |
|||
use Data::Dump 'dd'; dd \%sets; |
|||
for my $left (sort keys %sets ) |
|||
{ |
|||
for my $right (sort keys %sets ) |
|||
{ |
|||
my %union; |
|||
$union{ $_ }++ for @{ $sets{$left} }, @{ $sets{$right} }; |
|||
print "J($left,$right) = ", |
|||
%union ? (grep $_ == 2, values %union) / (keys %union) : 1, "\n"; |
|||
} |
|||
}</syntaxhighlight> |
|||
{{out}} |
|||
<pre> |
|||
{ |
|||
A => [], |
|||
B => [1 .. 5], |
|||
C => [1, 3, 5, 7, 9], |
|||
D => [2, 4, 6, 8, 10], |
|||
E => [2, 3, 5, 7], |
|||
F => [8], |
|||
} |
|||
J(A,A) = 1 |
|||
J(A,B) = 0 |
|||
J(A,C) = 0 |
|||
J(A,D) = 0 |
|||
J(A,E) = 0 |
|||
J(A,F) = 0 |
|||
J(B,A) = 0 |
|||
J(B,B) = 1 |
|||
J(B,C) = 0.428571428571429 |
|||
J(B,D) = 0.25 |
|||
J(B,E) = 0.5 |
|||
J(B,F) = 0 |
|||
J(C,A) = 0 |
|||
J(C,B) = 0.428571428571429 |
|||
J(C,C) = 1 |
|||
J(C,D) = 0 |
|||
J(C,E) = 0.5 |
|||
J(C,F) = 0 |
|||
J(D,A) = 0 |
|||
J(D,B) = 0.25 |
|||
J(D,C) = 0 |
|||
J(D,D) = 1 |
|||
J(D,E) = 0.125 |
|||
J(D,F) = 0.2 |
|||
J(E,A) = 0 |
|||
J(E,B) = 0.5 |
|||
J(E,C) = 0.5 |
|||
J(E,D) = 0.125 |
|||
J(E,E) = 1 |
|||
J(E,F) = 0 |
|||
J(F,A) = 0 |
|||
J(F,B) = 0 |
|||
J(F,C) = 0 |
|||
J(F,D) = 0.2 |
|||
J(F,E) = 0 |
|||
J(F,F) = 1 |
|||
</pre> |
|||
=={{header|Prolog}}== |
|||
<syntaxhighlight lang="prolog"> |
|||
show([]). |
|||
show([X|Xs]):- write(X), show(Xs). |
|||
j(N,M,X):- M > 0 -> X is N/M; X is 1. |
|||
task:- L = [[], [1,2,3,4,5], [1,3,5,7,9], [2,4,6,8,10], [2,3,5,7], [8]], |
|||
forall((member(A,L), member(B,L)), ( |
|||
findall(X, (member(X,A), member(X,B)), I), length(I,N), |
|||
findall(X, (member(X,B), not(member(X,A))), T), append(A,T,U), length(U,M), |
|||
j(N,M,J), show(["A = ",A,", B = ",B,", J = ",J]), nl)). |
|||
</syntaxhighlight> |
|||
{{out}} |
|||
<pre> |
|||
?- task. |
|||
A = [], B = [], J = 1 |
|||
A = [], B = [1,2,3,4,5], J = 0 |
|||
A = [], B = [1,3,5,7,9], J = 0 |
|||
A = [], B = [2,4,6,8,10], J = 0 |
|||
A = [], B = [2,3,5,7], J = 0 |
|||
A = [], B = [8], J = 0 |
|||
A = [1,2,3,4,5], B = [], J = 0 |
|||
A = [1,2,3,4,5], B = [1,2,3,4,5], J = 1 |
|||
A = [1,2,3,4,5], B = [1,3,5,7,9], J = 0.42857142857142855 |
|||
A = [1,2,3,4,5], B = [2,4,6,8,10], J = 0.25 |
|||
A = [1,2,3,4,5], B = [2,3,5,7], J = 0.5 |
|||
A = [1,2,3,4,5], B = [8], J = 0 |
|||
A = [1,3,5,7,9], B = [], J = 0 |
|||
A = [1,3,5,7,9], B = [1,2,3,4,5], J = 0.42857142857142855 |
|||
A = [1,3,5,7,9], B = [1,3,5,7,9], J = 1 |
|||
A = [1,3,5,7,9], B = [2,4,6,8,10], J = 0 |
|||
A = [1,3,5,7,9], B = [2,3,5,7], J = 0.5 |
|||
A = [1,3,5,7,9], B = [8], J = 0 |
|||
A = [2,4,6,8,10], B = [], J = 0 |
|||
A = [2,4,6,8,10], B = [1,2,3,4,5], J = 0.25 |
|||
A = [2,4,6,8,10], B = [1,3,5,7,9], J = 0 |
|||
A = [2,4,6,8,10], B = [2,4,6,8,10], J = 1 |
|||
A = [2,4,6,8,10], B = [2,3,5,7], J = 0.125 |
|||
A = [2,4,6,8,10], B = [8], J = 0.2 |
|||
A = [2,3,5,7], B = [], J = 0 |
|||
A = [2,3,5,7], B = [1,2,3,4,5], J = 0.5 |
|||
A = [2,3,5,7], B = [1,3,5,7,9], J = 0.5 |
|||
A = [2,3,5,7], B = [2,4,6,8,10], J = 0.125 |
|||
A = [2,3,5,7], B = [2,3,5,7], J = 1 |
|||
A = [2,3,5,7], B = [8], J = 0 |
|||
A = [8], B = [], J = 0 |
|||
A = [8], B = [1,2,3,4,5], J = 0 |
|||
A = [8], B = [1,3,5,7,9], J = 0 |
|||
A = [8], B = [2,4,6,8,10], J = 0.2 |
|||
A = [8], B = [2,3,5,7], J = 0 |
|||
A = [8], B = [8], J = 1 |
|||
true. |
|||
</pre> |
|||
=={{header|Python}}== |
|||
<syntaxhighlight lang="python"> |
|||
# jaccard_index.py by Xing216 |
|||
from itertools import product |
|||
A = set() |
|||
B = {1, 2, 3, 4, 5} |
|||
C = {1, 3, 5, 7, 9} |
|||
D = {2, 4, 6, 8, 10} |
|||
E = {2, 3, 5, 7} |
|||
F = {8} |
|||
sets = list(product([A, B, C, D, E, F], repeat=2)) |
|||
set_names = list(product(["A", "B", "C", "D", "E", "F"], repeat=2)) |
|||
def jaccard_index(set1, set2): |
|||
try: |
|||
return len(set1 & set2)/len(set1 | set2) |
|||
except ZeroDivisionError: |
|||
return 0.0 |
|||
for i,j in sets: |
|||
jacc_idx = jaccard_index(i,j) |
|||
sets_idx = sets.index((i,j)) |
|||
print(f"J({', '.join(set_names[sets_idx])}) -> {jacc_idx}") |
|||
</syntaxhighlight> |
|||
{{out}} |
|||
<pre style="height: 10em"> |
|||
J(A, A) -> 0.0 |
|||
J(A, B) -> 0.0 |
|||
J(A, C) -> 0.0 |
|||
J(A, D) -> 0.0 |
|||
J(A, E) -> 0.0 |
|||
J(A, F) -> 0.0 |
|||
J(B, A) -> 0.0 |
|||
J(B, B) -> 1.0 |
|||
J(B, C) -> 0.42857142857142855 |
|||
J(B, D) -> 0.25 |
|||
J(B, E) -> 0.5 |
|||
J(B, F) -> 0.0 |
|||
J(C, A) -> 0.0 |
|||
J(C, B) -> 0.42857142857142855 |
|||
J(C, C) -> 1.0 |
|||
J(C, D) -> 0.0 |
|||
J(C, E) -> 0.5 |
|||
J(C, F) -> 0.0 |
|||
J(D, A) -> 0.0 |
|||
J(D, B) -> 0.25 |
|||
J(D, C) -> 0.0 |
|||
J(D, D) -> 1.0 |
|||
J(D, E) -> 0.125 |
|||
J(D, F) -> 0.2 |
|||
J(E, A) -> 0.0 |
|||
J(E, B) -> 0.5 |
|||
J(E, C) -> 0.5 |
|||
J(E, D) -> 0.125 |
|||
J(E, E) -> 1.0 |
|||
J(E, F) -> 0.0 |
|||
J(F, A) -> 0.0 |
|||
J(F, B) -> 0.0 |
|||
J(F, C) -> 0.0 |
|||
J(F, D) -> 0.2 |
|||
J(F, E) -> 0.0 |
|||
J(F, F) -> 1.0 |
|||
</pre> |
|||
=={{header|Quackery}}== |
|||
<syntaxhighlight lang="Quackery"> [ $ "bigrat.qky" loadfile ] now! |
|||
[ over size - space swap of |
|||
join echo$ ] is recho$ ( $ n --> $ ) |
|||
[ dip unbuild recho$ ] is recho ( x n --> $ ) |
|||
[ 0 swap witheach [ bit | ] ] is set ( [ --> n ) |
|||
[ & ] is intersection ( n n --> n ) |
|||
[ | ] is union ( n n --> n ) |
|||
[ [] 0 rot |
|||
[ dup 0 > while |
|||
dup 1 & if |
|||
[ dip [ tuck join swap ] ] |
|||
dip 1+ |
|||
1 >> again ] |
|||
2drop ] is items ( n --> [ ) |
|||
[ 2dup = iff [ 2drop 1 1 ] done |
|||
2dup union items size |
|||
dip [ intersection items size ] |
|||
dup 0 = if [ 2drop 0 1 ] |
|||
] is jaccard ( n n --> n/d ) |
|||
[ ' [ ] set ] constant is A ( --> n ) |
|||
[ ' [ 1 2 3 4 5 ] set ] constant is B ( --> n ) |
|||
[ ' [ 1 3 5 7 9 ] set ] constant is C ( --> n ) |
|||
[ ' [ 2 4 6 8 10 ] set ] constant is D ( --> n ) |
|||
[ ' [ 2 3 5 7 ] set ] constant is E ( --> n ) |
|||
[ ' [ 8 ] set ] constant is F ( --> n ) |
|||
' [ A B C D E F ] |
|||
dup witheach |
|||
[ over witheach |
|||
[ over items 15 recho |
|||
dup items 15 recho |
|||
say "--> " |
|||
2dup jaccard |
|||
proper$ echo$ |
|||
cr drop ] |
|||
drop |
|||
behead drop ] |
|||
drop</syntaxhighlight> |
|||
{{out}} |
|||
<pre>[ ] [ ] --> 1 |
|||
[ ] [ 1 2 3 4 5 ] --> 0 |
|||
[ ] [ 1 3 5 7 9 ] --> 0 |
|||
[ ] [ 2 4 6 8 10 ] --> 0 |
|||
[ ] [ 2 3 5 7 ] --> 0 |
|||
[ ] [ 8 ] --> 0 |
|||
[ 1 2 3 4 5 ] [ 1 2 3 4 5 ] --> 1 |
|||
[ 1 2 3 4 5 ] [ 1 3 5 7 9 ] --> 3/7 |
|||
[ 1 2 3 4 5 ] [ 2 4 6 8 10 ] --> 1/4 |
|||
[ 1 2 3 4 5 ] [ 2 3 5 7 ] --> 1/2 |
|||
[ 1 2 3 4 5 ] [ 8 ] --> 0 |
|||
[ 1 3 5 7 9 ] [ 1 3 5 7 9 ] --> 1 |
|||
[ 1 3 5 7 9 ] [ 2 4 6 8 10 ] --> 0 |
|||
[ 1 3 5 7 9 ] [ 2 3 5 7 ] --> 1/2 |
|||
[ 1 3 5 7 9 ] [ 8 ] --> 0 |
|||
[ 2 4 6 8 10 ] [ 2 4 6 8 10 ] --> 1 |
|||
[ 2 4 6 8 10 ] [ 2 3 5 7 ] --> 1/8 |
|||
[ 2 4 6 8 10 ] [ 8 ] --> 1/5 |
|||
[ 2 3 5 7 ] [ 2 3 5 7 ] --> 1 |
|||
[ 2 3 5 7 ] [ 8 ] --> 0 |
|||
[ 8 ] [ 8 ] --> 1 |
|||
</pre> |
|||
=={{header|Raku}}== |
|||
<syntaxhighlight lang="raku" line>sub J(\A, \B) { A ∪ B ?? (A ∩ B) / (A ∪ B) !! A ∪ B == A ∩ B ?? 1 !! 0 } |
|||
my %p = |
|||
A => < >, |
|||
B => <1 2 3 4 5>, |
|||
C => <1 3 5 7 9>, |
|||
D => <2 4 6 8 10>, |
|||
E => <2 3 5 7>, |
|||
F => <8>, |
|||
; |
|||
.say for %p.sort; |
|||
say ''; |
|||
say "J({.join: ','}) = ", J |%p{$_} for [X] <A B C D E F> xx 2;</syntaxhighlight> |
|||
{{out}} |
|||
<pre>A => () |
|||
B => (1 2 3 4 5) |
|||
C => (1 3 5 7 9) |
|||
D => (2 4 6 8 10) |
|||
E => (2 3 5 7) |
|||
F => 8 |
|||
J(A,A) = 1 |
|||
J(A,B) = 0 |
|||
J(A,C) = 0 |
|||
J(A,D) = 0 |
|||
J(A,E) = 0 |
|||
J(A,F) = 0 |
|||
J(B,A) = 0 |
|||
J(B,B) = 1 |
|||
J(B,C) = 0.428571 |
|||
J(B,D) = 0.25 |
|||
J(B,E) = 0.5 |
|||
J(B,F) = 0 |
|||
J(C,A) = 0 |
|||
J(C,B) = 0.428571 |
|||
J(C,C) = 1 |
|||
J(C,D) = 0 |
|||
J(C,E) = 0.5 |
|||
J(C,F) = 0 |
|||
J(D,A) = 0 |
|||
J(D,B) = 0.25 |
|||
J(D,C) = 0 |
|||
J(D,D) = 1 |
|||
J(D,E) = 0.125 |
|||
J(D,F) = 0.2 |
|||
J(E,A) = 0 |
|||
J(E,B) = 0.5 |
|||
J(E,C) = 0.5 |
|||
J(E,D) = 0.125 |
|||
J(E,E) = 1 |
|||
J(E,F) = 0 |
|||
J(F,A) = 0 |
|||
J(F,B) = 0 |
|||
J(F,C) = 0 |
|||
J(F,D) = 0.2 |
|||
J(F,E) = 0 |
|||
J(F,F) = 1</pre> |
|||
=={{header|RPL}}== |
|||
{{works with|Halcyon Calc|4.2.7}} |
|||
{| class="wikitable" |
|||
! RPL code |
|||
! Comment |
|||
|- |
|||
| |
|||
≪ → a b |
|||
≪ a 1 b SIZE FOR j |
|||
b j GET IF a OVER POS THEN DROP ELSE + END |
|||
NEXT |
|||
≫ ≫ ''''UNION'''' STO |
|||
≪ → a b |
|||
≪ { } 1 a SIZE FOR j |
|||
a j GET IF b OVER POS THEN + ELSE DROP END |
|||
NEXT |
|||
≫ ≫ ''''INTER'''' STO |
|||
≪ → a b |
|||
≪ a b '''INTER''' SIZE a b '''UNION''' SIZE / |
|||
≫ ≫ ''''JACAR'''' STO |
|||
| |
|||
'''UNION''' ''( {A} {B} -- {A ∪ B} )'' |
|||
Scan {B}... |
|||
... and add to {A} all {B} items not already in {A} |
|||
'''INTER''' ''( {A} {B} -- {A ∩ B} )'' |
|||
Scan {A}... |
|||
... and keep {A} items also in {B} |
|||
'''JACAR''' ''( {A} {B} -- Jaccard_index )'' |
|||
|} |
|||
{{in}} |
|||
<pre> |
|||
{ 1 2 3 4 5 } { 1 3 5 7 9 } JACAR |
|||
{ 1 3 5 7 9 } { 1 2 3 4 5 } JACAR |
|||
</pre> |
|||
{{out}} |
|||
<pre> |
|||
2: 0.428571428571 |
|||
1: 0.428571428571 |
|||
</pre> |
</pre> |
||
=={{header|Wren}}== |
=={{header|Wren}}== |
||
{{libheader|Wren-set}} |
{{libheader|Wren-set}} |
||
{{libheader|Wren- |
{{libheader|Wren-iterate}} |
||
{{libheader|Wren-fmt}} |
{{libheader|Wren-fmt}} |
||
Note that the Set object in the above module is implemented as a Map and consequently the iteration order (and the order in which elements are printed) is undefined. |
Note that the Set object in the above module is implemented as a Map and consequently the iteration order (and the order in which elements are printed) is undefined. |
||
< |
<syntaxhighlight lang="wren">import "./set" for Set |
||
import "./ |
import "./iterate" for Indexed |
||
import "./fmt" for Fmt |
import "./fmt" for Fmt |
||
var |
var jaccardIndex = Fn.new { |a, b| |
||
if (a.count == 0 && b.count == 0) return 1 |
if (a.count == 0 && b.count == 0) return 1 |
||
return a.intersect(b).count / a.union(b).count |
return a.intersect(b).count / a.union(b).count |
||
Line 74: | Line 926: | ||
var e = Set.new([2, 3, 5, 7]) |
var e = Set.new([2, 3, 5, 7]) |
||
var f = Set.new([8]) |
var f = Set.new([8]) |
||
var |
var isets = Indexed.new([a, b, c, d, e, f]) |
||
for (se in isets) { |
|||
var i = String.fromByte(se.index + 65) |
|||
for (se in Indexed.new(sets)) { |
|||
var |
var v = se.value |
||
v = v.toList.sort() // force original sorted order |
|||
var s = se.value |
|||
Fmt.print("$s = $n", i, v) |
|||
s = s.toList.sort() // force original sorted order |
|||
Fmt.print("$s = $n", String.fromByte(65 + i), s) |
|||
} |
} |
||
var pairs = [ |
|||
[a, a], [a, b], [a, c], [a, d], [a, e], [a, f], [b, b], [b, c], [b, d], [b, e], [b, f], |
|||
[c, c], [c, d], [c, e], [c, f], [d, d], [d, e], [d, f], [e, e], [e, f], [f, f] |
|||
] |
|||
var names = [ |
|||
"AA", "AB", "AC", "AD", "AE", "AF", "BB", "BC", "BD", "BE", "BF", |
|||
"CC", "CD", "CE", "CF", "DD", "DE", "DF", "EE", "EF", "FF" |
|||
] |
|||
System.print() |
System.print() |
||
for ( |
for (se1 in isets) { |
||
var |
var i1 = String.fromByte(se1.index + 65) |
||
var |
var v1 = se1.value |
||
for (se2 in isets) { |
|||
Fmt.print("J($s, $s) = $h", n[0], n[1], jacardIndex.call(ss[0], ss[1])) |
|||
var i2 = String.fromByte(se2.index + 65) |
|||
}</lang> |
|||
var v2 = se2.value |
|||
Fmt.print("J($s, $s) = $h", i1, i2, jaccardIndex.call(v1, v2)) |
|||
} |
|||
}</syntaxhighlight> |
|||
{{out}} |
{{out}} |
||
Line 115: | Line 959: | ||
J(A, E) = 0 |
J(A, E) = 0 |
||
J(A, F) = 0 |
J(A, F) = 0 |
||
J(B, A) = 0 |
|||
J(B, B) = 1 |
J(B, B) = 1 |
||
J(B, C) = 0.428571 |
J(B, C) = 0.428571 |
||
Line 120: | Line 965: | ||
J(B, E) = 0.5 |
J(B, E) = 0.5 |
||
J(B, F) = 0 |
J(B, F) = 0 |
||
J(C, A) = 0 |
|||
J(C, B) = 0.428571 |
|||
J(C, C) = 1 |
J(C, C) = 1 |
||
J(C, D) = 0 |
J(C, D) = 0 |
||
J(C, E) = 0.5 |
J(C, E) = 0.5 |
||
J(C, F) = 0 |
J(C, F) = 0 |
||
J(D, A) = 0 |
|||
J(D, B) = 0.25 |
|||
J(D, C) = 0 |
|||
J(D, D) = 1 |
J(D, D) = 1 |
||
J(D, E) = 0.125 |
J(D, E) = 0.125 |
||
J(D, F) = 0.2 |
J(D, F) = 0.2 |
||
J(E, A) = 0 |
|||
J(E, B) = 0.5 |
|||
J(E, C) = 0.5 |
|||
J(E, D) = 0.125 |
|||
J(E, E) = 1 |
J(E, E) = 1 |
||
J(E, F) = 0 |
J(E, F) = 0 |
||
J(F, A) = 0 |
|||
J(F, B) = 0 |
|||
J(F, C) = 0 |
|||
J(F, D) = 0.2 |
|||
J(F, E) = 0 |
|||
J(F, F) = 1 |
J(F, F) = 1 |
||
</pre> |
</pre> |
Latest revision as of 18:34, 11 December 2023
This page uses content from Wikipedia. The original article was at Jaccard index. The list of authors can be seen in the page history. As with Rosetta Code, the text of Wikipedia is available under the GNU FDL. (See links for details on variance) |
The Jaccard index, also known as the Jaccard similarity coefficient, is a statistic used for gauging the similarity and diversity of sample sets. It was developed by Paul Jaccard, who originally gave it the French name coefficient de communauté, and was independently formulated again by T. Tanimoto. Thus, the names Tanimoto index and Tanimoto coefficient are also used in some fields. All of these names refer to the same quantity: the ratio of intersection over union. The Jaccard coefficient measures similarity between finite sample sets, and is defined as the size of the intersection divided by the size of the union of the sample sets:
- J(A, B) = |A ∩ B|/|A ∪ B|
Define sets as follows, using any linear data structure:
A = {} B = {1, 2, 3, 4, 5} C = {1, 3, 5, 7, 9} D = {2, 4, 6, 8, 10} E = {2, 3, 5, 7} F = {8}
Write a program that computes the Jaccard index for every ordered pairing (to show that J(A, B) and J(B, A) are the same) of these sets, including self-pairings.
APL
task←{
jaccard ← (≢∩)÷(≢∪)
A ← ⍬
B ← 1 2 3 4 5
C ← 1 3 5 7 9
D ← 2 4 6 8 10
E ← 2 3 5 7
F ← ,8
'.ABCDEF' ⍪ 'ABCDEF' , ∘.jaccard⍨ A B C D E F
}
- Output:
. A B C D E F A 1 0 0 0 0 0 B 0 1 0.4285714286 0.25 0.5 0 C 0 0.4285714286 1 0 0.5 0 D 0 0.25 0 1 0.125 0.2 E 0 0.5 0.5 0.125 1 0 F 0 0 0 0.2 0 1
Arturo
jaccard: function [a b][
if and? empty? a empty? b -> return to :rational 1
x: size intersection a b
y: size union a b
fdiv to :rational x to :rational y
]
sets: [
[]
[1 2 3 4 5]
[1 3 5 7 9]
[2 4 6 8 10]
[2 3 5 7]
[8]
]
loop combine.repeated.by: 2 sets 'p ->
print [pad ~"|p\0|" 12 pad ~"|p\1|" 12 "->" jaccard p\0 p\1]
- Output:
[] [] -> 1/1 [] [1 2 3 4 5] -> 0/1 [] [1 3 5 7 9] -> 0/1 [] [2 4 6 8 10] -> 0/1 [] [2 3 5 7] -> 0/1 [] [8] -> 0/1 [1 2 3 4 5] [1 2 3 4 5] -> 1/1 [1 2 3 4 5] [1 3 5 7 9] -> 3/7 [1 2 3 4 5] [2 4 6 8 10] -> 1/4 [1 2 3 4 5] [2 3 5 7] -> 1/2 [1 2 3 4 5] [8] -> 0/1 [1 3 5 7 9] [1 3 5 7 9] -> 1/1 [1 3 5 7 9] [2 4 6 8 10] -> 0/1 [1 3 5 7 9] [2 3 5 7] -> 1/2 [1 3 5 7 9] [8] -> 0/1 [2 4 6 8 10] [2 4 6 8 10] -> 1/1 [2 4 6 8 10] [2 3 5 7] -> 1/8 [2 4 6 8 10] [8] -> 1/5 [2 3 5 7] [2 3 5 7] -> 1/1 [2 3 5 7] [8] -> 0/1 [8] [8] -> 1/1
BQN
Jaccard ← ≡◶⟨∊ ÷○(+´) ∊∘∾, 1⟩
a ← ⟨⟩
b ← ⟨1,2,3,4,5⟩
c ← ⟨1,3,5,7,9⟩
d ← ⟨2,4,6,8,10⟩
e ← ⟨2,3,5,7⟩
f ← ⟨8⟩
Jaccard⌜˜ ⟨a,b,c,d,e,f⟩
- Output:
┌─ ╵ 1 0 0 0 0 0 0 1 0.42857142857142855 0.25 0.5 0 0 0.42857142857142855 1 0 0.5 0 0 0.25 0 1 0.125 0.2 0 0.5 0.5 0.125 1 0 0 0 0 0.2 0 1 ┘
Emacs Lisp
(let* ((v1 '(A ()
B (1 2 3 4 5)
C (1 3 5 7 9)
D (2 4 6 8 10)
E (2 3 5 7)
F (8)))
(keys1 (seq-filter (lambda (x) (not (null x)))
(cl-loop for s1 being the elements of v1
using (index idx)
collect (if (= (% idx 2) 0) s1 nil)))))
(switch-to-buffer-other-window "*similarity result*")
(erase-buffer)
(defun similarity (p1 p2)
(if (and (null p1) (null p2)) 1
(/ (float (seq-length (seq-intersection p1 p2)))
(float (seq-length (seq-uniq (seq-union p1 p2))))) ) )
(insert (format " %s\n"
(cl-loop for s1 being the elements of keys1 concat
(format " %s" s1))))
(cl-loop for s1 in keys1 do
(insert (format "%s %s\n" s1
(cl-loop for s2 in keys1 concat
(format " %3.3f" (similarity (plist-get v1 s1) (plist-get v1 s2) ))))))
)
- Output:
A B C D E F A 1.000 0.000 0.000 0.000 0.000 0.000 B 0.000 1.000 0.429 0.250 0.500 0.000 C 0.000 0.429 1.000 0.000 0.500 0.000 D 0.000 0.250 0.000 1.000 0.125 0.200 E 0.000 0.500 0.500 0.125 1.000 0.000 F 0.000 0.000 0.000 0.200 0.000 1.000
Factor
USING: assocs formatting grouping kernel math math.combinatorics
prettyprint sequences sequences.repeating sets ;
: jaccard ( seq1 seq2 -- x )
2dup [ empty? ] both? [ 2drop 1 ]
[ [ intersect ] [ union ] 2bi [ length ] bi@ / ] if ;
{ { } { 1 2 3 4 5 } { 1 3 5 7 9 } { 2 4 6 8 10 } { 2 3 5 7 } { 8 } }
[ 2 <combinations> ] [ 2 repeat 2 group append ] bi
[ 2dup jaccard "%u %u -> %u\n" printf ] assoc-each
- Output:
{ } { 1 2 3 4 5 } -> 0 { } { 1 3 5 7 9 } -> 0 { } { 2 4 6 8 10 } -> 0 { } { 2 3 5 7 } -> 0 { } { 8 } -> 0 { 1 2 3 4 5 } { 1 3 5 7 9 } -> 3/7 { 1 2 3 4 5 } { 2 4 6 8 10 } -> 1/4 { 1 2 3 4 5 } { 2 3 5 7 } -> 1/2 { 1 2 3 4 5 } { 8 } -> 0 { 1 3 5 7 9 } { 2 4 6 8 10 } -> 0 { 1 3 5 7 9 } { 2 3 5 7 } -> 1/2 { 1 3 5 7 9 } { 8 } -> 0 { 2 4 6 8 10 } { 2 3 5 7 } -> 1/8 { 2 4 6 8 10 } { 8 } -> 1/5 { 2 3 5 7 } { 8 } -> 0 { } { } -> 1 { 1 2 3 4 5 } { 1 2 3 4 5 } -> 1 { 1 3 5 7 9 } { 1 3 5 7 9 } -> 1 { 2 4 6 8 10 } { 2 4 6 8 10 } -> 1 { 2 3 5 7 } { 2 3 5 7 } -> 1 { 8 } { 8 } -> 1
Haskell
import Control.Applicative (liftA2)
import Data.List (genericLength, intersect, nub, union)
import Data.List.Split (chunksOf)
import Data.Ratio (denominator, numerator)
import Text.Tabular (Header(..), Properties(..), Table(..))
import Text.Tabular.AsciiArt (render)
-- | Jaccard index of two lists treated as sets: the number of distinct
-- elements common to both, divided by the number of distinct elements that
-- occur in either.  Two empty lists are defined to have index 1.
jaccard :: (Eq a, Fractional b) => [a] -> [a] -> b
jaccard [] [] = 1
jaccard xs ys = common / total
  where
    -- De-duplicate the first list so both counts below are set cardinalities.
    distinctXs = nub xs
    common = genericLength (distinctXs `intersect` ys)
    total = genericLength (distinctXs `union` ys)
-- | Render an ASCII table of the Jaccard index for every ordered pair of the
-- given sets.  Each set carries a name that is used only to label the rows
-- and columns.
jaccardTable :: Eq a => [(String, [a])] -> String
jaccardTable xs = render id id showRat (Table axis axis rows)
  where
    (names, sets) = unzip xs
    -- The same header serves for rows and columns: the table is square.
    axis = Group SingleLine (map Header names)
    -- One index per ordered pair, cut into rows of one entry per set.
    rows = chunksOf (length xs) [jaccard a b | (a, b) <- allPairs sets]
-- | Format a rational as @numerator/denominator@, or as the bare numerator
-- when the denominator is 1.
showRat :: Rational -> String
showRat r
  | d == 1 = show n
  | otherwise = show n ++ "/" ++ show d
  where
    n = numerator r
    d = denominator r
-- | Every ordered pair of elements drawn from the list, self-pairs included,
-- with the first component varying slowest.  For example:
--
-- > allPairs [1,2] == [(1,1),(1,2),(2,1),(2,2)]
allPairs :: [a] -> [(a,a)]
allPairs xs = [(x, y) | x <- xs, y <- xs]
-- | Print the table of Jaccard indexes for the six task sets A to F.
main :: IO ()
main = putStrLn (jaccardTable taskSets)
  where
    taskSets :: [(String, [Int])]
    taskSets =
      [ ("A", [])
      , ("B", [1, 2, 3, 4, 5])
      , ("C", [1, 3, 5, 7, 9])
      , ("D", [2, 4, 6, 8, 10])
      , ("E", [2, 3, 5, 7])
      , ("F", [8])
      ]
- Output:
+---++---+-----+-----+-----+-----+-----+ | || A | B | C | D | E | F | +===++===+=====+=====+=====+=====+=====+ | A || 1 | 0 | 0 | 0 | 0 | 0 | +---++---+-----+-----+-----+-----+-----+ | B || 0 | 1 | 3/7 | 1/4 | 1/2 | 0 | +---++---+-----+-----+-----+-----+-----+ | C || 0 | 3/7 | 1 | 0 | 1/2 | 0 | +---++---+-----+-----+-----+-----+-----+ | D || 0 | 1/4 | 0 | 1 | 1/8 | 1/5 | +---++---+-----+-----+-----+-----+-----+ | E || 0 | 1/2 | 1/2 | 1/8 | 1 | 0 | +---++---+-----+-----+-----+-----+-----+ | F || 0 | 0 | 0 | 1/5 | 0 | 1 | +---++---+-----+-----+-----+-----+-----+
J
jaccard=. +&# (] %&x: -) [ -&# -.
a=. $~ 0
b=. 1 2 3 4 5
c=. 1 3 5 7 9
d=. 2 4 6 8 10
e=. 2 3 5 7
f=. , 8
jaccard&.>/~ a ; b ; c ; d ; e ; f
- Output:
┌─┬───┬───┬───┬───┬───┐ │0│0 │0 │0 │0 │0 │ ├─┼───┼───┼───┼───┼───┤ │0│1 │3r7│1r4│1r2│0 │ ├─┼───┼───┼───┼───┼───┤ │0│3r7│1 │0 │1r2│0 │ ├─┼───┼───┼───┼───┼───┤ │0│1r4│0 │1 │1r8│1r5│ ├─┼───┼───┼───┼───┼───┤ │0│1r2│1r2│1r8│1 │0 │ ├─┼───┼───┼───┼───┼───┤ │0│0 │0 │1r5│0 │1 │ └─┴───┴───┴───┴───┴───┘
jq
Works with gojq, the Go implementation of jq. In the following:
- the Jaccard index is presented as a string representing a reduced fraction, e.g. "0" or "1/7".
- sets are represented by sorted arrays with distinct elements.
Preliminaries
def lpad($len): tostring | ($len - length) as $l | (" " * $l)[:$l] + .;
def gcd(a; b):
# subfunction expects [a,b] as input
# i.e. a ~ .[0] and b ~ .[1]
def rgcd: if .[1] == 0 then .[0]
else [.[1], .[0] % .[1]] | rgcd
end;
[a,b] | rgcd;
The Task
# rjaccardIndex(x; y): the Jaccard index of the arrays produced by x and y,
# returned as a string holding a reduced fraction ("0", "1", "3/7", ...).
# Two empty sets are defined to have index "1".
def rjaccardIndex(x; y):
  # Intersection: the elements of a that also occur in b.
  def i(a;b): a - (a-b);
  # Union: a plus the elements of b not already shared, sorted and de-duplicated.
  def u(a;b): a + (b - i(a;b)) | unique;
  # $i/$j as a reduced fraction; the denominator is omitted when it reduces to 1.
  def idivide($i; $j):
    if $i == 0 then "0"
    else gcd($i;$j) as $d
    | if $j == $d then "\($i/$d)"
      else "\($i/$d)/\($j/$d)"
      end
    end;
  # `length` yields a number, so it must be compared with the number 0.
  # Comparing with the string "0" is always false in jq, which made the
  # both-empty case fall through to idivide(0; 0) and report "0" rather than "1".
  if (x|length) == 0 and (y|length) == 0 then "1"
  else idivide( i(x;y)|length; u(x;y)|length )
  end;
def a : [];
def b : [1, 2, 3, 4, 5];
def c : [1, 3, 5, 7, 9];
def d : [2, 4, 6, 8, 10];
def e : [2, 3, 5, 7];
def f : [8];
def task:
def tidy: map(lpad(4))|join(" ");
[a,b,c,d,e,f] as $sets
| [range(0;$sets|length) | [. + 97] | implode] as $names
| ([""] + $names | tidy),
(range(0; $sets|length) as $i
| ([$i + 97] | implode) as $name
| $sets[$i] as $x
| $sets | map(rjaccardIndex($x; .)) | tidy
| " \($name): \(.)" ) ;
task
- Output:
a b c d e f a: 0 0 0 0 0 0 b: 0 1 3/7 1/4 1/2 0 c: 0 3/7 1 0 1/2 0 d: 0 1/4 0 1 1/8 1/5 e: 0 1/2 1/2 1/8 1 0 f: 0 0 0 1/5 0 1
Julia
J(A, B) = begin i, u = length(A ∩ B), length(A ∪ B); u == 0 ? 1//1 : i // u end
A = Int[]
B = [1, 2, 3, 4, 5]
C = [1, 3, 5, 7, 9]
D = [2, 4, 6, 8, 10]
E = [2, 3, 5, 7]
F = [8]
testsets = [A, B, C, D, E, F]
println("Set A Set B J(A, B)\n", "-"^44)
for a in testsets, b in testsets
println(rpad(isempty(a) ? "[]" : a, 18), rpad(isempty(b) ? "[]" : b, 18),
replace(string(J(a, b)), "//" => "/"))
end
- Output:
Set A Set B J(A, B) -------------------------------------------- [] [] 1/1 [] [1, 2, 3, 4, 5] 0/1 [] [1, 3, 5, 7, 9] 0/1 [] [2, 4, 6, 8, 10] 0/1 [] [2, 3, 5, 7] 0/1 [] [8] 0/1 [1, 2, 3, 4, 5] [] 0/1 [1, 2, 3, 4, 5] [1, 2, 3, 4, 5] 1/1 [1, 2, 3, 4, 5] [1, 3, 5, 7, 9] 3/7 [1, 2, 3, 4, 5] [2, 4, 6, 8, 10] 1/4 [1, 2, 3, 4, 5] [2, 3, 5, 7] 1/2 [1, 2, 3, 4, 5] [8] 0/1 [1, 3, 5, 7, 9] [] 0/1 [1, 3, 5, 7, 9] [1, 2, 3, 4, 5] 3/7 [1, 3, 5, 7, 9] [1, 3, 5, 7, 9] 1/1 [1, 3, 5, 7, 9] [2, 4, 6, 8, 10] 0/1 [1, 3, 5, 7, 9] [2, 3, 5, 7] 1/2 [1, 3, 5, 7, 9] [8] 0/1 [2, 4, 6, 8, 10] [] 0/1 [2, 4, 6, 8, 10] [1, 2, 3, 4, 5] 1/4 [2, 4, 6, 8, 10] [1, 3, 5, 7, 9] 0/1 [2, 4, 6, 8, 10] [2, 4, 6, 8, 10] 1/1 [2, 4, 6, 8, 10] [2, 3, 5, 7] 1/8 [2, 4, 6, 8, 10] [8] 1/5 [2, 3, 5, 7] [] 0/1 [2, 3, 5, 7] [1, 2, 3, 4, 5] 1/2 [2, 3, 5, 7] [1, 3, 5, 7, 9] 1/2 [2, 3, 5, 7] [2, 4, 6, 8, 10] 1/8 [2, 3, 5, 7] [2, 3, 5, 7] 1/1 [2, 3, 5, 7] [8] 0/1 [8] [] 0/1 [8] [1, 2, 3, 4, 5] 0/1 [8] [1, 3, 5, 7, 9] 0/1 [8] [2, 4, 6, 8, 10] 1/5 [8] [2, 3, 5, 7] 0/1 [8] [8] 1/1
Nim
import std/[rationals, strformat]
type Set8 = set[int8]
const
A: Set8 = {}
B: Set8 = {1, 2, 3, 4, 5}
C: Set8 = {1, 3, 5, 7, 9}
D: Set8 = {2, 4, 6, 8, 10}
E: Set8 = {2, 3, 5, 7}
F: Set8 = {8}
List = [('A', A), ('B', B), ('C', C), ('D', D), ('E', E), ('F', F)]
func J(a, b: Set8): Rational[int] =
## Return the Jaccard index.
## Return 1 if both sets are empty.
let card1 = card(a * b)
let card2 = card(a + b)
result = if card1 == card2: 1 // 1 else: card1 // card2
for i in 0..List.high:
let (name1, set1) = List[i]
for j in i..List.high:
let (name2, set2) = List[j]
echo &"J({name1}, {name2}) = {J(set1, set2)}"
if i != j:
echo &"J({name2}, {name1}) = {J(set2, set1)}"
- Output:
J(A, A) = 1/1 J(A, B) = 0/1 J(B, A) = 0/1 J(A, C) = 0/1 J(C, A) = 0/1 J(A, D) = 0/1 J(D, A) = 0/1 J(A, E) = 0/1 J(E, A) = 0/1 J(A, F) = 0/1 J(F, A) = 0/1 J(B, B) = 1/1 J(B, C) = 3/7 J(C, B) = 3/7 J(B, D) = 1/4 J(D, B) = 1/4 J(B, E) = 1/2 J(E, B) = 1/2 J(B, F) = 0/1 J(F, B) = 0/1 J(C, C) = 1/1 J(C, D) = 0/1 J(D, C) = 0/1 J(C, E) = 1/2 J(E, C) = 1/2 J(C, F) = 0/1 J(F, C) = 0/1 J(D, D) = 1/1 J(D, E) = 1/8 J(E, D) = 1/8 J(D, F) = 1/5 J(F, D) = 1/5 J(E, E) = 1/1 J(E, F) = 0/1 J(F, E) = 0/1 J(F, F) = 1/1
Phix
with javascript_semantics
include sets.e

-- Jaccard index of two sets: size of the intersection over size of the union.
-- Returns 1 when the union is empty (both sets empty).
function jaccard(sequence a, b)
    integer i = length(intersection(a,b)),
            u = length(union(a,b))
    return iff(u=0?1:i/u)
end function

constant tests = {{},               -- A
                  {1, 2, 3, 4, 5},  -- B
                  {1, 3, 5, 7, 9},  -- C
                  {2, 4, 6, 8, 10}, -- D
                  {2, 3, 5, 7},     -- E
                  {8}}              -- F

-- Visit each unordered pair once; for distinct sets also compute the reversed
-- pairing and assert that it gives the same index.
for i=1 to length(tests) do
    for j=i to length(tests) do
        string s = sprintf("J(%c,%c)",{'A'+i-1,'A'+j-1})
        -- The calls must use the name the function is declared with (jaccard).
        atom jij = jaccard(tests[i],tests[j])
        if i!=j then
            atom jji = jaccard(tests[j],tests[i])
            assert(jji==jij)
            s &= sprintf(" = J(%c,%c)",{'A'+j-1,'A'+i-1})
        end if
        printf(1,"%s = %g\n",{s,jij})
    end for
end for
- Output:
J(A,A) = 1 J(A,B) = J(B,A) = 0 J(A,C) = J(C,A) = 0 J(A,D) = J(D,A) = 0 J(A,E) = J(E,A) = 0 J(A,F) = J(F,A) = 0 J(B,B) = 1 J(B,C) = J(C,B) = 0.428571 J(B,D) = J(D,B) = 0.25 J(B,E) = J(E,B) = 0.5 J(B,F) = J(F,B) = 0 J(C,C) = 1 J(C,D) = J(D,C) = 0 J(C,E) = J(E,C) = 0.5 J(C,F) = J(F,C) = 0 J(D,D) = 1 J(D,E) = J(E,D) = 0.125 J(D,F) = J(F,D) = 0.2 J(E,E) = 1 J(E,F) = J(F,E) = 0 J(F,F) = 1
Perl
#!/usr/bin/perl
use strict;
use warnings;
my %sets = (
A => [],
B => [1, 2, 3, 4, 5],
C => [1, 3, 5, 7, 9],
D => [2, 4, 6, 8, 10],
E => [2, 3, 5, 7],
F => [8],
);
use Data::Dump 'dd'; dd \%sets;
for my $left (sort keys %sets )
{
for my $right (sort keys %sets )
{
my %union;
$union{ $_ }++ for @{ $sets{$left} }, @{ $sets{$right} };
print "J($left,$right) = ",
%union ? (grep $_ == 2, values %union) / (keys %union) : 1, "\n";
}
}
- Output:
{ A => [], B => [1 .. 5], C => [1, 3, 5, 7, 9], D => [2, 4, 6, 8, 10], E => [2, 3, 5, 7], F => [8], } J(A,A) = 1 J(A,B) = 0 J(A,C) = 0 J(A,D) = 0 J(A,E) = 0 J(A,F) = 0 J(B,A) = 0 J(B,B) = 1 J(B,C) = 0.428571428571429 J(B,D) = 0.25 J(B,E) = 0.5 J(B,F) = 0 J(C,A) = 0 J(C,B) = 0.428571428571429 J(C,C) = 1 J(C,D) = 0 J(C,E) = 0.5 J(C,F) = 0 J(D,A) = 0 J(D,B) = 0.25 J(D,C) = 0 J(D,D) = 1 J(D,E) = 0.125 J(D,F) = 0.2 J(E,A) = 0 J(E,B) = 0.5 J(E,C) = 0.5 J(E,D) = 0.125 J(E,E) = 1 J(E,F) = 0 J(F,A) = 0 J(F,B) = 0 J(F,C) = 0 J(F,D) = 0.2 J(F,E) = 0 J(F,F) = 1
Prolog
show([]).
show([X|Xs]):- write(X), show(Xs).
j(N,M,X):- M > 0 -> X is N/M; X is 1.
task:- L = [[], [1,2,3,4,5], [1,3,5,7,9], [2,4,6,8,10], [2,3,5,7], [8]],
forall((member(A,L), member(B,L)), (
findall(X, (member(X,A), member(X,B)), I), length(I,N),
findall(X, (member(X,B), not(member(X,A))), T), append(A,T,U), length(U,M),
j(N,M,J), show(["A = ",A,", B = ",B,", J = ",J]), nl)).
- Output:
?- task. A = [], B = [], J = 1 A = [], B = [1,2,3,4,5], J = 0 A = [], B = [1,3,5,7,9], J = 0 A = [], B = [2,4,6,8,10], J = 0 A = [], B = [2,3,5,7], J = 0 A = [], B = [8], J = 0 A = [1,2,3,4,5], B = [], J = 0 A = [1,2,3,4,5], B = [1,2,3,4,5], J = 1 A = [1,2,3,4,5], B = [1,3,5,7,9], J = 0.42857142857142855 A = [1,2,3,4,5], B = [2,4,6,8,10], J = 0.25 A = [1,2,3,4,5], B = [2,3,5,7], J = 0.5 A = [1,2,3,4,5], B = [8], J = 0 A = [1,3,5,7,9], B = [], J = 0 A = [1,3,5,7,9], B = [1,2,3,4,5], J = 0.42857142857142855 A = [1,3,5,7,9], B = [1,3,5,7,9], J = 1 A = [1,3,5,7,9], B = [2,4,6,8,10], J = 0 A = [1,3,5,7,9], B = [2,3,5,7], J = 0.5 A = [1,3,5,7,9], B = [8], J = 0 A = [2,4,6,8,10], B = [], J = 0 A = [2,4,6,8,10], B = [1,2,3,4,5], J = 0.25 A = [2,4,6,8,10], B = [1,3,5,7,9], J = 0 A = [2,4,6,8,10], B = [2,4,6,8,10], J = 1 A = [2,4,6,8,10], B = [2,3,5,7], J = 0.125 A = [2,4,6,8,10], B = [8], J = 0.2 A = [2,3,5,7], B = [], J = 0 A = [2,3,5,7], B = [1,2,3,4,5], J = 0.5 A = [2,3,5,7], B = [1,3,5,7,9], J = 0.5 A = [2,3,5,7], B = [2,4,6,8,10], J = 0.125 A = [2,3,5,7], B = [2,3,5,7], J = 1 A = [2,3,5,7], B = [8], J = 0 A = [8], B = [], J = 0 A = [8], B = [1,2,3,4,5], J = 0 A = [8], B = [1,3,5,7,9], J = 0 A = [8], B = [2,4,6,8,10], J = 0.2 A = [8], B = [2,3,5,7], J = 0 A = [8], B = [8], J = 1 true.
Python
# jaccard_index.py by Xing216
from itertools import product

A = set()
B = {1, 2, 3, 4, 5}
C = {1, 3, 5, 7, 9}
D = {2, 4, 6, 8, 10}
E = {2, 3, 5, 7}
F = {8}

# Every ordered pairing of the sets (self-pairings included), and the matching
# pairs of names in the same order.
sets = list(product([A, B, C, D, E, F], repeat=2))
set_names = list(product(["A", "B", "C", "D", "E", "F"], repeat=2))


def jaccard_index(set1, set2):
    """Return the Jaccard index |set1 & set2| / |set1 | set2| as a float.

    Two empty sets are identical, so their index is defined as 1.0 rather
    than being left as a division by zero.
    """
    union = set1 | set2
    if not union:
        return 1.0
    return len(set1 & set2) / len(union)


# Walk the pairs and their names together instead of looking each pair up
# again with sets.index().
for names, (i, j) in zip(set_names, sets):
    print(f"J({', '.join(names)}) -> {jaccard_index(i, j)}")
- Output:
J(A, A) -> 0.0 J(A, B) -> 0.0 J(A, C) -> 0.0 J(A, D) -> 0.0 J(A, E) -> 0.0 J(A, F) -> 0.0 J(B, A) -> 0.0 J(B, B) -> 1.0 J(B, C) -> 0.42857142857142855 J(B, D) -> 0.25 J(B, E) -> 0.5 J(B, F) -> 0.0 J(C, A) -> 0.0 J(C, B) -> 0.42857142857142855 J(C, C) -> 1.0 J(C, D) -> 0.0 J(C, E) -> 0.5 J(C, F) -> 0.0 J(D, A) -> 0.0 J(D, B) -> 0.25 J(D, C) -> 0.0 J(D, D) -> 1.0 J(D, E) -> 0.125 J(D, F) -> 0.2 J(E, A) -> 0.0 J(E, B) -> 0.5 J(E, C) -> 0.5 J(E, D) -> 0.125 J(E, E) -> 1.0 J(E, F) -> 0.0 J(F, A) -> 0.0 J(F, B) -> 0.0 J(F, C) -> 0.0 J(F, D) -> 0.2 J(F, E) -> 0.0 J(F, F) -> 1.0
Quackery
[ $ "bigrat.qky" loadfile ] now!
[ over size - space swap of
join echo$ ] is recho$ ( $ n --> $ )
[ dip unbuild recho$ ] is recho ( x n --> $ )
[ 0 swap witheach [ bit | ] ] is set ( [ --> n )
[ & ] is intersection ( n --> n )
[ | ] is union ( n --> n )
[ [] 0 rot
[ dup 0 > while
dup 1 & if
[ dip [ tuck join swap ] ]
dip 1+
1 >> again ]
2drop ] is items ( n --> [ )
[ 2dup = iff [ 2drop 1 1 ] done
2dup union items size
dip [ intersection items size ]
dup 0 = if [ 2drop 0 1 ]
] is jaccard ( n n --> n/d )
[ ' [ ] set ] constant is A ( --> n )
[ ' [ 1 2 3 4 5 ] set ] constant is B ( --> n )
[ ' [ 1 3 5 7 9 ] set ] constant is C ( --> n )
[ ' [ 2 4 6 8 10 ] set ] constant is D ( --> n )
[ ' [ 2 3 5 7 ] set ] constant is E ( --> n )
[ ' [ 8 ] set ] constant is F ( --> n )
' [ A B C D E F ]
dup witheach
[ over witheach
[ over items 15 recho
dup items 15 recho
say "--> "
2dup jaccard
proper$ echo$
cr drop ]
drop
behead drop ]
drop
- Output:
[ ] [ ] --> 1 [ ] [ 1 2 3 4 5 ] --> 0 [ ] [ 1 3 5 7 9 ] --> 0 [ ] [ 2 4 6 8 10 ] --> 0 [ ] [ 2 3 5 7 ] --> 0 [ ] [ 8 ] --> 0 [ 1 2 3 4 5 ] [ 1 2 3 4 5 ] --> 1 [ 1 2 3 4 5 ] [ 1 3 5 7 9 ] --> 3/7 [ 1 2 3 4 5 ] [ 2 4 6 8 10 ] --> 1/4 [ 1 2 3 4 5 ] [ 2 3 5 7 ] --> 1/2 [ 1 2 3 4 5 ] [ 8 ] --> 0 [ 1 3 5 7 9 ] [ 1 3 5 7 9 ] --> 1 [ 1 3 5 7 9 ] [ 2 4 6 8 10 ] --> 0 [ 1 3 5 7 9 ] [ 2 3 5 7 ] --> 1/2 [ 1 3 5 7 9 ] [ 8 ] --> 0 [ 2 4 6 8 10 ] [ 2 4 6 8 10 ] --> 1 [ 2 4 6 8 10 ] [ 2 3 5 7 ] --> 1/8 [ 2 4 6 8 10 ] [ 8 ] --> 1/5 [ 2 3 5 7 ] [ 2 3 5 7 ] --> 1 [ 2 3 5 7 ] [ 8 ] --> 0 [ 8 ] [ 8 ] --> 1
Raku
sub J(\A, \B) { A ∪ B ?? (A ∩ B) / (A ∪ B) !! A ∪ B == A ∩ B ?? 1 !! 0 }
my %p =
A => < >,
B => <1 2 3 4 5>,
C => <1 3 5 7 9>,
D => <2 4 6 8 10>,
E => <2 3 5 7>,
F => <8>,
;
.say for %p.sort;
say '';
say "J({.join: ','}) = ", J |%p{$_} for [X] <A B C D E F> xx 2;
- Output:
A => () B => (1 2 3 4 5) C => (1 3 5 7 9) D => (2 4 6 8 10) E => (2 3 5 7) F => 8 J(A,A) = 1 J(A,B) = 0 J(A,C) = 0 J(A,D) = 0 J(A,E) = 0 J(A,F) = 0 J(B,A) = 0 J(B,B) = 1 J(B,C) = 0.428571 J(B,D) = 0.25 J(B,E) = 0.5 J(B,F) = 0 J(C,A) = 0 J(C,B) = 0.428571 J(C,C) = 1 J(C,D) = 0 J(C,E) = 0.5 J(C,F) = 0 J(D,A) = 0 J(D,B) = 0.25 J(D,C) = 0 J(D,D) = 1 J(D,E) = 0.125 J(D,F) = 0.2 J(E,A) = 0 J(E,B) = 0.5 J(E,C) = 0.5 J(E,D) = 0.125 J(E,E) = 1 J(E,F) = 0 J(F,A) = 0 J(F,B) = 0 J(F,C) = 0 J(F,D) = 0.2 J(F,E) = 0 J(F,F) = 1
RPL
RPL code | Comment |
---|---|
≪ → a b ≪ a 1 b SIZE FOR j b j GET IF a OVER POS THEN DROP ELSE + END NEXT ≫ ≫ 'UNION' STO ≪ → a b ≪ { } 1 a SIZE FOR j a j GET IF b OVER POS THEN + ELSE DROP END NEXT ≫ ≫ 'INTER' STO ≪ → a b ≪ a b INTER SIZE a b UNION SIZE / ≫ ≫ 'JACAR' STO |
UNION ( {A} {B} -- {A ∪ B} ) Scan {B}... ... and add to {A} all {B} items not already in {A} INTER ( {A} {B} -- {A ∩ B} ) Scan {A}... ... and keep {A} items also in {B} JACAR ( {A} {B} -- Jaccard_index ) |
- Input:
{ 1 2 3 4 5 } { 1 3 5 7 9 } JACAR { 1 3 5 7 9 } { 1 2 3 4 5 } JACAR
- Output:
2: 0.428571428571 1: 0.428571428571
Wren
Note that the Set object in the Wren-set module is implemented as a Map and consequently the iteration order (and the order in which elements are printed) is undefined.
import "./set" for Set
import "./iterate" for Indexed
import "./fmt" for Fmt
var jaccardIndex = Fn.new { |a, b|
if (a.count == 0 && b.count == 0) return 1
return a.intersect(b).count / a.union(b).count
}
var a = Set.new([])
var b = Set.new([1, 2, 3, 4, 5])
var c = Set.new([1, 3, 5, 7, 9])
var d = Set.new([2, 4, 6, 8, 10])
var e = Set.new([2, 3, 5, 7])
var f = Set.new([8])
var isets = Indexed.new([a, b, c, d, e, f])
for (se in isets) {
var i = String.fromByte(se.index + 65)
var v = se.value
v = v.toList.sort() // force original sorted order
Fmt.print("$s = $n", i, v)
}
System.print()
for (se1 in isets) {
var i1 = String.fromByte(se1.index + 65)
var v1 = se1.value
for (se2 in isets) {
var i2 = String.fromByte(se2.index + 65)
var v2 = se2.value
Fmt.print("J($s, $s) = $h", i1, i2, jaccardIndex.call(v1, v2))
}
}
- Output:
A = [] B = [1, 2, 3, 4, 5] C = [1, 3, 5, 7, 9] D = [2, 4, 6, 8, 10] E = [2, 3, 5, 7] F = [8] J(A, A) = 1 J(A, B) = 0 J(A, C) = 0 J(A, D) = 0 J(A, E) = 0 J(A, F) = 0 J(B, A) = 0 J(B, B) = 1 J(B, C) = 0.428571 J(B, D) = 0.25 J(B, E) = 0.5 J(B, F) = 0 J(C, A) = 0 J(C, B) = 0.428571 J(C, C) = 1 J(C, D) = 0 J(C, E) = 0.5 J(C, F) = 0 J(D, A) = 0 J(D, B) = 0.25 J(D, C) = 0 J(D, D) = 1 J(D, E) = 0.125 J(D, F) = 0.2 J(E, A) = 0 J(E, B) = 0.5 J(E, C) = 0.5 J(E, D) = 0.125 J(E, E) = 1 J(E, F) = 0 J(F, A) = 0 J(F, B) = 0 J(F, C) = 0 J(F, D) = 0.2 J(F, E) = 0 J(F, F) = 1