Jaccard index: Difference between revisions

m
→‎{{header|Wren}}: Changed to Wren S/H
(Initial Haskell version.)
m (→‎{{header|Wren}}: Changed to Wren S/H)
 
(9 intermediate revisions by 9 users not shown)
Line 18:
 
=={{header|APL}}==
<syntaxhighlight lang="apl">task←{
jaccard ← (≢∩)÷(≢∪)
 
Line 29:
 
'.ABCDEF' ⍪ 'ABCDEF' , ∘.jaccard⍨ A B C D E F
}</syntaxhighlight>
{{out}}
<pre>. A B C D E F
Line 38:
E 0 0.5 0.5 0.125 1 0
F 0 0 0 0.2 0 1 </pre>
 
=={{header|Arturo}}==
 
<syntaxhighlight lang="arturo">jaccard: function [a b][
; Jaccard index of the blocks a and b, returned as a rational.
; Two empty sets are given the index 1 instead of dividing 0 by 0.
if and? empty? a empty? b -> return to :rational 1
; x is the size of the intersection, y the size of the union
x: size intersection a b
y: size union a b
fdiv to :rational x to :rational y
]
 
; the six sample sets, in the order A to F
sets: [
[]
[1 2 3 4 5]
[1 3 5 7 9]
[2 4 6 8 10]
[2 3 5 7]
[8]
]
 
; print the index for every unordered pair of sets, each set also paired with itself
loop combine.repeated.by: 2 sets 'p ->
print [pad ~"|p\0|" 12 pad ~"|p\1|" 12 "->" jaccard p\0 p\1]</syntaxhighlight>
 
{{out}}
 
<pre> [] [] -> 1/1
[] [1 2 3 4 5] -> 0/1
[] [1 3 5 7 9] -> 0/1
[] [2 4 6 8 10] -> 0/1
[] [2 3 5 7] -> 0/1
[] [8] -> 0/1
[1 2 3 4 5] [1 2 3 4 5] -> 1/1
[1 2 3 4 5] [1 3 5 7 9] -> 3/7
[1 2 3 4 5] [2 4 6 8 10] -> 1/4
[1 2 3 4 5] [2 3 5 7] -> 1/2
[1 2 3 4 5] [8] -> 0/1
[1 3 5 7 9] [1 3 5 7 9] -> 1/1
[1 3 5 7 9] [2 4 6 8 10] -> 0/1
[1 3 5 7 9] [2 3 5 7] -> 1/2
[1 3 5 7 9] [8] -> 0/1
[2 4 6 8 10] [2 4 6 8 10] -> 1/1
[2 4 6 8 10] [2 3 5 7] -> 1/8
[2 4 6 8 10] [8] -> 1/5
[2 3 5 7] [2 3 5 7] -> 1/1
[2 3 5 7] [8] -> 0/1
[8] [8] -> 1/1</pre>
 
=={{header|BQN}}==
<syntaxhighlight lang="bqn">Jaccard ← ≡◶⟨∊ ÷○(+´) ∊∘∾, 1⟩
 
a ← ⟨⟩
Line 49 ⟶ 94:
f ← ⟨8⟩
 
Jaccard⌜˜ ⟨a,b,c,d,e,f⟩</syntaxhighlight>
{{out}}
<pre>┌─
Line 59 ⟶ 104:
0 0 0 0.2 0 1
┘</pre>
 
=={{header|Emacs Lisp}}==
<syntaxhighlight lang="lisp">
(defun similarity (p1 p2)
  "Return the Jaccard similarity of the lists P1 and P2.
The result is the integer 1 when both lists are empty; otherwise it is
the float obtained by dividing the size of the intersection by the
size of the union."
  (cond
   ((not (or p1 p2)) 1)
   (t
    (let ((shared (seq-length (seq-intersection p1 p2)))
          (total (seq-length (seq-uniq (seq-union p1 p2)))))
      (/ (float shared) (float total))))))

(let* ((sets '(A ()
               B (1 2 3 4 5)
               C (1 3 5 7 9)
               D (2 4 6 8 10)
               E (2 3 5 7)
               F (8)))
       ;; The set names are the property keys, i.e. every other element
       ;; of the plist starting with the first.
       (names (cl-loop for (name _members) on sets by #'cddr
                       collect name)))

  (switch-to-buffer-other-window "*similarity result*")
  (erase-buffer)

  ;; Header row: one column label per set.
  (insert (format " %s\n"
                  (mapconcat (lambda (name) (format " %s" name))
                             names
                             "")))
  ;; One row per set, holding its similarity to every set in turn.
  (dolist (row names)
    (insert (format "%s %s\n" row
                    (mapconcat
                     (lambda (col)
                       (format " %3.3f"
                               (similarity (plist-get sets row)
                                           (plist-get sets col))))
                     names
                     "")))))
</syntaxhighlight>
{{out}}
<pre>
A B C D E F
A 1.000 0.000 0.000 0.000 0.000 0.000
B 0.000 1.000 0.429 0.250 0.500 0.000
C 0.000 0.429 1.000 0.000 0.500 0.000
D 0.000 0.250 0.000 1.000 0.125 0.200
E 0.000 0.500 0.500 0.125 1.000 0.000
F 0.000 0.000 0.000 0.200 0.000 1.000
</pre>
 
=={{header|Factor}}==
{{works with|Factor|0.99 2021-06-02}}
<syntaxhighlight lang="factor">USING: assocs formatting grouping kernel math math.combinatorics
prettyprint sequences sequences.repeating sets ;
 
Line 71 ⟶ 158:
{ { } { 1 2 3 4 5 } { 1 3 5 7 9 } { 2 4 6 8 10 } { 2 3 5 7 } { 8 } }
[ 2 <combinations> ] [ 2 repeat 2 group append ] bi
[ 2dup jaccard "%u %u -> %u\n" printf ] assoc-each</syntaxhighlight>
{{out}}
<pre>
Line 98 ⟶ 185:
 
=={{header|Haskell}}==
<syntaxhighlight lang="haskell">import Control.Applicative (liftA2)
import Data.List (genericLength, intersect, nub, union)
import Data.List.Split (chunksOf)
Line 146 ⟶ 233:
, ("D", [2, 4, 6, 8, 10])
, ("E", [2, 3, 5, 7])
, ("F", [8])]</syntaxhighlight>
{{out}}
<pre>
Line 165 ⟶ 252:
+---++---+-----+-----+-----+-----+-----+
</pre>
 
=={{header|J}}==
<syntaxhighlight lang="j">jaccard=. +&# (] %&x: -) [ -&# -.
NB. x jaccard y : tacit dyad giving (size of intersection) % (size of union)
NB. as an extended-precision rational.
NB.   [ -&# -.    (#x) - #(x -. y) : how many items of x also occur in y
NB.   +&#         (#x) + (#y)
NB.   ] %&x: -    right % (left - right), i.e. intersection % ((#x) + (#y) - intersection)
NB. Two empty arguments give 0 here (0 % 0 is 0 in J); see the first cell of the output table.
 
a=. $~ 0  NB. empty list
b=. 1 2 3 4 5
c=. 1 3 5 7 9
d=. 2 4 6 8 10
e=. 2 3 5 7
f=. , 8  NB. ravel so that 8 is a one-item list
 
NB. table of jaccard applied inside the boxes for every ordered pair of the six sets
jaccard&.>/~ a ; b ; c ; d ; e ; f</syntaxhighlight>
{{out}}
<pre>┌─┬───┬───┬───┬───┬───┐
│0│0 │0 │0 │0 │0 │
├─┼───┼───┼───┼───┼───┤
│0│1 │3r7│1r4│1r2│0 │
├─┼───┼───┼───┼───┼───┤
│0│3r7│1 │0 │1r2│0 │
├─┼───┼───┼───┼───┼───┤
│0│1r4│0 │1 │1r8│1r5│
├─┼───┼───┼───┼───┼───┤
│0│1r2│1r2│1r8│1 │0 │
├─┼───┼───┼───┼───┼───┤
│0│0 │0 │1r5│0 │1 │
└─┴───┴───┴───┴───┴───┘</pre>
 
=={{header|jq}}==
Line 175 ⟶ 288:
<br>
'''Preliminaries'''
<syntaxhighlight lang="jq">def lpad($len): tostring | ($len - length) as $l | (" " * $l)[:$l] + .;
 
def gcd(a; b):
Line 183 ⟶ 296:
else [.[1], .[0] % .[1]] | rgcd
end;
[a,b] | rgcd;</syntaxhighlight>
<br>
'''The Task'''
<syntaxhighlight lang="jq">def rjaccardIndex(x; y):
def i(a;b): a - (a-b);
def u(a;b): a + (b - i(a;b)) | unique;
Line 221 ⟶ 334:
| " \($name): \(.)" ) ;
 
task</syntaxhighlight>
{{out}}
<pre>
Line 234 ⟶ 347:
 
=={{header|Julia}}==
<syntaxhighlight lang="julia">J(A, B) = begin i, u = length(A ∩ B), length(A ∪ B); u == 0 ? 1//1 : i // u end
 
A = Int[]
Line 249 ⟶ 362:
replace(string(J(a, b)), "//" => "/"))
end
</syntaxhighlight>{{out}}
<pre>
Set A Set B J(A, B)
Line 289 ⟶ 402:
[8] [2, 3, 5, 7] 0/1
[8] [8] 1/1
</pre>
 
=={{header|Nim}}==
<syntaxhighlight lang="Nim">import std/[rationals, strformat]
 
type Set8 = set[int8]

const
  A: Set8 = {}
  B: Set8 = {1, 2, 3, 4, 5}
  C: Set8 = {1, 3, 5, 7, 9}
  D: Set8 = {2, 4, 6, 8, 10}
  E: Set8 = {2, 3, 5, 7}
  F: Set8 = {8}

  # Each sample set paired with the letter used to label it in the output.
  List = [('A', A), ('B', B), ('C', C), ('D', D), ('E', E), ('F', F)]

func J(a, b: Set8): Rational[int] =
  ## Jaccard index of `a` and `b`: size of the intersection over size of
  ## the union, as a rational.
  ## Yields 1 whenever both cardinalities agree, which covers the case of
  ## two empty sets.
  let
    shared = (a * b).card
    total = (a + b).card
  if shared == total:
    return 1 // 1
  shared // total

# Walk the upper triangle of the pair table; for distinct sets also print
# the mirrored pair right after it.
for i, first in List:
  for j in i .. List.high:
    let second = List[j]
    echo &"J({first[0]}, {second[0]}) = {J(first[1], second[1])}"
    if j != i:
      echo &"J({second[0]}, {first[0]}) = {J(second[1], first[1])}"
</syntaxhighlight>
 
{{out}}
<pre>J(A, A) = 1/1
J(A, B) = 0/1
J(B, A) = 0/1
J(A, C) = 0/1
J(C, A) = 0/1
J(A, D) = 0/1
J(D, A) = 0/1
J(A, E) = 0/1
J(E, A) = 0/1
J(A, F) = 0/1
J(F, A) = 0/1
J(B, B) = 1/1
J(B, C) = 3/7
J(C, B) = 3/7
J(B, D) = 1/4
J(D, B) = 1/4
J(B, E) = 1/2
J(E, B) = 1/2
J(B, F) = 0/1
J(F, B) = 0/1
J(C, C) = 1/1
J(C, D) = 0/1
J(D, C) = 0/1
J(C, E) = 1/2
J(E, C) = 1/2
J(C, F) = 0/1
J(F, C) = 0/1
J(D, D) = 1/1
J(D, E) = 1/8
J(E, D) = 1/8
J(D, F) = 1/5
J(F, D) = 1/5
J(E, E) = 1/1
J(E, F) = 0/1
J(F, E) = 0/1
J(F, F) = 1/1
</pre>
 
=={{header|Phix}}==
<!--<syntaxhighlight lang="phix">(phixonline)-->
<span style="color: #008080;">with</span> <span style="color: #008080;">javascript_semantics</span>
<span style="color: #008080;">include</span> <span style="color: #000000;">sets</span><span style="color: #0000FF;">.</span><span style="color: #000000;">e</span>
Line 321 ⟶ 504:
<span style="color: #008080;">end</span> <span style="color: #008080;">for</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">for</span>
<!--</syntaxhighlight>-->
{{out}}
<pre>
Line 348 ⟶ 531:
 
=={{header|Perl}}==
<syntaxhighlight lang="perl">#!/usr/bin/perl
 
use strict;
Line 372 ⟶ 555:
%union ? (grep $_ == 2, values %union) / (keys %union) : 1, "\n";
}
}</syntaxhighlight>
{{out}}
<pre>
Line 422 ⟶ 605:
 
=={{header|Prolog}}==
<syntaxhighlight lang="prolog">
show([]).
show([X|Xs]):- write(X), show(Xs).
Line 433 ⟶ 616:
findall(X, (member(X,B), not(member(X,A))), T), append(A,T,U), length(U,M),
j(N,M,J), show(["A = ",A,", B = ",B,", J = ",J]), nl)).
</syntaxhighlight>
{{out}}
<pre>
Line 474 ⟶ 657:
A = [8], B = [8], J = 1
true.
</pre>
 
=={{header|Python}}==
<syntaxhighlight lang="python">
# jaccard_index.py by Xing216
from itertools import product

A = set()
B = {1, 2, 3, 4, 5}
C = {1, 3, 5, 7, 9}
D = {2, 4, 6, 8, 10}
E = {2, 3, 5, 7}
F = {8}

# Every ordered pair of sets, and the matching pair of names in the same order.
sets = list(product([A, B, C, D, E, F], repeat=2))
set_names = list(product(["A", "B", "C", "D", "E", "F"], repeat=2))


def jaccard_index(set1, set2):
    """Return the Jaccard index |set1 & set2| / |set1 | set2| as a float.

    Two empty sets have an empty union, which would mean dividing 0 by 0;
    by convention their index is defined as 1.0 (identical sets are
    maximally similar), not 0.0.
    """
    union = set1 | set2
    if not union:
        return 1.0
    return len(set1 & set2) / len(union)


# The two lists are built by identical product() calls, so they line up
# element for element; zipping avoids searching `sets` for each pair.
for names, (i, j) in zip(set_names, sets):
    print(f"J({', '.join(names)}) -> {jaccard_index(i, j)}")
</syntaxhighlight>
{{out}}
<pre style="height: 10em">
J(A, A) -> 0.0
J(A, B) -> 0.0
J(A, C) -> 0.0
J(A, D) -> 0.0
J(A, E) -> 0.0
J(A, F) -> 0.0
J(B, A) -> 0.0
J(B, B) -> 1.0
J(B, C) -> 0.42857142857142855
J(B, D) -> 0.25
J(B, E) -> 0.5
J(B, F) -> 0.0
J(C, A) -> 0.0
J(C, B) -> 0.42857142857142855
J(C, C) -> 1.0
J(C, D) -> 0.0
J(C, E) -> 0.5
J(C, F) -> 0.0
J(D, A) -> 0.0
J(D, B) -> 0.25
J(D, C) -> 0.0
J(D, D) -> 1.0
J(D, E) -> 0.125
J(D, F) -> 0.2
J(E, A) -> 0.0
J(E, B) -> 0.5
J(E, C) -> 0.5
J(E, D) -> 0.125
J(E, E) -> 1.0
J(E, F) -> 0.0
J(F, A) -> 0.0
J(F, B) -> 0.0
J(F, C) -> 0.0
J(F, D) -> 0.2
J(F, E) -> 0.0
J(F, F) -> 1.0
</pre>
 
=={{header|Quackery}}==
 
<syntaxhighlight lang="Quackery"> [ $ "bigrat.qky" loadfile ] now!
 
( recho$ echoes a string followed by enough spaces to fill a field of n characters )
[ over size - space swap of
join echo$ ] is recho$ ( $ n --> )
 
( recho unbuilds x to a string and echoes it with recho$ )
[ dip unbuild recho$ ] is recho ( x n --> )
 
( set folds a nest of numbers into one bitmask by or-ing together the bit of each item )
[ 0 swap witheach [ bit | ] ] is set ( [ --> n )
 
( sets are bitmasks, so intersection is bitwise and, union is bitwise or )
[ & ] is intersection ( n n --> n )
 
[ | ] is union ( n n --> n )
 
( items lists the positions of the set bits of a bitmask, lowest position first )
[ [] 0 rot
[ dup 0 > while
dup 1 & if
[ dip [ tuck join swap ] ]
dip 1+
1 >> again ]
2drop ] is items ( n --> [ )
 
( jaccard leaves intersection size over union size as a numerator and a denominator )
( equal sets, two empty ones included, give 1/1 straight away )
( NOTE review - the final zero-denominator guard looks unreachable, since a zero-size union means both sets are empty and therefore equal - confirm )
[ 2dup = iff [ 2drop 1 1 ] done
2dup union items size
dip [ intersection items size ]
dup 0 = if [ 2drop 0 1 ]
] is jaccard ( n n --> n/d )
 
( the six sample sets, each built once into a bitmask constant )
[ ' [ ] set ] constant is A ( --> n )
[ ' [ 1 2 3 4 5 ] set ] constant is B ( --> n )
[ ' [ 1 3 5 7 9 ] set ] constant is C ( --> n )
[ ' [ 2 4 6 8 10 ] set ] constant is D ( --> n )
[ ' [ 2 3 5 7 ] set ] constant is E ( --> n )
[ ' [ 8 ] set ] constant is F ( --> n )
 
( print each set paired with itself and with every later set - behead drop shortens the inner list after each outer pass )
( proper$ is not defined here, presumably it comes from bigrat.qky )
' [ A B C D E F ]
dup witheach
[ over witheach
[ over items 15 recho
dup items 15 recho
say "--> "
2dup jaccard
proper$ echo$
cr drop ]
drop
behead drop ]
drop</syntaxhighlight>
 
{{out}}
 
<pre>[ ] [ ] --> 1
[ ] [ 1 2 3 4 5 ] --> 0
[ ] [ 1 3 5 7 9 ] --> 0
[ ] [ 2 4 6 8 10 ] --> 0
[ ] [ 2 3 5 7 ] --> 0
[ ] [ 8 ] --> 0
[ 1 2 3 4 5 ] [ 1 2 3 4 5 ] --> 1
[ 1 2 3 4 5 ] [ 1 3 5 7 9 ] --> 3/7
[ 1 2 3 4 5 ] [ 2 4 6 8 10 ] --> 1/4
[ 1 2 3 4 5 ] [ 2 3 5 7 ] --> 1/2
[ 1 2 3 4 5 ] [ 8 ] --> 0
[ 1 3 5 7 9 ] [ 1 3 5 7 9 ] --> 1
[ 1 3 5 7 9 ] [ 2 4 6 8 10 ] --> 0
[ 1 3 5 7 9 ] [ 2 3 5 7 ] --> 1/2
[ 1 3 5 7 9 ] [ 8 ] --> 0
[ 2 4 6 8 10 ] [ 2 4 6 8 10 ] --> 1
[ 2 4 6 8 10 ] [ 2 3 5 7 ] --> 1/8
[ 2 4 6 8 10 ] [ 8 ] --> 1/5
[ 2 3 5 7 ] [ 2 3 5 7 ] --> 1
[ 2 3 5 7 ] [ 8 ] --> 0
[ 8 ] [ 8 ] --> 1
</pre>
 
=={{header|Raku}}==
<syntaxhighlight lang="raku" line>sub J(\A, \B) { A ∪ B ?? (A ∩ B) / (A ∪ B) !! A ∪ B == A ∩ B ?? 1 !! 0 }
 
my %p =
Line 490 ⟶ 809:
.say for %p.sort;
say '';
say "J({.join: ','}) = ", J |%p{$_} for [X] <A B C D E F> xx 2;</syntaxhighlight>
{{out}}
<pre>A => ()
Line 535 ⟶ 854:
J(F,E) = 0
J(F,F) = 1</pre>
 
=={{header|RPL}}==
{{works with|Halcyon Calc|4.2.7}}
{| class="wikitable"
! RPL code
! Comment
|-
|
≪ → a b
≪ a 1 b SIZE FOR j
b j GET IF a OVER POS THEN DROP ELSE + END
NEXT
≫ ≫ ''''UNION'''' STO
≪ → a b
≪ { } 1 a SIZE FOR j
a j GET IF b OVER POS THEN + ELSE DROP END
NEXT
≫ ≫ ''''INTER'''' STO
≪ → a b
≪ a b '''INTER''' SIZE a b '''UNION''' SIZE /
≫ ≫ ''''JACAR'''' STO
|
'''UNION''' ''( {A} {B} -- {A ∪ B} )''
Scan {B}...
... and add to {A} all {B} items not already in {A}
'''INTER''' ''( {A} {B} -- {A ∩ B} )''
Scan {A}...
... and keep {A} items also in {B}
'''JACAR''' ''( {A} {B} -- Jaccard_index )''
Divide the size of {A ∩ B} by the size of {A ∪ B}; two empty lists are not special-cased
|}
{{in}}
<pre>
{ 1 2 3 4 5 } { 1 3 5 7 9 } JACAR
{ 1 3 5 7 9 } { 1 2 3 4 5 } JACAR
</pre>
 
{{out}}
<pre>
2: 0.428571428571
1: 0.428571428571
</pre>
 
=={{header|Wren}}==
{{libheader|Wren-set}}
{{libheader|Wren-traititerate}}
{{libheader|Wren-fmt}}
Note that the Set object in the above module is implemented as a Map and consequently the iteration order (and the order in which elements are printed) is undefined.
<syntaxhighlight lang="wren">import "./set" for Set
import "./traititerate" for Indexed
import "./fmt" for Fmt
 
Line 572 ⟶ 942:
Fmt.print("J($s, $s) = $h", i1, i2, jaccardIndex.call(v1, v2))
}
}</syntaxhighlight>
 
{{out}}
9,482

edits