Levenshtein distance/Alignment: Difference between revisions

Content added Content deleted

Inline

Latest revision as of 08:13, 25 May 2024

The Levenshtein distance algorithm returns the number of atomic operations (insertion, deletion or edition) that must be performed on a string in order to obtain an other one, but it does not say anything about the actual operations used or their order.

An alignment is a notation used to describe the operations used to turn a string into an other. At some point in the strings, the minus character ('-') is placed in order to signify that a character must be added at this very place. For instance, an alignment between the words 'place' and 'palace' is:

P-LACE
PALACE

Task

Write a function that shows the alignment of two strings for the corresponding levenshtein distance.

As an example, use the words "rosettacode" and "raisethysword".

You can either implement an algorithm, or use a dedicated library (thus showing us how it is named in your language).

11l

Translation of: Nim

F levenshteinAlign(aa, bb)
   V a = aa.lowercase()
   V b = bb.lowercase()
   V costs = [[0] * (b.len + 1)] * (a.len + 1)
   L(j) 0 .. b.len
      costs[0][j] = j
   L(i) 1 .. a.len
      costs[i][0] = i
      L(j) 1 .. b.len
         V tmp = costs[i - 1][j - 1] + Int(a[i - 1] != b[j - 1])
         costs[i][j] = min(1 + min(costs[i - 1][j], costs[i][j - 1]), tmp)

   V aPathRev = ‘’
   V bPathRev = ‘’
   V i = a.len
   V j = b.len
   L i != 0 & j != 0
      V tmp = costs[i - 1][j - 1] + Int(a[i - 1] != b[j - 1])
      I costs[i][j] == tmp
         aPathRev ‘’= a[--i]
         bPathRev ‘’= b[--j]
      E I costs[i][j] == 1 + costs[i - 1][j]
         aPathRev ‘’= a[--i]
         bPathRev ‘’= ‘-’
      E I costs[i][j] == 1 + costs[i][j - 1]
         aPathRev ‘’= ‘-’
         bPathRev ‘’= b[--j]
      E
         assert(0B, ‘Internal error’)

   R (reversed(aPathRev), reversed(bPathRev))

V result = levenshteinAlign(‘place’, ‘palace’)
print(result[0])
print(result[1])
print()

result = levenshteinAlign(‘rosettacode’, ‘raisethysword’)
print(result[0])
print(result[1])

Output:

p-lace
palace

r-oset-tacode
raisethysword

Arturo

print join.with:"\n" levenshtein.align "place" "palace"
print join.with:"\n" levenshtein.align "rosettacode" "raisethysword"

Output:

p-lace
palace
r-oset-tacode
raisethysword

C

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

typedef struct edit_s edit_t, *edit;
struct edit_s {
	char c1, c2;
	int n;
	edit next;
};

void leven(char *a, char *b)
{
	int i, j, la = strlen(a), lb = strlen(b);
	edit *tbl = malloc(sizeof(edit) * (1 + la));
	tbl[0] = calloc((1 + la) * (1 + lb), sizeof(edit_t));
	for (i = 1; i <= la; i++)
		tbl[i] = tbl[i-1] + (1+lb);

	for (i = la; i >= 0; i--) {
		char *aa = a + i;
		for (j = lb; j >= 0; j--) {
			char *bb = b + j;
			if (!*aa && !*bb) continue;

			edit e = &tbl[i][j];
			edit repl = &tbl[i+1][j+1];
			edit dela = &tbl[i+1][j];
			edit delb = &tbl[i][j+1];

			e->c1 = *aa;
			e->c2 = *bb;
			if (!*aa) {
				e->next = delb;
				e->n = e->next->n + 1;
				continue;
			}
			if (!*bb) {
				e->next = dela;
				e->n = e->next->n + 1;
				continue;
			}

			e->next = repl;
			if (*aa == *bb) {
				e->n = e->next->n;
				continue;
			}

			if (e->next->n > delb->n) {
				e->next = delb;
				e->c1 = 0;
			}
			if (e->next->n > dela->n) {
				e->next = dela;
				e->c1 = *aa;
				e->c2 = 0;
			}
			e->n = e->next->n + 1;
		}
	}

	edit p = tbl[0];
	printf("%s -> %s: %d edits\n", a, b, p->n);

	while (p->next) {
		if (p->c1 == p->c2)
			printf("%c", p->c1);
		else {
			putchar('(');
			if (p->c1) putchar(p->c1);
			putchar(',');
			if (p->c2) putchar(p->c2);
			putchar(')');
		}

		p = p->next;
	}
	putchar('\n');

	free(tbl[0]);
	free(tbl);
}

int main(void)
{
	leven("raisethysword", "rosettacode");
	return 0;
}

Output:

raisethysword -> rosettacode: 8 edits
r(a,o)(i,)set(h,t)(y,a)(s,c)(w,)o(r,d)(d,e)

C#

Translation of: Java

using System;
using System.Text;

public class LevenshteinAlignment
{
    public static string[] Alignment(string a, string b)
    {
        a = a.ToLower();
        b = b.ToLower();

        int[,] costs = new int[a.Length + 1, b.Length + 1];

        for (int j = 0; j <= b.Length; j++)
            costs[0, j] = j;

        for (int i = 1; i <= a.Length; i++)
        {
            costs[i, 0] = i;
            for (int j = 1; j <= b.Length; j++)
            {
                costs[i, j] = Math.Min(1 + Math.Min(costs[i - 1, j], costs[i, j - 1]),
                    a[i - 1] == b[j - 1] ? costs[i - 1, j - 1] : costs[i - 1, j - 1] + 1);
            }
        }

        StringBuilder aPathRev = new StringBuilder();
        StringBuilder bPathRev = new StringBuilder();

        for (int i = a.Length, j = b.Length; i != 0 && j != 0;)
        {
            if (costs[i, j] == (a[i - 1] == b[j - 1] ? costs[i - 1, j - 1] : costs[i - 1, j - 1] + 1))
            {
                aPathRev.Append(a[--i]);
                bPathRev.Append(b[--j]);
            }
            else if (costs[i, j] == 1 + costs[i - 1, j])
            {
                aPathRev.Append(a[--i]);
                bPathRev.Append('-');
            }
            else if (costs[i, j] == 1 + costs[i, j - 1])
            {
                aPathRev.Append('-');
                bPathRev.Append(b[--j]);
            }
        }

        return new string[] { Reverse(aPathRev.ToString()), Reverse(bPathRev.ToString()) };
    }

    private static string Reverse(string s)
    {
        char[] charArray = s.ToCharArray();
        Array.Reverse(charArray);
        return new string(charArray);
    }

    public static void Main(string[] args)
    {
        string[] result = Alignment("rosettacode", "raisethysword");
        Console.WriteLine(result[0]);
        Console.WriteLine(result[1]);
    }
}

Output:

r-oset-tacode
raisethysword

C++

#include <algorithm>
#include <cstdint>
#include <iostream>
#include <string>
#include <vector>

std::string to_lower_case(const std::string& text) {
	std::string result = text;
	std::transform(result.begin(), result.end(), result.begin(),
		[](char ch){ return std::tolower(ch); });
	return result;
}

std::vector<std::string> levenshtein_alignment(std::string a, std::string b) {
        a = to_lower_case(a);
        b = to_lower_case(b);

        std::vector<std::vector<int32_t>> costs{ a.length() + 1, std::vector<int32_t>( b.length() + 1, 0 ) };
		for ( uint64_t j = 0; j <= b.length(); ++j )
			costs[0][j] = j;
		for ( uint64_t i = 1; i <= a.length(); ++i ) {
			costs[i][0] = i;
			for ( uint64_t j = 1; j <= b.length(); ++j ) {
				costs[i][j] = std::min(std::min( costs[i - 1][j], costs[i][j - 1]) + 1,
						               a[i - 1] == b[j - 1] ? costs[i - 1][j - 1] : costs[i - 1][j - 1] + 1);
			}
		}

		std::string a_path_reversed, b_path_reversed;
		uint64_t i = a.length(), j = b.length();
		while ( i != 0 && j != 0 ) {
			if ( costs[i][j] == ( a[i - 1] == b[j - 1] ? costs[i - 1][j - 1] : costs[i - 1][j - 1] + 1 ) ) {
				a_path_reversed += a[--i];
				b_path_reversed += b[--j];
			} else if ( costs[i][j] == costs[i - 1][j] + 1 ) {
				a_path_reversed += a[--i];
				b_path_reversed += "-";
			} else if ( costs[i][j] == costs[i][j - 1] + 1 ) {
				a_path_reversed += "-";
				b_path_reversed += b[--j];
			}
		}

		std::reverse(a_path_reversed.begin(), a_path_reversed.end());
		std::reverse(b_path_reversed.begin(), b_path_reversed.end());
		return std::vector<std::string>{ a_path_reversed, b_path_reversed };
}

int main() {
	std::vector<std::string> result = levenshtein_alignment("place", "palace");
	std::cout << result[0] << std::endl;
	std::cout << result[1] << std::endl;
	std::cout << std::endl;

	result = levenshtein_alignment("rosettacode", "raisethysword");
	std::cout << result[0] << std::endl;
	std::cout << result[1] << std::endl;
}

Output:

p-lace
palace

r-oset-tacode
raisethysword

D

Using the standard library.

void main() {
    import std.stdio, std.algorithm;

    immutable s1 = "rosettacode";
    immutable s2 = "raisethysword";

    string s1b, s2b;
    size_t pos1, pos2;

    foreach (immutable c; levenshteinDistanceAndPath(s1, s2)[1]) {
        final switch (c) with (EditOp) {
            case none, substitute:
                s1b ~= s1[pos1++];
                s2b ~= s2[pos2++];
                break;
            case insert:
                s1b ~= "_";
                s2b ~= s2[pos2++];
                break;
            case remove:
                s1b ~= s1[pos1++];
                s2b ~= "_";
                break;
        }
    }

    writeln(s1b, "\n", s2b);
}

Output:

r_oset_tacode
raisethysword

EasyLang

Translation of: FreeBASIC

global dparr[] dpcols .
proc dpnew a b . .
   len dparr[] a * b
   dpcols = b
.
func dp r c .
   return dparr[r * dpcols + c + 1]
.
proc dps r c v . .
   dparr[r * dpcols + c + 1] = v
.
proc align a$ b$ . ar$ br$ .
   dpnew (len a$ + 1) (len b$ + 1)
   for i = 1 to len a$
      dps i 0 i
   .
   for j = 0 to len b$
      dps 0 j j
   .
   for i = 1 to len a$
      for j = 1 to len b$
         if substr a$ i 1 = substr b$ j 1
            dps i j dp (i - 1) (j - 1)
         else
            dps i j lower (lower dp (i - 1) j dp i (j - 1)) dp (i - 1) (j - 1) + 1
         .
      .
   .
   ar$ = "" ; br$ = ""
   i = len a$ ; j = len b$
   while i <> 0 and j <> 0
      if substr a$ i 1 = substr b$ j 1 or dp i j = dp (i - 1) (j - 1) + 1
         ar$ = substr a$ i 1 & ar$
         i -= 1
         br$ = substr b$ j 1 & br$
         j -= 1
      elif dp i j = dp (i - 1) j + 1
         ar$ = substr a$ i 1 & ar$
         i -= 1
         br$ = "-" & br$
      else
         ar$ = "-" & ar$
         br$ = substr b$ j 1 & br$
         j -= 1
      .
   .
.
align "rosettacode" "raisethysword" ar$ br$
print ar$ ; print br$

Output:

r-oset-tacode
raisethysword

Eiffel

distance (source, target: STRING): INTEGER
		-- Minimum number of operations to turn `source' into `target'.
	local
		l_distance: ARRAY2 [INTEGER]
		del, ins, subst: INTEGER
	do
		create l_distance.make (source.count, target.count)
		⟳ ic:(1 |..| source.count) ¦ l_distance [ic, 1] := ic - 1 ⟲
		⟳ ij:(1 |..| target.count) ¦ l_distance [1, ij] := ij - 1 ⟲

		⟳ ic:(2 |..| source.count) ¦
			⟳ jc:(2 |..| target.count) ¦
				if source [ic] = target [jc] then			-- same char
					l_distance [ic, jc] := l_distance [ic - 1, jc - 1]
				else							-- diff char
					del := l_distance [ic - 1, jc]			-- delete?
					ins := l_distance [ic, jc - 1]			-- insert?
					subst := l_distance [ic - 1, jc -1]		-- substitute/swap?
					l_distance [ic, jc] := del.min (ins.min (subst)) + 1
				end
			⟲
		⟲
		Result:= l_distance [source.count, target.count]
	end

Output:

Result = 8

The ⟳ ¦ ⟲ represents the "symbolic form" of an Eiffel across loop. In the case above, the first two ⟳ (loops) initialize the first column and the first row, and then two nested ⟳ (loops) to walk the distance computation.

Also—the `ic' is this programmers shorthand for ITERATION_CURSOR. Therefore, in the nested loops, the `ic' is the iteration cursor following `i' and the `ij' is following `j' as indexes on the source, target, and l_distance.

FreeBASIC

#define min(a, b) iif((a) < (b), (a), (b))

Sub LevenshteinAlignment(s1 As String, s2 As String)
    Dim As String align1 = "", align2 = ""
    Dim As Integer i, j, len1, len2
    len1 = Len(s1): 
    
    len2 = Len(s2)
    Dim As Integer dp(len1+1, len2+1)
    
    For i = 0 To len1
        dp(i, 0) = i
    Next i
    For j = 0 To len2
        dp(0, j) = j
    Next j
    
    For i = 1 To len1
        For j = 1 To len2
            If Mid(s1, i, 1) = Mid(s2, j, 1) Then
                dp(i, j) = dp(i-1, j-1)
            Else
                dp(i, j) = min(min(dp(i-1, j), dp(i, j-1)), dp(i-1, j-1)) + 1
            End If
        Next j
    Next i
    
    i = len1: j = len2
    While i > 0 And j > 0
        If Mid(s1, i, 1) = Mid(s2, j, 1) Then
            align1 = Mid(s1, i, 1) + align1
            align2 = Mid(s2, j, 1) + align2
            i -= 1: j -= 1
        Elseif dp(i, j) = dp(i-1, j-1) + 1 Then
            align1 = Mid(s1, i, 1) + align1
            align2 = Mid(s2, j, 1) + align2
            i -= 1: j -= 1
        Elseif dp(i, j) = dp(i-1, j) + 1 Then
            align1 = Mid(s1, i, 1) + align1
            align2 = "-" + align2
            i -= 1
        Else
            align1 = "-" + align1
            align2 = Mid(s2, j, 1) + align2
            j -= 1
        End If
    Wend
    
    While i > 0
        align1 = Mid(s1, i, 1) + align1
        align2 = "-" + align2
        i -= 1
    Wend
    While j > 0
        align1 = "-" + align1
        align2 = Mid(s2, j, 1) + align2
        j -= 1
    Wend
    
    Print "Levenshtein Distance: "; dp(len1, len2)
    Print align1
    Print align2
    Print
End Sub

LevenshteinAlignment("rosettacode", "raisethysword")
LevenshteinAlignment("place", "palace")

Sleep

Output:

Levenshtein Distance:  8
r-oset-tacode
raisethysword

Levenshtein Distance:  1
p-lace
palace

Go

Library: biogo

Alignment computed by the Needleman-Wunch algorithm, which is used in bioinformatics.

package main

import (
    "fmt"

    "github.com/biogo/biogo/align"
    ab "github.com/biogo/biogo/alphabet"
    "github.com/biogo/biogo/feat"
    "github.com/biogo/biogo/seq/linear"
)

func main() {
    // Alphabets for things like DNA are predefined in biogo, but we
    // define our own here.
    lc := ab.Must(ab.NewAlphabet("-abcdefghijklmnopqrstuvwxyz",
        feat.Undefined, '-', 0, true))
    // Construct scoring matrix for Needleman-Wunch algorithm.
    // We leave zeros on the diagonal for the Levenshtein distance of an
    // exact match and put -1s everywhere else for the Levenshtein distance
    // of an edit.
    nw := make(align.NW, lc.Len())
    for i := range nw {
        r := make([]int, lc.Len())
        nw[i] = r
        for j := range r {
            if j != i {
                r[j] = -1
            }
        }
    }
    // define input sequences
    a := &linear.Seq{Seq: ab.BytesToLetters([]byte("rosettacode"))}
    a.Alpha = lc
    b := &linear.Seq{Seq: ab.BytesToLetters([]byte("raisethysword"))}
    b.Alpha = lc
    // perform alignment
    aln, err := nw.Align(a, b)
    // format and display result
    if err != nil {
        fmt.Println(err)
        return
    }
    fa := align.Format(a, b, aln, '-')
    fmt.Printf("%s\n%s\n", fa[0], fa[1])
    aa := fmt.Sprint(fa[0])
    ba := fmt.Sprint(fa[1])
    ma := make([]byte, len(aa))
    for i := range ma {
        if aa[i] == ba[i] {
            ma[i] = ' '
        } else {
            ma[i] = '|'
        }
    }
    fmt.Println(string(ma))
}

Output:

The lines after the alignment point out the 8 edits.

r-oset-tacode
raisethysword
 ||   |||| ||

Haskell

The Wagner–Fischer matrix construction is adopted from Levenshtein_distance#Haskell, however it is reversed in order to simplify it's traversing.

costs :: String -> String -> [[Int]]
costs s1 s2 = reverse $ reverse <$> matrix
  where
    matrix = scanl transform [0 .. length s1] s2
    transform ns@(n:ns1) c = scanl calc (n + 1) $ zip3 s1 ns ns1
      where
        calc z (c1, x, y) = minimum [ y + 1, z + 1
                                    , x + fromEnum (c1 /= c)]

levenshteinDistance :: String -> String -> Int
levenshteinDistance s1 s2 = head.head $ costs s1 s2

A sample alignment

The alignment is built in the same way as it is done in Levenshtein_distance/Alignment#Java. Instead of indices the Wagner–Fischer matrix and strings are traversed by list truncation, as zippers.

alignment :: String -> String -> (String, String)
alignment s1 s2 = go (costs s1 s2) (reverse s1) (reverse s2) ([],[])
  where
    go c _ _ r | isEmpty c = r
    go _ [] s r = ('-' <$ s, reverse s) <> r
    go _ s [] r = (reverse s, '-' <$ s) <> r
    go c s1@(h1:t1) s2@(h2:t2) (r1, r2) =
      let temp = (get.nextCol.nextRow $ c) + if h1 == h2 then 0 else 1
      in case get c of
        x | x == temp -> go (nextRow.nextCol $ c) t1 t2 (h1:r1, h2:r2)
          | x == 1 + (get.nextCol $ c) -> go (nextCol c) s1 t2 ('-':r1, h2:r2)
          | x == 1 + (get.nextRow $ c) -> go (nextRow c) t1 s2 (h1:r1, '-':r2)

    -- Functions which treat table as zipper
    get ((h:_):_) = h
    nextRow = map tail
    nextCol = tail
    isEmpty c = null c || null (head c)

λ> alignment "palace" "place"
("palace","p-lace")

λ> alignment "rosettacode" "raisethysword"
("r-oset-tacode","raisethysword")

λ> alignment "rosettacode" "rat"
("rosettacode","r-----a---t")

All alignments

The alternative solution, which extensively produces all minimal alignments for given strings.

-- Produces all possible alignments for two strings.
allAlignments :: String -> String -> [[(Char, Char)]]
allAlignments s1 s2 = go (length s2 - length s1) s1 s2
  where
    go _ s [] = [(\x -> (x, '-')) <$> s]
    go _ [] s = [(\x -> ('-' ,x)) <$> s]
    go n s1@(h1:t1) s2@(h2:t2) = (h1, h2) <:> go n t1 t2
      ++ case compare n 0 of
           LT -> (h1, '-') <:> go (n+1) t1 s2
           EQ -> []
           GT -> ('-', h2) <:> go (n-1) s1 t2

    x <:> l = fmap (x :) l

-- Returns a lazy list of all optimal alignments.
levenshteinAlignments :: String -> String -> [(String, String)]
levenshteinAlignments s1 s2 = unzip <$> best
  where
    best = filter ((lev ==) . dist) $ allAlignments s1 s2
    lev = levenshteinDistance s1 s2
    dist = length . filter (uncurry (/=))

λ> mapM_ print $ levenshteinAlignments "rosettacode" "raisethysword"
("ro-settac-ode","raisethysword")
("ro-setta-code","raisethysword")
("ro-sett-acode","raisethysword")
("ro-set-tacode","raisethysword")
("r-osettac-ode","raisethysword")
("r-osetta-code","raisethysword")
("r-osett-acode","raisethysword")
("r-oset-tacode","raisethysword")

λ> mapM_ print $ levenshteinAlignments "rosettacode" "rat"
("rosettacode","ra--t------")
("rosettacode","ra---t-----")
("rosettacode","r-a-t------")
("rosettacode","r-a--t-----")
("rosettacode","r--at------")
("rosettacode","r--a-t-----")
("rosettacode","r---at-----")
("rosettacode","r-----at---")
("rosettacode","r-----a-t--")
("rosettacode","r-----a--t-")
("rosettacode","r-----a---t")

J

Translation of java:

levalign=:4 :0
  assert. x <:&# y
  D=. x +/&i.&>:&# y
  for_i. 1+i.#x do.
    for_j. 1+i.#y do.
      if. ((<:i){x)=(<:j){y do.
        D=. (D {~<<:i,j) (<i,j)} D
      else.
        min=. 1+<./D{~(i,j) <@:-"1#:1 2 3
        D=. min (<i,j)} D
      end.
    end.
  end.
  A=. B=. ''
  ij=. x ,&# y
  while. */ij do.
    'd00 d01 d10 d11'=. D{~ ij <@:-"1#:i.4
    'x1 y1'=. (ij-1){each x;y
    if. d00 = d11+x1~:y1 do.
      A=. A,x1 [ B=. B,y1 [ ij=. ij-1
    elseif. d00 = 1+d10 do.
      A=. A,x1 [ B=. B,'-'[ ij=. ij-1 0
    elseif. d00 = 1+d01 do.
      A=. A,'-'[ B=. B,y1 [ ij=. ij-0 1
    end.
  end.
  A,:&|.B
)

Task examples:

   'place' levalign 'palace'
p-lace
palace
   'rosettacode' levalign 'raisethysword'
r-oset-tacode
raisethysword

Java

public class LevenshteinAlignment {

    public static String[] alignment(String a, String b) {
        a = a.toLowerCase();
        b = b.toLowerCase();
        // i == 0
        int[][] costs = new int[a.length()+1][b.length()+1];
        for (int j = 0; j <= b.length(); j++)
            costs[0][j] = j;
        for (int i = 1; i <= a.length(); i++) {
            costs[i][0] = i;
            for (int j = 1; j <= b.length(); j++) {
                costs[i][j] = Math.min(1 + Math.min(costs[i-1][j], costs[i][j-1]), a.charAt(i - 1) == b.charAt(j - 1) ? costs[i-1][j-1] : costs[i-1][j-1] + 1);
            }
        }

	// walk back through matrix to figure out path
	StringBuilder aPathRev = new StringBuilder();
	StringBuilder bPathRev = new StringBuilder();
	for (int i = a.length(), j = b.length(); i != 0 && j != 0; ) {
	    if (costs[i][j] == (a.charAt(i - 1) == b.charAt(j - 1) ? costs[i-1][j-1] : costs[i-1][j-1] + 1)) {
		aPathRev.append(a.charAt(--i));
		bPathRev.append(b.charAt(--j));
	    } else if (costs[i][j] == 1 + costs[i-1][j]) {
		aPathRev.append(a.charAt(--i));
		bPathRev.append('-');
	    } else if (costs[i][j] == 1 + costs[i][j-1]) {
		aPathRev.append('-');
		bPathRev.append(b.charAt(--j));
	    }
	}
        return new String[]{aPathRev.reverse().toString(), bPathRev.reverse().toString()};
    }

    public static void main(String[] args) {
	String[] result = alignment("rosettacode", "raisethysword");
	System.out.println(result[0]);
	System.out.println(result[1]);
    }
}

Output:

r-oset-tacode
raisethysword

jq

Adapted from Wren

Works with jq, the C implementation of jq

Works with gojq, the Go implementation of jq

Works with jaq, the Rust implementation of jq

def reverseString: explode | reverse | implode;

def levenshteinAlign($x; $y):
  def min($a;$b): [$a,$b]|min;

    ($x|length + 1) as $a1
  | ($y|length + 1) as $b1
  | ($x | ascii_downcase) as $a
  | ($y | ascii_downcase) as $b
  | [range(0; $b1) | 0] as $zeros
  | {costs: [range(0; $a1) | $zeros]}
  | reduce range(0; $b1) as $j (.; .costs[0][$j] = $j)
  | reduce range(1; $a1) as $i (.;
      .costs[$i][0] = $i
      | reduce range(1; $b1) as $j (.;
            (.costs[$i - 1][$j - 1] + (if $a[$i - 1: $i] == $b[$j - 1: $j] then 0 else 1 end)) as $temp
            | .costs[$i][$j] = min( $temp;  1 + min(.costs[$i - 1][$j]; .costs[$i][$j - 1] )) ))

  # walk back through matrix to figure out path
  | .aPathRev = ""
  | .bPathRev = ""
  | .i = ($a|length)
  | .j = ($b|length)
  | until (.i == 0 or .j == 0;
        (.costs[.i - 1][.j - 1] + (if $a[.i - 1: .i] == $b[.j - 1: .j] then 0 else 1 end)) as $temp
        | .costs[.i][.j] as $cij
        | if $cij == $temp
          then .i += -1
          | .aPathRev += $a[.i: .i+1]
          | .j += -1
          | .bPathRev += $b[.j: .j+1]
          elif $cij == 1 + .costs[.i-1][.j]
          then .i += -1
          | .aPathRev += $a[.i:.i+1]
          | .bPathRev += "-"
          elif $cij == 1 + .costs[.i][.j-1]
          then .aPathRev += "-"
          | .j += -1
          | .bPathRev += $b[.j: .j+1]
          end)
   | [.aPathRev, .bPathRev ] | map(reverseString);

levenshteinAlign("place"; "palace"),
levenshteinAlign("rosettacode";"raisethysword")

Output:

[
  "p-lace",
  "palace"
]
[
  "r-oset-tacode",
  "raisethysword"
]

Julia

Works with: Julia version 0.6

Translation of: Java

function levenshteinalign(a::AbstractString, b::AbstractString)
    a = lowercase(a)
    b = lowercase(b)
    len_a = length(a)
    len_b = length(b)

    costs = Matrix{Int}(len_a + 1, len_b + 1)
    costs[1, :] .= 0:len_b
    @inbounds for i in 2:(len_a + 1)
        costs[i, 1] = i
        for j in 2:(len_b + 1)
            tmp = ifelse(a[i-1] == b[j-1], costs[i-1, j-1], costs[i-1, j-1] + 1)
            costs[i, j] = min(1 + min(costs[i-1, j], costs[i, j-1]), tmp)
        end
    end

    apathrev = IOBuffer()
    bpathrev = IOBuffer()
    local i = len_a + 1
    local j = len_b + 1
    @inbounds while i != 1 && j != 1
        tmp = ifelse(a[i-1] == b[j-1], costs[i-1, j-1], costs[i-1, j-1] + 1)
        if costs[i, j] == tmp
            i -= 1
            j -= 1
            print(apathrev, a[i])
            print(bpathrev, b[j])
        elseif costs[i, j] == 1 + costs[i-1, j]
            i -= 1
            print(apathrev, a[i])
            print(bpathrev, '-')
        elseif costs[i, j] == 1 + costs[i, j-1]
            j -= 1
            print(apathrev, '-')
            print(bpathrev, b[j])
        end
    end

    return reverse(String(take!(apathrev))), reverse(String(take!(bpathrev)))
end

foreach(println, levenshteinalign("rosettacode", "raisethysword"))
foreach(println, levenshteinalign("place", "palace"))

Output:

r-oset-tacode
raisethysword
p-lace
palace

Kotlin

Translation of: Java

// version 1.1.3

fun levenshteinAlign(a: String, b: String): Array<String> {
    val aa = a.toLowerCase()
    val bb = b.toLowerCase()
    val costs = Array(a.length + 1) { IntArray(b.length + 1) }
    for (j in 0..b.length) costs[0][j] = j
    for (i in 1..a.length) {
        costs[i][0] = i
        for (j in 1..b.length) {
            val temp = costs[i - 1][j - 1] + (if (aa[i - 1] == bb[j - 1]) 0 else 1) 
            costs[i][j] = minOf(1 + minOf(costs[i - 1][j], costs[i][j - 1]), temp)
        }
    }

    // walk back through matrix to figure out path
    val aPathRev = StringBuilder()
    val bPathRev = StringBuilder()
    var i = a.length
    var j = b.length
    while (i != 0 && j != 0) {
        val temp = costs[i - 1][j - 1] + (if (aa[i - 1] == bb[j - 1]) 0 else 1)
        when (costs[i][j]) {
            temp -> {
                aPathRev.append(aa[--i])
                bPathRev.append(bb[--j])
            }

            1 + costs[i-1][j] -> {
                aPathRev.append(aa[--i])
                bPathRev.append('-')
            }

            1 + costs[i][j-1] -> {
                aPathRev.append('-')
                bPathRev.append(bb[--j])
            }
        }
    }
    return arrayOf(aPathRev.reverse().toString(), bPathRev.reverse().toString())
}

fun main(args: Array<String>) {
    var result = levenshteinAlign("place", "palace")
    println(result[0])
    println(result[1])
    println()    
    result = levenshteinAlign("rosettacode","raisethysword")
    println(result[0])
    println(result[1])
}

Output:

p-lace
palace

r-oset-tacode
raisethysword

Mathematica / Wolfram Language

This example is incorrect. It does not accomplish the given task. Please fix the code and remove this message.

Works with: Mathematica version 7

DamerauLevenshteinDistance["rosettacode", "raisethysword"]

Output:

Nim

Translation of: Kotlin

import algorithm, sequtils, strutils

proc levenshteinAlign(a, b: string): tuple[a, b: string] =
  let a = a.toLower()
  let b = b.toLower()
  var costs = newSeqWith(a.len + 1, newSeq[int](b.len + 1))
  for j in 0..b.len: costs[0][j] = j
  for i in 1..a.len:
    costs[i][0] = i
    for j in 1..b.len:
      let tmp = costs[i - 1][j - 1] + ord(a[i - 1] != b[j - 1])
      costs[i][j] = min(1 + min(costs[i - 1][j], costs[i][j - 1]), tmp)

  # Walk back through matrix to figure out path.
  var aPathRev, bPathRev: string
  var i = a.len
  var j = b.len
  while i != 0 and j != 0:
    let tmp = costs[i - 1][j - 1] + ord(a[i - 1] != b[j - 1])
    if costs[i][j] == tmp:
      dec i
      dec j
      aPathRev.add a[i]
      bPathRev.add b[j]
    elif costs[i][j] == 1 + costs[i-1][j]:
      dec i
      aPathRev.add a[i]
      bPathRev.add '-'
    elif costs[i][j] == 1 + costs[i][j-1]:
      dec j
      aPathRev.add '-'
      bPathRev.add b[j]
    else:
      quit "Internal error"

  result = (reversed(aPathRev).join(), reversed(bPathRev).join())

when isMainModule:

  var result = levenshteinAlign("place", "palace")
  echo result.a
  echo result.b
  echo ""

  result = levenshteinAlign("rosettacode","raisethysword")
  echo result.a
  echo result.b

Output:

p-lace
palace

r-oset-tacode
raisethysword

Perl

use strict;
use warnings;

use List::Util qw(min);
 
sub levenshtein_distance_alignment {
    my @s = ('^', split //, shift);
    my @t = ('^', split //, shift);
 
    my @A;
    @{$A[$_][0]}{qw(d s t)} = ($_, join('', @s[1 .. $_]), ('~' x $_)) for 0 .. $#s;
    @{$A[0][$_]}{qw(d s t)} = ($_, ('-' x $_), join '', @t[1 .. $_])  for 0 .. $#t;
    for my $i (1 .. $#s) {
        for my $j (1 .. $#t) {
	    if ($s[$i] ne $t[$j]) {
		$A[$i][$j]{d} = 1 + (
		    my $min = min $A[$i-1][$j]{d}, $A[$i][$j-1]{d}, $A[$i-1][$j-1]{d}
		);
		@{$A[$i][$j]}{qw(s t)} =
		$A[$i-1][$j]{d} == $min ? ($A[$i-1][$j]{s}.$s[$i], $A[$i-1][$j]{t}.'-') :
		$A[$i][$j-1]{d} == $min ? ($A[$i][$j-1]{s}.'-', $A[$i][$j-1]{t}.$t[$j]) :
		($A[$i-1][$j-1]{s}.$s[$i], $A[$i-1][$j-1]{t}.$t[$j]);
	    }
            else {
		@{$A[$i][$j]}{qw(d s t)} = (
		    $A[$i-1][$j-1]{d},
		    $A[$i-1][$j-1]{s}.$s[$i],
		    $A[$i-1][$j-1]{t}.$t[$j]
		);
            }
        }
    }
    return @{$A[-1][-1]}{'s', 't'};
}
 
print  join "\n", levenshtein_distance_alignment "rosettacode", "raisethysword";

Output:

ro-settac-o-de
raisethysword-

Phix

Translation of: Kotlin

plus the indicator from Go

function LevenshteinAlignment(string a, b)
    integer la = length(a)+1,
            lb = length(b)+1
    sequence costs = repeat(repeat(0,lb),la)
    for j=1 to lb do
        costs[1][j] = j
    end for
    for i=2 to la do
        costs[i][1] = i
        for j=2 to lb do
            integer tmp1 = 1+min(costs[i-1][j], costs[i][j-1]),
                    tmp2 = costs[i-1][j-1] + (a[i-1]!=b[j-1])
            costs[i][j] = min(tmp1, tmp2)
        end for
    end for
    -- walk back through matrix to figure out the path
    string arev = "",
           brev = ""
    integer i = la, j = lb
    while i>1 and j>1 do
        integer tmp = costs[i-1][j-1] + (a[i-1]!=b[j-1])
        switch costs[i][j] do
            case tmp:                   i -= 1; arev &= a[i]
                                        j -= 1; brev &= b[j]
            case 1 + costs[i-1][j]:     i -= 1; arev &= a[i]
                                        brev &= '-'
            case 1 + costs[i][j-1]:     arev &= '-'
                                        j -= 1; brev &= b[j]
        end switch
    end while
    return {reverse(arev),reverse(brev)}
end function
 
procedure test(string a,b)
    {a,b} = LevenshteinAlignment(a,b)
    string c = sq_add(repeat(' ',length(a)),sq_mul(sq_ne(a,b),'|'-' '))
    printf(1,"%s\n%s\n%s\n\n",{a,b,c})
end procedure
test("rosettacode", "raisethysword")
test("place", "palace")

Output:

r-oset-tacode
raisethysword
 ||   |||| ||

p-lace
palace
 |

Python

from difflib import ndiff

def levenshtein(str1, str2):
    result = ""
    pos, removed = 0, 0
    for x in ndiff(str1, str2):
        if pos<len(str1) and str1[pos] == x[2]:
          pos += 1
          result += x[2]
          if x[0] == "-":
              removed += 1
          continue
        else:
          if removed > 0:
            removed -=1
          else:
            result += "-"
    print(result)

levenshtein("place","palace")
levenshtein("rosettacode","raisethysword")

Output:

p-lace
ro-settac-o-de

Racket

Simple version (no aligment)

First we will analyze this solution that only computes the distance. See http://blog.racket-lang.org/2012/08/dynamic-programming-versus-memoization.html for a discussion of the code.

#lang racket

(define (memoize f)
  (local ([define table (make-hash)])
    (lambda args
      (dict-ref! table args (λ () (apply f args))))))

(define levenshtein
  (memoize
   (lambda (s t)
     (cond
       [(and (empty? s) (empty? t)) 0]
       [(empty? s) (length t)]
       [(empty? t) (length s)]
       [else
        (if (equal? (first s) (first t))
            (levenshtein (rest s) (rest t))
            (min (add1 (levenshtein (rest s) t))
                 (add1 (levenshtein s (rest t)))
                 (add1 (levenshtein (rest s) (rest t)))))]))))

Demonstration:

(levenshtein (string->list "rosettacode") 
             (string->list "raisethysword"))

Output:

Complete version

Now we extend the code from http://blog.racket-lang.org/2012/08/dynamic-programming-versus-memoization.html to show also the alignment. The code is very similar, but it stores the partial results (number of edits and alignment of each substring) in a lev structure.

#lang racket

(struct lev (n s t))

(define (lev-add old n sx tx)
  (lev (+ n (lev-n old))
       (cons sx (lev-s old))
       (cons tx (lev-t old))))

(define (list-repeat n v)
  (build-list n (lambda (_) v)))

(define (memoize f)
  (local ([define table (make-hash)])
    (lambda args
      (dict-ref! table args (λ () (apply f args))))))

(define levenshtein/list
  (memoize
   (lambda (s t)
     (cond
       [(and (empty? s) (empty? t)) 
        (lev 0 '() '())]
       [(empty? s) 
        (lev (length t) (list-repeat (length t) #\-) t)]
       [(empty? t) 
        (lev (length s) s (list-repeat (length s) #\-))]
       [else
        (if (equal? (first s) (first t))
            (lev-add (levenshtein/list (rest s) (rest t))
                     0 (first s) (first t))
            (argmin lev-n (list (lev-add (levenshtein/list (rest s) t)
                                         1 (first s) #\-)
                                (lev-add (levenshtein/list s (rest t))
                                         1 #\- (first t))
                                (lev-add (levenshtein/list (rest s) (rest t))
                                         1 (first s) (first t)))))]))))

(define (levenshtein s t)
  (let ([result (levenshtein/list (string->list s) 
                                  (string->list t))])
    (values (lev-n result)
            (list->string (lev-s result)) 
            (list->string (lev-t result)))))

Demonstration:

(let-values ([(dist exp-s exp-t) 
              (levenshtein "rosettacode" "raisethysword")])
  (printf "levenshtein: ~a edits\n" dist)
  (displayln exp-s)
  (displayln exp-t))

Output:

levenshtein: 8 edits
r-oset-taco-de
raisethysword-

Raku

(formerly Perl 6)

Translation of: Perl

sub align ( Str $σ, Str $t ) {
    my @s = flat *, $σ.comb;
    my @t = flat *, $t.comb;
     
    my @A;
    @A[$_][ 0]<d s t> = $_, @s[1..$_].join, '-' x $_ for ^@s;
    @A[ 0][$_]<d s t> = $_, '-' x $_, @t[1..$_].join for ^@t;
     
    for 1 ..^ @s X 1..^ @t -> (\i, \j) {
	if @s[i] ne @t[j] {
	    @A[i][j]<d> = 1 + my $min =
	    min @A[i-1][j]<d>, @A[i][j-1]<d>, @A[i-1][j-1]<d>;
	    @A[i][j]<s t> =
	    @A[i-1][j]<d> == $min ??  (@A[i-1][j]<s t> Z~ @s[i], '-') !!
	    @A[i][j-1]<d> == $min ??  (@A[i][j-1]<s t> Z~ '-', @t[j]) !!
	    (@A[i-1][j-1]<s t> Z~ @s[i], @t[j]);
	} else {
	    @A[i][j]<d s t> = @A[i-1][j-1]<d s t> Z~ '', @s[i], @t[j];
	}
    }
     
    return @A[*-1][*-1]<s t>;
}
 
.say for align 'rosettacode', 'raisethysword';

Output:

ro-settac-o-de
raisethysword-

Ruby

Translation of: Tcl

uses "lcs" from here

require 'lcs'

def levenshtein_align(a, b)
  apos, bpos = LCS.new(a, b).backtrack2
  
  c = ""
  d = ""
  x0 = y0 = -1
  dx = dy = 0
  apos.zip(bpos) do |x,y|
    diff = x + dx - y - dy
    if diff < 0
      dx -= diff
      c += "-" * (-diff)
    elsif diff > 0
      dy += diff
      d += "-" * diff
    end
    c += a[x0+1..x]
    x0 = x
    d += b[y0+1..y]
    y0 = y
  end
  
  c += a[x0+1..-1]
  d += b[y0+1..-1]
  diff = a.length + y0 - b.length - x0
  if diff < 0
    c += "-" * (-diff)
  elsif diff > 0
    d += "-" * diff
  end
  [c, d]
end

puts levenshtein_align("rosettacode", "raisethysword")

Output:

r-oset-taco-de
raisethysword-

Rust

Cargo.toml

[dependencies]
edit-distance = "^1.0.0"

src/main.rs

extern crate edit_distance;

edit_distance("rosettacode", "raisethysword");

Scala

Output:

Best seen running in your browser either by ScalaFiddle (ES aka JavaScript, non JVM) or [1].

import scala.collection.mutable
import scala.collection.parallel.ParSeq

object LevenshteinAlignment extends App {
  val vlad = new Levenshtein("rosettacode", "raisethysword")
  val alignment = vlad.revLevenstein()

  class Levenshtein(s1: String, s2: String) {
    val memoizedCosts = mutable.Map[(Int, Int), Int]()

    def revLevenstein(): (String, String) = {
      def revLev: (Int, Int, String, String) => (String, String) = {
        case (_, 0, revS1, revS2) => (revS1, revS2)
        case (0, _, revS1, revS2) => (revS1, revS2)
        case (i, j, revS1, revS2) =>
          if (memoizedCosts(i, j) == (memoizedCosts(i - 1, j - 1)
                                      + (if (s1(i - 1) != s2(j - 1)) 1 else 0)))
            revLev(i - 1, j - 1, s1(i - 1) + revS1, s2(j - 1) + revS2)
          else if (memoizedCosts(i, j) == 1 + memoizedCosts(i - 1, j))
            revLev(i - 1, j, s1(i - 1) + revS1, "-" + revS2)
          else
            revLev(i, j - 1, "-" + revS1, s2(j - 1) + revS2)
      }

      revLev(s1.length, s2.length, "", "")
    }

    private def levenshtein: Int = {
      def lev: ((Int, Int)) => Int = {
        case (k1, k2) =>
          memoizedCosts.getOrElseUpdate((k1, k2), (k1, k2) match {
            case (i, 0) => i
            case (0, j) => j
            case (i, j) =>
              ParSeq(1 + lev((i - 1, j)),
                1 + lev((i, j - 1)),
                lev((i - 1, j - 1))
                  + (if (s1(i - 1) != s2(j - 1)) 1 else 0)).min
          })
      }

      lev((s1.length, s2.length))
    }

    levenshtein
  }

  println(alignment._1)
  println(alignment._2)

}

Sidef

Translation of: Perl

func align(s, t) {
    s.chars!.prepend!('^')
    t.chars!.prepend!('^')

    var A = []
    {|i| A[i][0]{@|<d s t>} = (i, s.slice(1).first(i).join, '~' * i) } << ^s
    {|i| A[0][i]{@|<d s t>} = (i, '-' * i, t.slice(1).first(i).join) } << ^t

    for i (1 .. s.end) {
      for j (1 .. t.end) {
        if (s[i] != t[j]) {
          A[i][j]{:d} = 1+(
            var min = Math.min(A[i-1][j]{:d}, A[i][j-1]{:d}, A[i-1][j-1]{:d})
          )
          A[i][j]{@|<s t>} = (A[i-1][j]{:d} == min
              ? [A[i-1][j]{:s}+s[i], A[i-1][j]{:t}+'-']
              : (A[i][j-1]{:d} == min
              ? [A[i][j-1]{:s}+'-', A[i][j-1]{:t}+t[j]]
              : [A[i-1][j-1]{:s}+s[i], A[i-1][j-1]{:t}+t[j]]))...
        }
        else {
          A[i][j]{@|<d s t>} = (
              A[i-1][j-1]{:d},
              A[i-1][j-1]{:s}+s[i],
              A[i-1][j-1]{:t}+t[j],
          )
        }
      }
    }
    return [A[-1][-1]{@|<s t>}]
}

align("rosettacode", "raisethysword").each { .say }

Output:

ro-settac-o-de
raisethysword-

Tcl

Library: Tcllib (Package: struct::list)

package require struct::list
proc levenshtein/align {a b} {
    lassign [struct::list longestCommonSubsequence [split $a ""] [split $b ""]]\
	    apos bpos
    set c ""
    set d ""
    set x0 [set y0 -1]
    set dx [set dy 0]
    foreach x $apos y $bpos {
	if {$x+$dx < $y+$dy} {
	    set n [expr {($y+$dy)-($x+$dx)}]
	    incr dx $n
	    append c [string repeat "-" $n]
	} elseif {$x+$dx > $y+$dy} {
	    set n [expr {($x+$dx)-($y+$dy)}]
	    incr dy $n
	    append d [string repeat "-" $n]
	}
	append c [string range $a $x0+1 $x]
	set x0 $x
	append d [string range $b $y0+1 $y]
	set y0 $y
    }
    append c [string range $a $x0+1 end]
    append d [string range $b $y0+1 end]
    set al [string length $a]
    set bl [string length $b]
    if {$al+$y0 < $bl+$x0} {
	append c [string repeat "-" [expr {$bl+$x0-$y0-$al}]]
    } elseif {$bl+$x0 < $al+$y0} {
	append d [string repeat "-" [expr {$al+$y0-$x0-$bl}]]
    }
    return $c\n$d
}

puts [levenshtein/align "rosettacode" "raisethysword"]

Output:

r-oset-taco-de
raisethysword-

Wren

Translation of: Kotlin

Library: Wren-str

import "./str" for Str

var levenshteinAlign = Fn.new { |a, b|
    a = Str.lower(a)
    b = Str.lower(b)
    var costs = List.filled(a.count+1, null)
    for (i in 0..a.count) costs[i] = List.filled(b.count+1, 0)
    for (j in 0..b.count) costs[0][j] = j
    for (i in 1..a.count) {
        costs[i][0] = i
        for (j in 1..b.count) {
            var temp = costs[i - 1][j - 1] + ((a[i - 1] == b[j - 1]) ? 0 : 1)
            costs[i][j] = temp.min(1 + costs[i - 1][j].min(costs[i][j - 1]))
        }
    }
    // walk back through matrix to figure out path
    var aPathRev = ""
    var bPathRev = ""
    var i = a.count
    var j = b.count
    while (i != 0 && j != 0) {
        var temp = costs[i - 1][j - 1] + ((a[i - 1] == b[j - 1]) ? 0 : 1)
        var cij = costs[i][j]
        if (cij == temp) {
            i = i - 1
            aPathRev = aPathRev + a[i]
            j = j - 1
            bPathRev = bPathRev + b[j]
        } else if (cij == 1 + costs[i-1][j]) {
            i = i - 1
            aPathRev = aPathRev + a[i]
            bPathRev = bPathRev + "-"
        } else if (cij == 1 + costs[i][j-1]) {
            aPathRev = aPathRev + "-"
            j = j - 1
            bPathRev = bPathRev+ b[j]
        }
    }
    return [aPathRev[-1..0], bPathRev[-1..0]]
}

var result = levenshteinAlign.call("place", "palace")
System.print(result[0])
System.print(result[1])
System.print()
result = levenshteinAlign.call("rosettacode","raisethysword")
System.print(result[0])
System.print(result[1])

Output:

p-lace
palace

r-oset-tacode
raisethysword

zkl

Translation of: Java

fcn alignment(a,b){
   a,b = a.toLower(), b.toLower();
   costs := (a.len()+1).pump(List(),'wrap(a){ [1..b.len()].pump(List(a)) });
   foreach i,j in (a.len()+1, [1..b.len()]){
      costs[i][j] = ( 1 + costs[i-1][j].min(costs[i][j-1]) ) 
         .min(if(a[i-1] == b[j-1]) costs[i-1][j-1] else costs[i-1][j-1] + 1);
   }
   // walk back through matrix to figure out path
   aPathRev,bPathRev := Data(),Data();  // byte buckets
   i,j := a.len(), b.len(); 
   while(i!=0 and j!= 0){
      if (costs[i][j] == 
          ( if(a[i-1]==b[j-1]) costs[i-1][j-1] else costs[i-1][j-1]+1 )){
         aPathRev.append(a[i-=1]);
	 bPathRev.append(b[j-=1]);
      } else if(costs[i][j] == 1+costs[i-1][j]){
	 aPathRev.append(a[i-=1]);
	 bPathRev.append("-");
      } else if (costs[i][j] == 1+costs[i][j-1]){
	 aPathRev.append("-");
	 bPathRev.append(b[j-=1]);
      }
   }
   return(aPathRev.text.reverse(), bPathRev.text.reverse())
}

result := alignment("rosettacode", "raisethysword");
println(result[0]);
println(result[1]);

Output:

r-oset-tacode
raisethysword