Jump to content

Levenshtein distance/Alignment

From Rosetta Code
Task
Levenshtein distance/Alignment
You are encouraged to solve this task according to the task description, using any language you may know.

The Levenshtein distance algorithm returns the number of atomic operations (insertion, deletion or edition) that must be performed on a string in order to obtain an other one, but it does not say anything about the actual operations used or their order.

An alignment is a notation used to describe the operations used to turn a string into an other. At some point in the strings, the minus character ('-') is placed in order to signify that a character must be added at this very place. For instance, an alignment between the words 'place' and 'palace' is:

P-LACE
PALACE


Task

Write a function that shows the alignment of two strings for the corresponding levenshtein distance.

As an example, use the words "rosettacode" and "raisethysword".

You can either implement an algorithm, or use a dedicated library (thus showing us how it is named in your language).

11l

Translation of: Nim
F levenshteinAlign(aa, bb)
   V a = aa.lowercase()
   V b = bb.lowercase()
   V costs = [[0] * (b.len + 1)] * (a.len + 1)
   L(j) 0 .. b.len
      costs[0][j] = j
   L(i) 1 .. a.len
      costs[i][0] = i
      L(j) 1 .. b.len
         V tmp = costs[i - 1][j - 1] + Int(a[i - 1] != b[j - 1])
         costs[i][j] = min(1 + min(costs[i - 1][j], costs[i][j - 1]), tmp)

   V aPathRev = ‘’
   V bPathRev = ‘’
   V i = a.len
   V j = b.len
   L i != 0 & j != 0
      V tmp = costs[i - 1][j - 1] + Int(a[i - 1] != b[j - 1])
      I costs[i][j] == tmp
         aPathRev ‘’= a[--i]
         bPathRev ‘’= b[--j]
      E I costs[i][j] == 1 + costs[i - 1][j]
         aPathRev ‘’= a[--i]
         bPathRev ‘’= ‘-’
      E I costs[i][j] == 1 + costs[i][j - 1]
         aPathRev ‘’= ‘-’
         bPathRev ‘’= b[--j]
      E
         assert(0B, ‘Internal error’)

   R (reversed(aPathRev), reversed(bPathRev))

V result = levenshteinAlign(‘place’, ‘palace’)
print(result[0])
print(result[1])
print()

result = levenshteinAlign(‘rosettacode’, ‘raisethysword’)
print(result[0])
print(result[1])
Output:
p-lace
palace

r-oset-tacode
raisethysword

Arturo

print join.with:"\n" levenshtein.align "place" "palace"
print join.with:"\n" levenshtein.align "rosettacode" "raisethysword"
Output:
p-lace
palace
r-oset-tacode
raisethysword

C

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

typedef struct edit_s edit_t, *edit;
struct edit_s {
	char c1, c2;
	int n;
	edit next;
};

void leven(char *a, char *b)
{
	int i, j, la = strlen(a), lb = strlen(b);
	edit *tbl = malloc(sizeof(edit) * (1 + la));
	tbl[0] = calloc((1 + la) * (1 + lb), sizeof(edit_t));
	for (i = 1; i <= la; i++)
		tbl[i] = tbl[i-1] + (1+lb);

	for (i = la; i >= 0; i--) {
		char *aa = a + i;
		for (j = lb; j >= 0; j--) {
			char *bb = b + j;
			if (!*aa && !*bb) continue;

			edit e = &tbl[i][j];
			edit repl = &tbl[i+1][j+1];
			edit dela = &tbl[i+1][j];
			edit delb = &tbl[i][j+1];

			e->c1 = *aa;
			e->c2 = *bb;
			if (!*aa) {
				e->next = delb;
				e->n = e->next->n + 1;
				continue;
			}
			if (!*bb) {
				e->next = dela;
				e->n = e->next->n + 1;
				continue;
			}

			e->next = repl;
			if (*aa == *bb) {
				e->n = e->next->n;
				continue;
			}

			if (e->next->n > delb->n) {
				e->next = delb;
				e->c1 = 0;
			}
			if (e->next->n > dela->n) {
				e->next = dela;
				e->c1 = *aa;
				e->c2 = 0;
			}
			e->n = e->next->n + 1;
		}
	}

	edit p = tbl[0];
	printf("%s -> %s: %d edits\n", a, b, p->n);

	while (p->next) {
		if (p->c1 == p->c2)
			printf("%c", p->c1);
		else {
			putchar('(');
			if (p->c1) putchar(p->c1);
			putchar(',');
			if (p->c2) putchar(p->c2);
			putchar(')');
		}

		p = p->next;
	}
	putchar('\n');

	free(tbl[0]);
	free(tbl);
}

int main(void)
{
	leven("raisethysword", "rosettacode");
	return 0;
}
Output:
raisethysword -> rosettacode: 8 edits
r(a,o)(i,)set(h,t)(y,a)(s,c)(w,)o(r,d)(d,e)

C#

Translation of: Java
using System;
using System.Text;

public class LevenshteinAlignment
{
    public static string[] Alignment(string a, string b)
    {
        a = a.ToLower();
        b = b.ToLower();

        int[,] costs = new int[a.Length + 1, b.Length + 1];

        for (int j = 0; j <= b.Length; j++)
            costs[0, j] = j;

        for (int i = 1; i <= a.Length; i++)
        {
            costs[i, 0] = i;
            for (int j = 1; j <= b.Length; j++)
            {
                costs[i, j] = Math.Min(1 + Math.Min(costs[i - 1, j], costs[i, j - 1]),
                    a[i - 1] == b[j - 1] ? costs[i - 1, j - 1] : costs[i - 1, j - 1] + 1);
            }
        }

        StringBuilder aPathRev = new StringBuilder();
        StringBuilder bPathRev = new StringBuilder();

        for (int i = a.Length, j = b.Length; i != 0 && j != 0;)
        {
            if (costs[i, j] == (a[i - 1] == b[j - 1] ? costs[i - 1, j - 1] : costs[i - 1, j - 1] + 1))
            {
                aPathRev.Append(a[--i]);
                bPathRev.Append(b[--j]);
            }
            else if (costs[i, j] == 1 + costs[i - 1, j])
            {
                aPathRev.Append(a[--i]);
                bPathRev.Append('-');
            }
            else if (costs[i, j] == 1 + costs[i, j - 1])
            {
                aPathRev.Append('-');
                bPathRev.Append(b[--j]);
            }
        }

        return new string[] { Reverse(aPathRev.ToString()), Reverse(bPathRev.ToString()) };
    }

    private static string Reverse(string s)
    {
        char[] charArray = s.ToCharArray();
        Array.Reverse(charArray);
        return new string(charArray);
    }

    public static void Main(string[] args)
    {
        string[] result = Alignment("rosettacode", "raisethysword");
        Console.WriteLine(result[0]);
        Console.WriteLine(result[1]);
    }
}
Output:
r-oset-tacode
raisethysword

C++

#include <algorithm>
#include <cstdint>
#include <iostream>
#include <string>
#include <vector>

std::string to_lower_case(const std::string& text) {
	std::string result = text;
	std::transform(result.begin(), result.end(), result.begin(),
		[](char ch){ return std::tolower(ch); });
	return result;
}

std::vector<std::string> levenshtein_alignment(std::string a, std::string b) {
        a = to_lower_case(a);
        b = to_lower_case(b);

        std::vector<std::vector<int32_t>> costs{ a.length() + 1, std::vector<int32_t>( b.length() + 1, 0 ) };
		for ( uint64_t j = 0; j <= b.length(); ++j )
			costs[0][j] = j;
		for ( uint64_t i = 1; i <= a.length(); ++i ) {
			costs[i][0] = i;
			for ( uint64_t j = 1; j <= b.length(); ++j ) {
				costs[i][j] = std::min(std::min( costs[i - 1][j], costs[i][j - 1]) + 1,
						               a[i - 1] == b[j - 1] ? costs[i - 1][j - 1] : costs[i - 1][j - 1] + 1);
			}
		}

		std::string a_path_reversed, b_path_reversed;
		uint64_t i = a.length(), j = b.length();
		while ( i != 0 && j != 0 ) {
			if ( costs[i][j] == ( a[i - 1] == b[j - 1] ? costs[i - 1][j - 1] : costs[i - 1][j - 1] + 1 ) ) {
				a_path_reversed += a[--i];
				b_path_reversed += b[--j];
			} else if ( costs[i][j] == costs[i - 1][j] + 1 ) {
				a_path_reversed += a[--i];
				b_path_reversed += "-";
			} else if ( costs[i][j] == costs[i][j - 1] + 1 ) {
				a_path_reversed += "-";
				b_path_reversed += b[--j];
			}
		}

		std::reverse(a_path_reversed.begin(), a_path_reversed.end());
		std::reverse(b_path_reversed.begin(), b_path_reversed.end());
		return std::vector<std::string>{ a_path_reversed, b_path_reversed };
}

int main() {
	std::vector<std::string> result = levenshtein_alignment("place", "palace");
	std::cout << result[0] << std::endl;
	std::cout << result[1] << std::endl;
	std::cout << std::endl;

	result = levenshtein_alignment("rosettacode", "raisethysword");
	std::cout << result[0] << std::endl;
	std::cout << result[1] << std::endl;
}
Output:
p-lace
palace

r-oset-tacode
raisethysword

D

Using the standard library.

void main() {
    import std.stdio, std.algorithm;

    immutable s1 = "rosettacode";
    immutable s2 = "raisethysword";

    string s1b, s2b;
    size_t pos1, pos2;

    foreach (immutable c; levenshteinDistanceAndPath(s1, s2)[1]) {
        final switch (c) with (EditOp) {
            case none, substitute:
                s1b ~= s1[pos1++];
                s2b ~= s2[pos2++];
                break;
            case insert:
                s1b ~= "_";
                s2b ~= s2[pos2++];
                break;
            case remove:
                s1b ~= s1[pos1++];
                s2b ~= "_";
                break;
        }
    }

    writeln(s1b, "\n", s2b);
}
Output:
r_oset_tacode
raisethysword

EasyLang

Translation of: FreeBASIC
global dparr[] dpcols .
proc dpnew a b . .
   len dparr[] a * b
   dpcols = b
.
func dp r c .
   return dparr[r * dpcols + c + 1]
.
proc dps r c v . .
   dparr[r * dpcols + c + 1] = v
.
proc align a$ b$ . ar$ br$ .
   dpnew (len a$ + 1) (len b$ + 1)
   for i = 1 to len a$
      dps i 0 i
   .
   for j = 0 to len b$
      dps 0 j j
   .
   for i = 1 to len a$
      for j = 1 to len b$
         if substr a$ i 1 = substr b$ j 1
            dps i j dp (i - 1) (j - 1)
         else
            dps i j lower (lower dp (i - 1) j dp i (j - 1)) dp (i - 1) (j - 1) + 1
         .
      .
   .
   ar$ = "" ; br$ = ""
   i = len a$ ; j = len b$
   while i <> 0 and j <> 0
      if substr a$ i 1 = substr b$ j 1 or dp i j = dp (i - 1) (j - 1) + 1
         ar$ = substr a$ i 1 & ar$
         i -= 1
         br$ = substr b$ j 1 & br$
         j -= 1
      elif dp i j = dp (i - 1) j + 1
         ar$ = substr a$ i 1 & ar$
         i -= 1
         br$ = "-" & br$
      else
         ar$ = "-" & ar$
         br$ = substr b$ j 1 & br$
         j -= 1
      .
   .
.
align "rosettacode" "raisethysword" ar$ br$
print ar$ ; print br$
Output:
r-oset-tacode
raisethysword

Eiffel

distance (source, target: STRING): INTEGER
		-- Minimum number of operations to turn `source' into `target'.
	local
		l_distance: ARRAY2 [INTEGER]
		del, ins, subst: INTEGER
	do
		create l_distance.make (source.count, target.count)
		 ic:(1 |..| source.count) ¦ l_distance [ic, 1] := ic - 1 
		 ij:(1 |..| target.count) ¦ l_distance [1, ij] := ij - 1 

		 ic:(2 |..| source.count) ¦
			 jc:(2 |..| target.count) ¦
				if source [ic] = target [jc] then			-- same char
					l_distance [ic, jc] := l_distance [ic - 1, jc - 1]
				else							-- diff char
					del := l_distance [ic - 1, jc]			-- delete?
					ins := l_distance [ic, jc - 1]			-- insert?
					subst := l_distance [ic - 1, jc -1]		-- substitute/swap?
					l_distance [ic, jc] := del.min (ins.min (subst)) + 1
				end
			
		
		Result:= l_distance [source.count, target.count]
	end
Output:

Result = 8

The ⟳ ¦ ⟲ represents the "symbolic form" of an Eiffel across loop. In the case above, the first two ⟳ (loops) initialize the first column and the first row, and then two nested ⟳ (loops) to walk the distance computation.

Also—the `ic' is this programmers shorthand for ITERATION_CURSOR. Therefore, in the nested loops, the `ic' is the iteration cursor following `i' and the `ij' is following `j' as indexes on the source, target, and l_distance.

FreeBASIC

#define min(a, b) iif((a) < (b), (a), (b))

Sub LevenshteinAlignment(s1 As String, s2 As String)
    Dim As String align1 = "", align2 = ""
    Dim As Integer i, j, len1, len2
    len1 = Len(s1): 
    
    len2 = Len(s2)
    Dim As Integer dp(len1+1, len2+1)
    
    For i = 0 To len1
        dp(i, 0) = i
    Next i
    For j = 0 To len2
        dp(0, j) = j
    Next j
    
    For i = 1 To len1
        For j = 1 To len2
            If Mid(s1, i, 1) = Mid(s2, j, 1) Then
                dp(i, j) = dp(i-1, j-1)
            Else
                dp(i, j) = min(min(dp(i-1, j), dp(i, j-1)), dp(i-1, j-1)) + 1
            End If
        Next j
    Next i
    
    i = len1: j = len2
    While i > 0 And j > 0
        If Mid(s1, i, 1) = Mid(s2, j, 1) Then
            align1 = Mid(s1, i, 1) + align1
            align2 = Mid(s2, j, 1) + align2
            i -= 1: j -= 1
        Elseif dp(i, j) = dp(i-1, j-1) + 1 Then
            align1 = Mid(s1, i, 1) + align1
            align2 = Mid(s2, j, 1) + align2
            i -= 1: j -= 1
        Elseif dp(i, j) = dp(i-1, j) + 1 Then
            align1 = Mid(s1, i, 1) + align1
            align2 = "-" + align2
            i -= 1
        Else
            align1 = "-" + align1
            align2 = Mid(s2, j, 1) + align2
            j -= 1
        End If
    Wend
    
    While i > 0
        align1 = Mid(s1, i, 1) + align1
        align2 = "-" + align2
        i -= 1
    Wend
    While j > 0
        align1 = "-" + align1
        align2 = Mid(s2, j, 1) + align2
        j -= 1
    Wend
    
    Print "Levenshtein Distance: "; dp(len1, len2)
    Print align1
    Print align2
    Print
End Sub

LevenshteinAlignment("rosettacode", "raisethysword")
LevenshteinAlignment("place", "palace")

Sleep
Output:
Levenshtein Distance:  8
r-oset-tacode
raisethysword

Levenshtein Distance:  1
p-lace
palace

Go

Library: biogo

Alignment computed by the Needleman-Wunch algorithm, which is used in bioinformatics.

package main

import (
    "fmt"

    "github.com/biogo/biogo/align"
    ab "github.com/biogo/biogo/alphabet"
    "github.com/biogo/biogo/feat"
    "github.com/biogo/biogo/seq/linear"
)

func main() {
    // Alphabets for things like DNA are predefined in biogo, but we
    // define our own here.
    lc := ab.Must(ab.NewAlphabet("-abcdefghijklmnopqrstuvwxyz",
        feat.Undefined, '-', 0, true))
    // Construct scoring matrix for Needleman-Wunch algorithm.
    // We leave zeros on the diagonal for the Levenshtein distance of an
    // exact match and put -1s everywhere else for the Levenshtein distance
    // of an edit.
    nw := make(align.NW, lc.Len())
    for i := range nw {
        r := make([]int, lc.Len())
        nw[i] = r
        for j := range r {
            if j != i {
                r[j] = -1
            }
        }
    }
    // define input sequences
    a := &linear.Seq{Seq: ab.BytesToLetters([]byte("rosettacode"))}
    a.Alpha = lc
    b := &linear.Seq{Seq: ab.BytesToLetters([]byte("raisethysword"))}
    b.Alpha = lc
    // perform alignment
    aln, err := nw.Align(a, b)
    // format and display result
    if err != nil {
        fmt.Println(err)
        return
    }
    fa := align.Format(a, b, aln, '-')
    fmt.Printf("%s\n%s\n", fa[0], fa[1])
    aa := fmt.Sprint(fa[0])
    ba := fmt.Sprint(fa[1])
    ma := make([]byte, len(aa))
    for i := range ma {
        if aa[i] == ba[i] {
            ma[i] = ' '
        } else {
            ma[i] = '|'
        }
    }
    fmt.Println(string(ma))
}
Output:

The lines after the alignment point out the 8 edits.

r-oset-tacode
raisethysword
 ||   |||| ||

Haskell

The Wagner–Fischer matrix construction is adopted from Levenshtein_distance#Haskell, however it is reversed in order to simplify it's traversing.

costs :: String -> String -> [[Int]]
costs s1 s2 = reverse $ reverse <$> matrix
  where
    matrix = scanl transform [0 .. length s1] s2
    transform ns@(n:ns1) c = scanl calc (n + 1) $ zip3 s1 ns ns1
      where
        calc z (c1, x, y) = minimum [ y + 1, z + 1
                                    , x + fromEnum (c1 /= c)]

levenshteinDistance :: String -> String -> Int
levenshteinDistance s1 s2 = head.head $ costs s1 s2


A sample alignment

The alignment is built in the same way as it is done in Levenshtein_distance/Alignment#Java. Instead of indices the Wagner–Fischer matrix and strings are traversed by list truncation, as zippers.

alignment :: String -> String -> (String, String)
alignment s1 s2 = go (costs s1 s2) (reverse s1) (reverse s2) ([],[])
  where
    go c _ _ r | isEmpty c = r
    go _ [] s r = ('-' <$ s, reverse s) <> r
    go _ s [] r = (reverse s, '-' <$ s) <> r
    go c s1@(h1:t1) s2@(h2:t2) (r1, r2) =
      let temp = (get.nextCol.nextRow $ c) + if h1 == h2 then 0 else 1
      in case get c of
        x | x == temp -> go (nextRow.nextCol $ c) t1 t2 (h1:r1, h2:r2)
          | x == 1 + (get.nextCol $ c) -> go (nextCol c) s1 t2 ('-':r1, h2:r2)
          | x == 1 + (get.nextRow $ c) -> go (nextRow c) t1 s2 (h1:r1, '-':r2)

    -- Functions which treat table as zipper
    get ((h:_):_) = h
    nextRow = map tail
    nextCol = tail
    isEmpty c = null c || null (head c)
λ> alignment "palace" "place"
("palace","p-lace")

λ> alignment "rosettacode" "raisethysword"
("r-oset-tacode","raisethysword")

λ> alignment "rosettacode" "rat"
("rosettacode","r-----a---t")

All alignments

The alternative solution, which extensively produces all minimal alignments for given strings.

-- Produces all possible alignments for two strings.
allAlignments :: String -> String -> [[(Char, Char)]]
allAlignments s1 s2 = go (length s2 - length s1) s1 s2
  where
    go _ s [] = [(\x -> (x, '-')) <$> s]
    go _ [] s = [(\x -> ('-' ,x)) <$> s]
    go n s1@(h1:t1) s2@(h2:t2) = (h1, h2) <:> go n t1 t2
      ++ case compare n 0 of
           LT -> (h1, '-') <:> go (n+1) t1 s2
           EQ -> []
           GT -> ('-', h2) <:> go (n-1) s1 t2

    x <:> l = fmap (x :) l

-- Returns a lazy list of all optimal alignments.
levenshteinAlignments :: String -> String -> [(String, String)]
levenshteinAlignments s1 s2 = unzip <$> best
  where
    best = filter ((lev ==) . dist) $ allAlignments s1 s2
    lev = levenshteinDistance s1 s2
    dist = length . filter (uncurry (/=))


λ> mapM_ print $ levenshteinAlignments "rosettacode" "raisethysword"
("ro-settac-ode","raisethysword")
("ro-setta-code","raisethysword")
("ro-sett-acode","raisethysword")
("ro-set-tacode","raisethysword")
("r-osettac-ode","raisethysword")
("r-osetta-code","raisethysword")
("r-osett-acode","raisethysword")
("r-oset-tacode","raisethysword")

λ> mapM_ print $ levenshteinAlignments "rosettacode" "rat"
("rosettacode","ra--t------")
("rosettacode","ra---t-----")
("rosettacode","r-a-t------")
("rosettacode","r-a--t-----")
("rosettacode","r--at------")
("rosettacode","r--a-t-----")
("rosettacode","r---at-----")
("rosettacode","r-----at---")
("rosettacode","r-----a-t--")
("rosettacode","r-----a--t-")
("rosettacode","r-----a---t")


J

Translation of java:

levalign=:4 :0
  assert. x <:&# y
  D=. x +/&i.&>:&# y
  for_i. 1+i.#x do.
    for_j. 1+i.#y do.
      if. ((<:i){x)=(<:j){y do.
        D=. (D {~<<:i,j) (<i,j)} D
      else.
        min=. 1+<./D{~(i,j) <@:-"1#:1 2 3
        D=. min (<i,j)} D
      end.
    end.
  end.
  A=. B=. ''
  ij=. x ,&# y
  while. */ij do.
    'd00 d01 d10 d11'=. D{~ ij <@:-"1#:i.4
    'x1 y1'=. (ij-1){each x;y
    if. d00 = d11+x1~:y1 do.
      A=. A,x1 [ B=. B,y1 [ ij=. ij-1
    elseif. d00 = 1+d10 do.
      A=. A,x1 [ B=. B,'-'[ ij=. ij-1 0
    elseif. d00 = 1+d01 do.
      A=. A,'-'[ B=. B,y1 [ ij=. ij-0 1
    end.
  end.
  A,:&|.B
)

Task examples:

   'place' levalign 'palace'
p-lace
palace
   'rosettacode' levalign 'raisethysword'
r-oset-tacode
raisethysword

Java

public class LevenshteinAlignment {

    public static String[] alignment(String a, String b) {
        a = a.toLowerCase();
        b = b.toLowerCase();
        // i == 0
        int[][] costs = new int[a.length()+1][b.length()+1];
        for (int j = 0; j <= b.length(); j++)
            costs[0][j] = j;
        for (int i = 1; i <= a.length(); i++) {
            costs[i][0] = i;
            for (int j = 1; j <= b.length(); j++) {
                costs[i][j] = Math.min(1 + Math.min(costs[i-1][j], costs[i][j-1]), a.charAt(i - 1) == b.charAt(j - 1) ? costs[i-1][j-1] : costs[i-1][j-1] + 1);
            }
        }

	// walk back through matrix to figure out path
	StringBuilder aPathRev = new StringBuilder();
	StringBuilder bPathRev = new StringBuilder();
	for (int i = a.length(), j = b.length(); i != 0 && j != 0; ) {
	    if (costs[i][j] == (a.charAt(i - 1) == b.charAt(j - 1) ? costs[i-1][j-1] : costs[i-1][j-1] + 1)) {
		aPathRev.append(a.charAt(--i));
		bPathRev.append(b.charAt(--j));
	    } else if (costs[i][j] == 1 + costs[i-1][j]) {
		aPathRev.append(a.charAt(--i));
		bPathRev.append('-');
	    } else if (costs[i][j] == 1 + costs[i][j-1]) {
		aPathRev.append('-');
		bPathRev.append(b.charAt(--j));
	    }
	}
        return new String[]{aPathRev.reverse().toString(), bPathRev.reverse().toString()};
    }

    public static void main(String[] args) {
	String[] result = alignment("rosettacode", "raisethysword");
	System.out.println(result[0]);
	System.out.println(result[1]);
    }
}
Output:
r-oset-tacode
raisethysword

jq

Adapted from Wren

Works with jq, the C implementation of jq

Works with gojq, the Go implementation of jq

Works with jaq, the Rust implementation of jq

def reverseString: explode | reverse | implode;

def levenshteinAlign($x; $y):
  def min($a;$b): [$a,$b]|min;

    ($x|length + 1) as $a1
  | ($y|length + 1) as $b1
  | ($x | ascii_downcase) as $a
  | ($y | ascii_downcase) as $b
  | [range(0; $b1) | 0] as $zeros
  | {costs: [range(0; $a1) | $zeros]}
  | reduce range(0; $b1) as $j (.; .costs[0][$j] = $j)
  | reduce range(1; $a1) as $i (.;
      .costs[$i][0] = $i
      | reduce range(1; $b1) as $j (.;
            (.costs[$i - 1][$j - 1] + (if $a[$i - 1: $i] == $b[$j - 1: $j] then 0 else 1 end)) as $temp
            | .costs[$i][$j] = min( $temp;  1 + min(.costs[$i - 1][$j]; .costs[$i][$j - 1] )) ))

  # walk back through matrix to figure out path
  | .aPathRev = ""
  | .bPathRev = ""
  | .i = ($a|length)
  | .j = ($b|length)
  | until (.i == 0 or .j == 0;
        (.costs[.i - 1][.j - 1] + (if $a[.i - 1: .i] == $b[.j - 1: .j] then 0 else 1 end)) as $temp
        | .costs[.i][.j] as $cij
        | if $cij == $temp
          then .i += -1
          | .aPathRev += $a[.i: .i+1]
          | .j += -1
          | .bPathRev += $b[.j: .j+1]
          elif $cij == 1 + .costs[.i-1][.j]
          then .i += -1
          | .aPathRev += $a[.i:.i+1]
          | .bPathRev += "-"
          elif $cij == 1 + .costs[.i][.j-1]
          then .aPathRev += "-"
          | .j += -1
          | .bPathRev += $b[.j: .j+1]
          end)
   | [.aPathRev, .bPathRev ] | map(reverseString);

levenshteinAlign("place"; "palace"),
levenshteinAlign("rosettacode";"raisethysword")
Output:
[
  "p-lace",
  "palace"
]
[
  "r-oset-tacode",
  "raisethysword"
]

Julia

Works with: Julia version 0.6
Translation of: Java
function levenshteinalign(a::AbstractString, b::AbstractString)
    a = lowercase(a)
    b = lowercase(b)
    len_a = length(a)
    len_b = length(b)

    costs = Matrix{Int}(len_a + 1, len_b + 1)
    costs[1, :] .= 0:len_b
    @inbounds for i in 2:(len_a + 1)
        costs[i, 1] = i
        for j in 2:(len_b + 1)
            tmp = ifelse(a[i-1] == b[j-1], costs[i-1, j-1], costs[i-1, j-1] + 1)
            costs[i, j] = min(1 + min(costs[i-1, j], costs[i, j-1]), tmp)
        end
    end

    apathrev = IOBuffer()
    bpathrev = IOBuffer()
    local i = len_a + 1
    local j = len_b + 1
    @inbounds while i != 1 && j != 1
        tmp = ifelse(a[i-1] == b[j-1], costs[i-1, j-1], costs[i-1, j-1] + 1)
        if costs[i, j] == tmp
            i -= 1
            j -= 1
            print(apathrev, a[i])
            print(bpathrev, b[j])
        elseif costs[i, j] == 1 + costs[i-1, j]
            i -= 1
            print(apathrev, a[i])
            print(bpathrev, '-')
        elseif costs[i, j] == 1 + costs[i, j-1]
            j -= 1
            print(apathrev, '-')
            print(bpathrev, b[j])
        end
    end

    return reverse(String(take!(apathrev))), reverse(String(take!(bpathrev)))
end

foreach(println, levenshteinalign("rosettacode", "raisethysword"))
foreach(println, levenshteinalign("place", "palace"))
Output:
r-oset-tacode
raisethysword
p-lace
palace

Kotlin

Translation of: Java
// version 1.1.3

fun levenshteinAlign(a: String, b: String): Array<String> {
    val aa = a.toLowerCase()
    val bb = b.toLowerCase()
    val costs = Array(a.length + 1) { IntArray(b.length + 1) }
    for (j in 0..b.length) costs[0][j] = j
    for (i in 1..a.length) {
        costs[i][0] = i
        for (j in 1..b.length) {
            val temp = costs[i - 1][j - 1] + (if (aa[i - 1] == bb[j - 1]) 0 else 1) 
            costs[i][j] = minOf(1 + minOf(costs[i - 1][j], costs[i][j - 1]), temp)
        }
    }

    // walk back through matrix to figure out path
    val aPathRev = StringBuilder()
    val bPathRev = StringBuilder()
    var i = a.length
    var j = b.length
    while (i != 0 && j != 0) {
        val temp = costs[i - 1][j - 1] + (if (aa[i - 1] == bb[j - 1]) 0 else 1)
        when (costs[i][j]) {
            temp -> {
                aPathRev.append(aa[--i])
                bPathRev.append(bb[--j])
            }

            1 + costs[i-1][j] -> {
                aPathRev.append(aa[--i])
                bPathRev.append('-')
            }

            1 + costs[i][j-1] -> {
                aPathRev.append('-')
                bPathRev.append(bb[--j])
            }
        }
    }
    return arrayOf(aPathRev.reverse().toString(), bPathRev.reverse().toString())
}

fun main(args: Array<String>) {
    var result = levenshteinAlign("place", "palace")
    println(result[0])
    println(result[1])
    println()    
    result = levenshteinAlign("rosettacode","raisethysword")
    println(result[0])
    println(result[1])
}
Output:
p-lace
palace

r-oset-tacode
raisethysword

Mathematica / Wolfram Language

This example is incorrect. It does not accomplish the given task. Please fix the code and remove this message.
Works with: Mathematica version 7
DamerauLevenshteinDistance["rosettacode", "raisethysword"]
Output:
8

Nim

Translation of: Kotlin
import algorithm, sequtils, strutils

proc levenshteinAlign(a, b: string): tuple[a, b: string] =
  let a = a.toLower()
  let b = b.toLower()
  var costs = newSeqWith(a.len + 1, newSeq[int](b.len + 1))
  for j in 0..b.len: costs[0][j] = j
  for i in 1..a.len:
    costs[i][0] = i
    for j in 1..b.len:
      let tmp = costs[i - 1][j - 1] + ord(a[i - 1] != b[j - 1])
      costs[i][j] = min(1 + min(costs[i - 1][j], costs[i][j - 1]), tmp)

  # Walk back through matrix to figure out path.
  var aPathRev, bPathRev: string
  var i = a.len
  var j = b.len
  while i != 0 and j != 0:
    let tmp = costs[i - 1][j - 1] + ord(a[i - 1] != b[j - 1])
    if costs[i][j] == tmp:
      dec i
      dec j
      aPathRev.add a[i]
      bPathRev.add b[j]
    elif costs[i][j] == 1 + costs[i-1][j]:
      dec i
      aPathRev.add a[i]
      bPathRev.add '-'
    elif costs[i][j] == 1 + costs[i][j-1]:
      dec j
      aPathRev.add '-'
      bPathRev.add b[j]
    else:
      quit "Internal error"

  result = (reversed(aPathRev).join(), reversed(bPathRev).join())

when isMainModule:

  var result = levenshteinAlign("place", "palace")
  echo result.a
  echo result.b
  echo ""

  result = levenshteinAlign("rosettacode","raisethysword")
  echo result.a
  echo result.b
Output:
p-lace
palace

r-oset-tacode
raisethysword

Perl

use strict;
use warnings;

use List::Util qw(min);
 
sub levenshtein_distance_alignment {
    my @s = ('^', split //, shift);
    my @t = ('^', split //, shift);
 
    my @A;
    @{$A[$_][0]}{qw(d s t)} = ($_, join('', @s[1 .. $_]), ('~' x $_)) for 0 .. $#s;
    @{$A[0][$_]}{qw(d s t)} = ($_, ('-' x $_), join '', @t[1 .. $_])  for 0 .. $#t;
    for my $i (1 .. $#s) {
        for my $j (1 .. $#t) {
	    if ($s[$i] ne $t[$j]) {
		$A[$i][$j]{d} = 1 + (
		    my $min = min $A[$i-1][$j]{d}, $A[$i][$j-1]{d}, $A[$i-1][$j-1]{d}
		);
		@{$A[$i][$j]}{qw(s t)} =
		$A[$i-1][$j]{d} == $min ? ($A[$i-1][$j]{s}.$s[$i], $A[$i-1][$j]{t}.'-') :
		$A[$i][$j-1]{d} == $min ? ($A[$i][$j-1]{s}.'-', $A[$i][$j-1]{t}.$t[$j]) :
		($A[$i-1][$j-1]{s}.$s[$i], $A[$i-1][$j-1]{t}.$t[$j]);
	    }
            else {
		@{$A[$i][$j]}{qw(d s t)} = (
		    $A[$i-1][$j-1]{d},
		    $A[$i-1][$j-1]{s}.$s[$i],
		    $A[$i-1][$j-1]{t}.$t[$j]
		);
            }
        }
    }
    return @{$A[-1][-1]}{'s', 't'};
}
 
print  join "\n", levenshtein_distance_alignment "rosettacode", "raisethysword";
Output:
ro-settac-o-de
raisethysword-

Phix

Translation of: Kotlin

plus the indicator from Go

function LevenshteinAlignment(string a, b)
    integer la = length(a)+1,
            lb = length(b)+1
    sequence costs = repeat(repeat(0,lb),la)
    for j=1 to lb do
        costs[1][j] = j
    end for
    for i=2 to la do
        costs[i][1] = i
        for j=2 to lb do
            integer tmp1 = 1+min(costs[i-1][j], costs[i][j-1]),
                    tmp2 = costs[i-1][j-1] + (a[i-1]!=b[j-1])
            costs[i][j] = min(tmp1, tmp2)
        end for
    end for
    -- walk back through matrix to figure out the path
    string arev = "",
           brev = ""
    integer i = la, j = lb
    while i>1 and j>1 do
        integer tmp = costs[i-1][j-1] + (a[i-1]!=b[j-1])
        switch costs[i][j] do
            case tmp:                   i -= 1; arev &= a[i]
                                        j -= 1; brev &= b[j]
            case 1 + costs[i-1][j]:     i -= 1; arev &= a[i]
                                        brev &= '-'
            case 1 + costs[i][j-1]:     arev &= '-'
                                        j -= 1; brev &= b[j]
        end switch
    end while
    return {reverse(arev),reverse(brev)}
end function
 
procedure test(string a,b)
    {a,b} = LevenshteinAlignment(a,b)
    string c = sq_add(repeat(' ',length(a)),sq_mul(sq_ne(a,b),'|'-' '))
    printf(1,"%s\n%s\n%s\n\n",{a,b,c})
end procedure
test("rosettacode", "raisethysword")
test("place", "palace")
Output:
r-oset-tacode
raisethysword
 ||   |||| ||

p-lace
palace
 |

Python

from difflib import ndiff

def levenshtein(str1, str2):
    result = ""
    pos, removed = 0, 0
    for x in ndiff(str1, str2):
        if pos<len(str1) and str1[pos] == x[2]:
          pos += 1
          result += x[2]
          if x[0] == "-":
              removed += 1
          continue
        else:
          if removed > 0:
            removed -=1
          else:
            result += "-"
    print(result)

levenshtein("place","palace")
levenshtein("rosettacode","raisethysword")
Output:
p-lace
ro-settac-o-de

Racket

Simple version (no aligment)

First we will analyze this solution that only computes the distance. See http://blog.racket-lang.org/2012/08/dynamic-programming-versus-memoization.html for a discussion of the code.

#lang racket

(define (memoize f)
  (local ([define table (make-hash)])
    (lambda args
      (dict-ref! table args (λ () (apply f args))))))

(define levenshtein
  (memoize
   (lambda (s t)
     (cond
       [(and (empty? s) (empty? t)) 0]
       [(empty? s) (length t)]
       [(empty? t) (length s)]
       [else
        (if (equal? (first s) (first t))
            (levenshtein (rest s) (rest t))
            (min (add1 (levenshtein (rest s) t))
                 (add1 (levenshtein s (rest t)))
                 (add1 (levenshtein (rest s) (rest t)))))]))))

Demonstration:

(levenshtein (string->list "rosettacode") 
             (string->list "raisethysword"))
Output:
8

Complete version

Now we extend the code from http://blog.racket-lang.org/2012/08/dynamic-programming-versus-memoization.html to show also the alignment. The code is very similar, but it stores the partial results (number of edits and alignment of each substring) in a lev structure.

#lang racket

(struct lev (n s t))

(define (lev-add old n sx tx)
  (lev (+ n (lev-n old))
       (cons sx (lev-s old))
       (cons tx (lev-t old))))

(define (list-repeat n v)
  (build-list n (lambda (_) v)))

(define (memoize f)
  (local ([define table (make-hash)])
    (lambda args
      (dict-ref! table args (λ () (apply f args))))))

(define levenshtein/list
  (memoize
   (lambda (s t)
     (cond
       [(and (empty? s) (empty? t)) 
        (lev 0 '() '())]
       [(empty? s) 
        (lev (length t) (list-repeat (length t) #\-) t)]
       [(empty? t) 
        (lev (length s) s (list-repeat (length s) #\-))]
       [else
        (if (equal? (first s) (first t))
            (lev-add (levenshtein/list (rest s) (rest t))
                     0 (first s) (first t))
            (argmin lev-n (list (lev-add (levenshtein/list (rest s) t)
                                         1 (first s) #\-)
                                (lev-add (levenshtein/list s (rest t))
                                         1 #\- (first t))
                                (lev-add (levenshtein/list (rest s) (rest t))
                                         1 (first s) (first t)))))]))))

(define (levenshtein s t)
  (let ([result (levenshtein/list (string->list s) 
                                  (string->list t))])
    (values (lev-n result)
            (list->string (lev-s result)) 
            (list->string (lev-t result)))))

Demonstration:

(let-values ([(dist exp-s exp-t) 
              (levenshtein "rosettacode" "raisethysword")])
  (printf "levenshtein: ~a edits\n" dist)
  (displayln exp-s)
  (displayln exp-t))
Output:
levenshtein: 8 edits
r-oset-taco-de
raisethysword-

Raku

(formerly Perl 6)

Translation of: Perl
sub align ( Str , Str $t ) {
    my @s = flat *, .comb;
    my @t = flat *, $t.comb;
     
    my @A;
    @A[$_][ 0]<d s t> = $_, @s[1..$_].join, '-' x $_ for ^@s;
    @A[ 0][$_]<d s t> = $_, '-' x $_, @t[1..$_].join for ^@t;
     
    for 1 ..^ @s X 1..^ @t -> (\i, \j) {
	if @s[i] ne @t[j] {
	    @A[i][j]<d> = 1 + my $min =
	    min @A[i-1][j]<d>, @A[i][j-1]<d>, @A[i-1][j-1]<d>;
	    @A[i][j]<s t> =
	    @A[i-1][j]<d> == $min ??  (@A[i-1][j]<s t> Z~ @s[i], '-') !!
	    @A[i][j-1]<d> == $min ??  (@A[i][j-1]<s t> Z~ '-', @t[j]) !!
	    (@A[i-1][j-1]<s t> Z~ @s[i], @t[j]);
	} else {
	    @A[i][j]<d s t> = @A[i-1][j-1]<d s t> Z~ '', @s[i], @t[j];
	}
    }
     
    return @A[*-1][*-1]<s t>;
}
 
.say for align 'rosettacode', 'raisethysword';
Output:
ro-settac-o-de
raisethysword-

Ruby

Translation of: Tcl

uses "lcs" from here

require 'lcs'

def levenshtein_align(a, b)
  apos, bpos = LCS.new(a, b).backtrack2
  
  c = ""
  d = ""
  x0 = y0 = -1
  dx = dy = 0
  apos.zip(bpos) do |x,y|
    diff = x + dx - y - dy
    if diff < 0
      dx -= diff
      c += "-" * (-diff)
    elsif diff > 0
      dy += diff
      d += "-" * diff
    end
    c += a[x0+1..x]
    x0 = x
    d += b[y0+1..y]
    y0 = y
  end
  
  c += a[x0+1..-1]
  d += b[y0+1..-1]
  diff = a.length + y0 - b.length - x0
  if diff < 0
    c += "-" * (-diff)
  elsif diff > 0
    d += "-" * diff
  end
  [c, d]
end

puts levenshtein_align("rosettacode", "raisethysword")
Output:
r-oset-taco-de
raisethysword-

Rust

Cargo.toml

[dependencies]
edit-distance = "^1.0.0"

src/main.rs

extern crate edit_distance;

edit_distance("rosettacode", "raisethysword");

Scala

Output:

Best seen running in your browser either by ScalaFiddle (ES aka JavaScript, non JVM) or [1].

import scala.collection.mutable
import scala.collection.parallel.ParSeq

object LevenshteinAlignment extends App {
  val vlad = new Levenshtein("rosettacode", "raisethysword")
  val alignment = vlad.revLevenstein()

  class Levenshtein(s1: String, s2: String) {
    val memoizedCosts = mutable.Map[(Int, Int), Int]()

    def revLevenstein(): (String, String) = {
      def revLev: (Int, Int, String, String) => (String, String) = {
        case (_, 0, revS1, revS2) => (revS1, revS2)
        case (0, _, revS1, revS2) => (revS1, revS2)
        case (i, j, revS1, revS2) =>
          if (memoizedCosts(i, j) == (memoizedCosts(i - 1, j - 1)
                                      + (if (s1(i - 1) != s2(j - 1)) 1 else 0)))
            revLev(i - 1, j - 1, s1(i - 1) + revS1, s2(j - 1) + revS2)
          else if (memoizedCosts(i, j) == 1 + memoizedCosts(i - 1, j))
            revLev(i - 1, j, s1(i - 1) + revS1, "-" + revS2)
          else
            revLev(i, j - 1, "-" + revS1, s2(j - 1) + revS2)
      }

      revLev(s1.length, s2.length, "", "")
    }

    private def levenshtein: Int = {
      def lev: ((Int, Int)) => Int = {
        case (k1, k2) =>
          memoizedCosts.getOrElseUpdate((k1, k2), (k1, k2) match {
            case (i, 0) => i
            case (0, j) => j
            case (i, j) =>
              ParSeq(1 + lev((i - 1, j)),
                1 + lev((i, j - 1)),
                lev((i - 1, j - 1))
                  + (if (s1(i - 1) != s2(j - 1)) 1 else 0)).min
          })
      }

      lev((s1.length, s2.length))
    }

    levenshtein
  }

  println(alignment._1)
  println(alignment._2)

}

Sidef

Translation of: Perl
func align(s, t) {
    s.chars!.prepend!('^')
    t.chars!.prepend!('^')

    var A = []
    {|i| A[i][0]{@|<d s t>} = (i, s.slice(1).first(i).join, '~' * i) } << ^s
    {|i| A[0][i]{@|<d s t>} = (i, '-' * i, t.slice(1).first(i).join) } << ^t

    for i (1 .. s.end) {
      for j (1 .. t.end) {
        if (s[i] != t[j]) {
          A[i][j]{:d} = 1+(
            var min = Math.min(A[i-1][j]{:d}, A[i][j-1]{:d}, A[i-1][j-1]{:d})
          )
          A[i][j]{@|<s t>} = (A[i-1][j]{:d} == min
              ? [A[i-1][j]{:s}+s[i], A[i-1][j]{:t}+'-']
              : (A[i][j-1]{:d} == min
              ? [A[i][j-1]{:s}+'-', A[i][j-1]{:t}+t[j]]
              : [A[i-1][j-1]{:s}+s[i], A[i-1][j-1]{:t}+t[j]]))...
        }
        else {
          A[i][j]{@|<d s t>} = (
              A[i-1][j-1]{:d},
              A[i-1][j-1]{:s}+s[i],
              A[i-1][j-1]{:t}+t[j],
          )
        }
      }
    }
    return [A[-1][-1]{@|<s t>}]
}

align("rosettacode", "raisethysword").each { .say }
Output:
ro-settac-o-de
raisethysword-

Tcl

Library: Tcllib (Package: struct::list)
package require struct::list
proc levenshtein/align {a b} {
    lassign [struct::list longestCommonSubsequence [split $a ""] [split $b ""]]\
	    apos bpos
    set c ""
    set d ""
    set x0 [set y0 -1]
    set dx [set dy 0]
    foreach x $apos y $bpos {
	if {$x+$dx < $y+$dy} {
	    set n [expr {($y+$dy)-($x+$dx)}]
	    incr dx $n
	    append c [string repeat "-" $n]
	} elseif {$x+$dx > $y+$dy} {
	    set n [expr {($x+$dx)-($y+$dy)}]
	    incr dy $n
	    append d [string repeat "-" $n]
	}
	append c [string range $a $x0+1 $x]
	set x0 $x
	append d [string range $b $y0+1 $y]
	set y0 $y
    }
    append c [string range $a $x0+1 end]
    append d [string range $b $y0+1 end]
    set al [string length $a]
    set bl [string length $b]
    if {$al+$y0 < $bl+$x0} {
	append c [string repeat "-" [expr {$bl+$x0-$y0-$al}]]
    } elseif {$bl+$x0 < $al+$y0} {
	append d [string repeat "-" [expr {$al+$y0-$x0-$bl}]]
    }
    return $c\n$d
}

puts [levenshtein/align "rosettacode" "raisethysword"]
Output:
r-oset-taco-de
raisethysword-

Wren

Translation of: Kotlin
Library: Wren-str
import "./str" for Str

var levenshteinAlign = Fn.new { |a, b|
    a = Str.lower(a)
    b = Str.lower(b)
    var costs = List.filled(a.count+1, null)
    for (i in 0..a.count) costs[i] = List.filled(b.count+1, 0)
    for (j in 0..b.count) costs[0][j] = j
    for (i in 1..a.count) {
        costs[i][0] = i
        for (j in 1..b.count) {
            var temp = costs[i - 1][j - 1] + ((a[i - 1] == b[j - 1]) ? 0 : 1)
            costs[i][j] = temp.min(1 + costs[i - 1][j].min(costs[i][j - 1]))
        }
    }
    // walk back through matrix to figure out path
    var aPathRev = ""
    var bPathRev = ""
    var i = a.count
    var j = b.count
    while (i != 0 && j != 0) {
        var temp = costs[i - 1][j - 1] + ((a[i - 1] == b[j - 1]) ? 0 : 1)
        var cij = costs[i][j]
        if (cij == temp) {
            i = i - 1
            aPathRev = aPathRev + a[i]
            j = j - 1
            bPathRev = bPathRev + b[j]
        } else if (cij == 1 + costs[i-1][j]) {
            i = i - 1
            aPathRev = aPathRev + a[i]
            bPathRev = bPathRev + "-"
        } else if (cij == 1 + costs[i][j-1]) {
            aPathRev = aPathRev + "-"
            j = j - 1
            bPathRev = bPathRev+ b[j]
        }
    }
    return [aPathRev[-1..0], bPathRev[-1..0]]
}

var result = levenshteinAlign.call("place", "palace")
System.print(result[0])
System.print(result[1])
System.print()
result = levenshteinAlign.call("rosettacode","raisethysword")
System.print(result[0])
System.print(result[1])
Output:
p-lace
palace

r-oset-tacode
raisethysword

zkl

Translation of: Java
fcn alignment(a,b){
   a,b = a.toLower(), b.toLower();
   costs := (a.len()+1).pump(List(),'wrap(a){ [1..b.len()].pump(List(a)) });
   foreach i,j in (a.len()+1, [1..b.len()]){
      costs[i][j] = ( 1 + costs[i-1][j].min(costs[i][j-1]) ) 
         .min(if(a[i-1] == b[j-1]) costs[i-1][j-1] else costs[i-1][j-1] + 1);
   }
   // walk back through matrix to figure out path
   aPathRev,bPathRev := Data(),Data();  // byte buckets
   i,j := a.len(), b.len(); 
   while(i!=0 and j!= 0){
      if (costs[i][j] == 
          ( if(a[i-1]==b[j-1]) costs[i-1][j-1] else costs[i-1][j-1]+1 )){
         aPathRev.append(a[i-=1]);
	 bPathRev.append(b[j-=1]);
      } else if(costs[i][j] == 1+costs[i-1][j]){
	 aPathRev.append(a[i-=1]);
	 bPathRev.append("-");
      } else if (costs[i][j] == 1+costs[i][j-1]){
	 aPathRev.append("-");
	 bPathRev.append(b[j-=1]);
      }
   }
   return(aPathRev.text.reverse(), bPathRev.text.reverse())
}
result := alignment("rosettacode", "raisethysword");
println(result[0]);
println(result[1]);
Output:
r-oset-tacode
raisethysword
Cookies help us deliver our services. By using our services, you agree to our use of cookies.