Longest common subsequence
You are encouraged to solve this task according to the task description, using any language you may know.
The longest common subsequence (or LCS) of groups A and B is the longest group of elements from A and B that are common between the two groups and in the same order in each group. For example, the sequences "1234" and "1224533324" have an LCS of "1234":
1234 1224533324
For a string example, consider the sequences "thisisatest" and "testing123testing". An LCS would be "tsitest":
thisisatest testing123testing
In this puzzle, your code only needs to deal with strings. Write a function which returns an LCS of two strings (case-sensitive). You don't need to show multiple LCS's.
For more information please on this problem see Wikipedia.
Ada
Using recursion: <lang ada>with Ada.Text_IO; use Ada.Text_IO;
procedure Test_LCS is
function LCS (A, B : String) return String is begin if A'Length = 0 or else B'Length = 0 then return ""; elsif A (A'Last) = B (B'Last) then return LCS (A (A'First..A'Last - 1), B (B'First..B'Last - 1)) & A (A'Last); else declare X : String renames LCS (A, B (B'First..B'Last - 1)); Y : String renames LCS (A (A'First..A'Last - 1), B); begin if X'Length > Y'Length then return X; else return Y; end if; end; end if; end LCS;
begin
Put_Line (LCS ("thisisatest", "testing123testing"));
end Test_LCS;</lang> Sample output:
tsitest
Non-recursive solution: <lang ada>with Ada.Text_IO; use Ada.Text_IO;
procedure Test_LCS is
function LCS (A, B : String) return String is L : array (A'First..A'Last + 1, B'First..B'Last + 1) of Natural; begin for I in L'Range (1) loop L (I, B'First) := 0; end loop; for J in L'Range (2) loop L (A'First, J) := 0; end loop; for I in A'Range loop for J in B'Range loop if A (I) = B (J) then L (I + 1, J + 1) := L (I, J) + 1; else L (I + 1, J + 1) := Natural'Max (L (I + 1, J), L (I, J + 1)); end if; end loop; end loop; declare I : Integer := L'Last (1); J : Integer := L'Last (2); R : String (1..Integer'Max (A'Length, B'Length)); K : Integer := R'Last; begin while I > L'First (1) and then J > L'First (2) loop if L (I, J) = L (I - 1, J) then I := I - 1; elsif L (I, J) = L (I, J - 1) then J := J - 1; else I := I - 1; J := J - 1; R (K) := A (I); K := K - 1; end if; end loop; return R (K + 1..R'Last); end; end LCS;
begin
Put_Line (LCS ("thisisatest", "testing123testing"));
end Test_LCS;</lang> Sample output:
tsitest
ALGOL 68
<lang algol68>main:(
PROC lcs = (STRING a, b)STRING: BEGIN IF UPB a = 0 OR UPB b = 0 THEN "" ELIF a [UPB a] = b [UPB b] THEN lcs (a [:UPB a - 1], b [:UPB b - 1]) + a [UPB a] ELSE STRING x = lcs (a, b [:UPB b - 1]); STRING y = lcs (a [:UPB a - 1], b); IF UPB x > UPB y THEN x ELSE y FI FI END # lcs #; print((lcs ("thisisatest", "testing123testing"), new line))
)</lang> Output:
tsitest
AutoHotkey
using dynamic programming
ahk forum: discussion <lang AutoHotkey>lcs(a,b) { ; Longest Common Subsequence of strings, using Dynamic Programming
Loop % StrLen(a)+2 { ; Initialize i := A_Index-1 Loop % StrLen(b)+2 j := A_Index-1, len%i%_%j% := 0 } Loop Parse, a ; scan a { i := A_Index, i1 := i+1, x := A_LoopField Loop Parse, b ; scan b { j := A_Index, j1 := j+1, y := A_LoopField len%i1%_%j1% := x=y ? len%i%_%j% + 1 : (u:=len%i1%_%j%) > (v:=len%i%_%j1%) ? u : v } } x := StrLen(a)+1, y := StrLen(b)+1 While x*y { ; construct solution from lengths x1 := x-1, y1 := y-1 If (len%x%_%y% = len%x1%_%y%) x := x1 Else If (len%x%_%y% = len%x%_%y1%) y := y1 Else x := x1, y := y1, t := SubStr(a,x,1) t } Return t
}</lang>
BASIC
<lang qbasic>FUNCTION lcs$ (a$, b$)
IF LEN(a$) = 0 OR LEN(b$) = 0 THEN
lcs$ = ""
ELSEIF RIGHT$(a$, 1) = RIGHT$(b$, 1) THEN
lcs$ = lcs$(LEFT$(a$, LEN(a$) - 1), LEFT$(b$, LEN(b$) - 1)) + RIGHT$(a$, 1)
ELSE
x$ = lcs$(a$, LEFT$(b$, LEN(b$) - 1)) y$ = lcs$(LEFT$(a$, LEN(a$) - 1), b$) IF LEN(x$) > LEN(y$) THEN lcs$ = x$ ELSE lcs$ = y$ END IF
END IF
END FUNCTION</lang>
C
<lang c>#include <string.h>
- include <stdlib.h>
- include <stdio.h>
- define MAX(A,B) (((A)>(B))? (A) : (B))
char * lcs(const char *a,const char * b) {
int lena = strlen(a)+1; int lenb = strlen(b)+1;
int bufrlen = 40; char bufr[40], *result;
int i,j; const char *x, *y; int *la = calloc(lena*lenb, sizeof( int)); int **lengths = malloc( lena*sizeof( int*)); for (i=0; i<lena; i++) lengths[i] = la + i*lenb;
for (i=0,x=a; *x; i++, x++) { for (j=0,y=b; *y; j++,y++ ) { if (*x == *y) { lengths[i+1][j+1] = lengths[i][j] +1; } else { int ml = MAX(lengths[i+1][j], lengths[i][j+1]); lengths[i+1][j+1] = ml; } } }
result = bufr+bufrlen; *--result = '\0'; i = lena-1; j = lenb-1; while ( (i>0) && (j>0) ) { if (lengths[i][j] == lengths[i-1][j]) i -= 1; else if (lengths[i][j] == lengths[i][j-1]) j-= 1; else {
// assert( a[i-1] == b[j-1]);
*--result = a[i-1]; i-=1; j-=1; } } free(la); free(lengths); return strdup(result);
}</lang> Testing <lang c>int main() {
printf("%s\n", lcs("thisisatest", "testing123testing")); // tsitest return 0;
}</lang>
With recursion
<lang c>#include <stdio.h>
- include <stdlib.h>
- include <string.h>
char* lcs(char *a, char *b, char *out) { int longest = 0; int match(char *a, char *b, int dep) { if (!a || !b) return 0; if (!*a || !*b) { if (dep <= longest) return 0; out[ longest = dep ] = 0; return 1; }
if (*a == *b) return match(a + 1, b + 1, dep + 1) && (out[dep] = *a);
return match(a + 1, b + 1, dep) + match(strchr(a, *b), b, dep) + match(a, strchr(b, *a), dep); }
return match(a, b, 0) ? out : 0; }
int main() { char buf[128]; printf("%s\n", lcs("thisisatest", "testing123testing", buf)); printf("%p\n", lcs("no", "match", buf)); return 0; }</lang>
C#
<lang csharp>using System;
namespace LCS {
class Program { static void Main(string[] args) { string word1 = "thisisatest"; string word2 = "testing123testing"; Console.WriteLine(lcsBack(word1, word2)); Console.ReadKey(); }
public static string lcsBack(string a, string b) { string aSub = a.Substring(0, (a.Length - 1 < 0) ? 0 : a.Length - 1); string bSub = b.Substring(0, (b.Length - 1 < 0) ? 0 : b.Length - 1); if (a.Length == 0 || b.Length == 0) return ""; else if (a[a.Length - 1] == b[b.Length - 1]) return lcsBack(aSub, bSub) + a[a.Length - 1]; else { string x = lcsBack(a, bSub); string y = lcsBack(aSub, b); return (x.Length > y.Length) ? x : y; } } }
}</lang>
Clojure
<lang Clojure>(defn longest [xs ys] (if (> (count xs) (count ys)) xs ys))
(def lcs
(memoize (fn [seqx seqy] (when-let [[x & xs] (seq seqx)] (when-let [[y & ys] (seq seqy)]
(if (= x y) (cons x (lcs xs ys)) (longest (lcs seqx ys) (lcs xs seqy))))))))</lang>
CoffeeScript
<lang coffeescript> lcs = (s1, s2) ->
len1 = s1.length len2 = s2.length # Create a virtual matrix that is (len1 + 1) by (len2 + 1), # where m[i][j] is the longest common string using only # the first i chars of s1 and first j chars of s2. The # matrix is virtual, because we only keep the last two rows # in memory. prior_row = ( for i in [0..len2])
for i in [0...len1] row = [] for j in [0...len2] if s1[i] == s2[j] row.push prior_row[j] + s1[i] else subs1 = row[j] subs2 = prior_row[j+1] if subs1.length > subs2.length row.push subs1 else row.push subs2 prior_row = row row[len2]
s1 = "thisisatest" s2 = "testing123testing" console.log lcs(s1, s2) </lang>
Common Lisp
Here's a memoizing/dynamic-programming solution that uses an n × m array where n and m are the lengths of the input arrays. The first return value is a sequence (of the same type as array1) which is the longest common subsequence. The second return value is the length of the longest common subsequence.
<lang lisp>(defun longest-common-subsequence (array1 array2)
(let* ((l1 (length array1)) (l2 (length array2)) (results (make-array (list l1 l2) :initial-element nil))) (declare (dynamic-extent results)) (labels ((lcs (start1 start2) ;; if either sequence is empty, return (() 0) (if (or (eql start1 l1) (eql start2 l2)) (list '() 0) ;; otherwise, return any memoized value (let ((result (aref results start1 start2))) (if (not (null result)) result ;; otherwise, compute and store a value (setf (aref results start1 start2) (if (eql (aref array1 start1) (aref array2 start2)) ;; if they start with the same element, ;; move forward in both sequences (destructuring-bind (seq len) (lcs (1+ start1) (1+ start2)) (list (cons (aref array1 start1) seq) (1+ len))) ;; otherwise, move ahead in each separately, ;; and return the better result. (let ((a (lcs (1+ start1) start2)) (b (lcs start1 (1+ start2)))) (if (> (second a) (second b)) a b))))))))) (destructuring-bind (seq len) (lcs 0 0) (values (coerce seq (type-of array1)) len)))))</lang>
For example,
<lang lisp>(longest-common-subsequence "123456" "1a2b3c")</lang>
produces the two values
<lang lisp>"123" 3</lang>
D
Recursive version: <lang d>import std.stdio;
T[] lcs(T)(T[] a, T[] b) {
if (!a.length || !b.length) return null; if (a[0] == b[0]) return a[0] ~ lcs(a[1..$], b[1..$]); auto l1 = lcs(a, b[1..$]), l2 = lcs(a[1..$], b); return l1.length > l2.length ? l1 : l2;
}
void main() {
writeln(lcs("thisisatest", "testing123testing"));
}</lang> Output:
tsitest
Faster dynamic programming version (same output): <lang d>import std.stdio, std.algorithm;
T[] lcs(T)(T[] a, T[] b) {
int i, j, m = a.length, n = b.length; auto L = new int[][](m + 1, n + 1); T[] result; for (i = 0; i < m; i++) for (j = 0; j < n; j++) L[i+1][j+1] = a[i]==b[j] ? 1+L[i][j] : max(L[i+1][j],L[i][j+1]); while (i > 0 && j > 0) if (a[i - 1] == b[j - 1]) { result ~= a[i - 1]; i--; j--; } else if (L[i][j - 1] < L[i - 1][j]) i--; else j--; return result.reverse;
}
void main() {
writeln(lcs("thisisatest", "testing123testing"));
}</lang>
Fortran
Using the iso_varying_string module which can be found here (or equivalent module conforming to the ISO/IEC 1539-2:2000 API or to a subset according to the need of this code: char
, len
, //
, extract
, ==
, =
)
<lang fortran>program lcstest
use iso_varying_string implicit none
type(varying_string) :: s1, s2
s1 = "thisisatest" s2 = "testing123testing" print *, char(lcs(s1, s2))
s1 = "1234" s2 = "1224533324" print *, char(lcs(s1, s2))
contains
recursive function lcs(a, b) result(l) type(varying_string) :: l type(varying_string), intent(in) :: a, b
type(varying_string) :: x, y
l = "" if ( (len(a) == 0) .or. (len(b) == 0) ) return if ( extract(a, len(a), len(a)) == extract(b, len(b), len(b)) ) then l = lcs(extract(a, 1, len(a)-1), extract(b, 1, len(b)-1)) // extract(a, len(a), len(a)) else x = lcs(a, extract(b, 1, len(b)-1)) y = lcs(extract(a, 1, len(a)-1), b) if ( len(x) > len(y) ) then l = x else l = y end if end if end function lcs
end program lcstest</lang>
Go
Recursion
Brute force <lang go>func lcs(a, b string) string {
aLen := len(a) bLen := len(b) if aLen == 0 || bLen == 0 { return "" } else if a[aLen-1] == b[bLen-1] { return lcs(a[:aLen-1], b[:bLen-1]) + string(a[aLen-1]) } x := lcs(a, b[:bLen-1]) y := lcs(a[:aLen-1], b) if len(x) > len(y) { return x } return y
}</lang>
Dynamic Programming
<lang go>func lcs(a, b string) string {
aLen := len(a) bLen := len(b) lengths := make([][]int, aLen+1) for i := 0; i <= aLen; i++ { lengths[i] = make([]int, bLen+1) } // row 0 and column 0 are initialized to 0 already
for i := 0; i < aLen; i++ { for j := 0; j < bLen; j++ { if a[i] == b[j] { lengths[i+1][j+1] = lengths[i][j]+1 } else if lengths[i+1][j] > lengths[i][j+1] { lengths[i+1][j+1] = lengths[i+1][j] } else { lengths[i+1][j+1] = lengths[i][j+1] } } }
// read the substring out from the matrix s := make([]byte, 0, lengths[aLen][bLen]) for x, y := aLen, bLen; x != 0 && y != 0; { if lengths[x][y] == lengths[x-1][y] { x-- } else if lengths[x][y] == lengths[x][y-1] { y-- } else { s = append(s, a[x-1]) x-- y-- } } // reverse string r := make([]byte, len(s)) for i := 0; i < len(s); i++ { r[i] = s[len(s)-1-i] } return string(r)
}</lang>
Haskell
The Wikipedia solution translates directly into Haskell, with the only difference that equal characters are added in front:
<lang haskell>longest xs ys = if length xs > length ys then xs else ys
lcs [] _ = [] lcs _ [] = [] lcs (x:xs) (y:ys)
| x == y = x : lcs xs ys | otherwise = longest (lcs (x:xs) ys) (lcs xs (y:ys))</lang>
Memoization (aka dynamic programming) of that uses zip to make both the index and the character available: <lang haskell>import Data.Array
lcs xs ys = a!(0,0) where
n = length xs m = length ys a = array ((0,0),(n,m)) $ l1 ++ l2 ++ l3 l1 = [((i,m),[]) | i <- [0..n]] l2 = [((n,j),[]) | j <- [0..m]] l3 = [((i,j), f x y i j) | (x,i) <- zip xs [0..], (y,j) <- zip ys [0..]] f x y i j | x == y = x : a!(i+1,j+1) | otherwise = longest (a!(i,j+1)) (a!(i+1,j))</lang>
Both solutions work of course not only with strings, but also with any other list. Example: <lang haskell>*Main> lcs "thisisatest" "testing123testing" "tsitest"</lang>
Icon and Unicon
This solution is a modified variant of the recursive solution. The modifications include (a) deleting all characters not common to both strings and (b) stripping off common prefixes and suffixes in a single step.
<lang Icon>procedure main() LCSTEST("thisisatest","testing123testing") LCSTEST("","x") LCSTEST("x","x") LCSTEST("beginning-middle-ending","beginning-diddle-dum-ending") end
link strings
procedure LCSTEST(a,b) #: helper to show inputs and results write("lcs( ",image(a),", ",image(b)," ) = ",image(res := lcs(a,b))) return res end
procedure lcs(a,b) #: return longest common sub-sequence of characters (modified recursive method) local i,x,y local c,nc
if *(a|b) = 0 then return "" # done if either string is empty if a == b then return a # done if equal
if *(a ++ b -- (c := a ** b)) > 0 then { # find all characters not in common a := deletec(a,nc := ~c) # .. remove b := deletec(b,nc) # .. remove } # only unequal strings and shared characters beyond
i := 0 ; while a[i+1] == b[i+1] do i +:=1 # find common prefix ... if *(x := a[1+:i]) > 0 then # if any return x || lcs(a[i+1:0],b[i+1:0]) # ... remove and process remainder
i := 0 ; while a[-(i+1)] == b[-(i+1)] do i +:=1 # find common suffix ... if *(y := a[0-:i]) > 0 then # if any return lcs(a[1:-i],b[1:-i]) || y # ... remove and process remainder
return if *(x := lcs(a,b[1:-1])) > *(y := lcs(a[1:-1],b)) then x else y # divide, discard, and keep longest
end</lang>
Sample output:
lcs( "thisisatest", "testing123testing" ) = "tsitest" lcs( "", "x" ) = "" lcs( "x", "x" ) = "x" lcs( "beginning-middle-ending", "beginning-diddle-dum-ending" ) = "beginning-iddle-ending"
J
<lang j>lcs=: dyad define
|.x{~ 0{"1 cullOne^:_ (\:~~ +/@|:) 4$.$. x =/ y
) cullOne=: verb define
if. (#y) = First0=.0(= i. 1:) 1,*./|: 2 >/\ y do. y else. y #~ 0 First0}(#y)#1 end.
)</lang>
Java
Recursion
This is not a particularly fast algorithm, but it gets the job done eventually. The speed is a result of many recursive function calls. <lang java>public static String lcs(String a, String b){
int aLen = a.length(); int bLen = b.length(); if(aLen == 0 || bLen == 0){ return ""; }else if(a.charAt(aLen-1) == b.charAt(bLen-1)){ return lcs(a.substring(0,aLen-1),b.substring(0,bLen-1)) + a.charAt(aLen-1); }else{ String x = lcs(a, b.substring(0,bLen-1)); String y = lcs(a.substring(0,aLen-1), b); return (x.length() > y.length()) ? x : y; }
}</lang>
Dynamic Programming
<lang java>public static String lcs(String a, String b) {
int[][] lengths = new int[a.length()+1][b.length()+1];
// row 0 and column 0 are initialized to 0 already
for (int i = 0; i < a.length(); i++) for (int j = 0; j < b.length(); j++) if (a.charAt(i) == b.charAt(j)) lengths[i+1][j+1] = lengths[i][j] + 1; else lengths[i+1][j+1] = Math.max(lengths[i+1][j], lengths[i][j+1]);
// read the substring out from the matrix StringBuffer sb = new StringBuffer(); for (int x = a.length(), y = b.length(); x != 0 && y != 0; ) { if (lengths[x][y] == lengths[x-1][y]) x--; else if (lengths[x][y] == lengths[x][y-1]) y--; else { assert a.charAt(x-1) == b.charAt(y-1); sb.append(a.charAt(x-1)); x--; y--; } }
return sb.reverse().toString();
}</lang>
JavaScript
Recursion
This is more or less a translation of the recursive Java version above. <lang javascript>function lcs(a, b) {
var aSub = a.substr(0, a.length-1); var bSub = b.substr(0, b.length-1); if (a.length == 0 || b.length == 0) { return ""; } else if (a.charAt(a.length-1) == b.charAt(b.length-1)) { return lcs(aSub, bSub) + a.charAt(a.length-1); } else { var x = lcs(a, bSub); var y = lcs(aSub, b); return (x.length > y.length) ? x : y; }
}</lang>
Dynamic Programming
This version runs in O(mn) time and consumes O(mn) space. Factoring out loop edge cases could get a small constant time improvement, and it's fairly trivial to edit the final loop to produce a full diff in addition to the lcs. <lang javascript> function lcs(x,y){ var s,i,j,m,n, lcs=[],row=[],c=[], left,diag,latch; //make sure shorter string is the column string if(m<n){s=x;x=y;y=s;} m = x.length; n = y.length; //build the c-table for(j=0;j<n;row[j++]=0); for(i=0;i<m;i++){ c[i] = row = row.slice(); for(diag=0,j=0;j<n;j++,diag=latch){ latch=row[j]; if(x[i] == y[j]){row[j] = diag+1;} else{ left = row[j-1]||0; if(left>row[j]){row[j] = left;} } } } i--,j--; //row[j] now contains the length of the lcs //recover the lcs from the table while(i>-1&&j>-1){ switch(c[i][j]){ default: j--; lcs.unshift(x[i]); case (i&&c[i-1][j]): i--; continue; case (j&&c[i][j-1]): j--; } } return lcs.join(); }</lang> The final loop can be modified to concatenate maximal common substrings rather than individual characters: <lang javascript> var t=i; while(i>-1&&j>-1){ switch(c[i][j]){ default:i--,j--; continue; case (i&&c[i-1][j]): if(t!==i){lcs.unshift(x.substring(i+1,t+1));} t=--i; continue; case (j&&c[i][j-1]): j--; if(t!==i){lcs.unshift(x.substring(i+1,t+1));} t=i; } } if(t!==i){lcs.unshift(x.substring(i+1,t+1));} </lang>
Greedy Algorithm
This is a bit harder to understand, but is significantly faster and less memory intensive than the dynamic programming version, in exchange for giving up the ability to re-use the table to find alternate solutions and greater complexity in generating diffs. Note that this implementation uses a binary buffer for additional efficiency gains, but it's simple to transform to use string or array concatenation; <lang javascript> function lcs_greedy(x,y){ var symbols = {}, r=0,p=0,p1,L=0,idx, m=x.length,n=y.length, S = new Buffer(m<n?n:m); p1 = popsym(0); for(i=0;i < m;i++){ p = (r===p)?p1:popsym(i); p1 = popsym(i+1); idx=(p > p1)?(i++,p1):p; if(idx===n){p=popsym(i);} else{ r=idx; S[L++]=x.charCodeAt(i); } } return S.toString('utf8',0,L);
function popsym(index){ var s = x[index], pos = symbols[s]+1; pos = y.indexOf(s,pos>r?pos:r); if(pos===-1){pos=n;} symbols[s]=pos; return pos; } }</lang>
Logo
This implementation works on both words and lists. <lang logo>to longest :s :t
output ifelse greater? count :s count :t [:s] [:t]
end to lcs :s :t
if empty? :s [output :s] if empty? :t [output :t] if equal? first :s first :t [output combine first :s lcs bf :s bf :t] output longest lcs :s bf :t lcs bf :s :t
end</lang>
Lua
<lang lua>function LCS( a, b )
if #a == 0 or #b == 0 then return "" elseif string.sub( a, -1, -1 ) == string.sub( b, -1, -1 ) then return LCS( string.sub( a, 1, -2 ), string.sub( b, 1, -2 ) ) .. string.sub( a, -1, -1 ) else local a_sub = LCS( a, string.sub( b, 1, -2 ) ) local b_sub = LCS( string.sub( a, 1, -2 ), b ) if #a_sub > #b_sub then return a_sub else return b_sub end end
end
print( LCS( "thisisatest", "testing123testing" ) )</lang>
M4
<lang M4>define(`set2d',`define(`$1[$2][$3]',`$4')') define(`get2d',`defn($1[$2][$3])') define(`tryboth',
`pushdef(`x',lcs(`$1',substr(`$2',1),`$1 $2'))`'pushdef(`y', lcs(substr(`$1',1),`$2',`$1 $2'))`'ifelse(eval(len(x)>len(y)),1, `x',`y')`'popdef(`x')`'popdef(`y')')
define(`checkfirst',
`ifelse(substr(`$1',0,1),substr(`$2',0,1), `substr(`$1',0,1)`'lcs(substr(`$1',1),substr(`$2',1))', `tryboth(`$1',`$2')')')
define(`lcs',
`ifelse(get2d(`c',`$1',`$2'),`', `pushdef(`a',ifelse( `$1',`',`', `$2',`',`', `checkfirst(`$1',`$2')'))`'a`'set2d(`c',`$1',`$2',a)`'popdef(`a')', `get2d(`c',`$1',`$2')')')
lcs(`1234',`1224533324')
lcs(`thisisatest',`testing123testing')</lang> Note: the caching (set2d/get2d) obscures the code even more than usual, but is necessary in order to get the second test to run in a reasonable amount of time.
Mathematica
A built-in function can do this for us: <lang Mathematica>a = "thisisatest"; b = "testing123testing"; LongestCommonSequence[a, b]</lang> gives: <lang Mathematica>tsitest</lang> Note that Mathematica also has a built-in function called LongestCommonSubsequence[a,b]:
finds the longest contiguous subsequence of elements common to the strings or lists a and b.
which would give "test" as the result for LongestCommonSubsequence[a, b].
The description for LongestCommonSequence[a,b] is:
finds the longest sequence of contiguous or disjoint elements common to the strings or lists a and b.
I added this note because the name of this article suggests LongestCommonSubsequence does the job, however LongestCommonSubsequence performs the puzzle-description.
OCaml
Recursion
from Haskell <lang ocaml>let longest xs ys = if List.length xs > List.length ys then xs else ys
let rec lcs a b = match a, b with
[], _ | _, [] -> [] | x::xs, y::ys -> if x = y then x :: lcs xs ys else longest (lcs a ys) (lcs xs b)</lang>
Dynamic programming
<lang ocaml>let lcs xs' ys' =
let xs = Array.of_list xs' and ys = Array.of_list ys' in let n = Array.length xs and m = Array.length ys in let a = Array.make_matrix (n+1) (m+1) [] in for i = n-1 downto 0 do for j = m-1 downto 0 do a.(i).(j) <- if xs.(i) = ys.(j) then xs.(i) :: a.(i+1).(j+1) else longest a.(i).(j+1) a.(i+1).(j) done done; a.(0).(0)</lang>
Because both solutions only work with lists, here are some functions to convert to and from strings: <lang ocaml>let list_of_string str =
let result = ref [] in String.iter (fun x -> result := x :: !result) str; List.rev !result
let string_of_list lst =
let result = String.create (List.length lst) in ignore (List.fold_left (fun i x -> result.[i] <- x; i+1) 0 lst); result</lang>
Both solutions work. Example:
# string_of_list (lcs (list_of_string "thisisatest") (list_of_string "testing123testing"));; - : string = "tsitest"
Oz
Recursive solution: <lang oz>declare
fun {LCS Xs Ys} case [Xs Ys] of [nil _] then nil [] [_ nil] then nil [] [X|Xr Y|Yr] andthen X==Y then X|{LCS Xr Yr} [] [_|Xr _|Yr] then {Longest {LCS Xs Yr} {LCS Xr Ys}} end end
fun {Longest Xs Ys} if {Length Xs} > {Length Ys} then Xs else Ys end end
in
{System.showInfo {LCS "thisisatest" "testing123testing"}}</lang>
Pascal
<lang pascal>Program LongestCommonSubsequence(output);
function lcs(a, b: string): string;
var x, y: string; lenga, lengb: integer; begin lenga := length(a); lengb := length(b); lcs := ; if (lenga > 0) and (lengb > 0) then if a[lenga] = b[lengb] then lcs := lcs(copy(a, 1, lenga-1), copy(b, 1, lengb-1)) + a[lenga] else begin x := lcs(a, copy(b, 1, lengb-1)); y := lcs(copy(a, 1, lenga-1), b); if length(x) > length(y) then lcs := x else lcs := y; end; end;
var
s1, s2: string;
begin
s1 := 'thisisatest'; s2 := 'testing123testing'; writeln (lcs(s1, s2)); s1 := '1234'; s2 := '1224533324'; writeln (lcs(s1, s2));
end.</lang> Output:
:> ./LongestCommonSequence tsitest 1234
Perl
<lang perl> use Algorithm::Diff qw/ LCS /;
my @a = split //, 'thisisatest'; my @b = split //, 'testing123testing';
print LCS( \@a, \@b ); </lang>
Perl 6
Recursion
This solution is similar to the Haskell one. It is slow. <lang perl6> sub lcs(Str $xstr, Str $ystr) {
return "" unless $xstr & $ystr;
my ($x, $xs, $y, $ys) = $xstr.substr(0, 1), $xstr.substr(1), $ystr.substr(0, 1), $ystr.substr(1); return $x eq $y ?? $x ~ lcs($xs, $ys) !! max({ $^a.chars }, lcs($xstr, $ys), lcs($xs, $ystr) );
}
say lcs("thisisatest", "testing123testing"); </lang>
Dynamic Programming
<lang perl6> sub lcs(Str $xstr, Str $ystr) {
my ($xlen, $ylen) = ($xstr, $ystr)>>.chars; my @lengths = map {[(0) xx ($ylen+1)]}, 0..$xlen;
for $xstr.comb.kv -> $i, $x { for $ystr.comb.kv -> $j, $y { @lengths[$i+1][$j+1] = $x eq $y ?? @lengths[$i][$j]+1 !! (@lengths[$i+1][$j], @lengths[$i][$j+1]).max; } }
my @x = $xstr.comb; my ($x, $y) = ($xlen, $ylen); my $result = ""; while $x != 0 && $y != 0 { if @lengths[$x][$y] == @lengths[$x-1][$y] { $x--; } elsif @lengths[$x][$y] == @lengths[$x][$y-1] { $y--; } else { $result = @x[$x-1] ~ $result; $x--; $y--; } }
return $result;
}
say lcs("thisisatest", "testing123testing"); </lang>
PicoLisp
<lang PicoLisp>(de commonSequences (A B)
(when A (conc (when (member (car A) B) (mapcar '((L) (cons (car A) L)) (cons NIL (commonSequences (cdr A) (cdr @))) ) ) (commonSequences (cdr A) B) ) ) )
(maxi length
(commonSequences (chop "thisisatest") (chop "testing123testing") ) )</lang>
Output:
-> ("t" "s" "i" "t" "e" "s" "t")
Prolog
Recursive Version
First version: <lang Prolog> test :-
time(lcs("thisisatest", "testing123testing", Lcs)), writef('%s',[Lcs]).
lcs([ H|L1],[ H|L2],[H|Lcs]) :- !,
lcs(L1,L2,Lcs).
lcs([H1|L1],[H2|L2],Lcs):-
lcs( L1 ,[H2|L2],Lcs1), lcs([H1|L1], L2 ,Lcs2), longest(Lcs1,Lcs2,Lcs),!.
lcs(_,_,[]).
longest(L1,L2,Longest) :-
length(L1,Length1), length(L2,Length2), ((Length1 > Length2) -> Longest = L1; Longest = L2).
</lang>
Second version, with memorization: <lang Prolog> %declare that we will add lcs_db facts during runtime
- - dynamic lcs_db/3.
test :-
retractall(lcs_db(_,_,_)), %clear the database of known results time(lcs("thisisatest", "testing123testing", Lcs)), writef('%s',[Lcs]).
% check if the result is known
lcs(L1,L2,Lcs) :-
lcs_db(L1,L2,Lcs),!.
lcs([ H|L1],[ H|L2],[H|Lcs]) :- !,
lcs(L1,L2,Lcs).
lcs([H1|L1],[H2|L2],Lcs) :-
lcs( L1 ,[H2|L2],Lcs1), lcs([H1|L1], L2 ,Lcs2), longest(Lcs1,Lcs2,Lcs),!, assert(lcs_db([H1|L1],[H2|L2],Lcs)).
lcs(_,_,[]).
longest(L1,L2,Longest) :-
length(L1,Length1), length(L2,Length2), ((Length1 > Length2) -> Longest = L1; Longest = L2).
</lang>
Example for "beginning-middle-ending" and "beginning-diddle-dum-ending"
First version :
<lang Prolog>
?- time(lcs("beginning-middle-ending","beginning-diddle-dum-ending", Lcs)),writef('%s', [Lcs]).
% 10,875,184 inferences, 1.840 CPU in 1.996 seconds (92% CPU, 5910426 Lips)
beginning-iddle-ending
</lang>
Second version which is much faster : <lang Prolog> ?- time(lcs("beginning-middle-ending","beginning-diddle-dum-ending", Lcs)),writef('%s', [Lcs]). % 2,376 inferences, 0.010 CPU in 0.003 seconds (300% CPU, 237600 Lips) beginning-iddle-ending </lang>
PureBasic
<lang PureBasic>Procedure.s lcs(a$, b$)
Protected x$ , lcs$ If Len(a$) = 0 Or Len(b$) = 0 lcs$ = "" ElseIf Right(a$, 1) = Right(b$, 1) lcs$ = lcs(Left(a$, Len(a$) - 1), Left(b$, Len(b$) - 1)) + Right(a$, 1) Else x$ = lcs(a$, Left(b$, Len(b$) - 1)) y$ = lcs(Left(a$, Len(a$) - 1), b$) If Len(x$) > Len(y$) lcs$ = x$ Else lcs$ = y$ EndIf EndIf ProcedureReturn lcs$
EndProcedure OpenConsole() PrintN( lcs("thisisatest", "testing123testing")) PrintN("Press any key to exit"): Repeat: Until Inkey() <> ""</lang>
Python
Recursion
This solution is similar to the Haskell one. It is slow. <lang python>def lcs(xstr, ystr):
""" >>> lcs('thisisatest', 'testing123testing') 'tsitest' """ if not xstr or not ystr: return "" x, xs, y, ys = xstr[0], xstr[1:], ystr[0], ystr[1:] if x == y: return x + lcs(xs, ys) else: return max(lcs(xstr, ys), lcs(xs, ystr), key=len)</lang>
Test it: <lang python>if __name__=="__main__":
import doctest; doctest.testmod()</lang>
Dynamic Programming
<lang python>def lcs(a, b):
lengths = [[0 for j in range(len(b)+1)] for i in range(len(a)+1)] # row 0 and column 0 are initialized to 0 already for i, x in enumerate(a): for j, y in enumerate(b): if x == y: lengths[i+1][j+1] = lengths[i][j] + 1 else: lengths[i+1][j+1] = \ max(lengths[i+1][j], lengths[i][j+1]) # read the substring out from the matrix result = "" x, y = len(a), len(b) while x != 0 and y != 0: if lengths[x][y] == lengths[x-1][y]: x -= 1 elif lengths[x][y] == lengths[x][y-1]: y -= 1 else: assert a[x-1] == b[y-1] result = a[x-1] + result x -= 1 y -= 1 return result</lang>
REXX
<lang rexx> /*REXX program to test LCS subroutine. */
parse arg aaa bbb . /*get two arguments (strings). */ say 'string A='aaa /*echo string A to screen. */ say 'string B='bbb /*echo string B to screen. */ say ' LCS='lcs(aaa,bbb) /*tell Longest Common Sequence. */ exit /* - - - */
/*-------------------------------------LCS subroutine-------------------*/ lcs: procedure; parse arg a,b,z /*Longest Common Subsequence. */
/*reduce recursions by removing the ... */ /*chars in A not in B, and vice-versa.*/
if z== then return lcs(lcs(a,b,0),lcs(b,a,0),9) j=length(a) if z==0 then do /*special invocation to shrink the string*/
do j=1 for j _=substr(a,j,1); if pos(_,b)\==0 then z=z||_ end return substr(z,2) end
k=length(b) if j==0 | k==0 then return /*Either string null? Bupkis. */ _=substr(a,j,1) if _==substr(b,k,1) then return lcs(substr(a,1,j-1),substr(b,1,k-1),9)_ x=lcs(a,substr(b,1,k-1),9) y=lcs(substr(a,1,j-1),b,9) if length(x)>length(y) then return x
return y
/</lang>
Output when the following is specified:
1234 1224533324
string A=1234 string B=1224533324 LCS=1234
Output when the following is specified:
thisisatest testing123testing
string A=thisisatest string B=testing123testing LCS=tsitest
Ruby
Recursion
This solution is similar to the Haskell one. It is slow.
<lang ruby>=begin irb(main):001:0> lcs('thisisatest', 'testing123testing') => "tsitest" =end def lcs(xstr, ystr)
return "" if xstr.empty? || ystr.empty? x, xs, y, ys = xstr[0..0], xstr[1..-1], ystr[0..0], ystr[1..-1] if x == y x + lcs(xs, ys) else [lcs(xstr, ys), lcs(xs, ystr)].max_by {|x| x.size} end
end</lang>
Dynamic Programming
<lang ruby>def lcs(a, b)
lengths = Array.new(a.size+1) { Array.new(b.size+1) { 0 } } # row 0 and column 0 are initialized to 0 already a.split().each_with_index { |x, i| b.split().each_with_index { |y, j| if x == y lengths[i+1][j+1] = lengths[i][j] + 1 else lengths[i+1][j+1] = \ [lengths[i+1][j], lengths[i][j+1]].max end } } # read the substring out from the matrix result = "" x, y = a.size, b.size while x != 0 and y != 0 if lengths[x][y] == lengths[x-1][y] x -= 1 elsif lengths[x][y] == lengths[x][y-1] y -= 1 else # assert a[x-1] == b[y-1] result << a[x-1] x -= 1 y -= 1 end end result.reverse
end</lang>
SETL
Recursive; Also works on tuples (vectors)
op .longest(a, b); return if #a > #b then a else b end; end .longest; procedure lcs(a, b); if exists empty in {a, b} | #empty = 0 then return empty; elseif a(1) = b(1) then return a(1) + lcs(a(2..), b(2..)); else return lcs(a(2..), b) .longest lcs(a, b(2..)); end; end lcs;
Slate
We define this on the Sequence type since there is nothing string-specific about the concept.
Recursion
<lang slate>s1@(Sequence traits) longestCommonSubsequenceWith: s2@(Sequence traits) [
s1 isEmpty \/ s2 isEmpty ifTrue: [^ {}]. s1 last = s2 last ifTrue: [(s1 allButLast longestCommonSubsequenceWith: s2 allButLast) copyWith: s1 last] ifFalse: [| x y | x: (s1 longestCommonSubsequenceWith: s2 allButLast). y: (s1 allButLast longestCommonSubsequenceWith: s2). x length > y length ifTrue: [x] ifFalse: [y]]
].</lang>
Dynamic Programming
<lang slate>s1@(Sequence traits) longestCommonSubsequenceWith: s2@(Sequence traits) [| lengths |
lengths: (ArrayMD newWithDimensions: {s1 length `cache. s2 length `cache} defaultElement: 0). s1 doWithIndex: [| :elem1 :index1 | s2 doWithIndex: [| :elem2 :index2 | elem1 = elem2 ifTrue: [lengths at: {index1 + 1. index2 + 1} put: (lengths at: {index1. index2}) + 1] ifFalse: [lengths at: {index1 + 1. index2 + 1} put: ((lengths at: {index1 + 1. index2}) max: (lengths at: {index1. index2 + 1}))]]]. ([| :result index1 index2 | index1: s1 length. index2: s2 length. [index1 isPositive /\ index2 isPositive] whileTrue: [(lengths at: {index1. index2}) = (lengths at: {index1 - 1. index2}) ifTrue: [index1: index1 - 1] ifFalse: [(lengths at: {index1. index2}) = (lengths at: {index1. index2 - 1})] ifTrue: [index2: index2 - 1] ifFalse: ["assert: (s1 at: index1 - 1) = (s2 at: index2 - 1)." result nextPut: (s1 at: index1 - 1). index1: index1 - 1. index2: index2 - 1]] ] writingAs: s1) reverse
].</lang>
Tcl
Recursive
<lang tcl>proc r_lcs {a b} {
if {$a eq "" || $b eq ""} {return ""} set a_ [string range $a 1 end] set b_ [string range $b 1 end] if {[set c [string index $a 0]] eq [string index $b 0]} { return "$c[r_lcs $a_ $b_]" } else { set x [r_lcs $a $b_] set y [r_lcs $a_ $b] return [expr {[string length $x] > [string length $y] ? $x :$y}] }
}</lang>
Dynamic
<lang tcl>package require Tcl 8.5 namespace import ::tcl::mathop::+ namespace import ::tcl::mathop::- namespace import ::tcl::mathfunc::max
proc d_lcs {a b} {
set la [string length $a] set lb [string length $b] set lengths [lrepeat [+ $la 1] [lrepeat [+ $lb 1] 0]]
for {set i 0} {$i < $la} {incr i} { for {set j 0} {$j < $lb} {incr j} { if {[string index $a $i] eq [string index $b $j]} { lset lengths [+ $i 1] [+ $j 1] [+ [lindex $lengths $i $j] 1] } else { lset lengths [+ $i 1] [+ $j 1] [max [lindex $lengths [+ $i 1] $j] [lindex $lengths $i [+ $j 1]]] } } }
set result "" set x $la set y $lb while {$x >0 && $x > 0} { if {[lindex $lengths $x $y] == [lindex $lengths [- $x 1] $y]} { incr x -1 } elseif {[lindex $lengths $x $y] == [lindex $lengths $x [- $y 1]]} { incr y -1 } else { if {[set c [string index $a [- $x 1]]] ne [string index $b [- $y 1]]} { error "assertion failed: a.charAt(x-1) == b.charAt(y-1)" } append result $c incr x -1 incr y -1 } } return [string reverse $result]
}</lang>
Performance Comparison
<lang tcl>% time {d_lcs thisisatest testing123testing} 10 637.5 microseconds per iteration % time {r_lcs thisisatest testing123testing} 10 1275566.8 microseconds per iteration</lang>
Ursala
This uses the same recursive algorithm as in the Haskell example, and works on lists of any type. <lang Ursala>#import std
lcs = ~&alrB^& ~&E?abh/~&alh2fabt2RC @faltPrXlrtPXXPW leql?/~&r ~&l</lang> test program: <lang Ursala>#cast %s
example = lcs('thisisatest','testing123testing')</lang> output:
'tsitest'