Longest Common Subsequence

From Rosetta Code

Jump to: navigation, search

Puzzle
This is a programming puzzle. It lays out a problem which Rosetta Code users are encouraged to solve, using languages and techniques they know. Multiple approaches are not discouraged, so long as the puzzle guidelines are followed.

Code examples should be formatted along the lines of one of the existing prototypes.

For other Puzzles, see Category:Puzzles

The longest common subsequence (or LCS) of groups A and B is the longest group of elements from A and B that are common between the two groups and in the same order in each group. For example, the sequences "1234" and "1224533324" have an LCS of "1234":

1234
1224533324

For a string example, consider the sequences "thisisatest" and "testing123testing". An LCS would be "tsitest":

thisisatest
testing123testing

In this puzzle, your code only needs to deal with strings. Write a function which returns an LCS of two strings (case-sensitive). You don't need to show multiple LCS's.

Contents

[edit] Ada

Using recursion:

 
with Ada.Text_IO;  use Ada.Text_IO;
 
procedure Test_LCS is
   function LCS (A, B : String) return String is
   begin
      if A'Length = 0 or else B'Length = 0 then
         return "";
      elsif A (A'Last) = B (B'Last) then
         return LCS (A (A'First..A'Last - 1), B (B'First..B'Last - 1)) & A (A'Last);
      else
         declare
            X : String renames LCS (A, B (B'First..B'Last - 1));
            Y : String renames LCS (A (A'First..A'Last - 1), B);
         begin
            if X'Length > Y'Length then
               return X;
            else
               return Y;
            end if;
         end;
      end if;
   end LCS;
begin
   Put_Line (LCS ("thisisatest", "testing123testing"));
end Test_LCS;
 

Sample output:

tsitest

Non-recursive solution:

 
with Ada.Text_IO;  use Ada.Text_IO;
 
procedure Test_LCS is
   function LCS (A, B : String) return String is
      L : array (A'First..A'Last + 1, B'First..B'Last + 1) of Natural;
   begin
      for I in L'Range (1) loop
         L (I, B'First) := 0;
      end loop;
      for J in L'Range (2) loop
         L (A'First, J) := 0;
      end loop;
      for I in A'Range loop
         for J in B'Range loop
            if A (I) = B (J) then
               L (I + 1, J + 1) := L (I, J) + 1;
            else
               L (I + 1, J + 1) := Natural'Max (L (I + 1, J), L (I, J + 1));
            end if;
         end loop;
      end loop;
      declare
         I : Integer := L'Last (1);
         J : Integer := L'Last (2);
         R : String (1..Integer'Max (A'Length, B'Length));
         K : Integer := R'Last;
      begin
         while I > L'First (1) and then J > L'First (2) loop
            if L (I, J) = L (I - 1, J) then
               I := I - 1;
            elsif L (I, J) = L (I, J - 1) then
               J := J - 1;
            else
               I := I - 1;
               J := J - 1;
               R (K) := A (I);
               K := K - 1;
            end if;
         end loop;
         return R (K + 1..R'Last);
      end;
   end LCS;
begin
   Put_Line (LCS ("thisisatest", "testing123testing"));
end Test_LCS;
 

Sample output:

tsitest

[edit] BASIC

Works with: QuickBasic version 4.5

Translation of: Java

FUNCTION lcs$ (a$, b$)
    IF LEN(a$) = 0 OR LEN(b$) = 0 THEN
	lcs$ = ""
    ELSEIF RIGHT$(a$, 1) = RIGHT$(b$, 1) THEN
	lcs$ = lcs$(LEFT$(a$, LEN(a$) - 1), LEFT$(b$, LEN(b$) - 1)) + RIGHT$(a$, 1)
    ELSE
	x$ = lcs$(a$, LEFT$(b$, LEN(b$) - 1))
	y$ = lcs$(LEFT$(a$, LEN(a$) - 1), b$)
	IF LEN(x$) > LEN(y$) THEN
		lcs$ = x$
	ELSE
		lcs$ = y$
	END IF
    END IF
END FUNCTION

[edit] D

module lcs ;
import std.stdio ;
 
T[] lcsr(T)(T[] a, T[] b) { // recursive
  if(a.length == 0 || b.length == 0) return null ;
  T[] x = a[1..$] , y = b[1..$] ; 
  if(a[0] == b[0]) return a[0] ~ lcsr(x, y) ;
  x = lcsr(x, b) ;  y = lcsr(a, y) ;
  return x.length > y.length ? x : y ;
}
 
T imax(T)(T a, T b) { return a > b ? a : b ; }
 
T[] lcsi(T)(T[] a, T[] b) { // dynamic programming
  int i,j, m = a.length , n = b.length ;
  int[][] L = new int[][](m + 1,n + 1);
  T[] res ;
  for(i = 0 ; i < m ; i++)
    for(j = 0 ; j < n ; j++)
      L[i+1][j+1] = (a[i] == b[j]) ? 1 + L[i][j] : imax(L[i+1][j], L[i][j+1]) ;
  while(i >0 && j >0)
    if(a[i-1] == b[j-1]) { 
      res ~= a[i-1] ; i-- ; j-- ; 
    } else 
      if (L[i][j-1] < L[i-1][j]) 
        i-- ;
      else 
        j-- ;
  return res.reverse ;
}
 
void main(string[] args) {
  writefln(lcsr("thisisatest","testing123testing")) ;
  writefln(lcsi("thisisatest","testing123testing")) ;
}

[edit] Haskell

The Wikipedia solution translates directly into Haskell, with the only difference that equal characters are added in front:

longest xs ys = if length xs > length ys then xs else ys

lcs [] _ = []
lcs _ [] = []
lcs (x:xs) (y:ys) 
  | x == y    = x : lcs xs ys
  | otherwise = longest (lcs (x:xs) ys) (lcs xs (y:ys))

Memoization (aka dynamic programming) of that uses zip to make both the index and the character available:

import Data.Array

lcs xs ys = a!(0,0) where
  n = length xs
  m = length ys
  a = array ((0,0),(n,m)) $ l1 ++ l2 ++ l3
  l1 = [((i,m),[]) | i <- [0..n]]
  l2 = [((n,j),[]) | j <- [0..m]]
  l3 = [((i,j), f x y i j) | (x,i) <- zip xs [0..], (y,j) <- zip ys [0..]]
  f x y i j 
    | x == y    = x : a!(i+1,j+1)
    | otherwise = longest (a!(i,j+1)) (a!(i+1,j))

Both solutions work of course not only with strings, but also with any other list. Example:

*Main> lcs "thisisatest" "testing123testing"
"tsitest"

[edit] J

lcs=: dyad define
 |.x{~ 0{"1 cullOne^:_ (\:~~ +/@|:) 4$.$. x =/ y
)
cullOne=: verb define
 if. (#y) = First0=.0(= i. 1:) 1,*./|: 2 >/\ y 
 do. y  else. y #~ 0 First0}(#y)#1  end.
)

[edit] Java

[edit] Recursion

This is not a particularly fast algorithm, but it gets the job done eventually. The speed is a result of many recursive function calls.
public static String lcs(String a, String b){
    if(a.length() == 0 || b.length() == 0){
        return "";
    }else if(a.charAt(a.length()-1) == b.charAt(b.length()-1)){
        return lcs(a.substring(0,a.length()-1),b.substring(0,b.length()-1))
            + a.charAt(a.length()-1);
    }else{
        String x = lcs(a, b.substring(0,b.length()-1));
        String y = lcs(a.substring(0,a.length()-1), b);
        return (x.length() > y.length()) ? x : y;
    }
}

[edit] Dynamic Programming

public static String lcs(String a, String b) {
    int[][] lengths = new int[a.length()+1][b.length()+1];
 
    // row 0 and column 0 are initialized to 0 already
 
    for (int i = 0; i < a.length(); i++)
        for (int j = 0; j < b.length(); j++)
            if (a.charAt(i) == b.charAt(j))
                lengths[i+1][j+1] = lengths[i][j] + 1;
            else
                lengths[i+1][j+1] =
                    Math.max(lengths[i+1][j], lengths[i][j+1]);
 
    // read the substring out from the matrix
    StringBuffer sb = new StringBuffer();
    for (int x = a.length(), y = b.length();
         x != 0 && y != 0; ) {
        if (lengths[x][y] == lengths[x-1][y])
            x--;
        else if (lengths[x][y] == lengths[x][y-1])
            y--;
        else {
            assert a.charAt(x-1) == b.charAt(y-1);
            sb.append(a.charAt(x-1));
            x--;
            y--;
        }
    }
 
    return sb.reverse().toString();
}

[edit] OCaml

[edit] Recursion

from Haskell

let longest xs ys = if List.length xs > List.length ys then xs else ys
 
let rec lcs a b = match a, b with
   [], _
 | _, []        -> []
 | x::xs, y::ys ->
    if x = y then
      x :: lcs xs ys
    else 
      longest (lcs a ys) (lcs xs b)

[edit] Dynamic programming

let lcs xs' ys' =
  let xs = Array.of_list xs'
  and ys = Array.of_list ys' in
  let n = Array.length xs
  and m = Array.length ys in
  let a = Array.make_matrix (n+1) (m+1) [] in
  for i = n-1 downto 0 do
    for j = m-1 downto 0 do
      a.(i).(j) <- if xs.(i) = ys.(j) then
                     xs.(i) :: a.(i+1).(j+1)
                   else
                     longest a.(i).(j+1) a.(i+1).(j)
    done
  done;
  a.(0).(0)

Because both solutions only work with lists, here are some functions to convert to and from strings:

let list_of_string str =
  let result = ref [] in
  String.iter (fun x -> result := x :: !result)
              str;
  List.rev !result
 
let string_of_list lst =
  let result = String.create (List.length lst) in
  ignore (List.fold_left (fun i x -> result.[i] <- x; i+1) 0 lst);
  result

Both solutions work. Example:

# string_of_list (lcs (list_of_string "thisisatest")
                      (list_of_string "testing123testing"));;
- : string = "tsitest"

[edit] Python

[edit] Recursion

This solution is similar to the Haskell's one. It is slow.

 
longest = lambda xs, ys: (len(xs) > len(ys) and xs) or ys
 
def lcs(xstr, ystr):
    """
    >>> lcs('thisisatest', 'testing123testing')
    'tsitest'
    """
    if not (xstr and ystr): 
        return ""
    x, xs, y, ys = xstr[0], xstr[1:], ystr[0], ystr[1:]
    if x == y:
        return x + lcs(xs, ys)
    else:
        return longest(lcs(xstr, ys), lcs(xs, ystr))
 

Test it:

 
if __name__=="__main__":
    import doctest; doctest.testmod()
 
Personal tools