Shortest common supersequence: Difference between revisions

Content added Content deleted

Inline

Revision as of 09:49, 8 February 2019

The shortest common supersequence is a problem closely related to the longest common subsequence, which you can use as an external function for this task.

Task

Given two strings $u$ and $v$ , find the shortest possible sequence $s$ , which is the shortest common super-sequence of $u$ and $v$ where both $u$ and $v$ are a subsequence of $s$ . Defined as such, $s$ is not necessarily unique.

Demonstrate this by printing $s$ where $u=$ “abcbdab” and $v=$ “bdcaba”.

Also see

The Wikipedia article: shortest common supersequence.

C

The C99 code here isn't all that different from Levenstein distance calculation. <lang c>#include <stdio.h>

include <string.h>

typedef struct link link_t; struct link { int len; char letter; link_t *next; };

// Stores a copy of a SCS of x and y in out. Caller needs to make sure out is long enough. int scs(char *x, char *y, char *out) { int lx = strlen(x), ly = strlen(y); link_t lnk[ly + 1][lx + 1];

for (int i = 0; i < ly; i++) lnk[i][lx] = (link_t) {ly - i, y[i], &lnk[i + 1][lx]};

for (int j = 0; j < lx; j++) lnk[ly][j] = (link_t) {lx - j, x[j], &lnk[ly][j + 1]};

lnk[ly][lx] = (link_t) {0};

for (int i = ly; i--; ) { for (int j = lx; j--; ) { link_t *lp = &lnk[i][j]; if (y[i] == x[j]) { lp->next = &lnk[i+1][j+1]; lp->letter = x[j]; } else if (lnk[i][j+1].len < lnk[i+1][j].len) { lp->next = &lnk[i][j+1]; lp->letter = x[j]; } else { lp->next = &lnk[i+1][j]; lp->letter = y[i]; } lp->len = lp->next->len + 1; } }

for (link_t *lp = &lnk[0][0]; lp; lp = lp->next) *out++ = lp->letter;

return 0; }

int main(void) { char x[] = "abcbdab", y[] = "bdcaba", res[128]; scs(x, y, res); printf("SCS(%s, %s) -> %s\n", x, y, res); return 0; }</lang>

Output:

SCS(abcbdab, bdcaba) -> abdcabdab

D

Translation of: Racket

<lang d>import std.stdio, std.functional, std.array, std.range;

dstring scs(in dstring x, in dstring y) nothrow @safe {

   alias mScs = memoize!scs;
   if (x.empty) return y;
   if (y.empty) return x;
   if (x.front == y.front)
       return x.front ~ mScs(x.dropOne, y.dropOne);
   if (mScs(x, y.dropOne).length <= mScs(x.dropOne, y).length)
       return y.front ~ mScs(x, y.dropOne);
   else
       return x.front ~ mScs(x.dropOne, y);

}

void main() @safe {

   scs("abcbdab", "bdcaba").writeln;

}</lang>

Output:

abdcabdab

Go

Translation of: Kotlin

<lang go>package main

import (

   "fmt"
   "strings"

)

func lcs(x, y string) string {

   xl, yl := len(x), len(y)
   if xl == 0 || yl == 0 {
       return ""
   }
   x1, y1 := x[:xl-1], y[:yl-1]
   if x[xl-1] == y[yl-1] {
       return fmt.Sprintf("%s%c", lcs(x1, y1), x[xl-1])
   }
   x2, y2 := lcs(x, y1), lcs(x1, y)
   if len(x2) > len(y2) {
       return x2
   } else {
       return y2
   }

}

func scs(u, v string) string {

   ul, vl := len(u), len(v)
   lcs := lcs(u, v)
   ui, vi := 0, 0
   var sb strings.Builder
   for i := 0; i < len(lcs); i++ {
       for ui < ul && u[ui] != lcs[i] {
           sb.WriteByte(u[ui])
           ui++
       }
       for vi < vl && v[vi] != lcs[i] {
           sb.WriteByte(v[vi])
           vi++
       }
       sb.WriteByte(lcs[i])
       ui++
       vi++
   }
   if ui < ul {
       sb.WriteString(u[ui:])
   }
   if vi < vl {
       sb.WriteString(v[vi:])
   }
   return sb.String()

}

func main() {

   u := "abcbdab"
   v := "bdcaba"
   fmt.Println(scs(u, v))

}</lang>

Output:

abdcabdab

Elixir

Translation of: Ruby

Works with: Elixir version 1.3

uses 'LCS' from here <lang elixir>defmodule SCS do

 def scs(u, v) do
   lcs = LCS.lcs(u, v) |> to_charlist
   scs(to_charlist(u), to_charlist(v), lcs, []) |> to_string
 end
 
 defp scs(u, v, [], res), do: Enum.reverse(res) ++ u ++ v
 defp scs([h|ut], [h|vt], [h|lt], res),      do: scs(ut, vt, lt, [h|res])
 defp scs([h|_]=u, [vh|vt], [h|_]=lcs, res), do: scs(u, vt, lcs, [vh|res])
 defp scs([uh|ut], v, lcs, res),             do: scs(ut, v, lcs, [uh|res])

end

u = "abcbdab" v = "bdcaba" IO.puts "SCS(#{u}, #{v}) = #{SCS.scs(u, v)}"</lang>

Output:

SCS(abcbdab, bdcaba) = abdcabdab

Julia

Translation of: D

<lang Julia>using Memoize

@memoize function scs(x, y)

   if x == ""
       return y
   elseif y == ""
       return x
   elseif x[1] == y[1]
       return "$(x[1])$(scs(x[2:end], y[2:end]))"
   elseif length(scs(x, y[2:end])) <= length(scs(x[2:end], y))
       return "$(y[1])$(scs(x, y[2:end]))"
   else
       return "$(x[1])$(scs(x[2:end], y))"
   end

end

println(scs("abcbdab", "bdcaba")) </lang>

Output:

abdcabdab

Kotlin

Uses 'lcs' function from Longest common subsequence#Kotlin: <lang scala>// version 1.1.2

fun lcs(x: String, y: String): String {

   if (x.length == 0 || y.length == 0) return ""
   val x1 = x.dropLast(1)  
   val y1 = y.dropLast(1)
   if (x.last() == y.last()) return lcs(x1, y1) + x.last()
   val x2 = lcs(x, y1)
   val y2 = lcs(x1, y)
   return if (x2.length > y2.length) x2 else y2

}

fun scs(u: String, v: String): String{

   val lcs = lcs(u, v)
   var ui = 0
   var vi = 0
   val sb = StringBuilder()
   for (i in 0 until lcs.length) {
       while (ui < u.length && u[ui] != lcs[i]) sb.append(u[ui++])       
       while (vi < v.length && v[vi] != lcs[i]) sb.append(v[vi++])
       sb.append(lcs[i])
       ui++; vi++
   }
   if (ui < u.length) sb.append(u.substring(ui))
   if (vi < v.length) sb.append(v.substring(vi))
   return sb.toString()

}

fun main(args: Array<String>) {

   val u = "abcbdab"
   val v = "bdcaba"  
   println(scs(u, v))

}</lang>

Output:

abdcabdab

Perl

<lang perl>sub lcs { # longest common subsequence

   my( $u, $v ) = @_;
   return  unless length($u) and length($v);
   my $longest = ;
   for my $first ( 0..length($u)-1 ) {
       my $char = substr $u, $first, 1;
       my $i = index( $v, $char );
       next if -1==$i;
       my $next = $char;
       $next .= lcs( substr( $u, $first+1), substr( $v, $i+1 ) ) unless $i==length($v)-1;
       $longest = $next if length($next) > length($longest);
   }
   return $longest;

}

sub scs { # shortest common supersequence

   my( $u, $v ) = @_;
   my @lcs = split //, lcs $u, $v;
   my $pat = "(.*)".join("(.*)",@lcs)."(.*)"; 
   my @u = $u =~ /$pat/;
   my @v = $v =~ /$pat/;
   my $scs = shift(@u).shift(@v);
   $scs .= $_.shift(@u).shift(@v) for @lcs;
   return $scs;

}

my $u = "abcbdab"; my $v = "bdcaba"; printf "Strings %s %s\n", $u, $v; printf "Longest common subsequence: %s\n", lcs $u, $v; printf "Shortest common supersquence: %s\n", scs $u, $v; </lang>

Output:

Strings abcbdab bdcaba
Longest common subsequence:   bcba
Shortest common supersquence: abdcabdab

Perl 6

Using 'lcs' routine from Longest common subsequence task <lang perl6>sub lcs(Str $xstr, Str $ystr) { # longest common subsequence

   return "" unless $xstr && $ystr;
   my ($x, $xs, $y, $ys) = $xstr.substr(0, 1), $xstr.substr(1), $ystr.substr(0, 1), $ystr.substr(1);
   return $x eq $y
       ?? $x ~ lcs($xs, $ys)
       !! max(:by{ $^a.chars }, lcs($xstr, $ys), lcs($xs, $ystr) );

}

sub scs ($u, $v) { # shortest common supersequence

   my @lcs = (lcs $u, $v).comb;
   my $pat = '(.*)' ~ join('(.*)',@lcs) ~ '(.*)';
   my $regex = "rx/$pat/".EVAL;
   my @u = ($u ~~ $regex).list;
   my @v = ($v ~~ $regex).list;
   my $scs = shift(@u) ~ shift(@v);
   $scs ~= $_ ~ shift(@u) ~ shift(@v) for @lcs;
   return $scs;

}

my $u = 'abcbdab'; my $v = 'bdcaba'; printf "Strings: %s %s\n", $u, $v; printf "Longest common subsequence: %s\n", lcs $u, $v; printf "Shortest common supersquence: %s\n", scs $u, $v;</lang>

Output:

Strings: abcbdab bdcaba
Longest common subsequence:   bcba
Shortest common supersquence: abdcabdab

Phix

Translation of: Python

<lang Phix>function longest_common_subsequence(sequence a, b) sequence res = ""

   if length(a) and length(b) then
       if a[$]=b[$] then
           res = longest_common_subsequence(a[1..-2],b[1..-2])&a[$]
       else
           sequence l = longest_common_subsequence(a,b[1..-2]),
                    r = longest_common_subsequence(a[1..-2],b)
           res = iff(length(l)>length(r)?l:r)
       end if
   end if
   return res

end function

function shortest_common_supersequence(string a, b)

   string lcs = longest_common_subsequence(a, b),
          scs = ""
   -- Consume lcs
   while length(lcs) do
       integer c = lcs[1]
       if a[1]==c and b[1]==c then
           -- Part of the lcs, so consume from all strings
           scs &= c
           lcs = lcs[2..$]
           a = a[2..$]
           b = b[2..$]
       elsif a[1]==c then
           scs &= b[1]
           b = b[2..$]
       else
           scs &= a[1]
           a = a[2..$]
       end if
   end while
   -- append remaining characters
   return scs & a & b

end function

?shortest_common_supersequence("abcbdab", "bdcaba") ?shortest_common_supersequence("WEASELS", "WARDANCE")</lang>

Output:

"abdcabdab"
"WEASRDANCELS"

Python

Use the Longest Common Subsequence algorithm

def shortest_common_supersequence(a, b):

   lcs = longest_common_subsequence(a, b)
   scs = ""
   # Consume lcs
   while len(lcs) > 0:
       if a[0]==lcs[0] and b[0]==lcs[0]:
       # Part of the LCS, so consume from all strings
           scs += lcs[0]
           lcs = lcs[1:]
           a = a[1:]
           b = b[1:]
       elif a[0]==lcs[0]:
           scs += b[0]
           b = b[1:]
       else:
           scs += a[0]
           a = a[1:]
   # append remaining characters
   return scs + a + b

</lang>

Output:

Seq1: WEASELS
Seq2: WARDANCE
SCS:  WEASRDANCELS

Racket

Translation of: C

This program is based on the C implementation, but use memorization instead of dynamic programming. More explanations about the memorization part in http://blog.racket-lang.org/2012/08/dynamic-programming-versus-memoization.html .

<lang Racket>#lang racket

(struct link (len letters))

(define (link-add li n letter)

 (link (+ n (link-len li)) 
       (cons letter (link-letters li))))

(define (memoize f)

 (local ([define table (make-hash)])
   (lambda args
     (dict-ref! table args (λ () (apply f args))))))

(define scs/list

 (memoize 
  (lambda (x y)
    (cond
      [(null? x)
       (link (length y) y)]
      [(null? y)
       (link (length x) x)]
      [(eq? (car x) (car y))
       (link-add (scs/list (cdr x) (cdr y)) 1 (car x))]
      [(<= (link-len (scs/list x (cdr y)))
           (link-len (scs/list (cdr x) y)))
       (link-add (scs/list x (cdr y)) 1 (car y))]
      [else
       (link-add (scs/list (cdr x) y) 1 (car x))]))))

(define (scs x y)

 (list->string (link-letters (scs/list (string->list x) (string->list y)))))

(scs "abcbdab" "bdcaba")</lang>

Output:

"abdcabdab"

REXX

Translation of: RING

<lang rexx>/*REXX program finds the Shortest common supersequence (SCS) of two character strings.*/ parse arg u v . /*obtain optional arguments from the CL*/ if u== | u=="," then u= 'abcbdab' /*Not specified? Then use the default.*/ if v== | v=="," then v= 'bdcaba' /* " " " " " " */ say ' string u=' u /*echo the value of string U to term.*/ say ' string v=' v /* " " " " " V " " */ $= u /*define initial value for the output. */

     do n=1    to length(u)                     /*process the whole length of string U.*/
       do m=n  to length(v) - 1                 /*   "    right─ish  part   "    "   V.*/
       p= pos( substr(v, m, 1), $)              /*position of mTH  V  char in $ string.*/
       _= substr(v, m+1, 1)                     /*obtain a single character of string V*/
       if p\==0  &  _\==substr($, p+1, 1)  then $= insert(_, $, p)
       end   /*m*/                              /* [↑]  insert _ in $ after position P.*/
     end     /*n*/

say say 'shortest common supersequence=' $ /*stick a fork in it, we're all done. */</lang>

output when using the default inputs:

                     string u= abcbdab
                     string v= bdcaba

shortest common supersequence= abdcabdab

output when using the inputs values: ab ac

                     string u= ab
                     string v= ac

shortest common supersequence= acb

output when using the inputs values: ac ab

                     string u= ac
                     string v= ab

shortest common supersequence= abc

Ring

Project : Shortest common supersequence

str1 = "a b c b d a b" str2 = "bdcaba" str3 = str2list(substr(str1, " ", nl)) for n = 1 to len(str3)

    for m = n to len(str2)-1
         pos = find(str3, str2[m])
         if pos > 0 and str2[m+1] != str3[pos+1]
            insert(str3, pos, str2[m+1])
         ok
    next

next showarray(str3)

func showarray(vect)

      svect = ""
      for n = 1 to len(vect)
            svect = svect + vect[n]
      next
      see svect

</lang> Output:

Shortest common supersequence: abdcabdab

Ruby

Translation of: Tcl

uses 'lcs' from here <lang ruby>require 'lcs'

def scs(u, v)

 lcs = lcs(u, v)
 u, v = u.dup, v.dup
 scs = ""
 # Iterate over the characters until LCS processed
 until lcs.empty?
   if u[0]==lcs[0] and v[0]==lcs[0]
     # Part of the LCS, so consume from all strings
     scs << lcs.slice!(0)
     u.slice!(0)
     v.slice!(0)
   elsif u[0]==lcs[0]
     # char of u = char of LCS, but char of LCS v doesn't so consume just that
     scs << v.slice!(0)
   else
     # char of u != char of LCS, so consume just that
     scs << u.slice!(0)
   end
 end
 # append remaining characters, which are not in common
 scs + u + v

end

u = "abcbdab" v = "bdcaba" puts "SCS(#{u}, #{v}) = #{scs(u, v)}"</lang>

Output:

SCS(abcbdab, bdcaba) = abcbdcaba

Sidef

Translation of: Perl

Uses the lcs function defined here. <lang ruby>func scs(u, v) {

   var ls = lcs(u, v).chars
   var pat = Regex('(.*)'+ls.join('(.*)')+'(.*)')
   u.scan!(pat)
   v.scan!(pat)
   var ss = (u.shift + v.shift)
   ls.each { |c| ss += (c + u.shift + v.shift) }
   return ss

}

say scs("abcbdab", "bdcaba")</lang>

Output:

abdcabdab

Tcl

This example uses either of the lcs implementations from here, assumed renamed to lcs… <lang tcl>proc scs {u v} {

   set lcs [lcs $u $v]
   set scs ""

   # Iterate over the characters until LCS processed
   for {set ui [set vi [set li 0]]} {$li<[string length $lcs]} {} {

set uc [string index $u $ui] set vc [string index $v $vi] set lc [string index $lcs $li] if {$uc eq $lc} { if {$vc eq $lc} { # Part of the LCS, so consume from all strings append scs $lc incr ui incr li } else { # char of u = char of LCS, but char of LCS v doesn't so consume just that append scs $vc } incr vi } else { # char of u != char of LCS, so consume just that append scs $uc incr ui }

   # append remaining characters, which are not in common
   append scs [string range $u $ui end] [string range $v $vi end]
   return $scs

}</lang> Demonstrating: <lang tcl>set u "abcbdab" set v "bdcaba" puts "SCS($u,$v) = [scs $u $v]"</lang>

Output:

SCS(abcbdab,bdcaba) = abdcabdab

zkl

Translation of: C

<lang zkl>class Link{ var len,letter,next;

  fcn init(l=0,c="",lnk=Void){ len,letter,next=l,c,lnk; }

} fcn scs(x,y,out){

  lx,ly:=x.len(),y.len();
  lnk:=(ly+1).pump(List,'wrap(_){ (lx+1).pump(List(),Link.create) });

  foreach i in (ly){ lnk[i][lx]=Link(ly-i, y[i]) }
  foreach j in (lx){ lnk[ly][j]=Link(lx-j, x[j]) }

  foreach i,j in ([ly-1..0,-1],[lx-1..0,-1]){
     lp:=lnk[i][j];
     if (y[i]==x[j]){

lp.next =lnk[i+1][j+1]; lp.letter=x[j];

     }else if(lnk[i][j+1].len < lnk[i+1][j].len){

lp.next =lnk[i][j+1]; lp.letter=x[j];

     }else{

lp.next =lnk[i+1][j]; lp.letter=y[i];

     }
     lp.len=lp.next.len + 1;
  }

  lp:=lnk[0][0]; while(lp){ out.write(lp.letter); lp=lp.next; }
  out.close()

}</lang> <lang zkl>scs("abcbdab","bdcaba", Sink(String)).println();</lang>

Output:

abdcabdab