Shortest common supersequence: Difference between revisions
(→Tcl: Added implementation) |
|||
Line 7: | Line 7: | ||
<!-- This example is taken from the Wikipedia page. --> |
<!-- This example is taken from the Wikipedia page. --> |
||
=={{header|C}}== |
|||
The C99 code here isn't all that different from Levenstein distance calculation. |
|||
<lang c>#include <stdio.h> |
|||
#include <string.h> |
|||
typedef struct link link_t; |
|||
struct link { |
|||
int len; |
|||
char letter; |
|||
link_t *next; |
|||
}; |
|||
// Stores a copy of a SCS of x and y in out. Caller needs to make sure out is long enough. |
|||
int scs(char *x, char *y, char *out) |
|||
{ |
|||
int lx = strlen(x), ly = strlen(y); |
|||
link_t lnk[ly + 1][lx + 1]; |
|||
for (int i = 0; i < ly; i++) |
|||
lnk[i][lx] = (link_t) {ly - i, y[i], &lnk[i + 1][lx]}; |
|||
for (int j = 0; j < lx; j++) |
|||
lnk[ly][j] = (link_t) {lx - j, x[j], &lnk[ly][j + 1]}; |
|||
lnk[ly][lx] = (link_t) {0}; |
|||
for (int i = ly; i--; ) { |
|||
for (int j = lx; j--; ) { |
|||
link_t *lp = &lnk[i][j]; |
|||
if (y[i] == x[j]) { |
|||
lp->next = &lnk[i+1][j+1]; |
|||
lp->letter = x[j]; |
|||
} else if (lnk[i][j+1].len < lnk[i+1][j].len) { |
|||
lp->next = &lnk[i][j+1]; |
|||
lp->letter = x[j]; |
|||
} else { |
|||
lp->next = &lnk[i+1][j]; |
|||
lp->letter = y[i]; |
|||
} |
|||
lp->len = lp->next->len + 1; |
|||
} |
|||
} |
|||
for (link_t *lp = &lnk[0][0]; lp; lp = lp->next) |
|||
*out++ = lp->letter; |
|||
return 0; |
|||
} |
|||
int main(void) |
|||
{ |
|||
char x[] = "abcbdab", y[] = "bdcaba", res[128]; |
|||
scs(x, y, res); |
|||
printf("SCS(%s, %s) -> %s\n", x, y, res); |
|||
return 0; |
|||
}</lang> |
|||
{{out}} |
|||
<pre> |
|||
SCS(abcbdab, bdcaba) -> abdcabdab |
|||
</pre> |
|||
=={{header|Tcl}}== |
=={{header|Tcl}}== |
||
This example uses either of the <code>lcs</code> implementations from [[longest common subsequence#Tcl|here]], assumed renamed to <tt>lcs</tt>… |
This example uses either of the <code>lcs</code> implementations from [[longest common subsequence#Tcl|here]], assumed renamed to <tt>lcs</tt>… |
Revision as of 16:27, 27 May 2013
The shortest common supersequence is a problem closely related to the longest common subsequence, which you can use as an external function for this task.
Given two strings and , find the shortest possible sequence , which is the shortest common supersequence of and where both and are a subsequence of .
Demonstrate this by printing where “abcbdab” and “bdcaba”.
C
The C99 code here isn't all that different from Levenstein distance calculation. <lang c>#include <stdio.h>
- include <string.h>
typedef struct link link_t; struct link { int len; char letter; link_t *next; };
// Stores a copy of a SCS of x and y in out. Caller needs to make sure out is long enough. int scs(char *x, char *y, char *out) { int lx = strlen(x), ly = strlen(y); link_t lnk[ly + 1][lx + 1];
for (int i = 0; i < ly; i++) lnk[i][lx] = (link_t) {ly - i, y[i], &lnk[i + 1][lx]};
for (int j = 0; j < lx; j++) lnk[ly][j] = (link_t) {lx - j, x[j], &lnk[ly][j + 1]};
lnk[ly][lx] = (link_t) {0};
for (int i = ly; i--; ) { for (int j = lx; j--; ) { link_t *lp = &lnk[i][j]; if (y[i] == x[j]) { lp->next = &lnk[i+1][j+1]; lp->letter = x[j]; } else if (lnk[i][j+1].len < lnk[i+1][j].len) { lp->next = &lnk[i][j+1]; lp->letter = x[j]; } else { lp->next = &lnk[i+1][j]; lp->letter = y[i]; } lp->len = lp->next->len + 1; } }
for (link_t *lp = &lnk[0][0]; lp; lp = lp->next) *out++ = lp->letter;
return 0; }
int main(void) { char x[] = "abcbdab", y[] = "bdcaba", res[128]; scs(x, y, res); printf("SCS(%s, %s) -> %s\n", x, y, res); return 0; }</lang>
- Output:
SCS(abcbdab, bdcaba) -> abdcabdab
Tcl
This example uses either of the lcs
implementations from here, assumed renamed to lcs…
<lang tcl>proc scs {u v} {
set lcs [lcs $u $v] set scs ""
# Iterate over the characters until LCS processed for {set ui [set vi [set li 0]]} {$li<[string length $lcs]} {} {
set uc [string index $u $ui] set vc [string index $v $vi] set lc [string index $lcs $li] if {$uc eq $lc} { if {$vc eq $lc} { # Part of the LCS, so consume from all strings append scs $lc incr ui incr li } else { # char of u = char of LCS, but char of LCS v doesn't so consume just that append scs $vc } incr vi } else { # char of u != char of LCS, so consume just that append scs $uc incr ui }
}
# append remaining characters, which are not in common append scs [string range $u $ui end] [string range $v $vi end] return $scs
}</lang> Demonstrating: <lang tcl>set u "abcbdab" set v "bdcaba" puts "SCS($u,$v) = [scs $u $v]"</lang>
- Output:
SCS(abcbdab,bdcaba) = abdcabdab