Longest common subsequence: Difference between revisions

m
(Corrected definitions made in the introduction.)
 
(12 intermediate revisions by 6 users not shown)
Line 19:
 
A chain '''C''' is a subset of '''M''' consisting of at least one element m; and where either m1 < m2 or m1 > m2 for every pair of distinct elements m1 and m2. An antichain '''D''' is any subset of '''M''' in which every pair of distinct elements m1 and m2 are incomparable.
 
Every Common Sequence of length ''q'' corresponds to a chain of cardinality ''q'', over the set of matches '''M'''. Thus, finding an LCS can be restated as the problem of finding a chain of maximum cardinality ''p''.
 
A chain can be visualized as a strictly increasing curve that passes through matches (i, j) in the m*n coordinate space of '''M'''[i, j].
 
Every Common Sequence of length ''q'' corresponds to a chain of cardinality ''q'', over the set of matches '''M'''. Thus, finding an LCS can be restated as the problem of finding a chain of maximum cardinality ''p''.
 
According to [Dilworth 1950], this cardinality ''p'' equals the minimum number of disjoint antichains into which '''M''' can be decomposed. Note that such a decomposition into the minimal number p of disjoint antichains may not be unique.
Line 323:
 
=={{header|BASIC}}==
==={{header|QuickBASIC}}===
{{works with|QuickBasic|4.5}}
{{trans|Java}}
Line 340 ⟶ 341:
END IF
END FUNCTION</syntaxhighlight>
 
 
=={{header|BASIC256}}==
Line 494:
=={{header|C++}}==
'''Hunt and Szymanski algorithm'''
<syntaxhighlight lang="cpp">#include <stdint.h>
#include <stdint.h>
#include <string>
#include <memory> // for shared_ptr<>
Line 507 ⟶ 508:
class LCS {
protected:
// ThisInstances of the Pair linked list class isare used to tracerecover the LCS candidates:
class Pair {
public:
Line 527 ⟶ 528:
 
typedef deque<shared_ptr<Pair>> PAIRS;
typedef deque<uint32_t> THRESHOLD;
typedef deque<uint32_t> INDEXES;
typedef unordered_map<char, INDEXES> CHAR_TO_INDEXES_MAP;
typedef deque<INDEXES*> MATCHES;
 
static uint32_t FindLCS(MATCHES& indexesOf2MatchedByIndex1, shared_ptr<Pair>* pairs) {
MATCHES& indexesOf2MatchedByIndex1, shared_ptr<Pair>* pairs) {
auto traceLCS = pairs != nullptr;
PAIRS chains;
THRESHOLDINDEXES thresholdprefixEnd;
 
//
//[Assert]After each index1 iteration thresholdprefixEnd[index3] is the least index2
// such that the LCS of s1[0:index1] and s2[0:index2] has length index3 + 1
//
uint32_t index1 = 0;
for (const auto& it1 : indexesOf2MatchedByIndex1) {
ifauto (!it1->empty())dq2 {= *it1;
auto dq2limit = *it1prefixEnd.end();
for (auto limitit2 = thresholddq2.endrbegin(); it2 != dq2.rend(); it2++) {
// Each index1, index2 pair corresponds to a match
for (auto it2 = dq2.rbegin(); it2 != dq2.rend(); it2++) {
// Each of the index1,auto index2 pairs considered here correspond to a= match*it2;
auto index2 = *it2;
 
//
// Note: The reverse iterator it2 visits index2 values in descending order,
// allowing thresholdsin-place toupdate beof updated in-placeprefixEnd[]. std::lower_bound() is used to
// to perform a binary search.
//
limit = lower_bound(thresholdprefixEnd.begin(), limit, index2);
auto index3 = distance(threshold.begin(), limit);
 
//
// Look ahead to the next index2 value to optimize Pairs used by the Hunt
// and Szymanski algorithm. If the next index2 is also an improvement on
// the value currently held in thresholdprefixEnd[index3], a new Pair will only be
// superseded on the next index2 iteration.
//
// Verify thethat a next index2 value ofexists; index2and willthat bethis greatervalue thanis the final elementgreater
// ofthan the nextfinal shorterindex2 value of the LCS prefix at prev(limit):
//
auto preferNextIndex2 = next(it2) != dq2.rend() &&
(limit == thresholdprefixEnd.begin() || *prev(limit) < *next(it2));
 
//
// Depending on match redundancy, this optimization may reduce the number
// of Pair allocations by factors ranging from 2 up to 10 or more.
//
if (preferNextIndex2) continue;
 
auto index3 if= distance(limit == thresholdprefixEnd.endbegin(), limit) {;
 
// Insert Case
if (limit == thresholdprefixEnd.push_backend(index2);) {
// Refresh limitInsert iterator:Case
limit = prev(thresholdprefixEnd.endpush_back()index2);
// Refresh iflimit (traceLCS) {iterator:
limit = prev(prefixEnd.end());
auto prefix = index3 > 0 ? chains[index3 - 1] : nullptr;
if (traceLCS) }{
auto last = make_shared<Pair>(index1, index2, prefix);
chains.push_back(lastpushPair(chains, index3, index1, index2));
}
}
else if (index2 < *limit) {}
else if (index2 < //*limit) Update Case{
// Update limit value:Case
// Update *limit = index2;value:
*limit = if (traceLCS) {index2;
if (traceLCS) }{
auto prefix = index3 > 0 ? chains[index3 - 1] : nullptr;
chains[index3] = autopushPair(chains, lastindex3, = make_shared<Pair>(index1, index2, prefix);
chains[index3] = last;
}
}
}
} } // next index2
}
 
index1++;
Line 605 ⟶ 601:
if (traceLCS) {
// Return the LCS as a linked list of matched index pairs:
auto last = chains.sizeempty() >? 0nullptr ?: chains.back() : nullptr;
// Reverse longest chain
*pairs = Pair::Reverse(last);
}
 
auto length = thresholdprefixEnd.size();
return length;
}
 
private:
static shared_ptr<Pair> pushPair(
PAIRS& chains, const ptrdiff_t& index3, uint32_t& index1, uint32_t& index2) {
auto prefix = index3 > 0 ? chains[index3 - 1] : nullptr;
auto last =return make_shared<Pair>(index1, index2, prefix);
}
 
protected:
//
// Match() avoids m*n comparisons by using CHAR_TO_INDEXES_MAP to
Line 622 ⟶ 626:
// time will be O(log(m+n)), at most.
//
static void Match(
void Match( CHAR_TO_INDEXES_MAP& indexesOf2MatchedByChar, MATCHES& indexesOf2MatchedByIndex1,
const string& s1, const string& s2) {
uint32_t index = 0;
Line 634 ⟶ 639:
}
 
static string Select(shared_ptr<Pair> pairs, uint32_t length,
bool right, const string& s1, const string& s2) {
string buffer;
Line 646 ⟶ 651:
 
public:
static string Correspondence(const string& s1, const string& s2) {
CHAR_TO_INDEXES_MAP indexesOf2MatchedByChar;
MATCHES indexesOf2MatchedByIndex1; // holds references into indexesOf2MatchedByChar
Line 656 ⟶ 661:
};</syntaxhighlight>
'''Example''':
<syntaxhighlight lang="cpp"> LCS lcs;
auto s = lcs.LCS::Correspondence(s1, s2);
cout << s << endl;</syntaxhighlight>
 
More fully featured examples are available at [https://github.com/CNHume/Samples/tree/master/C%2B%2B/LCS Samples/C++/LCS].
 
=={{header|Clojure}}==
Line 973 ⟶ 980:
lcsRecursion('', 'x') =
lcsRecursion('x', 'x') = x</pre>
 
=={{header|EasyLang}}==
{{trans|BASIC256}}
<syntaxhighlight>
func$ right a$ n .
return substr a$ (len a$ - n + 1) n
.
func$ left a$ n .
if n < 0
n = len a$ + n
.
return substr a$ 1 n
.
func$ lcs a$ b$ .
if len a$ = 0 or len b$ = 0
return ""
.
if right a$ 1 = right b$ 1
return lcs left a$ -1 left b$ -1 & right a$ 1
.
x$ = lcs a$ left b$ -1
y$ = lcs left a$ -1 b$
if len x$ > len y$
return x$
else
return y$
.
.
print lcs "1234" "1224533324"
print lcs "thisisatest" "testing123testing"
</syntaxhighlight>
 
{{out}}
<pre>
1234
tsitest
</pre>
 
=={{header|Egison}}==
Line 2,151 ⟶ 2,195:
 
===Alternate letting regex do all the work===
<syntaxhighlight lang="perl">#!/usr/bin/perluse strict;
 
use strict; # https://rosettacode.org/wiki/Longest_common_subsequence
use warnings;
use feature 'bitwise';
 
print "lcs is ", lcs('thisisatest', 'testing123testing'), "\n";
Line 2,161 ⟶ 2,204:
{
my ($c, $d) = @_;
for my $len ( reverse 1 .. length($c &. $d) )
{
"$c\n$d" =~ join '.*', ('(.)') x $len, "\n", map "\\$_", 1 .. $len and
Line 2,169 ⟶ 2,212:
}</syntaxhighlight>
{{out}}
<pre>lcs is tsitesttastiest</pre>
 
=={{header|Phix}}==
Line 3,167 ⟶ 3,210:
<syntaxhighlight lang="ruby">func lcs(xstr, ystr) is cached {
 
xstr.is_empty && return xstr;
ystr.is_empty && return ystr;
 
var(x, xs, y, ys) = (xstr.ftfirst(0,01), xstr.ftslice(1),
ystr.ftfirst(0,01), ystr.ftslice(1));
 
if (x == y) {
x + lcs(xs, ys)
} else {
[lcs(xstr, ys), lcs(xs, ystr)].max_by { .len };
}
}
 
say lcs("thisisatest", "testing123testing");</syntaxhighlight>
{{out}}
<pre>
Line 3,358 ⟶ 3,401:
=={{header|Wren}}==
{{trans|Kotlin}}
<syntaxhighlight lang="ecmascriptwren">var lcs // recursive
lcs = Fn.new { |x, y|
if (x.count == 0 || y.count == 0) return ""
2,078

edits