Longest common subsequence: Difference between revisions
Content added Content deleted
m (Switched std::map to C++11 std::unordered_map to reduce initial Match() overhead.) |
(Resorted to verbose typedef and parameter names to help clarify their purpose.) |
||
Line 490: | Line 490: | ||
typedef deque<uint32_t> THRESHOLD; |
typedef deque<uint32_t> THRESHOLD; |
||
typedef deque<uint32_t> INDEXES; |
typedef deque<uint32_t> INDEXES; |
||
typedef unordered_map<char, INDEXES> |
typedef unordered_map<char, INDEXES> CHAR_TO_INDEXES_MAP; |
||
typedef deque<INDEXES*> MATCHES; |
typedef deque<INDEXES*> MATCHES; |
||
⚫ | |||
// The following implements the Hunt and Szymanski algorithm: |
|||
⚫ | |||
auto trace = pairs != nullptr; |
auto trace = pairs != nullptr; |
||
PAIRS traces; |
PAIRS traces; |
||
Line 504: | Line 503: | ||
// |
// |
||
uint32_t index1 = 0; |
uint32_t index1 = 0; |
||
for (const auto& it1 : |
for (const auto& it1 : indexesOf2MatchedByIndex1) { |
||
if (!it1->empty()) { |
if (!it1->empty()) { |
||
auto dq2 = *it1; |
auto dq2 = *it1; |
||
Line 571: | Line 570: | ||
// |
// |
||
// Match() avoids |
// Match() avoids m*n comparisons by using CHAR_TO_INDEXES_MAP to |
||
// |
// achieve O(m+n) performance, where m and n are the input lengths. |
||
// where m and n are the input lengths. |
|||
// |
// |
||
// The lookup time can be assumed constant in the case of characters. |
// The lookup time can be assumed constant in the case of characters. |
||
Line 579: | Line 577: | ||
// time will be O(log(m+n)), at most. |
// time will be O(log(m+n)), at most. |
||
// |
// |
||
void Match( |
void Match(CHAR_TO_INDEXES_MAP& indexesOf2MatchedByChar, MATCHES& indexesOf2MatchedByIndex1, |
||
const string& s1, const string& s2) { |
const string& s1, const string& s2) { |
||
uint32_t index = 0; |
uint32_t index = 0; |
||
for (const auto& it : s2) |
for (const auto& it : s2) |
||
indexesOf2MatchedByChar[it].push_back(index++); |
|||
for (const auto& it : s1) { |
for (const auto& it : s1) { |
||
auto& dq2 = |
auto& dq2 = indexesOf2MatchedByChar[it]; |
||
indexesOf2MatchedByIndex1.push_back(&dq2); |
|||
} |
} |
||
} |
} |
||
Line 604: | Line 602: | ||
public: |
public: |
||
string Correspondence(const string& s1, const string& s2) { |
string Correspondence(const string& s1, const string& s2) { |
||
CHAR_TO_INDEXES_MAP indexesOf2MatchedByChar; |
|||
CHAR2INDEXES indexes; |
|||
MATCHES |
MATCHES indexesOf2MatchedByIndex1; // holds references into indexesOf2MatchedByChar |
||
Match( |
Match(indexesOf2MatchedByChar, indexesOf2MatchedByIndex1, s1, s2); |
||
shared_ptr<Pair> pairs; // obtain the LCS as index pairs |
shared_ptr<Pair> pairs; // obtain the LCS as index pairs |
||
auto length = Pairs( |
auto length = Pairs(indexesOf2MatchedByIndex1, &pairs); |
||
return Select(pairs, length, false, s1, s2); |
return Select(pairs, length, false, s1, s2); |
||
} |
} |