Hirschberg’s linear space algorithm in C++

 It should work on any container type which supports forward and reverse iteration. It’s similar to the Python implementation, but note:

  1. rather than copy slices of the original sequences, it passes around iterators and reverse iterators into these sequences
  2. rather than recursively accumulating sums of sub-LCS results, it ticks off items in a members array, xs_in_lcs
/\*  
  C++ implementation of   
  "A linear space algorithm for computing maximal common subsequences"  
  D. S. Hirschberg  
  http://portal.acm.org/citation.cfm?id=360861  
    
  See also: http://wordaligned.org/articles/longest-common-subsquence  
\*/  
#include   
#include   
#include   
typedef std::vector<int\> lengths;  
/\*  
  The "members" type is used as a sparse set for LCS calculations.  
  Given a sequence, xs, and members, m, then  
  if m\[i\] is true, xs\[i\] is in the LCS.  
\*/  
typedef std::vector<bool\> members;  
/\*  
  Fill the LCS sequence from the members of a sequence, xs  
  x - an iterator into the sequence xs  
  xs\_in\_lcs - members of xs  
  lcs - an output results iterator  
\*/  
template  <typename it,  typename ot\>  
void set\_lcs(it x, members const  & xs\_in\_lcs, ot lcs)  
{ for  (members::const\_iterator xs\_in \= xs\_in\_lcs.begin();xs\_in != xs\_in\_lcs.end();  ++xs\_in,  ++x) { if  (\*xs\_in) { \*lcs++  \=  \*x; } }  
}  
/\*  
  Calculate LCS row lengths given iterator ranges into two sequences.  
  On completion, \`lens\` holds LCS lengths in the final row.  
\*/  
template  <typename it\>  
void lcs\_lens(it xlo, it xhi, it ylo, it yhi, lengths & lens)  
{ // Two rows of workspace. // Careful! We need the 1 for the leftmost column.  
    lengths curr(1  + distance(ylo, yhi),  0);  
    lengths prev(curr); for  (it x \= xlo; x != xhi;  ++x) {  
        swap(prev, curr); int i \=  0; for  (it y \= ylo; y != yhi;  ++y,  ++i) {  
            curr\[i +  1\]  \=  \*x \==  \*y ? prev\[i\]  +  1 : std::max(curr\[i\], prev\[i +  1\]); } }  
    swap(lens, curr);  
}  
/\*  
  Recursive LCS calculation.  
  See Hirschberg for the theory!  
  This is a divide and conquer algorithm.  
  In the recursive case, we split the xrange in two.  
  Then, by calculating lengths of LCSes from the start and end  
  corners of the \[xlo, xhi\] x \[ylo, yhi\] grid, we determine where  
  the yrange should be split.  
    
  xo is the origin (element 0) of the xs sequence  
  xlo, xhi is the range of xs being processed  
  ylo, yhi is the range of ys being processed  
  Parameter xs\_in\_lcs holds the members of xs in the LCS.  
\*/  
template  <typename it\>  
void  
calculate\_lcs(it xo, it xlo, it xhi, it ylo, it yhi, members & xs\_in\_lcs)  
{ unsigned  const nx \= distance(xlo, xhi); if  (nx \==  0) { // empty range. all done } else  if  (nx \==  1) { // single item in x range. // If it's in the yrange, mark its position in the LCS  
        xs\_in\_lcs\[distance(xo, xlo)\]  \= find(ylo, yhi,  \*xlo)  != yhi; } else { // split the xrangeit xmid \= xlo + nx /  2; // Find LCS lengths at xmid, working from both ends of the range  
        lengths ll\_b, ll\_e;  
        std::reverse\_iterator<it\> hix(xhi), midx(xmid), hiy(yhi), loy(ylo);  
          
        lcs\_lens(xlo, xmid, ylo, yhi, ll\_b);  
        lcs\_lens(hix, midx, hiy, loy, ll\_e); // Find the optimal place to split the y range  
        lengths::const\_reverse\_iterator e \= ll\_e.rbegin(); int lmax \=  \-1;it y \= ylo, ymid \= ylo; for  (lengths::const\_iterator b \= ll\_b.begin();b != ll\_b.end();  ++y,  ++b,  ++e) { if  (\*b +  \*e \> lmax) {lmax \=  \*b +  \*e;ymid \= y; } } // Split the range and recurse  
        calculate\_lcs(xo, xlo, xmid, ylo, ymid, xs\_in\_lcs);  
        calculate\_lcs(xo, xmid, xhi, ymid, yhi, xs\_in\_lcs); }  
}  
// Calculate an LCS of (xs, ys), returning the result in an\_lcs.   
template  <typename seq\>  
void lcs(seq const  & xs, seq const  & ys, seq & an\_lcs)  
{  
    members xs\_in\_lcs(xs.size(),  false);  
    calculate\_lcs(xs.begin(), xs.begin(), xs.end(),  
                  ys.begin(), ys.end(), xs\_in\_lcs);  
    set\_lcs(xs.begin(), xs\_in\_lcs, back\_inserter(an\_lcs));  
}  


See also