Eertree

Revision as of 04:43, 13 October 2017 by rosettacode>Gerard Schildberger (added whitespace before the TOC.)

An eertree is a data structure designed for efficient processing of certain palindrome tasks, for instance counting the number of sub-palindromes in an input string.[1] The data structure has commonalities to both suffix tries and suffix trees.

Eertree is a draft programming task. It is not yet considered ready to be promoted as a complete task, for reasons that should be found in its talk page.


Task

Construct an eertree for the string "eertree", then output all sub-palindromes by traversing the tree.

Kotlin

Translation of: Python

<lang scala>// version 1.1.4

class Node {

   val edges = mutableMapOf<Char, Node>()  // edges (or forward links)
   var link: Node? = null                  // suffix link (backward links)
   var len = 0                             // the length of the node

}

class Eertree(str: String) {

   val nodes = mutableListOf<Node>()
   private val rto = Node()                // odd length root node, or node -1
   private val rte = Node()                // even length root node, or node 0
   private val s = StringBuilder("0")      // accumulated input string, T = S[1..i]
   private var maxSufT = rte               // maximum suffix of tree T
   init {
       // Initialize and build the tree
       rte.link = rto
       rto.link = rte
       rto.len  = -1
       rte.len  = 0
       for (ch in str) add(ch)
   }
   private fun getMaxSuffixPal(startNode: Node, a: Char): Node {
       // We traverse the suffix-palindromes of T in the order of decreasing length.
       // For each palindrome we read its length k and compare T[i-k] against a
       // until we get an equality or arrive at the -1 node.
       var u = startNode
       val i = s.length
       var k = u.len
       while (u !== rto && s[i - k - 1] != a) {
           if (u === u.link!!) throw RuntimeException("Infinite loop detected")
           u = u.link!!
           k = u.len
       }
       return u
   }
   private fun add(a: Char): Boolean {
       // We need to find the maximum suffix-palindrome P of Ta
       // Start by finding maximum suffix-palindrome Q of T.
       // To do this, we traverse the suffix-palindromes of T
       // in the order of decreasing length, starting with maxSuf(T)
       val q = getMaxSuffixPal(maxSufT, a)
       // We check Q to see whether it has an outgoing edge labeled by a.
       val createANewNode = a !in q.edges.keys
       if (createANewNode) {
           // We create the node P of length Q + 2
           val p = Node()
           nodes.add(p)
           p.len = q.len + 2
           if (p.len == 1) {
               // if P = a, create the suffix link (P, 0)
               p.link = rte
           }
           else {
               // It remains to create the suffix link from P if |P|>1. Just
               // continue traversing suffix-palindromes of T starting with the
               // the suffix link of Q.
               p.link = getMaxSuffixPal(q.link!!, a).edges[a]
           }
           // create the edge (Q, P)
           q.edges[a] = p
       }
       // P becomes the new maxSufT
       maxSufT = q.edges[a]!!
       // Store accumulated input string
       s.append(a)
       return createANewNode
   }
   fun getSubPalindromes(): List<String> {
       // Traverse tree to find sub-palindromes
       val result = mutableListOf<String>()
       // Odd length words
       getSubPalindromes(rto, listOf(rto), "", result)
       // Even length words
       getSubPalindromes(rte, listOf(rte), "", result)
       return result
   }
   private fun getSubPalindromes(nd: Node, nodesToHere: List<Node>,
                         charsToHere: String, result: MutableList<String>) {
       // Each node represents a palindrome, which can be reconstructed
       // by the path from the root node to each non-root node.
       // Traverse all edges, since they represent other palindromes
       for ((lnkName, nd2) in nd.edges) {
           getSubPalindromes(nd2, nodesToHere + nd2, charsToHere + lnkName, result)
       }
       // Reconstruct based on charsToHere characters.
       if (nd !== rto && nd !== rte) { // Don't print for root nodes
           val assembled = charsToHere.reversed() +
               if (nodesToHere[0] === rte)  // Even string
                   charsToHere
               else  // Odd string
                   charsToHere.drop(1)
           result.add(assembled)
       }
   }

}

fun main(args: Array<String>) {

   val str = "eertree"
   println("Processing string '$str'")
   val eertree = Eertree(str)
   println("Number of sub-palindromes: ${eertree.nodes.size}")
   val result = eertree.getSubPalindromes()
   println("Sub-palindromes: $result")

}</lang>

Output:
Processing string 'eertree'
Number of sub-palindromes: 7
Sub-palindromes: [e, r, eertree, ertre, rtr, t, ee]

Python

<lang python>#!/bin/python from __future__ import print_function

class Node(object): def __init__(self): self.edges = {} # edges (or forward links) self.link = None # suffix link (backward links) self.len = 0 # the length of the node

class Eertree(object): def __init__(self): self.nodes = [] # two initial root nodes self.rto = Node() #odd length root node, or node -1 self.rte = Node() #even length root node, or node 0

# Initialize empty tree self.rto.link = self.rte.link = self.rto; self.rto.len = -1 self.rte.len = 0 self.S = [0] # accumulated input string, T=S[1..i] self.maxSufT = self.rte # maximum suffix of tree T

def get_max_suffix_pal(self, startNode, a): # We traverse the suffix-palindromes of T in the order of decreasing length. # For each palindrome we read its length k and compare T[i-k] against a # until we get an equality or arrive at the -1 node. u = startNode i = len(self.S) k = u.len while id(u) != id(self.rto) and self.S[i - k - 1] != a: assert id(u) != id(u.link) #Prevent infinte loop u = u.link k = u.len

return u

def add(self, a):

# We need to find the maximum suffix-palindrome P of Ta # Start by finding maximum suffix-palindrome Q of T. # To do this, we traverse the suffix-palindromes of T # in the order of decreasing length, starting with maxSuf(T) Q = self.get_max_suffix_pal(self.maxSufT, a)

# We check Q to see whether it has an outgoing edge labeled by a. createANewNode = not a in Q.edges

if createANewNode: # We create the node P of length Q+2 P = Node() self.nodes.append(P) P.len = Q.len + 2 if P.len == 1: # if P = a, create the suffix link (P,0) P.link = self.rte else: # It remains to create the suffix link from P if |P|>1. Just # continue traversing suffix-palindromes of T starting with the suffix # link of Q. P.link = self.get_max_suffix_pal(Q.link, a).edges[a]

# create the edge (Q,P) Q.edges[a] = P

#P becomes the new maxSufT self.maxSufT = Q.edges[a]

#Store accumulated input string self.S.append(a)

return createANewNode

def get_sub_palindromes(self, nd, nodesToHere, charsToHere, result): #Each node represents a palindrome, which can be reconstructed #by the path from the root node to each non-root node.

#Traverse all edges, since they represent other palindromes for lnkName in nd.edges: nd2 = nd.edges[lnkName] #The lnkName is the character used for this edge self.get_sub_palindromes(nd2, nodesToHere+[nd2], charsToHere+[lnkName], result)

#Reconstruct based on charsToHere characters. if id(nd) != id(self.rto) and id(nd) != id(self.rte): #Don't print for root nodes tmp = "".join(charsToHere) if id(nodesToHere[0]) == id(self.rte): #Even string assembled = tmp[::-1] + tmp else: #Odd string assembled = tmp[::-1] + tmp[1:] result.append(assembled)

if __name__=="__main__": st = "eertree" print ("Processing string", st) eertree = Eertree() for ch in st: eertree.add(ch)

print ("Number of sub-palindromes:", len(eertree.nodes))

#Traverse tree to find sub-palindromes result = [] eertree.get_sub_palindromes(eertree.rto, [eertree.rto], [], result) #Odd length words eertree.get_sub_palindromes(eertree.rte, [eertree.rte], [], result) #Even length words print ("Sub-palindromes:", result)</lang>

Output:
Processing string eertree
Number of sub-palindromes: 7
Sub-palindromes: ['r', 'e', 'eertree', 'ertre', 'rtr', 't', 'ee']

Racket

Translation of: Python

<lang racket>#lang racket (struct node (edges ; edges (or forward links)

             link ; suffix link (backward links)
             len) ; the length of the node 
 #:mutable)

(define (new-node link len) (node (make-hash) link len))

(struct eertree (nodes

                rto ; odd length root node, or node -1
                rte ; even length root node, or node 0
                S ; accumulated input string, T=S[1..i]
                max-suf-t) ; maximum suffix of tree T
 #:mutable)

(define (new-eertree)

 (let* ((rto (new-node #f -1))
        (rte (new-node rto 0)))
   (eertree null rto rte (list 0) rte)))

(define (eertree-get-max-suffix-pal et start-node a)

 #| We traverse the suffix-palindromes of T in the order of decreasing length.
    For each palindrome we read its length k and compare T[i-k] against a
    until we get an equality or arrive at the -1 node. |#
 (match et
   [(eertree nodes rto rte (and S (app length i)) max-suf-t)
    (let loop ((u start-node))
      (let ((k (node-len u)))
        (if (or (eq? u rto) (= (list-ref S (- i k 1)) a))
            u
            (let ((u→ (node-link u)))
              (when (eq? u u→) (error 'eertree-get-max-suffix-pal "infinite loop"))
              (loop u→)))))]))

(define (eertree-add! et a)

 #| We need to find the maximum suffix-palindrome P of Ta
    Start by finding maximum suffix-palindrome Q of T.
    To do this, we traverse the suffix-palindromes of T
    in the order of decreasing length, starting with maxSuf(T) |#
 (match (eertree-get-max-suffix-pal et (eertree-max-suf-t et) a)
   [(node Q.edges Q.→ Q.len) 
    ;; We check Q to see whether it has an outgoing edge labeled by a.
    (define new-node? (not (hash-has-key? Q.edges a)))
    (when new-node?
      (define P (new-node #f (+ Q.len 2))) ; We create the node P of length Q+2
      (set-eertree-nodes! et (append (eertree-nodes et) (list P)))
      (define P→
        (if (= (node-len P) 1)
            (eertree-rte et) ; if P = a, create the suffix link (P,0)
            ;; It remains to c reate the suffix link from P if |P|>1.
            ;; Just continue traversing suffix-palindromes of T starting with the suffix link of Q.
            (hash-ref (node-edges (eertree-get-max-suffix-pal et Q.→ a)) a)))
      (set-node-link! P P→)
      (hash-set! Q.edges a P)) ; create the edge (Q,P)
    
    (set-eertree-max-suf-t! et (hash-ref Q.edges a)) ; P becomes the new maxSufT
    (set-eertree-S! et (append (eertree-S et) (list a))) ; Store accumulated input string
    new-node?]))

(define (eertree-get-sub-palindromes et)

 (define (inr nd (node-path (list nd)) (char-path/rev null))
   ;; Each node represents a palindrome, which can be reconstructed by the path from the root node to
   ;; each non-root node.
   (let ((deeper ; Traverse all edges, since they represent other palindromes     
          (for/fold ((result null)) (([→-name nd2] (in-hash (node-edges nd))))
            ; The lnk-name is the character used for this edge
            (append result (inr nd2 (append node-path (list nd2)) (cons →-name char-path/rev)))))
         (root-node? (or (eq? (eertree-rto et) nd) (eq? (eertree-rte et) nd))))
     (if root-node? ; Don't add root nodes
         deeper
         (let ((even-string? (eq? (car node-path) (eertree-rte et)))
               (char-path (reverse char-path/rev)))
           (cons (append char-path/rev (if even-string? char-path (cdr char-path))) deeper)))))
 inr)

(define (eertree-get-palindromes et)

 (define sub (eertree-get-sub-palindromes et))
 (append (sub (eertree-rto et))
         (sub (eertree-rte et))))

(module+ main

 (define et (new-eertree))
 ;; eertree works in integer space, so we'll map to/from char space here
 (for ((c "eertree")) (eertree-add! et (char->integer c)))
 (map (compose list->string (curry map integer->char)) (eertree-get-palindromes et)))

</lang>

Output:
'("t" "rtr" "ertre" "eertree" "r" "e" "ee")

REXX

This REXX program is modeled after the   Ring   example. <lang rexx>/*REXX program creates a list of (unique) sub─palindromes that exist in an input string.*/

parse arg x . /*obtain optional input string from CL.*/ if x== | x=="," then x= 'eertree' /*Not specified? Then use the default.*/ L=length(x) /*the length (in chars) of input string*/ @.=. /*@ tree indicates uniqueness of pals. */ $= /*list of unsorted & unique palindromes*/

   do     j=1  for L                            /*start at the left side of the string.*/
       do k=1  for L                            /*traverse from left to right of string*/
       parse var  x   =(j)  y   +(k)            /*extract a substring from the string. */
       if reverse(y)\==y  then iterate          /*Partial string a palindrome?  Skip it*/
       if @.y\==.         then iterate          /*Sub─palindrome already exist? Skip it*/
       @.y=y                                    /*indicate a sub─palindrome was found. */
       $=$' ' y                                 /*append the sub─palindrome to the list*/
       end   /*k*/                              /* [↑]  an extra blank is inserted.    */
   end       /*j*/

pad=copies('─', 8) /*a fence to be used as an eyecatcher. */ say pad 'Using the input string: ' x /*echo the input string being parsed.*/ say

  1. =words($) /*get the number of palindromes found. */

subP= 'sub─palindromes' /*a literal to make SAY texts shorter. */ say pad 'The number of' subP "found: " # say say pad 'The list of' subP "found: " say strip($) /*display the list of the palindromes. */

                                                /*stick a fork in it,  we're all done. */</lang>
output   when using the default input:
──────── Using the input string:  eertree

──────── The number of sub─palindromes found:  7

──────── The list of sub─palindromes found:
e  ee  eertree  ertre  r  rtr  t

Ring

<lang ring>

  1. Project : Eertree
  2. Date  : 2017/09/23
  3. Author  : Gal Zsolt (~ CalmoSoft ~)
  4. Email  : <calmosoft@gmail.com>

str = "eertree" pal = [] for n=1 to len(str)

   for m=1 to len(str)
       strrev = ""
       strpal = substr(str, n, m)
       if strpal != ""
          for p=len(strpal) to 1 step -1
              strrev = strrev + strpal[p]
          next
          if strpal = strrev 
             add(pal, strpal)
          ok
       ok
   next

next sortpal = sort(pal) for n=len(sortpal) to 2 step -1

   if sortpal[n] = sortpal[n-1]
      del(sortpal, n)
   ok

next see sortpal + nl </lang> Output:

e
ee
eertree
ertre
r
rtr
t

zkl

Translation of: Python

<lang zkl>class Node{

  fcn init(length){
     var edges=Dictionary(),	# edges (or forward links). (char:Node)
         link=Void,		# suffix link (backward links)

sz =length; # node length.

   }

} class Eertree{

  fcn init(string=Void){
     var nodes=List(),

# two initial root nodes rto=Node(-1), # odd length root node, or node -1 rte=Node(0); # even length root node, or node 0

     rto.link=rte.link=rto;    # Initialize empty tree
     var S      =Data(Void,0), # accumulated input string, T=S[1..i], byte buffer
         maxSufT=rte;          # maximum suffix of tree T
     if(string) string.pump(addChar);  // go ahead and build the tree
  }
  fcn get_max_suffix_pal(startNode,a){
   # We traverse the suffix-palindromes of T in the order of decreasing length.
   # For each palindrome we read its length k and compare T[i-k] against a
   # until we get an equality or arrive at the -1 node.
     u,i,k := startNode, S.len(), u.sz;
     while(u.id!=rto.id and S.charAt(i - k - 1)!=a){

_assert_(u.id!=u.link.id); # Prevent infinte loop u,k = u.link,u.sz;

     }
     return(u);
  }
  fcn addChar(a){

# We need to find the maximum suffix-palindrome P of Ta # Start by finding maximum suffix-palindrome Q of T. # To do this, we traverse the suffix-palindromes of T # in the order of decreasing length, starting with maxSuf(T)

     Q:=get_max_suffix_pal(maxSufT,a);
       # We check Q to see whether it has an outgoing edge labeled by a.
     createANewNode:=(not Q.edges.holds(a));
     if(createANewNode){

P:=Node(Q.sz + 2); nodes.append(P); if(P.sz==1) P.link=rte; # if P = a, create the suffix link (P,0) else # It remains to create the suffix link from P if |P|>1. Just # continue traversing suffix-palindromes of T starting with the suffix # link of Q. P.link=get_max_suffix_pal(Q.link,a).edges[a]; Q.edges[a]=P; # create the edge (Q,P)

     }
     maxSufT=Q.edges[a]; # P becomes the new maxSufT
     S.append(a);	  # Store accumulated input string
     return(createANewNode);  // in case anyone wants to know a is new edge
  }
  fcn get_sub_palindromes{
     result:=List();
     sub_palindromes(rto, T(rto),"", result); # Odd length words
     sub_palindromes(rte, T(rte),"", result); # Even length words
     result
  }
  fcn [private] sub_palindromes(nd, nodesToHere, charsToHere, result){
       // nodesToHere needs to be read only

# Each node represents a palindrome, which can be reconstructed # by the path from the root node to each non-root node.

# Traverse all edges, since they represent other palindromes

     nd.edges.pump(Void,'wrap([(lnkName,nd2)]){

sub_palindromes(nd2, nodesToHere+nd2, charsToHere+lnkName, result);

     });
     # Reconstruct based on charsToHere characters.
     if(nd.id!=rto.id and nd.id!=rte.id){ # Don't print for root nodes

if(nodesToHere[0].id==rte.id) # Even string assembled:=charsToHere.reverse() + charsToHere; else assembled:=charsToHere.reverse() + charsToHere[1,*]; # Odd string result.append(assembled);

     }
  }

}</lang> <lang zkl>st:="eertree"; println("Processing string \"", st,"\""); eertree:=Eertree(st); println("Number of sub-palindromes: ", eertree.nodes.len()); println("Sub-palindromes: ", eertree.get_sub_palindromes());</lang>

Output:
Processing string "eertree"
Number of sub-palindromes: 7
Sub-palindromes: L("e","r","eertree","ertre","rtr","t","ee")

References