Eertree

From Rosetta Code
Eertree is a draft programming task. It is not yet considered ready to be promoted as a complete task, for reasons that should be found in its talk page.
An eertree is a data structure designed for efficient processing of certain palindrome tasks, for instance counting the number of sub-palindromes in an input string.[1] The data structure has commonalities to both suffix tries and suffix trees.
Task

Construct an eertree for the string "eertree", then output all sub-palindromes by traversing the tree.

Python[edit]

#!/bin/python
from __future__ import print_function
 
class Node(object):
def __init__(self):
self.edges = {} # edges (or forward links)
self.link = None # suffix link (backward links)
self.len = 0 # the length of the node
 
class Eertree(object):
def __init__(self):
self.nodes = []
# two initial root nodes
self.rto = Node() #odd length root node, or node -1
self.rte = Node() #even length root node, or node 0
 
# Initialize empty tree
self.rto.link = self.rte.link = self.rto;
self.rto.len = -1
self.rte.len = 0
self.S = [0] # accumulated input string, T=S[1..i]
self.maxSufT = self.rte # maximum suffix of tree T
 
def get_max_suffix_pal(self, startNode, a):
# We traverse the suffix-palindromes of T in the order of decreasing length.
# For each palindrome we read its length k and compare T[i-k] against a
# until we get an equality or arrive at the -1 node.
u = startNode
i = len(self.S)
k = u.len
while id(u) != id(self.rto) and self.S[i - k - 1] != a:
assert id(u) != id(u.link) #Prevent infinte loop
u = u.link
k = u.len
 
return u
 
def add(self, a):
 
# We need to find the maximum suffix-palindrome P of Ta
# Start by finding maximum suffix-palindrome Q of T.
# To do this, we traverse the suffix-palindromes of T
# in the order of decreasing length, starting with maxSuf(T)
Q = self.get_max_suffix_pal(self.maxSufT, a)
 
# We check Q to see whether it has an outgoing edge labeled by a.
createANewNode = not a in Q.edges
 
if createANewNode:
# We create the node P of length Q+2
P = Node()
self.nodes.append(P)
P.len = Q.len + 2
if P.len == 1:
# if P = a, create the suffix link (P,0)
P.link = self.rte
else:
# It remains to create the suffix link from P if |P|>1. Just
# continue traversing suffix-palindromes of T starting with the suffix
# link of Q.
P.link = self.get_max_suffix_pal(Q.link, a).edges[a]
 
# create the edge (Q,P)
Q.edges[a] = P
 
#P becomes the new maxSufT
self.maxSufT = Q.edges[a]
 
#Store accumulated input string
self.S.append(a)
 
return createANewNode
 
def get_sub_palindromes(self, nd, nodesToHere, charsToHere, result):
#Each node represents a palindrome, which can be reconstructed
#by the path from the root node to each non-root node.
 
#Traverse all edges, since they represent other palindromes
for lnkName in nd.edges:
nd2 = nd.edges[lnkName] #The lnkName is the character used for this edge
self.get_sub_palindromes(nd2, nodesToHere+[nd2], charsToHere+[lnkName], result)
 
#Reconstruct based on charsToHere characters.
if id(nd) != id(self.rto) and id(nd) != id(self.rte): #Don't print for root nodes
tmp = "".join(charsToHere)
if id(nodesToHere[0]) == id(self.rte): #Even string
assembled = tmp[::-1] + tmp
else: #Odd string
assembled = tmp[::-1] + tmp[1:]
result.append(assembled)
 
if __name__=="__main__":
st = "eertree"
print ("Processing string", st)
eertree = Eertree()
for ch in st:
eertree.add(ch)
 
print ("Number of sub-palindromes:", len(eertree.nodes))
 
#Traverse tree to find sub-palindromes
result = []
eertree.get_sub_palindromes(eertree.rto, [eertree.rto], [], result) #Odd length words
eertree.get_sub_palindromes(eertree.rte, [eertree.rte], [], result) #Even length words
print ("Sub-palindromes:", result)
Output:
Processing string eertree
Number of sub-palindromes: 7
Sub-palindromes: ['r', 'e', 'eertree', 'ertre', 'rtr', 't', 'ee']

Racket[edit]

Translation of: Python
#lang racket
(struct node (edges ; edges (or forward links)
link ; suffix link (backward links)
len) ; the length of the node
#:mutable)
 
(define (new-node link len) (node (make-hash) link len))
 
(struct eertree (nodes
rto ; odd length root node, or node -1
rte ; even length root node, or node 0
S ; accumulated input string, T=S[1..i]
max-suf-t) ; maximum suffix of tree T
#:mutable)
 
(define (new-eertree)
(let* ((rto (new-node #f -1))
(rte (new-node rto 0)))
(eertree null rto rte (list 0) rte)))
 
(define (eertree-get-max-suffix-pal et start-node a)
#| We traverse the suffix-palindromes of T in the order of decreasing length.
For each palindrome we read its length k and compare T[i-k] against a
until we get an equality or arrive at the -1 node. |#
(match et
[(eertree nodes rto rte (and S (app length i)) max-suf-t)
(let loop ((u start-node))
(let ((k (node-len u)))
(if (or (eq? u rto) (= (list-ref S (- i k 1)) a))
u
(let ((u→ (node-link u)))
(when (eq? u u→) (error 'eertree-get-max-suffix-pal "infinite loop"))
(loop u→)))))]))
 
(define (eertree-add! et a)
#| We need to find the maximum suffix-palindrome P of Ta
Start by finding maximum suffix-palindrome Q of T.
To do this, we traverse the suffix-palindromes of T
in the order of decreasing length, starting with maxSuf(T) |#
(match (eertree-get-max-suffix-pal et (eertree-max-suf-t et) a)
[(node Q.edges Q.→ Q.len)
 ;; We check Q to see whether it has an outgoing edge labeled by a.
(define new-node? (not (hash-has-key? Q.edges a)))
(when new-node?
(define P (new-node #f (+ Q.len 2))) ; We create the node P of length Q+2
(set-eertree-nodes! et (append (eertree-nodes et) (list P)))
(define P→
(if (= (node-len P) 1)
(eertree-rte et) ; if P = a, create the suffix link (P,0)
 ;; It remains to c reate the suffix link from P if |P|>1.
 ;; Just continue traversing suffix-palindromes of T starting with the suffix link of Q.
(hash-ref (node-edges (eertree-get-max-suffix-pal et Q.→ a)) a)))
(set-node-link! P P→)
(hash-set! Q.edges a P)) ; create the edge (Q,P)
 
(set-eertree-max-suf-t! et (hash-ref Q.edges a)) ; P becomes the new maxSufT
(set-eertree-S! et (append (eertree-S et) (list a))) ; Store accumulated input string
new-node?]))
 
(define (eertree-get-sub-palindromes et)
(define (inr nd (node-path (list nd)) (char-path/rev null))
 ;; Each node represents a palindrome, which can be reconstructed by the path from the root node to
 ;; each non-root node.
(let ((deeper ; Traverse all edges, since they represent other palindromes
(for/fold ((result null)) (([→-name nd2] (in-hash (node-edges nd))))
 ; The lnk-name is the character used for this edge
(append result (inr nd2 (append node-path (list nd2)) (cons →-name char-path/rev)))))
(root-node? (or (eq? (eertree-rto et) nd) (eq? (eertree-rte et) nd))))
(if root-node? ; Don't add root nodes
deeper
(let ((even-string? (eq? (car node-path) (eertree-rte et)))
(char-path (reverse char-path/rev)))
(cons (append char-path/rev (if even-string? char-path (cdr char-path))) deeper)))))
inr)
 
(define (eertree-get-palindromes et)
(define sub (eertree-get-sub-palindromes et))
(append (sub (eertree-rto et))
(sub (eertree-rte et))))
 
(module+ main
(define et (new-eertree))
 ;; eertree works in integer space, so we'll map to/from char space here
(for ((c "eertree")) (eertree-add! et (char->integer c)))
(map (compose list->string (curry map integer->char)) (eertree-get-palindromes et)))
 
Output:
'("t" "rtr" "ertre" "eertree" "r" "e" "ee")

zkl[edit]

Translation of: Python
class Node{
fcn init(length){
var edges=Dictionary(), # edges (or forward links). (char:Node)
link=Void, # suffix link (backward links)
sz =length; # node length.
}
}
class Eertree{
fcn init(string=Void){
var nodes=List(),
# two initial root nodes
rto=Node(-1), # odd length root node, or node -1
rte=Node(0); # even length root node, or node 0
 
rto.link=rte.link=rto; # Initialize empty tree
var S =Data(Void,0), # accumulated input string, T=S[1..i], byte buffer
maxSufT=rte; # maximum suffix of tree T
if(string) string.pump(addChar); // go ahead and build the tree
}
fcn get_max_suffix_pal(startNode,a){
# We traverse the suffix-palindromes of T in the order of decreasing length.
# For each palindrome we read its length k and compare T[i-k] against a
# until we get an equality or arrive at the -1 node.
u,i,k := startNode, S.len(), u.sz;
while(u.id!=rto.id and S.charAt(i - k - 1)!=a){
_assert_(u.id!=u.link.id); # Prevent infinte loop
u,k = u.link,u.sz;
}
return(u);
}
fcn addChar(a){
# We need to find the maximum suffix-palindrome P of Ta
# Start by finding maximum suffix-palindrome Q of T.
# To do this, we traverse the suffix-palindromes of T
# in the order of decreasing length, starting with maxSuf(T)
Q:=get_max_suffix_pal(maxSufT,a);
# We check Q to see whether it has an outgoing edge labeled by a.
createANewNode:=(not Q.edges.holds(a));
if(createANewNode){
P:=Node(Q.sz + 2); nodes.append(P);
if(P.sz==1) P.link=rte; # if P = a, create the suffix link (P,0)
else # It remains to create the suffix link from P if |P|>1. Just
# continue traversing suffix-palindromes of T starting with the suffix
# link of Q.
P.link=get_max_suffix_pal(Q.link,a).edges[a];
Q.edges[a]=P; # create the edge (Q,P)
}
maxSufT=Q.edges[a]; # P becomes the new maxSufT
S.append(a); # Store accumulated input string
return(createANewNode); // in case anyone wants to know a is new edge
}
fcn get_sub_palindromes{
result:=List();
sub_palindromes(rto, T(rto),"", result); # Odd length words
sub_palindromes(rte, T(rte),"", result); # Even length words
result
}
fcn [private] sub_palindromes(nd, nodesToHere, charsToHere, result){
// nodesToHere needs to be read only
# Each node represents a palindrome, which can be reconstructed
# by the path from the root node to each non-root node.
 
# Traverse all edges, since they represent other palindromes
nd.edges.pump(Void,'wrap([(lnkName,nd2)]){
sub_palindromes(nd2, nodesToHere+nd2, charsToHere+lnkName, result);
});
 
# Reconstruct based on charsToHere characters.
if(nd.id!=rto.id and nd.id!=rte.id){ # Don't print for root nodes
if(nodesToHere[0].id==rte.id) # Even string
assembled:=charsToHere.reverse() + charsToHere;
else assembled:=charsToHere.reverse() + charsToHere[1,*]; # Odd string
result.append(assembled);
}
}
}
st:="eertree";
println("Processing string \"", st,"\"");
eertree:=Eertree(st);
println("Number of sub-palindromes: ", eertree.nodes.len());
println("Sub-palindromes: ", eertree.get_sub_palindromes());
Output:
Processing string "eertree"
Number of sub-palindromes: 7
Sub-palindromes: L("e","r","eertree","ertre","rtr","t","ee")

References[edit]

  1. [1]