Eertree: Difference between revisions
Walterpachl (talk | contribs) m tried to fix the links |
Go solution |
||
Line 17: | Line 17: | ||
* [https://arxiv.org/abs/1506.04862 Cornell University Library, Computer Science, Data Structures and Algorithms ───► EERTREE: An Efficient Data Structure for Processing Palindromes in Strings]. |
* [https://arxiv.org/abs/1506.04862 Cornell University Library, Computer Science, Data Structures and Algorithms ───► EERTREE: An Efficient Data Structure for Processing Palindromes in Strings]. |
||
<br><br> |
<br><br> |
||
=={{header|Go}}== |
|||
<lang go>package main |
|||
import "fmt" |
|||
func main() { |
|||
tree := eertree([]byte("eertree")) |
|||
fmt.Println(subPalindromes(tree)) |
|||
} |
|||
type edges map[byte]int |
|||
type node struct { |
|||
length int |
|||
edges |
|||
suffix int |
|||
} |
|||
const evenRoot = 0 |
|||
const oddRoot = 1 |
|||
func eertree(s []byte) []node { |
|||
tree := []node{ |
|||
evenRoot: {length: 0, suffix: oddRoot, edges: edges{}}, |
|||
oddRoot: {length: -1, suffix: oddRoot, edges: edges{}}, |
|||
} |
|||
suffix := oddRoot |
|||
var n, k int |
|||
for i, c := range s { |
|||
for n = suffix; ; n = tree[n].suffix { |
|||
k = tree[n].length |
|||
if b := i - k - 1; b >= 0 && s[b] == c { |
|||
break |
|||
} |
|||
} |
|||
if e, ok := tree[n].edges[c]; ok { |
|||
suffix = e |
|||
continue |
|||
} |
|||
suffix = len(tree) |
|||
tree = append(tree, node{length: k + 2, edges: edges{}}) |
|||
tree[n].edges[c] = suffix |
|||
if tree[suffix].length == 1 { |
|||
tree[suffix].suffix = 0 |
|||
continue |
|||
} |
|||
for { |
|||
n = tree[n].suffix |
|||
if b := i - tree[n].length - 1; b >= 0 && s[b] == c { |
|||
break |
|||
} |
|||
} |
|||
tree[suffix].suffix = tree[n].edges[c] |
|||
} |
|||
return tree |
|||
} |
|||
func subPalindromes(tree []node) (s []string) { |
|||
var children func(int, string) |
|||
children = func(n int, p string) { |
|||
for c, n := range tree[n].edges { |
|||
c := string(c) |
|||
p := c + p + c |
|||
s = append(s, p) |
|||
children(n, p) |
|||
} |
|||
} |
|||
children(0, "") |
|||
for c, n := range tree[1].edges { |
|||
c := string(c) |
|||
s = append(s, c) |
|||
children(n, c) |
|||
} |
|||
return |
|||
}</lang> |
|||
{{out}} |
|||
<pre> |
|||
[ee e r t rtr ertre eertree] |
|||
</pre> |
|||
=={{header|Kotlin}}== |
=={{header|Kotlin}}== |
Revision as of 00:48, 25 October 2017
An eertree is a data structure designed for efficient processing of certain palindrome tasks, for instance counting the number of sub-palindromes in an input string.
The data structure has commonalities to both tries and suffix trees. See links below.
- Task
Construct an eertree for the string "eertree", then output all sub-palindromes by traversing the tree.
- See also
- Wikipedia entry: trie.
- Wikipedia entry: suffix tree
- Cornell University Library, Computer Science, Data Structures and Algorithms ───► EERTREE: An Efficient Data Structure for Processing Palindromes in Strings.
Go
<lang go>package main
import "fmt"
func main() {
tree := eertree([]byte("eertree")) fmt.Println(subPalindromes(tree))
}
type edges map[byte]int
type node struct {
length int edges suffix int
}
const evenRoot = 0 const oddRoot = 1
func eertree(s []byte) []node {
tree := []node{ evenRoot: {length: 0, suffix: oddRoot, edges: edges{}}, oddRoot: {length: -1, suffix: oddRoot, edges: edges{}}, } suffix := oddRoot var n, k int for i, c := range s { for n = suffix; ; n = tree[n].suffix { k = tree[n].length if b := i - k - 1; b >= 0 && s[b] == c { break } } if e, ok := tree[n].edges[c]; ok { suffix = e continue } suffix = len(tree) tree = append(tree, node{length: k + 2, edges: edges{}}) tree[n].edges[c] = suffix if tree[suffix].length == 1 { tree[suffix].suffix = 0 continue } for { n = tree[n].suffix if b := i - tree[n].length - 1; b >= 0 && s[b] == c { break } } tree[suffix].suffix = tree[n].edges[c] } return tree
}
func subPalindromes(tree []node) (s []string) {
var children func(int, string) children = func(n int, p string) { for c, n := range tree[n].edges { c := string(c) p := c + p + c s = append(s, p) children(n, p) } } children(0, "") for c, n := range tree[1].edges { c := string(c) s = append(s, c) children(n, c) } return
}</lang>
- Output:
[ee e r t rtr ertre eertree]
Kotlin
<lang scala>// version 1.1.4
class Node {
val edges = mutableMapOf<Char, Node>() // edges (or forward links) var link: Node? = null // suffix link (backward links) var len = 0 // the length of the node
}
class Eertree(str: String) {
val nodes = mutableListOf<Node>()
private val rto = Node() // odd length root node, or node -1 private val rte = Node() // even length root node, or node 0 private val s = StringBuilder("0") // accumulated input string, T = S[1..i] private var maxSufT = rte // maximum suffix of tree T
init { // Initialize and build the tree rte.link = rto rto.link = rte rto.len = -1 rte.len = 0 for (ch in str) add(ch) }
private fun getMaxSuffixPal(startNode: Node, a: Char): Node { // We traverse the suffix-palindromes of T in the order of decreasing length. // For each palindrome we read its length k and compare T[i-k] against a // until we get an equality or arrive at the -1 node. var u = startNode val i = s.length var k = u.len while (u !== rto && s[i - k - 1] != a) { if (u === u.link!!) throw RuntimeException("Infinite loop detected") u = u.link!! k = u.len } return u }
private fun add(a: Char): Boolean { // We need to find the maximum suffix-palindrome P of Ta // Start by finding maximum suffix-palindrome Q of T. // To do this, we traverse the suffix-palindromes of T // in the order of decreasing length, starting with maxSuf(T) val q = getMaxSuffixPal(maxSufT, a)
// We check Q to see whether it has an outgoing edge labeled by a. val createANewNode = a !in q.edges.keys
if (createANewNode) { // We create the node P of length Q + 2 val p = Node() nodes.add(p) p.len = q.len + 2 if (p.len == 1) { // if P = a, create the suffix link (P, 0) p.link = rte } else { // It remains to create the suffix link from P if |P|>1. Just // continue traversing suffix-palindromes of T starting with the // the suffix link of Q. p.link = getMaxSuffixPal(q.link!!, a).edges[a] }
// create the edge (Q, P) q.edges[a] = p }
// P becomes the new maxSufT maxSufT = q.edges[a]!!
// Store accumulated input string s.append(a)
return createANewNode }
fun getSubPalindromes(): List<String> { // Traverse tree to find sub-palindromes val result = mutableListOf<String>() // Odd length words getSubPalindromes(rto, listOf(rto), "", result) // Even length words getSubPalindromes(rte, listOf(rte), "", result) return result }
private fun getSubPalindromes(nd: Node, nodesToHere: List<Node>, charsToHere: String, result: MutableList<String>) { // Each node represents a palindrome, which can be reconstructed // by the path from the root node to each non-root node.
// Traverse all edges, since they represent other palindromes for ((lnkName, nd2) in nd.edges) { getSubPalindromes(nd2, nodesToHere + nd2, charsToHere + lnkName, result) }
// Reconstruct based on charsToHere characters. if (nd !== rto && nd !== rte) { // Don't print for root nodes val assembled = charsToHere.reversed() + if (nodesToHere[0] === rte) // Even string charsToHere else // Odd string charsToHere.drop(1) result.add(assembled) } }
}
fun main(args: Array<String>) {
val str = "eertree" println("Processing string '$str'") val eertree = Eertree(str) println("Number of sub-palindromes: ${eertree.nodes.size}") val result = eertree.getSubPalindromes() println("Sub-palindromes: $result")
}</lang>
- Output:
Processing string 'eertree' Number of sub-palindromes: 7 Sub-palindromes: [e, r, eertree, ertre, rtr, t, ee]
Python
<lang python>#!/bin/python from __future__ import print_function
class Node(object): def __init__(self): self.edges = {} # edges (or forward links) self.link = None # suffix link (backward links) self.len = 0 # the length of the node
class Eertree(object): def __init__(self): self.nodes = [] # two initial root nodes self.rto = Node() #odd length root node, or node -1 self.rte = Node() #even length root node, or node 0
# Initialize empty tree self.rto.link = self.rte.link = self.rto; self.rto.len = -1 self.rte.len = 0 self.S = [0] # accumulated input string, T=S[1..i] self.maxSufT = self.rte # maximum suffix of tree T
def get_max_suffix_pal(self, startNode, a): # We traverse the suffix-palindromes of T in the order of decreasing length. # For each palindrome we read its length k and compare T[i-k] against a # until we get an equality or arrive at the -1 node. u = startNode i = len(self.S) k = u.len while id(u) != id(self.rto) and self.S[i - k - 1] != a: assert id(u) != id(u.link) #Prevent infinte loop u = u.link k = u.len
return u
def add(self, a):
# We need to find the maximum suffix-palindrome P of Ta # Start by finding maximum suffix-palindrome Q of T. # To do this, we traverse the suffix-palindromes of T # in the order of decreasing length, starting with maxSuf(T) Q = self.get_max_suffix_pal(self.maxSufT, a)
# We check Q to see whether it has an outgoing edge labeled by a. createANewNode = not a in Q.edges
if createANewNode: # We create the node P of length Q+2 P = Node() self.nodes.append(P) P.len = Q.len + 2 if P.len == 1: # if P = a, create the suffix link (P,0) P.link = self.rte else: # It remains to create the suffix link from P if |P|>1. Just # continue traversing suffix-palindromes of T starting with the suffix # link of Q. P.link = self.get_max_suffix_pal(Q.link, a).edges[a]
# create the edge (Q,P) Q.edges[a] = P
#P becomes the new maxSufT self.maxSufT = Q.edges[a]
#Store accumulated input string self.S.append(a)
return createANewNode
def get_sub_palindromes(self, nd, nodesToHere, charsToHere, result): #Each node represents a palindrome, which can be reconstructed #by the path from the root node to each non-root node.
#Traverse all edges, since they represent other palindromes for lnkName in nd.edges: nd2 = nd.edges[lnkName] #The lnkName is the character used for this edge self.get_sub_palindromes(nd2, nodesToHere+[nd2], charsToHere+[lnkName], result)
#Reconstruct based on charsToHere characters. if id(nd) != id(self.rto) and id(nd) != id(self.rte): #Don't print for root nodes tmp = "".join(charsToHere) if id(nodesToHere[0]) == id(self.rte): #Even string assembled = tmp[::-1] + tmp else: #Odd string assembled = tmp[::-1] + tmp[1:] result.append(assembled)
if __name__=="__main__": st = "eertree" print ("Processing string", st) eertree = Eertree() for ch in st: eertree.add(ch)
print ("Number of sub-palindromes:", len(eertree.nodes))
#Traverse tree to find sub-palindromes result = [] eertree.get_sub_palindromes(eertree.rto, [eertree.rto], [], result) #Odd length words eertree.get_sub_palindromes(eertree.rte, [eertree.rte], [], result) #Even length words print ("Sub-palindromes:", result)</lang>
- Output:
Processing string eertree Number of sub-palindromes: 7 Sub-palindromes: ['r', 'e', 'eertree', 'ertre', 'rtr', 't', 'ee']
Racket
<lang racket>#lang racket (struct node (edges ; edges (or forward links)
link ; suffix link (backward links) len) ; the length of the node #:mutable)
(define (new-node link len) (node (make-hash) link len))
(struct eertree (nodes
rto ; odd length root node, or node -1 rte ; even length root node, or node 0 S ; accumulated input string, T=S[1..i] max-suf-t) ; maximum suffix of tree T #:mutable)
(define (new-eertree)
(let* ((rto (new-node #f -1)) (rte (new-node rto 0))) (eertree null rto rte (list 0) rte)))
(define (eertree-get-max-suffix-pal et start-node a)
#| We traverse the suffix-palindromes of T in the order of decreasing length. For each palindrome we read its length k and compare T[i-k] against a until we get an equality or arrive at the -1 node. |# (match et [(eertree nodes rto rte (and S (app length i)) max-suf-t) (let loop ((u start-node)) (let ((k (node-len u))) (if (or (eq? u rto) (= (list-ref S (- i k 1)) a)) u (let ((u→ (node-link u))) (when (eq? u u→) (error 'eertree-get-max-suffix-pal "infinite loop")) (loop u→)))))]))
(define (eertree-add! et a)
#| We need to find the maximum suffix-palindrome P of Ta Start by finding maximum suffix-palindrome Q of T. To do this, we traverse the suffix-palindromes of T in the order of decreasing length, starting with maxSuf(T) |# (match (eertree-get-max-suffix-pal et (eertree-max-suf-t et) a) [(node Q.edges Q.→ Q.len) ;; We check Q to see whether it has an outgoing edge labeled by a. (define new-node? (not (hash-has-key? Q.edges a))) (when new-node? (define P (new-node #f (+ Q.len 2))) ; We create the node P of length Q+2 (set-eertree-nodes! et (append (eertree-nodes et) (list P))) (define P→ (if (= (node-len P) 1) (eertree-rte et) ; if P = a, create the suffix link (P,0) ;; It remains to c reate the suffix link from P if |P|>1. ;; Just continue traversing suffix-palindromes of T starting with the suffix link of Q. (hash-ref (node-edges (eertree-get-max-suffix-pal et Q.→ a)) a))) (set-node-link! P P→) (hash-set! Q.edges a P)) ; create the edge (Q,P) (set-eertree-max-suf-t! et (hash-ref Q.edges a)) ; P becomes the new maxSufT (set-eertree-S! et (append (eertree-S et) (list a))) ; Store accumulated input string new-node?]))
(define (eertree-get-sub-palindromes et)
(define (inr nd (node-path (list nd)) (char-path/rev null)) ;; Each node represents a palindrome, which can be reconstructed by the path from the root node to ;; each non-root node. (let ((deeper ; Traverse all edges, since they represent other palindromes (for/fold ((result null)) (([→-name nd2] (in-hash (node-edges nd)))) ; The lnk-name is the character used for this edge (append result (inr nd2 (append node-path (list nd2)) (cons →-name char-path/rev))))) (root-node? (or (eq? (eertree-rto et) nd) (eq? (eertree-rte et) nd)))) (if root-node? ; Don't add root nodes deeper (let ((even-string? (eq? (car node-path) (eertree-rte et))) (char-path (reverse char-path/rev))) (cons (append char-path/rev (if even-string? char-path (cdr char-path))) deeper))))) inr)
(define (eertree-get-palindromes et)
(define sub (eertree-get-sub-palindromes et)) (append (sub (eertree-rto et)) (sub (eertree-rte et))))
(module+ main
(define et (new-eertree)) ;; eertree works in integer space, so we'll map to/from char space here (for ((c "eertree")) (eertree-add! et (char->integer c))) (map (compose list->string (curry map integer->char)) (eertree-get-palindromes et)))
</lang>
- Output:
'("t" "rtr" "ertre" "eertree" "r" "e" "ee")
REXX
This REXX program is modeled after the Ring example. <lang rexx>/*REXX program creates a list of (unique) sub─palindromes that exist in an input string.*/
parse arg x . /*obtain optional input string from CL.*/ if x== | x=="," then x= 'eertree' /*Not specified? Then use the default.*/ L=length(x) /*the length (in chars) of input string*/ @.=. /*@ tree indicates uniqueness of pals. */ $= /*list of unsorted & unique palindromes*/
do j=1 for L /*start at the left side of the string.*/
do k=1 for L /*traverse from left to right of string*/ parse var x =(j) y +(k) /*extract a substring from the string. */ if reverse(y)\==y then iterate /*Partial string a palindrome? Skip it*/ if @.y\==. then iterate /*Sub─palindrome already exist? Skip it*/ @.y=y /*indicate a sub─palindrome was found. */ $=$' ' y /*append the sub─palindrome to the list*/ end /*k*/ /* [↑] an extra blank is inserted. */
end /*j*/
pad=copies('─', 8) /*a fence to be used as an eyecatcher. */ say pad 'Using the input string: ' x /*echo the input string being parsed.*/ say
- =words($) /*get the number of palindromes found. */
subP= 'sub─palindromes' /*a literal to make SAY texts shorter. */ say pad 'The number of' subP "found: " # say say pad 'The list of' subP "found: " say strip($) /*display the list of the palindromes. */
/*stick a fork in it, we're all done. */</lang>
- output when using the default input:
──────── Using the input string: eertree ──────── The number of sub─palindromes found: 7 ──────── The list of sub─palindromes found: e ee eertree ertre r rtr t
Ring
<lang ring>
- Project : Eertree
- Date : 2017/09/23
- Author : Gal Zsolt (~ CalmoSoft ~)
- Email : <calmosoft@gmail.com>
str = "eertree" pal = [] for n=1 to len(str)
for m=1 to len(str) strrev = "" strpal = substr(str, n, m) if strpal != "" for p=len(strpal) to 1 step -1 strrev = strrev + strpal[p] next if strpal = strrev add(pal, strpal) ok ok next
next sortpal = sort(pal) for n=len(sortpal) to 2 step -1
if sortpal[n] = sortpal[n-1] del(sortpal, n) ok
next see sortpal + nl </lang> Output:
e ee eertree ertre r rtr t
zkl
<lang zkl>class Node{
fcn init(length){ var edges=Dictionary(), # edges (or forward links). (char:Node) link=Void, # suffix link (backward links)
sz =length; # node length.
}
} class Eertree{
fcn init(string=Void){ var nodes=List(),
# two initial root nodes rto=Node(-1), # odd length root node, or node -1 rte=Node(0); # even length root node, or node 0
rto.link=rte.link=rto; # Initialize empty tree var S =Data(Void,0), # accumulated input string, T=S[1..i], byte buffer maxSufT=rte; # maximum suffix of tree T if(string) string.pump(addChar); // go ahead and build the tree } fcn get_max_suffix_pal(startNode,a){ # We traverse the suffix-palindromes of T in the order of decreasing length. # For each palindrome we read its length k and compare T[i-k] against a # until we get an equality or arrive at the -1 node. u,i,k := startNode, S.len(), u.sz; while(u.id!=rto.id and S.charAt(i - k - 1)!=a){
_assert_(u.id!=u.link.id); # Prevent infinte loop u,k = u.link,u.sz;
} return(u); } fcn addChar(a){
# We need to find the maximum suffix-palindrome P of Ta # Start by finding maximum suffix-palindrome Q of T. # To do this, we traverse the suffix-palindromes of T # in the order of decreasing length, starting with maxSuf(T)
Q:=get_max_suffix_pal(maxSufT,a); # We check Q to see whether it has an outgoing edge labeled by a. createANewNode:=(not Q.edges.holds(a)); if(createANewNode){
P:=Node(Q.sz + 2); nodes.append(P); if(P.sz==1) P.link=rte; # if P = a, create the suffix link (P,0) else # It remains to create the suffix link from P if |P|>1. Just # continue traversing suffix-palindromes of T starting with the suffix # link of Q. P.link=get_max_suffix_pal(Q.link,a).edges[a]; Q.edges[a]=P; # create the edge (Q,P)
} maxSufT=Q.edges[a]; # P becomes the new maxSufT S.append(a); # Store accumulated input string return(createANewNode); // in case anyone wants to know a is new edge } fcn get_sub_palindromes{ result:=List(); sub_palindromes(rto, T(rto),"", result); # Odd length words sub_palindromes(rte, T(rte),"", result); # Even length words result } fcn [private] sub_palindromes(nd, nodesToHere, charsToHere, result){ // nodesToHere needs to be read only
# Each node represents a palindrome, which can be reconstructed # by the path from the root node to each non-root node.
# Traverse all edges, since they represent other palindromes
nd.edges.pump(Void,'wrap([(lnkName,nd2)]){
sub_palindromes(nd2, nodesToHere+nd2, charsToHere+lnkName, result);
});
# Reconstruct based on charsToHere characters. if(nd.id!=rto.id and nd.id!=rte.id){ # Don't print for root nodes
if(nodesToHere[0].id==rte.id) # Even string assembled:=charsToHere.reverse() + charsToHere; else assembled:=charsToHere.reverse() + charsToHere[1,*]; # Odd string result.append(assembled);
} }
}</lang> <lang zkl>st:="eertree"; println("Processing string \"", st,"\""); eertree:=Eertree(st); println("Number of sub-palindromes: ", eertree.nodes.len()); println("Sub-palindromes: ", eertree.get_sub_palindromes());</lang>
- Output:
Processing string "eertree" Number of sub-palindromes: 7 Sub-palindromes: L("e","r","eertree","ertre","rtr","t","ee")