Eertree: Difference between revisions
(Added Kotlin) |
(→{{header|Kotlin}}: Updated example see https://github.com/dkandalov/rosettacode-kotlin for details) |
||
Line 9: | Line 9: | ||
=={{header|Kotlin}}== |
=={{header|Kotlin}}== |
||
{{trans|Python}} |
{{trans|Python}} |
||
<lang scala>// version 1.1. |
<lang scala>// version 1.1.4 |
||
class Node { |
class Node { |
||
Line 17: | Line 17: | ||
} |
} |
||
class Eertree( |
class Eertree(str: String) { |
||
val nodes = mutableListOf<Node>() |
val nodes = mutableListOf<Node>() |
||
Line 31: | Line 31: | ||
rto.len = -1 |
rto.len = -1 |
||
rte.len = 0 |
rte.len = 0 |
||
for (ch in str) add(ch) |
for (ch in str) add(ch) |
||
} |
} |
||
private fun getMaxSuffixPal(startNode: Node, a: Char): Node { |
private fun getMaxSuffixPal(startNode: Node, a: Char): Node { |
||
// We traverse the suffix-palindromes of T in the order of decreasing length. |
// We traverse the suffix-palindromes of T in the order of decreasing length. |
||
Line 40: | Line 40: | ||
var u = startNode |
var u = startNode |
||
val i = s.length |
val i = s.length |
||
var k = u.len |
var k = u.len |
||
while (u !== rto && s[i - k - 1] != a) { |
while (u !== rto && s[i - k - 1] != a) { |
||
if (u === u.link!!) throw RuntimeException("Infinite loop detected") |
if (u === u.link!!) throw RuntimeException("Infinite loop detected") |
||
u = u.link!! |
u = u.link!! |
||
Line 53: | Line 53: | ||
// Start by finding maximum suffix-palindrome Q of T. |
// Start by finding maximum suffix-palindrome Q of T. |
||
// To do this, we traverse the suffix-palindromes of T |
// To do this, we traverse the suffix-palindromes of T |
||
// in the order of decreasing length, starting with maxSuf(T) |
// in the order of decreasing length, starting with maxSuf(T) |
||
val q = getMaxSuffixPal(maxSufT, a) |
val q = getMaxSuffixPal(maxSufT, a) |
||
Line 62: | Line 62: | ||
// We create the node P of length Q + 2 |
// We create the node P of length Q + 2 |
||
val p = Node() |
val p = Node() |
||
nodes.add(p) |
nodes.add(p) |
||
p.len = q.len + 2 |
p.len = q.len + 2 |
||
if (p.len == 1) { |
if (p.len == 1) { |
||
// if P = a, create the suffix link (P, 0) |
// if P = a, create the suffix link (P, 0) |
||
Line 74: | Line 74: | ||
p.link = getMaxSuffixPal(q.link!!, a).edges[a] |
p.link = getMaxSuffixPal(q.link!!, a).edges[a] |
||
} |
} |
||
// create the edge (Q, P) |
// create the edge (Q, P) |
||
q.edges[a] = p |
q.edges[a] = p |
||
Line 83: | Line 83: | ||
// Store accumulated input string |
// Store accumulated input string |
||
s.append(a) |
s.append(a) |
||
return createANewNode |
return createANewNode |
||
} |
} |
||
fun getSubPalindromes(): List<String> { |
fun getSubPalindromes(): List<String> { |
||
// Traverse tree to find sub-palindromes |
// Traverse tree to find sub-palindromes |
||
val result = mutableListOf<String>() |
val result = mutableListOf<String>() |
||
// Odd length words |
// Odd length words |
||
getSubPalindromes(rto, listOf(rto), "", result) |
getSubPalindromes(rto, listOf(rto), "", result) |
||
// Even length words |
// Even length words |
||
Line 97: | Line 97: | ||
return result |
return result |
||
} |
} |
||
private fun getSubPalindromes(nd: Node, nodesToHere: List<Node>, |
private fun getSubPalindromes(nd: Node, nodesToHere: List<Node>, |
||
charsToHere: String, result: MutableList<String>) { |
charsToHere: String, result: MutableList<String>) { |
||
// Each node represents a palindrome, which can be reconstructed |
// Each node represents a palindrome, which can be reconstructed |
||
// by the path from the root node to each non-root node. |
// by the path from the root node to each non-root node. |
||
// Traverse all edges, since they represent other palindromes |
// Traverse all edges, since they represent other palindromes |
||
for ((lnkName, nd2) in nd.edges) { |
for ((lnkName, nd2) in nd.edges) { |
||
getSubPalindromes(nd2, nodesToHere + nd2, charsToHere + lnkName, result) |
getSubPalindromes(nd2, nodesToHere + nd2, charsToHere + lnkName, result) |
||
} |
} |
||
// Reconstruct based on charsToHere characters. |
// Reconstruct based on charsToHere characters. |
||
if (nd !== rto && nd !== rte) { // Don't print for root nodes |
if (nd !== rto && nd !== rte) { // Don't print for root nodes |
||
val assembled = charsToHere.reversed() + |
val assembled = charsToHere.reversed() + |
||
if (nodesToHere[0] === rte) // Even string |
if (nodesToHere[0] === rte) // Even string |
||
Line 125: | Line 125: | ||
val eertree = Eertree(str) |
val eertree = Eertree(str) |
||
println("Number of sub-palindromes: ${eertree.nodes.size}") |
println("Number of sub-palindromes: ${eertree.nodes.size}") |
||
val result = eertree.getSubPalindromes() |
val result = eertree.getSubPalindromes() |
||
println("Sub-palindromes: $result") |
println("Sub-palindromes: $result") |
||
}</lang> |
}</lang> |
Revision as of 16:12, 26 August 2017
An eertree is a data structure designed for efficient processing of certain palindrome tasks, for instance counting the number of sub-palindromes in an input string.[1] The data structure has commonalities to both suffix tries and suffix trees.
- Task
Construct an eertree for the string "eertree", then output all sub-palindromes by traversing the tree.
Kotlin
<lang scala>// version 1.1.4
class Node {
val edges = mutableMapOf<Char, Node>() // edges (or forward links) var link: Node? = null // suffix link (backward links) var len = 0 // the length of the node
}
class Eertree(str: String) {
val nodes = mutableListOf<Node>()
private val rto = Node() // odd length root node, or node -1 private val rte = Node() // even length root node, or node 0 private val s = StringBuilder("0") // accumulated input string, T = S[1..i] private var maxSufT = rte // maximum suffix of tree T
init { // Initialize and build the tree rte.link = rto rto.link = rte rto.len = -1 rte.len = 0 for (ch in str) add(ch) }
private fun getMaxSuffixPal(startNode: Node, a: Char): Node { // We traverse the suffix-palindromes of T in the order of decreasing length. // For each palindrome we read its length k and compare T[i-k] against a // until we get an equality or arrive at the -1 node. var u = startNode val i = s.length var k = u.len while (u !== rto && s[i - k - 1] != a) { if (u === u.link!!) throw RuntimeException("Infinite loop detected") u = u.link!! k = u.len } return u }
private fun add(a: Char): Boolean { // We need to find the maximum suffix-palindrome P of Ta // Start by finding maximum suffix-palindrome Q of T. // To do this, we traverse the suffix-palindromes of T // in the order of decreasing length, starting with maxSuf(T) val q = getMaxSuffixPal(maxSufT, a)
// We check Q to see whether it has an outgoing edge labeled by a. val createANewNode = a !in q.edges.keys
if (createANewNode) { // We create the node P of length Q + 2 val p = Node() nodes.add(p) p.len = q.len + 2 if (p.len == 1) { // if P = a, create the suffix link (P, 0) p.link = rte } else { // It remains to create the suffix link from P if |P|>1. Just // continue traversing suffix-palindromes of T starting with the // the suffix link of Q. p.link = getMaxSuffixPal(q.link!!, a).edges[a] }
// create the edge (Q, P) q.edges[a] = p }
// P becomes the new maxSufT maxSufT = q.edges[a]!!
// Store accumulated input string s.append(a)
return createANewNode }
fun getSubPalindromes(): List<String> { // Traverse tree to find sub-palindromes val result = mutableListOf<String>() // Odd length words getSubPalindromes(rto, listOf(rto), "", result) // Even length words getSubPalindromes(rte, listOf(rte), "", result) return result }
private fun getSubPalindromes(nd: Node, nodesToHere: List<Node>, charsToHere: String, result: MutableList<String>) { // Each node represents a palindrome, which can be reconstructed // by the path from the root node to each non-root node.
// Traverse all edges, since they represent other palindromes for ((lnkName, nd2) in nd.edges) { getSubPalindromes(nd2, nodesToHere + nd2, charsToHere + lnkName, result) }
// Reconstruct based on charsToHere characters. if (nd !== rto && nd !== rte) { // Don't print for root nodes val assembled = charsToHere.reversed() + if (nodesToHere[0] === rte) // Even string charsToHere else // Odd string charsToHere.drop(1) result.add(assembled) } }
}
fun main(args: Array<String>) {
val str = "eertree" println("Processing string '$str'") val eertree = Eertree(str) println("Number of sub-palindromes: ${eertree.nodes.size}") val result = eertree.getSubPalindromes() println("Sub-palindromes: $result")
}</lang>
- Output:
Processing string 'eertree' Number of sub-palindromes: 7 Sub-palindromes: [e, r, eertree, ertre, rtr, t, ee]
Python
<lang python>#!/bin/python from __future__ import print_function
class Node(object): def __init__(self): self.edges = {} # edges (or forward links) self.link = None # suffix link (backward links) self.len = 0 # the length of the node
class Eertree(object): def __init__(self): self.nodes = [] # two initial root nodes self.rto = Node() #odd length root node, or node -1 self.rte = Node() #even length root node, or node 0
# Initialize empty tree self.rto.link = self.rte.link = self.rto; self.rto.len = -1 self.rte.len = 0 self.S = [0] # accumulated input string, T=S[1..i] self.maxSufT = self.rte # maximum suffix of tree T
def get_max_suffix_pal(self, startNode, a): # We traverse the suffix-palindromes of T in the order of decreasing length. # For each palindrome we read its length k and compare T[i-k] against a # until we get an equality or arrive at the -1 node. u = startNode i = len(self.S) k = u.len while id(u) != id(self.rto) and self.S[i - k - 1] != a: assert id(u) != id(u.link) #Prevent infinte loop u = u.link k = u.len
return u
def add(self, a):
# We need to find the maximum suffix-palindrome P of Ta # Start by finding maximum suffix-palindrome Q of T. # To do this, we traverse the suffix-palindromes of T # in the order of decreasing length, starting with maxSuf(T) Q = self.get_max_suffix_pal(self.maxSufT, a)
# We check Q to see whether it has an outgoing edge labeled by a. createANewNode = not a in Q.edges
if createANewNode: # We create the node P of length Q+2 P = Node() self.nodes.append(P) P.len = Q.len + 2 if P.len == 1: # if P = a, create the suffix link (P,0) P.link = self.rte else: # It remains to create the suffix link from P if |P|>1. Just # continue traversing suffix-palindromes of T starting with the suffix # link of Q. P.link = self.get_max_suffix_pal(Q.link, a).edges[a]
# create the edge (Q,P) Q.edges[a] = P
#P becomes the new maxSufT self.maxSufT = Q.edges[a]
#Store accumulated input string self.S.append(a)
return createANewNode
def get_sub_palindromes(self, nd, nodesToHere, charsToHere, result): #Each node represents a palindrome, which can be reconstructed #by the path from the root node to each non-root node.
#Traverse all edges, since they represent other palindromes for lnkName in nd.edges: nd2 = nd.edges[lnkName] #The lnkName is the character used for this edge self.get_sub_palindromes(nd2, nodesToHere+[nd2], charsToHere+[lnkName], result)
#Reconstruct based on charsToHere characters. if id(nd) != id(self.rto) and id(nd) != id(self.rte): #Don't print for root nodes tmp = "".join(charsToHere) if id(nodesToHere[0]) == id(self.rte): #Even string assembled = tmp[::-1] + tmp else: #Odd string assembled = tmp[::-1] + tmp[1:] result.append(assembled)
if __name__=="__main__": st = "eertree" print ("Processing string", st) eertree = Eertree() for ch in st: eertree.add(ch)
print ("Number of sub-palindromes:", len(eertree.nodes))
#Traverse tree to find sub-palindromes result = [] eertree.get_sub_palindromes(eertree.rto, [eertree.rto], [], result) #Odd length words eertree.get_sub_palindromes(eertree.rte, [eertree.rte], [], result) #Even length words print ("Sub-palindromes:", result)</lang>
- Output:
Processing string eertree Number of sub-palindromes: 7 Sub-palindromes: ['r', 'e', 'eertree', 'ertre', 'rtr', 't', 'ee']
Racket
<lang racket>#lang racket (struct node (edges ; edges (or forward links)
link ; suffix link (backward links) len) ; the length of the node #:mutable)
(define (new-node link len) (node (make-hash) link len))
(struct eertree (nodes
rto ; odd length root node, or node -1 rte ; even length root node, or node 0 S ; accumulated input string, T=S[1..i] max-suf-t) ; maximum suffix of tree T #:mutable)
(define (new-eertree)
(let* ((rto (new-node #f -1)) (rte (new-node rto 0))) (eertree null rto rte (list 0) rte)))
(define (eertree-get-max-suffix-pal et start-node a)
#| We traverse the suffix-palindromes of T in the order of decreasing length. For each palindrome we read its length k and compare T[i-k] against a until we get an equality or arrive at the -1 node. |# (match et [(eertree nodes rto rte (and S (app length i)) max-suf-t) (let loop ((u start-node)) (let ((k (node-len u))) (if (or (eq? u rto) (= (list-ref S (- i k 1)) a)) u (let ((u→ (node-link u))) (when (eq? u u→) (error 'eertree-get-max-suffix-pal "infinite loop")) (loop u→)))))]))
(define (eertree-add! et a)
#| We need to find the maximum suffix-palindrome P of Ta Start by finding maximum suffix-palindrome Q of T. To do this, we traverse the suffix-palindromes of T in the order of decreasing length, starting with maxSuf(T) |# (match (eertree-get-max-suffix-pal et (eertree-max-suf-t et) a) [(node Q.edges Q.→ Q.len) ;; We check Q to see whether it has an outgoing edge labeled by a. (define new-node? (not (hash-has-key? Q.edges a))) (when new-node? (define P (new-node #f (+ Q.len 2))) ; We create the node P of length Q+2 (set-eertree-nodes! et (append (eertree-nodes et) (list P))) (define P→ (if (= (node-len P) 1) (eertree-rte et) ; if P = a, create the suffix link (P,0) ;; It remains to c reate the suffix link from P if |P|>1. ;; Just continue traversing suffix-palindromes of T starting with the suffix link of Q. (hash-ref (node-edges (eertree-get-max-suffix-pal et Q.→ a)) a))) (set-node-link! P P→) (hash-set! Q.edges a P)) ; create the edge (Q,P) (set-eertree-max-suf-t! et (hash-ref Q.edges a)) ; P becomes the new maxSufT (set-eertree-S! et (append (eertree-S et) (list a))) ; Store accumulated input string new-node?]))
(define (eertree-get-sub-palindromes et)
(define (inr nd (node-path (list nd)) (char-path/rev null)) ;; Each node represents a palindrome, which can be reconstructed by the path from the root node to ;; each non-root node. (let ((deeper ; Traverse all edges, since they represent other palindromes (for/fold ((result null)) (([→-name nd2] (in-hash (node-edges nd)))) ; The lnk-name is the character used for this edge (append result (inr nd2 (append node-path (list nd2)) (cons →-name char-path/rev))))) (root-node? (or (eq? (eertree-rto et) nd) (eq? (eertree-rte et) nd)))) (if root-node? ; Don't add root nodes deeper (let ((even-string? (eq? (car node-path) (eertree-rte et))) (char-path (reverse char-path/rev))) (cons (append char-path/rev (if even-string? char-path (cdr char-path))) deeper))))) inr)
(define (eertree-get-palindromes et)
(define sub (eertree-get-sub-palindromes et)) (append (sub (eertree-rto et)) (sub (eertree-rte et))))
(module+ main
(define et (new-eertree)) ;; eertree works in integer space, so we'll map to/from char space here (for ((c "eertree")) (eertree-add! et (char->integer c))) (map (compose list->string (curry map integer->char)) (eertree-get-palindromes et)))
</lang>
- Output:
'("t" "rtr" "ertre" "eertree" "r" "e" "ee")
zkl
<lang zkl>class Node{
fcn init(length){ var edges=Dictionary(), # edges (or forward links). (char:Node) link=Void, # suffix link (backward links)
sz =length; # node length.
}
} class Eertree{
fcn init(string=Void){ var nodes=List(),
# two initial root nodes rto=Node(-1), # odd length root node, or node -1 rte=Node(0); # even length root node, or node 0
rto.link=rte.link=rto; # Initialize empty tree var S =Data(Void,0), # accumulated input string, T=S[1..i], byte buffer maxSufT=rte; # maximum suffix of tree T if(string) string.pump(addChar); // go ahead and build the tree } fcn get_max_suffix_pal(startNode,a){ # We traverse the suffix-palindromes of T in the order of decreasing length. # For each palindrome we read its length k and compare T[i-k] against a # until we get an equality or arrive at the -1 node. u,i,k := startNode, S.len(), u.sz; while(u.id!=rto.id and S.charAt(i - k - 1)!=a){
_assert_(u.id!=u.link.id); # Prevent infinte loop u,k = u.link,u.sz;
} return(u); } fcn addChar(a){
# We need to find the maximum suffix-palindrome P of Ta # Start by finding maximum suffix-palindrome Q of T. # To do this, we traverse the suffix-palindromes of T # in the order of decreasing length, starting with maxSuf(T)
Q:=get_max_suffix_pal(maxSufT,a); # We check Q to see whether it has an outgoing edge labeled by a. createANewNode:=(not Q.edges.holds(a)); if(createANewNode){
P:=Node(Q.sz + 2); nodes.append(P); if(P.sz==1) P.link=rte; # if P = a, create the suffix link (P,0) else # It remains to create the suffix link from P if |P|>1. Just # continue traversing suffix-palindromes of T starting with the suffix # link of Q. P.link=get_max_suffix_pal(Q.link,a).edges[a]; Q.edges[a]=P; # create the edge (Q,P)
} maxSufT=Q.edges[a]; # P becomes the new maxSufT S.append(a); # Store accumulated input string return(createANewNode); // in case anyone wants to know a is new edge } fcn get_sub_palindromes{ result:=List(); sub_palindromes(rto, T(rto),"", result); # Odd length words sub_palindromes(rte, T(rte),"", result); # Even length words result } fcn [private] sub_palindromes(nd, nodesToHere, charsToHere, result){ // nodesToHere needs to be read only
# Each node represents a palindrome, which can be reconstructed # by the path from the root node to each non-root node.
# Traverse all edges, since they represent other palindromes
nd.edges.pump(Void,'wrap([(lnkName,nd2)]){
sub_palindromes(nd2, nodesToHere+nd2, charsToHere+lnkName, result);
});
# Reconstruct based on charsToHere characters. if(nd.id!=rto.id and nd.id!=rte.id){ # Don't print for root nodes
if(nodesToHere[0].id==rte.id) # Even string assembled:=charsToHere.reverse() + charsToHere; else assembled:=charsToHere.reverse() + charsToHere[1,*]; # Odd string result.append(assembled);
} }
}</lang> <lang zkl>st:="eertree"; println("Processing string \"", st,"\""); eertree:=Eertree(st); println("Number of sub-palindromes: ", eertree.nodes.len()); println("Sub-palindromes: ", eertree.get_sub_palindromes());</lang>
- Output:
Processing string "eertree" Number of sub-palindromes: 7 Sub-palindromes: L("e","r","eertree","ertre","rtr","t","ee")