Eertree: Difference between revisions

From Rosetta Code
Content added Content deleted
m (→‎{{header|REXX}}: elided superfluous blank lines.)
(Added D)
Line 17: Line 17:
*   [https://arxiv.org/abs/1506.04862 Cornell University Library, Computer Science, Data Structures and Algorithms ───► EERTREE: An Efficient Data Structure for Processing Palindromes in Strings].
*   [https://arxiv.org/abs/1506.04862 Cornell University Library, Computer Science, Data Structures and Algorithms ───► EERTREE: An Efficient Data Structure for Processing Palindromes in Strings].
<br><br>
<br><br>

=={{header|D}}==
{{trans|Go}}
<lang D>import std.array;
import std.stdio;

void main() {
auto tree = eertree("eertree");
writeln(subPalindromes(tree));
}

struct Node {
int length;
int[char] edges;
int suffix;
}

const evenRoot = 0;
const oddRoot = 1;

Node[] eertree(string s) {
Node[] tree = [
Node(0, null, oddRoot),
Node(-1, null, oddRoot),
];
int suffix = oddRoot;
int n, k;
foreach (i, c; s) {
for (n=suffix; ; n=tree[n].suffix) {
k = tree[n].length;
int b = i-k-1;
if (b>=0 && s[b]==c) {
break;
}
}
if (c in tree[n].edges) {
suffix = tree[n].edges[c];
continue;
}
suffix = tree.length;
tree ~= Node(k+2);
tree[n].edges[c] = suffix;
if (tree[suffix].length == 1) {
tree[suffix].suffix = 0;
continue;
}
while (true) {
n = tree[n].suffix;
int b = i-tree[n].length-1;
if (b>=0 && s[b]==c) {
break;
}
}
tree[suffix].suffix = tree[n].edges[c];
}
return tree;
}

auto subPalindromes(Node[] tree) {
auto s = appender!(string[]);
void children(int n, string p) {
foreach (c, n; tree[n].edges) {
p = c ~ p ~ c;
s ~= p;
children(n, p);
}
}
children(0, "");
foreach (c, n; tree[1].edges) {
string ct = [c].idup;
s ~= ct;
children(n, ct);
}
return s.data;
}</lang>
{{out}}
<pre>["ee", "e", "r", "t", "rtr", "ertre", "eertree"]</pre>


=={{header|Go}}==
=={{header|Go}}==

Revision as of 19:38, 6 January 2018

Eertree is a draft programming task. It is not yet considered ready to be promoted as a complete task, for reasons that should be found in its talk page.

An eertree is a data structure designed for efficient processing of certain palindrome tasks, for instance counting the number of sub-palindromes in an input string.

The data structure has commonalities to both tries and suffix trees.   See links below.

Task

Construct an eertree for the string "eertree", then output all sub-palindromes by traversing the tree.


See also



D

Translation of: Go

<lang D>import std.array; import std.stdio;

void main() {

   auto tree = eertree("eertree");
   writeln(subPalindromes(tree));

}

struct Node {

   int length;
   int[char] edges;
   int suffix;

}

const evenRoot = 0; const oddRoot = 1;

Node[] eertree(string s) {

   Node[] tree = [
       Node(0, null, oddRoot),
       Node(-1, null, oddRoot),
   ];
   int suffix = oddRoot;
   int n, k;
   foreach (i, c; s) {
       for (n=suffix; ; n=tree[n].suffix) {
           k = tree[n].length;
           int b = i-k-1;
           if (b>=0 && s[b]==c) {
               break;
           }
       }
       if (c in tree[n].edges) {
           suffix = tree[n].edges[c];
           continue;
       }
       suffix = tree.length;
       tree ~= Node(k+2);
       tree[n].edges[c] = suffix;
       if (tree[suffix].length == 1) {
           tree[suffix].suffix = 0;
           continue;
       }
       while (true) {
           n = tree[n].suffix;
           int b = i-tree[n].length-1;
           if (b>=0 && s[b]==c) {
               break;
           }
       }
       tree[suffix].suffix = tree[n].edges[c];
   }
   return tree;

}

auto subPalindromes(Node[] tree) {

   auto s = appender!(string[]);
   void children(int n, string p) {
       foreach (c, n; tree[n].edges) {
           p = c ~ p ~ c;
           s ~= p;
           children(n, p);
       }
   }
   children(0, "");
   foreach (c, n; tree[1].edges) {
       string ct = [c].idup;
       s ~= ct;
       children(n, ct);
   }
   return s.data;

}</lang>

Output:
["ee", "e", "r", "t", "rtr", "ertre", "eertree"]

Go

<lang go>package main

import "fmt"

func main() {

   tree := eertree([]byte("eertree"))
   fmt.Println(subPalindromes(tree))

}

type edges map[byte]int

type node struct {

   length int
   edges
   suffix int

}

const evenRoot = 0 const oddRoot = 1

func eertree(s []byte) []node {

   tree := []node{
       evenRoot: {length: 0, suffix: oddRoot, edges: edges{}},
       oddRoot:  {length: -1, suffix: oddRoot, edges: edges{}},
   }
   suffix := oddRoot
   var n, k int
   for i, c := range s {
       for n = suffix; ; n = tree[n].suffix {
           k = tree[n].length
           if b := i - k - 1; b >= 0 && s[b] == c {
               break
           }
       }
       if e, ok := tree[n].edges[c]; ok {
           suffix = e
           continue
       }
       suffix = len(tree)
       tree = append(tree, node{length: k + 2, edges: edges{}})
       tree[n].edges[c] = suffix
       if tree[suffix].length == 1 {
           tree[suffix].suffix = 0
           continue
       }
       for {
           n = tree[n].suffix
           if b := i - tree[n].length - 1; b >= 0 && s[b] == c {
               break
           }
       }
       tree[suffix].suffix = tree[n].edges[c]
   }
   return tree

}

func subPalindromes(tree []node) (s []string) {

   var children func(int, string)
   children = func(n int, p string) {
       for c, n := range tree[n].edges {
           c := string(c)
           p := c + p + c
           s = append(s, p)
           children(n, p)
       }
   }
   children(0, "")
   for c, n := range tree[1].edges {
       c := string(c)
       s = append(s, c)
       children(n, c)
   }
   return

}</lang>

Output:
[ee e r t rtr ertre eertree]

Kotlin

Translation of: Python

<lang scala>// version 1.1.4

class Node {

   val edges = mutableMapOf<Char, Node>()  // edges (or forward links)
   var link: Node? = null                  // suffix link (backward links)
   var len = 0                             // the length of the node

}

class Eertree(str: String) {

   val nodes = mutableListOf<Node>()
   private val rto = Node()                // odd length root node, or node -1
   private val rte = Node()                // even length root node, or node 0
   private val s = StringBuilder("0")      // accumulated input string, T = S[1..i]
   private var maxSufT = rte               // maximum suffix of tree T
   init {
       // Initialize and build the tree
       rte.link = rto
       rto.link = rte
       rto.len  = -1
       rte.len  = 0
       for (ch in str) add(ch)
   }
   private fun getMaxSuffixPal(startNode: Node, a: Char): Node {
       // We traverse the suffix-palindromes of T in the order of decreasing length.
       // For each palindrome we read its length k and compare T[i-k] against a
       // until we get an equality or arrive at the -1 node.
       var u = startNode
       val i = s.length
       var k = u.len
       while (u !== rto && s[i - k - 1] != a) {
           if (u === u.link!!) throw RuntimeException("Infinite loop detected")
           u = u.link!!
           k = u.len
       }
       return u
   }
   private fun add(a: Char): Boolean {
       // We need to find the maximum suffix-palindrome P of Ta
       // Start by finding maximum suffix-palindrome Q of T.
       // To do this, we traverse the suffix-palindromes of T
       // in the order of decreasing length, starting with maxSuf(T)
       val q = getMaxSuffixPal(maxSufT, a)
       // We check Q to see whether it has an outgoing edge labeled by a.
       val createANewNode = a !in q.edges.keys
       if (createANewNode) {
           // We create the node P of length Q + 2
           val p = Node()
           nodes.add(p)
           p.len = q.len + 2
           if (p.len == 1) {
               // if P = a, create the suffix link (P, 0)
               p.link = rte
           }
           else {
               // It remains to create the suffix link from P if |P|>1. Just
               // continue traversing suffix-palindromes of T starting with the
               // the suffix link of Q.
               p.link = getMaxSuffixPal(q.link!!, a).edges[a]
           }
           // create the edge (Q, P)
           q.edges[a] = p
       }
       // P becomes the new maxSufT
       maxSufT = q.edges[a]!!
       // Store accumulated input string
       s.append(a)
       return createANewNode
   }
   fun getSubPalindromes(): List<String> {
       // Traverse tree to find sub-palindromes
       val result = mutableListOf<String>()
       // Odd length words
       getSubPalindromes(rto, listOf(rto), "", result)
       // Even length words
       getSubPalindromes(rte, listOf(rte), "", result)
       return result
   }
   private fun getSubPalindromes(nd: Node, nodesToHere: List<Node>,
                         charsToHere: String, result: MutableList<String>) {
       // Each node represents a palindrome, which can be reconstructed
       // by the path from the root node to each non-root node.
       // Traverse all edges, since they represent other palindromes
       for ((lnkName, nd2) in nd.edges) {
           getSubPalindromes(nd2, nodesToHere + nd2, charsToHere + lnkName, result)
       }
       // Reconstruct based on charsToHere characters.
       if (nd !== rto && nd !== rte) { // Don't print for root nodes
           val assembled = charsToHere.reversed() +
               if (nodesToHere[0] === rte)  // Even string
                   charsToHere
               else  // Odd string
                   charsToHere.drop(1)
           result.add(assembled)
       }
   }

}

fun main(args: Array<String>) {

   val str = "eertree"
   println("Processing string '$str'")
   val eertree = Eertree(str)
   println("Number of sub-palindromes: ${eertree.nodes.size}")
   val result = eertree.getSubPalindromes()
   println("Sub-palindromes: $result")

}</lang>

Output:
Processing string 'eertree'
Number of sub-palindromes: 7
Sub-palindromes: [e, r, eertree, ertre, rtr, t, ee]

Python

<lang python>#!/bin/python from __future__ import print_function

class Node(object): def __init__(self): self.edges = {} # edges (or forward links) self.link = None # suffix link (backward links) self.len = 0 # the length of the node

class Eertree(object): def __init__(self): self.nodes = [] # two initial root nodes self.rto = Node() #odd length root node, or node -1 self.rte = Node() #even length root node, or node 0

# Initialize empty tree self.rto.link = self.rte.link = self.rto; self.rto.len = -1 self.rte.len = 0 self.S = [0] # accumulated input string, T=S[1..i] self.maxSufT = self.rte # maximum suffix of tree T

def get_max_suffix_pal(self, startNode, a): # We traverse the suffix-palindromes of T in the order of decreasing length. # For each palindrome we read its length k and compare T[i-k] against a # until we get an equality or arrive at the -1 node. u = startNode i = len(self.S) k = u.len while id(u) != id(self.rto) and self.S[i - k - 1] != a: assert id(u) != id(u.link) #Prevent infinte loop u = u.link k = u.len

return u

def add(self, a):

# We need to find the maximum suffix-palindrome P of Ta # Start by finding maximum suffix-palindrome Q of T. # To do this, we traverse the suffix-palindromes of T # in the order of decreasing length, starting with maxSuf(T) Q = self.get_max_suffix_pal(self.maxSufT, a)

# We check Q to see whether it has an outgoing edge labeled by a. createANewNode = not a in Q.edges

if createANewNode: # We create the node P of length Q+2 P = Node() self.nodes.append(P) P.len = Q.len + 2 if P.len == 1: # if P = a, create the suffix link (P,0) P.link = self.rte else: # It remains to create the suffix link from P if |P|>1. Just # continue traversing suffix-palindromes of T starting with the suffix # link of Q. P.link = self.get_max_suffix_pal(Q.link, a).edges[a]

# create the edge (Q,P) Q.edges[a] = P

#P becomes the new maxSufT self.maxSufT = Q.edges[a]

#Store accumulated input string self.S.append(a)

return createANewNode

def get_sub_palindromes(self, nd, nodesToHere, charsToHere, result): #Each node represents a palindrome, which can be reconstructed #by the path from the root node to each non-root node.

#Traverse all edges, since they represent other palindromes for lnkName in nd.edges: nd2 = nd.edges[lnkName] #The lnkName is the character used for this edge self.get_sub_palindromes(nd2, nodesToHere+[nd2], charsToHere+[lnkName], result)

#Reconstruct based on charsToHere characters. if id(nd) != id(self.rto) and id(nd) != id(self.rte): #Don't print for root nodes tmp = "".join(charsToHere) if id(nodesToHere[0]) == id(self.rte): #Even string assembled = tmp[::-1] + tmp else: #Odd string assembled = tmp[::-1] + tmp[1:] result.append(assembled)

if __name__=="__main__": st = "eertree" print ("Processing string", st) eertree = Eertree() for ch in st: eertree.add(ch)

print ("Number of sub-palindromes:", len(eertree.nodes))

#Traverse tree to find sub-palindromes result = [] eertree.get_sub_palindromes(eertree.rto, [eertree.rto], [], result) #Odd length words eertree.get_sub_palindromes(eertree.rte, [eertree.rte], [], result) #Even length words print ("Sub-palindromes:", result)</lang>

Output:
Processing string eertree
Number of sub-palindromes: 7
Sub-palindromes: ['r', 'e', 'eertree', 'ertre', 'rtr', 't', 'ee']

Racket

Translation of: Python

<lang racket>#lang racket (struct node (edges ; edges (or forward links)

             link ; suffix link (backward links)
             len) ; the length of the node 
 #:mutable)

(define (new-node link len) (node (make-hash) link len))

(struct eertree (nodes

                rto ; odd length root node, or node -1
                rte ; even length root node, or node 0
                S ; accumulated input string, T=S[1..i]
                max-suf-t) ; maximum suffix of tree T
 #:mutable)

(define (new-eertree)

 (let* ((rto (new-node #f -1))
        (rte (new-node rto 0)))
   (eertree null rto rte (list 0) rte)))

(define (eertree-get-max-suffix-pal et start-node a)

 #| We traverse the suffix-palindromes of T in the order of decreasing length.
    For each palindrome we read its length k and compare T[i-k] against a
    until we get an equality or arrive at the -1 node. |#
 (match et
   [(eertree nodes rto rte (and S (app length i)) max-suf-t)
    (let loop ((u start-node))
      (let ((k (node-len u)))
        (if (or (eq? u rto) (= (list-ref S (- i k 1)) a))
            u
            (let ((u→ (node-link u)))
              (when (eq? u u→) (error 'eertree-get-max-suffix-pal "infinite loop"))
              (loop u→)))))]))

(define (eertree-add! et a)

 #| We need to find the maximum suffix-palindrome P of Ta
    Start by finding maximum suffix-palindrome Q of T.
    To do this, we traverse the suffix-palindromes of T
    in the order of decreasing length, starting with maxSuf(T) |#
 (match (eertree-get-max-suffix-pal et (eertree-max-suf-t et) a)
   [(node Q.edges Q.→ Q.len) 
    ;; We check Q to see whether it has an outgoing edge labeled by a.
    (define new-node? (not (hash-has-key? Q.edges a)))
    (when new-node?
      (define P (new-node #f (+ Q.len 2))) ; We create the node P of length Q+2
      (set-eertree-nodes! et (append (eertree-nodes et) (list P)))
      (define P→
        (if (= (node-len P) 1)
            (eertree-rte et) ; if P = a, create the suffix link (P,0)
            ;; It remains to c reate the suffix link from P if |P|>1.
            ;; Just continue traversing suffix-palindromes of T starting with the suffix link of Q.
            (hash-ref (node-edges (eertree-get-max-suffix-pal et Q.→ a)) a)))
      (set-node-link! P P→)
      (hash-set! Q.edges a P)) ; create the edge (Q,P)
    
    (set-eertree-max-suf-t! et (hash-ref Q.edges a)) ; P becomes the new maxSufT
    (set-eertree-S! et (append (eertree-S et) (list a))) ; Store accumulated input string
    new-node?]))

(define (eertree-get-sub-palindromes et)

 (define (inr nd (node-path (list nd)) (char-path/rev null))
   ;; Each node represents a palindrome, which can be reconstructed by the path from the root node to
   ;; each non-root node.
   (let ((deeper ; Traverse all edges, since they represent other palindromes     
          (for/fold ((result null)) (([→-name nd2] (in-hash (node-edges nd))))
            ; The lnk-name is the character used for this edge
            (append result (inr nd2 (append node-path (list nd2)) (cons →-name char-path/rev)))))
         (root-node? (or (eq? (eertree-rto et) nd) (eq? (eertree-rte et) nd))))
     (if root-node? ; Don't add root nodes
         deeper
         (let ((even-string? (eq? (car node-path) (eertree-rte et)))
               (char-path (reverse char-path/rev)))
           (cons (append char-path/rev (if even-string? char-path (cdr char-path))) deeper)))))
 inr)

(define (eertree-get-palindromes et)

 (define sub (eertree-get-sub-palindromes et))
 (append (sub (eertree-rto et))
         (sub (eertree-rte et))))

(module+ main

 (define et (new-eertree))
 ;; eertree works in integer space, so we'll map to/from char space here
 (for ((c "eertree")) (eertree-add! et (char->integer c)))
 (map (compose list->string (curry map integer->char)) (eertree-get-palindromes et)))

</lang>

Output:
'("t" "rtr" "ertre" "eertree" "r" "e" "ee")

REXX

This REXX program is modeled after the   Ring   example. <lang rexx>/*REXX program creates a list of (unique) sub─palindromes that exist in an input string.*/ parse arg x . /*obtain optional input string from CL.*/ if x== | x=="," then x= 'eertree' /*Not specified? Then use the default.*/ L=length(x) /*the length (in chars) of input string*/ @.=. /*@ tree indicates uniqueness of pals. */ $= /*list of unsorted & unique palindromes*/

   do     j=1  for L                            /*start at the left side of the string.*/
       do k=1  for L                            /*traverse from left to right of string*/
       parse var  x   =(j)  y   +(k)            /*extract a substring from the string. */
       if reverse(y)\==y  then iterate          /*Partial string a palindrome?  Skip it*/
       if @.y\==.         then iterate          /*Sub─palindrome already exist? Skip it*/
       @.y=y                                    /*indicate a sub─palindrome was found. */
       $=$' ' y                                 /*append the sub─palindrome to the list*/
       end   /*k*/                              /* [↑]  an extra blank is inserted.    */
   end       /*j*/

pad=copies('─', 8) /*a fence to be used as an eyecatcher. */ say pad 'Using the input string: ' x /*echo the input string being parsed.*/ say

  1. =words($) /*get the number of palindromes found. */

subP= 'sub─palindromes' /*a literal to make SAY texts shorter. */ say pad 'The number of' subP "found: " # say say pad 'The list of' subP "found: " /*display the list of the palindromes. */ say strip($) /*stick a fork in it, we're all done. */</lang>

output   when using the default input:
──────── Using the input string:  eertree

──────── The number of sub─palindromes found:  7

──────── The list of sub─palindromes found:
e  ee  eertree  ertre  r  rtr  t

Ring

<lang ring>

  1. Project : Eertree
  2. Date  : 2017/09/23
  3. Author  : Gal Zsolt (~ CalmoSoft ~)
  4. Email  : <calmosoft@gmail.com>

str = "eertree" pal = [] for n=1 to len(str)

   for m=1 to len(str)
       strrev = ""
       strpal = substr(str, n, m)
       if strpal != ""
          for p=len(strpal) to 1 step -1
              strrev = strrev + strpal[p]
          next
          if strpal = strrev 
             add(pal, strpal)
          ok
       ok
   next

next sortpal = sort(pal) for n=len(sortpal) to 2 step -1

   if sortpal[n] = sortpal[n-1]
      del(sortpal, n)
   ok

next see sortpal + nl </lang> Output:

e
ee
eertree
ertre
r
rtr
t

zkl

Translation of: Python

<lang zkl>class Node{

  fcn init(length){
     var edges=Dictionary(),	# edges (or forward links). (char:Node)
         link=Void,		# suffix link (backward links)

sz =length; # node length.

   }

} class Eertree{

  fcn init(string=Void){
     var nodes=List(),

# two initial root nodes rto=Node(-1), # odd length root node, or node -1 rte=Node(0); # even length root node, or node 0

     rto.link=rte.link=rto;    # Initialize empty tree
     var S      =Data(Void,0), # accumulated input string, T=S[1..i], byte buffer
         maxSufT=rte;          # maximum suffix of tree T
     if(string) string.pump(addChar);  // go ahead and build the tree
  }
  fcn get_max_suffix_pal(startNode,a){
   # We traverse the suffix-palindromes of T in the order of decreasing length.
   # For each palindrome we read its length k and compare T[i-k] against a
   # until we get an equality or arrive at the -1 node.
     u,i,k := startNode, S.len(), u.sz;
     while(u.id!=rto.id and S.charAt(i - k - 1)!=a){

_assert_(u.id!=u.link.id); # Prevent infinte loop u,k = u.link,u.sz;

     }
     return(u);
  }
  fcn addChar(a){

# We need to find the maximum suffix-palindrome P of Ta # Start by finding maximum suffix-palindrome Q of T. # To do this, we traverse the suffix-palindromes of T # in the order of decreasing length, starting with maxSuf(T)

     Q:=get_max_suffix_pal(maxSufT,a);
       # We check Q to see whether it has an outgoing edge labeled by a.
     createANewNode:=(not Q.edges.holds(a));
     if(createANewNode){

P:=Node(Q.sz + 2); nodes.append(P); if(P.sz==1) P.link=rte; # if P = a, create the suffix link (P,0) else # It remains to create the suffix link from P if |P|>1. Just # continue traversing suffix-palindromes of T starting with the suffix # link of Q. P.link=get_max_suffix_pal(Q.link,a).edges[a]; Q.edges[a]=P; # create the edge (Q,P)

     }
     maxSufT=Q.edges[a]; # P becomes the new maxSufT
     S.append(a);	  # Store accumulated input string
     return(createANewNode);  // in case anyone wants to know a is new edge
  }
  fcn get_sub_palindromes{
     result:=List();
     sub_palindromes(rto, T(rto),"", result); # Odd length words
     sub_palindromes(rte, T(rte),"", result); # Even length words
     result
  }
  fcn [private] sub_palindromes(nd, nodesToHere, charsToHere, result){
       // nodesToHere needs to be read only

# Each node represents a palindrome, which can be reconstructed # by the path from the root node to each non-root node.

# Traverse all edges, since they represent other palindromes

     nd.edges.pump(Void,'wrap([(lnkName,nd2)]){

sub_palindromes(nd2, nodesToHere+nd2, charsToHere+lnkName, result);

     });
     # Reconstruct based on charsToHere characters.
     if(nd.id!=rto.id and nd.id!=rte.id){ # Don't print for root nodes

if(nodesToHere[0].id==rte.id) # Even string assembled:=charsToHere.reverse() + charsToHere; else assembled:=charsToHere.reverse() + charsToHere[1,*]; # Odd string result.append(assembled);

     }
  }

}</lang> <lang zkl>st:="eertree"; println("Processing string \"", st,"\""); eertree:=Eertree(st); println("Number of sub-palindromes: ", eertree.nodes.len()); println("Sub-palindromes: ", eertree.get_sub_palindromes());</lang>

Output:
Processing string "eertree"
Number of sub-palindromes: 7
Sub-palindromes: L("e","r","eertree","ertre","rtr","t","ee")