Ukkonen’s suffix tree construction: Difference between revisions

m
(Created page with "{{draft task}} Suffix Trees are very useful in numerous string processing and computational biology problems. The task is to create a function which implements Ukkonen’s al...")
 
m (→‎{{header|Wren}}: Minor tidy)
 
(18 intermediate revisions by 8 users not shown)
Line 1:
[[Category:String algorithms]]
{{draft task}}
{{task}}
Suffix Trees are very useful in numerous string processing and computational biology problems.
 
Line 10 ⟶ 11:
[http://www.geeksforgeeks.org/ukkonens-suffix-tree-construction-part-5/ Part 5]
[http://www.geeksforgeeks.org/ukkonens-suffix-tree-construction-part-6/ Part 6]
 
Using [[Arithmetic-geometric mean/Calculate Pi]] generate the first 1000, 10000, and 100000 decimal places of pi. Using your implementation with an alphabet of 0 through 9 (plus $ say to make the tree explicit) find the longest repeated string in each list. Time your results and demonstrate that your implementation is linear (i.e. that 10000 takes approx. 10 times as long as 1000). You may vary the size of the lists of decimal places of pi to give reasonable answers.
 
=={{header|Go}}==
This is a translation of the C code [https://www.geeksforgeeks.org/suffix-tree-application-3-longest-repeated-substring/ here] which is an extended form of the code in Part 6 of the task description for finding the longest repeated substring of a given string. In the interests of brevity, the extensive comments in the C version have been largely omitted. The C code doesn't compile as it stands but I have added a fix in the Talk Page.
 
For convenience I have included the code from the [[Arithmetic-geometric_mean/Calculate_Pi#Go]] task in the same package.
 
It takes around 25 seconds on my machine (Celeron @1.6GHz) to calculate the first 100,000 (or so) decimal places of Pi. Having done that, the timings for extracting the longest repeated sequence of digits are quick and fairly linear as expected.
 
As the task doesn't say whether overlapping sequences are to be counted, I've assumed that they are as this is what the algorithm naturally produces.
<syntaxhighlight lang="go">package main
 
import (
"fmt"
"math/big"
"time"
)
 
var maxChar = 128
 
type Node struct {
children []*Node
suffixLink *Node
start int
end *int
suffixIndex int
}
 
var (
text string
root *Node
lastNewNode *Node
activeNode *Node
activeEdge = -1
activeLength = 0
remainingSuffixCount = 0
leafEnd = -1
rootEnd *int
splitEnd *int
size = -1
)
 
func newNode(start int, end *int) *Node {
node := new(Node)
node.children = make([]*Node, maxChar)
node.suffixLink = root
node.start = start
node.end = end
node.suffixIndex = -1
return node
}
 
func edgeLength(n *Node) int {
if n == root {
return 0
}
return *(n.end) - n.start + 1
}
 
func walkDown(currNode *Node) bool {
if activeLength >= edgeLength(currNode) {
activeEdge += edgeLength(currNode)
activeLength -= edgeLength(currNode)
activeNode = currNode
return true
}
return false
}
 
func extendSuffixTree(pos int) {
leafEnd = pos
remainingSuffixCount++
lastNewNode = nil
for remainingSuffixCount > 0 {
if activeLength == 0 {
activeEdge = pos
}
if activeNode.children[text[activeEdge]] == nil {
activeNode.children[text[activeEdge]] = newNode(pos, &leafEnd)
if lastNewNode != nil {
lastNewNode.suffixLink = activeNode
lastNewNode = nil
}
} else {
next := activeNode.children[text[activeEdge]]
if walkDown(next) {
continue
}
if text[next.start+activeLength] == text[pos] {
if lastNewNode != nil && activeNode != root {
lastNewNode.suffixLink = activeNode
lastNewNode = nil
}
activeLength++
break
}
temp := next.start + activeLength - 1
splitEnd = &temp
split := newNode(next.start, splitEnd)
activeNode.children[text[activeEdge]] = split
split.children[text[pos]] = newNode(pos, &leafEnd)
next.start += activeLength
split.children[text[next.start]] = next
if lastNewNode != nil {
lastNewNode.suffixLink = split
}
lastNewNode = split
}
remainingSuffixCount--
if activeNode == root && activeLength > 0 {
activeLength--
activeEdge = pos - remainingSuffixCount + 1
} else if activeNode != root {
activeNode = activeNode.suffixLink
}
}
}
 
func setSuffixIndexByDFS(n *Node, labelHeight int) {
if n == nil {
return
}
if n.start != -1 {
// Uncomment line below to print suffix tree
// fmt.Print(text[n.start: *(n.end) +1])
}
leaf := 1
for i := 0; i < maxChar; i++ {
if n.children[i] != nil {
// Uncomment the 3 lines below to print suffix index
//if leaf == 1 && n.start != -1 {
// fmt.Printf(" [%d]\n", n.suffixIndex)
//}
leaf = 0
setSuffixIndexByDFS(n.children[i], labelHeight+edgeLength(n.children[i]))
}
}
if leaf == 1 {
n.suffixIndex = size - labelHeight
// Uncomment line below to print suffix index
//fmt.Printf(" [%d]\n", n.suffixIndex)
}
}
 
func buildSuffixTree() {
size = len(text)
temp := -1
rootEnd = &temp
root = newNode(-1, rootEnd)
activeNode = root
for i := 0; i < size; i++ {
extendSuffixTree(i)
}
labelHeight := 0
setSuffixIndexByDFS(root, labelHeight)
}
 
func doTraversal(n *Node, labelHeight int, maxHeight, substringStartIndex *int) {
if n == nil {
return
}
if n.suffixIndex == -1 {
for i := 0; i < maxChar; i++ {
if n.children[i] != nil {
doTraversal(n.children[i], labelHeight+edgeLength(n.children[i]),
maxHeight, substringStartIndex)
}
}
} else if n.suffixIndex > -1 && (*maxHeight < labelHeight-edgeLength(n)) {
*maxHeight = labelHeight - edgeLength(n)
*substringStartIndex = n.suffixIndex
}
}
 
func getLongestRepeatedSubstring(s string) {
maxHeight := 0
substringStartIndex := 0
doTraversal(root, 0, &maxHeight, &substringStartIndex)
// Uncomment line below to print maxHeight and substringStartIndex
// fmt.Printf("maxHeight %d, substringStartIndex %d\n", maxHeight, substringStartIndex)
if s == "" {
fmt.Printf(" %s is: ", text)
} else {
fmt.Printf(" %s is: ", s)
}
k := 0
for ; k < maxHeight; k++ {
fmt.Printf("%c", text[k+substringStartIndex])
}
if k == 0 {
fmt.Print("No repeated substring")
}
fmt.Println()
}
 
func calculatePi() *big.Float {
one := big.NewFloat(1)
two := big.NewFloat(2)
four := big.NewFloat(4)
prec := uint(325 * 1024) // enough to calculate Pi to 100,182 decimal digits
 
a := big.NewFloat(1).SetPrec(prec)
g := new(big.Float).SetPrec(prec)
 
// temporary variables
t := new(big.Float).SetPrec(prec)
u := new(big.Float).SetPrec(prec)
 
g.Quo(a, t.Sqrt(two))
sum := new(big.Float)
pow := big.NewFloat(2)
 
for a.Cmp(g) != 0 {
t.Add(a, g)
t.Quo(t, two)
g.Sqrt(u.Mul(a, g))
a.Set(t)
pow.Mul(pow, two)
t.Sub(t.Mul(a, a), u.Mul(g, g))
sum.Add(sum, t.Mul(t, pow))
}
 
t.Mul(a, a)
t.Mul(t, four)
pi := t.Quo(t, u.Sub(one, sum))
return pi
}
 
func main() {
tests := []string{
"GEEKSFORGEEKS$",
"AAAAAAAAAA$",
"ABCDEFG$",
"ABABABA$",
"ATCGATCGA$",
"banana$",
"abcpqrabpqpq$",
"pqrpqpqabab$",
}
fmt.Println("Longest Repeated Substring in:\n")
for _, test := range tests {
text = test
buildSuffixTree()
getLongestRepeatedSubstring("")
}
fmt.Println()
 
pi := calculatePi()
piStr := fmt.Sprintf("%v", pi)
piStr = piStr[2:] // remove initial 3.
numbers := []int{1e3, 1e4, 1e5}
maxChar = 58
for _, number := range numbers {
start := time.Now()
text = piStr[0:number] + "$"
buildSuffixTree()
getLongestRepeatedSubstring(fmt.Sprintf("first %d d.p. of Pi", number))
elapsed := time.Now().Sub(start)
fmt.Printf(" (this took %s)\n\n", elapsed)
}
}</syntaxhighlight>
 
{{out}}
Sample run:
<pre>
Longest Repeated Substring in:
 
GEEKSFORGEEKS$ is: GEEKS
AAAAAAAAAA$ is: AAAAAAAAA
ABCDEFG$ is: No repeated substring
ABABABA$ is: ABABA
ATCGATCGA$ is: ATCGA
banana$ is: ana
abcpqrabpqpq$ is: ab
pqrpqpqabab$ is: ab
 
first 1000 d.p. of Pi is: 23846
(this took 7.728858ms)
 
first 10000 d.p. of Pi is: 7111369
(this took 57.524478ms)
 
first 100000 d.p. of Pi is: 041021944
(this took 599.770281ms)
</pre>
 
=={{header|Java}}==
<syntaxhighlight lang="java">
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;
 
public final class UkkonenSuffixTree {
 
public static void main(String[] aArgs) throws IOException {
List<Integer> limits = List.of( 1_000, 10_000, 100_000 );
for ( int limit : limits ) {
String contents = Files.readString(Path.of("piDigits.txt"));
String piDigits = contents.substring(0, limit + 1);
final long start = System.currentTimeMillis();
SuffixTree tree = new SuffixTree(piDigits);
Map<String, Set<Integer>> substrings = tree.getLongestRepeatedSubstrings();
final long finish = System.currentTimeMillis();
System.out.println("First " + limit + " digits of pi has longest repeated characters:");
for ( String substring : substrings.keySet() ) {
System.out.print(" '" + substring + "' starting at index ");
for ( Iterator<Integer> iterator = substrings.get(substring).iterator(); iterator.hasNext(); ) {
System.out.print(iterator.next());
if ( iterator.hasNext() ) {
System.out.print(" and ");
}
}
System.out.println();
}
System.out.println("Time taken: " + ( finish - start ) + " milliseconds." + System.lineSeparator());
}
System.out.println("The timings show that the implementation has approximately linear performance.");
}
 
private final static class SuffixTree {
public SuffixTree(String aWord) {
text = Arrays.copyOfRange(aWord.toCharArray(), 0, aWord.length() + 1);
text[aWord.length()] = '\uF123'; // Terminal character
nodes = new Node[2 * aWord.length() + 2];
root = newNode(UNDEFINED, UNDEFINED);
activeNode = root;
for ( char character : text ) {
extendSuffixTree(character);
}
}
public Map<String, Set<Integer>> getLongestRepeatedSubstrings() {
List<Integer> indexes = doTraversal();
String word = String.valueOf(text).substring(0, text.length - 1);
Map<String, Set<Integer>> result = new HashMap<String, Set<Integer>>();
if ( indexes.get(0) > 0 ) {
for ( int i = 1; i < indexes.size(); i++ ) {
String substring = word.substring(indexes.get(i), indexes.get(i) + indexes.get(0));
result.computeIfAbsent(substring, k -> new TreeSet<Integer>()).add(indexes.get(i));
}
}
return result;
}
// PRIVATE //
private void extendSuffixTree(char aCharacter) {
needParentLink = UNDEFINED;
remainder++;
while ( remainder > 0 ) {
if ( activeLength == 0 ) {
activeEdge = textIndex;
}
if ( ! nodes[activeNode].children.containsKey(text[activeEdge]) ) {
final int leaf = newNode(textIndex, LEAF_NODE);
nodes[activeNode].children.put(text[activeEdge], leaf);
addSuffixLink(activeNode);
} else {
final int next = nodes[activeNode].children.get(text[activeEdge]);
if ( walkDown(next) ) {
continue;
}
if ( text[nodes[next].start + activeLength] == aCharacter ) {
activeLength++;
addSuffixLink(activeNode);
break;
}
final int split = newNode(nodes[next].start, nodes[next].start + activeLength);
nodes[activeNode].children.put(text[activeEdge], split);
final int leaf = newNode(textIndex, LEAF_NODE);
nodes[split].children.put(aCharacter, leaf);
nodes[next].start += activeLength;
nodes[split].children.put(text[nodes[next].start], next);
addSuffixLink(split);
}
remainder--;
if ( activeNode == root && activeLength > 0 ) {
activeLength--;
activeEdge = textIndex - remainder + 1;
} else {
activeNode = ( nodes[activeNode].parentLink > 0 ) ? nodes[activeNode].parentLink : root;
}
}
textIndex++;
}
private boolean walkDown(int aNode) {
if ( activeLength >= nodes[aNode].edgeLength() ) {
activeEdge += nodes[aNode].edgeLength();
activeLength -= nodes[aNode].edgeLength();
activeNode = aNode;
return true;
}
return false;
}
private void addSuffixLink(int aNode) {
if ( needParentLink != UNDEFINED ) {
nodes[needParentLink].parentLink = aNode;
}
needParentLink = aNode;
}
private int newNode(int aStart, int aEnd) {
Node node = new Node(aStart, aEnd);
node.leafIndex = ( aEnd == LEAF_NODE ) ? leafIndexGenerator++ : UNDEFINED;
nodes[currentNode] = node;
return currentNode++;
}
private List<Integer> doTraversal() {
List<Integer> indexes = new ArrayList<Integer>();
indexes.add(UNDEFINED);
return traversal(indexes, nodes[root], 0);
}
private List<Integer> traversal(List<Integer> aIndexes, Node aNode, int aHeight) {
if ( aNode.leafIndex == UNDEFINED ) {
for ( int index : aNode.children.values() ) {
Node child = nodes[index];
traversal(aIndexes, child, aHeight + child.edgeLength());
}
} else if ( aIndexes.get(0) < aHeight - aNode.edgeLength() ) {
aIndexes.clear();
aIndexes.add(aHeight - aNode.edgeLength());
aIndexes.add(aNode.leafIndex);
} else if ( aIndexes.get(0) == aHeight - aNode.edgeLength() ) {
aIndexes.add(aNode.leafIndex);
}
return aIndexes;
}
private final class Node {
public Node(int aStart, int aEnd) {
start = aStart;
end = aEnd;
}
public int edgeLength() {
return Math.min(end, textIndex + 1) - start;
}
private int start, end, parentLink, leafIndex;
private Map<Character, Integer> children = new HashMap<Character, Integer>();
}
private Node[] nodes;
private char[] text;
private int activeNode, activeLength, activeEdge;
private int root, textIndex, currentNode, needParentLink, remainder, leafIndexGenerator;
private static final int UNDEFINED = -1;
private static final int LEAF_NODE = Integer.MAX_VALUE;
}
 
}
</syntaxhighlight>
{{ out }}
<pre>
First 1000 digits of pi has longest repeated characters:
'82534' starting at index 87 and 902
'33446' starting at index 214 and 830
'60943' starting at index 396 and 550
'23846' starting at index 15 and 578
'93751' starting at index 44 and 940
'99999' starting at index 761 and 762
'42019' starting at index 700 and 993
Time taken: 13 milliseconds.
 
First 10000 digits of pi has longest repeated characters:
'7111369' starting at index 3501 and 6114
'8530614' starting at index 4166 and 4600
Time taken: 8 milliseconds.
 
First 100000 digits of pi has longest repeated characters:
'041021944' starting at index 21760 and 75869
'201890888' starting at index 30626 and 33843
'134926158' starting at index 69596 and 86281
Time taken: 100 milliseconds.
 
The timings show that the implementation has approximately linear performance.
</pre>
 
=={{header|Julia}}==
{{trans|Go}}
Uses array indices instead of the Go version's node pointers.
<syntaxhighlight lang="julia">const oo = typemax(Int)
 
"""The suffix-tree's node."""
mutable struct Node
children::Dict{Char, Int}
start::Int
ending::Int
suffixlink::Int
suffixindex::Int
end
 
Node() = Node(Dict(), 0, oo, 0, -1)
Node(start, ending) = Node(Dict(), start, ending, 0, -1)
 
""" Ukkonen Suffix-Tree """
mutable struct SuffixTree
nodes::Vector{Node}
text::Vector{Char}
root::Int
position::Int
currentnode::Int
needsuffixlink::Int
remainder::Int
activenode::Int
activelength::Int
activeedge::Int
end
 
edgelength(st, n::Node) = min(n.ending, st.position + 1) - n.start
 
function newnode(st, start, ending)
st.currentnode += 1
st.nodes[st.currentnode] = Node(start, ending)
return st.currentnode
end
 
function SuffixTree(str::String)
nodes = [Node() for _ in 1:length(str) * 2]
st = SuffixTree(nodes, [c for c in str], 1, 0, 0, 0, 0, 1, 1, 1)
st.root = newnode(st, 0, 0)
st.activenode = st.root
for i in 1:length(st.text)
extendsuffixtree(st, i)
end
setsuffixindexbyDFS(st, st.nodes[st.root], 0)
return st
end
 
function addsuffixlink(st, nodenum::Int)
if st.needsuffixlink > 0
st.nodes[st.needsuffixlink].suffixlink = nodenum
end
st.needsuffixlink = nodenum
end
 
activeedge(st) = st.text[st.activeedge]
 
function walkdown!(st, currnode::Int)
len = edgelength(st, st.nodes[currnode])
st.activelength < len && return false
st.activeedge += len
st.activelength -= len
st.activenode = currnode
return true
end
 
function extendsuffixtree(st, pos)
st.position = pos
st.needsuffixlink = 0
st.remainder += 1
while st.remainder > 0
st.activelength == 0 && (st.activeedge = st.position)
if !haskey(st.nodes[st.activenode].children, activeedge(st))
nodenum = newnode(st, st.position, oo)
st.nodes[st.activenode].children[activeedge(st)] = nodenum
addsuffixlink(st, st.activenode)
else
next = st.nodes[st.activenode].children[activeedge(st)]
walkdown!(st, next) && continue
if st.text[st.nodes[next].start + st.activelength] == st.text[pos]
addsuffixlink(st, st.activenode)
st.activelength += 1
break
end
splt = newnode(st, st.nodes[next].start, st.nodes[next].start + st.activelength)
st.nodes[st.activenode].children[activeedge(st)] = splt
nodenum = newnode(st, st.position, oo)
st.nodes[splt].children[st.text[pos]] = nodenum
st.nodes[next].start += st.activelength
st.nodes[splt].children[st.text[st.nodes[next].start]] = next
addsuffixlink(st, splt)
end
st.remainder -= 1
if st.activenode == st.root && st.activelength > 0
st.activelength -= 1
st.activeedge = st.position - st.remainder + 1
elseif st.activenode != st.root
st.activenode = st.nodes[st.activenode].suffixlink
end
end
end
 
function setsuffixindexbyDFS(st, node, labelheight, verbose=false)
verbose && node.start > 0 && print(st.text[node.start:min(node.ending, length(st.text))])
isleaf = true
for child in map(v -> st.nodes[v], collect(values(node.children)))
verbose && isleaf && node.start > 0 && println(" [", node.suffixindex, "]")
isleaf = false
setsuffixindexbyDFS(st, child, labelheight + edgelength(st, child))
end
if isleaf
idx = length(st.text) - labelheight
node.suffixindex = idx
verbose && println(" [$idx]")
end
end
 
function dotraversal(st)
maxheight, substringstartindices = 0, [0]
function traversal(node::Node, labelheight)
if node.suffixindex == -1
for child in map(v -> st.nodes[v], collect(values(node.children)))
traversal(child, labelheight + edgelength(st, child))
end
elseif maxheight < labelheight - edgelength(st, node)
maxheight = labelheight - edgelength(st, node)
substringstartindices = [node.suffixindex + 1]
elseif maxheight == labelheight - edgelength(st, node)
push!(substringstartindices, node.suffixindex + 1)
end
end
traversal(st.nodes[st.root], 0)
return maxheight, substringstartindices
end
 
function getlongestrepeatedsubstring(st::SuffixTree, label="", printresult=true)
len, starts = dotraversal(st)
substring = len == 0 ? "" :
join(unique(map(x -> String(st.text[x:x+len-1]), starts)), " (or) ")
if printresult
print(" ", label == "" ? String(st.text) : label, ": ")
println(len == 0 ? "No repeated substring." : substring)
end
return substring
end
 
function testsuffixtree()
tests = [
"CAAAABAAAABD\$",
"GEEKSFORGEEKS\$",
"AAAAAAAAAA\$",
"ABCDEFG\$",
"ABABABA\$",
"ATCGATCGA\$",
"banana\$",
"abcpqrabpqpq\$",
"pqrpqpqabab\$",
]
println("Longest Repeated Substring in:\n")
for test in tests
st = SuffixTree(test)
getlongestrepeatedsubstring(st)
end
println()
sπ = ""
setprecision(4000000) do
sπ = string(BigFloat(π))[3:end]
end
for number in [1000, 10000, 100000, 1000000]
text = sπ[1:number] * "\$"
@time begin
st = SuffixTree(text)
getlongestrepeatedsubstring(st, "first $number d.p. of π")
end
end
end
 
testsuffixtree()
</syntaxhighlight>{{out}}
<pre>
Longest Repeated Substring in:
 
CAAAABAAAABD: AAAAB
GEEKSFORGEEKS: GEEKS
AAAAAAAAAA: AAAAAAAAA
ABCDEFG: No repeated substring.
ABABABA: ABABA
ATCGATCGA: ATCGA
banana: ana
abcpqrabpqpq: ab (or) pq
pqrpqpqabab: ab (or) pq
 
first 1000 d.p. of π: 60943 (or) 42019 (or) 82534 (or) 99999 (or) 93751 (or) 23846 (or) 33446
0.003336 seconds (34.86 k allocations: 4.252 MiB)
first 10000 d.p. of π: 7111369 (or) 8530614
0.038749 seconds (351.60 k allocations: 42.460 MiB, 16.54% gc time)
first 100000 d.p. of π: 134926158 (or) 041021944 (or) 201890888
0.533892 seconds (3.52 M allocations: 425.035 MiB, 22.42% gc time)
first 1000000 d.p. of π: 756130190263
6.008879 seconds (35.25 M allocations: 4.152 GiB, 23.20% gc time)
</pre>
 
=={{header|Nim}}==
{{trans|Julia}}
{{libheader|Nim-Integers}}
<syntaxhighlight lang="Nim">import std/[sequtils, strformat, strutils, tables]
 
const ∞ = int.high
 
type
 
# Suffix-tree node.
Node = ref object
children: Table[char, int]
first: int
last: int
suffixLink: int
suffixIndex: int
 
# Ukkonen suffix-tree.
SuffixTree = object
nodes: seq[Node]
text: string
root: int
position: int
currentNode: int
needSuffixLink: int
remainder: int
activeNode: int
activeLength: int
activeEdge: int
 
 
func newNode(): Node =
Node(first: -1, last: ∞, suffixLink: -1, suffixIndex: -1)
 
func newNode(first, last: int): Node =
Node(first: first, last: last, suffixLink: 0, suffixIndex: -1)
 
func newNode(st: var SuffixTree; first, last: int): int =
inc st.currentNode
st.nodes[st.currentNode] = newNode(first, last)
result = st.currentNode
 
 
func activeEdgeChar(st: SuffixTree): char {.inline.} = st.text[st.activeedge]
 
 
func edgeLength(st: SuffixTree; n: Node): int =
min(n.last, st.position + 1) - n.first
 
 
func addSuffixLink(st: var SuffixTree; nodenum: int) =
if st.needSuffixLink >= 0:
st.nodes[st.needSuffixLink].suffixLink = nodenum
st.needSuffixLink = nodenum
 
func walkdown(st: var SuffixTree; currNode: int): bool =
let length = st.edgeLength(st.nodes[currNode])
if st.activeLength < length: return false
inc st.activeEdge, length
dec st.activeLength, length
st.activeNode = currnode
result = true
 
 
func extendSuffixTree(st: var SuffixTree; pos: int) =
st.position = pos
st.needSuffixLink = -1
inc st.remainder
while st.remainder > 0:
if st.activeLength == 0: st.activeEdge = st.position
if st.activeEdgeChar() notin st.nodes[st.activeNode].children:
let nodeNum = st.newNode(st.position, ∞)
st.nodes[st.activeNode].children[st.activeEdgeChar()] = nodeNum
st.addSuffixLink(st.activeNode)
else:
let next = st.nodes[st.activeNode].children[st.activeEdgeChar()]
if st.walkdown(next): continue
if st.text[st.nodes[next].first + st.activeLength] == st.text[pos]:
st.addSuffixLink(st.activeNode)
inc st.activeLength
break
let split = st.newNode(st.nodes[next].first, st.nodes[next].first + st.activeLength)
st.nodes[st.activeNode].children[st.activeEdgeChar()] = split
let nodeNum = st.newNode(st.position, ∞)
st.nodes[split].children[st.text[pos]] = nodeNum
st.nodes[next].first += st.activelength
st.nodes[split].children[st.text[st.nodes[next].first]] = next
st.addSuffixLink(split)
 
dec st.remainder
if st.activeNode == st.root and st.activeLength > 0:
dec st.activelength
st.activeEdge = st.position - st.remainder + 1
elif st.activeNode != st.root:
st.activeNode = st.nodes[st.activeNode].suffixLink
 
 
proc setSuffixIndexByDFS(st: var SuffixTree; node: Node; labelHeight: Natural; verbose = false) =
if verbose and node.first >= 0:
echo st.text[node.first..<min(node.last, st.text.len)]
var isLeaf = true
for ichild in node.children.values:
let child = st.nodes[ichild]
if verbose and isLeaf and node.first >= 0:
echo &" [{node.suffixindex}]"
isLeaf = false
st.setSuffixIndexbyDFS(child, labelHeight + st.edgeLength(child))
if isleaf:
node.suffixindex = st.text.len - labelHeight
if verbose: echo &" [{node.suffixindex}]"
 
proc initSuffixTree(str: string): SuffixTree =
var nodes = newSeqWith(str.len * 2, newNode())
result = SuffixTree(nodes: nodes, text: str, position: -1, currentNode: -1,
needSuffixLink: -1, remainder: 0, activeLength: 1, activeEdge: 0)
result.root = result.newNode(0, 0)
result.activeNode = result.root
for i in 0..result.text.high:
result.extendSuffixTree(i)
result.setSuffixIndexByDFS(result.nodes[result.root], 0)
 
 
func doTraversal(st: SuffixTree): (int, seq[int]) =
var maxHeight = 0
var substringStartIndices = @[-1]
 
func traversal(node: Node; labelHeight: int) =
if node.suffixIndex == -1:
for ichild in node.children.values:
let child = st.nodes[ichild]
traversal(child, labelHeight + st.edgeLength(child))
elif maxHeight < labelHeight - st.edgeLength(node):
maxHeight = labelHeight - st.edgeLength(node)
substringStartIndices = @[node.suffixIndex]
elif maxHeight == labelHeight - st.edgelength(node):
substringStartIndices.add node.suffixIndex
 
traversal(st.nodes[st.root], 0)
result = (maxHeight, move(substringStartIndices))
 
 
proc getLongestRepeatedSubstring(st: SuffixTree; label = ""; printResult = true): string =
let (length, starts) = st.dotraversal()
result = if length == 0: ""
else: starts.mapIt(st.text[it..it+length-1]).deduplicate().join(" (or) ")
if printResult:
stdout.write " ", if label.len == 0: join(st.text) else: label, ": "
echo if length == 0: "No repeated substring." else: result
 
 
 
const Tests = ["CAAAABAAAABD$",
"GEEKSFORGEEKS$",
"AAAAAAAAAA$",
"ABCDEFG$",
"ABABABA$",
"ATCGATCGA$",
"banana$",
"abcpqrabpqpq$",
"pqrpqpqabab$"]
 
echo "Longest Repeated Substring in:\n"
for test in Tests:
var st = initSuffixTree(test)
discard st.getLongestRepeatedSubstring()
echo()
 
 
#############################################################################
# Pi calculation.
 
import std/[monotimes, times]
import integers
 
func isr(term, guess: Integer): Integer =
var term = term
result = guess
let value = term * result
while true:
if abs(term - result) <= 1:
break
result = (result + term) shr 1
term = value div result
 
func calcAgm(lam, gm: Integer; z: var Integer; ep: Integer): Integer =
var am: Integer
var lam = lam
var gm = gm
var n = 1
while true:
am = (lam + gm) shr 1
gm = isr(lam, gm)
let v = am - lam
let zi = v * v * n
if zi < ep:
break
z -= zi
inc n, n
lam = am
result = am
 
func bip(exp: int; man = 1): Integer {.inline.} = man * 10^exp
 
func calculatePi(): string =
const Digits = 4_000_000
let am = bip(Digits)
let gm = isqrt(bip(Digits + Digits - 1, 5))
var z = bip(Digits + Digits - 2, 25)
let agm = calcAGM(am, gm, z, bip(Digits + 1))
let pi = agm * agm * bip(Digits - 2) div z
result = $pi
 
let sπ = calculatePi()
for number in [1000, 10000, 100000, 1000000]:
let text = sπ[2..<number] & '$'
let start = getMonoTime()
var st = initSuffixTree(text)
discard st.getlongestrepeatedsubstring(&"first {number} d.p. of π")
echo &" → Temps: {(getMonoTime() - start).inMicroseconds} µs"
 
</syntaxhighlight>
 
{{out}}
<pre>Longest Repeated Substring in:
 
CAAAABAAAABD$: AAAAB
GEEKSFORGEEKS$: GEEKS
AAAAAAAAAA$: AAAAAAAAA
ABCDEFG$: No repeated substring.
ABABABA$: ABABA
ATCGATCGA$: ATCGA
banana$: ana
abcpqrabpqpq$: ab (or) pq
pqrpqpqabab$: ab (or) pq
 
first 1000 d.p. of π: 60943 (or) 42019 (or) 82534 (or) 33446 (or) 93751 (or) 99999 (or) 23846
→ Temps: 780 µs
first 10000 d.p. of π: 7111369 (or) 8530614
→ Temps: 14726 µs
first 100000 d.p. of π: 134926158 (or) 041021944 (or) 201890888
→ Temps: 178656 µs
first 1000000 d.p. of π: 756130190263
→ Temps: 1919290 µs
</pre>
 
=={{header|Phix}}==
{{trans|Go}}
<!--<syntaxhighlight lang="phix">(phixonline)-->
<span style="color: #000080;font-style:italic;">-- demo/rosetta/Ukkonens_Suffix_Tree.exw</span>
<span style="color: #008080;">with</span> <span style="color: #008080;">javascript_semantics</span>
<span style="color: #004080;">integer</span> <span style="color: #000000;">maxChar</span> <span style="color: #0000FF;">=</span> <span style="color: #008000;">'z'</span>
<span style="color: #004080;">sequence</span> <span style="color: #000000;">children</span> <span style="color: #0000FF;">=</span> <span style="color: #0000FF;">{},</span>
<span style="color: #000000;">suffixLinks</span> <span style="color: #0000FF;">=</span> <span style="color: #0000FF;">{},</span>
<span style="color: #000000;">starts</span> <span style="color: #0000FF;">=</span> <span style="color: #0000FF;">{},</span>
<span style="color: #000000;">endIndices</span> <span style="color: #0000FF;">=</span> <span style="color: #0000FF;">{},</span>
<span style="color: #000000;">suffixIndices</span> <span style="color: #0000FF;">=</span> <span style="color: #0000FF;">{},</span>
<span style="color: #000000;">leaves</span> <span style="color: #0000FF;">=</span> <span style="color: #0000FF;">{}</span>
<span style="color: #008080;">function</span> <span style="color: #000000;">new_leaf</span><span style="color: #0000FF;">(</span><span style="color: #004080;">integer</span> <span style="color: #000000;">v</span><span style="color: #0000FF;">=</span><span style="color: #000000;">0</span><span style="color: #0000FF;">)</span>
<span style="color: #000000;">leaves</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">append</span><span style="color: #0000FF;">(</span><span style="color: #000000;">leaves</span><span style="color: #0000FF;">,</span><span style="color: #000000;">v</span><span style="color: #0000FF;">)</span>
<span style="color: #008080;">return</span> <span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">leaves</span><span style="color: #0000FF;">)</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">function</span>
<span style="color: #004080;">string</span> <span style="color: #000000;">text</span>
<span style="color: #004080;">integer</span> <span style="color: #000000;">splitEndIdx</span><span style="color: #0000FF;">,</span>
<span style="color: #000000;">rootEndIdx</span><span style="color: #0000FF;">,</span>
<span style="color: #000000;">leafEndIdx</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">new_leaf</span><span style="color: #0000FF;">(),</span>
<span style="color: #000000;">root</span> <span style="color: #0000FF;">=</span> <span style="color: #004600;">NULL</span><span style="color: #0000FF;">,</span>
<span style="color: #000000;">lastNewNode</span><span style="color: #0000FF;">,</span>
<span style="color: #000000;">activeNode</span><span style="color: #0000FF;">,</span>
<span style="color: #000000;">activeEdge</span> <span style="color: #0000FF;">=</span> <span style="color: #0000FF;">-</span><span style="color: #000000;">1</span><span style="color: #0000FF;">,</span>
<span style="color: #000000;">activeLength</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">0</span><span style="color: #0000FF;">,</span>
<span style="color: #000000;">remainingSuffixCount</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">0</span><span style="color: #0000FF;">,</span>
<span style="color: #000000;">size</span> <span style="color: #0000FF;">=</span> <span style="color: #0000FF;">-</span><span style="color: #000000;">1</span>
<span style="color: #008080;">function</span> <span style="color: #000000;">newNode</span><span style="color: #0000FF;">(</span><span style="color: #004080;">integer</span> <span style="color: #000000;">start</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">finishIdx</span><span style="color: #0000FF;">,</span> <span style="color: #004080;">bool</span> <span style="color: #000000;">bKids</span><span style="color: #0000FF;">=</span><span style="color: #004600;">false</span><span style="color: #0000FF;">)</span>
<span style="color: #000000;">children</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">append</span><span style="color: #0000FF;">(</span><span style="color: #000000;">children</span><span style="color: #0000FF;">,</span><span style="color: #008080;">iff</span><span style="color: #0000FF;">(</span><span style="color: #000000;">bKids</span><span style="color: #0000FF;">?</span><span style="color: #7060A8;">repeat</span><span style="color: #0000FF;">(</span><span style="color: #004600;">NULL</span><span style="color: #0000FF;">,</span><span style="color: #000000;">maxChar</span><span style="color: #0000FF;">):</span><span style="color: #000000;">0</span><span style="color: #0000FF;">))</span>
<span style="color: #000000;">suffixLinks</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">append</span><span style="color: #0000FF;">(</span><span style="color: #000000;">suffixLinks</span><span style="color: #0000FF;">,</span><span style="color: #000000;">root</span><span style="color: #0000FF;">)</span>
<span style="color: #000000;">starts</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">append</span><span style="color: #0000FF;">(</span><span style="color: #000000;">starts</span><span style="color: #0000FF;">,</span><span style="color: #000000;">start</span><span style="color: #0000FF;">)</span>
<span style="color: #000000;">endIndices</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">append</span><span style="color: #0000FF;">(</span><span style="color: #000000;">endIndices</span><span style="color: #0000FF;">,</span><span style="color: #000000;">finishIdx</span><span style="color: #0000FF;">)</span>
<span style="color: #000000;">suffixIndices</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">append</span><span style="color: #0000FF;">(</span><span style="color: #000000;">suffixIndices</span><span style="color: #0000FF;">,-</span><span style="color: #000000;">1</span><span style="color: #0000FF;">)</span>
<span style="color: #008080;">return</span> <span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">children</span><span style="color: #0000FF;">)</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">function</span>
<span style="color: #008080;">function</span> <span style="color: #000000;">edgeLength</span><span style="color: #0000FF;">(</span><span style="color: #004080;">integer</span> <span style="color: #000000;">n</span><span style="color: #0000FF;">)</span>
<span style="color: #008080;">return</span> <span style="color: #008080;">iff</span><span style="color: #0000FF;">(</span><span style="color: #000000;">n</span><span style="color: #0000FF;">==</span><span style="color: #000000;">root</span><span style="color: #0000FF;">?</span><span style="color: #000000;">0</span><span style="color: #0000FF;">:</span><span style="color: #000000;">leaves</span><span style="color: #0000FF;">[</span><span style="color: #000000;">endIndices</span><span style="color: #0000FF;">[</span><span style="color: #000000;">n</span><span style="color: #0000FF;">]]</span> <span style="color: #0000FF;">-</span> <span style="color: #000000;">starts</span><span style="color: #0000FF;">[</span><span style="color: #000000;">n</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">+</span> <span style="color: #000000;">1</span><span style="color: #0000FF;">)</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">function</span>
<span style="color: #008080;">function</span> <span style="color: #000000;">walkDown</span><span style="color: #0000FF;">(</span><span style="color: #004080;">integer</span> <span style="color: #000000;">currNode</span><span style="color: #0000FF;">)</span>
<span style="color: #004080;">integer</span> <span style="color: #000000;">l</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">edgeLength</span><span style="color: #0000FF;">(</span><span style="color: #000000;">currNode</span><span style="color: #0000FF;">)</span>
<span style="color: #008080;">if</span> <span style="color: #000000;">activeLength</span> <span style="color: #0000FF;">>=</span> <span style="color: #000000;">l</span> <span style="color: #008080;">then</span>
<span style="color: #000000;">activeEdge</span> <span style="color: #0000FF;">+=</span> <span style="color: #000000;">l</span>
<span style="color: #000000;">activeLength</span> <span style="color: #0000FF;">-=</span> <span style="color: #000000;">l</span>
<span style="color: #000000;">activeNode</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">currNode</span>
<span style="color: #008080;">return</span> <span style="color: #004600;">true</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
<span style="color: #008080;">return</span> <span style="color: #004600;">false</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">function</span>
<span style="color: #008080;">procedure</span> <span style="color: #000000;">extendSuffixTree</span><span style="color: #0000FF;">(</span><span style="color: #004080;">integer</span> <span style="color: #000000;">pos</span><span style="color: #0000FF;">)</span>
<span style="color: #000000;">leaves</span><span style="color: #0000FF;">[</span><span style="color: #000000;">leafEndIdx</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">pos</span>
<span style="color: #000000;">remainingSuffixCount</span> <span style="color: #0000FF;">+=</span> <span style="color: #000000;">1</span>
<span style="color: #000000;">lastNewNode</span> <span style="color: #0000FF;">=</span> <span style="color: #004600;">NULL</span>
<span style="color: #008080;">while</span> <span style="color: #000000;">remainingSuffixCount</span> <span style="color: #0000FF;">></span> <span style="color: #000000;">0</span> <span style="color: #008080;">do</span>
<span style="color: #008080;">if</span> <span style="color: #000000;">activeLength</span> <span style="color: #0000FF;">==</span> <span style="color: #000000;">0</span> <span style="color: #008080;">then</span>
<span style="color: #000000;">activeEdge</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">pos</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
<span style="color: #004080;">integer</span> <span style="color: #000000;">ta</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">text</span><span style="color: #0000FF;">[</span><span style="color: #000000;">activeEdge</span><span style="color: #0000FF;">]</span>
<span style="color: #004080;">bool</span> <span style="color: #000000;">ca0</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">children</span><span style="color: #0000FF;">[</span><span style="color: #000000;">activeNode</span><span style="color: #0000FF;">]=</span><span style="color: #000000;">0</span>
<span style="color: #004080;">integer</span> <span style="color: #000000;">next</span> <span style="color: #0000FF;">=</span> <span style="color: #008080;">iff</span><span style="color: #0000FF;">(</span><span style="color: #000000;">ca0</span><span style="color: #0000FF;">?</span><span style="color: #004600;">NULL</span><span style="color: #0000FF;">:</span><span style="color: #000000;">children</span><span style="color: #0000FF;">[</span><span style="color: #000000;">activeNode</span><span style="color: #0000FF;">][</span><span style="color: #000000;">ta</span><span style="color: #0000FF;">])</span>
<span style="color: #008080;">if</span> <span style="color: #000000;">next</span><span style="color: #0000FF;">==</span><span style="color: #004600;">null</span> <span style="color: #008080;">then</span>
<span style="color: #008080;">if</span> <span style="color: #000000;">ca0</span> <span style="color: #008080;">then</span>
<span style="color: #000000;">children</span><span style="color: #0000FF;">[</span><span style="color: #000000;">activeNode</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">repeat</span><span style="color: #0000FF;">(</span><span style="color: #004600;">NULL</span><span style="color: #0000FF;">,</span><span style="color: #000000;">maxChar</span><span style="color: #0000FF;">)</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
<span style="color: #000000;">children</span><span style="color: #0000FF;">[</span><span style="color: #000000;">activeNode</span><span style="color: #0000FF;">][</span><span style="color: #000000;">ta</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">newNode</span><span style="color: #0000FF;">(</span><span style="color: #000000;">pos</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">leafEndIdx</span><span style="color: #0000FF;">)</span>
<span style="color: #008080;">if</span> <span style="color: #000000;">lastNewNode</span><span style="color: #0000FF;">!=</span><span style="color: #004600;">NULL</span> <span style="color: #008080;">then</span>
<span style="color: #000000;">suffixLinks</span><span style="color: #0000FF;">[</span><span style="color: #000000;">lastNewNode</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">activeNode</span>
<span style="color: #000000;">lastNewNode</span> <span style="color: #0000FF;">=</span> <span style="color: #004600;">NULL</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
<span style="color: #008080;">else</span>
<span style="color: #008080;">if</span> <span style="color: #000000;">walkDown</span><span style="color: #0000FF;">(</span><span style="color: #000000;">next</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">then</span>
<span style="color: #008080;">continue</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
<span style="color: #004080;">integer</span> <span style="color: #000000;">tp</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">text</span><span style="color: #0000FF;">[</span><span style="color: #000000;">pos</span><span style="color: #0000FF;">]</span>
<span style="color: #008080;">if</span> <span style="color: #000000;">text</span><span style="color: #0000FF;">[</span><span style="color: #000000;">starts</span><span style="color: #0000FF;">[</span><span style="color: #000000;">next</span><span style="color: #0000FF;">]+</span><span style="color: #000000;">activeLength</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">==</span> <span style="color: #000000;">tp</span> <span style="color: #008080;">then</span>
<span style="color: #008080;">if</span> <span style="color: #000000;">lastNewNode</span><span style="color: #0000FF;">!=</span><span style="color: #004600;">NULL</span> <span style="color: #008080;">and</span> <span style="color: #000000;">activeNode</span><span style="color: #0000FF;">!=</span><span style="color: #000000;">root</span> <span style="color: #008080;">then</span>
<span style="color: #000000;">suffixLinks</span><span style="color: #0000FF;">[</span><span style="color: #000000;">lastNewNode</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">activeNode</span>
<span style="color: #000000;">lastNewNode</span> <span style="color: #0000FF;">=</span> <span style="color: #004600;">NULL</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
<span style="color: #000000;">activeLength</span> <span style="color: #0000FF;">+=</span> <span style="color: #000000;">1</span>
<span style="color: #008080;">exit</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
<span style="color: #004080;">integer</span> <span style="color: #000000;">temp</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">starts</span><span style="color: #0000FF;">[</span><span style="color: #000000;">next</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">+</span> <span style="color: #000000;">activeLength</span> <span style="color: #0000FF;">-</span> <span style="color: #000000;">1</span>
<span style="color: #000000;">splitEndIdx</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">new_leaf</span><span style="color: #0000FF;">(</span><span style="color: #000000;">temp</span><span style="color: #0000FF;">)</span>
<span style="color: #004080;">integer</span> <span style="color: #000000;">splitnode</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">newNode</span><span style="color: #0000FF;">(</span><span style="color: #000000;">starts</span><span style="color: #0000FF;">[</span><span style="color: #000000;">next</span><span style="color: #0000FF;">],</span> <span style="color: #000000;">splitEndIdx</span><span style="color: #0000FF;">,</span> <span style="color: #004600;">true</span><span style="color: #0000FF;">)</span>
<span style="color: #000000;">ta</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">text</span><span style="color: #0000FF;">[</span><span style="color: #000000;">activeEdge</span><span style="color: #0000FF;">]</span>
<span style="color: #000000;">children</span><span style="color: #0000FF;">[</span><span style="color: #000000;">activeNode</span><span style="color: #0000FF;">][</span><span style="color: #000000;">ta</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">splitnode</span>
<span style="color: #000000;">children</span><span style="color: #0000FF;">[</span><span style="color: #000000;">splitnode</span><span style="color: #0000FF;">][</span><span style="color: #000000;">tp</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">newNode</span><span style="color: #0000FF;">(</span><span style="color: #000000;">pos</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">leafEndIdx</span><span style="color: #0000FF;">)</span>
<span style="color: #000000;">starts</span><span style="color: #0000FF;">[</span><span style="color: #000000;">next</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">+=</span> <span style="color: #000000;">activeLength</span>
<span style="color: #000000;">children</span><span style="color: #0000FF;">[</span><span style="color: #000000;">splitnode</span><span style="color: #0000FF;">][</span><span style="color: #000000;">text</span><span style="color: #0000FF;">[</span><span style="color: #000000;">starts</span><span style="color: #0000FF;">[</span><span style="color: #000000;">next</span><span style="color: #0000FF;">]]]</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">next</span>
<span style="color: #008080;">if</span> <span style="color: #000000;">lastNewNode</span><span style="color: #0000FF;">!=</span><span style="color: #004600;">NULL</span> <span style="color: #008080;">then</span>
<span style="color: #000000;">suffixLinks</span><span style="color: #0000FF;">[</span><span style="color: #000000;">lastNewNode</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">splitnode</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
<span style="color: #000000;">lastNewNode</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">splitnode</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
<span style="color: #000000;">remainingSuffixCount</span> <span style="color: #0000FF;">-=</span> <span style="color: #000000;">1</span>
<span style="color: #008080;">if</span> <span style="color: #000000;">activeNode</span><span style="color: #0000FF;">==</span><span style="color: #000000;">root</span> <span style="color: #008080;">and</span> <span style="color: #000000;">activeLength</span><span style="color: #0000FF;">></span><span style="color: #000000;">0</span> <span style="color: #008080;">then</span>
<span style="color: #000000;">activeLength</span> <span style="color: #0000FF;">-=</span> <span style="color: #000000;">1</span>
<span style="color: #000000;">activeEdge</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">pos</span> <span style="color: #0000FF;">-</span> <span style="color: #000000;">remainingSuffixCount</span> <span style="color: #0000FF;">+</span> <span style="color: #000000;">1</span>
<span style="color: #008080;">elsif</span> <span style="color: #000000;">activeNode</span><span style="color: #0000FF;">!=</span><span style="color: #000000;">root</span> <span style="color: #008080;">then</span>
<span style="color: #000000;">activeNode</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">suffixLinks</span><span style="color: #0000FF;">[</span><span style="color: #000000;">activeNode</span><span style="color: #0000FF;">]</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">while</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">procedure</span>
<span style="color: #008080;">procedure</span> <span style="color: #000000;">setSuffixIndexByDFS</span><span style="color: #0000FF;">(</span><span style="color: #004080;">integer</span> <span style="color: #000000;">n</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">labelHeight</span><span style="color: #0000FF;">)</span>
<span style="color: #008080;">if</span> <span style="color: #000000;">n</span><span style="color: #0000FF;">!=</span><span style="color: #004600;">NULL</span> <span style="color: #008080;">then</span>
<span style="color: #008080;">if</span> <span style="color: #000000;">children</span><span style="color: #0000FF;">[</span><span style="color: #000000;">n</span><span style="color: #0000FF;">]=</span><span style="color: #000000;">0</span> <span style="color: #008080;">then</span>
<span style="color: #000000;">suffixIndices</span><span style="color: #0000FF;">[</span><span style="color: #000000;">n</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">size</span> <span style="color: #0000FF;">-</span> <span style="color: #000000;">labelHeight</span>
<span style="color: #008080;">else</span>
<span style="color: #004080;">bool</span> <span style="color: #000000;">leaf</span> <span style="color: #0000FF;">=</span> <span style="color: #004600;">true</span>
<span style="color: #008080;">for</span> <span style="color: #000000;">i</span><span style="color: #0000FF;">=</span><span style="color: #000000;">1</span> <span style="color: #008080;">to</span> <span style="color: #000000;">maxChar</span> <span style="color: #008080;">do</span>
<span style="color: #004080;">integer</span> <span style="color: #000000;">nci</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">children</span><span style="color: #0000FF;">[</span><span style="color: #000000;">n</span><span style="color: #0000FF;">][</span><span style="color: #000000;">i</span><span style="color: #0000FF;">]</span>
<span style="color: #008080;">if</span> <span style="color: #000000;">nci</span><span style="color: #0000FF;">!=</span><span style="color: #004600;">NULL</span> <span style="color: #008080;">then</span>
<span style="color: #000000;">leaf</span> <span style="color: #0000FF;">=</span> <span style="color: #004600;">false</span>
<span style="color: #000000;">setSuffixIndexByDFS</span><span style="color: #0000FF;">(</span><span style="color: #000000;">nci</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">labelHeight</span><span style="color: #0000FF;">+</span><span style="color: #000000;">edgeLength</span><span style="color: #0000FF;">(</span><span style="color: #000000;">nci</span><span style="color: #0000FF;">))</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">for</span>
<span style="color: #008080;">if</span> <span style="color: #000000;">leaf</span> <span style="color: #008080;">then</span> <span style="color: #0000FF;">?</span><span style="color: #000000;">9</span><span style="color: #0000FF;">/</span><span style="color: #000000;">0</span> <span style="color: #008080;">end</span> <span style="color: #008080;">if</span> <span style="color: #000080;font-style:italic;">-- (sanity check)</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">procedure</span>
<span style="color: #008080;">procedure</span> <span style="color: #000000;">buildSuffixTree</span><span style="color: #0000FF;">()</span>
<span style="color: #000000;">size</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">text</span><span style="color: #0000FF;">)</span>
<span style="color: #000000;">rootEndIdx</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">new_leaf</span><span style="color: #0000FF;">(-</span><span style="color: #000000;">1</span><span style="color: #0000FF;">)</span>
<span style="color: #000000;">root</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">newNode</span><span style="color: #0000FF;">(-</span><span style="color: #000000;">1</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">rootEndIdx</span><span style="color: #0000FF;">)</span>
<span style="color: #000000;">activeNode</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">root</span>
<span style="color: #008080;">for</span> <span style="color: #000000;">i</span><span style="color: #0000FF;">=</span><span style="color: #000000;">1</span> <span style="color: #008080;">to</span> <span style="color: #000000;">size</span> <span style="color: #008080;">do</span>
<span style="color: #000000;">extendSuffixTree</span><span style="color: #0000FF;">(</span><span style="color: #000000;">i</span><span style="color: #0000FF;">)</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">for</span>
<span style="color: #004080;">integer</span> <span style="color: #000000;">labelHeight</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">0</span>
<span style="color: #000000;">setSuffixIndexByDFS</span><span style="color: #0000FF;">(</span><span style="color: #000000;">root</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">labelHeight</span><span style="color: #0000FF;">)</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">procedure</span>
<span style="color: #008080;">procedure</span> <span style="color: #000000;">doTraversal</span><span style="color: #0000FF;">(</span><span style="color: #004080;">integer</span> <span style="color: #000000;">n</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">labelHeight</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">maxHeightIdx</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">substringStartIndex</span><span style="color: #0000FF;">)</span>
<span style="color: #008080;">if</span> <span style="color: #000000;">n</span><span style="color: #0000FF;">!=</span><span style="color: #004600;">NULL</span> <span style="color: #008080;">then</span>
<span style="color: #004080;">integer</span> <span style="color: #000000;">nsi</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">suffixIndices</span><span style="color: #0000FF;">[</span><span style="color: #000000;">n</span><span style="color: #0000FF;">],</span> <span style="color: #000000;">newHeight</span>
<span style="color: #008080;">if</span> <span style="color: #000000;">nsi</span> <span style="color: #0000FF;">==</span> <span style="color: #0000FF;">-</span><span style="color: #000000;">1</span> <span style="color: #008080;">then</span>
<span style="color: #008080;">for</span> <span style="color: #000000;">i</span><span style="color: #0000FF;">=</span><span style="color: #000000;">1</span> <span style="color: #008080;">to</span> <span style="color: #000000;">maxChar</span> <span style="color: #008080;">do</span>
<span style="color: #004080;">integer</span> <span style="color: #000000;">nci</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">children</span><span style="color: #0000FF;">[</span><span style="color: #000000;">n</span><span style="color: #0000FF;">][</span><span style="color: #000000;">i</span><span style="color: #0000FF;">]</span>
<span style="color: #008080;">if</span> <span style="color: #000000;">nci</span><span style="color: #0000FF;">!=</span><span style="color: #004600;">NULL</span> <span style="color: #008080;">then</span>
<span style="color: #000000;">newHeight</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">labelHeight</span><span style="color: #0000FF;">+</span><span style="color: #000000;">edgeLength</span><span style="color: #0000FF;">(</span><span style="color: #000000;">nci</span><span style="color: #0000FF;">)</span>
<span style="color: #000000;">doTraversal</span><span style="color: #0000FF;">(</span><span style="color: #000000;">nci</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">newHeight</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">maxHeightIdx</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">substringStartIndex</span><span style="color: #0000FF;">)</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">for</span>
<span style="color: #008080;">elsif</span> <span style="color: #000000;">nsi</span> <span style="color: #0000FF;">></span> <span style="color: #0000FF;">-</span><span style="color: #000000;">1</span> <span style="color: #008080;">then</span>
<span style="color: #000000;">newHeight</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">labelHeight</span><span style="color: #0000FF;">-</span><span style="color: #000000;">edgeLength</span><span style="color: #0000FF;">(</span><span style="color: #000000;">n</span><span style="color: #0000FF;">)</span>
<span style="color: #008080;">if</span> <span style="color: #000000;">leaves</span><span style="color: #0000FF;">[</span><span style="color: #000000;">maxHeightIdx</span><span style="color: #0000FF;">]<</span><span style="color: #000000;">newHeight</span> <span style="color: #008080;">then</span>
<span style="color: #000000;">leaves</span><span style="color: #0000FF;">[</span><span style="color: #000000;">maxHeightIdx</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">newHeight</span>
<span style="color: #000000;">leaves</span><span style="color: #0000FF;">[</span><span style="color: #000000;">substringStartIndex</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">nsi</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">procedure</span>
<span style="color: #008080;">function</span> <span style="color: #000000;">getLongestRepeatedSubstring</span><span style="color: #0000FF;">()</span>
<span style="color: #004080;">integer</span> <span style="color: #000000;">maxHeightIdx</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">new_leaf</span><span style="color: #0000FF;">(),</span>
<span style="color: #000000;">substringStartIndex</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">new_leaf</span><span style="color: #0000FF;">()</span>
<span style="color: #000000;">doTraversal</span><span style="color: #0000FF;">(</span><span style="color: #000000;">root</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">0</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">maxHeightIdx</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">substringStartIndex</span><span style="color: #0000FF;">)</span>
<span style="color: #004080;">integer</span> <span style="color: #000000;">maxHeight</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">leaves</span><span style="color: #0000FF;">[</span><span style="color: #000000;">maxHeightIdx</span><span style="color: #0000FF;">],</span>
<span style="color: #000000;">start</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">leaves</span><span style="color: #0000FF;">[</span><span style="color: #000000;">substringStartIndex</span><span style="color: #0000FF;">]</span>
<span style="color: #004080;">string</span> <span style="color: #000000;">t</span> <span style="color: #0000FF;">=</span> <span style="color: #008080;">iff</span><span style="color: #0000FF;">(</span><span style="color: #000000;">maxHeight</span><span style="color: #0000FF;">=</span><span style="color: #000000;">0</span><span style="color: #0000FF;">?</span><span style="color: #008000;">"No repeated substring"</span>
<span style="color: #0000FF;">:</span><span style="color: #000000;">text</span><span style="color: #0000FF;">[</span><span style="color: #000000;">start</span><span style="color: #0000FF;">+</span><span style="color: #000000;">1</span><span style="color: #0000FF;">..</span><span style="color: #000000;">start</span><span style="color: #0000FF;">+</span><span style="color: #000000;">maxHeight</span><span style="color: #0000FF;">])</span>
<span style="color: #008080;">return</span> <span style="color: #000000;">t</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">function</span>
<span style="color: #008080;">constant</span> <span style="color: #000000;">tests</span> <span style="color: #0000FF;">=</span> <span style="color: #0000FF;">{</span><span style="color: #008000;">"CAAAABAAAABD$"</span><span style="color: #0000FF;">,</span>
<span style="color: #008000;">"GEEKSFORGEEKS$"</span><span style="color: #0000FF;">,</span>
<span style="color: #008000;">"AAAAAAAAAA$"</span><span style="color: #0000FF;">,</span>
<span style="color: #008000;">"ABCDEFG$"</span><span style="color: #0000FF;">,</span>
<span style="color: #008000;">"ABABABA$"</span><span style="color: #0000FF;">,</span>
<span style="color: #008000;">"ATCGATCGA$"</span><span style="color: #0000FF;">,</span>
<span style="color: #008000;">"banana$"</span><span style="color: #0000FF;">,</span>
<span style="color: #008000;">"abcpqrabpqpq$"</span><span style="color: #0000FF;">,</span>
<span style="color: #008000;">"pqrpqpqabab$"</span><span style="color: #0000FF;">}</span>
<span style="color: #7060A8;">printf</span><span style="color: #0000FF;">(</span><span style="color: #000000;">1</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"Longest Repeated Substring in:\n"</span><span style="color: #0000FF;">)</span>
<span style="color: #008080;">for</span> <span style="color: #000000;">i</span><span style="color: #0000FF;">=</span><span style="color: #000000;">1</span> <span style="color: #008080;">to</span> <span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">tests</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">do</span>
<span style="color: #000000;">text</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">tests</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">]</span>
<span style="color: #000000;">buildSuffixTree</span><span style="color: #0000FF;">()</span>
<span style="color: #7060A8;">printf</span><span style="color: #0000FF;">(</span><span style="color: #000000;">1</span><span style="color: #0000FF;">,</span><span style="color: #008000;">" %s is: %s\n"</span><span style="color: #0000FF;">,</span> <span style="color: #0000FF;">{</span><span style="color: #000000;">text</span><span style="color: #0000FF;">,</span><span style="color: #000000;">getLongestRepeatedSubstring</span><span style="color: #0000FF;">()})</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">for</span>
<span style="color: #7060A8;">printf</span><span style="color: #0000FF;">(</span><span style="color: #000000;">1</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"\n"</span><span style="color: #0000FF;">)</span>
<span style="color: #008080;">include</span> <span style="color: #004080;">mpfr</span><span style="color: #0000FF;">.</span><span style="color: #000000;">e</span>
<span style="color: #004080;">string</span> <span style="color: #000000;">piStr</span>
<span style="color: #008080;">if</span> <span style="color: #7060A8;">platform</span><span style="color: #0000FF;">()=</span><span style="color: #004600;">JS</span> <span style="color: #008080;">then</span>
<span style="color: #004080;">mpfr</span> <span style="color: #000000;">pi</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">mpfr_init</span><span style="color: #0000FF;">(</span><span style="color: #000000;">0</span><span style="color: #0000FF;">,-</span><span style="color: #000000;">1001</span><span style="color: #0000FF;">)</span> <span style="color: #000080;font-style:italic;">-- (set precision to 1,000 dp, plus the "3.")</span>
<span style="color: #7060A8;">mpfr_const_pi</span><span style="color: #0000FF;">(</span><span style="color: #000000;">pi</span><span style="color: #0000FF;">)</span>
<span style="color: #000000;">piStr</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">mpfr_get_fixed</span><span style="color: #0000FF;">(</span><span style="color: #000000;">pi</span><span style="color: #0000FF;">,</span><span style="color: #000000;">1000</span><span style="color: #0000FF;">)</span> <span style="color: #000080;font-style:italic;">-- (all we can really manage under pwa/p2js)</span>
<span style="color: #008080;">else</span>
<span style="color: #000080;font-style:italic;">-- gmp crashes when I try 100,000 dp, so just get from file</span>
<span style="color: #000000;">piStr</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">get_text</span><span style="color: #0000FF;">(</span><span style="color: #008000;">`E:\downloads\misc\arm\pi-10million.txt`</span><span style="color: #0000FF;">)</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
<span style="color: #000000;">piStr</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">piStr</span><span style="color: #0000FF;">[</span><span style="color: #000000;">3</span><span style="color: #0000FF;">..$]</span> <span style="color: #000080;font-style:italic;">-- discard leading "3."</span>
<span style="color: #000000;">maxChar</span> <span style="color: #0000FF;">=</span> <span style="color: #008000;">'9'</span>
<span style="color: #008080;">for</span> <span style="color: #000000;">i</span><span style="color: #0000FF;">=</span><span style="color: #000000;">3</span> <span style="color: #008080;">to</span> <span style="color: #008080;">iff</span><span style="color: #0000FF;">(</span><span style="color: #7060A8;">platform</span><span style="color: #0000FF;">()=</span><span style="color: #004600;">JS</span><span style="color: #0000FF;">?</span><span style="color: #000000;">3</span><span style="color: #0000FF;">:</span><span style="color: #000000;">6</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">do</span>
<span style="color: #004080;">atom</span> <span style="color: #000000;">t0</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">time</span><span style="color: #0000FF;">()</span>
<span style="color: #004080;">integer</span> <span style="color: #000000;">n</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">power</span><span style="color: #0000FF;">(</span><span style="color: #000000;">10</span><span style="color: #0000FF;">,</span><span style="color: #000000;">i</span><span style="color: #0000FF;">)</span>
<span style="color: #000000;">text</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">piStr</span><span style="color: #0000FF;">[</span><span style="color: #000000;">1</span><span style="color: #0000FF;">..</span><span style="color: #000000;">n</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">&</span> <span style="color: #008000;">"$"</span>
<span style="color: #000000;">buildSuffixTree</span><span style="color: #0000FF;">()</span>
<span style="color: #004080;">string</span> <span style="color: #000000;">r</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">getLongestRepeatedSubstring</span><span style="color: #0000FF;">(),</span>
<span style="color: #000000;">e</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">elapsed</span><span style="color: #0000FF;">(</span><span style="color: #7060A8;">time</span><span style="color: #0000FF;">()-</span><span style="color: #000000;">t0</span><span style="color: #0000FF;">)</span>
<span style="color: #7060A8;">printf</span><span style="color: #0000FF;">(</span><span style="color: #000000;">1</span><span style="color: #0000FF;">,</span><span style="color: #008000;">" first %,d d.p. of Pi is: %s (%s)\n"</span><span style="color: #0000FF;">,</span> <span style="color: #0000FF;">{</span><span style="color: #000000;">n</span><span style="color: #0000FF;">,</span><span style="color: #000000;">r</span><span style="color: #0000FF;">,</span><span style="color: #000000;">e</span><span style="color: #0000FF;">})</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">for</span>
<!--</syntaxhighlight>-->
{{out}}
<pre>
Longest Repeated Substring in:
CAAAABAAAABD$ is: AAAAB
GEEKSFORGEEKS$ is: GEEKS
AAAAAAAAAA$ is: AAAAAAAAA
ABCDEFG$ is: No repeated substring
ABABABA$ is: ABABA
ATCGATCGA$ is: ATCGA
banana$ is: ana
abcpqrabpqpq$ is: ab
pqrpqpqabab$ is: ab
 
first 1,000 d.p. of Pi is: 23846 (0s)
first 10,000 d.p. of Pi is: 7111369 (0.0s)
first 100,000 d.p. of Pi is: 041021944 (0.3s)
first 1,000,000 d.p. of Pi is: 756130190263 (3.2s)
</pre>
Note that mpfr_const_pi() struggles to generate more than 1,000 digits of pi under pwa/p2js [and will continue to do so unless someone graciously donates a decent/fast Chudnovsky method in pure Phix or JavaScript...]
 
=={{header|Wren}}==
{{trans|Go}}
{{libheader|Wren-big}}
{{libheader|Wren-dynamic}}
{{libheader|Wren-trait}}
As it would take a very long time to calculate the first 100,000 digits of Pi using the code from the [[Arithmetic-geometric_mean/Calculate_Pi#Wren]] task, I have instead saved the digits produced by the Go entry to a file (which only takes a few seconds) and then loaded that into the Wren script.
 
Having done that, the timings for extracting the longest repeated sequence of digits are reasonably quick and fairly linear as expected.
<syntaxhighlight lang="wren">import "./big" for BigRat
import "./dynamic" for Struct
import "./trait" for ByRef
import "io" for File
 
var maxChar = 128
 
var Node = Struct.create("Node", ["children", "suffixLink", "start", "pEnd", "suffixIndex"])
 
var text = ""
var root = null
var lastNewNode = null
var activeNode = null
var activeEdge = -1
var activeLength = 0
var remainingSuffixCount = 0
var pLeafEnd = ByRef.new(-1)
var pRootEnd = null
var pSplitEnd = null
var size = -1
 
var newNode = Fn.new { |start, pEnd|
var children = List.filled(maxChar, null)
var suffixLink = root
var suffixIndex = -1
return Node.new(children, suffixLink, start, pEnd, suffixIndex)
}
 
var edgeLength = Fn.new { |n|
if (n == root) return 0
return n.pEnd.value - n.start + 1
}
 
var walkDown = Fn.new { |currNode|
var el = edgeLength.call(currNode)
if (activeLength >= el) {
activeEdge = activeEdge + el
activeLength = activeLength - el
activeNode = currNode
return true
}
return false
}
 
var extendSuffixTree = Fn.new { |pos|
pLeafEnd.value = pos
remainingSuffixCount = remainingSuffixCount + 1
lastNewNode = null
while (remainingSuffixCount > 0) {
if (activeLength == 0) activeEdge = pos
if (!activeNode.children[text[activeEdge].bytes[0]]) {
activeNode.children[text[activeEdge].bytes[0]] = newNode.call(pos, pLeafEnd)
if (lastNewNode) {
lastNewNode.suffixLink = activeNode
lastNewNode = null
}
} else {
var next = activeNode.children[text[activeEdge].bytes[0]]
if (walkDown.call(next)) continue
if (text[next.start + activeLength] == text[pos]) {
if (lastNewNode && activeNode != root) {
lastNewNode.suffixLink = activeNode
lastNewNode = null
}
activeLength = activeLength + 1
break
}
var temp = next.start + activeLength - 1
pSplitEnd = ByRef.new(temp)
var split = newNode.call(next.start, pSplitEnd)
activeNode.children[text[activeEdge].bytes[0]] = split
split.children[text[pos].bytes[0]] = newNode.call(pos, pLeafEnd)
next.start = next.start + activeLength
split.children[text[next.start].bytes[0]] = next
if (lastNewNode) lastNewNode.suffixLink = split
lastNewNode = split
}
remainingSuffixCount = remainingSuffixCount - 1
if (activeNode == root && activeLength > 0) {
activeLength = activeLength - 1
activeEdge = pos - remainingSuffixCount + 1
} else if (activeNode != root) {
activeNode = activeNode.suffixLink
}
}
}
 
var setSuffixIndexByDFS // recursive
setSuffixIndexByDFS = Fn.new { |n, labelHeight|
if (!n) return
if (n.start != -1) {
// Uncomment line below to print suffix tree
// System.write(text[n.start..n.pEnd.value])
}
var leaf = 1
for (i in 0...maxChar) {
if (n.children[i]) {
// Uncomment the 3 lines below to print suffix index
// if (leaf == 1 && n.start != -1) {
// System.print(" [%(n.suffixIndex)]")
// }
leaf = 0
setSuffixIndexByDFS.call(n.children[i], labelHeight + edgeLength.call(n.children[i]))
}
}
if (leaf == 1) {
n.suffixIndex = size - labelHeight
// Uncomment line below to print suffix index
// System.print(" [%(n.suffixIndex)]")
}
}
 
var buildSuffixTree = Fn.new {
size = text.count
var temp = -1
pRootEnd = ByRef.new(temp)
root = newNode.call(-1, pRootEnd)
activeNode = root
for (i in 0...size) extendSuffixTree.call(i)
var labelHeight = 0
setSuffixIndexByDFS.call(root, labelHeight)
}
 
var doTraversal // recursive
doTraversal = Fn.new { |n, labelHeight, pMaxHeight, pSubstringStartIndex|
if (!n) return
if (n.suffixIndex == -1) {
for (i in 0...maxChar) {
if (n.children[i]) {
doTraversal.call(n.children[i], labelHeight + edgeLength.call(n.children[i]),
pMaxHeight, pSubstringStartIndex)
}
}
} else if (n.suffixIndex > -1 && (pMaxHeight.value < labelHeight - edgeLength.call(n))) {
pMaxHeight.value = labelHeight - edgeLength.call(n)
pSubstringStartIndex.value = n.suffixIndex
}
}
 
var getLongestRepeatedSubstring = Fn.new { |s|
var maxHeight = 0
var substringStartIndex = 0
var pMaxHeight = ByRef.new(maxHeight)
var pSubstringStartIndex = ByRef.new(substringStartIndex)
doTraversal.call(root, 0, pMaxHeight, pSubstringStartIndex)
maxHeight = pMaxHeight.value
substringStartIndex = pSubstringStartIndex.value
// Uncomment line below to print maxHeight and substringStartIndex
// System.print("maxHeight %(maxHeight), substringStartIndex %(substringStartIndex)")
if (s == "") {
System.write(" %(text) is: ")
} else {
System.write(" %(s) is: ")
}
var k = 0
while (k < maxHeight) {
System.write(text[k + substringStartIndex])
k = k + 1
}
if (k == 0) {
System.write("No repeated substring")
}
System.print()
}
 
var tests = [
"GEEKSFORGEEKS$",
"AAAAAAAAAA$",
"ABCDEFG$",
"ABABABA$",
"ATCGATCGA$",
"banana$",
"abcpqrabpqpq$",
"pqrpqpqabab$",
]
System.print("Longest Repeated Substring in:\n")
for (test in tests) {
text = test
buildSuffixTree.call()
getLongestRepeatedSubstring.call("")
}
System.print()
 
// load pi to 100,182 digits
var piStr = File.read("pi_100000.txt")
piStr = piStr[2..-1] // remove initial 3.
var numbers = [1e3, 1e4, 1e5]
maxChar = 58
for (number in numbers) {
var start = System.clock
text = piStr[0...number] + "$"
buildSuffixTree.call()
getLongestRepeatedSubstring.call("first %(number) d.p. of Pi")
var elapsed = (System.clock - start) * 1000
System.print(" (this took %(elapsed) ms)\n")
}</syntaxhighlight>
 
{{out}}
<pre>
Longest Repeated Substring in:
 
GEEKSFORGEEKS$ is: GEEKS
AAAAAAAAAA$ is: AAAAAAAAA
ABCDEFG$ is: No repeated substring
ABABABA$ is: ABABA
ATCGATCGA$ is: ATCGA
banana$ is: ana
abcpqrabpqpq$ is: ab
pqrpqpqabab$ is: ab
 
first 1000 d.p. of Pi is: 23846
(this took 9.987 ms)
 
first 10000 d.p. of Pi is: 7111369
(this took 89.12 ms)
 
first 100000 d.p. of Pi is: 041021944
(this took 1031.072 ms)
</pre>
9,487

edits