Ukkonen’s suffix tree construction: Difference between revisions

m
m (→‎{{header|Wren}}: Minor tidy)
 
(11 intermediate revisions by 7 users not shown)
Line 1:
[[Category:String algorithms]]
{{draft task}}
Suffix Trees are very useful in numerous string processing and computational biology problems.
 
Line 22:
 
As the task doesn't say whether overlapping sequences are to be counted, I've assumed that they are as this is what the algorithm naturally produces.
<langsyntaxhighlight lang="go">package main
 
import (
Line 272:
fmt.Printf(" (this took %s)\n\n", elapsed)
}
}</langsyntaxhighlight>
 
{{out}}
Line 296:
first 100000 d.p. of Pi is: 041021944
(this took 599.770281ms)
</pre>
 
=={{header|Java}}==
<syntaxhighlight lang="java">
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;
 
public final class UkkonenSuffixTree {
 
public static void main(String[] aArgs) throws IOException {
List<Integer> limits = List.of( 1_000, 10_000, 100_000 );
for ( int limit : limits ) {
String contents = Files.readString(Path.of("piDigits.txt"));
String piDigits = contents.substring(0, limit + 1);
final long start = System.currentTimeMillis();
SuffixTree tree = new SuffixTree(piDigits);
Map<String, Set<Integer>> substrings = tree.getLongestRepeatedSubstrings();
final long finish = System.currentTimeMillis();
System.out.println("First " + limit + " digits of pi has longest repeated characters:");
for ( String substring : substrings.keySet() ) {
System.out.print(" '" + substring + "' starting at index ");
for ( Iterator<Integer> iterator = substrings.get(substring).iterator(); iterator.hasNext(); ) {
System.out.print(iterator.next());
if ( iterator.hasNext() ) {
System.out.print(" and ");
}
}
System.out.println();
}
System.out.println("Time taken: " + ( finish - start ) + " milliseconds." + System.lineSeparator());
}
System.out.println("The timings show that the implementation has approximately linear performance.");
}
 
private final static class SuffixTree {
public SuffixTree(String aWord) {
text = Arrays.copyOfRange(aWord.toCharArray(), 0, aWord.length() + 1);
text[aWord.length()] = '\uF123'; // Terminal character
nodes = new Node[2 * aWord.length() + 2];
root = newNode(UNDEFINED, UNDEFINED);
activeNode = root;
for ( char character : text ) {
extendSuffixTree(character);
}
}
public Map<String, Set<Integer>> getLongestRepeatedSubstrings() {
List<Integer> indexes = doTraversal();
String word = String.valueOf(text).substring(0, text.length - 1);
Map<String, Set<Integer>> result = new HashMap<String, Set<Integer>>();
if ( indexes.get(0) > 0 ) {
for ( int i = 1; i < indexes.size(); i++ ) {
String substring = word.substring(indexes.get(i), indexes.get(i) + indexes.get(0));
result.computeIfAbsent(substring, k -> new TreeSet<Integer>()).add(indexes.get(i));
}
}
return result;
}
// PRIVATE //
private void extendSuffixTree(char aCharacter) {
needParentLink = UNDEFINED;
remainder++;
while ( remainder > 0 ) {
if ( activeLength == 0 ) {
activeEdge = textIndex;
}
if ( ! nodes[activeNode].children.containsKey(text[activeEdge]) ) {
final int leaf = newNode(textIndex, LEAF_NODE);
nodes[activeNode].children.put(text[activeEdge], leaf);
addSuffixLink(activeNode);
} else {
final int next = nodes[activeNode].children.get(text[activeEdge]);
if ( walkDown(next) ) {
continue;
}
if ( text[nodes[next].start + activeLength] == aCharacter ) {
activeLength++;
addSuffixLink(activeNode);
break;
}
final int split = newNode(nodes[next].start, nodes[next].start + activeLength);
nodes[activeNode].children.put(text[activeEdge], split);
final int leaf = newNode(textIndex, LEAF_NODE);
nodes[split].children.put(aCharacter, leaf);
nodes[next].start += activeLength;
nodes[split].children.put(text[nodes[next].start], next);
addSuffixLink(split);
}
remainder--;
if ( activeNode == root && activeLength > 0 ) {
activeLength--;
activeEdge = textIndex - remainder + 1;
} else {
activeNode = ( nodes[activeNode].parentLink > 0 ) ? nodes[activeNode].parentLink : root;
}
}
textIndex++;
}
private boolean walkDown(int aNode) {
if ( activeLength >= nodes[aNode].edgeLength() ) {
activeEdge += nodes[aNode].edgeLength();
activeLength -= nodes[aNode].edgeLength();
activeNode = aNode;
return true;
}
return false;
}
private void addSuffixLink(int aNode) {
if ( needParentLink != UNDEFINED ) {
nodes[needParentLink].parentLink = aNode;
}
needParentLink = aNode;
}
private int newNode(int aStart, int aEnd) {
Node node = new Node(aStart, aEnd);
node.leafIndex = ( aEnd == LEAF_NODE ) ? leafIndexGenerator++ : UNDEFINED;
nodes[currentNode] = node;
return currentNode++;
}
private List<Integer> doTraversal() {
List<Integer> indexes = new ArrayList<Integer>();
indexes.add(UNDEFINED);
return traversal(indexes, nodes[root], 0);
}
private List<Integer> traversal(List<Integer> aIndexes, Node aNode, int aHeight) {
if ( aNode.leafIndex == UNDEFINED ) {
for ( int index : aNode.children.values() ) {
Node child = nodes[index];
traversal(aIndexes, child, aHeight + child.edgeLength());
}
} else if ( aIndexes.get(0) < aHeight - aNode.edgeLength() ) {
aIndexes.clear();
aIndexes.add(aHeight - aNode.edgeLength());
aIndexes.add(aNode.leafIndex);
} else if ( aIndexes.get(0) == aHeight - aNode.edgeLength() ) {
aIndexes.add(aNode.leafIndex);
}
return aIndexes;
}
private final class Node {
public Node(int aStart, int aEnd) {
start = aStart;
end = aEnd;
}
public int edgeLength() {
return Math.min(end, textIndex + 1) - start;
}
private int start, end, parentLink, leafIndex;
private Map<Character, Integer> children = new HashMap<Character, Integer>();
}
private Node[] nodes;
private char[] text;
private int activeNode, activeLength, activeEdge;
private int root, textIndex, currentNode, needParentLink, remainder, leafIndexGenerator;
private static final int UNDEFINED = -1;
private static final int LEAF_NODE = Integer.MAX_VALUE;
}
 
}
</syntaxhighlight>
{{ out }}
<pre>
First 1000 digits of pi has longest repeated characters:
'82534' starting at index 87 and 902
'33446' starting at index 214 and 830
'60943' starting at index 396 and 550
'23846' starting at index 15 and 578
'93751' starting at index 44 and 940
'99999' starting at index 761 and 762
'42019' starting at index 700 and 993
Time taken: 13 milliseconds.
 
First 10000 digits of pi has longest repeated characters:
'7111369' starting at index 3501 and 6114
'8530614' starting at index 4166 and 4600
Time taken: 8 milliseconds.
 
First 100000 digits of pi has longest repeated characters:
'041021944' starting at index 21760 and 75869
'201890888' starting at index 30626 and 33843
'134926158' starting at index 69596 and 86281
Time taken: 100 milliseconds.
 
The timings show that the implementation has approximately linear performance.
</pre>
 
Line 301 ⟶ 532:
{{trans|Go}}
Uses array indices instead of the Go version's node pointers.
<langsyntaxhighlight lang="julia">const oo = typemax(Int)
 
"""The suffix-tree's node."""
Line 404 ⟶ 635:
 
function setsuffixindexbyDFS(st, node, labelheight, verbose=false)
verbose && node.start > 0 && print(st.text[node.start:min(node.ending, length(st.text))])
isleaf = true
for child in map(v -> st.nodes[v], collect(values(node.children)))
Line 466 ⟶ 697:
println()
sπ = ""
setprecision(4000004000000) do
sπ = string(BigFloat(π))[3:end]
end
for number in [1000, 10000, 100000, 1000000]
text = sπ[1:number] * "\$"
@time begin
Line 479 ⟶ 710:
 
testsuffixtree()
</langsyntaxhighlight>{{out}}
<pre>
Longest Repeated Substring in:
Line 499 ⟶ 730:
first 100000 d.p. of π: 134926158 (or) 041021944 (or) 201890888
0.533892 seconds (3.52 M allocations: 425.035 MiB, 22.42% gc time)
first 1000000 d.p. of π: 756130190263
6.008879 seconds (35.25 M allocations: 4.152 GiB, 23.20% gc time)
</pre>
 
=={{header|Nim}}==
{{trans|Julia}}
{{libheader|Nim-Integers}}
<syntaxhighlight lang="Nim">import std/[sequtils, strformat, strutils, tables]
 
const ∞ = int.high
 
type
 
# Suffix-tree node.
Node = ref object
children: Table[char, int]
first: int
last: int
suffixLink: int
suffixIndex: int
 
# Ukkonen suffix-tree.
SuffixTree = object
nodes: seq[Node]
text: string
root: int
position: int
currentNode: int
needSuffixLink: int
remainder: int
activeNode: int
activeLength: int
activeEdge: int
 
 
func newNode(): Node =
Node(first: -1, last: ∞, suffixLink: -1, suffixIndex: -1)
 
func newNode(first, last: int): Node =
Node(first: first, last: last, suffixLink: 0, suffixIndex: -1)
 
func newNode(st: var SuffixTree; first, last: int): int =
inc st.currentNode
st.nodes[st.currentNode] = newNode(first, last)
result = st.currentNode
 
 
func activeEdgeChar(st: SuffixTree): char {.inline.} = st.text[st.activeedge]
 
 
func edgeLength(st: SuffixTree; n: Node): int =
min(n.last, st.position + 1) - n.first
 
 
func addSuffixLink(st: var SuffixTree; nodenum: int) =
if st.needSuffixLink >= 0:
st.nodes[st.needSuffixLink].suffixLink = nodenum
st.needSuffixLink = nodenum
 
func walkdown(st: var SuffixTree; currNode: int): bool =
let length = st.edgeLength(st.nodes[currNode])
if st.activeLength < length: return false
inc st.activeEdge, length
dec st.activeLength, length
st.activeNode = currnode
result = true
 
 
func extendSuffixTree(st: var SuffixTree; pos: int) =
st.position = pos
st.needSuffixLink = -1
inc st.remainder
while st.remainder > 0:
if st.activeLength == 0: st.activeEdge = st.position
if st.activeEdgeChar() notin st.nodes[st.activeNode].children:
let nodeNum = st.newNode(st.position, ∞)
st.nodes[st.activeNode].children[st.activeEdgeChar()] = nodeNum
st.addSuffixLink(st.activeNode)
else:
let next = st.nodes[st.activeNode].children[st.activeEdgeChar()]
if st.walkdown(next): continue
if st.text[st.nodes[next].first + st.activeLength] == st.text[pos]:
st.addSuffixLink(st.activeNode)
inc st.activeLength
break
let split = st.newNode(st.nodes[next].first, st.nodes[next].first + st.activeLength)
st.nodes[st.activeNode].children[st.activeEdgeChar()] = split
let nodeNum = st.newNode(st.position, ∞)
st.nodes[split].children[st.text[pos]] = nodeNum
st.nodes[next].first += st.activelength
st.nodes[split].children[st.text[st.nodes[next].first]] = next
st.addSuffixLink(split)
 
dec st.remainder
if st.activeNode == st.root and st.activeLength > 0:
dec st.activelength
st.activeEdge = st.position - st.remainder + 1
elif st.activeNode != st.root:
st.activeNode = st.nodes[st.activeNode].suffixLink
 
 
proc setSuffixIndexByDFS(st: var SuffixTree; node: Node; labelHeight: Natural; verbose = false) =
if verbose and node.first >= 0:
echo st.text[node.first..<min(node.last, st.text.len)]
var isLeaf = true
for ichild in node.children.values:
let child = st.nodes[ichild]
if verbose and isLeaf and node.first >= 0:
echo &" [{node.suffixindex}]"
isLeaf = false
st.setSuffixIndexbyDFS(child, labelHeight + st.edgeLength(child))
if isleaf:
node.suffixindex = st.text.len - labelHeight
if verbose: echo &" [{node.suffixindex}]"
 
proc initSuffixTree(str: string): SuffixTree =
var nodes = newSeqWith(str.len * 2, newNode())
result = SuffixTree(nodes: nodes, text: str, position: -1, currentNode: -1,
needSuffixLink: -1, remainder: 0, activeLength: 1, activeEdge: 0)
result.root = result.newNode(0, 0)
result.activeNode = result.root
for i in 0..result.text.high:
result.extendSuffixTree(i)
result.setSuffixIndexByDFS(result.nodes[result.root], 0)
 
 
func doTraversal(st: SuffixTree): (int, seq[int]) =
var maxHeight = 0
var substringStartIndices = @[-1]
 
func traversal(node: Node; labelHeight: int) =
if node.suffixIndex == -1:
for ichild in node.children.values:
let child = st.nodes[ichild]
traversal(child, labelHeight + st.edgeLength(child))
elif maxHeight < labelHeight - st.edgeLength(node):
maxHeight = labelHeight - st.edgeLength(node)
substringStartIndices = @[node.suffixIndex]
elif maxHeight == labelHeight - st.edgelength(node):
substringStartIndices.add node.suffixIndex
 
traversal(st.nodes[st.root], 0)
result = (maxHeight, move(substringStartIndices))
 
 
proc getLongestRepeatedSubstring(st: SuffixTree; label = ""; printResult = true): string =
let (length, starts) = st.dotraversal()
result = if length == 0: ""
else: starts.mapIt(st.text[it..it+length-1]).deduplicate().join(" (or) ")
if printResult:
stdout.write " ", if label.len == 0: join(st.text) else: label, ": "
echo if length == 0: "No repeated substring." else: result
 
 
 
const Tests = ["CAAAABAAAABD$",
"GEEKSFORGEEKS$",
"AAAAAAAAAA$",
"ABCDEFG$",
"ABABABA$",
"ATCGATCGA$",
"banana$",
"abcpqrabpqpq$",
"pqrpqpqabab$"]
 
echo "Longest Repeated Substring in:\n"
for test in Tests:
var st = initSuffixTree(test)
discard st.getLongestRepeatedSubstring()
echo()
 
 
#############################################################################
# Pi calculation.
 
import std/[monotimes, times]
import integers
 
func isr(term, guess: Integer): Integer =
var term = term
result = guess
let value = term * result
while true:
if abs(term - result) <= 1:
break
result = (result + term) shr 1
term = value div result
 
func calcAgm(lam, gm: Integer; z: var Integer; ep: Integer): Integer =
var am: Integer
var lam = lam
var gm = gm
var n = 1
while true:
am = (lam + gm) shr 1
gm = isr(lam, gm)
let v = am - lam
let zi = v * v * n
if zi < ep:
break
z -= zi
inc n, n
lam = am
result = am
 
func bip(exp: int; man = 1): Integer {.inline.} = man * 10^exp
 
func calculatePi(): string =
const Digits = 4_000_000
let am = bip(Digits)
let gm = isqrt(bip(Digits + Digits - 1, 5))
var z = bip(Digits + Digits - 2, 25)
let agm = calcAGM(am, gm, z, bip(Digits + 1))
let pi = agm * agm * bip(Digits - 2) div z
result = $pi
 
let sπ = calculatePi()
for number in [1000, 10000, 100000, 1000000]:
let text = sπ[2..<number] & '$'
let start = getMonoTime()
var st = initSuffixTree(text)
discard st.getlongestrepeatedsubstring(&"first {number} d.p. of π")
echo &" → Temps: {(getMonoTime() - start).inMicroseconds} µs"
 
</syntaxhighlight>
 
{{out}}
<pre>Longest Repeated Substring in:
 
CAAAABAAAABD$: AAAAB
GEEKSFORGEEKS$: GEEKS
AAAAAAAAAA$: AAAAAAAAA
ABCDEFG$: No repeated substring.
ABABABA$: ABABA
ATCGATCGA$: ATCGA
banana$: ana
abcpqrabpqpq$: ab (or) pq
pqrpqpqabab$: ab (or) pq
 
first 1000 d.p. of π: 60943 (or) 42019 (or) 82534 (or) 33446 (or) 93751 (or) 99999 (or) 23846
→ Temps: 780 µs
first 10000 d.p. of π: 7111369 (or) 8530614
→ Temps: 14726 µs
first 100000 d.p. of π: 134926158 (or) 041021944 (or) 201890888
→ Temps: 178656 µs
first 1000000 d.p. of π: 756130190263
→ Temps: 1919290 µs
</pre>
 
=={{header|Phix}}==
{{trans|Go}}
<!--<syntaxhighlight lang="phix">(phixonline)-->
<lang Phix>-- demo/rosetta/Ukkonens_Suffix_Tree.exw
<span style="color: #000080;font-style:italic;">-- demo/rosetta/Ukkonens_Suffix_Tree.exw</span>
integer maxChar = 'z'
<span style="color: #008080;">with</span> <span style="color: #008080;">javascript_semantics</span>
<span style="color: #004080;">integer</span> <span style="color: #000000;">maxChar</span> <span style="color: #0000FF;">=</span> <span style="color: #008000;">'z'</span>
<span style="color: #004080;">sequence</span> <span style="color: #000000;">children</span> <span style="color: #0000FF;">=</span> <span style="color: #0000FF;">{},</span>
<span style="color: #000000;">suffixLinks</span> <span style="color: #0000FF;">=</span> <span style="color: #0000FF;">{},</span>
<span style="color: #000000;">starts</span> <span style="color: #0000FF;">=</span> <span style="color: #0000FF;">{},</span>
<span style="color: #000000;">endIndices</span> <span style="color: #0000FF;">=</span> <span style="color: #0000FF;">{},</span>
<span style="color: #000000;">suffixIndices</span> <span style="color: #0000FF;">=</span> <span style="color: #0000FF;">{},</span>
<span style="color: #000000;">leaves</span> <span style="color: #0000FF;">=</span> <span style="color: #0000FF;">{}</span>
<span style="color: #008080;">function</span> <span style="color: #000000;">new_leaf</span><span style="color: #0000FF;">(</span><span style="color: #004080;">integer</span> <span style="color: #000000;">v</span><span style="color: #0000FF;">=</span><span style="color: #000000;">0</span><span style="color: #0000FF;">)</span>
sequence children = {},
<span style="color: #000000;">leaves</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">append</span><span style="color: #0000FF;">(</span><span style="color: #000000;">leaves</span><span style="color: #0000FF;">,</span><span style="color: #000000;">v</span><span style="color: #0000FF;">)</span>
suffixLinks = {},
<span style="color: #008080;">return</span> <span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">leaves</span><span style="color: #0000FF;">)</span>
starts = {},
<span style="color: #008080;">end</span> <span style="color: #008080;">function</span>
ends = {},
suffixIndices = {}
string text
atom pSplitEnd,
pRootEnd,
pLeafEnd = allocate_word(0,true)
integer root = NULL,
lastNewNode,
activeNode,
activeEdge = -1,
activeLength = 0,
remainingSuffixCount = 0,
size = -1
<span style="color: #004080;">string</span> <span style="color: #000000;">text</span>
function newNode(integer start, atom pFinish, bool bKids=false)
<span style="color: #004080;">integer</span> <span style="color: #000000;">splitEndIdx</span><span style="color: #0000FF;">,</span>
children = append(children,iff(bKids?repeat(NULL,maxChar):0))
<span style="color: #000000;">rootEndIdx</span><span style="color: #0000FF;">,</span>
suffixLinks = append(suffixLinks,root)
<span style="color: #000000;">leafEndIdx</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">new_leaf</span><span style="color: #0000FF;">(),</span>
starts = append(starts,start)
<span style="color: #000000;">root</span> <span style="color: #0000FF;">=</span> <span style="color: #004600;">NULL</span><span style="color: #0000FF;">,</span>
ends = append(ends,pFinish)
<span style="color: #000000;">lastNewNode</span><span style="color: #0000FF;">,</span>
suffixIndices = append(suffixIndices,-1)
<span style="color: #000000;">activeNode</span><span style="color: #0000FF;">,</span>
return length(children)
<span style="color: #000000;">activeEdge</span> <span style="color: #0000FF;">=</span> <span style="color: #0000FF;">-</span><span style="color: #000000;">1</span><span style="color: #0000FF;">,</span>
end function
<span style="color: #000000;">activeLength</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">0</span><span style="color: #0000FF;">,</span>
<span style="color: #000000;">remainingSuffixCount</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">0</span><span style="color: #0000FF;">,</span>
function edgeLength(integer n)
<span style="color: #000000;">size</span> <span style="color: #0000FF;">=</span> <span style="color: #0000FF;">-</span><span style="color: #000000;">1</span>
if n == root then
return 0
<span style="color: #008080;">function</span> <span style="color: #000000;">newNode</span><span style="color: #0000FF;">(</span><span style="color: #004080;">integer</span> <span style="color: #000000;">start</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">finishIdx</span><span style="color: #0000FF;">,</span> <span style="color: #004080;">bool</span> <span style="color: #000000;">bKids</span><span style="color: #0000FF;">=</span><span style="color: #004600;">false</span><span style="color: #0000FF;">)</span>
end if
<span style="color: #000000;">children</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">append</span><span style="color: #0000FF;">(</span><span style="color: #000000;">children</span><span style="color: #0000FF;">,</span><span style="color: #008080;">iff</span><span style="color: #0000FF;">(</span><span style="color: #000000;">bKids</span><span style="color: #0000FF;">?</span><span style="color: #7060A8;">repeat</span><span style="color: #0000FF;">(</span><span style="color: #004600;">NULL</span><span style="color: #0000FF;">,</span><span style="color: #000000;">maxChar</span><span style="color: #0000FF;">):</span><span style="color: #000000;">0</span><span style="color: #0000FF;">))</span>
return peekns(ends[n]) - starts[n] + 1
<span style="color: #000000;">suffixLinks</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">append</span><span style="color: #0000FF;">(</span><span style="color: #000000;">suffixLinks</span><span style="color: #0000FF;">,</span><span style="color: #000000;">root</span><span style="color: #0000FF;">)</span>
end function
<span style="color: #000000;">starts</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">append</span><span style="color: #0000FF;">(</span><span style="color: #000000;">starts</span><span style="color: #0000FF;">,</span><span style="color: #000000;">start</span><span style="color: #0000FF;">)</span>
<span style="color: #000000;">endIndices</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">append</span><span style="color: #0000FF;">(</span><span style="color: #000000;">endIndices</span><span style="color: #0000FF;">,</span><span style="color: #000000;">finishIdx</span><span style="color: #0000FF;">)</span>
function walkDown(integer currNode)
<span style="color: #000000;">suffixIndices</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">append</span><span style="color: #0000FF;">(</span><span style="color: #000000;">suffixIndices</span><span style="color: #0000FF;">,-</span><span style="color: #000000;">1</span><span style="color: #0000FF;">)</span>
integer l = edgeLength(currNode)
<span style="color: #008080;">return</span> <span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">children</span><span style="color: #0000FF;">)</span>
if activeLength >= l then
<span style="color: #008080;">end</span> <span style="color: #008080;">function</span>
activeEdge += l
activeLength -= l
<span style="color: #008080;">function</span> <span style="color: #000000;">edgeLength</span><span style="color: #0000FF;">(</span><span style="color: #004080;">integer</span> <span style="color: #000000;">n</span><span style="color: #0000FF;">)</span>
<span style="color: #008080;">return</span> <span style="color: #008080;">iff</span><span style="color: #0000FF;">(</span><span style="color: #000000;">n</span><span style="color: #0000FF;">==</span><span style="color: #000000;">root</span><span style="color: #0000FF;">?</span><span style="color: #000000;">0</span><span style="color: #0000FF;">:</span><span style="color: #000000;">leaves</span><span style="color: #0000FF;">[</span><span style="color: #000000;">endIndices</span><span style="color: #0000FF;">[</span><span style="color: #000000;">n</span><span style="color: #0000FF;">]]</span> <span style="color: #0000FF;">-</span> <span style="color: #000000;">starts</span><span style="color: #0000FF;">[</span><span style="color: #000000;">n</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">+</span> <span style="color: #000000;">1</span><span style="color: #0000FF;">)</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">function</span>
<span style="color: #008080;">function</span> <span style="color: #000000;">walkDown</span><span style="color: #0000FF;">(</span><span style="color: #004080;">integer</span> <span style="color: #000000;">currNode</span><span style="color: #0000FF;">)</span>
<span style="color: #004080;">integer</span> <span style="color: #000000;">l</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">edgeLength</span><span style="color: #0000FF;">(</span><span style="color: #000000;">currNode</span><span style="color: #0000FF;">)</span>
<span style="color: #008080;">if</span> <span style="color: #000000;">activeLength</span> <span style="color: #0000FF;">>=</span> <span style="color: #000000;">l</span> <span style="color: #008080;">then</span>
<span style="color: #000000;">activeEdge</span> <span style="color: #0000FF;">+=</span> <span style="color: #000000;">l</span>
<span style="color: #000000;">activeLength</span> <span style="color: #0000FF;">-=</span> <span style="color: #000000;">l</span>
<span style="color: #000000;">activeNode</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">currNode</span>
<span style="color: #008080;">return</span> <span style="color: #004600;">true</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
<span style="color: #008080;">return</span> <span style="color: #004600;">false</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">function</span>
<span style="color: #008080;">procedure</span> <span style="color: #000000;">extendSuffixTree</span><span style="color: #0000FF;">(</span><span style="color: #004080;">integer</span> <span style="color: #000000;">pos</span><span style="color: #0000FF;">)</span>
<span style="color: #000000;">leaves</span><span style="color: #0000FF;">[</span><span style="color: #000000;">leafEndIdx</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">pos</span>
<span style="color: #000000;">remainingSuffixCount</span> <span style="color: #0000FF;">+=</span> <span style="color: #000000;">1</span>
<span style="color: #000000;">lastNewNode</span> <span style="color: #0000FF;">=</span> <span style="color: #004600;">NULL</span>
<span style="color: #008080;">while</span> <span style="color: #000000;">remainingSuffixCount</span> <span style="color: #0000FF;">></span> <span style="color: #000000;">0</span> <span style="color: #008080;">do</span>
<span style="color: #008080;">if</span> <span style="color: #000000;">activeLength</span> <span style="color: #0000FF;">==</span> <span style="color: #000000;">0</span> <span style="color: #008080;">then</span>
<span style="color: #000000;">activeEdge</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">pos</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
<span style="color: #004080;">integer</span> <span style="color: #000000;">ta</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">text</span><span style="color: #0000FF;">[</span><span style="color: #000000;">activeEdge</span><span style="color: #0000FF;">]</span>
<span style="color: #004080;">bool</span> <span style="color: #000000;">ca0</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">children</span><span style="color: #0000FF;">[</span><span style="color: #000000;">activeNode</span><span style="color: #0000FF;">]=</span><span style="color: #000000;">0</span>
<span style="color: #004080;">integer</span> <span style="color: #000000;">next</span> <span style="color: #0000FF;">=</span> <span style="color: #008080;">iff</span><span style="color: #0000FF;">(</span><span style="color: #000000;">ca0</span><span style="color: #0000FF;">?</span><span style="color: #004600;">NULL</span><span style="color: #0000FF;">:</span><span style="color: #000000;">children</span><span style="color: #0000FF;">[</span><span style="color: #000000;">activeNode</span><span style="color: #0000FF;">][</span><span style="color: #000000;">ta</span><span style="color: #0000FF;">])</span>
<span style="color: #008080;">if</span> <span style="color: #000000;">next</span><span style="color: #0000FF;">==</span><span style="color: #004600;">null</span> <span style="color: #008080;">then</span>
<span style="color: #008080;">if</span> <span style="color: #000000;">ca0</span> <span style="color: #008080;">then</span>
<span style="color: #000000;">children</span><span style="color: #0000FF;">[</span><span style="color: #000000;">activeNode</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">repeat</span><span style="color: #0000FF;">(</span><span style="color: #004600;">NULL</span><span style="color: #0000FF;">,</span><span style="color: #000000;">maxChar</span><span style="color: #0000FF;">)</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
<span style="color: #000000;">children</span><span style="color: #0000FF;">[</span><span style="color: #000000;">activeNode</span><span style="color: #0000FF;">][</span><span style="color: #000000;">ta</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">newNode</span><span style="color: #0000FF;">(</span><span style="color: #000000;">pos</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">leafEndIdx</span><span style="color: #0000FF;">)</span>
<span style="color: #008080;">if</span> <span style="color: #000000;">lastNewNode</span><span style="color: #0000FF;">!=</span><span style="color: #004600;">NULL</span> <span style="color: #008080;">then</span>
<span style="color: #000000;">suffixLinks</span><span style="color: #0000FF;">[</span><span style="color: #000000;">lastNewNode</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">activeNode</span>
<span style="color: #000000;">lastNewNode</span> <span style="color: #0000FF;">=</span> <span style="color: #004600;">NULL</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
<span style="color: #008080;">else</span>
<span style="color: #008080;">if</span> <span style="color: #000000;">walkDown</span><span style="color: #0000FF;">(</span><span style="color: #000000;">next</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">then</span>
<span style="color: #008080;">continue</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
<span style="color: #004080;">integer</span> <span style="color: #000000;">tp</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">text</span><span style="color: #0000FF;">[</span><span style="color: #000000;">pos</span><span style="color: #0000FF;">]</span>
<span style="color: #008080;">if</span> <span style="color: #000000;">text</span><span style="color: #0000FF;">[</span><span style="color: #000000;">starts</span><span style="color: #0000FF;">[</span><span style="color: #000000;">next</span><span style="color: #0000FF;">]+</span><span style="color: #000000;">activeLength</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">==</span> <span style="color: #000000;">tp</span> <span style="color: #008080;">then</span>
<span style="color: #008080;">if</span> <span style="color: #000000;">lastNewNode</span><span style="color: #0000FF;">!=</span><span style="color: #004600;">NULL</span> <span style="color: #008080;">and</span> <span style="color: #000000;">activeNode</span><span style="color: #0000FF;">!=</span><span style="color: #000000;">root</span> <span style="color: #008080;">then</span>
<span style="color: #000000;">suffixLinks</span><span style="color: #0000FF;">[</span><span style="color: #000000;">lastNewNode</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">activeNode</span>
<span style="color: #000000;">lastNewNode</span> <span style="color: #0000FF;">=</span> <span style="color: #004600;">NULL</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
<span style="color: #000000;">activeLength</span> <span style="color: #0000FF;">+=</span> <span style="color: #000000;">1</span>
<span style="color: #008080;">exit</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
<span style="color: #004080;">integer</span> <span style="color: #000000;">temp</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">starts</span><span style="color: #0000FF;">[</span><span style="color: #000000;">next</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">+</span> <span style="color: #000000;">activeLength</span> <span style="color: #0000FF;">-</span> <span style="color: #000000;">1</span>
<span style="color: #000000;">splitEndIdx</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">new_leaf</span><span style="color: #0000FF;">(</span><span style="color: #000000;">temp</span><span style="color: #0000FF;">)</span>
<span style="color: #004080;">integer</span> <span style="color: #000000;">splitnode</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">newNode</span><span style="color: #0000FF;">(</span><span style="color: #000000;">starts</span><span style="color: #0000FF;">[</span><span style="color: #000000;">next</span><span style="color: #0000FF;">],</span> <span style="color: #000000;">splitEndIdx</span><span style="color: #0000FF;">,</span> <span style="color: #004600;">true</span><span style="color: #0000FF;">)</span>
<span style="color: #000000;">ta</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">text</span><span style="color: #0000FF;">[</span><span style="color: #000000;">activeEdge</span><span style="color: #0000FF;">]</span>
<span style="color: #000000;">children</span><span style="color: #0000FF;">[</span><span style="color: #000000;">activeNode</span><span style="color: #0000FF;">][</span><span style="color: #000000;">ta</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">splitnode</span>
<span style="color: #000000;">children</span><span style="color: #0000FF;">[</span><span style="color: #000000;">splitnode</span><span style="color: #0000FF;">][</span><span style="color: #000000;">tp</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">newNode</span><span style="color: #0000FF;">(</span><span style="color: #000000;">pos</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">leafEndIdx</span><span style="color: #0000FF;">)</span>
<span style="color: #000000;">starts</span><span style="color: #0000FF;">[</span><span style="color: #000000;">next</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">+=</span> <span style="color: #000000;">activeLength</span>
<span style="color: #000000;">children</span><span style="color: #0000FF;">[</span><span style="color: #000000;">splitnode</span><span style="color: #0000FF;">][</span><span style="color: #000000;">text</span><span style="color: #0000FF;">[</span><span style="color: #000000;">starts</span><span style="color: #0000FF;">[</span><span style="color: #000000;">next</span><span style="color: #0000FF;">]]]</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">next</span>
<span style="color: #008080;">if</span> <span style="color: #000000;">lastNewNode</span><span style="color: #0000FF;">!=</span><span style="color: #004600;">NULL</span> <span style="color: #008080;">then</span>
<span style="color: #000000;">suffixLinks</span><span style="color: #0000FF;">[</span><span style="color: #000000;">lastNewNode</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">splitnode</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
<span style="color: #000000;">lastNewNode</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">splitnode</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
<span style="color: #000000;">remainingSuffixCount</span> <span style="color: #0000FF;">-=</span> <span style="color: #000000;">1</span>
<span style="color: #008080;">if</span> <span style="color: #000000;">activeNode</span><span style="color: #0000FF;">==</span><span style="color: #000000;">root</span> <span style="color: #008080;">and</span> <span style="color: #000000;">activeLength</span><span style="color: #0000FF;">></span><span style="color: #000000;">0</span> <span style="color: #008080;">then</span>
<span style="color: #000000;">activeLength</span> <span style="color: #0000FF;">-=</span> <span style="color: #000000;">1</span>
<span style="color: #000000;">activeEdge</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">pos</span> <span style="color: #0000FF;">-</span> <span style="color: #000000;">remainingSuffixCount</span> <span style="color: #0000FF;">+</span> <span style="color: #000000;">1</span>
<span style="color: #008080;">elsif</span> <span style="color: #000000;">activeNode</span><span style="color: #0000FF;">!=</span><span style="color: #000000;">root</span> <span style="color: #008080;">then</span>
<span style="color: #000000;">activeNode</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">suffixLinks</span><span style="color: #0000FF;">[</span><span style="color: #000000;">activeNode</span><span style="color: #0000FF;">]</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">while</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">procedure</span>
<span style="color: #008080;">procedure</span> <span style="color: #000000;">setSuffixIndexByDFS</span><span style="color: #0000FF;">(</span><span style="color: #004080;">integer</span> <span style="color: #000000;">n</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">labelHeight</span><span style="color: #0000FF;">)</span>
<span style="color: #008080;">if</span> <span style="color: #000000;">n</span><span style="color: #0000FF;">!=</span><span style="color: #004600;">NULL</span> <span style="color: #008080;">then</span>
<span style="color: #008080;">if</span> <span style="color: #000000;">children</span><span style="color: #0000FF;">[</span><span style="color: #000000;">n</span><span style="color: #0000FF;">]=</span><span style="color: #000000;">0</span> <span style="color: #008080;">then</span>
<span style="color: #000000;">suffixIndices</span><span style="color: #0000FF;">[</span><span style="color: #000000;">n</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">size</span> <span style="color: #0000FF;">-</span> <span style="color: #000000;">labelHeight</span>
<span style="color: #008080;">else</span>
<span style="color: #004080;">bool</span> <span style="color: #000000;">leaf</span> <span style="color: #0000FF;">=</span> <span style="color: #004600;">true</span>
<span style="color: #008080;">for</span> <span style="color: #000000;">i</span><span style="color: #0000FF;">=</span><span style="color: #000000;">1</span> <span style="color: #008080;">to</span> <span style="color: #000000;">maxChar</span> <span style="color: #008080;">do</span>
<span style="color: #004080;">integer</span> <span style="color: #000000;">nci</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">children</span><span style="color: #0000FF;">[</span><span style="color: #000000;">n</span><span style="color: #0000FF;">][</span><span style="color: #000000;">i</span><span style="color: #0000FF;">]</span>
<span style="color: #008080;">if</span> <span style="color: #000000;">nci</span><span style="color: #0000FF;">!=</span><span style="color: #004600;">NULL</span> <span style="color: #008080;">then</span>
<span style="color: #000000;">leaf</span> <span style="color: #0000FF;">=</span> <span style="color: #004600;">false</span>
<span style="color: #000000;">setSuffixIndexByDFS</span><span style="color: #0000FF;">(</span><span style="color: #000000;">nci</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">labelHeight</span><span style="color: #0000FF;">+</span><span style="color: #000000;">edgeLength</span><span style="color: #0000FF;">(</span><span style="color: #000000;">nci</span><span style="color: #0000FF;">))</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">for</span>
<span style="color: #008080;">if</span> <span style="color: #000000;">leaf</span> <span style="color: #008080;">then</span> <span style="color: #0000FF;">?</span><span style="color: #000000;">9</span><span style="color: #0000FF;">/</span><span style="color: #000000;">0</span> <span style="color: #008080;">end</span> <span style="color: #008080;">if</span> <span style="color: #000080;font-style:italic;">-- (sanity check)</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">procedure</span>
<span style="color: #008080;">procedure</span> <span style="color: #000000;">buildSuffixTree</span><span style="color: #0000FF;">()</span>
<span style="color: #000000;">size</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">text</span><span style="color: #0000FF;">)</span>
<span style="color: #000000;">rootEndIdx</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">new_leaf</span><span style="color: #0000FF;">(-</span><span style="color: #000000;">1</span><span style="color: #0000FF;">)</span>
<span style="color: #000000;">root</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">newNode</span><span style="color: #0000FF;">(-</span><span style="color: #000000;">1</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">rootEndIdx</span><span style="color: #0000FF;">)</span>
<span style="color: #000000;">activeNode</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">root</span>
<span style="color: #008080;">for</span> <span style="color: #000000;">i</span><span style="color: #0000FF;">=</span><span style="color: #000000;">1</span> <span style="color: #008080;">to</span> <span style="color: #000000;">size</span> <span style="color: #008080;">do</span>
<span style="color: #000000;">extendSuffixTree</span><span style="color: #0000FF;">(</span><span style="color: #000000;">i</span><span style="color: #0000FF;">)</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">for</span>
<span style="color: #004080;">integer</span> <span style="color: #000000;">labelHeight</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">0</span>
<span style="color: #000000;">setSuffixIndexByDFS</span><span style="color: #0000FF;">(</span><span style="color: #000000;">root</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">labelHeight</span><span style="color: #0000FF;">)</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">procedure</span>
<span style="color: #008080;">procedure</span> <span style="color: #000000;">doTraversal</span><span style="color: #0000FF;">(</span><span style="color: #004080;">integer</span> <span style="color: #000000;">n</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">labelHeight</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">maxHeightIdx</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">substringStartIndex</span><span style="color: #0000FF;">)</span>
<span style="color: #008080;">if</span> <span style="color: #000000;">n</span><span style="color: #0000FF;">!=</span><span style="color: #004600;">NULL</span> <span style="color: #008080;">then</span>
<span style="color: #004080;">integer</span> <span style="color: #000000;">nsi</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">suffixIndices</span><span style="color: #0000FF;">[</span><span style="color: #000000;">n</span><span style="color: #0000FF;">],</span> <span style="color: #000000;">newHeight</span>
<span style="color: #008080;">if</span> <span style="color: #000000;">nsi</span> <span style="color: #0000FF;">==</span> <span style="color: #0000FF;">-</span><span style="color: #000000;">1</span> <span style="color: #008080;">then</span>
<span style="color: #008080;">for</span> <span style="color: #000000;">i</span><span style="color: #0000FF;">=</span><span style="color: #000000;">1</span> <span style="color: #008080;">to</span> <span style="color: #000000;">maxChar</span> <span style="color: #008080;">do</span>
<span style="color: #004080;">integer</span> <span style="color: #000000;">nci</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">children</span><span style="color: #0000FF;">[</span><span style="color: #000000;">n</span><span style="color: #0000FF;">][</span><span style="color: #000000;">i</span><span style="color: #0000FF;">]</span>
<span style="color: #008080;">if</span> <span style="color: #000000;">nci</span><span style="color: #0000FF;">!=</span><span style="color: #004600;">NULL</span> <span style="color: #008080;">then</span>
<span style="color: #000000;">newHeight</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">labelHeight</span><span style="color: #0000FF;">+</span><span style="color: #000000;">edgeLength</span><span style="color: #0000FF;">(</span><span style="color: #000000;">nci</span><span style="color: #0000FF;">)</span>
<span style="color: #000000;">doTraversal</span><span style="color: #0000FF;">(</span><span style="color: #000000;">nci</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">newHeight</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">maxHeightIdx</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">substringStartIndex</span><span style="color: #0000FF;">)</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">for</span>
<span style="color: #008080;">elsif</span> <span style="color: #000000;">nsi</span> <span style="color: #0000FF;">></span> <span style="color: #0000FF;">-</span><span style="color: #000000;">1</span> <span style="color: #008080;">then</span>
<span style="color: #000000;">newHeight</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">labelHeight</span><span style="color: #0000FF;">-</span><span style="color: #000000;">edgeLength</span><span style="color: #0000FF;">(</span><span style="color: #000000;">n</span><span style="color: #0000FF;">)</span>
<span style="color: #008080;">if</span> <span style="color: #000000;">leaves</span><span style="color: #0000FF;">[</span><span style="color: #000000;">maxHeightIdx</span><span style="color: #0000FF;">]<</span><span style="color: #000000;">newHeight</span> <span style="color: #008080;">then</span>
<span style="color: #000000;">leaves</span><span style="color: #0000FF;">[</span><span style="color: #000000;">maxHeightIdx</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">newHeight</span>
<span style="color: #000000;">leaves</span><span style="color: #0000FF;">[</span><span style="color: #000000;">substringStartIndex</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">nsi</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">procedure</span>
<span style="color: #008080;">function</span> <span style="color: #000000;">getLongestRepeatedSubstring</span><span style="color: #0000FF;">()</span>
<span style="color: #004080;">integer</span> <span style="color: #000000;">maxHeightIdx</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">new_leaf</span><span style="color: #0000FF;">(),</span>
<span style="color: #000000;">substringStartIndex</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">new_leaf</span><span style="color: #0000FF;">()</span>
<span style="color: #000000;">doTraversal</span><span style="color: #0000FF;">(</span><span style="color: #000000;">root</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">0</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">maxHeightIdx</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">substringStartIndex</span><span style="color: #0000FF;">)</span>
<span style="color: #004080;">integer</span> <span style="color: #000000;">maxHeight</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">leaves</span><span style="color: #0000FF;">[</span><span style="color: #000000;">maxHeightIdx</span><span style="color: #0000FF;">],</span>
<span style="color: #000000;">start</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">leaves</span><span style="color: #0000FF;">[</span><span style="color: #000000;">substringStartIndex</span><span style="color: #0000FF;">]</span>
<span style="color: #004080;">string</span> <span style="color: #000000;">t</span> <span style="color: #0000FF;">=</span> <span style="color: #008080;">iff</span><span style="color: #0000FF;">(</span><span style="color: #000000;">maxHeight</span><span style="color: #0000FF;">=</span><span style="color: #000000;">0</span><span style="color: #0000FF;">?</span><span style="color: #008000;">"No repeated substring"</span>
<span style="color: #0000FF;">:</span><span style="color: #000000;">text</span><span style="color: #0000FF;">[</span><span style="color: #000000;">start</span><span style="color: #0000FF;">+</span><span style="color: #000000;">1</span><span style="color: #0000FF;">..</span><span style="color: #000000;">start</span><span style="color: #0000FF;">+</span><span style="color: #000000;">maxHeight</span><span style="color: #0000FF;">])</span>
<span style="color: #008080;">return</span> <span style="color: #000000;">t</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">function</span>
<span style="color: #008080;">constant</span> <span style="color: #000000;">tests</span> <span style="color: #0000FF;">=</span> <span style="color: #0000FF;">{</span><span style="color: #008000;">"CAAAABAAAABD$"</span><span style="color: #0000FF;">,</span>
<span style="color: #008000;">"GEEKSFORGEEKS$"</span><span style="color: #0000FF;">,</span>
<span style="color: #008000;">"AAAAAAAAAA$"</span><span style="color: #0000FF;">,</span>
<span style="color: #008000;">"ABCDEFG$"</span><span style="color: #0000FF;">,</span>
<span style="color: #008000;">"ABABABA$"</span><span style="color: #0000FF;">,</span>
<span style="color: #008000;">"ATCGATCGA$"</span><span style="color: #0000FF;">,</span>
<span style="color: #008000;">"banana$"</span><span style="color: #0000FF;">,</span>
<span style="color: #008000;">"abcpqrabpqpq$"</span><span style="color: #0000FF;">,</span>
<span style="color: #008000;">"pqrpqpqabab$"</span><span style="color: #0000FF;">}</span>
<span style="color: #7060A8;">printf</span><span style="color: #0000FF;">(</span><span style="color: #000000;">1</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"Longest Repeated Substring in:\n"</span><span style="color: #0000FF;">)</span>
<span style="color: #008080;">for</span> <span style="color: #000000;">i</span><span style="color: #0000FF;">=</span><span style="color: #000000;">1</span> <span style="color: #008080;">to</span> <span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">tests</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">do</span>
<span style="color: #000000;">text</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">tests</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">]</span>
<span style="color: #000000;">buildSuffixTree</span><span style="color: #0000FF;">()</span>
<span style="color: #7060A8;">printf</span><span style="color: #0000FF;">(</span><span style="color: #000000;">1</span><span style="color: #0000FF;">,</span><span style="color: #008000;">" %s is: %s\n"</span><span style="color: #0000FF;">,</span> <span style="color: #0000FF;">{</span><span style="color: #000000;">text</span><span style="color: #0000FF;">,</span><span style="color: #000000;">getLongestRepeatedSubstring</span><span style="color: #0000FF;">()})</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">for</span>
<span style="color: #7060A8;">printf</span><span style="color: #0000FF;">(</span><span style="color: #000000;">1</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"\n"</span><span style="color: #0000FF;">)</span>
<span style="color: #008080;">include</span> <span style="color: #004080;">mpfr</span><span style="color: #0000FF;">.</span><span style="color: #000000;">e</span>
<span style="color: #004080;">string</span> <span style="color: #000000;">piStr</span>
<span style="color: #008080;">if</span> <span style="color: #7060A8;">platform</span><span style="color: #0000FF;">()=</span><span style="color: #004600;">JS</span> <span style="color: #008080;">then</span>
<span style="color: #004080;">mpfr</span> <span style="color: #000000;">pi</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">mpfr_init</span><span style="color: #0000FF;">(</span><span style="color: #000000;">0</span><span style="color: #0000FF;">,-</span><span style="color: #000000;">1001</span><span style="color: #0000FF;">)</span> <span style="color: #000080;font-style:italic;">-- (set precision to 1,000 dp, plus the "3.")</span>
<span style="color: #7060A8;">mpfr_const_pi</span><span style="color: #0000FF;">(</span><span style="color: #000000;">pi</span><span style="color: #0000FF;">)</span>
<span style="color: #000000;">piStr</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">mpfr_get_fixed</span><span style="color: #0000FF;">(</span><span style="color: #000000;">pi</span><span style="color: #0000FF;">,</span><span style="color: #000000;">1000</span><span style="color: #0000FF;">)</span> <span style="color: #000080;font-style:italic;">-- (all we can really manage under pwa/p2js)</span>
<span style="color: #008080;">else</span>
<span style="color: #000080;font-style:italic;">-- gmp crashes when I try 100,000 dp, so just get from file</span>
<span style="color: #000000;">piStr</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">get_text</span><span style="color: #0000FF;">(</span><span style="color: #008000;">`E:\downloads\misc\arm\pi-10million.txt`</span><span style="color: #0000FF;">)</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
<span style="color: #000000;">piStr</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">piStr</span><span style="color: #0000FF;">[</span><span style="color: #000000;">3</span><span style="color: #0000FF;">..$]</span> <span style="color: #000080;font-style:italic;">-- discard leading "3."</span>
<span style="color: #000000;">maxChar</span> <span style="color: #0000FF;">=</span> <span style="color: #008000;">'9'</span>
<span style="color: #008080;">for</span> <span style="color: #000000;">i</span><span style="color: #0000FF;">=</span><span style="color: #000000;">3</span> <span style="color: #008080;">to</span> <span style="color: #008080;">iff</span><span style="color: #0000FF;">(</span><span style="color: #7060A8;">platform</span><span style="color: #0000FF;">()=</span><span style="color: #004600;">JS</span><span style="color: #0000FF;">?</span><span style="color: #000000;">3</span><span style="color: #0000FF;">:</span><span style="color: #000000;">6</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">do</span>
<span style="color: #004080;">atom</span> <span style="color: #000000;">t0</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">time</span><span style="color: #0000FF;">()</span>
<span style="color: #004080;">integer</span> <span style="color: #000000;">n</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">power</span><span style="color: #0000FF;">(</span><span style="color: #000000;">10</span><span style="color: #0000FF;">,</span><span style="color: #000000;">i</span><span style="color: #0000FF;">)</span>
<span style="color: #000000;">text</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">piStr</span><span style="color: #0000FF;">[</span><span style="color: #000000;">1</span><span style="color: #0000FF;">..</span><span style="color: #000000;">n</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">&</span> <span style="color: #008000;">"$"</span>
<span style="color: #000000;">buildSuffixTree</span><span style="color: #0000FF;">()</span>
<span style="color: #004080;">string</span> <span style="color: #000000;">r</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">getLongestRepeatedSubstring</span><span style="color: #0000FF;">(),</span>
<span style="color: #000000;">e</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">elapsed</span><span style="color: #0000FF;">(</span><span style="color: #7060A8;">time</span><span style="color: #0000FF;">()-</span><span style="color: #000000;">t0</span><span style="color: #0000FF;">)</span>
<span style="color: #7060A8;">printf</span><span style="color: #0000FF;">(</span><span style="color: #000000;">1</span><span style="color: #0000FF;">,</span><span style="color: #008000;">" first %,d d.p. of Pi is: %s (%s)\n"</span><span style="color: #0000FF;">,</span> <span style="color: #0000FF;">{</span><span style="color: #000000;">n</span><span style="color: #0000FF;">,</span><span style="color: #000000;">r</span><span style="color: #0000FF;">,</span><span style="color: #000000;">e</span><span style="color: #0000FF;">})</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">for</span>
<!--</syntaxhighlight>-->
{{out}}
<pre>
Longest Repeated Substring in:
CAAAABAAAABD$ is: AAAAB
GEEKSFORGEEKS$ is: GEEKS
AAAAAAAAAA$ is: AAAAAAAAA
ABCDEFG$ is: No repeated substring
ABABABA$ is: ABABA
ATCGATCGA$ is: ATCGA
banana$ is: ana
abcpqrabpqpq$ is: ab
pqrpqpqabab$ is: ab
 
first 1,000 d.p. of Pi is: 23846 (0s)
first 10,000 d.p. of Pi is: 7111369 (0.0s)
first 100,000 d.p. of Pi is: 041021944 (0.3s)
first 1,000,000 d.p. of Pi is: 756130190263 (3.2s)
</pre>
Note that mpfr_const_pi() struggles to generate more than 1,000 digits of pi under pwa/p2js [and will continue to do so unless someone graciously donates a decent/fast Chudnovsky method in pure Phix or JavaScript...]
 
=={{header|Wren}}==
{{trans|Go}}
{{libheader|Wren-big}}
{{libheader|Wren-dynamic}}
{{libheader|Wren-trait}}
As it would take a very long time to calculate the first 100,000 digits of Pi using the code from the [[Arithmetic-geometric_mean/Calculate_Pi#Wren]] task, I have instead saved the digits produced by the Go entry to a file (which only takes a few seconds) and then loaded that into the Wren script.
 
Having done that, the timings for extracting the longest repeated sequence of digits are reasonably quick and fairly linear as expected.
<syntaxhighlight lang="wren">import "./big" for BigRat
import "./dynamic" for Struct
import "./trait" for ByRef
import "io" for File
 
var maxChar = 128
 
var Node = Struct.create("Node", ["children", "suffixLink", "start", "pEnd", "suffixIndex"])
 
var text = ""
var root = null
var lastNewNode = null
var activeNode = null
var activeEdge = -1
var activeLength = 0
var remainingSuffixCount = 0
var pLeafEnd = ByRef.new(-1)
var pRootEnd = null
var pSplitEnd = null
var size = -1
 
var newNode = Fn.new { |start, pEnd|
var children = List.filled(maxChar, null)
var suffixLink = root
var suffixIndex = -1
return Node.new(children, suffixLink, start, pEnd, suffixIndex)
}
 
var edgeLength = Fn.new { |n|
if (n == root) return 0
return n.pEnd.value - n.start + 1
}
 
var walkDown = Fn.new { |currNode|
var el = edgeLength.call(currNode)
if (activeLength >= el) {
activeEdge = activeEdge + el
activeLength = activeLength - el
activeNode = currNode
return true
end if}
return false
}
end function
 
procedurevar extendSuffixTree(integer = Fn.new { |pos)|
poken(pLeafEnd,.value = pos)
remainingSuffixCount += remainingSuffixCount + 1
lastNewNode = NULLnull
while (remainingSuffixCount > 0) do{
if (activeLength == 0) thenactiveEdge = pos
if (!activeNode.children[text[activeEdge].bytes[0]]) = pos{
activeNode.children[text[activeEdge].bytes[0]] = newNode.call(pos, pLeafEnd)
end if
integer ta = text[activeEdge] if (lastNewNode) {
object ca lastNewNode.suffixLink = children[activeNode]
integer next = iff(ca lastNewNode =0?NULL:ca[ta]) null
if next == null then}
} else if ca=0 then{
var next = activeNode.children[activeNodetext[activeEdge].bytes[0]] = repeat(NULL,maxChar)
end if (walkDown.call(next)) continue
childrenif (text[activeNode][tanext.start + activeLength] == newNode(text[pos, pLeafEnd]) {
if (lastNewNode && activeNode != NULLroot) then{
suffixLinks[ lastNewNode].suffixLink = activeNode
lastNewNode = NULLnull
end if }
activeLength = activeLength + 1
else
if walkDown(next) then break
continue}
endvar iftemp = next.start + activeLength - 1
integer tppSplitEnd = text[pos]ByRef.new(temp)
var split = newNode.call(next.start, pSplitEnd)
if text[starts[next]+activeLength] == tp then
if lastNewNode != NULL and activeNode.children[text[activeEdge].bytes[0]] != root thensplit
suffixLinkssplit.children[text[lastNewNodepos].bytes[0]] = activeNodenewNode.call(pos, pLeafEnd)
next.start = next.start + lastNewNode = NULLactiveLength
split.children[text[next.start].bytes[0]] = end ifnext
if (lastNewNode) lastNewNode.suffixLink activeLength += 1split
lastNewNode = exitsplit
end if}
integer tempremainingSuffixCount = starts[next] + activeLengthremainingSuffixCount - 1
if (activeNode == root && activeLength > 0) {
pSplitEnd = allocate_word(temp,true)
integer splitnodeactiveLength = newNode(starts[next],activeLength - pSplitEnd,true)1
ta = text[activeEdge]
children[activeNode][ta] = splitnode
children[splitnode][tp] = newNode(pos, pLeafEnd)
starts[next] += activeLength
children[splitnode][text[starts[next]]] = next
if lastNewNode != NULL then
suffixLinks[lastNewNode] = splitnode
end if
lastNewNode = splitnode
end if
remainingSuffixCount -= 1
if activeNode == root and activeLength > 0 then
activeLength -= 1
activeEdge = pos - remainingSuffixCount + 1
elsif} else if (activeNode != root) then{
activeNode = suffixLinks[activeNode].suffixLink
end if}
end while}
}
end procedure
 
procedurevar setSuffixIndexByDFS(integer n, integer// labelHeight)recursive
setSuffixIndexByDFS = Fn.new { |n, labelHeight|
if n!=NULL then
if (!n) return
-- Uncomment the 3 lines below to print suffix tree
--if starts[(n].start != -1) then{
// Uncomment line below to print suffix tree
-- printf(1,text[starts[n]..peekns(ends[n])])
// System.write(text[n.start..n.pEnd.value])
--end if
}
if children[n]=0 then
var leaf = 1
suffixIndices[n] = size - labelHeight
for (i in 0...maxChar) {
-- Uncomment line below to print suffix index
if --printf(1," [%d]\n", suffixIndices.children[ni]) {
// Uncomment the 3 lines below to print suffix index
else
bool// if (leaf == 1 && n.start != -1) true{
for// i=1 to maxChar doSystem.print(" [%(n.suffixIndex)]")
// integer nci = children[n][i]}
leaf if nci != NULL then0
setSuffixIndexByDFS.call(n.children[i], labelHeight + edgeLength.call(n.children[i]))
-- Uncomment the 3 lines below to print suffix index
}
--if leaf and starts[n] != -1 then
}
-- printf(1," [%d]\n", suffixIndices[n])
if (leaf == 1) --end if{
n.suffixIndex = size - leaf = falselabelHeight
// Uncomment line below to print suffix index
setSuffixIndexByDFS(nci, labelHeight+edgeLength(nci))
// System.print(" end if[%(n.suffixIndex)]")
end for}
}
if leaf then ?9/0 end if -- (sanity check)
 
end if
var buildSuffixTree = Fn.new {
end if
size = text.count
end procedure
var temp = -1
pRootEnd = ByRef.new(temp)
procedure buildSuffixTree()
sizeroot = lengthnewNode.call(text-1, pRootEnd)
pRootEnd = allocate_word(-1,true)
root = newNode(-1, pRootEnd)
activeNode = root
for (i=1 toin 0...size) doextendSuffixTree.call(i)
var labelHeight = 0
extendSuffixTree(i)
setSuffixIndexByDFS.call(root, labelHeight)
end for
}
integer labelHeight = 0
 
setSuffixIndexByDFS(root, labelHeight)
var doTraversal // recursive
end procedure
doTraversal = Fn.new { |n, labelHeight, pMaxHeight, pSubstringStartIndex|
if (!n) return
procedure doTraversal(integer n, integer labelHeight, atom pMaxHeight, pSubstringStartIndex)
if (n!.suffixIndex =NULL= -1) then{
integerfor nsi(i =in suffixIndices[n],0...maxChar) newHeight{
if nsi(n.children[i]) { == -1 then
doTraversal.call(n.children[i], labelHeight + edgeLength.call(n.children[i]),
for i=1 to maxChar do
integer nci = children[n][i] pMaxHeight, pSubstringStartIndex)
if nci != NULL then}
}
newHeight = labelHeight+edgeLength(nci)
} else if (n.suffixIndex > -1 && (pMaxHeight.value < labelHeight - edgeLength.call(n))) {
doTraversal(nci, newHeight, pMaxHeight, pSubstringStartIndex)
pMaxHeight.value = labelHeight - end ifedgeLength.call(n)
pSubstringStartIndex.value = n.suffixIndex
end for
}
elsif nsi > -1 then
}
newHeight = labelHeight-edgeLength(n)
 
if peekns(pMaxHeight)<newHeight then
var getLongestRepeatedSubstring = Fn.new { |s|
poken(pMaxHeight,newHeight)
var maxHeight = 0
poken(pSubstringStartIndex,nsi)
var substringStartIndex = 0
end if
var pMaxHeight = ByRef.new(maxHeight)
end if
var pSubstringStartIndex = ByRef.new(substringStartIndex)
end if
doTraversal.call(root, 0, pMaxHeight, pSubstringStartIndex)
end procedure
maxHeight = pMaxHeight.value
substringStartIndex = pSubstringStartIndex.value
procedure getLongestRepeatedSubstring(string s)
// Uncomment line below to print maxHeight and substringStartIndex
atom pMaxHeight = allocate_word(0,true),
// System.print("maxHeight %(maxHeight), substringStartIndex %(substringStartIndex)")
pSubstringStartIndex = allocate_word(0,true)
if (s == "") {
doTraversal(root, 0, pMaxHeight, pSubstringStartIndex)
System.write(" %(text) is: ")
integer maxHeight = peekns(pMaxHeight),
} else {
start = peekns(pSubstringStartIndex)
System.write(" %(s) is: ")
-- Uncomment line below to print maxHeight and substringStartIndex
}
--printf(1,"maxHeight %d, substringStartIndex %d\n", {maxHeight, start})
var k = 0
string t = iff(maxHeight=0?"No repeated substring"
while (k < :text[start+1..start+maxHeight]) {
System.write(text[k + substringStartIndex])
printf(1," %s is: %s\n", {iff(s=""?text:s),t})
k = k + 1
end procedure
}
if (k == 0) {
constant tests = {"GEEKSFORGEEKS$",
System.write("No repeated substring")
"AAAAAAAAAA$",
}
"ABCDEFG$",
System.print()
"ABABABA$",
}
"ATCGATCGA$",
 
"banana$",
var tests = [
"abcpqrabpqpq$",
"GEEKSFORGEEKS$",
"pqrpqpqabab$"}
"AAAAAAAAAA$",
printf(1,"Longest Repeated Substring in:\n")
"ABCDEFG$",
for i=1 to length(tests) do
text = tests[i]"ABABABA$",
"ATCGATCGA$",
buildSuffixTree()
"banana$",
getLongestRepeatedSubstring("")
"abcpqrabpqpq$",
end for
"pqrpqpqabab$",
printf(1,"\n")
]
System.print("Longest Repeated Substring in:\n")
-- Sadly gmp crashes when I try 100,000 dp, but I suspect it would be fine with more than my paltry 4GB ram
for (test in tests) {
include mpfr.e
text = test
mpfr pi = mpfr_init(0,-10001) -- (set precision to 10,000 dp, plus the "3.")
buildSuffixTree.call()
mpfr_const_pi(pi)
getLongestRepeatedSubstring.call("")
string piStr = mpfr_sprintf("%.10000Rf", pi), s="Pi"
}
piStr = piStr[3..$] -- discard leading "3."
System.print()
maxChar = '9'
 
for i=3 to 6 do
// load pi to 100,182 digits
atom t0 = time()
var piStr = File.read("pi_100000.txt")
integer n = power(10,i)
piStr = piStr[2..-1] // remove initial 3.
if i>=5 then
var numbers = [1e3, 1e4, 1e5]
text = repeat('0',n)&"$"
maxChar = 58
for c=1 to n do
for (number in numbers) {
text[c] = rand(10)+'0'-1
var start = end forSystem.clock
text = piStr[0...number] + "$"
s = "a made up number"
buildSuffixTree.call()
else
getLongestRepeatedSubstring.call("first %(number) d.p. of Pi")
text = piStr[1..n] & "$"
var elapsed = (System.clock - start) * 1000
end if
System.print(" (this took %(elapsed) ms)\n")
buildSuffixTree()
}</syntaxhighlight>
getLongestRepeatedSubstring(sprintf("first %,d d.p. of %s", {n,s}))
 
printf(1," (this took %gs)\n\n", {time()-t0})
end for</lang>
{{out}}
<pre>
Longest Repeated Substring in:
 
GEEKSFORGEEKS$ is: GEEKS
AAAAAAAAAA$ is: AAAAAAAAA
Line 731 ⟶ 1,429:
pqrpqpqabab$ is: ab
 
first 1,0001000 d.p. of Pi is: 23846
(this took 0s9.987 ms)
 
first 10,00010000 d.p. of Pi is: 7111369
(this took 089.047s12 ms)
 
first 100,000100000 d.p. of a made up numberPi is: 022105155041021944
(this took 01031.422s072 ms)
 
first 1,000,000 d.p. of a made up number is: 09014346795
(this took 4.734s)
</pre>
9,485

edits