Ukkonen’s suffix tree construction: Difference between revisions
m (→{{header|Go}}: Added a note in the preamble about the C code on which this entry is based not compiling as it stands.) |
|||
Line 296: | Line 296: | ||
first 100000 d.p. of Pi is: 041021944 |
first 100000 d.p. of Pi is: 041021944 |
||
(this took 599.770281ms) |
(this took 599.770281ms) |
||
</pre> |
|||
=={{header|Phix}}== |
|||
{{trans|Go}} |
|||
<lang Phix>-- demo/rosetta/Ukkonens_Suffix_Tree.exw |
|||
integer maxChar = 'z' |
|||
sequence children = {}, |
|||
suffixLinks = {}, |
|||
starts = {}, |
|||
ends = {}, |
|||
suffixIndices = {} |
|||
string text |
|||
atom pSplitEnd, |
|||
pRootEnd, |
|||
pLeafEnd = allocate_word(0,true) |
|||
integer root = NULL, |
|||
lastNewNode, |
|||
activeNode, |
|||
activeEdge = -1, |
|||
activeLength = 0, |
|||
remainingSuffixCount = 0, |
|||
size = -1 |
|||
function newNode(integer start, atom pFinish, bool bKids=false) |
|||
children = append(children,iff(bKids?repeat(NULL,maxChar):0)) |
|||
suffixLinks = append(suffixLinks,root) |
|||
starts = append(starts,start) |
|||
ends = append(ends,pFinish) |
|||
suffixIndices = append(suffixIndices,-1) |
|||
return length(children) |
|||
end function |
|||
function edgeLength(integer n) |
|||
if n == root then |
|||
return 0 |
|||
end if |
|||
return peekns(ends[n]) - starts[n] + 1 |
|||
end function |
|||
function walkDown(integer currNode) |
|||
integer l = edgeLength(currNode) |
|||
if activeLength >= l then |
|||
activeEdge += l |
|||
activeLength -= l |
|||
activeNode = currNode |
|||
return true |
|||
end if |
|||
return false |
|||
end function |
|||
procedure extendSuffixTree(integer pos) |
|||
poken(pLeafEnd,pos) |
|||
remainingSuffixCount += 1 |
|||
lastNewNode = NULL |
|||
while remainingSuffixCount > 0 do |
|||
if activeLength == 0 then |
|||
activeEdge = pos |
|||
end if |
|||
integer ta = text[activeEdge] |
|||
object ca = children[activeNode] |
|||
integer next = iff(ca=0?NULL:ca[ta]) |
|||
if next == null then |
|||
if ca=0 then |
|||
children[activeNode] = repeat(NULL,maxChar) |
|||
end if |
|||
children[activeNode][ta] = newNode(pos, pLeafEnd) |
|||
if lastNewNode != NULL then |
|||
suffixLinks[lastNewNode] = activeNode |
|||
lastNewNode = NULL |
|||
end if |
|||
else |
|||
if walkDown(next) then |
|||
continue |
|||
end if |
|||
integer tp = text[pos] |
|||
if text[starts[next]+activeLength] == tp then |
|||
if lastNewNode != NULL and activeNode != root then |
|||
suffixLinks[lastNewNode] = activeNode |
|||
lastNewNode = NULL |
|||
end if |
|||
activeLength += 1 |
|||
exit |
|||
end if |
|||
integer temp = starts[next] + activeLength - 1 |
|||
pSplitEnd = allocate_word(temp,true) |
|||
integer splitnode = newNode(starts[next], pSplitEnd,true) |
|||
ta = text[activeEdge] |
|||
children[activeNode][ta] = splitnode |
|||
children[splitnode][tp] = newNode(pos, pLeafEnd) |
|||
starts[next] += activeLength |
|||
children[splitnode][text[starts[next]]] = next |
|||
if lastNewNode != NULL then |
|||
suffixLinks[lastNewNode] = splitnode |
|||
end if |
|||
lastNewNode = splitnode |
|||
end if |
|||
remainingSuffixCount -= 1 |
|||
if activeNode == root and activeLength > 0 then |
|||
activeLength -= 1 |
|||
activeEdge = pos - remainingSuffixCount + 1 |
|||
elsif activeNode != root then |
|||
activeNode = suffixLinks[activeNode] |
|||
end if |
|||
end while |
|||
end procedure |
|||
procedure setSuffixIndexByDFS(integer n, integer labelHeight) |
|||
if n!=NULL then |
|||
-- Uncomment the 3 lines below to print suffix tree |
|||
--if starts[n]!=-1 then |
|||
-- printf(1,text[starts[n]..peekns(ends[n])]) |
|||
--end if |
|||
if children[n]=0 then |
|||
suffixIndices[n] = size - labelHeight |
|||
-- Uncomment line below to print suffix index |
|||
--printf(1," [%d]\n", suffixIndices[n]) |
|||
else |
|||
bool leaf = true |
|||
for i=1 to maxChar do |
|||
integer nci = children[n][i] |
|||
if nci != NULL then |
|||
-- Uncomment the 3 lines below to print suffix index |
|||
--if leaf and starts[n] != -1 then |
|||
-- printf(1," [%d]\n", suffixIndices[n]) |
|||
--end if |
|||
leaf = false |
|||
setSuffixIndexByDFS(nci, labelHeight+edgeLength(nci)) |
|||
end if |
|||
end for |
|||
if leaf then ?9/0 end if -- (sanity check) |
|||
end if |
|||
end if |
|||
end procedure |
|||
procedure buildSuffixTree() |
|||
size = length(text) |
|||
pRootEnd = allocate_word(-1,true) |
|||
root = newNode(-1, pRootEnd) |
|||
activeNode = root |
|||
for i=1 to size do |
|||
extendSuffixTree(i) |
|||
end for |
|||
integer labelHeight = 0 |
|||
setSuffixIndexByDFS(root, labelHeight) |
|||
end procedure |
|||
procedure doTraversal(integer n, integer labelHeight, atom pMaxHeight, pSubstringStartIndex) |
|||
if n!=NULL then |
|||
integer nsi = suffixIndices[n], newHeight |
|||
if nsi == -1 then |
|||
for i=1 to maxChar do |
|||
integer nci = children[n][i] |
|||
if nci != NULL then |
|||
newHeight = labelHeight+edgeLength(nci) |
|||
doTraversal(nci, newHeight, pMaxHeight, pSubstringStartIndex) |
|||
end if |
|||
end for |
|||
elsif nsi > -1 then |
|||
newHeight = labelHeight-edgeLength(n) |
|||
if peekns(pMaxHeight)<newHeight then |
|||
poken(pMaxHeight,newHeight) |
|||
poken(pSubstringStartIndex,nsi) |
|||
end if |
|||
end if |
|||
end if |
|||
end procedure |
|||
procedure getLongestRepeatedSubstring(string s) |
|||
atom pMaxHeight = allocate_word(0,true), |
|||
pSubstringStartIndex = allocate_word(0,true) |
|||
doTraversal(root, 0, pMaxHeight, pSubstringStartIndex) |
|||
integer maxHeight = peekns(pMaxHeight), |
|||
start = peekns(pSubstringStartIndex) |
|||
-- Uncomment line below to print maxHeight and substringStartIndex |
|||
--printf(1,"maxHeight %d, substringStartIndex %d\n", {maxHeight, start}) |
|||
string t = iff(maxHeight=0?"No repeated substring" |
|||
:text[start+1..start+maxHeight]) |
|||
printf(1," %s is: %s\n", {iff(s=""?text:s),t}) |
|||
end procedure |
|||
constant tests = {"GEEKSFORGEEKS$", |
|||
"AAAAAAAAAA$", |
|||
"ABCDEFG$", |
|||
"ABABABA$", |
|||
"ATCGATCGA$", |
|||
"banana$", |
|||
"abcpqrabpqpq$", |
|||
"pqrpqpqabab$"} |
|||
printf(1,"Longest Repeated Substring in:\n") |
|||
for i=1 to length(tests) do |
|||
text = tests[i] |
|||
buildSuffixTree() |
|||
getLongestRepeatedSubstring("") |
|||
end for |
|||
printf(1,"\n") |
|||
-- Sadly gmp crashes when I try 100,000 dp, but I suspect it would be fine with more than my paltry 4GB ram |
|||
include mpfr.e |
|||
mpfr pi = mpfr_init(0,-10001) -- (set precision to 10,000 dp, plus the "3.") |
|||
mpfr_const_pi(pi) |
|||
string piStr = mpfr_sprintf("%.10000Rf", pi), s="Pi" |
|||
piStr = piStr[3..$] -- discard leading "3." |
|||
maxChar = '9' |
|||
for i=3 to 6 do |
|||
atom t0 = time() |
|||
integer n = power(10,i) |
|||
if i>=5 then |
|||
text = repeat('0',n)&"$" |
|||
for c=1 to n do |
|||
text[c] = rand(10)+'0'-1 |
|||
end for |
|||
s = "a made up number" |
|||
else |
|||
text = piStr[1..n] & "$" |
|||
end if |
|||
buildSuffixTree() |
|||
getLongestRepeatedSubstring(sprintf("first %,d d.p. of %s", {n,s})) |
|||
printf(1," (this took %gs)\n\n", {time()-t0}) |
|||
end for</lang> |
|||
{{out}} |
|||
<pre> |
|||
Longest Repeated Substring in: |
|||
GEEKSFORGEEKS$ is: GEEKS |
|||
AAAAAAAAAA$ is: AAAAAAAAA |
|||
ABCDEFG$ is: No repeated substring |
|||
ABABABA$ is: ABABA |
|||
ATCGATCGA$ is: ATCGA |
|||
banana$ is: ana |
|||
abcpqrabpqpq$ is: ab |
|||
pqrpqpqabab$ is: ab |
|||
first 1,000 d.p. of Pi is: 23846 |
|||
(this took 0s) |
|||
first 10,000 d.p. of Pi is: 7111369 |
|||
(this took 0.047s) |
|||
first 100,000 d.p. of a made up number is: 022105155 |
|||
(this took 0.422s) |
|||
first 1,000,000 d.p. of a made up number is: 09014346795 |
|||
(this took 4.734s) |
|||
</pre> |
</pre> |
Revision as of 09:16, 2 August 2020
Suffix Trees are very useful in numerous string processing and computational biology problems.
The task is to create a function which implements Ukkonen’s algorithm to create a useful Suffix Tree as described:
Part 1 Part 2 Part 3 Part 4 Part 5 Part 6
Using Arithmetic-geometric mean/Calculate Pi generate the first 1000, 10000, and 100000 decimal places of pi. Using your implementation with an alphabet of 0 through 9 (plus $ say to make the tree explicit) find the longest repeated string in each list. Time your results and demonstrate that your implementation is linear (i.e. that 10000 takes approx. 10 times as long as 1000). You may vary the size of the lists of decimal places of pi to give reasonable answers.
Go
This is a translation of the C code here which is an extended form of the code in Part 6 of the task description for finding the longest repeated substring of a given string. In the interests of brevity, the extensive comments in the C version have been largely omitted. The C code doesn't compile as it stands but I have added a fix in the Talk Page.
For convenience I have included the code from the Arithmetic-geometric_mean/Calculate_Pi#Go task in the same package.
It takes around 25 seconds on my machine (Celeron @1.6GHz) to calculate the first 100,000 (or so) decimal places of Pi. Having done that, the timings for extracting the longest repeated sequence of digits are quick and fairly linear as expected.
As the task doesn't say whether overlapping sequences are to be counted, I've assumed that they are as this is what the algorithm naturally produces. <lang go>package main
import (
"fmt" "math/big" "time"
)
var maxChar = 128
type Node struct {
children []*Node suffixLink *Node start int end *int suffixIndex int
}
var (
text string root *Node lastNewNode *Node activeNode *Node activeEdge = -1 activeLength = 0 remainingSuffixCount = 0 leafEnd = -1 rootEnd *int splitEnd *int size = -1
)
func newNode(start int, end *int) *Node {
node := new(Node) node.children = make([]*Node, maxChar) node.suffixLink = root node.start = start node.end = end node.suffixIndex = -1 return node
}
func edgeLength(n *Node) int {
if n == root { return 0 } return *(n.end) - n.start + 1
}
func walkDown(currNode *Node) bool {
if activeLength >= edgeLength(currNode) { activeEdge += edgeLength(currNode) activeLength -= edgeLength(currNode) activeNode = currNode return true } return false
}
func extendSuffixTree(pos int) {
leafEnd = pos remainingSuffixCount++ lastNewNode = nil for remainingSuffixCount > 0 { if activeLength == 0 { activeEdge = pos } if activeNode.children[text[activeEdge]] == nil { activeNode.children[text[activeEdge]] = newNode(pos, &leafEnd) if lastNewNode != nil { lastNewNode.suffixLink = activeNode lastNewNode = nil } } else { next := activeNode.children[text[activeEdge]] if walkDown(next) { continue } if text[next.start+activeLength] == text[pos] { if lastNewNode != nil && activeNode != root { lastNewNode.suffixLink = activeNode lastNewNode = nil } activeLength++ break } temp := next.start + activeLength - 1 splitEnd = &temp split := newNode(next.start, splitEnd) activeNode.children[text[activeEdge]] = split split.children[text[pos]] = newNode(pos, &leafEnd) next.start += activeLength split.children[text[next.start]] = next if lastNewNode != nil { lastNewNode.suffixLink = split } lastNewNode = split } remainingSuffixCount-- if activeNode == root && activeLength > 0 { activeLength-- activeEdge = pos - remainingSuffixCount + 1 } else if activeNode != root { activeNode = activeNode.suffixLink } }
}
func setSuffixIndexByDFS(n *Node, labelHeight int) {
if n == nil { return } if n.start != -1 { // Uncomment line below to print suffix tree // fmt.Print(text[n.start: *(n.end) +1]) } leaf := 1 for i := 0; i < maxChar; i++ { if n.children[i] != nil { // Uncomment the 3 lines below to print suffix index //if leaf == 1 && n.start != -1 { // fmt.Printf(" [%d]\n", n.suffixIndex) //} leaf = 0 setSuffixIndexByDFS(n.children[i], labelHeight+edgeLength(n.children[i])) } } if leaf == 1 { n.suffixIndex = size - labelHeight // Uncomment line below to print suffix index //fmt.Printf(" [%d]\n", n.suffixIndex) }
}
func buildSuffixTree() {
size = len(text) temp := -1 rootEnd = &temp root = newNode(-1, rootEnd) activeNode = root for i := 0; i < size; i++ { extendSuffixTree(i) } labelHeight := 0 setSuffixIndexByDFS(root, labelHeight)
}
func doTraversal(n *Node, labelHeight int, maxHeight, substringStartIndex *int) {
if n == nil { return } if n.suffixIndex == -1 { for i := 0; i < maxChar; i++ { if n.children[i] != nil { doTraversal(n.children[i], labelHeight+edgeLength(n.children[i]), maxHeight, substringStartIndex) } } } else if n.suffixIndex > -1 && (*maxHeight < labelHeight-edgeLength(n)) { *maxHeight = labelHeight - edgeLength(n) *substringStartIndex = n.suffixIndex }
}
func getLongestRepeatedSubstring(s string) {
maxHeight := 0 substringStartIndex := 0 doTraversal(root, 0, &maxHeight, &substringStartIndex) // Uncomment line below to print maxHeight and substringStartIndex // fmt.Printf("maxHeight %d, substringStartIndex %d\n", maxHeight, substringStartIndex) if s == "" { fmt.Printf(" %s is: ", text) } else { fmt.Printf(" %s is: ", s) } k := 0 for ; k < maxHeight; k++ { fmt.Printf("%c", text[k+substringStartIndex]) } if k == 0 { fmt.Print("No repeated substring") } fmt.Println()
}
func calculatePi() *big.Float {
one := big.NewFloat(1) two := big.NewFloat(2) four := big.NewFloat(4) prec := uint(325 * 1024) // enough to calculate Pi to 100,182 decimal digits
a := big.NewFloat(1).SetPrec(prec) g := new(big.Float).SetPrec(prec)
// temporary variables t := new(big.Float).SetPrec(prec) u := new(big.Float).SetPrec(prec)
g.Quo(a, t.Sqrt(two)) sum := new(big.Float) pow := big.NewFloat(2)
for a.Cmp(g) != 0 { t.Add(a, g) t.Quo(t, two) g.Sqrt(u.Mul(a, g)) a.Set(t) pow.Mul(pow, two) t.Sub(t.Mul(a, a), u.Mul(g, g)) sum.Add(sum, t.Mul(t, pow)) }
t.Mul(a, a) t.Mul(t, four) pi := t.Quo(t, u.Sub(one, sum)) return pi
}
func main() {
tests := []string{ "GEEKSFORGEEKS$", "AAAAAAAAAA$", "ABCDEFG$", "ABABABA$", "ATCGATCGA$", "banana$", "abcpqrabpqpq$", "pqrpqpqabab$", } fmt.Println("Longest Repeated Substring in:\n") for _, test := range tests { text = test buildSuffixTree() getLongestRepeatedSubstring("") } fmt.Println()
pi := calculatePi() piStr := fmt.Sprintf("%v", pi) piStr = piStr[2:] // remove initial 3. numbers := []int{1e3, 1e4, 1e5} maxChar = 58 for _, number := range numbers { start := time.Now() text = piStr[0:number] + "$" buildSuffixTree() getLongestRepeatedSubstring(fmt.Sprintf("first %d d.p. of Pi", number)) elapsed := time.Now().Sub(start) fmt.Printf(" (this took %s)\n\n", elapsed) }
}</lang>
- Output:
Sample run:
Longest Repeated Substring in: GEEKSFORGEEKS$ is: GEEKS AAAAAAAAAA$ is: AAAAAAAAA ABCDEFG$ is: No repeated substring ABABABA$ is: ABABA ATCGATCGA$ is: ATCGA banana$ is: ana abcpqrabpqpq$ is: ab pqrpqpqabab$ is: ab first 1000 d.p. of Pi is: 23846 (this took 7.728858ms) first 10000 d.p. of Pi is: 7111369 (this took 57.524478ms) first 100000 d.p. of Pi is: 041021944 (this took 599.770281ms)
Phix
<lang Phix>-- demo/rosetta/Ukkonens_Suffix_Tree.exw integer maxChar = 'z'
sequence children = {},
suffixLinks = {}, starts = {}, ends = {}, suffixIndices = {}
string text atom pSplitEnd,
pRootEnd, pLeafEnd = allocate_word(0,true)
integer root = NULL,
lastNewNode, activeNode, activeEdge = -1, activeLength = 0, remainingSuffixCount = 0, size = -1
function newNode(integer start, atom pFinish, bool bKids=false)
children = append(children,iff(bKids?repeat(NULL,maxChar):0)) suffixLinks = append(suffixLinks,root) starts = append(starts,start) ends = append(ends,pFinish) suffixIndices = append(suffixIndices,-1) return length(children)
end function
function edgeLength(integer n)
if n == root then return 0 end if return peekns(ends[n]) - starts[n] + 1
end function
function walkDown(integer currNode)
integer l = edgeLength(currNode) if activeLength >= l then activeEdge += l activeLength -= l activeNode = currNode return true end if return false
end function
procedure extendSuffixTree(integer pos)
poken(pLeafEnd,pos) remainingSuffixCount += 1 lastNewNode = NULL while remainingSuffixCount > 0 do if activeLength == 0 then activeEdge = pos end if integer ta = text[activeEdge] object ca = children[activeNode] integer next = iff(ca=0?NULL:ca[ta]) if next == null then if ca=0 then children[activeNode] = repeat(NULL,maxChar) end if children[activeNode][ta] = newNode(pos, pLeafEnd) if lastNewNode != NULL then suffixLinks[lastNewNode] = activeNode lastNewNode = NULL end if else if walkDown(next) then continue end if integer tp = text[pos] if text[starts[next]+activeLength] == tp then if lastNewNode != NULL and activeNode != root then suffixLinks[lastNewNode] = activeNode lastNewNode = NULL end if activeLength += 1 exit end if integer temp = starts[next] + activeLength - 1 pSplitEnd = allocate_word(temp,true) integer splitnode = newNode(starts[next], pSplitEnd,true) ta = text[activeEdge] children[activeNode][ta] = splitnode children[splitnode][tp] = newNode(pos, pLeafEnd) starts[next] += activeLength children[splitnode][text[starts[next]]] = next if lastNewNode != NULL then suffixLinks[lastNewNode] = splitnode end if lastNewNode = splitnode end if remainingSuffixCount -= 1 if activeNode == root and activeLength > 0 then activeLength -= 1 activeEdge = pos - remainingSuffixCount + 1 elsif activeNode != root then activeNode = suffixLinks[activeNode] end if end while
end procedure
procedure setSuffixIndexByDFS(integer n, integer labelHeight)
if n!=NULL then -- Uncomment the 3 lines below to print suffix tree --if starts[n]!=-1 then -- printf(1,text[starts[n]..peekns(ends[n])]) --end if if children[n]=0 then suffixIndices[n] = size - labelHeight -- Uncomment line below to print suffix index --printf(1," [%d]\n", suffixIndices[n]) else bool leaf = true for i=1 to maxChar do integer nci = children[n][i] if nci != NULL then -- Uncomment the 3 lines below to print suffix index --if leaf and starts[n] != -1 then -- printf(1," [%d]\n", suffixIndices[n]) --end if leaf = false setSuffixIndexByDFS(nci, labelHeight+edgeLength(nci)) end if end for if leaf then ?9/0 end if -- (sanity check) end if end if
end procedure
procedure buildSuffixTree()
size = length(text) pRootEnd = allocate_word(-1,true) root = newNode(-1, pRootEnd) activeNode = root for i=1 to size do extendSuffixTree(i) end for integer labelHeight = 0 setSuffixIndexByDFS(root, labelHeight)
end procedure
procedure doTraversal(integer n, integer labelHeight, atom pMaxHeight, pSubstringStartIndex)
if n!=NULL then integer nsi = suffixIndices[n], newHeight if nsi == -1 then for i=1 to maxChar do integer nci = children[n][i] if nci != NULL then newHeight = labelHeight+edgeLength(nci) doTraversal(nci, newHeight, pMaxHeight, pSubstringStartIndex) end if end for elsif nsi > -1 then newHeight = labelHeight-edgeLength(n) if peekns(pMaxHeight)<newHeight then poken(pMaxHeight,newHeight) poken(pSubstringStartIndex,nsi) end if end if end if
end procedure
procedure getLongestRepeatedSubstring(string s)
atom pMaxHeight = allocate_word(0,true), pSubstringStartIndex = allocate_word(0,true) doTraversal(root, 0, pMaxHeight, pSubstringStartIndex) integer maxHeight = peekns(pMaxHeight), start = peekns(pSubstringStartIndex) -- Uncomment line below to print maxHeight and substringStartIndex --printf(1,"maxHeight %d, substringStartIndex %d\n", {maxHeight, start}) string t = iff(maxHeight=0?"No repeated substring" :text[start+1..start+maxHeight]) printf(1," %s is: %s\n", {iff(s=""?text:s),t})
end procedure
constant tests = {"GEEKSFORGEEKS$",
"AAAAAAAAAA$", "ABCDEFG$", "ABABABA$", "ATCGATCGA$", "banana$", "abcpqrabpqpq$", "pqrpqpqabab$"}
printf(1,"Longest Repeated Substring in:\n") for i=1 to length(tests) do
text = tests[i] buildSuffixTree() getLongestRepeatedSubstring("")
end for printf(1,"\n")
-- Sadly gmp crashes when I try 100,000 dp, but I suspect it would be fine with more than my paltry 4GB ram include mpfr.e mpfr pi = mpfr_init(0,-10001) -- (set precision to 10,000 dp, plus the "3.") mpfr_const_pi(pi) string piStr = mpfr_sprintf("%.10000Rf", pi), s="Pi" piStr = piStr[3..$] -- discard leading "3." maxChar = '9' for i=3 to 6 do
atom t0 = time() integer n = power(10,i) if i>=5 then text = repeat('0',n)&"$" for c=1 to n do text[c] = rand(10)+'0'-1 end for s = "a made up number" else text = piStr[1..n] & "$" end if buildSuffixTree() getLongestRepeatedSubstring(sprintf("first %,d d.p. of %s", {n,s})) printf(1," (this took %gs)\n\n", {time()-t0})
end for</lang>
- Output:
Longest Repeated Substring in: GEEKSFORGEEKS$ is: GEEKS AAAAAAAAAA$ is: AAAAAAAAA ABCDEFG$ is: No repeated substring ABABABA$ is: ABABA ATCGATCGA$ is: ATCGA banana$ is: ana abcpqrabpqpq$ is: ab pqrpqpqabab$ is: ab first 1,000 d.p. of Pi is: 23846 (this took 0s) first 10,000 d.p. of Pi is: 7111369 (this took 0.047s) first 100,000 d.p. of a made up number is: 022105155 (this took 0.422s) first 1,000,000 d.p. of a made up number is: 09014346795 (this took 4.734s)