Suffix tree: Difference between revisions
Content added Content deleted
m (→{{header|Phix}}: syntax coloured, made p2js compatible) |
|||
Line 1,165: | Line 1,165: | ||
=={{header|Phix}}== |
=={{header|Phix}}== |
||
{{trans|D}} |
{{trans|D}} |
||
<!--<lang Phix>(phixonline)--> |
|||
<lang Phix>-- tree nodes are simply {string substr, sequence children_idx} |
|||
<span style="color: #008080;">with</span> <span style="color: #008080;">javascript_semantics</span> |
|||
enum SUB=1, CHILDREN=2 |
|||
<span style="color: #000080;font-style:italic;">-- tree nodes are simply {string substr, sequence children_idx}</span> |
|||
<span style="color: #008080;">enum</span> <span style="color: #000000;">SUB</span><span style="color: #0000FF;">=</span><span style="color: #000000;">1</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">CHILDREN</span><span style="color: #0000FF;">=</span><span style="color: #000000;">2</span> |
|||
function addSuffix(sequence t, string suffix) |
|||
int n = 1, i = 1 |
|||
<span style="color: #008080;">function</span> <span style="color: #000000;">addSuffix</span><span style="color: #0000FF;">(</span><span style="color: #004080;">sequence</span> <span style="color: #000000;">t</span><span style="color: #0000FF;">,</span> <span style="color: #004080;">string</span> <span style="color: #000000;">suffix</span><span style="color: #0000FF;">)</span> |
|||
while i<=length(suffix) do |
|||
<span style="color: #004080;">int</span> <span style="color: #000000;">n</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">1</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">i</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">1</span> |
|||
integer ch = suffix[i], x2 = 1, n2 |
|||
<span style="color: #008080;">while</span> <span style="color: #000000;">i</span><span style="color: #0000FF;"><=</span><span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">suffix</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">do</span> |
|||
while (true) do |
|||
<span style="color: #004080;">integer</span> <span style="color: #000000;">ch</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">suffix</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">],</span> <span style="color: #000000;">x2</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">1</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">n2</span> |
|||
sequence children = t[n][CHILDREN] |
|||
<span style="color: #008080;">while</span> <span style="color: #0000FF;">(</span><span style="color: #004600;">true</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">do</span> |
|||
if x2>length(children) then |
|||
<span style="color: #004080;">sequence</span> <span style="color: #000000;">children</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">t</span><span style="color: #0000FF;">[</span><span style="color: #000000;">n</span><span style="color: #0000FF;">][</span><span style="color: #000000;">CHILDREN</span><span style="color: #0000FF;">]</span> |
|||
-- no matching child, remainder of suffix becomes new node. |
|||
<span style="color: #008080;">if</span> <span style="color: #000000;">x2</span><span style="color: #0000FF;">></span><span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">children</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">then</span> |
|||
t = append(t,{suffix[i..$],{}}) |
|||
<span style="color: #000080;font-style:italic;">-- no matching child, remainder of suffix becomes new node.</span> |
|||
t[n][CHILDREN] &= length(t) |
|||
<span style="color: #000000;">t</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">append</span><span style="color: #0000FF;">(</span><span style="color: #000000;">t</span><span style="color: #0000FF;">,{</span><span style="color: #000000;">suffix</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">..$],{}})</span> |
|||
return t |
|||
<span style="color: #000000;">t</span><span style="color: #0000FF;">[</span><span style="color: #000000;">n</span><span style="color: #0000FF;">][</span><span style="color: #000000;">CHILDREN</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">deep_copy</span><span style="color: #0000FF;">(</span><span style="color: #000000;">children</span><span style="color: #0000FF;">)&</span><span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">t</span><span style="color: #0000FF;">)</span> |
|||
end if |
|||
<span style="color: #008080;">return</span> <span style="color: #000000;">t</span> |
|||
n2 = children[x2] |
|||
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span> |
|||
<span style="color: #000000;">n2</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">children</span><span style="color: #0000FF;">[</span><span style="color: #000000;">x2</span><span style="color: #0000FF;">]</span> |
|||
x2 += 1 |
|||
<span style="color: #008080;">if</span> <span style="color: #000000;">t</span><span style="color: #0000FF;">[</span><span style="color: #000000;">n2</span><span style="color: #0000FF;">][</span><span style="color: #000000;">SUB</span><span style="color: #0000FF;">][</span><span style="color: #000000;">1</span><span style="color: #0000FF;">]==</span><span style="color: #000000;">ch</span> <span style="color: #008080;">then</span> <span style="color: #008080;">exit</span> <span style="color: #008080;">end</span> <span style="color: #008080;">if</span> |
|||
end while |
|||
<span style="color: #000000;">x2</span> <span style="color: #0000FF;">+=</span> <span style="color: #000000;">1</span> |
|||
-- find prefix of remaining suffix in common with child |
|||
<span style="color: #008080;">end</span> <span style="color: #008080;">while</span> |
|||
string prefix = t[n2][SUB] |
|||
<span style="color: #000080;font-style:italic;">-- find prefix of remaining suffix in common with child</span> |
|||
int j = 0 |
|||
<span style="color: #004080;">string</span> <span style="color: #000000;">prefix</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">t</span><span style="color: #0000FF;">[</span><span style="color: #000000;">n2</span><span style="color: #0000FF;">][</span><span style="color: #000000;">SUB</span><span style="color: #0000FF;">]</span> |
|||
while j<length(prefix) do |
|||
<span style="color: #004080;">int</span> <span style="color: #000000;">j</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">0</span> |
|||
if suffix[i+j]!=prefix[j+1] then |
|||
<span style="color: #008080;">while</span> <span style="color: #000000;">j</span><span style="color: #0000FF;"><</span><span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">prefix</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">do</span> |
|||
-- split n2: new node for the part in common |
|||
<span style="color: #008080;">if</span> <span style="color: #000000;">suffix</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">+</span><span style="color: #000000;">j</span><span style="color: #0000FF;">]!=</span><span style="color: #000000;">prefix</span><span style="color: #0000FF;">[</span><span style="color: #000000;">j</span><span style="color: #0000FF;">+</span><span style="color: #000000;">1</span><span style="color: #0000FF;">]</span> <span style="color: #008080;">then</span> |
|||
t = append(t,{prefix[1..j],{n2}}) |
|||
-- |
<span style="color: #000080;font-style:italic;">-- split n2: new node for the part in common</span> |
||
<span style="color: #000000;">t</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">append</span><span style="color: #0000FF;">(</span><span style="color: #000000;">t</span><span style="color: #0000FF;">,{</span><span style="color: #000000;">prefix</span><span style="color: #0000FF;">[</span><span style="color: #000000;">1</span><span style="color: #0000FF;">..</span><span style="color: #000000;">j</span><span style="color: #0000FF;">],{</span><span style="color: #000000;">n2</span><span style="color: #0000FF;">}})</span> |
|||
t[n2][SUB] = prefix[j+1..$] |
|||
-- |
<span style="color: #000080;font-style:italic;">-- old node loses the part in common</span> |
||
<span style="color: #000000;">t</span><span style="color: #0000FF;">[</span><span style="color: #000000;">n2</span><span style="color: #0000FF;">][</span><span style="color: #000000;">SUB</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">prefix</span><span style="color: #0000FF;">[</span><span style="color: #000000;">j</span><span style="color: #0000FF;">+</span><span style="color: #000000;">1</span><span style="color: #0000FF;">..$]</span> |
|||
n2 = length(t) |
|||
<span style="color: #000080;font-style:italic;">-- and child idx moves to newly created node</span> |
|||
t[n][CHILDREN][x2] = n2 |
|||
<span style="color: #000000;">n2</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">t</span><span style="color: #0000FF;">)</span> |
|||
exit -- continue down the tree |
|||
<span style="color: #004080;">sequence</span> <span style="color: #000000;">children</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">deep_copy</span><span style="color: #0000FF;">(</span><span style="color: #000000;">t</span><span style="color: #0000FF;">[</span><span style="color: #000000;">n</span><span style="color: #0000FF;">][</span><span style="color: #000000;">CHILDREN</span><span style="color: #0000FF;">])</span> |
|||
end if |
|||
<span style="color: #000000;">children</span><span style="color: #0000FF;">[</span><span style="color: #000000;">x2</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">n2</span> |
|||
j += 1 |
|||
<span style="color: #000000;">t</span><span style="color: #0000FF;">[</span><span style="color: #000000;">n</span><span style="color: #0000FF;">][</span><span style="color: #000000;">CHILDREN</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">children</span> |
|||
end while |
|||
<span style="color: #008080;">exit</span> <span style="color: #000080;font-style:italic;">-- continue down the tree</span> |
|||
i += j -- advance past part in common |
|||
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span> |
|||
n = n2 -- continue down the tree |
|||
<span style="color: #000000;">j</span> <span style="color: #0000FF;">+=</span> <span style="color: #000000;">1</span> |
|||
end while |
|||
<span style="color: #008080;">end</span> <span style="color: #008080;">while</span> |
|||
return t |
|||
<span style="color: #000000;">i</span> <span style="color: #0000FF;">+=</span> <span style="color: #000000;">j</span> <span style="color: #000080;font-style:italic;">-- advance past part in common</span> |
|||
end function |
|||
<span style="color: #000000;">n</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">n2</span> <span style="color: #000080;font-style:italic;">-- continue down the tree</span> |
|||
<span style="color: #008080;">end</span> <span style="color: #008080;">while</span> |
|||
function SuffixTree(string s) |
|||
<span style="color: #008080;">return</span> <span style="color: #000000;">t</span> |
|||
sequence t = {{"",{}}} |
|||
<span style="color: #008080;">end</span> <span style="color: #008080;">function</span> |
|||
for i=1 to length(s) do |
|||
t = addSuffix(t,s[i..$]) |
|||
<span style="color: #008080;">function</span> <span style="color: #000000;">SuffixTree</span><span style="color: #0000FF;">(</span><span style="color: #004080;">string</span> <span style="color: #000000;">s</span><span style="color: #0000FF;">)</span> |
|||
end for |
|||
<span style="color: #004080;">sequence</span> <span style="color: #000000;">t</span> <span style="color: #0000FF;">=</span> <span style="color: #0000FF;">{{</span><span style="color: #008000;">""</span><span style="color: #0000FF;">,{}}}</span> |
|||
return t |
|||
<span style="color: #008080;">for</span> <span style="color: #000000;">i</span><span style="color: #0000FF;">=</span><span style="color: #000000;">1</span> <span style="color: #008080;">to</span> <span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">s</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">do</span> |
|||
end function |
|||
<span style="color: #000000;">t</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">addSuffix</span><span style="color: #0000FF;">(</span><span style="color: #000000;">t</span><span style="color: #0000FF;">,</span><span style="color: #000000;">s</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">..$])</span> |
|||
<span style="color: #008080;">end</span> <span style="color: #008080;">for</span> |
|||
procedure visualize(sequence t, integer n=1, string pre="") |
|||
<span style="color: #008080;">return</span> <span style="color: #000000;">t</span> |
|||
if length(t)=0 then |
|||
<span style="color: #008080;">end</span> <span style="color: #008080;">function</span> |
|||
printf(1,"<empty>\n"); |
|||
return; |
|||
<span style="color: #008080;">procedure</span> <span style="color: #000000;">visualize</span><span style="color: #0000FF;">(</span><span style="color: #004080;">sequence</span> <span style="color: #000000;">t</span><span style="color: #0000FF;">,</span> <span style="color: #004080;">integer</span> <span style="color: #000000;">n</span><span style="color: #0000FF;">=</span><span style="color: #000000;">1</span><span style="color: #0000FF;">,</span> <span style="color: #004080;">string</span> <span style="color: #000000;">pre</span><span style="color: #0000FF;">=</span><span style="color: #008000;">""</span><span style="color: #0000FF;">)</span> |
|||
end if |
|||
<span style="color: #008080;">if</span> <span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">t</span><span style="color: #0000FF;">)=</span><span style="color: #000000;">0</span> <span style="color: #008080;">then</span> |
|||
sequence children = t[n][CHILDREN] |
|||
<span style="color: #7060A8;">printf</span><span style="color: #0000FF;">(</span><span style="color: #000000;">1</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"<empty>\n"</span><span style="color: #0000FF;">);</span> |
|||
if length(children)=0 then |
|||
<span style="color: #008080;">return</span><span style="color: #0000FF;">;</span> |
|||
printf(1,"- %s\n", {t[n][SUB]}) |
|||
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span> |
|||
return |
|||
<span style="color: #004080;">sequence</span> <span style="color: #000000;">children</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">t</span><span style="color: #0000FF;">[</span><span style="color: #000000;">n</span><span style="color: #0000FF;">][</span><span style="color: #000000;">CHILDREN</span><span style="color: #0000FF;">]</span> |
|||
end if |
|||
<span style="color: #008080;">if</span> <span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">children</span><span style="color: #0000FF;">)=</span><span style="color: #000000;">0</span> <span style="color: #008080;">then</span> |
|||
printf(1,"+ %s\n", {t[n][SUB]}) |
|||
<span style="color: #7060A8;">printf</span><span style="color: #0000FF;">(</span><span style="color: #000000;">1</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"- %s\n"</span><span style="color: #0000FF;">,</span> <span style="color: #0000FF;">{</span><span style="color: #000000;">t</span><span style="color: #0000FF;">[</span><span style="color: #000000;">n</span><span style="color: #0000FF;">][</span><span style="color: #000000;">SUB</span><span style="color: #0000FF;">]})</span> |
|||
integer l = length(children) |
|||
<span style="color: #008080;">return</span> |
|||
for i=1 to l do |
|||
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span> |
|||
puts(1,pre&"+-") |
|||
<span style="color: #7060A8;">printf</span><span style="color: #0000FF;">(</span><span style="color: #000000;">1</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"+ %s\n"</span><span style="color: #0000FF;">,</span> <span style="color: #0000FF;">{</span><span style="color: #000000;">t</span><span style="color: #0000FF;">[</span><span style="color: #000000;">n</span><span style="color: #0000FF;">][</span><span style="color: #000000;">SUB</span><span style="color: #0000FF;">]})</span> |
|||
visualize(t,children[i],pre&iff(i=l?" ":"| ")) |
|||
<span style="color: #004080;">integer</span> <span style="color: #000000;">l</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">children</span><span style="color: #0000FF;">)</span> |
|||
end for |
|||
<span style="color: #008080;">for</span> <span style="color: #000000;">i</span><span style="color: #0000FF;">=</span><span style="color: #000000;">1</span> <span style="color: #008080;">to</span> <span style="color: #000000;">l</span> <span style="color: #008080;">do</span> |
|||
end procedure |
|||
<span style="color: #7060A8;">puts</span><span style="color: #0000FF;">(</span><span style="color: #000000;">1</span><span style="color: #0000FF;">,</span><span style="color: #000000;">pre</span><span style="color: #0000FF;">&</span><span style="color: #008000;">"+-"</span><span style="color: #0000FF;">)</span> |
|||
<span style="color: #000000;">visualize</span><span style="color: #0000FF;">(</span><span style="color: #000000;">t</span><span style="color: #0000FF;">,</span><span style="color: #000000;">children</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">],</span><span style="color: #000000;">pre</span><span style="color: #0000FF;">&</span><span style="color: #008080;">iff</span><span style="color: #0000FF;">(</span><span style="color: #000000;">i</span><span style="color: #0000FF;">=</span><span style="color: #000000;">l</span><span style="color: #0000FF;">?</span><span style="color: #008000;">" "</span><span style="color: #0000FF;">:</span><span style="color: #008000;">"| "</span><span style="color: #0000FF;">))</span> |
|||
sequence t = SuffixTree("banana$") |
|||
<span style="color: #008080;">end</span> <span style="color: #008080;">for</span> |
|||
visualize(t)</lang> |
|||
<span style="color: #008080;">end</span> <span style="color: #008080;">procedure</span> |
|||
<span style="color: #004080;">sequence</span> <span style="color: #000000;">t</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">SuffixTree</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"banana$"</span><span style="color: #0000FF;">)</span> |
|||
<span style="color: #000000;">visualize</span><span style="color: #0000FF;">(</span><span style="color: #000000;">t</span><span style="color: #0000FF;">)</span> |
|||
<!--</lang>--> |
|||
{{out}} |
{{out}} |
||
<pre> |
<pre> |