1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
|
class SuffixTreeEdge:
def __init__(self, start, end, child=None):
self.start = start # Start index of the substring
self.end = end # End index of the substring
self.child = child or {} # Dictionary for child nodes
class SuffixTree:
def __init__(self, text):
self.text = text + "$" # Append a unique end character to ensure uniqueness of substrings
self.root = {}
self.build_tree()
def build_tree(self):
"""
Build the Suffix Tree using Ukkonen's Algorithm.
This implementation avoids redundant storage by using indices.
"""
n = len(self.text)
for i in range(n): # Start building suffixes
suffix = self.text[i:]
self.insert_suffix(suffix, i)
def insert_suffix(self, suffix, suffix_start):
"""
Insert a suffix into the suffix tree.
Use indices for substrings to ensure compact representation.
"""
current_node = self.root
for i, char in enumerate(suffix):
if char not in current_node:
# Create a new edge with start and end indices
current_node[char] = SuffixTreeEdge(suffix_start + i, len(self.text))
current_node = current_node[char].child
def collect_substrings(self):
"""
Collect all unique substrings using depth-first traversal.
Substring edges are represented using indices for efficiency.
"""
substrings = set()
stack = [(self.root, "")]
while stack:
current_node, path = stack.pop()
for char, edge in current_node.items():
new_path = path + self.text[edge.start:edge.end]
substrings.add(new_path) # Add the substring represented by the edge
stack.append((edge.child, new_path)) # Continue traversal
return substrings
# Example Usage
text = "abc"
suffix_tree = SuffixTree(text)
unique_substrings = suffix_tree.collect_substrings() # Collect all unique substrings
print("Unique Substrings:", unique_substrings)
text2 = "aaa"
suffix_tree2 = SuffixTree(text2)
unique_substrings2 = suffix_tree2.collect_substrings() # Collect all unique substrings
print("Unique Substrings:", unique_substrings2)
|