AC自动机模板
时间: 2025-06-09 14:46:29 浏览: 3
### AC自动机算法模板代码
以下是基于C++和Python两种主流编程语言的AC自动机实现模板:
#### C++ 实现
```cpp
#include <bits/stdc++.h>
using namespace std;
struct ACAutomaton {
struct Node {
int next[26]; // 字符跳转表
int fail; // 失败指针
bool isEnd; // 是否为某个模式串的结尾
vector<int> output; // 输出链,记录该节点对应的模式串编号
Node() : fail(-1), isEnd(false) { memset(next, -1, sizeof(next)); }
};
vector<Node> trie;
vector<string> patterns;
ACAutomaton(const vector<string>& _patterns) : patterns(_patterns) {
trie.push_back(Node());
buildTrie();
buildFailLinks();
}
void insert(string& pattern, int idx) {
int node = 0;
for (char ch : pattern) {
int c = ch - 'a';
if (trie[node].next[c] == -1) {
trie[node].next[c] = trie.size();
trie.emplace_back(Node());
}
node = trie[node].next[c];
}
trie[node].isEnd = true;
trie[node].output.push_back(idx);
}
void buildTrie() {
for (size_t i = 0; i < patterns.size(); ++i) {
insert(patterns[i], i);
}
}
void buildFailLinks() {
queue<int> q;
for (int c = 0; c < 26; ++c) {
if (trie[0].next[c] != -1) {
trie[trie[0].next[c]].fail = 0;
q.push(trie[0].next[c]);
} else {
trie[0].next[c] = 0;
}
}
while (!q.empty()) {
int u = q.front(); q.pop();
for (int c = 0; c < 26; ++c) {
if (trie[u].next[c] != -1) {
int v = trie[u].next[c];
int f = trie[u].fail;
while (f != -1 && trie[f].next[c] == -1) f = trie[f].fail;
if (f == -1) trie[v].fail = 0;
else trie[v].fail = trie[f].next[c];
mergeOutput(v, trie[v].fail);
q.push(v);
} else {
trie[u].next[c] = trie[trie[u].fail].next[c];
}
}
}
}
void mergeOutput(int a, int b) {
if (b != 0) {
for (auto id : trie[b].output) {
trie[a].output.push_back(id);
}
}
}
void search(string text) {
int state = 0;
for (size_t i = 0; i < text.length(); ++i) {
char ch = text[i];
int c = ch - 'a';
while (state != -1 && trie[state].next[c] == -1) state = trie[state].fail;
if (state == -1) state = 0;
else state = trie[state].next[c];
if (!trie[state].output.empty()) {
for (auto id : trie[state].output) {
cout << "Pattern found: " << patterns[id] << ", Position: " << i - patterns[id].length() + 1 << endl;
}
}
}
}
};
int main() {
vector<string> patterns = {"he", "she", "his", "hers"};
string text = "ushers";
ACAutomaton ac(patterns);
ac.search(text);
return 0;
}
```
此代码片段定义了一个结构体`Node`表示AC自动机的状态节点,并通过队列完成失败指针的构建[^3]。
---
#### Python 实现
```python
class ACAutomaton:
class Node:
def __init__(self):
self.next = {}
self.fail = None
self.is_end = False
self.output = []
def __init__(self, patterns):
self.trie = [self.Node()]
self.patterns = patterns
self.build_trie()
self.build_fail_links()
def insert(self, word, idx):
current_node = 0
for char in word:
if char not in self.trie[current_node].next:
self.trie[current_node].next[char] = len(self.trie)
self.trie.append(self.Node())
current_node = self.trie[current_node].next[char]
self.trie[current_node].is_end = True
self.trie[current_node].output.append(idx)
def build_trie(self):
for idx, pattern in enumerate(self.patterns):
self.insert(pattern, idx)
def build_fail_links(self):
queue = []
root = self.trie[0]
for key, child_idx in root.next.items():
self.trie[child_idx].fail = 0
queue.append(child_idx)
while queue:
current_state = queue.pop(0)
current_node = self.trie[current_state]
for key, child_idx in current_node.next.items():
temp = current_node.fail
while temp and key not in self.trie[temp].next:
temp = self.trie[temp].fail
failure_state = self.trie[temp].next[key] if temp else 0
self.trie[child_idx].fail = failure_state
self.trie[child_idx].output.extend(self.trie[failure_state].output)
queue.append(child_idx)
def search(self, text):
current_state = 0
results = []
for pos, char in enumerate(text):
while current_state and char not in self.trie[current_state].next:
current_state = self.trie[current_state].fail
if char in self.trie[current_state].next:
current_state = self.trie[current_state].next[char]
else:
current_state = 0
if self.trie[current_state].output:
for idx in self.trie[current_state].output:
match_start = pos - len(self.patterns[idx]) + 1
results.append((match_start, self.patterns[idx]))
return results
if __name__ == "__main__":
patterns = ["he", "she", "his", "hers"]
text = "ushers"
ac_automaton = ACAutomaton(patterns)
matches = ac_automaton.search(text)
for start_pos, matched_pattern in matches:
print(f"Pattern '{matched_pattern}' found at position {start_pos}")
```
在此Python版本中,我们使用字典存储字符跳转关系,并采用BFS方法设置失败指针[^1]。
---
### 总结
无论是C++还是Python实现,AC自动机的核心思想都是先建立一棵Trie树来保存所有模式串的信息,接着利用广度优先搜索(BFS)构造失配指针以便快速定位下一个可能匹配的位置。最后,在给定文本上运行状态转移机制即可找出所有的匹配项[^2]。
阅读全文
相关推荐
















