前缀树/字典树
前缀树/字典树
前缀树又称为字典树,英文名trie:每个样本都从头节点开始,根据前缀字符或者前缀数字,建出来的一颗大树,就是前缀树。
前缀树的实现
有pass、end信息的节点,pass表示有多少个样本通过这个节点,end表示有多少个样本在这个节点结束。字符是路径/边,节点是维护pass、end信息。
- 类描述的实现方式(动态结构)
- 数组 [节点, …, 节点]
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78public class Trie {
class TrieNode {
public int pass;
public int end;
public TrieNode[] nexts;
public TrieNode() {
pass = 0;
end = 0;
nexts = new TrieNode[26];
}
}
private final TrieNode root;
public Trie(){
root = new TrieNode();
}
// 将字符串word插入前缀树
public void insert(String word) {
TrieNode node = root;
node.pass += 1;
for (int i = 0, path; i < word.length(); i++) {
path = word.charAt(i) - 'a';
if (node.nexts[path] == null) {
node.nexts[path] = new TrieNode();
}
node = node.nexts[path];
node.pass += 1;
}
node.end += 1;
}
// 返回前缀树中字符串word的数量
public int countWordsEqualTo(String word) {
TrieNode node = root;
for (int i = 0, path; i < word.length(); i++) {
path = word.charAt(i) - 'a';
node = node.nexts[path];
if (node == null) {
return 0;
}
}
return node.end;
}
// 返回前缀树中以prefix为前缀的字符串的个数
public int countWordsStartingWith(String prefix) {
TrieNode node = root;
for (int i = 0, path; i < prefix.length(); i++) {
path = prefix.charAt(i) - 'a';
node = node.nexts[path];
if (node == null) {
return 0;
}
}
return node.pass;
}
// 从前缀树中移除字符串word
public void erase(String word) {
if (countWordsEqualTo(word) <= 0) {
return;
}
TrieNode node = root;
node.pass--;
for (int i = 0, path; i < word.length(); i++) {
path = word.charAt(i) - 'a';
if (--node.nexts[path].pass == 0) {
node.nexts[path] = null;
return;
}
node = node.nexts[path];
}
node.end--;
}
} - 哈希表 <字符,节点>
- 数组 [节点, …, 节点]
- 静态数组的实现方式(静态结构)
- 二维数组 [n][字符个数] 例如26个字母:[n][26]
- 如果存在数字1-153,不用声明:[n][153],可以将153转换为1->5->3,只需要声明[n][10]即可
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98public class Trie_hash {
public static int MAXN = 150001;
public static int[][] tree = new int[MAXN][26];
public static int[] pass = new int[MAXN];
public static int[] end = new int[MAXN];
public static int cnt;
public static void build() {
cnt = 1;
}
public static void insert(String word) {
int cur = 1;
pass[cur]++;
for (int i = 0, path; i < word.length(); i++) {
path = word.charAt(i) - 'a';
if (tree[cur][path] == 0) {
cnt++;
tree[cur][path] = cnt;
}
cur = tree[cur][path];
pass[cur]++;
}
end[cur]++;
}
public static int search(String word) {
int cur = 1;
for (int i = 0, path; i < word.length(); i++) {
path = word.charAt(i) - 'a';
if (tree[cur][path] == 0) {
return 0;
}
cur = tree[cur][path];
}
return end[cur];
}
public static int prefixNumber(String pre) {
int cur = 1;
for (int i = 0, path; i < pre.length(); i++) {
path = pre.charAt(i) - 'a';
if (tree[cur][path] == 0) {
return 0;
}
cur = tree[cur][path];
}
return pass[cur];
}
public static void delete(String word) {
if (search(word) <= 0) {
return;
}
int cur = 1;
pass[cur]--;
for (int i = 0, path; i < word.length(); i++) {
path = word.charAt(i) - 'a';
if (--pass[tree[cur][path]] == 0) {
tree[cur][path] = 0;
return;
}
cur = tree[cur][path];
}
end[cur]--;
}
public static void main(String[] args) throws IOException {
BufferedReader in = new BufferedReader(new InputStreamReader(System.in));
PrintWriter out = new PrintWriter(new OutputStreamWriter(System.out));
String line = null;
int op, m;
String[] split;
while ((line = in.readLine()) != null) {
m = Integer.parseInt(line);
build();
for (int i = 0; i < m; i++) {
split = in.readLine().split(" ");
op = Integer.parseInt(split[0]);
if (op == 1) {
insert(split[1]);
} else if (op == 2) {
delete(split[1]);
} else if (op == 3) {
String ans = search(split[1]) > 0?"YES":"NO";
out.println(ans);
} else if (op == 4) {
out.println(prefixNumber(split[1]));
}
}
}
out.flush();
in.close();
out.close();
}
}
前缀树的使用场景
需要根据前缀信息来查询的场景
前缀树的优缺点
优点根据前缀信息选择树上的分支,可以节省大量的时间
缺点比较浪费空间,和总字符数量有关
本博客所有文章除特别声明外,均采用 CC BY-NC-SA 4.0 许可协议。转载请注明来源 冰红茶怪兽!