前缀树/字典树

前缀树又称为字典树,英文名trie:每个样本都从头节点开始,根据前缀字符或者前缀数字,建出来的一颗大树,就是前缀树。

前缀树的实现

有pass、end信息的节点,pass表示有多少个样本通过这个节点,end表示有多少个样本在这个节点结束。字符是路径/边,节点是维护pass、end信息。

  • 类描述的实现方式(动态结构)
    • 数组 [节点, …, 节点]
      1
      2
      3
      4
      5
      6
      7
      8
      9
      10
      11
      12
      13
      14
      15
      16
      17
      18
      19
      20
      21
      22
      23
      24
      25
      26
      27
      28
      29
      30
      31
      32
      33
      34
      35
      36
      37
      38
      39
      40
      41
      42
      43
      44
      45
      46
      47
      48
      49
      50
      51
      52
      53
      54
      55
      56
      57
      58
      59
      60
      61
      62
      63
      64
      65
      66
      67
      68
      69
      70
      71
      72
      73
      74
      75
      76
      77
      78
      public class Trie {
      class TrieNode {
      public int pass;
      public int end;
      public TrieNode[] nexts;

      public TrieNode() {
      pass = 0;
      end = 0;
      nexts = new TrieNode[26];
      }
      }

      private final TrieNode root;

      public Trie(){
      root = new TrieNode();
      }

      // 将字符串word插入前缀树
      public void insert(String word) {
      TrieNode node = root;
      node.pass += 1;
      for (int i = 0, path; i < word.length(); i++) {
      path = word.charAt(i) - 'a';
      if (node.nexts[path] == null) {
      node.nexts[path] = new TrieNode();
      }
      node = node.nexts[path];
      node.pass += 1;
      }
      node.end += 1;
      }

      // 返回前缀树中字符串word的数量
      public int countWordsEqualTo(String word) {
      TrieNode node = root;
      for (int i = 0, path; i < word.length(); i++) {
      path = word.charAt(i) - 'a';
      node = node.nexts[path];
      if (node == null) {
      return 0;
      }
      }
      return node.end;
      }

      // 返回前缀树中以prefix为前缀的字符串的个数
      public int countWordsStartingWith(String prefix) {
      TrieNode node = root;
      for (int i = 0, path; i < prefix.length(); i++) {
      path = prefix.charAt(i) - 'a';
      node = node.nexts[path];
      if (node == null) {
      return 0;
      }
      }
      return node.pass;
      }

      // 从前缀树中移除字符串word
      public void erase(String word) {
      if (countWordsEqualTo(word) <= 0) {
      return;
      }
      TrieNode node = root;
      node.pass--;
      for (int i = 0, path; i < word.length(); i++) {
      path = word.charAt(i) - 'a';
      if (--node.nexts[path].pass == 0) {
      node.nexts[path] = null;
      return;
      }
      node = node.nexts[path];
      }
      node.end--;
      }
      }
    • 哈希表 <字符,节点>
  • 静态数组的实现方式(静态结构)
    • 二维数组 [n][字符个数] 例如26个字母:[n][26]
    • 如果存在数字1-153,不用声明:[n][153],可以将153转换为1->5->3,只需要声明[n][10]即可
      1
      2
      3
      4
      5
      6
      7
      8
      9
      10
      11
      12
      13
      14
      15
      16
      17
      18
      19
      20
      21
      22
      23
      24
      25
      26
      27
      28
      29
      30
      31
      32
      33
      34
      35
      36
      37
      38
      39
      40
      41
      42
      43
      44
      45
      46
      47
      48
      49
      50
      51
      52
      53
      54
      55
      56
      57
      58
      59
      60
      61
      62
      63
      64
      65
      66
      67
      68
      69
      70
      71
      72
      73
      74
      75
      76
      77
      78
      79
      80
      81
      82
      83
      84
      85
      86
      87
      88
      89
      90
      91
      92
      93
      94
      95
      96
      97
      98
      public class Trie_hash {
      public static int MAXN = 150001;
      public static int[][] tree = new int[MAXN][26];
      public static int[] pass = new int[MAXN];
      public static int[] end = new int[MAXN];

      public static int cnt;
      public static void build() {
      cnt = 1;
      }

      public static void insert(String word) {
      int cur = 1;
      pass[cur]++;
      for (int i = 0, path; i < word.length(); i++) {
      path = word.charAt(i) - 'a';
      if (tree[cur][path] == 0) {
      cnt++;
      tree[cur][path] = cnt;
      }
      cur = tree[cur][path];
      pass[cur]++;
      }
      end[cur]++;
      }

      public static int search(String word) {
      int cur = 1;
      for (int i = 0, path; i < word.length(); i++) {
      path = word.charAt(i) - 'a';
      if (tree[cur][path] == 0) {
      return 0;
      }
      cur = tree[cur][path];
      }
      return end[cur];
      }

      public static int prefixNumber(String pre) {
      int cur = 1;
      for (int i = 0, path; i < pre.length(); i++) {
      path = pre.charAt(i) - 'a';
      if (tree[cur][path] == 0) {
      return 0;
      }
      cur = tree[cur][path];
      }

      return pass[cur];
      }

      public static void delete(String word) {
      if (search(word) <= 0) {
      return;
      }
      int cur = 1;
      pass[cur]--;
      for (int i = 0, path; i < word.length(); i++) {
      path = word.charAt(i) - 'a';
      if (--pass[tree[cur][path]] == 0) {
      tree[cur][path] = 0;
      return;
      }
      cur = tree[cur][path];
      }
      end[cur]--;
      }

      public static void main(String[] args) throws IOException {
      BufferedReader in = new BufferedReader(new InputStreamReader(System.in));
      PrintWriter out = new PrintWriter(new OutputStreamWriter(System.out));
      String line = null;
      int op, m;
      String[] split;
      while ((line = in.readLine()) != null) {
      m = Integer.parseInt(line);
      build();
      for (int i = 0; i < m; i++) {
      split = in.readLine().split(" ");
      op = Integer.parseInt(split[0]);
      if (op == 1) {
      insert(split[1]);
      } else if (op == 2) {
      delete(split[1]);
      } else if (op == 3) {
      String ans = search(split[1]) > 0?"YES":"NO";
      out.println(ans);
      } else if (op == 4) {
      out.println(prefixNumber(split[1]));
      }
      }
      }

      out.flush();
      in.close();
      out.close();
      }
      }

前缀树的使用场景

需要根据前缀信息来查询的场景

前缀树的优缺点

优点

根据前缀信息选择树上的分支,可以节省大量的时间

缺点

比较浪费空间,和总字符数量有关