LeetCode第十题
Rust基于 NFA(非确定有限自动机) 和 DFA(确定有限自动机)实现正则匹配引擎
给定一个字符串 s 和一个模式串 p,判断 s 是否匹配 p。
这里的模式串 p 规则很简单,只支持:
. :匹配任意一个字符* :匹配前一个字符(或 .)的 零次或多次ruststruct Row {
row: [Option<usize>; 26], // 普通字符的转移
eps: Option<usize>, // ε 转移(空串转移)
}
row[j] 表示遇到字母 a + j 时能跳到的 NFA 状态。eps 表示从该状态可以“不消耗输入”直接跳到的状态(ε-closure)。ruststruct NFA {
rows: Vec<Row>, // 状态列表
start: usize, // 起始状态
end: usize, // 接收状态
eps_reach: HashMap<usize, HashSet<usize>>, // 每个状态的 ε-闭包
}
ruststruct DFA {
rows: Vec<[Option<usize>; 26]>, // 转移表(DFA 里没有 ε 转移)
start: usize, // 起始状态
end: HashSet<usize>, // 接收状态集合
}
NFA::build_nfarustwhile i < p.len() {
if i+1 < p.len() && p[i+1] == b'*' {
// case: 带 *
} else {
// case: 单个字符或 '.'
}
}
a*rustrows.push(Row::new()); // 新状态 (cur+1)
rows[cur_state].eps = Some(cur_state + 1); // 从 cur ε 到 cur+1
cur_state += 1;
rows.push(Row::new()); // 再建一个状态 (cur+1)
if p[i] == '.' {
for j in 0..26 { rows[cur_state].row[j] = Some(cur_state); }
} else {
rows[cur_state].row[ne] = Some(cur_state);
}
rows[cur_state].eps = Some(cur_state + 1); // 自环 + ε 跳过
cur_state += 1;
i += 2;
效果:
(cur) -ε-> (loop_state) --c--> (loop_state) -ε-> (next)
即:
a 或 .rustrows.push(Row::new());
if p[i] == '.' {
for j in 0..26 { rows[cur_state].row[j] = Some(cur_state + 1); }
} else {
rows[cur_state].row[ne] = Some(cur_state + 1);
}
cur_state += 1;
i += 1;
效果:
(cur) --c--> (cur+1)
rustfor cur in (0..=cur_state).rev() {
let mut eps_reach = HashSet::from([cur]);
if let Some(ne) = rows[cur].eps {
eps_reach.extend(_cache_eps_reach[&ne].iter());
}
_cache_eps_reach.insert(cur, eps_reach);
}
eps_reach 中。DFA::build_dfa核心思想是 子集构造法:
rustlet mut state_list: Vec<HashSet<usize>> = vec![nfa.eps_reach[&nfa.start].clone()];
rustwhile i < state_list.len() {
let mut row: [Option<usize>; 26] = [None; 26];
for c in 0..26 {
let mut next_state_list: HashSet<usize> = HashSet::new();
for cur in state_list[i].iter() {
if let Some(next) = nfa.rows[*cur].row[c] {
next_state_list.insert(next);
next_state_list.extend(nfa.eps_reach[&next].iter());
}
}
if !next_state_list.is_empty() {
let next_state_i = {
if let Some(pos) = state_list.iter().position(|x| x == &next_state_list) {
pos
} else {
state_list.push(next_state_list);
state_list.len() - 1
}
};
row[c] = Some(next_state_i);
}
}
rows.push(row);
i += 1;
}
遍历当前 DFA 状态里所有的 NFA 状态:
c 进行转移 → 得到一组新的 NFA 状态。如果这个 DFA 状态是新出现的,就加入 state_list。
最终,rows 保存了 DFA 的完整转移表。
rustlet end = {
let mut end = HashSet::new();
for i in 0..state_list.len() {
if state_list[i].contains(&nfa.end) {
end.insert(i);
}
}
end
};
is_matchrustfn is_match(&self, s: &str) -> bool {
let s = s.as_bytes();
let mut cur = self.start;
for c in s {
if let Some(next) = self.rows[cur][(*c - b'a') as usize] {
cur = next;
} else {
return false;
}
}
self.end.contains(&cur)
}
rustuse std::collections::{HashMap, HashSet};
struct Row {
row: [Option<usize>; 26],
eps: Option<usize>,
}
impl Row {
fn new() -> Self {
Row {
row: [None; 26],
eps: None,
}
}
}
struct NFA {
rows: Vec<Row>,
start: usize,
end: usize,
eps_reach: HashMap<usize, HashSet<usize>>,
}
impl NFA {
fn build_nfa(p: &str) -> Self {
let mut cur_state = 0 as usize;
let mut rows = vec![Row::new()];
let p = p.as_bytes();
let mut i = 0 as usize;
while i < p.len() {
let ne = (p[i] - b'a') as usize;
if i + 1 < p.len() && p[i + 1] == b'*' {
rows.push(Row::new());
rows[cur_state].eps = Some(cur_state + 1);
cur_state += 1;
rows.push(Row::new());
if p[i] == b'.' {
for j in 0..26 {
rows[cur_state].row[j] = Some(cur_state);
}
} else {
rows[cur_state].row[ne] = Some(cur_state);
}
rows[cur_state].eps = Some(cur_state + 1);
cur_state += 1;
i += 2;
} else {
rows.push(Row::new());
if p[i] == b'.' {
for j in 0..26 {
rows[cur_state].row[j] = Some(cur_state + 1);
}
} else {
rows[cur_state].row[ne] = Some(cur_state + 1);
}
cur_state += 1;
i += 1;
}
}
let mut _cache_eps_reach: HashMap<usize, HashSet<usize>> = HashMap::new();
for cur in (0..=cur_state).rev() {
let mut eps_reach = HashSet::from([cur]);
if let Some(ne) = rows[cur].eps {
eps_reach.extend(_cache_eps_reach[&ne].iter());
}
_cache_eps_reach.insert(cur, eps_reach);
}
Self {
rows,
start: 0,
end: cur_state,
eps_reach: _cache_eps_reach,
}
}
}
struct DFA {
rows: Vec<[Option<usize>; 26]>,
start: usize,
end: HashSet<usize>,
}
impl DFA {
fn build_dfa(nfa: &NFA) -> Self {
let mut state_list: Vec<HashSet<usize>> = vec![nfa.eps_reach[&nfa.start].clone()];
let mut rows: Vec<[Option<usize>; 26]> = Vec::new();
let mut i = 0;
while i < state_list.len() {
let mut row: [Option<usize>; 26] = [None; 26];
for c in 0..26 {
let mut next_state_list: HashSet<usize> = HashSet::new();
for cur in state_list[i].iter() {
if let Some(next) = nfa.rows[*cur].row[c] {
next_state_list.insert(next);
next_state_list.extend(nfa.eps_reach[&next].iter());
}
}
if !next_state_list.is_empty() {
let next_state_i = {
if let Some(pos) = state_list.iter().position(|x| x == &next_state_list) {
pos
} else {
state_list.push(next_state_list);
state_list.len() - 1
}
};
row[c] = Some(next_state_i);
}
}
rows.push(row);
i += 1;
}
let end = {
let mut end = HashSet::new();
for i in 0..state_list.len() {
if state_list[i].contains(&nfa.end) {
end.insert(i);
}
}
end
};
Self {
rows,
start: 0,
end,
}
}
fn is_match(&self, s: &str) -> bool {
let s = s.as_bytes();
let mut cur = self.start;
for c in s {
if let Some(next) = self.rows[cur][(*c - b'a') as usize] {
cur = next;
} else {
return false;
}
}
self.end.contains(&cur)
}
}
impl Solution {
pub fn is_match(s: String, p: String) -> bool {
let nfa = NFA::build_nfa(&p);
let dfa = DFA::build_dfa(&nfa);
dfa.is_match(&s)
}
}


本文作者:MapleCity
本文链接:
版权声明:本博客所有文章除特别声明外,均采用 BY-NC-SA 许可协议。转载请注明出处!