//要求正则式中不含有空集或空串,程序不检查符号是否合法
//NFA初始开始状态记为0,终止状态记为1,新增的状态用2,3,...表示,存储结构为(起点状态,终点状态,正则式)三元组的列表
//假设没有不可达状态和死状态
package compiler;
import java.util.Arrays;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.Scanner;
import java.util.Set;
class triple {
int start, end;
String regex;
public triple(int i, int j, String init) {
start = i;
end = j;
regex = init;
}
@Override
public String toString() {
return start + "," + regex + "->" + end;
}
@Override
public boolean equals(Object arg0) {
if(arg0 == null || arg0.getClass() != this.getClass()) return false;
triple t = (triple)arg0;
return start==t.start&&end==t.end&®ex.equals(t.regex);
}
}
public class RegexToMFA {
static String regex;
static LinkedList<triple> nfa;
static LinkedList<Integer> finalStates;
static LinkedList<triple> dfa;
static Set<Character> chars;
static int mfaStart;
static LinkedList<Integer> mfaFinal;
static LinkedList<triple> mfa;
public static void main(String[] args) {
Scanner sc = new Scanner(System.in);
regex = sc.nextLine().trim();
sc.close();
if (regex.length() == 1) {
System.out.println("MFA:初态0,终态1,(0," + regex + ")->1");
return;
}
regexToNFA();
System.out.println("NFA:初态0,终态1");
for (triple t : nfa)
System.out.println(t);
NFAtoDFA();
System.out.println("DFA:初态0,终态" + finalStates);
for (triple t : dfa)
System.out.println(t);
DFAtoMFA();
System.out.println("MFA:初态"+mfaStart+",终态"+mfaFinal);
for (triple t : mfa)
System.out.println(t);
}
static void regexToNFA() {
nfa = new LinkedList<>();
nfa.add(new triple(0, 1, regex));
int state = 2;
boolean loop = true;// 每轮迭代对nfa中的一个映射应用规则拆分,直到不能再拆分为止
while (loop) {
loop = false;
// 遍历映射,若拆分,loop=true,跳出
// pattern1,2,3检查不可调换顺序
for (triple t : nfa) {
int s = t.start, e = t.end;
String reg = t.regex;
if (reg.length() <= 1)
continue;// 不能拆分
// 去括号
if (reg.charAt(0) == '(') {
int bracketCnt = 1;
for (int i = 1; i < reg.length(); ++i) {
if (reg.charAt(i) == '(')
bracketCnt++;
if (reg.charAt(i) == ')') {
bracketCnt--;
if (bracketCnt == 0) {
if (i == reg.length() - 1)
t.regex = reg.substring(1, i);
break;
}
}
}
}
reg = t.regex;
int i;
if ((i = isPattern1(reg)) != -1) {
nfa.remove(t);
nfa.add(new triple(s, e, reg.substring(0, i)));
nfa.add(new triple(s, e, reg.substring(i + 1)));
loop = true;
break;
}
if ((i = isPattern2(reg)) != -1) {
nfa.remove(t);
nfa.add(new triple(s, state, reg.substring(0, i)));
nfa.add(new triple(state, e, reg.substring(i)));
state++;
loop = true;
break;
}
if (isPattern3(reg)) {
nfa.remove(t);
nfa.add(new triple(s, state, ""));
nfa.add(new triple(state, e, ""));
nfa.add(new triple(state, state, reg.substring(0, reg.length() - 1)));
state++;
loop = true;
break;
}
}
}
}
static int isPattern1(String regex)// 若符合规则,返回|的下标,否则返回-1
{
int bracketCnt = 0;
for (int i = 0; i < regex.length(); ++i) {
if (regex.charAt(i) == '(')
bracketCnt++;
if (regex.charAt(i) == ')')
bracketCnt--;
if (regex.charAt(i) == '|' && bracketCnt == 0)
return i;
}
return -1;
}
static int isPattern2(String reg)// 若符合规则,返回e2开始处的下标,否则返回-1
{
if (reg.charAt(0) != '(')// 字符开头
{
if (reg.charAt(1) == '*')
return reg.length() == 2 ? -1 : 2;
return 1;
} else// 左括号开头
{
int bracketCnt = 1;
for (int i = 1; i < reg.length(); ++i) {
if (reg.charAt(i) == '(')
bracketCnt++;
if (reg.charAt(i) == ')') {
bracketCnt--;
if (bracketCnt == 0) {
if (reg.charAt(i + 1) == '*')
return reg.length() == i + 2 ? -1 : i + 2;
return i + 1;
}
}
}
}
return -1;
}
static boolean isPattern3(String reg) {
if (reg.length() == 2 && reg.charAt(1) == '*')// a*
return true;
// 相匹配的(...)* 匹配条件:左括号开始计数1,往后左加右减,若减到0时是最后一个右括号,成功
if (reg.charAt(0) == '(' && reg.endsWith(")*")) {
int bracketCnt = 1;
for (int i = 1; i < reg.length() - 1; ++i) {
if (reg.charAt(i) == '(')
bracketCnt++;
if (reg.charAt(i) == ')') {
bracketCnt--;
if (bracketCnt == 0)
return i == reg.length() - 2;
}
}
}
return false;
}
static void NFAtoDFA() {
finalStates = new LinkedList<>();
dfa = new LinkedList<>();
Integer[] I = getClosure(new Integer[] { 0 });
getChars();
LinkedList<Integer[]> newStates = new LinkedList<>();
newStates.add(I);
for (Integer i : I)
if (i == 1) {
finalStates.add(0);
break;
}
LinkedList<Integer[]> toCalc = new LinkedList<>(newStates);
while (!toCalc.isEmpty()) {
Integer[] arr = toCalc.poll();
for (Character c : chars) {
Integer[] tmp = getReachStates(arr, c);
Integer[] reachClosure = getClosure(tmp);
if (!have(newStates, reachClosure)) {
newStates.add(reachClosure);
toCalc.add(reachClosure);
for (Integer i : reachClosure)
if (i == 1) {
finalStates.add(newStates.size() - 1);
break;
}
}
int s = getIndex(newStates, arr);
int e = getIndex(newStates, reachClosure);
triple t = new triple(s, e, c + "");
dfa.add(t);
}
}
}
static Integer[] getClosure(Integer[] arr) {
Set<Integer> closure = new HashSet<>();
for (Integer start : arr)
closure.add(start);
LinkedList<Integer> tocheck = new LinkedList<>(closure);
while (!tocheck.isEmpty()) {
int start = tocheck.poll();
for (triple t : nfa)
if (t.start == start && t.regex.equals("")) {
closure.add(t.end);
tocheck.add(t.end);
break;
}
}
return closure.toArray(new Integer[closure.size()]);
}
static void getChars() {
chars = new HashSet<>();
for (int i = 0; i < regex.length(); ++i) {
char c = regex.charAt(i);
if (c != '(' && c != ')' && c != '|' && c != '*')
chars.add(c);
}
}
static Integer[] getReachStates(Integer[] arr, Character c) {
Set<Integer> reachStates = new HashSet<>();
for (Integer i : arr)
for (triple t : nfa)
if (t.start == i && !t.regex.isEmpty() && t.regex.charAt(0) == c) {
reachStates.add(t.end);
break;
}
return reachStates.toArray(new Integer[reachStates.size()]);
}
static int getIndex(LinkedList<Integer[]> newStates, Integer[] arr) {
for (int i = 0; i < newStates.size(); ++i)
if (Arrays.equals(newStates.get(i), arr))
return i;
return -1;
}
static boolean have(LinkedList<Integer[]> newStates, Integer[] arr) {
for (int i = 0; i < newStates.size(); ++i)
if (Arrays.equals(newStates.get(i), arr))
return true;
return false;
}
static void DFAtoMFA()
{
mfaStart = 0;
mfaFinal = finalStates;
mfa = new LinkedList<>(dfa);
Set<Integer> allStates = new HashSet<>();
allStates.add(0);
for(triple t: dfa)
allStates.add(t.end);
int n = allStates.size();
allStates.removeAll(finalStates);
LinkedList<Integer> nonFinal = new LinkedList<>(allStates);
boolean[][] table = new boolean[n][n];
for(int i=0; i<n; ++i)
for(int j=0; j<n; ++j)
table[i][j] = true;
for(int i=0; i<n; ++i)
for(int j=0; j<n; ++j)
if(j!=i && ((finalStates.contains(i)&&nonFinal.contains(j)) || (nonFinal.contains(i)&&finalStates.contains(j))))
table[i][j] = false;
boolean flag = true;
while(flag)
{
flag = false;
for(int i=0; i<n; ++i)
for(int j=0; j<n; ++j)
if(j!=i && table[i][j])
for(char c: chars)
{
boolean ihasTrans = false, jhasTrans = false;
int iTrans = 0, jTrans = 0;
for(triple t: dfa)
if(t.start==i && t.regex==c+"")
{
ihasTrans = true;
iTrans = t.end;
break;
}
for(triple t: dfa)
if(t.start==j && t.regex==c+"")
{
jhasTrans = true;
jTrans = t.end;
break;
}
if((ihasTrans&&!jhasTrans) || (!ihasTrans&&jhasTrans) || (ihasTrans&&jhasTrans&&!table[iTrans][jTrans]))
{
table[i][j] = false;
flag = true;
break;
}
}
}
for(int i=0; i<n; ++i)
for(int j=0; j<n; ++j)
if(j!=i && table[i][j])
{
for(triple t: dfa)
if(t.start==j || t.end==j)
{
mfa.remove(t);
if(t.start == j) t.start = i;
if(t.end == j) t.end = i;
if(!mfa.contains(t))
mfa.add(t);
}
if(mfaStart==j) mfaStart=i;
mfaFinal.remove(j);
table[i][j] = false;
table[j][i] = false;
}
}
}
Java实现正则式转MFA
©著作权归作者所有,转载或内容合作请联系作者
- 文/潘晓璐 我一进店门,熙熙楼的掌柜王于贵愁眉苦脸地迎上来,“玉大人,你说我怎么就摊上这事。” “怎么了?”我有些...
- 文/花漫 我一把揭开白布。 她就那样静静地躺着,像睡着了一般。 火红的嫁衣衬着肌肤如雪。 梳的纹丝不乱的头发上,一...
- 文/苍兰香墨 我猛地睁开眼,长吁一口气:“原来是场噩梦啊……” “哼!你这毒妇竟也来了?” 一声冷哼从身侧响起,我...
推荐阅读更多精彩内容
- title: 用Java实现网络爬虫二之Java正则表达式tags: Java 网络爬虫 Spider Crawl...
- 实现一个正则表达式需要几步? 就三步: 分析正则表达式并构建出NFA 根据NFA得出DFA 根据DFA匹配字符串当...
- 1:使用场景 比如我们在自己实现的登录功能既能支持“手机号码”和“邮箱号码”加用户的密码登录。这时我们就可以使用j...