JSON的语法简单,很适合上完编译原理课程之后或者学习完龙书语法分析之后的练手。
JSON格式
{"a" : 123 }
[1,2,3]
{"a": [1,2,3] }
{"a" : {"b" : 123 }}
基础为以上几种。有了基本的格式,可以考虑先写词法分析部分。JSON格式类似于key/value存储,其中key为string类型,value复杂一些,可包括,string,int,double,jsonobject,jsonarray,true,false,null标记。所以对每个token都要逐一分析,构造出DFA。
- 字符串
字符串以 " 开头,以 " 结束。其中需要考虑的是转义字符的忽略以及unicode的格式(\u hex hex hex hex)
private Token readString() throws IOException, JsonLexException {
StringBuilder builder = new StringBuilder();
while ( (this.at_ = read()) != '\"') {
if (this.at_ == '\r' || this.at_ == '\n') error("string", this.at_);
if (this.at_ == '\\') {
//check escape char
this.at_ = read();
if (isEscapeChar(this.at_)) {
builder.append('\\');
builder.append((char) this.at_);
} else if (this.at_ == 'u'){
//unicode
builder.append('\\');
builder.append('u');
builder.append(unicode());
} else {
error("string", this.at_);
}
} else {
builder.append((char) this.at_);
}
}
if ( this.at_ != '\"') {
//string is not closed
error("string[not closed]", this.at_);
}
return this.token_ = new Token(JsonToken.STRING, new Value(builder.toString()));
}
private String unicode() throws IOException, JsonLexException {
StringBuilder builder = new StringBuilder();
int i=0;
for (;i<4 && isHexChar(this.at_ = read()); builder.append((char) this.at_),++i);
if (i < 4) error("unicode", this.at_);
return builder.toString();
}
- 数字
数字包含有符号int double以及科学计数法表示稍微麻烦,但是画出DFA分析就会明了很多。
据此就可很快写出来。
private Token readNumber(int at) throws IOException,JsonLexException {
StringBuilder builder = new StringBuilder();
int status = 0;
while (true) {
switch (status) {
case 0:
this.at_ = at;
if (this.at_ == '+' || this.at_ == '-') status = 1;
else if (Character.isDigit(this.at_)) status = 2;
else error("number", this.at_);
builder.append((char) this.at_);
break;
case 1:
this.at_ = read();
if (Character.isDigit(this.at_)) status = 2;
else error("number", this.at_);
builder.append((char) this.at_);
break;
case 2:
this.at_ = read();
if (this.at_ == '.') status = 3;
else if (Character.isDigit(this.at_)) status = 2;
else if (this.at_ == 'E' || this.at_ == 'e') status = 5;
else status = 8;
if (status != 8) {
builder.append((char) this.at_);
}
break;
case 3:
this.at_ = read();
if (Character.isDigit(this.at_)) status = 4;
else error("number", this.at_);
builder.append((char) this.at_);
break;
case 4:
this.at_ = read();
if (Character.isDigit(this.at_)) status = 4;
else if (this.at_ == 'E' || this.at_ == 'e') status = 5;
else status = 9;
if (status != 9) {
builder.append((char) this.at_);
}
break;
case 5:
this.at_ = read();
if (this.at_ == '+' || this.at_ == '-') status = 6;
else if (Character.isDigit(this.at_)) status = 7;
else error("number", this.at_);
builder.append((char) this.at_);
break;
case 6:
this.at_ = read();
if (Character.isDigit(this.at_)) status = 7;
else error("number", this.at_);
builder.append((char) this.at_);
break;
case 7:
this.at_ = read();
if (Character.isDigit(this.at_)) status = 7;
else status = 10;
if (status != 10) {
builder.append((char) this.at_);
}
break;
case 8: // int
unread(this.at_); // not a digit
return this.token_ = new Token(JsonToken.INT, new Value(Integer.valueOf(builder.toString())));
case 9: //double without 'E' or 'e'
unread(this.at_); // not a digit
return this.token_ = new Token(JsonToken.DOUBLE, new Value(Double.valueOf(builder.toString())));
case 10://double with 'E' or 'e'
unread(this.at_);// not a digit
return this.token_ = new Token(JsonToken.DOUBLE,new Value(Double.valueOf(builder.toString())));
default:
error("number", this.at_);
}
}
}
- 其他字符
对于其他终结符,true false null则只要匹配首字符来判断就可以了。
综上就可写出来lexer了
public Token scan() throws IOException, JsonLexException {
//this.at_ = '\0';
while (isWhiteBlack( this.at_ = read() ));
switch ((int)this.at_) {
case '{':
return this.token_ = new Token(JsonToken.BEGIN_OBJ, new Value("{"));
case '}':
return this.token_ = new Token(JsonToken.END_OBJ, new Value("}"));
case '[':
return this.token_ = new Token(JsonToken.BEGIN_ARR, new Value("["));
case ']':
return this.token_ = new Token(JsonToken.END_ARR, new Value("]"));
case ':':
return this.token_ = new Token(JsonToken.COLON, new Value(":"));
case ',':
return this.token_ = new Token(JsonToken.COMMA, new Value(","));
case '1': case '2': case '3': case '4': case '5':
case '6': case '7': case '8': case '9': case '0':
case '-': case '+': case '.':
return this.token_ = readNumber(this.at_);
case '\"':
return this.token_ = readString();
case 'n':
return this.token_ = readNull();
case 't':
return this.token_ = readTrue();
case 'f':
return this.token_ = readFalse();
case -1:
return this.token_ = new Token(JsonToken.EOF, new Value("eof"));
default:
this.token_ = null;
error("scan->default",this.at_);
return null;
}
}
词法分析就完成了。
语法分析
语法分析有LL, LR等方法这里采取LL(1)分析。
首先要构造出JSON文法。根据JSON基础格式。
obj -> { members }
members -> pair members' | eps
members' -> , pair members' | eps
pair -> string : value
array -> [ elem ]
elem -> value elem' | eps
elem' -> , value elem' | eps
value -> obj | array | number | string | true | false | null
求出FIRST集和FOLLOW集,验证可知为LL(1)文法。这样就可以用递归下降来做语法分析。
之后写出SDT就可以根据SDT来编写代码了。
public Json parse() throws IOException, JsonParseException, JsonLexException {
Token token = lexer_.scan();
if (token.getToken() == JsonToken.BEGIN_OBJ) {
return object();
} else if (token.getToken() == JsonToken.BEGIN_ARR) {
return array();
} else {
error("parse", token.toString());
return null;
}
}
//文法 { member }
public JsonObject object() throws IOException,
JsonParseException, JsonLexException {
Map<String, Value> map = new HashMap<>();
return new JsonObject(member(map));
}
//对应文法 pair mem' | eps
public Map<String ,Value> member(Map<String, Value> map) throws IOException,
JsonLexException, JsonParseException {
Token token = lexer_.scan();
if (token.getToken() == JsonToken.END_OBJ) { //eps
return map;
}
return member_1( pair(map) );
}
//对应文法 , pair mem' | eps
public Map<String, Value> member_1(Map<String, Value> map) throws IOException, JsonLexException, JsonParseException {
Token token = lexer_.scan();
if (token.getToken() == JsonToken.COMMA) { //,
lexer_.scan();
return member_1( pair(map) );
} else if (token.getToken() == JsonToken.END_OBJ) { //eps
return map;
} else {
error("member_1", token.toString());
return null;
}
}
//文法 string : value
private Map<String, Value> pair(Map<String, Value> map) throws IOException, JsonLexException, JsonParseException {
Token token = lexer_.peek();
if (token.getToken() == JsonToken.STRING) {
String key = (String) token.getValue().get();
token = lexer_.scan();
if (token.getToken() == JsonToken.COLON) {
lexer_.scan();
map.put(key, value());
return map;
} else {
error("pair", token.toString());
return null;
}
} else {
error("pair", token.toString());
return null;
}
}
//文法 [ elem ]
public JsonArray array() throws IOException,JsonParseException, JsonLexException {
List<Value> list = new LinkedList<>();
return new JsonArray(elem(list));
}
//文法 value elem' | eps
public List<Value> elem(List<Value> list) throws IOException,
JsonLexException,JsonParseException {
Token token = lexer_.scan();
if (token.getToken() == JsonToken.END_ARR) { // eps
return list;
} else {
list.add(value());
return elem_1(list);
}
}
// 文法 , value elem' | eps
public List<Value> elem_1(List<Value> list) throws IOException,
JsonLexException, JsonParseException {
Token token = lexer_.scan();
if (token.getToken() == JsonToken.COMMA) { // ,
lexer_.scan();
list.add(value());
return elem_1(list);
} else if (token.getToken() == JsonToken.END_ARR) { // eps
return list;
} else {
error("elem_1",token.toString());
return null;
}
}
public Value value() throws IOException, JsonLexException,
JsonParseException {
Token token = lexer_.peek();
if (isCommonValue(token.getToken())) {
return new Value(token.getValue());
} else if (token.getToken() == JsonToken.BEGIN_OBJ) {
return new Value(object());
} else if (token.getToken() == JsonToken.BEGIN_ARR) {
return new Value(array());
} else {
error("value",token.toString());
return null;
}
}
private boolean isCommonValue(JsonToken token) {
return (token == JsonToken.STRING || token == JsonToken.FALSE ||
token == JsonToken.INT || token == JsonToken.DOUBLE ||
token == JsonToken.TRUE || token == JsonToken.NULL) ? true : false;
}
public void error(String position, String value) throws JsonParseException {
throw new JsonParseException(String.format("an error occurred on Parser [%s %s]",position,value));
}
至此一个简单的解析器已经写完了,只是实现了简单的解析功能,一个完善的JAVA JSON解析器至少可以对对象序列化和反序列化。后续将会添加上这部分功能,再更新一遍 :-|
源代码:ToyJSON