Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Don't escape backslashes in grammar_codegen #916

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions fuzztest/grammars/JSON.g4
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
//
// With some simplifications:
// - Restricted character set (e.g., no unicode chars).
// - No escape sequences (e.g., \n, \t, \uff01, etc.)
// - No unicode escape sequences (e.g., \uff01)
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I initially added the unicode escape sequences as well, but had to remove them again because nlohmann/json actually validates that the generated code points are "proper":
https://github.com/nlohmann/json/blob/a259ecc51e1951e12f757ce17db958e9881e9c6c/include/nlohmann/detail/input/lexer.hpp#L359-L381


grammar JSON_GRAMMAR;

Expand All @@ -36,10 +36,12 @@ elements : element (',' element)* ;

element : value ;

STRING : '"' CHARACTER* '"' ;
STRING : '"' (CHARACTER | '\\' ESCAPED )* '"' ;

// A single unescaped string character. Deliberately restricted to ASCII
// letters, digits, and underscore (see the simplifications listed at the top
// of this grammar).
CHARACTER : [a-zA-Z0-9_];

// The character that follows a backslash in an escape sequence inside STRING.
// Only the single-character escapes are listed; unicode escapes (\uXXXX) are
// intentionally not generated by this grammar.
ESCAPED : '"' | '\\' | '/' | 'b' | 'f' | 'n' | 'r' | 't' ;

// A number is an integer part, optionally followed by a fraction and then
// optionally by an exponent.
NUMBER : INTEGER FRACTION? EXPONENT? ;

// Integer part of a number: either a single digit, or a multi-digit sequence
// whose first digit is ONETONINE; each form may be prefixed with '-'.
INTEGER : DIGIT | ONETONINE DIGITS | '-' DIGIT | '-' ONETONINE DIGITS ;
Expand Down
2 changes: 1 addition & 1 deletion grammar_codegen/antlr_frontend.cc
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ void ChangeStringQuote(std::string& str) {
std::string EscapeString(absl::string_view text) {
std::string excape_text;
for (int i = 0; i < text.size(); ++i) {
if (text[i] == '"' || text[i] == '\\') {
if (text[i] == '"') {
excape_text.push_back('\\');
}
excape_text.push_back(text[i]);
Expand Down
213 changes: 133 additions & 80 deletions grammar_codegen/testdata/expected_json_grammar.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ enum JsonTypes {
kElementNode,
kSTRINGNode,
kCHARACTERNode,
kESCAPEDNode,
kNUMBERNode,
kINTEGERNode,
kDIGITSNode,
Expand All @@ -47,29 +48,38 @@ enum JsonTypes {
kElementsSubNode5,
kElementsSubNode6,
kSTRINGSubNode7,
kNUMBERSubNode8,
kNUMBERSubNode9,
kINTEGERSubNode10,
kINTEGERSubNode11,
kSTRINGSubNode9,
kSTRINGSubNode8,
kNUMBERSubNode10,
kNUMBERSubNode11,
kINTEGERSubNode12,
kDIGITSSubNode13,
kEXPONENTSubNode14,
kEXPONENTSubNode15,
kWSPACESubNode16,
kLiteral7,
kLiteral11,
kLiteral8,
kINTEGERSubNode13,
kINTEGERSubNode14,
kDIGITSSubNode15,
kEXPONENTSubNode16,
kEXPONENTSubNode17,
kWSPACESubNode18,
kLiteral14,
kLiteral18,
kLiteral15,
kLiteral13,
kLiteral6,
kLiteral5,
kLiteral3,
kLiteral12,
kLiteral3,
kLiteral19,
kLiteral4,
kLiteral13,
kLiteral5,
kLiteral20,
kLiteral7,
kLiteral8,
kLiteral1,
kLiteral2,
kLiteral0,
kLiteral9,
kLiteral2,
kLiteral10,
kLiteral11,
kLiteral0,
kLiteral16,
kLiteral17,
kCharSet3,
kCharSet1,
kCharSet2,
Expand All @@ -85,6 +95,7 @@ class ElementsNode;
class ElementNode;
class STRINGNode;
class CHARACTERNode;
class ESCAPEDNode;
class NUMBERNode;
class INTEGERNode;
class DIGITSNode;
Expand All @@ -102,48 +113,64 @@ class ArraySubNode4;
class ElementsSubNode5;
class ElementsSubNode6;
class STRINGSubNode7;
class NUMBERSubNode8;
class NUMBERSubNode9;
class INTEGERSubNode10;
class INTEGERSubNode11;
class STRINGSubNode9;
class STRINGSubNode8;
class NUMBERSubNode10;
class NUMBERSubNode11;
class INTEGERSubNode12;
class DIGITSSubNode13;
class EXPONENTSubNode14;
class EXPONENTSubNode15;
class WSPACESubNode16;
class Literal7;
class Literal11;
class Literal8;
class INTEGERSubNode13;
class INTEGERSubNode14;
class DIGITSSubNode15;
class EXPONENTSubNode16;
class EXPONENTSubNode17;
class WSPACESubNode18;
class Literal14;
class Literal18;
class Literal15;
class Literal13;
class Literal6;
class Literal5;
class Literal3;
class Literal12;
class Literal3;
class Literal19;
class Literal4;
class Literal13;
class Literal5;
class Literal20;
class Literal7;
class Literal8;
class Literal1;
class Literal2;
class Literal0;
class Literal9;
class Literal2;
class Literal10;
class Literal11;
class Literal0;
class Literal16;
class Literal17;
class CharSet3;
class CharSet1;
class CharSet2;
class CharSet0;

inline constexpr absl::string_view kStrLiteral7 = "+";
inline constexpr absl::string_view kStrLiteral11 = ",";
inline constexpr absl::string_view kStrLiteral8 = "-";
inline constexpr absl::string_view kStrLiteral6 = ".";
inline constexpr absl::string_view kStrLiteral5 = "0";
inline constexpr absl::string_view kStrLiteral14 = "+";
inline constexpr absl::string_view kStrLiteral18 = ",";
inline constexpr absl::string_view kStrLiteral15 = "-";
inline constexpr absl::string_view kStrLiteral13 = ".";
inline constexpr absl::string_view kStrLiteral6 = "/";
inline constexpr absl::string_view kStrLiteral12 = "0";
inline constexpr absl::string_view kStrLiteral3 = ":";
inline constexpr absl::string_view kStrLiteral12 = "[";
inline constexpr absl::string_view kStrLiteral19 = "[";
inline constexpr absl::string_view kStrLiteral4 = "\"";
inline constexpr absl::string_view kStrLiteral13 = "]";
inline constexpr absl::string_view kStrLiteral5 = "\\";
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Without the fix in antlr_frontend.cc, this literal "\\" (a string of length one) would instead be generated as "\\\\" (a string of length two).

inline constexpr absl::string_view kStrLiteral20 = "]";
inline constexpr absl::string_view kStrLiteral7 = "b";
inline constexpr absl::string_view kStrLiteral8 = "f";
inline constexpr absl::string_view kStrLiteral1 = "false";
inline constexpr absl::string_view kStrLiteral9 = "n";
inline constexpr absl::string_view kStrLiteral2 = "null";
inline constexpr absl::string_view kStrLiteral10 = "r";
inline constexpr absl::string_view kStrLiteral11 = "t";
inline constexpr absl::string_view kStrLiteral0 = "true";
inline constexpr absl::string_view kStrLiteral9 = "{";
inline constexpr absl::string_view kStrLiteral10 = "}";
inline constexpr absl::string_view kStrLiteral16 = "{";
inline constexpr absl::string_view kStrLiteral17 = "}";
inline constexpr absl::string_view kStrCharSet3 = R"grammar([ \t\n\r])grammar";
inline constexpr absl::string_view kStrCharSet1 = R"grammar([1-9])grammar";
inline constexpr absl::string_view kStrCharSet2 = R"grammar([Ee])grammar";
Expand All @@ -168,73 +195,99 @@ class ElementNode final : public TupleDomain<kElementNode, ValueNode> {};
class STRINGNode final
: public TupleDomain<kSTRINGNode, Literal4, STRINGSubNode7, Literal4> {};
class CHARACTERNode final : public TupleDomain<kCHARACTERNode, CharSet0> {};
class ESCAPEDNode final
: public VariantDomain<kESCAPEDNode, 0, Literal4, Literal5, Literal6,
Literal7, Literal8, Literal9, Literal10, Literal11> {
};
class NUMBERNode final : public TupleDomain<kNUMBERNode, INTEGERNode,
NUMBERSubNode8, NUMBERSubNode9> {};
NUMBERSubNode10, NUMBERSubNode11> {
};
class INTEGERNode final
: public VariantDomain<kINTEGERNode, 0, DIGITNode, INTEGERSubNode10,
INTEGERSubNode11, INTEGERSubNode12> {};
class DIGITSNode final : public TupleDomain<kDIGITSNode, DIGITSSubNode13> {};
: public VariantDomain<kINTEGERNode, 0, DIGITNode, INTEGERSubNode12,
INTEGERSubNode13, INTEGERSubNode14> {};
class DIGITSNode final : public TupleDomain<kDIGITSNode, DIGITSSubNode15> {};
class DIGITNode final
: public VariantDomain<kDIGITNode, 0, Literal5, ONETONINENode> {};
: public VariantDomain<kDIGITNode, 0, Literal12, ONETONINENode> {};
class ONETONINENode final : public TupleDomain<kONETONINENode, CharSet1> {};
class FRACTIONNode final
: public TupleDomain<kFRACTIONNode, Literal6, DIGITSNode> {};
: public TupleDomain<kFRACTIONNode, Literal13, DIGITSNode> {};
class EXPONENTNode final
: public TupleDomain<kEXPONENTNode, CharSet2, EXPONENTSubNode14,
ONETONINENode, EXPONENTSubNode15> {};
class SIGNNode final : public VariantDomain<kSIGNNode, 0, Literal7, Literal8> {
};
class WSPACENode final : public TupleDomain<kWSPACENode, WSPACESubNode16> {};
: public TupleDomain<kEXPONENTNode, CharSet2, EXPONENTSubNode16,
ONETONINENode, EXPONENTSubNode17> {};
class SIGNNode final
: public VariantDomain<kSIGNNode, 0, Literal14, Literal15> {};
class WSPACENode final : public TupleDomain<kWSPACENode, WSPACESubNode18> {};
class ObjectSubNode0 final
: public TupleDomain<kObjectSubNode0, Literal9, Literal10> {};
: public TupleDomain<kObjectSubNode0, Literal16, Literal17> {};
class ObjectSubNode1 final
: public TupleDomain<kObjectSubNode1, Literal9, MembersNode, Literal10> {};
: public TupleDomain<kObjectSubNode1, Literal16, MembersNode, Literal17> {};
class MembersSubNode2 final
: public TupleDomain<kMembersSubNode2, MemberNode, Literal11, MembersNode> {
: public TupleDomain<kMembersSubNode2, MemberNode, Literal18, MembersNode> {
};
class ArraySubNode3 final
: public TupleDomain<kArraySubNode3, Literal12, Literal13> {};
: public TupleDomain<kArraySubNode3, Literal19, Literal20> {};
class ArraySubNode4 final
: public TupleDomain<kArraySubNode4, Literal12, ElementsNode, Literal13> {};
: public TupleDomain<kArraySubNode4, Literal19, ElementsNode, Literal20> {};
class ElementsSubNode5 final
: public Vector<kElementsSubNode5, ElementsSubNode6> {};
class ElementsSubNode6 final
: public TupleDomain<kElementsSubNode6, Literal11, ElementNode> {};
class STRINGSubNode7 final : public Vector<kSTRINGSubNode7, CHARACTERNode> {};
class NUMBERSubNode8 final : public Optional<kNUMBERSubNode8, FRACTIONNode> {};
class NUMBERSubNode9 final : public Optional<kNUMBERSubNode9, EXPONENTNode> {};
class INTEGERSubNode10 final
: public TupleDomain<kINTEGERSubNode10, ONETONINENode, DIGITSNode> {};
class INTEGERSubNode11 final
: public TupleDomain<kINTEGERSubNode11, Literal8, DIGITNode> {};
class INTEGERSubNode12 final : public TupleDomain<kINTEGERSubNode12, Literal8,
: public TupleDomain<kElementsSubNode6, Literal18, ElementNode> {};
class STRINGSubNode7 final : public Vector<kSTRINGSubNode7, STRINGSubNode8> {};
class STRINGSubNode9 final
: public TupleDomain<kSTRINGSubNode9, Literal5, ESCAPEDNode> {};
class STRINGSubNode8 final
: public VariantDomain<kSTRINGSubNode8, 0, CHARACTERNode, STRINGSubNode9> {
};
class NUMBERSubNode10 final : public Optional<kNUMBERSubNode10, FRACTIONNode> {
};
class NUMBERSubNode11 final : public Optional<kNUMBERSubNode11, EXPONENTNode> {
};
class INTEGERSubNode12 final
: public TupleDomain<kINTEGERSubNode12, ONETONINENode, DIGITSNode> {};
class INTEGERSubNode13 final
: public TupleDomain<kINTEGERSubNode13, Literal15, DIGITNode> {};
class INTEGERSubNode14 final : public TupleDomain<kINTEGERSubNode14, Literal15,
ONETONINENode, DIGITSNode> {};
class DIGITSSubNode13 final
: public NonEmptyVector<kDIGITSSubNode13, DIGITNode> {};
class EXPONENTSubNode14 final : public Optional<kEXPONENTSubNode14, SIGNNode> {
class DIGITSSubNode15 final
: public NonEmptyVector<kDIGITSSubNode15, DIGITNode> {};
class EXPONENTSubNode16 final : public Optional<kEXPONENTSubNode16, SIGNNode> {
};
class EXPONENTSubNode15 final : public Optional<kEXPONENTSubNode15, DIGITNode> {
class EXPONENTSubNode17 final : public Optional<kEXPONENTSubNode17, DIGITNode> {
};
class WSPACESubNode16 final
: public NonEmptyVector<kWSPACESubNode16, CharSet3> {};
class Literal7 final : public StringLiteralDomain<kLiteral7, kStrLiteral7> {};
class Literal11 final : public StringLiteralDomain<kLiteral11, kStrLiteral11> {
class WSPACESubNode18 final
: public NonEmptyVector<kWSPACESubNode18, CharSet3> {};
class Literal14 final : public StringLiteralDomain<kLiteral14, kStrLiteral14> {
};
class Literal18 final : public StringLiteralDomain<kLiteral18, kStrLiteral18> {
};
class Literal15 final : public StringLiteralDomain<kLiteral15, kStrLiteral15> {
};
class Literal13 final : public StringLiteralDomain<kLiteral13, kStrLiteral13> {
};
class Literal8 final : public StringLiteralDomain<kLiteral8, kStrLiteral8> {};
class Literal6 final : public StringLiteralDomain<kLiteral6, kStrLiteral6> {};
class Literal5 final : public StringLiteralDomain<kLiteral5, kStrLiteral5> {};
class Literal3 final : public StringLiteralDomain<kLiteral3, kStrLiteral3> {};
class Literal12 final : public StringLiteralDomain<kLiteral12, kStrLiteral12> {
};
class Literal3 final : public StringLiteralDomain<kLiteral3, kStrLiteral3> {};
class Literal19 final : public StringLiteralDomain<kLiteral19, kStrLiteral19> {
};
class Literal4 final : public StringLiteralDomain<kLiteral4, kStrLiteral4> {};
class Literal13 final : public StringLiteralDomain<kLiteral13, kStrLiteral13> {
class Literal5 final : public StringLiteralDomain<kLiteral5, kStrLiteral5> {};
class Literal20 final : public StringLiteralDomain<kLiteral20, kStrLiteral20> {
};
class Literal7 final : public StringLiteralDomain<kLiteral7, kStrLiteral7> {};
class Literal8 final : public StringLiteralDomain<kLiteral8, kStrLiteral8> {};
class Literal1 final : public StringLiteralDomain<kLiteral1, kStrLiteral1> {};
class Literal2 final : public StringLiteralDomain<kLiteral2, kStrLiteral2> {};
class Literal0 final : public StringLiteralDomain<kLiteral0, kStrLiteral0> {};
class Literal9 final : public StringLiteralDomain<kLiteral9, kStrLiteral9> {};
class Literal2 final : public StringLiteralDomain<kLiteral2, kStrLiteral2> {};
class Literal10 final : public StringLiteralDomain<kLiteral10, kStrLiteral10> {
};
class Literal11 final : public StringLiteralDomain<kLiteral11, kStrLiteral11> {
};
class Literal0 final : public StringLiteralDomain<kLiteral0, kStrLiteral0> {};
class Literal16 final : public StringLiteralDomain<kLiteral16, kStrLiteral16> {
};
class Literal17 final : public StringLiteralDomain<kLiteral17, kStrLiteral17> {
};
class CharSet3 final : public RegexLiteralDomain<kCharSet3, kStrCharSet3> {};
class CharSet1 final : public RegexLiteralDomain<kCharSet1, kStrCharSet1> {};
class CharSet2 final : public RegexLiteralDomain<kCharSet2, kStrCharSet2> {};
Expand Down