diff --git a/README.txt b/README.txt
index c2fa6c4ac..072bb5955 100644
--- a/README.txt
+++ b/README.txt
@@ -18,7 +18,7 @@ version of ANTLR for 9 years.
 
 You should use v3 in conjunction with ANTLRWorks:
 
-    http://www.antlr.org/works/index.html
+    http://www.antlr3.org/works/
 
 and gUnit (grammar unit testing tool included in distribution):
 
@@ -98,7 +98,7 @@ How is ANTLR v3 different than ANTLR v2?
 
 See "What is the difference between ANTLR v2 and v3?"
 
-    http://www.antlr.org/wiki/pages/viewpage.action?pageId=719
+    http://www.antlr.org/wiki/pages/viewpage.action?pageId=2687279
 
 See migration guide:
 
@@ -110,8 +110,8 @@ How do I install this damn thing?
 
 You will have grabbed either of these:
 
-    http://antlr.org/download/antlr-3.5-complete-no-st3.jar
-    http://antlr.org/download/antlr-3.5-complete.jar
+    http://www.antlr3.org/download/antlr-3.5.2-complete-no-st3.jar
+    http://www.antlr3.org/download/antlr-3.5.2-complete.jar
 
 It has all of the jars you need combined into one. Then you need to
 add antlr-3.5-complete.jar to your CLASSPATH or add to arg list; e.g., on unix:
@@ -139,4 +139,4 @@ Please see the FAQ
 
 How can I contribute to ANTLR v3?
 
-    http://www.antlr.org/wiki/pages/viewpage.action?pageId=33947666
+    http://www.antlr.org/wiki/pages/viewpage.action?pageId=2687297
diff --git a/contributors.txt b/contributors.txt
index c5d133dee..1b7ac7b09 100644
--- a/contributors.txt
+++ b/contributors.txt
@@ -48,6 +48,7 @@ the end of the following contributors list.
 CONTRIBUTORS:
 
 YYYY/MM/DD, github id, Full name, email
+2014/05/15, robstoll, Robert Stoll, rstoll@tutteli.ch
 2013/04/17, ibre5041, Ivan Brezina, ibre5041@ibrezina.net
 2013/02/19, murrayju, Justin Murray, murrayju@addpcs.com
 2012/07/12, parrt, Terence Parr, parrt@antlr.org
diff --git a/tool/src/main/java/org/antlr/tool/AssignTokenTypesBehavior.java b/tool/src/main/java/org/antlr/tool/AssignTokenTypesBehavior.java
index cc7c0f4e1..af9357ccc 100644
--- a/tool/src/main/java/org/antlr/tool/AssignTokenTypesBehavior.java
+++ b/tool/src/main/java/org/antlr/tool/AssignTokenTypesBehavior.java
@@ -90,10 +90,20 @@ protected void trackString(GrammarAST t) {
 			return;
 		}
 		// otherwise add literal to token types if referenced from parser rule
-		// or in the tokens{} section
+		// or in the tokens{} section. A combined grammar may also define a tokenVocab
+		// (e.g. to predefine the order of the tokens, which can reduce branching);
+		// literals that come from such a vocab already have a token type, so
+		// grammar.getTokenType(t.getText()) will not be Label.INVALID for them.
+		// We therefore make sure those literals are still recorded for combined
+		// grammars that define a tokenVocab.
+		String tokenVocab = (String) grammar.getOption("tokenVocab");
+		boolean hasTokenVocabAndIsCombinedParser = tokenVocab != null &&
+				grammar.type == Grammar.COMBINED;
+
 		if ( (currentRuleName==null ||
-			  Character.isLowerCase(currentRuleName.charAt(0))) &&
-			  grammar.getTokenType(t.getText())==Label.INVALID )
+			  Character.isLowerCase(currentRuleName.charAt(0))) &&
+			  (hasTokenVocabAndIsCombinedParser ||
+			  grammar.getTokenType(t.getText())==Label.INVALID ))
 		{
 			stringLiterals.put(t.getText(), UNASSIGNED_IN_PARSER_RULE);
 		}
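For context, this is the situation the new hasTokenVocabAndIsCombinedParser check is meant to cover; it mirrors the Foo setup that the new tests in TestCompositeGrammars.java below construct (the token type 4 is just an illustrative value). The combined grammar pulls in a token vocabulary that already assigns a type to the literal, so grammar.getTokenType(t.getText()) is no longer Label.INVALID, yet the literal still has to be recorded so the generated lexer gets a rule for it:

    Foo.tokens (the imported token vocabulary):

        TokenFromTokenVocab=4
        'token'=4

    Foo.g (a combined grammar that imports it):

        grammar Foo;
        options {tokenVocab=Foo;}
        tokens{TokenFromTokenVocab='token';}
        a : TokenFromTokenVocab;
        WS : (' '|'\n') {$channel=HIDDEN;} ;

Without the extra condition the literal was skipped in this setup, which is what the "NoWarningsLikeNoLexerRuleCorrespondingToToken" tests below guard against.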
diff --git a/tool/src/main/java/org/antlr/tool/Grammar.java b/tool/src/main/java/org/antlr/tool/Grammar.java
index 667ebb2dd..82fc507b3 100644
--- a/tool/src/main/java/org/antlr/tool/Grammar.java
+++ b/tool/src/main/java/org/antlr/tool/Grammar.java
@@ -2392,8 +2392,7 @@ else if ( token == '\'' ) {
 				ErrorManager.error(ErrorManager.MSG_TOKENS_FILE_SYNTAX_ERROR,
 								   vocabName+CodeGenerator.VOCAB_FILE_EXTENSION,
 								   Utils.integer(lineNum));
-				while ( tokenizer.nextToken() != StreamTokenizer.TT_EOL ) {}
-				token = tokenizer.nextToken();
+				token = recoverToNextLine(tokenizer);
 				continue;
 			}
 			token = tokenizer.nextToken();
@@ -2401,8 +2400,7 @@ else if ( token == '\'' ) {
 				ErrorManager.error(ErrorManager.MSG_TOKENS_FILE_SYNTAX_ERROR,
 								   vocabName+CodeGenerator.VOCAB_FILE_EXTENSION,
 								   Utils.integer(lineNum));
-				while ( tokenizer.nextToken() != StreamTokenizer.TT_EOL ) {}
-				token = tokenizer.nextToken();
+				token = recoverToNextLine(tokenizer);
 				continue;
 			}
 			token = tokenizer.nextToken(); // skip '='
@@ -2410,8 +2408,7 @@ else if ( token == '\'' ) {
 				ErrorManager.error(ErrorManager.MSG_TOKENS_FILE_SYNTAX_ERROR,
 								   vocabName+CodeGenerator.VOCAB_FILE_EXTENSION,
 								   Utils.integer(lineNum));
-				while ( tokenizer.nextToken() != StreamTokenizer.TT_EOL ) {}
-				token = tokenizer.nextToken();
+				token = recoverToNextLine(tokenizer);
 				continue;
 			}
 			int tokenType = (int)tokenizer.nval;
@@ -2424,11 +2421,13 @@ else if ( token == '\'' ) {
 				ErrorManager.error(ErrorManager.MSG_TOKENS_FILE_SYNTAX_ERROR,
 								   vocabName+CodeGenerator.VOCAB_FILE_EXTENSION,
 								   Utils.integer(lineNum));
-				while ( tokenizer.nextToken() != StreamTokenizer.TT_EOL ) {}
-				token = tokenizer.nextToken();
+				token = recoverToNextLine(tokenizer);
 				continue;
 			}
 			token = tokenizer.nextToken(); // skip newline
+			while ( token == StreamTokenizer.TT_EOL ) {
+				token = tokenizer.nextToken();
+			}
 		}
 		br.close();
 	}
@@ -2449,6 +2448,15 @@ else if ( token == '\'' ) {
 		return composite.maxTokenType;
 	}
 
+	private int recoverToNextLine(StreamTokenizer tokenizer) throws IOException {
+		int token = tokenizer.nextToken();
+		while (token != StreamTokenizer.TT_EOL && token != StreamTokenizer.TT_EOF) {
+			token = tokenizer.nextToken();
+		}
+		token = tokenizer.nextToken();
+		return token;
+	}
+
 	/** Given a token type, get a meaningful name for it such as the ID
 	 *  or string literal.  If this is a lexer and the ttype is in the
 	 *  char vocabulary, compute an ANTLR-valid (possibly escaped) char literal.
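Taken together, the Grammar.java changes above do two things to the .tokens reader: error recovery now stops at end of file as well as end of line (so a malformed final line can no longer loop forever waiting for a TT_EOL that never comes), and extra TT_EOL tokens after an entry are skipped, which is what lets blank lines and comment-only lines through. The following is a small self-contained sketch of that idiom, not the actual ANTLR code; the class name and the sample input are made up for illustration:

    import java.io.IOException;
    import java.io.StreamTokenizer;
    import java.io.StringReader;

    public class TokensFileRecoveryDemo {

        // same shape as the new Grammar.recoverToNextLine(): skip to the end of the
        // current line, but also stop at EOF so a bad last line cannot spin forever
        static int recoverToNextLine(StreamTokenizer tokenizer) throws IOException {
            int token = tokenizer.nextToken();
            while (token != StreamTokenizer.TT_EOL && token != StreamTokenizer.TT_EOF) {
                token = tokenizer.nextToken();
            }
            return tokenizer.nextToken();
        }

        public static void main(String[] args) throws IOException {
            // a .tokens-style input whose last line is malformed ("#" instead of NAME=type)
            StreamTokenizer tokenizer = new StreamTokenizer(new StringReader("A=4\nB=5\n#"));
            tokenizer.eolIsSignificant(true); // line boundaries must show up as TT_EOL
            int token = tokenizer.nextToken();
            while (token != StreamTokenizer.TT_EOF) {
                if (token != StreamTokenizer.TT_WORD) {       // not a token name: bad line
                    System.out.println("syntax error, skipping rest of line");
                    token = recoverToNextLine(tokenizer);
                    continue;
                }
                String name = tokenizer.sval;
                tokenizer.nextToken();                        // skip '='
                tokenizer.nextToken();                        // token type number
                System.out.println(name + " -> " + (int) tokenizer.nval);
                token = tokenizer.nextToken();                // consume the newline
                while (token == StreamTokenizer.TT_EOL) {     // tolerate blank lines, too
                    token = tokenizer.nextToken();
                }
            }
        }
    }

Running it prints A -> 4 and B -> 5 and reports one syntax error for the stray '#'. With the old recovery loop, while ( tokenizer.nextToken() != StreamTokenizer.TT_EOL ) {}, the same input would hang at end of file because nextToken() keeps returning TT_EOF and TT_EOL never arrives.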
diff --git a/tool/src/test/java/org/antlr/test/TestCompositeGrammars.java b/tool/src/test/java/org/antlr/test/TestCompositeGrammars.java
index cad4addc1..3bde3a13f 100644
--- a/tool/src/test/java/org/antlr/test/TestCompositeGrammars.java
+++ b/tool/src/test/java/org/antlr/test/TestCompositeGrammars.java
@@ -443,6 +443,164 @@ public class TestCompositeGrammars extends BaseTest {
 
 		assertEquals("unexpected errors: "+equeue, 0, equeue.errors.size());
 	}
+
+	@Test public void testTokenVocabEmptyNoWarningsLikeNoLexerRuleCorrespondingToToken()
+		throws Exception {
+		ErrorQueue equeue = new ErrorQueue();
+		ErrorManager.setErrorListener(equeue);
+
+		mkdir(tmpdir);
+		writeFile(tmpdir, "Foo.tokens", "");
+
+		String grammar = "grammar Foo;\n" +
+			"options {tokenVocab=Foo;}\n" +
+			"tokens{TokenFromTokenVocab='token';}\n" +
+			"a : TokenFromTokenVocab;\n" +
+			"WS : (' '|'\\n') {$channel=HIDDEN;} ;\n";
+
+		writeFile(tmpdir, "Foo.g", grammar);
+		Tool antlr = newTool(new String[] {"-lib", tmpdir, tmpdir + "/Foo.g"});
+		antlr.process();
+
+		assertEquals("unexpected errors: "+equeue, 0, equeue.errors.size());
+		assertEquals("unexpected warnings: "+equeue, 0, equeue.warnings.size());
+	}
+
+	@Test public void testTokenVocabWrongIdentifierAtEndOfFileRecoversCorrectly()
+		throws Exception {
+		ErrorQueue equeue = new ErrorQueue();
+		ErrorManager.setErrorListener(equeue);
+
+		mkdir(tmpdir);
+		writeFile(tmpdir, "Foo.tokens", "#");
+
+		String grammar = "grammar Foo;\n" +
+			"options {tokenVocab=Foo;}\n" +
+			"tokens{TokenFromTokenVocab='token';}\n" +
+			"a : TokenFromTokenVocab;\n" +
+			"WS : (' '|'\\n') {$channel=HIDDEN;} ;\n";
+
+		writeFile(tmpdir, "Foo.g", grammar);
+		Tool antlr = newTool(new String[] {"-lib", tmpdir, tmpdir + "/Foo.g"});
+		antlr.process();
+
+		// two errors: one in the parser composite and one in the lexer composite
+		assertEquals("unexpected errors: "+equeue, 2, equeue.errors.size());
+		assertEquals("unexpected warnings: "+equeue, 0, equeue.warnings.size());
+	}
+
+	@Test public void testTokenVocabMissingEqualAtEndOfFileRecoversCorrectly()
+		throws Exception {
+		ErrorQueue equeue = new ErrorQueue();
+		ErrorManager.setErrorListener(equeue);
+
+		mkdir(tmpdir);
+		writeFile(tmpdir, "Foo.tokens", "tokenFromTokenVocab");
+
+		String grammar = "grammar Foo;\n" +
+			"options {tokenVocab=Foo;}\n" +
+			"tokens{TokenFromTokenVocab='token';}\n" +
+			"a : TokenFromTokenVocab;\n" +
+			"WS : (' '|'\\n') {$channel=HIDDEN;} ;\n";
+
+		writeFile(tmpdir, "Foo.g", grammar);
+		Tool antlr = newTool(new String[] {"-lib", tmpdir, tmpdir + "/Foo.g"});
+		antlr.process();
+
+		// two errors: one in the parser composite and one in the lexer composite
+		assertEquals("unexpected errors: "+equeue, 2, equeue.errors.size());
+		assertEquals("unexpected warnings: "+equeue, 0, equeue.warnings.size());
+	}
+
+	@Test public void testTokenVocabMissingNumberAtEndOfFileRecoversCorrectly()
+		throws Exception {
+		ErrorQueue equeue = new ErrorQueue();
+		ErrorManager.setErrorListener(equeue);
+
+		mkdir(tmpdir);
+		writeFile(tmpdir, "Foo.tokens", "tokenFromTokenVocab=");
+
+		String grammar = "grammar Foo;\n" +
+			"options {tokenVocab=Foo;}\n" +
+			"tokens{TokenFromTokenVocab='token';}\n" +
+			"a : TokenFromTokenVocab;\n" +
+			"WS : (' '|'\\n') {$channel=HIDDEN;} ;\n";
+
+		writeFile(tmpdir, "Foo.g", grammar);
+		Tool antlr = newTool(new String[] {"-lib", tmpdir, tmpdir + "/Foo.g"});
+		antlr.process();
+
+		// two errors: one in the parser composite and one in the lexer composite
+		assertEquals("unexpected errors: "+equeue, 2, equeue.errors.size());
assertEquals("unexpected warnings: "+equeue, 0, equeue.warnings.size()); + } + + @Test public void testTokenVocabCommentsOnOwnLineNoErrorNoWarnings() + throws Exception { + ErrorQueue equeue = new ErrorQueue(); + ErrorManager.setErrorListener(equeue); + + mkdir(tmpdir); + writeFile(tmpdir, "Foo.tokens", "TokenFromTokenVocab=4\n" + + "//some comments on a new line\n'token'=4\n"); + + String grammar = "grammar Foo;\n" + + "options {tokenVocab=Foo;}\n" + + "tokens{TokenFromTokenVocab='token';}\n" + + "a : TokenFromTokenVocab;\n" + + "WS : (' '|'\\n') {$channel=HIDDEN;} ;\n"; + + writeFile(tmpdir, "Foo.g", grammar); + Tool antlr = newTool(new String[] {"-lib", tmpdir, tmpdir + "/Foo.g"}); + antlr.process(); + + assertEquals("unexpected errors: "+equeue, 0, equeue.errors.size()); + assertEquals("unexpected warnings: "+equeue, 0, equeue.warnings.size()); + } + + @Test public void testTokenVocabWithEmptyLineNoErrorNoWarnings() + throws Exception { + ErrorQueue equeue = new ErrorQueue(); + ErrorManager.setErrorListener(equeue); + + mkdir(tmpdir); + writeFile(tmpdir, "Foo.tokens", "TokenFromTokenVocab=4\n\n'token'=4\n"); + + String grammar = "grammar Foo;\n" + + "options {tokenVocab=Foo;}\n" + + "tokens{TokenFromTokenVocab='token';}\n" + + "a : TokenFromTokenVocab;\n" + + "WS : (' '|'\\n') {$channel=HIDDEN;} ;\n"; + + writeFile(tmpdir, "Foo.g", grammar); + Tool antlr = newTool(new String[] {"-lib", tmpdir, tmpdir + "/Foo.g"}); + antlr.process(); + + assertEquals("unexpected errors: "+equeue, 0, equeue.errors.size()); + assertEquals("unexpected warnings: "+equeue, 0, equeue.warnings.size()); + } + + @Test public void testTokenVocabNonReferencedTokensNoWarningsLikeNoLexerRuleCorrespondingToToken() + throws Exception { + ErrorQueue equeue = new ErrorQueue(); + ErrorManager.setErrorListener(equeue); + + mkdir(tmpdir); + writeFile(tmpdir, "Foo.tokens", "ReservedTokenNotYetUsedInParserRule=4\n'reserved'=4\n"); + + String grammar = "grammar Foo;\n" + + "options {tokenVocab=Foo;}\n" + + "tokens{TokenFromTokenVocab='token';}\n" + + "a : TokenFromTokenVocab;\n" + + "WS : (' '|'\\n') {$channel=HIDDEN;} ;\n"; + + writeFile(tmpdir, "Foo.g", grammar); + Tool antlr = newTool(new String[] {"-lib", tmpdir, tmpdir + "/Foo.g"}); + antlr.process(); + + assertEquals("unexpected errors: "+equeue, 0, equeue.errors.size()); + assertEquals("unexpected warnings: "+equeue, 0, equeue.warnings.size()); + } @Test public void testSyntaxErrorsInImportsNotThrownOut() throws Exception { ErrorQueue equeue = new ErrorQueue();