diff --git a/doc/manual.asciidoc b/doc/manual.asciidoc index d01b75a334..0385937e15 100644 --- a/doc/manual.asciidoc +++ b/doc/manual.asciidoc @@ -861,7 +861,13 @@ one_word_with_no_space = foo$ Other whitespace is only significant if it's at the beginning of a line. If a line is indented more than the previous one, it's considered part of its parent's scope; if it is indented less than the -previous one, it closes the previous scope. +previous one, it closes the previous scope. Note that the physical line that is +a line continuation is not considered as a separate line for the scope +determination purposes. + +_Available since Ninja 1.12._ +Horizontal Tab character (0x09) can be used for indentation. Earlier Ninja +versions allow only space (0x20) character to be used for such purpose. [[ref_toplevel]] Top-level variables diff --git a/misc/output_test.py b/misc/output_test.py index 78848cbd4c..205ab56540 100755 --- a/misc/output_test.py +++ b/misc/output_test.py @@ -162,6 +162,30 @@ def test_tool_inputs(self): out2 ''') + def test_tabs_indent(self): + content = ''' +rule exec +command = $cmd + +var_hello = hell$ +o + +build foo: exec +cmd = touch foo + +build bar: exec $ +foo +cmd = touch bar + +build $var_hello: exec +cmd = touch $var_hello + +build baz: exec $ +bar $var_hello +cmd = touch baz +'''.replace('', '\t') + run(content) + if __name__ == '__main__': unittest.main() diff --git a/src/lexer.cc b/src/lexer.cc index e5729f00a0..148084e928 100644 --- a/src/lexer.cc +++ b/src/lexer.cc @@ -105,12 +105,9 @@ const char* Lexer::TokenErrorHint(Token expected) { string Lexer::DescribeLastError() { if (last_token_) { - switch (last_token_[0]) { - case '\t': - return "tabs are not allowed, use spaces"; - } + return "lexing error "; } - return "lexing error"; + return "lexing error (EOF?)"; } void Lexer::UnreadToken() { @@ -130,7 +127,7 @@ Lexer::Token Lexer::ReadToken() { unsigned int yyaccept = 0; static const unsigned char yybm[] = { 0, 128, 128, 128, 128, 128, 128, 128, - 128, 128, 0, 128, 128, 128, 128, 128, + 128, 160, 0, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 160, 128, 128, 128, 128, 128, 128, 128, @@ -164,16 +161,17 @@ Lexer::Token Lexer::ReadToken() { }; yych = *p; if (yybm[0+yych] & 32) { - goto yy9; + goto yy6; } if (yych <= '^') { if (yych <= ',') { if (yych <= '\f') { if (yych <= 0x00) goto yy2; - if (yych == '\n') goto yy6; + if (yych <= 0x08) goto yy4; + if (yych <= '\n') goto yy9; goto yy4; } else { - if (yych <= '\r') goto yy8; + if (yych <= '\r') goto yy11; if (yych == '#') goto yy12; goto yy4; } @@ -228,31 +226,32 @@ Lexer::Token Lexer::ReadToken() { yy5: { token = ERROR; break; } yy6: - ++p; - { token = NEWLINE; break; } -yy8: - yych = *++p; - if (yych == '\n') goto yy28; - goto yy5; -yy9: yyaccept = 0; yych = *(q = ++p); if (yybm[0+yych] & 32) { - goto yy9; + goto yy6; } if (yych <= '\f') { - if (yych == '\n') goto yy6; + if (yych <= 0x08) goto yy8; + if (yych <= '\n') goto yy9; } else { - if (yych <= '\r') goto yy30; - if (yych == '#') goto yy32; + if (yych <= '\r') goto yy28; + if (yych == '#') goto yy30; } -yy11: +yy8: { token = INDENT; break; } +yy9: + ++p; + { token = NEWLINE; break; } +yy11: + yych = *++p; + if (yych == '\n') goto yy32; + goto yy5; yy12: yyaccept = 1; yych = *(q = ++p); if (yych <= 0x00) goto yy5; - goto yy33; + goto yy31; yy13: yych = *++p; yy14: @@ -296,25 +295,27 @@ Lexer::Token Lexer::ReadToken() { if (yych == '|') goto yy44; { token = PIPE; break; } yy28: - ++p; - { token = NEWLINE; break; } -yy30: yych = *++p; - if (yych == '\n') goto yy28; -yy31: + if (yych == '\n') goto yy32; +yy29: p = q; if (yyaccept == 0) { - goto yy11; + goto yy8; } else { goto yy5; } -yy32: +yy30: yych = *++p; -yy33: +yy31: if (yybm[0+yych] & 128) { - goto yy32; + goto yy30; } - if (yych <= 0x00) goto yy31; + if (yych <= 0x00) goto yy29; + goto yy34; +yy32: + ++p; + { token = NEWLINE; break; } +yy34: ++p; { continue; } yy36: @@ -478,7 +479,7 @@ void Lexer::EatWhitespace() { unsigned char yych; static const unsigned char yybm[] = { 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, + 0, 128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128, 0, 0, 0, 0, 0, 0, 0, @@ -631,7 +632,7 @@ bool Lexer::ReadEvalString(EvalString* eval, bool path, string* err) { unsigned char yych; static const unsigned char yybm[] = { 0, 16, 16, 16, 16, 16, 16, 16, - 16, 16, 0, 16, 16, 0, 16, 16, + 16, 48, 0, 16, 16, 0, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 32, 16, 16, 16, 0, 16, 16, 16, @@ -797,6 +798,7 @@ bool Lexer::ReadEvalString(EvalString* eval, bool path, string* err) { goto yy113; yy128: yych = *++p; + if (yych == '\t') goto yy128; if (yych == ' ') goto yy128; { continue; diff --git a/src/lexer.in.cc b/src/lexer.in.cc index 6f1d8e7937..ee35399c39 100644 --- a/src/lexer.in.cc +++ b/src/lexer.in.cc @@ -104,12 +104,9 @@ const char* Lexer::TokenErrorHint(Token expected) { string Lexer::DescribeLastError() { if (last_token_) { - switch (last_token_[0]) { - case '\t': - return "tabs are not allowed, use spaces"; - } + return "lexing error "; } - return "lexing error"; + return "lexing error (EOF?)"; } void Lexer::UnreadToken() { @@ -133,10 +130,10 @@ Lexer::Token Lexer::ReadToken() { simple_varname = [a-zA-Z0-9_-]+; varname = [a-zA-Z0-9_.-]+; - [ ]*"#"[^\000\n]*"\n" { continue; } - [ ]*"\r\n" { token = NEWLINE; break; } - [ ]*"\n" { token = NEWLINE; break; } - [ ]+ { token = INDENT; break; } + [ \t]*"#"[^\000\n]*"\n" { continue; } + [ \t]*"\r\n" { token = NEWLINE; break; } + [ \t]*"\n" { token = NEWLINE; break; } + [ \t]+ { token = INDENT; break; } "build" { token = BUILD; break; } "pool" { token = POOL; break; } "rule" { token = RULE; break; } @@ -175,7 +172,7 @@ void Lexer::EatWhitespace() { for (;;) { ofs_ = p; /*!re2c - [ ]+ { continue; } + [ \t]+ { continue; } "$\r\n" { continue; } "$\n" { continue; } nul { break; } @@ -241,10 +238,10 @@ bool Lexer::ReadEvalString(EvalString* eval, bool path, string* err) { eval->AddText(StringPiece(" ", 1)); continue; } - "$\r\n"[ ]* { + "$\r\n"[ \t]* { continue; } - "$\n"[ ]* { + "$\n"[ \t]* { continue; } "${"varname"}" { diff --git a/src/lexer_test.cc b/src/lexer_test.cc index c5c416dc54..ab884dbe62 100644 --- a/src/lexer_test.cc +++ b/src/lexer_test.cc @@ -17,12 +17,20 @@ #include "eval_env.h" #include "test.h" -using namespace std; +std::string tok(Lexer::Token t) { + const char *str = Lexer::TokenName(t); + if (!str) + return "TokenOutOfRange: " + std::to_string(t); + return str; +} + +#define EXPECT_EQ_TOK(t1, t2) \ + EXPECT_EQ(tok(t1), tok(t2)) TEST(Lexer, ReadVarValue) { Lexer lexer("plain text $var $VaR ${x}\n"); EvalString eval; - string err; + std::string err; EXPECT_TRUE(lexer.ReadVarValue(&eval, &err)); EXPECT_EQ("", err); EXPECT_EQ("[plain text ][$var][ ][$VaR][ ][$x]", @@ -32,7 +40,7 @@ TEST(Lexer, ReadVarValue) { TEST(Lexer, ReadEvalStringEscapes) { Lexer lexer("$ $$ab c$: $\ncde\n"); EvalString eval; - string err; + std::string err; EXPECT_TRUE(lexer.ReadVarValue(&eval, &err)); EXPECT_EQ("", err); EXPECT_EQ("[ $ab c: cde]", @@ -41,7 +49,7 @@ TEST(Lexer, ReadEvalStringEscapes) { TEST(Lexer, ReadIdent) { Lexer lexer("foo baR baz_123 foo-bar"); - string ident; + std::string ident; EXPECT_TRUE(lexer.ReadIdent(&ident)); EXPECT_EQ("foo", ident); EXPECT_TRUE(lexer.ReadIdent(&ident)); @@ -56,12 +64,12 @@ TEST(Lexer, ReadIdentCurlies) { // Verify that ReadIdent includes dots in the name, // but in an expansion $bar.dots stops at the dot. Lexer lexer("foo.dots $bar.dots ${bar.dots}\n"); - string ident; + std::string ident; EXPECT_TRUE(lexer.ReadIdent(&ident)); EXPECT_EQ("foo.dots", ident); EvalString eval; - string err; + std::string err; EXPECT_TRUE(lexer.ReadVarValue(&eval, &err)); EXPECT_EQ("", err); EXPECT_EQ("[$bar][.dots ][$bar.dots]", @@ -71,7 +79,7 @@ TEST(Lexer, ReadIdentCurlies) { TEST(Lexer, Error) { Lexer lexer("foo$\nbad $"); EvalString eval; - string err; + std::string err; ASSERT_FALSE(lexer.ReadVarValue(&eval, &err)); EXPECT_EQ("input:2: bad $-escape (literal $ must be written as $$)\n" "bad $\n" @@ -83,16 +91,28 @@ TEST(Lexer, CommentEOF) { // Verify we don't run off the end of the string when the EOF is // mid-comment. Lexer lexer("# foo"); - Lexer::Token token = lexer.ReadToken(); - EXPECT_EQ(Lexer::ERROR, token); + EXPECT_EQ_TOK(Lexer::ERROR, lexer.ReadToken()); } TEST(Lexer, Tabs) { - // Verify we print a useful error on a disallowed character. - Lexer lexer(" \tfoobar"); - Lexer::Token token = lexer.ReadToken(); - EXPECT_EQ(Lexer::INDENT, token); - token = lexer.ReadToken(); - EXPECT_EQ(Lexer::ERROR, token); - EXPECT_EQ("tabs are not allowed, use spaces", lexer.DescribeLastError()); + Lexer lexer("rule foo\n" + "\tcommand = foobin $in"); + + EXPECT_EQ_TOK(Lexer::RULE, lexer.ReadToken()); + EXPECT_EQ_TOK(Lexer::IDENT, lexer.ReadToken()); + EXPECT_EQ_TOK(Lexer::NEWLINE, lexer.ReadToken()); + EXPECT_EQ_TOK(Lexer::INDENT, lexer.ReadToken()); + EXPECT_EQ_TOK(Lexer::IDENT, lexer.ReadToken()); + EXPECT_EQ_TOK(Lexer::EQUALS, lexer.ReadToken()); +} + +TEST(Lexer, TabsInVars) { + Lexer lexer("cflags =\n" + "\t-std=c11"); + + EXPECT_EQ_TOK(Lexer::IDENT, lexer.ReadToken()); + EXPECT_EQ_TOK(Lexer::EQUALS, lexer.ReadToken()); + EXPECT_EQ_TOK(Lexer::NEWLINE, lexer.ReadToken()); + EXPECT_EQ_TOK(Lexer::INDENT, lexer.ReadToken()); + EXPECT_EQ_TOK(Lexer::IDENT, lexer.ReadToken()); }