diff --git a/BUILD.bazel b/BUILD.bazel index 46087c6..d83d903 100644 --- a/BUILD.bazel +++ b/BUILD.bazel @@ -91,6 +91,7 @@ haskell_library( ], deps = [ ":ast", + ":describe-ast", ":lexer", "//third_party/haskell:aeson", "//third_party/haskell:array", @@ -201,9 +202,11 @@ hspec_test( size = "small", deps = [ ":hs-cimple", + "//third_party/haskell:QuickCheck", "//third_party/haskell:ansi-wl-pprint", "//third_party/haskell:base", "//third_party/haskell:data-fix", + "//third_party/haskell:extra", "//third_party/haskell:hspec", "//third_party/haskell:text", "//third_party/haskell:transformers-compat", diff --git a/cimple.cabal b/cimple.cabal index 2e6ba38..86a36d7 100644 --- a/cimple.cabal +++ b/cimple.cabal @@ -111,17 +111,21 @@ test-suite testsuite hs-source-dirs: test main-is: testsuite.hs other-modules: + Language.CimpleSpec Language.Cimple.AstSpec + Language.Cimple.DescribeAstSpec Language.Cimple.ParserSpec Language.Cimple.PrettySpec ghc-options: -Wall -Wno-unused-imports build-tool-depends: hspec-discover:hspec-discover build-depends: - ansi-wl-pprint + QuickCheck + , ansi-wl-pprint , base <5 , cimple , data-fix + , extra , hspec , text , transformers-compat diff --git a/src/Language/Cimple/Ast.hs b/src/Language/Cimple/Ast.hs index e5fb303..0409d28 100644 --- a/src/Language/Cimple/Ast.hs +++ b/src/Language/Cimple/Ast.hs @@ -147,7 +147,7 @@ data CommentF lexeme a | DocParagraph [a] | DocLine [a] - | DocCode a [a] a + | DocCode lexeme [a] lexeme | DocList [a] | DocULItem [a] [a] | DocOLItem lexeme [a] diff --git a/src/Language/Cimple/CommentParser.y b/src/Language/Cimple/CommentParser.y index 199214b..5c284c6 100644 --- a/src/Language/Cimple/CommentParser.y +++ b/src/Language/Cimple/CommentParser.y @@ -146,12 +146,16 @@ Command(x) | '@implements' CMT_WORD { Fix $ DocImplements $2 } | '@extends' CMT_WORD { Fix $ DocExtends $2 } | '@private' { Fix DocPrivate } -| '@code' Code '@endcode' { Fix $ DocCode (Fix (DocWord $1)) (reverse $2) (Fix (DocWord $3)) } +| Code { $1 } -Code :: { [NonTerm] } +Code :: { NonTerm } Code +: '@code' CodeWords '@endcode' { Fix $ DocCode $1 (reverse $2) $3 } + +CodeWords :: { [NonTerm] } +CodeWords : CodeWord { [$1] } -| Code CodeWord { $2 : $1 } +| CodeWords CodeWord { $2 : $1 } CodeWord :: { NonTerm } CodeWord diff --git a/src/Language/Cimple/DescribeAst.hs b/src/Language/Cimple/DescribeAst.hs index ca60ffa..0a59dfb 100644 --- a/src/Language/Cimple/DescribeAst.hs +++ b/src/Language/Cimple/DescribeAst.hs @@ -5,14 +5,19 @@ module Language.Cimple.DescribeAst ( HasLocation (..) , describeLexeme , describeNode + , parseError ) where import Data.Fix (Fix (..), foldFix) +import Data.List (isPrefixOf, (\\)) +import qualified Data.List as List import Data.Text (Text) import qualified Data.Text as Text import Language.Cimple.Ast (Node, NodeF (..)) import qualified Language.Cimple.Flatten as Flatten -import Language.Cimple.Lexer (Lexeme, lexemeLine) +import Language.Cimple.Lexer (Alex, AlexPosn (..), Lexeme (..), + alexError, lexemeLine) +import Language.Cimple.Tokens (LexemeClass (..)) class HasLocation a where @@ -38,5 +43,136 @@ describeNode node = case unFix node of ellipsis :: String ellipsis = "..." +describeLexemeClass :: LexemeClass -> Maybe String +describeLexemeClass = d + where + d IdConst = Just "constant name" + d IdFuncType = Just "function type name" + d IdStdType = Just "standard type name" + d IdSueType = Just "type name" + d IdVar = Just "variable name" + d LitChar = Just "character literal" + d LitInteger = Just "integer literal" + d LitString = Just "string literal" + d LitSysInclude = Just "system include" + d PctAmpersand = Just "address-of or bitwise-and operator" + d PctAmpersandAmpersand = Just "logical-and operator" + d PctAmpersandEq = Just "bitwise-and-assign operator" + d PctArrow = Just "pointer-member-access operator" + d PctAsterisk = Just "pointer-type, dereference, or multiply operator" + d PctAsteriskEq = Just "multiply-assign operator" + d PctCaret = Just "bitwise-xor operator" + d PctCaretEq = Just "xor-assign operator" + d PctColon = Just "ternary operator" + d PctComma = Just "comma" + d PctEllipsis = Just "ellipsis" + d PctEMark = Just "logical not operator" + d PctEMarkEq = Just "not-equals operator" + d PctEq = Just "assignment operator" + d PctEqEq = Just "equals operator" + d PctGreater = Just "greater-than operator" + d PctGreaterEq = Just "greater-or-equals operator" + d PctGreaterGreater = Just "right-shift operator" + d PctGreaterGreaterEq = Just "right-shift-assign operator" + d PctLBrace = Just "left brace" + d PctLBrack = Just "left square bracket" + d PctLess = Just "less-than operator" + d PctLessEq = Just "less-or-equals operator" + d PctLessLess = Just "left-shift operator" + d PctLessLessEq = Just "left-shift-assign operator" + d PctLParen = Just "left parenthesis" + d PctMinus = Just "minus operator" + d PctMinusEq = Just "minus-assign operator" + d PctMinusMinus = Just "decrement operator" + d PctPeriod = Just "member access operator" + d PctPercent = Just "modulus operator" + d PctPercentEq = Just "modulus-assign operator" + d PctPipe = Just "bitwise-or operator" + d PctPipeEq = Just "bitwise-or-assign operator" + d PctPipePipe = Just "logical-or operator" + d PctPlus = Just "addition operator" + d PctPlusEq = Just "add-assign operator" + d PctPlusPlus = Just "increment operator" + d PctQMark = Just "ternary operator" + d PctRBrace = Just "right brace" + d PctRBrack = Just "right square bracket" + d PctRParen = Just "right parenthesis" + d PctSemicolon = Just "end of statement semicolon" + d PctSlash = Just "division operator" + d PctSlashEq = Just "divide-assign operator" + d PctTilde = Just "bitwise-not operator" + d PpDefine = Just "preprocessor define" + d PpDefined = Just "preprocessor defined" + d PpElif = Just "preprocessor elif" + d PpElse = Just "preprocessor else" + d PpEndif = Just "preprocessor endif" + d PpIf = Just "preprocessor if" + d PpIfdef = Just "preprocessor ifdef" + d PpIfndef = Just "preprocessor ifndef" + d PpInclude = Just "preprocessor include" + d PpNewline = Just "newline" + d PpUndef = Just "preprocessor undef" + d CmtBlock = Just "block comment" + d CmtCommand = Just "doxygen command" + d CmtAttr = Just "parameter attribute" + d CmtEndDocSection = Just "doxygen end-of-section" + d CmtIndent = Just "indented comment" + d CmtStart = Just "start of comment" + d CmtStartCode = Just "escaped comment" + d CmtStartBlock = Just "block comment" + d CmtStartDoc = Just "doxygen comment" + d CmtStartDocSection = Just "doxygen start-of-section" + d CmtSpdxCopyright = Just "SPDX Copyright" + d CmtSpdxLicense = Just "SPDX License" + d CmtCode = Just "code comment" + d CmtWord = Just "comment word" + d CmtRef = Just "comment reference" + d CmtEnd = Just "end of comment" + d IgnStart = Just "tokstyle ignore start" + d IgnBody = Just "tokstyle ignored code" + d IgnEnd = Just "tokstyle ignore end" + + d ErrorToken = Just "lexical error" + d Eof = Just "end-of-file" + d _ = Nothing + describeLexeme :: Show a => Lexeme a -> String -describeLexeme = show +describeLexeme (L _ c s) = maybe "" (<> ": ") (describeLexemeClass c) <> show s + +describeExpected :: [String] -> String +describeExpected [] = "end of file" +describeExpected ["ID_VAR"] = "variable name" +describeExpected [option] = option +describeExpected options + | wants ["break", "const", "continue", "ID_CONST", "VLA"] = "statement or declaration" + | wants ["ID_FUNC_TYPE", "non_null", "static", "'#include'"] = "top-level declaration or definition" + | options == ["ID_STD_TYPE", "ID_SUE_TYPE", "struct", "void"] = "type specifier" + | options == ["ID_STD_TYPE", "ID_SUE_TYPE", "const", "struct", "void"] = "type specifier" + | ["ID_FUNC_TYPE", "ID_STD_TYPE", "ID_SUE_TYPE", "ID_VAR"] `isPrefixOf` options = "type specifier or variable name" + | ["ID_FUNC_TYPE", "ID_STD_TYPE", "ID_SUE_TYPE", "const"] `isPrefixOf` options = "type specifier" + | ["ID_CONST", "sizeof", "LIT_CHAR", "LIT_FALSE", "LIT_TRUE", "LIT_INTEGER"] `isPrefixOf` options = "constant expression" + | ["ID_CONST", "ID_SUE_TYPE", "'/*'"] `isPrefixOf` options = "enumerator, type name, or comment" + | wants ["'defined'"] = "preprocessor constant expression" + | wants ["'&'", "'&&'", "'*'", "'=='", "';'"] = "operator or end of statement" + | wants ["'&'", "'&&'", "'*'", "'^'", "'!='"] = "operator" + | wants ["ID_CONST", "ID_VAR", "sizeof", "LIT_CHAR", "'--'", "'&'", "'*'"] = "expression" + | ["ID_CONST", "ID_STD_TYPE", "ID_SUE_TYPE", "ID_VAR", "const", "sizeof"] `isPrefixOf` options = "expression or type specifier" + | ["ID_CONST", "ID_STD_TYPE", "ID_SUE_TYPE", "const", "sizeof"] `isPrefixOf` options = "constant expression or type specifier" + | ["'&='", "'->'", "'*='"] `isPrefixOf` options = "assignment or member/array access" + | wants ["CMT_WORD"] = "comment contents" + + | length options == 2 = commaOr options + | otherwise = "one of " <> commaOr options + where + wants xs = null (xs \\ options) + +commaOr :: [String] -> String +commaOr = go . reverse + where + go [] = "" + go (x:xs) = List.intercalate ", " (reverse xs) <> " or " <> x + +parseError :: Show text => (Lexeme text, [String]) -> Alex a +parseError (l@(L (AlexPn _ line col) _ _), options) = + alexError $ ":" <> show line <> ":" <> show col <> ": Parse error near " <> describeLexeme l + <> "; expected " <> describeExpected options diff --git a/src/Language/Cimple/Parser.y b/src/Language/Cimple/Parser.y index 20c533e..61b5c4c 100644 --- a/src/Language/Cimple/Parser.y +++ b/src/Language/Cimple/Parser.y @@ -9,18 +9,20 @@ module Language.Cimple.Parser , source ) where -import qualified Data.ByteString as BS -import Data.FileEmbed (embedFile) -import Data.Fix (Fix (..)) -import Data.Text (Text) -import qualified Data.Text as Text -import Language.Cimple.Ast (AssignOp (..), BinaryOp (..), - CommentStyle (..), LiteralType (..), - Node, NodeF (..), Scope (..), - UnaryOp (..)) -import Language.Cimple.Lexer (Alex, AlexPosn (..), Lexeme (..), - alexError, alexMonadScan) -import Language.Cimple.Tokens (LexemeClass (..)) +import qualified Data.ByteString as BS +import Data.FileEmbed (embedFile) +import Data.Fix (Fix (..)) +import Data.Text (Text) +import qualified Data.Text as Text +import Language.Cimple.Ast (AssignOp (..), BinaryOp (..), + CommentStyle (..), + LiteralType (..), Node, + NodeF (..), Scope (..), + UnaryOp (..)) +import Language.Cimple.DescribeAst (parseError) +import Language.Cimple.Lexer (Alex, AlexPosn (..), Lexeme (..), + alexError, alexMonadScan) +import Language.Cimple.Tokens (LexemeClass (..)) } -- Conflict between (static) FunctionDecl and (static) ConstDecl. @@ -136,7 +138,6 @@ import Language.Cimple.Tokens (LexemeClass (..)) '/** @{' { L _ CmtStartDocSection _ } '/** @} */' { L _ CmtEndDocSection _ } '/***' { L _ CmtStartBlock _ } - ' * ' { L _ CmtPrefix _ } ' ' { L _ CmtIndent _ } '*/' { L _ CmtEnd _ } 'Copyright' { L _ CmtSpdxCopyright _ } @@ -238,7 +239,6 @@ CommentToken :: { Term } CommentToken : CommentWord { $1 } | '\n' { $1 } -| ' * ' { $1 } | ' ' { $1 } CommentWords :: { [Term] } @@ -750,11 +750,6 @@ tyPointer, tyConst :: NonTerm -> NonTerm tyPointer = Fix . TyPointer tyConst = Fix . TyConst -parseError :: Show text => (Lexeme text, [String]) -> Alex a -parseError (L (AlexPn _ line col) c t, options) = - alexError $ ":" <> show line <> ":" <> show col <> ": Parse error near " <> show c <> ": " - <> show t <> "; expected one of " <> show options - lexwrap :: (Lexeme Text -> Alex a) -> Alex a lexwrap = (alexMonadScan >>=) diff --git a/src/Language/Cimple/Pretty.hs b/src/Language/Cimple/Pretty.hs index c1f6c55..d1b9e14 100644 --- a/src/Language/Cimple/Pretty.hs +++ b/src/Language/Cimple/Pretty.hs @@ -337,7 +337,7 @@ ppCommentInfo = foldFix go DocParagraph docs -> ppIndented docs DocLine docs -> fillSep docs - DocCode begin code end -> begin <> ppCodeBody code <> end + DocCode begin code end -> ppLexeme begin <> ppCodeBody code <> ppLexeme end DocList l -> ppVerbatimComment $ vcat l DocOLItem num docs -> ppLexeme num <> char '.' <+> nest 3 (fillSep docs) DocULItem docs sublist -> char '-' <+> nest 2 (vsep $ fillSep docs : sublist) diff --git a/src/Language/Cimple/Tokens.hs b/src/Language/Cimple/Tokens.hs index a5ba3ff..1324080 100644 --- a/src/Language/Cimple/Tokens.hs +++ b/src/Language/Cimple/Tokens.hs @@ -106,7 +106,6 @@ data LexemeClass | CmtCommand | CmtAttr | CmtEndDocSection - | CmtPrefix | CmtIndent | CmtStart | CmtStartCode diff --git a/test/Language/Cimple/DescribeAstSpec.hs b/test/Language/Cimple/DescribeAstSpec.hs new file mode 100644 index 0000000..f9c8c48 --- /dev/null +++ b/test/Language/Cimple/DescribeAstSpec.hs @@ -0,0 +1,57 @@ +{-# LANGUAGE OverloadedStrings #-} +{-# OPTIONS_GHC -Wno-orphans #-} +module Language.Cimple.DescribeAstSpec where + +import Test.Hspec (Spec, describe, it, shouldBe, + shouldNotContain) + +import qualified Data.List.Extra as List +import Data.Text (Text) +import qualified Data.Text as Text +import Language.Cimple.IO (parseExpr, parseStmt, parseText) +import Language.CimpleSpec (sampleToken) +import Test.QuickCheck (Testable (property)) + + +expected :: (Text -> Either String a) -> Text -> String +expected parse code = + case parse code of + Left err -> snd $ List.breakOn "expected " err + Right _ -> "" + + +spec :: Spec +spec = do + describe "error messages" $ do + it "has useful suggestions" $ do + parseText "int a() {}" `shouldBe` Left + ":1:10: Parse error near right brace: \"}\"; expected statement or declaration" + + expected parseText "Beep Boop" `shouldBe` + "expected variable name" + + expected parseText "const *a() {}" `shouldBe` + "expected type specifier" + + expected parseText "int a() { int }" `shouldBe` + "expected variable name" + + it "has suggestions for any sequence of tokens in top level" $ do + property $ \tokens -> + expected parseText (Text.intercalate " " (map sampleToken tokens)) `shouldNotContain` + "expected one of" + + it "has suggestions for any sequence of tokens in expressions" $ do + property $ \tokens -> + expected parseExpr (Text.intercalate " " (map sampleToken tokens)) `shouldNotContain` + "expected one of" + + it "has suggestions for any sequence of tokens in statements" $ do + property $ \tokens -> + expected parseStmt (Text.intercalate " " (map sampleToken tokens)) `shouldNotContain` + "expected one of" + + it "does not support multiple declarators per declaration" $ do + let ast = parseText "int main() { int a, b; }" + ast `shouldBe` Left + ":1:19: Parse error near comma: \",\"; expected '=' or ';'" diff --git a/test/Language/Cimple/ParserSpec.hs b/test/Language/Cimple/ParserSpec.hs index 984f830..7c15de1 100644 --- a/test/Language/Cimple/ParserSpec.hs +++ b/test/Language/Cimple/ParserSpec.hs @@ -49,8 +49,3 @@ spec = do (L (AlexPn 17 1 18) IdVar "a") [])) Nothing)]))) ] - - it "does not support multiple declarators per declaration" $ do - let ast = parseText "int main() { int a, b; }" - ast `shouldBe` Left - ":1:19: Parse error near PctComma: \",\"; expected one of [\"'='\",\"';'\"]" diff --git a/test/Language/CimpleSpec.hs b/test/Language/CimpleSpec.hs new file mode 100644 index 0000000..88b1195 --- /dev/null +++ b/test/Language/CimpleSpec.hs @@ -0,0 +1,144 @@ +{-# LANGUAGE OverloadedStrings #-} +{-# OPTIONS_GHC -Wno-orphans #-} +module Language.CimpleSpec where + +import Test.Hspec (Spec, describe, it, shouldNotBe) + +import Data.Text (Text) +import Language.Cimple (LexemeClass (..)) +import Test.QuickCheck (Arbitrary (..), arbitraryBoundedEnum, forAll, + suchThat) + +instance Arbitrary LexemeClass where + arbitrary = arbitraryBoundedEnum `suchThat` ok + where + ok ErrorToken = False + ok Eof = False + ok _ = True + + +sampleToken :: LexemeClass -> Text +sampleToken c = case c of + IdConst -> "ID_CONST" + IdFuncType -> "func_cb" + IdStdType -> "uint32_t" + IdSueType -> "Sue_Type" + IdVar -> "var" + KwBreak -> "break" + KwCase -> "case" + KwConst -> "const" + KwContinue -> "continue" + KwDefault -> "default" + KwDo -> "do" + KwElse -> "else" + KwEnum -> "enum" + KwExtern -> "extern" + KwFor -> "for" + KwGnuPrintf -> "gnu_printf" + KwGoto -> "goto" + KwIf -> "if" + KwNonNull -> "non_null" + KwNullable -> "nullable" + KwReturn -> "return" + KwSizeof -> "sizeof" + KwStatic -> "static" + KwStaticAssert -> "static_assert" + KwStruct -> "struct" + KwSwitch -> "switch" + KwTypedef -> "typedef" + KwUnion -> "union" + KwVla -> "VLA" + KwVoid -> "void" + KwWhile -> "while" + LitFalse -> "false" + LitTrue -> "true" + LitChar -> "'a'" + LitInteger -> "123" + LitString -> "\"str\"" + LitSysInclude -> "" + PctAmpersand -> "&" + PctAmpersandAmpersand -> "&&" + PctAmpersandEq -> "&=" + PctArrow -> "->" + PctAsterisk -> "*" + PctAsteriskEq -> "*=" + PctCaret -> "^" + PctCaretEq -> "^=" + PctColon -> ":" + PctComma -> "," + PctEllipsis -> "..." + PctEMark -> "!" + PctEMarkEq -> "!=" + PctEq -> "=" + PctEqEq -> "==" + PctGreater -> ">" + PctGreaterEq -> ">=" + PctGreaterGreater -> ">>" + PctGreaterGreaterEq -> ">>=" + PctLBrace -> "{\n" + PctLBrack -> "[" + PctLess -> "<" + PctLessEq -> "<=" + PctLessLess -> "<<" + PctLessLessEq -> "<<=" + PctLParen -> "(" + PctMinus -> "-" + PctMinusEq -> "-=" + PctMinusMinus -> "--" + PctPeriod -> "." + PctPercent -> "%" + PctPercentEq -> "%=" + PctPipe -> "|" + PctPipeEq -> "|=" + PctPipePipe -> "||" + PctPlus -> "+" + PctPlusEq -> "+=" + PctPlusPlus -> "++" + PctQMark -> "?" + PctRBrace -> "}" + PctRBrack -> "]" + PctRParen -> ")" + PctSemicolon -> ";\n" + PctSlash -> "/" + PctSlashEq -> "/=" + PctTilde -> "~" + PpDefine -> "\n#define" + PpDefined -> "\n#defined" + PpElif -> "\n#elif" + PpElse -> "\n#else" + PpEndif -> "\n#endif" + PpIf -> "\n#if" + PpIfdef -> "\n#ifdef" + PpIfndef -> "\n#ifndef" + PpInclude -> "\n#include" + PpNewline -> "\n" + PpUndef -> "\n#undef" + CmtBlock -> "/**" + CmtCommand -> "@param" + CmtAttr -> "[out]" + CmtEndDocSection -> "/** @} */" + CmtIndent -> "*" + CmtStart -> "/*" + CmtStartCode -> "/*!" + CmtStartBlock -> "/***" + CmtStartDoc -> "/**" + CmtStartDocSection -> "/** @{" + CmtSpdxCopyright -> "Copyright ©" + CmtSpdxLicense -> "SPDX-License-Identifier:" + CmtCode -> "@code" + CmtWord -> "hello" + CmtRef -> "`ref`" + CmtEnd -> "*/\n" + IgnStart -> "\n//!TOKSTYLE-\n" + IgnBody -> "ignored stuff" + IgnEnd -> "\n//!TOKSTYLE+\n" + + ErrorToken -> "!!ERROR!!" + Eof -> "!!EOF!!" + +spec :: Spec +spec = do + describe "tokens" $ do + it "can be turned into strings" $ + forAll arbitraryBoundedEnum $ \token -> + sampleToken token `shouldNotBe` ""