From 70424929489b1d6c11bea05aca62caf9d5652594 Mon Sep 17 00:00:00 2001 From: Andrea Bedini Date: Wed, 29 Nov 2023 00:13:37 +0800 Subject: [PATCH] WIP --- .../src/Distribution/Fields/Parser.hs | 141 ++++++++++-------- 1 file changed, 76 insertions(+), 65 deletions(-) diff --git a/Cabal-syntax/src/Distribution/Fields/Parser.hs b/Cabal-syntax/src/Distribution/Fields/Parser.hs index c439aae79f5..936d0d7ca24 100644 --- a/Cabal-syntax/src/Distribution/Fields/Parser.hs +++ b/Cabal-syntax/src/Distribution/Fields/Parser.hs @@ -35,6 +35,9 @@ module Distribution.Fields.Parser import qualified Data.ByteString.Char8 as B8 import Data.Functor.Identity +import qualified Data.Text as T +import qualified Data.Text.Encoding as T +import qualified Data.Text.Encoding.Error as T import Distribution.Compat.Prelude import Distribution.Fields.Field import Distribution.Fields.Lexer @@ -125,52 +128,52 @@ describeToken t = case t of LexicalError is -> "character in input " ++ show (B8.head is) tokSym :: Parser (Name Position) -tokSym = getTokenWithPos $ \t -> case t of L pos (TokSym x) -> Just (mkName pos x); _ -> Nothing +tokSym = many tokWhitespace *> getTokenWithPos (\t -> case t of L pos (TokSym x) -> Just (mkName pos x); _ -> Nothing) tokSym' :: Parser (SectionArg Position) -tokSym' = getTokenWithPos $ \t -> case t of L pos (TokSym x) -> Just (SecArgName pos x); _ -> Nothing +tokSym' = many tokWhitespace *> getTokenWithPos (\t -> case t of L pos (TokSym x) -> Just (SecArgName pos x); _ -> Nothing) tokStr :: Parser (SectionArg Position) -tokStr = getTokenWithPos $ \t -> case t of L pos (TokStr x) -> Just (SecArgStr pos x); _ -> Nothing +tokStr = many tokWhitespace *> getTokenWithPos (\t -> case t of L pos (TokStr x) -> Just (SecArgStr pos x); _ -> Nothing) tokOther :: Parser (SectionArg Position) -tokOther = getTokenWithPos $ \t -> case t of L pos (TokOther x) -> Just (SecArgOther pos x); _ -> Nothing +tokOther = many tokWhitespace *> getTokenWithPos (\t -> case t of L pos (TokOther x) -> Just (SecArgOther pos x); _ -> Nothing) tokIndent :: Parser Int -tokIndent = getToken (\t -> case t of Indent x -> Just x; _ -> Nothing) +tokIndent = many tokWhitespace *> getToken (\t -> case t of Indent x -> Just x; _ -> Nothing) tokColon :: Parser () -tokColon = getToken $ \t -> case t of Colon -> Just (); _ -> Nothing +tokColon = getToken (\t -> case t of Colon -> Just (); _ -> Nothing) tokOpenBrace :: Parser () -tokOpenBrace = getToken $ \t -> case t of OpenBrace -> Just (); _ -> Nothing +tokOpenBrace = getToken (\t -> case t of OpenBrace -> Just (); _ -> Nothing) tokCloseBrace :: Parser () -tokCloseBrace = getToken $ \t -> case t of CloseBrace -> Just (); _ -> Nothing +tokCloseBrace = getToken (\t -> case t of CloseBrace -> Just (); _ -> Nothing) tokFieldLine :: Parser (FieldLine Position) -tokFieldLine = getTokenWithPos $ \t -> case t of L pos (TokFieldLine s) -> Just (FieldLine pos s); _ -> Nothing +tokFieldLine = many tokWhitespace *> getTokenWithPos (\t -> case t of L pos (TokFieldLine s) -> Just (FieldLine pos s); _ -> Nothing) tokComment :: Parser B8.ByteString -tokComment = getToken $ \case Comment s -> Just s; _ -> Nothing +tokComment = getToken (\case Comment s -> Just s; _ -> Nothing) *> tokWhitespace tokWhitespace :: Parser B8.ByteString -tokWhitespace = getToken $ \case Whitespace s -> Just s; _ -> Nothing +tokWhitespace = getToken (\case Whitespace s -> Just s; _ -> Nothing) sectionArg :: Parser (SectionArg Position) -sectionArg = (tokSym' <|> tokStr <|> tokOther "section parameter") <* optional tokWhitespace +sectionArg = tokSym' <|> tokStr <|> tokOther "section parameter" fieldSecName :: Parser (Name Position) -fieldSecName = (tokSym "field or section name") <* optional tokWhitespace +fieldSecName = tokSym "field or section name" colon :: Parser () -colon = (tokColon "\":\"") <* optional tokWhitespace +colon = tokColon "\":\"" openBrace :: Parser () -openBrace = (tokOpenBrace "\"{\"") <* optional tokWhitespace +openBrace = tokOpenBrace "\"{\"" closeBrace :: Parser () -closeBrace = (tokCloseBrace "\"}\"") <* optional tokWhitespace +closeBrace = tokCloseBrace "\"}\"" fieldContent :: Parser (FieldLine Position) fieldContent = tokFieldLine "field contents" @@ -252,6 +255,7 @@ inLexerMode (LexerMode mode) p = -- cabalStyleFile :: Parser [Field Position] cabalStyleFile = do + skipMany tokComment es <- elements zeroIndentLevel eof return es @@ -270,24 +274,17 @@ elements ilevel = many (element ilevel) -- element ::= '\\n' name elementInLayoutContext -- | name elementInNonLayoutContext element :: IndentLevel -> Parser (Field Position) -element ilevel = - try - ( do - skipMany (tokComment <|> tokWhitespace) - ilevel' <- indentOfAtLeast ilevel - name <- fieldSecName - parserTraced ("elementInLayoutContext " ++ show (getName name)) $ - elementInLayoutContext (incIndentLevel ilevel') name +element ilevel = do + skipMany tokWhitespace + ( do + ilevel' <- indentOfAtLeast ilevel + name <- fieldSecName + elementInLayoutContext (incIndentLevel ilevel') name ) - <* skipMany (tokComment <|> tokWhitespace) - <|> try - ( do - skipMany (tokComment <|> tokWhitespace) - name <- fieldSecName - parserTraced ("elementInNonLayoutContext " ++ show (getName name)) $ + <|> ( do + name <- fieldSecName elementInNonLayoutContext name - <* skipMany (tokComment <|> tokWhitespace) - ) + ) -- An element (field or section) that is valid in a layout context. -- In a layout context we can have fields and sections that themselves @@ -296,10 +293,12 @@ element ilevel = -- elementInLayoutContext ::= ':' fieldLayoutOrBraces -- | arg* sectionLayoutOrBraces elementInLayoutContext :: IndentLevel -> Name Position -> Parser (Field Position) -elementInLayoutContext ilevel name = +elementInLayoutContext ilevel name = do + skipMany tokWhitespace (do colon; fieldLayoutOrBraces ilevel name) <|> ( do - args <- many sectionArg + args <- parserTraced "many sectionArg" (many (sectionArg <* tokWhitespace)) + skipMany tokComment elems <- sectionLayoutOrBraces ilevel return (Section name args elems) ) @@ -311,8 +310,9 @@ elementInLayoutContext ilevel name = -- elementInNonLayoutContext ::= ':' FieldInlineOrBraces -- | arg* '\\n'? '{' elements '\\n'? '}' elementInNonLayoutContext :: Name Position -> Parser (Field Position) -elementInNonLayoutContext name = - (do colon; fieldInlineOrBraces name) +elementInNonLayoutContext name = do + skipMany tokWhitespace + (do parserTraced "colon" colon; fieldInlineOrBraces name) <|> ( do args <- many sectionArg openBrace @@ -327,7 +327,9 @@ elementInNonLayoutContext name = -- fieldLayoutOrBraces ::= '\\n'? '{' content '}' -- | line? ('\\n' line)* fieldLayoutOrBraces :: IndentLevel -> Name Position -> Parser (Field Position) -fieldLayoutOrBraces ilevel name = braces <|> fieldLayout +fieldLayoutOrBraces ilevel name = do + skipMany tokWhitespace + braces <|> fieldLayout where braces = do openBrace @@ -346,28 +348,30 @@ fieldLayoutOrBraces ilevel name = braces <|> fieldLayout -- sectionLayoutOrBraces ::= '\\n'? '{' elements \\n? '}' -- | elements sectionLayoutOrBraces :: IndentLevel -> Parser [Field Position] -sectionLayoutOrBraces ilevel = +sectionLayoutOrBraces ilevel = parserTraced "sectionLayoutOrBraces" $ do + skipMany tokWhitespace ( do openBrace elems <- elements zeroIndentLevel optional tokIndent closeBrace return elems - ) - <|> (elements ilevel) + ) + <|> elements ilevel -- The body of a field, using either inline style or braces. -- -- fieldInlineOrBraces ::= '\\n'? '{' content '}' -- | content fieldInlineOrBraces :: Name Position -> Parser (Field Position) -fieldInlineOrBraces name = +fieldInlineOrBraces name = do + skipMany tokWhitespace ( do openBrace ls <- inLexerMode (LexerMode in_field_braces) (many fieldContent) closeBrace return (Field name ls) - ) + ) <|> ( do ls <- inLexerMode (LexerMode in_field_braces) (option [] (fmap (\l -> [l]) fieldContent)) return (Field name ls) @@ -439,51 +443,58 @@ checkIndentation'' a b | positionCol a == positionCol b = id | otherwise = (LexWarning LexInconsistentIndentation b :) -#ifdef CABAL_PARSEC_DEBUG +-- #ifdef CABAL_PARSEC_DEBUG parseTest' :: Show a => Parsec LexState' () a -> SourceName -> B8.ByteString -> IO () parseTest' p fname s = - case parse p fname (lexSt s) of - Left err -> putStrLn (formatError s err) - - Right x -> print x + case parse p fname (lexSt s) of + Left err -> putStrLn (formatError s err) + Right x -> print x where lexSt = mkLexState' . mkLexState parseFile :: Show a => Parser a -> FilePath -> IO () parseFile p f = B8.readFile f >>= \s -> parseTest' p f s -parseStr :: Show a => Parser a -> String -> IO () +parseStr :: Show a => Parser a -> String -> IO () parseStr p = parseBS p . B8.pack -parseBS :: Show a => Parser a -> B8.ByteString -> IO () +parseBS :: Show a => Parser a -> B8.ByteString -> IO () parseBS p = parseTest' p "" formatError :: B8.ByteString -> ParseError -> String formatError input perr = - unlines - [ "Parse error "++ show (errorPos perr) ++ ":" - , errLine - , indicator ++ errmsg ] + unlines + [ "Parse error " ++ show (errorPos perr) ++ ":" + , errLine + , indicator ++ errmsg + ] where - pos = errorPos perr - ls = lines' (T.decodeUtf8With T.lenientDecode input) - errLine = T.unpack (ls !! (sourceLine pos - 1)) + pos = errorPos perr + ls = lines' (T.decodeUtf8With T.lenientDecode input) + errLine = T.unpack (ls !! (sourceLine pos - 1)) indicator = replicate (sourceColumn pos) ' ' ++ "^" - errmsg = showErrorMessages "or" "unknown parse error" - "expecting" "unexpected" "end of file" - (errorMessages perr) + errmsg = + showErrorMessages + "or" + "unknown parse error" + "expecting" + "unexpected" + "end of file" + (errorMessages perr) -- | Handles windows/osx/unix line breaks uniformly lines' :: T.Text -> [T.Text] lines' s1 | T.null s1 = [] | otherwise = case T.break (\c -> c == '\r' || c == '\n') s1 of - (l, s2) | Just (c,s3) <- T.uncons s2 - -> case T.uncons s3 of - Just ('\n', s4) | c == '\r' -> l : lines' s4 - _ -> l : lines' s3 - | otherwise -> [l] -#endif + (l, s2) + | Just (c, s3) <- T.uncons s2 -> + case T.uncons s3 of + Just ('\n', s4) | c == '\r' -> l : lines' s4 + _ -> l : lines' s3 + | otherwise -> [l] + +-- #endif eof :: Parser () eof = notFollowedBy anyToken "end of file"