Skip to content

Commit

Permalink
WIP
Browse files Browse the repository at this point in the history
  • Loading branch information
andreabedini committed Jan 27, 2024
1 parent 469aa1d commit 7042492
Showing 1 changed file with 76 additions and 65 deletions.
141 changes: 76 additions & 65 deletions Cabal-syntax/src/Distribution/Fields/Parser.hs
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,9 @@ module Distribution.Fields.Parser

import qualified Data.ByteString.Char8 as B8
import Data.Functor.Identity
import qualified Data.Text as T
import qualified Data.Text.Encoding as T
import qualified Data.Text.Encoding.Error as T
import Distribution.Compat.Prelude
import Distribution.Fields.Field
import Distribution.Fields.Lexer
Expand Down Expand Up @@ -125,52 +128,52 @@ describeToken t = case t of
LexicalError is -> "character in input " ++ show (B8.head is)

tokSym :: Parser (Name Position)
tokSym = getTokenWithPos $ \t -> case t of L pos (TokSym x) -> Just (mkName pos x); _ -> Nothing
tokSym = many tokWhitespace *> getTokenWithPos (\t -> case t of L pos (TokSym x) -> Just (mkName pos x); _ -> Nothing)

tokSym' :: Parser (SectionArg Position)
tokSym' = getTokenWithPos $ \t -> case t of L pos (TokSym x) -> Just (SecArgName pos x); _ -> Nothing
tokSym' = many tokWhitespace *> getTokenWithPos (\t -> case t of L pos (TokSym x) -> Just (SecArgName pos x); _ -> Nothing)

tokStr :: Parser (SectionArg Position)
tokStr = getTokenWithPos $ \t -> case t of L pos (TokStr x) -> Just (SecArgStr pos x); _ -> Nothing
tokStr = many tokWhitespace *> getTokenWithPos (\t -> case t of L pos (TokStr x) -> Just (SecArgStr pos x); _ -> Nothing)

tokOther :: Parser (SectionArg Position)
tokOther = getTokenWithPos $ \t -> case t of L pos (TokOther x) -> Just (SecArgOther pos x); _ -> Nothing
tokOther = many tokWhitespace *> getTokenWithPos (\t -> case t of L pos (TokOther x) -> Just (SecArgOther pos x); _ -> Nothing)

tokIndent :: Parser Int
tokIndent = getToken (\t -> case t of Indent x -> Just x; _ -> Nothing)
tokIndent = many tokWhitespace *> getToken (\t -> case t of Indent x -> Just x; _ -> Nothing)

tokColon :: Parser ()
tokColon = getToken $ \t -> case t of Colon -> Just (); _ -> Nothing
tokColon = getToken (\t -> case t of Colon -> Just (); _ -> Nothing)

tokOpenBrace :: Parser ()
tokOpenBrace = getToken $ \t -> case t of OpenBrace -> Just (); _ -> Nothing
tokOpenBrace = getToken (\t -> case t of OpenBrace -> Just (); _ -> Nothing)

tokCloseBrace :: Parser ()
tokCloseBrace = getToken $ \t -> case t of CloseBrace -> Just (); _ -> Nothing
tokCloseBrace = getToken (\t -> case t of CloseBrace -> Just (); _ -> Nothing)

tokFieldLine :: Parser (FieldLine Position)
tokFieldLine = getTokenWithPos $ \t -> case t of L pos (TokFieldLine s) -> Just (FieldLine pos s); _ -> Nothing
tokFieldLine = many tokWhitespace *> getTokenWithPos (\t -> case t of L pos (TokFieldLine s) -> Just (FieldLine pos s); _ -> Nothing)

tokComment :: Parser B8.ByteString
tokComment = getToken $ \case Comment s -> Just s; _ -> Nothing
tokComment = getToken (\case Comment s -> Just s; _ -> Nothing) *> tokWhitespace

tokWhitespace :: Parser B8.ByteString
tokWhitespace = getToken $ \case Whitespace s -> Just s; _ -> Nothing
tokWhitespace = getToken (\case Whitespace s -> Just s; _ -> Nothing)

sectionArg :: Parser (SectionArg Position)
sectionArg = (tokSym' <|> tokStr <|> tokOther <?> "section parameter") <* optional tokWhitespace
sectionArg = tokSym' <|> tokStr <|> tokOther <?> "section parameter"

fieldSecName :: Parser (Name Position)
fieldSecName = (tokSym <?> "field or section name") <* optional tokWhitespace
fieldSecName = tokSym <?> "field or section name"

colon :: Parser ()
colon = (tokColon <?> "\":\"") <* optional tokWhitespace
colon = tokColon <?> "\":\""

openBrace :: Parser ()
openBrace = (tokOpenBrace <?> "\"{\"") <* optional tokWhitespace
openBrace = tokOpenBrace <?> "\"{\""

closeBrace :: Parser ()
closeBrace = (tokCloseBrace <?> "\"}\"") <* optional tokWhitespace
closeBrace = tokCloseBrace <?> "\"}\""

fieldContent :: Parser (FieldLine Position)
fieldContent = tokFieldLine <?> "field contents"
Expand Down Expand Up @@ -252,6 +255,7 @@ inLexerMode (LexerMode mode) p =
--
cabalStyleFile :: Parser [Field Position]
cabalStyleFile = do
skipMany tokComment
es <- elements zeroIndentLevel
eof
return es
Expand All @@ -270,24 +274,17 @@ elements ilevel = many (element ilevel)
-- element ::= '\\n' name elementInLayoutContext
-- | name elementInNonLayoutContext
element :: IndentLevel -> Parser (Field Position)
element ilevel =
try
( do
skipMany (tokComment <|> tokWhitespace)
ilevel' <- indentOfAtLeast ilevel
name <- fieldSecName
parserTraced ("elementInLayoutContext " ++ show (getName name)) $
elementInLayoutContext (incIndentLevel ilevel') name
element ilevel = do
skipMany tokWhitespace
( do
ilevel' <- indentOfAtLeast ilevel
name <- fieldSecName
elementInLayoutContext (incIndentLevel ilevel') name
)
<* skipMany (tokComment <|> tokWhitespace)
<|> try
( do
skipMany (tokComment <|> tokWhitespace)
name <- fieldSecName
parserTraced ("elementInNonLayoutContext " ++ show (getName name)) $
<|> ( do
name <- fieldSecName
elementInNonLayoutContext name
<* skipMany (tokComment <|> tokWhitespace)
)
)

-- An element (field or section) that is valid in a layout context.
-- In a layout context we can have fields and sections that themselves
Expand All @@ -296,10 +293,12 @@ element ilevel =
-- elementInLayoutContext ::= ':' fieldLayoutOrBraces
-- | arg* sectionLayoutOrBraces
elementInLayoutContext :: IndentLevel -> Name Position -> Parser (Field Position)
elementInLayoutContext ilevel name =
elementInLayoutContext ilevel name = do
skipMany tokWhitespace
(do colon; fieldLayoutOrBraces ilevel name)
<|> ( do
args <- many sectionArg
args <- parserTraced "many sectionArg" (many (sectionArg <* tokWhitespace))
skipMany tokComment
elems <- sectionLayoutOrBraces ilevel
return (Section name args elems)
)
Expand All @@ -311,8 +310,9 @@ elementInLayoutContext ilevel name =
-- elementInNonLayoutContext ::= ':' FieldInlineOrBraces
-- | arg* '\\n'? '{' elements '\\n'? '}'
elementInNonLayoutContext :: Name Position -> Parser (Field Position)
elementInNonLayoutContext name =
(do colon; fieldInlineOrBraces name)
elementInNonLayoutContext name = do
skipMany tokWhitespace
(do parserTraced "colon" colon; fieldInlineOrBraces name)
<|> ( do
args <- many sectionArg
openBrace
Expand All @@ -327,7 +327,9 @@ elementInNonLayoutContext name =
-- fieldLayoutOrBraces ::= '\\n'? '{' content '}'
-- | line? ('\\n' line)*
fieldLayoutOrBraces :: IndentLevel -> Name Position -> Parser (Field Position)
fieldLayoutOrBraces ilevel name = braces <|> fieldLayout
fieldLayoutOrBraces ilevel name = do
skipMany tokWhitespace
braces <|> fieldLayout
where
braces = do
openBrace
Expand All @@ -346,28 +348,30 @@ fieldLayoutOrBraces ilevel name = braces <|> fieldLayout
-- sectionLayoutOrBraces ::= '\\n'? '{' elements \\n? '}'
-- | elements
sectionLayoutOrBraces :: IndentLevel -> Parser [Field Position]
sectionLayoutOrBraces ilevel =
sectionLayoutOrBraces ilevel = parserTraced "sectionLayoutOrBraces" $ do
skipMany tokWhitespace
( do
openBrace
elems <- elements zeroIndentLevel
optional tokIndent
closeBrace
return elems
)
<|> (elements ilevel)
)
<|> elements ilevel

-- The body of a field, using either inline style or braces.
--
-- fieldInlineOrBraces ::= '\\n'? '{' content '}'
-- | content
fieldInlineOrBraces :: Name Position -> Parser (Field Position)
fieldInlineOrBraces name =
fieldInlineOrBraces name = do
skipMany tokWhitespace
( do
openBrace
ls <- inLexerMode (LexerMode in_field_braces) (many fieldContent)
closeBrace
return (Field name ls)
)
)
<|> ( do
ls <- inLexerMode (LexerMode in_field_braces) (option [] (fmap (\l -> [l]) fieldContent))
return (Field name ls)
Expand Down Expand Up @@ -439,51 +443,58 @@ checkIndentation'' a b
| positionCol a == positionCol b = id
| otherwise = (LexWarning LexInconsistentIndentation b :)

#ifdef CABAL_PARSEC_DEBUG
-- #ifdef CABAL_PARSEC_DEBUG
parseTest' :: Show a => Parsec LexState' () a -> SourceName -> B8.ByteString -> IO ()
parseTest' p fname s =
case parse p fname (lexSt s) of
Left err -> putStrLn (formatError s err)

Right x -> print x
case parse p fname (lexSt s) of
Left err -> putStrLn (formatError s err)
Right x -> print x
where
lexSt = mkLexState' . mkLexState

parseFile :: Show a => Parser a -> FilePath -> IO ()
parseFile p f = B8.readFile f >>= \s -> parseTest' p f s

parseStr :: Show a => Parser a -> String -> IO ()
parseStr :: Show a => Parser a -> String -> IO ()
parseStr p = parseBS p . B8.pack

parseBS :: Show a => Parser a -> B8.ByteString -> IO ()
parseBS :: Show a => Parser a -> B8.ByteString -> IO ()
parseBS p = parseTest' p "<input string>"

formatError :: B8.ByteString -> ParseError -> String
formatError input perr =
unlines
[ "Parse error "++ show (errorPos perr) ++ ":"
, errLine
, indicator ++ errmsg ]
unlines
[ "Parse error " ++ show (errorPos perr) ++ ":"
, errLine
, indicator ++ errmsg
]
where
pos = errorPos perr
ls = lines' (T.decodeUtf8With T.lenientDecode input)
errLine = T.unpack (ls !! (sourceLine pos - 1))
pos = errorPos perr
ls = lines' (T.decodeUtf8With T.lenientDecode input)
errLine = T.unpack (ls !! (sourceLine pos - 1))
indicator = replicate (sourceColumn pos) ' ' ++ "^"
errmsg = showErrorMessages "or" "unknown parse error"
"expecting" "unexpected" "end of file"
(errorMessages perr)
errmsg =
showErrorMessages
"or"
"unknown parse error"
"expecting"
"unexpected"
"end of file"
(errorMessages perr)

-- | Handles windows/osx/unix line breaks uniformly
lines' :: T.Text -> [T.Text]
lines' s1
| T.null s1 = []
| otherwise = case T.break (\c -> c == '\r' || c == '\n') s1 of
(l, s2) | Just (c,s3) <- T.uncons s2
-> case T.uncons s3 of
Just ('\n', s4) | c == '\r' -> l : lines' s4
_ -> l : lines' s3
| otherwise -> [l]
#endif
(l, s2)
| Just (c, s3) <- T.uncons s2 ->
case T.uncons s3 of
Just ('\n', s4) | c == '\r' -> l : lines' s4
_ -> l : lines' s3
| otherwise -> [l]

-- #endif

eof :: Parser ()
eof = notFollowedBy anyToken <?> "end of file"
Expand Down

0 comments on commit 7042492

Please sign in to comment.