From 70424929489b1d6c11bea05aca62caf9d5652594 Mon Sep 17 00:00:00 2001
From: Andrea Bedini <andrea.bedini@tweag.io>
Date: Wed, 29 Nov 2023 00:13:37 +0800
Subject: [PATCH] WIP

---
 .../src/Distribution/Fields/Parser.hs         | 141 ++++++++++--------
 1 file changed, 76 insertions(+), 65 deletions(-)

diff --git a/Cabal-syntax/src/Distribution/Fields/Parser.hs b/Cabal-syntax/src/Distribution/Fields/Parser.hs
index c439aae79f5..936d0d7ca24 100644
--- a/Cabal-syntax/src/Distribution/Fields/Parser.hs
+++ b/Cabal-syntax/src/Distribution/Fields/Parser.hs
@@ -35,6 +35,9 @@ module Distribution.Fields.Parser
 
 import qualified Data.ByteString.Char8 as B8
 import Data.Functor.Identity
+import qualified Data.Text as T
+import qualified Data.Text.Encoding as T
+import qualified Data.Text.Encoding.Error as T
 import Distribution.Compat.Prelude
 import Distribution.Fields.Field
 import Distribution.Fields.Lexer
@@ -125,52 +128,52 @@ describeToken t = case t of
   LexicalError is -> "character in input " ++ show (B8.head is)
 
 tokSym :: Parser (Name Position)
-tokSym = getTokenWithPos $ \t -> case t of L pos (TokSym x) -> Just (mkName pos x); _ -> Nothing
+tokSym = many tokWhitespace *> getTokenWithPos (\t -> case t of L pos (TokSym x) -> Just (mkName pos x); _ -> Nothing)
 
 tokSym' :: Parser (SectionArg Position)
-tokSym' = getTokenWithPos $ \t -> case t of L pos (TokSym x) -> Just (SecArgName pos x); _ -> Nothing
+tokSym' = many tokWhitespace *> getTokenWithPos (\t -> case t of L pos (TokSym x) -> Just (SecArgName pos x); _ -> Nothing)
 
 tokStr :: Parser (SectionArg Position)
-tokStr = getTokenWithPos $ \t -> case t of L pos (TokStr x) -> Just (SecArgStr pos x); _ -> Nothing
+tokStr = many tokWhitespace *> getTokenWithPos (\t -> case t of L pos (TokStr x) -> Just (SecArgStr pos x); _ -> Nothing)
 
 tokOther :: Parser (SectionArg Position)
-tokOther = getTokenWithPos $ \t -> case t of L pos (TokOther x) -> Just (SecArgOther pos x); _ -> Nothing
+tokOther = many tokWhitespace *> getTokenWithPos (\t -> case t of L pos (TokOther x) -> Just (SecArgOther pos x); _ -> Nothing)
 
 tokIndent :: Parser Int
-tokIndent = getToken (\t -> case t of Indent x -> Just x; _ -> Nothing)
+tokIndent = many tokWhitespace *> getToken (\t -> case t of Indent x -> Just x; _ -> Nothing)
 
 tokColon :: Parser ()
-tokColon = getToken $ \t -> case t of Colon -> Just (); _ -> Nothing
+tokColon = getToken (\t -> case t of Colon -> Just (); _ -> Nothing)
 
 tokOpenBrace :: Parser ()
-tokOpenBrace = getToken $ \t -> case t of OpenBrace -> Just (); _ -> Nothing
+tokOpenBrace = getToken (\t -> case t of OpenBrace -> Just (); _ -> Nothing)
 
 tokCloseBrace :: Parser ()
-tokCloseBrace = getToken $ \t -> case t of CloseBrace -> Just (); _ -> Nothing
+tokCloseBrace = getToken (\t -> case t of CloseBrace -> Just (); _ -> Nothing)
 
 tokFieldLine :: Parser (FieldLine Position)
-tokFieldLine = getTokenWithPos $ \t -> case t of L pos (TokFieldLine s) -> Just (FieldLine pos s); _ -> Nothing
+tokFieldLine = many tokWhitespace *> getTokenWithPos (\t -> case t of L pos (TokFieldLine s) -> Just (FieldLine pos s); _ -> Nothing)
 
 tokComment :: Parser B8.ByteString
-tokComment = getToken $ \case Comment s -> Just s; _ -> Nothing
+tokComment = getToken (\case Comment s -> Just s; _ -> Nothing) *> tokWhitespace
 
 tokWhitespace :: Parser B8.ByteString
-tokWhitespace = getToken $ \case Whitespace s -> Just s; _ -> Nothing
+tokWhitespace = getToken (\case Whitespace s -> Just s; _ -> Nothing)
 
 sectionArg :: Parser (SectionArg Position)
-sectionArg = (tokSym' <|> tokStr <|> tokOther <?> "section parameter") <* optional tokWhitespace
+sectionArg = tokSym' <|> tokStr <|> tokOther <?> "section parameter"
 
 fieldSecName :: Parser (Name Position)
-fieldSecName = (tokSym <?> "field or section name") <* optional tokWhitespace
+fieldSecName = tokSym <?> "field or section name"
 
 colon :: Parser ()
-colon = (tokColon <?> "\":\"") <* optional tokWhitespace
+colon = tokColon <?> "\":\""
 
 openBrace :: Parser ()
-openBrace = (tokOpenBrace <?> "\"{\"") <* optional tokWhitespace
+openBrace = tokOpenBrace <?> "\"{\""
 
 closeBrace :: Parser ()
-closeBrace = (tokCloseBrace <?> "\"}\"") <* optional tokWhitespace
+closeBrace = tokCloseBrace <?> "\"}\""
 
 fieldContent :: Parser (FieldLine Position)
 fieldContent = tokFieldLine <?> "field contents"
@@ -252,6 +255,7 @@ inLexerMode (LexerMode mode) p =
 --
 cabalStyleFile :: Parser [Field Position]
 cabalStyleFile = do
+  skipMany tokComment
   es <- elements zeroIndentLevel
   eof
   return es
@@ -270,24 +274,17 @@ elements ilevel = many (element ilevel)
 -- element ::= '\\n' name elementInLayoutContext
 --           |      name elementInNonLayoutContext
 element :: IndentLevel -> Parser (Field Position)
-element ilevel =
-  try
-    ( do
-        skipMany (tokComment <|> tokWhitespace)
-        ilevel' <- indentOfAtLeast ilevel
-        name <- fieldSecName
-        parserTraced ("elementInLayoutContext " ++ show (getName name)) $
-          elementInLayoutContext (incIndentLevel ilevel') name
+element ilevel = do
+  skipMany tokWhitespace
+  ( do
+      ilevel' <- indentOfAtLeast ilevel
+      name <- fieldSecName
+      elementInLayoutContext (incIndentLevel ilevel') name
     )
-    <* skipMany (tokComment <|> tokWhitespace)
-    <|> try
-      ( do
-          skipMany (tokComment <|> tokWhitespace)
-          name <- fieldSecName
-          parserTraced ("elementInNonLayoutContext " ++ show (getName name)) $
+    <|> ( do
+            name <- fieldSecName
             elementInNonLayoutContext name
-              <* skipMany (tokComment <|> tokWhitespace)
-      )
+        )
 
 -- An element (field or section) that is valid in a layout context.
 -- In a layout context we can have fields and sections that themselves
@@ -296,10 +293,12 @@ element ilevel =
 -- elementInLayoutContext ::= ':'  fieldLayoutOrBraces
 --                          | arg* sectionLayoutOrBraces
 elementInLayoutContext :: IndentLevel -> Name Position -> Parser (Field Position)
-elementInLayoutContext ilevel name =
+elementInLayoutContext ilevel name = do
+  skipMany tokWhitespace
   (do colon; fieldLayoutOrBraces ilevel name)
     <|> ( do
-            args <- many sectionArg
+            args <- parserTraced "many sectionArg" (many (sectionArg <* tokWhitespace))
+            skipMany tokComment
             elems <- sectionLayoutOrBraces ilevel
             return (Section name args elems)
         )
@@ -311,8 +310,9 @@ elementInLayoutContext ilevel name =
 -- elementInNonLayoutContext ::= ':' FieldInlineOrBraces
 --                             | arg* '\\n'? '{' elements '\\n'? '}'
 elementInNonLayoutContext :: Name Position -> Parser (Field Position)
-elementInNonLayoutContext name =
-  (do colon; fieldInlineOrBraces name)
+elementInNonLayoutContext name = do
+  skipMany tokWhitespace
+  (do parserTraced "colon" colon; fieldInlineOrBraces name)
     <|> ( do
             args <- many sectionArg
             openBrace
@@ -327,7 +327,9 @@ elementInNonLayoutContext name =
 -- fieldLayoutOrBraces   ::= '\\n'? '{' content '}'
 --                         | line? ('\\n' line)*
 fieldLayoutOrBraces :: IndentLevel -> Name Position -> Parser (Field Position)
-fieldLayoutOrBraces ilevel name = braces <|> fieldLayout
+fieldLayoutOrBraces ilevel name = do
+  skipMany tokWhitespace
+  braces <|> fieldLayout
   where
     braces = do
       openBrace
@@ -346,28 +348,30 @@ fieldLayoutOrBraces ilevel name = braces <|> fieldLayout
 -- sectionLayoutOrBraces ::= '\\n'? '{' elements \\n? '}'
 --                         | elements
 sectionLayoutOrBraces :: IndentLevel -> Parser [Field Position]
-sectionLayoutOrBraces ilevel =
+sectionLayoutOrBraces ilevel = parserTraced "sectionLayoutOrBraces" $ do
+  skipMany tokWhitespace
   ( do
       openBrace
       elems <- elements zeroIndentLevel
       optional tokIndent
       closeBrace
       return elems
-  )
-    <|> (elements ilevel)
+    )
+    <|> elements ilevel
 
 -- The body of a field, using either inline style or braces.
 --
 -- fieldInlineOrBraces   ::= '\\n'? '{' content '}'
 --                         | content
 fieldInlineOrBraces :: Name Position -> Parser (Field Position)
-fieldInlineOrBraces name =
+fieldInlineOrBraces name = do
+  skipMany tokWhitespace
   ( do
       openBrace
       ls <- inLexerMode (LexerMode in_field_braces) (many fieldContent)
       closeBrace
       return (Field name ls)
-  )
+    )
     <|> ( do
             ls <- inLexerMode (LexerMode in_field_braces) (option [] (fmap (\l -> [l]) fieldContent))
             return (Field name ls)
@@ -439,51 +443,58 @@ checkIndentation'' a b
   | positionCol a == positionCol b = id
   | otherwise = (LexWarning LexInconsistentIndentation b :)
 
-#ifdef CABAL_PARSEC_DEBUG
+-- #ifdef CABAL_PARSEC_DEBUG
 parseTest' :: Show a => Parsec LexState' () a -> SourceName -> B8.ByteString -> IO ()
 parseTest' p fname s =
-    case parse p fname (lexSt s) of
-      Left err -> putStrLn (formatError s err)
-
-      Right x  -> print x
+  case parse p fname (lexSt s) of
+    Left err -> putStrLn (formatError s err)
+    Right x -> print x
   where
     lexSt = mkLexState' . mkLexState
 
 parseFile :: Show a => Parser a -> FilePath -> IO ()
 parseFile p f = B8.readFile f >>= \s -> parseTest' p f s
 
-parseStr  :: Show a => Parser a -> String -> IO ()
+parseStr :: Show a => Parser a -> String -> IO ()
 parseStr p = parseBS p . B8.pack
 
-parseBS  :: Show a => Parser a -> B8.ByteString -> IO ()
+parseBS :: Show a => Parser a -> B8.ByteString -> IO ()
 parseBS p = parseTest' p "<input string>"
 
 formatError :: B8.ByteString -> ParseError -> String
 formatError input perr =
-    unlines
-      [ "Parse error "++ show (errorPos perr) ++ ":"
-      , errLine
-      , indicator ++ errmsg ]
+  unlines
+    [ "Parse error " ++ show (errorPos perr) ++ ":"
+    , errLine
+    , indicator ++ errmsg
+    ]
   where
-    pos       = errorPos perr
-    ls        = lines' (T.decodeUtf8With T.lenientDecode input)
-    errLine   = T.unpack (ls !! (sourceLine pos - 1))
+    pos = errorPos perr
+    ls = lines' (T.decodeUtf8With T.lenientDecode input)
+    errLine = T.unpack (ls !! (sourceLine pos - 1))
     indicator = replicate (sourceColumn pos) ' ' ++ "^"
-    errmsg    = showErrorMessages "or" "unknown parse error"
-                                  "expecting" "unexpected" "end of file"
-                                  (errorMessages perr)
+    errmsg =
+      showErrorMessages
+        "or"
+        "unknown parse error"
+        "expecting"
+        "unexpected"
+        "end of file"
+        (errorMessages perr)
 
 -- | Handles windows/osx/unix line breaks uniformly
 lines' :: T.Text -> [T.Text]
 lines' s1
   | T.null s1 = []
   | otherwise = case T.break (\c -> c == '\r' || c == '\n') s1 of
-                  (l, s2) | Just (c,s3) <- T.uncons s2
-                         -> case T.uncons s3 of
-                              Just ('\n', s4) | c == '\r' -> l : lines' s4
-                              _               -> l : lines' s3
-                          | otherwise -> [l]
-#endif
+      (l, s2)
+        | Just (c, s3) <- T.uncons s2 ->
+            case T.uncons s3 of
+              Just ('\n', s4) | c == '\r' -> l : lines' s4
+              _ -> l : lines' s3
+        | otherwise -> [l]
+
+-- #endif
 
 eof :: Parser ()
 eof = notFollowedBy anyToken <?> "end of file"