diff --git a/src/Data/RDF/IRI.hs b/src/Data/RDF/IRI.hs index 55214e2..0ddb51f 100644 --- a/src/Data/RDF/IRI.hs +++ b/src/Data/RDF/IRI.hs @@ -1,23 +1,33 @@ -{-# LANGUAGE CPP #-} -{-# LANGUAGE GADTs #-} +{-# LANGUAGE CPP #-} +{-# LANGUAGE GADTs #-} {-# LANGUAGE GeneralizedNewtypeDeriving #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE TupleSections #-} --- |An implementation of the RFC3987 --- [RFC3987]: http://www.ietf.org/rfc/rfc3987.txt - +-- | An implementation of the RFC3987 +-- [RFC3987]: http://www.ietf.org/rfc/rfc3987.txt module Data.RDF.IRI - ( IRI(..), IRIRef(..) - , Scheme(..), Authority(..), UserInfo(..), Host(..), Port(..) - , Path(..), Query(..), Fragment(..) - , IRIError(..), SchemaError(..) - , mkIRI - , serializeIRI - , parseIRI, parseRelIRI - , validateIRI, resolveIRI - , removeIRIFragment - ) where + ( IRI (..), + IRIRef (..), + Scheme (..), + Authority (..), + UserInfo (..), + Host (..), + Port (..), + Path (..), + IRIQuery (..), + Fragment (..), + IRIError (..), + SchemaError (..), + mkIRI, + serializeIRI, + parseIRI, + parseRelIRI, + validateIRI, + resolveIRI, + removeIRIFragment, + ) +where #if MIN_VERSION_base(4,9,0) #if !MIN_VERSION_base(4,11,0) @@ -33,37 +43,39 @@ import Data.Maybe (isJust) import Data.Maybe (maybe, isJust) #endif -import Data.Functor -import Data.List (intersperse) import Control.Applicative -import Control.Monad (guard) import Control.Arrow (first, (&&&), (>>>)) -import Data.Char (isAlpha, isDigit, isAlphaNum, toUpper, toLower) -import Data.Text (Text) -import qualified Data.Text as T -import Data.Attoparsec.Text (Parser, ()) +import Control.Monad (guard) +import Data.Attoparsec.Text (Parser, ()) import qualified Data.Attoparsec.Text as P +import Data.Char (isAlpha, isAlphaNum, isDigit, toLower, toUpper) +import Data.Functor +import Data.List (intersperse) +import Data.Text (Text) +import qualified Data.Text as T -- | A serialized IRI representation. -newtype IRI = IRI { getIRI :: Text } +newtype IRI = IRI {getIRI :: Text} deriving (Show, Eq) -- | A detailed IRI representation with its components. -data IRIRef = IRIRef - !(Maybe Scheme) - !(Maybe Authority) - !Path - !(Maybe Query) - !(Maybe Fragment) +data IRIRef + = IRIRef + !(Maybe Scheme) + !(Maybe Authority) + !Path + !(Maybe IRIQuery) + !(Maybe Fragment) deriving (Show, Eq, Ord) newtype Scheme = Scheme Text deriving (Show, Eq, Ord) -data Authority = Authority - !(Maybe UserInfo) - !Host - !(Maybe Port) +data Authority + = Authority + !(Maybe UserInfo) + !Host + !(Maybe Port) deriving (Show, Eq, Ord) newtype UserInfo = UserInfo Text @@ -78,11 +90,12 @@ newtype Port = Port Int newtype Path = Path Text deriving (Show, Eq, Semigroup, Monoid, Ord) -newtype Query = Query Text +newtype IRIQuery = IRIQuery Text deriving (Show, Eq, Semigroup, Ord) -instance Monoid Query where - mempty = Query mempty +instance Monoid IRIQuery where + mempty = IRIQuery mempty + #if !(MIN_VERSION_base(4,11,0)) mappend = (<>) #endif @@ -92,6 +105,7 @@ newtype Fragment = Fragment Text instance Monoid Fragment where mempty = Fragment mempty + #if !(MIN_VERSION_base(4,11,0)) mappend = (<>) #endif @@ -100,9 +114,12 @@ data IRIError = InvalidIRI deriving (Show, Eq) data SchemaError - = NonAlphaLeading -- ^ Scheme must start with an alphabet character - | InvalidChars -- ^ Subsequent characters in the schema were invalid - | MissingColon -- ^ Schemas must be followed by a colon + = -- | Scheme must start with an alphabet character + NonAlphaLeading + | -- | Subsequent characters in the schema were invalid + InvalidChars + | -- | Schemas must be followed by a colon + MissingColon deriving (Show, Eq) removeIRIFragment :: IRIRef -> IRIRef @@ -110,23 +127,27 @@ removeIRIFragment (IRIRef s a p q _) = IRIRef s a p q Nothing -- [TODO] use Builder serializeIRI :: IRIRef -> Text -serializeIRI (IRIRef s a p q f) = mconcat - [ maybe mempty scheme s - , maybe mempty authority a - , path p - , maybe mempty query q - , maybe mempty fragment f ] +serializeIRI (IRIRef s a p q f) = + mconcat + [ maybe mempty scheme s, + maybe mempty authority a, + path p, + maybe mempty query q, + maybe mempty fragment f + ] where scheme (Scheme s') = s' <> ":" - authority (Authority u (Host h) p') = mconcat - [ "//" - , maybe mempty userInfo u - , h - , maybe mempty port p' ] + authority (Authority u (Host h) p') = + mconcat + [ "//", + maybe mempty userInfo u, + h, + maybe mempty port p' + ] userInfo (UserInfo u) = u <> "@" port (Port p') = (":" <>) . T.pack . show $ p' path (Path p') = p' - query (Query q') = "?" <> q' + query (IRIQuery q') = "?" <> q' fragment (Fragment f') = "#" <> f' mkIRI :: Text -> Either String IRI @@ -144,10 +165,12 @@ validateIRI t = t <$ parseIRI t -- | IRI parsing and resolution according to algorithm 5.2 from RFC3986 -- See: http://www.ietf.org/rfc/rfc3986.txt -- [FIXME] Currently, this is a correct but naive implementation. -resolveIRI - :: Text -- ^ Base URI - -> Text -- ^ URI to resolve - -> Either String Text +resolveIRI :: + -- | Base URI + Text -> + -- | URI to resolve + Text -> + Either String Text resolveIRI baseIri iri = serializeIRI <$> resolvedIRI where resolvedIRI = either (const resolvedRelativeIRI) resolveAbsoluteIRI (parseIRI iri) @@ -158,29 +181,31 @@ resolveIRI baseIri iri = serializeIRI <$> resolvedIRI -- Parse base IRI (IRIRef bs ba bp bq _) <- parseIRI baseIri let rIriWithoutAuth = resolveIriWithoutAuth rp rq rf bs ba bp bq - rIriWithAuth = return (IRIRef bs ra (removeDotSegments rp') rq rf) + rIriWithAuth = return (IRIRef bs ra (removeDotSegments rp') rq rf) maybe rIriWithoutAuth (const rIriWithAuth) ra - resolveIriWithoutAuth rp rq rf bs ba bp bq = return $! - if (rp == mempty) - then maybe (IRIRef bs ba bp bq rf) (const (IRIRef bs ba bp rq rf)) rq - else let (Path rp') = rp in if (T.head rp' == '/') - then IRIRef bs ba (removeDotSegments rp') rq rf - else IRIRef bs ba (removeDotSegments (merge ba bp rp)) rq rf + resolveIriWithoutAuth rp rq rf bs ba bp bq = + return + $! if (rp == mempty) + then maybe (IRIRef bs ba bp bq rf) (const (IRIRef bs ba bp rq rf)) rq + else + let (Path rp') = rp + in if (T.head rp' == '/') + then IRIRef bs ba (removeDotSegments rp') rq rf + else IRIRef bs ba (removeDotSegments (merge ba bp rp)) rq rf removeDotSegments p = removeDotSegments' (T.split (== '/') p) mempty removeDotSegments' [] os = Path $ mconcat (intersperse "/" os) removeDotSegments' ["."] os = removeDotSegments' mempty (os <> [mempty]) removeDotSegments' [".."] [] = removeDotSegments' mempty mempty removeDotSegments' [".."] os = removeDotSegments' mempty (init os <> [mempty]) removeDotSegments' ss@[_] os = removeDotSegments' mempty (os <> ss) - removeDotSegments' (".":ss) os = removeDotSegments' ss os - removeDotSegments' ("..":ss) [] = removeDotSegments' ss mempty - removeDotSegments' ("..":ss) os@[""] = removeDotSegments' ss os - removeDotSegments' ("..":ss) os = removeDotSegments' ss (init os) - removeDotSegments' (s:ss) os = removeDotSegments' ss (os <> [s]) + removeDotSegments' ("." : ss) os = removeDotSegments' ss os + removeDotSegments' (".." : ss) [] = removeDotSegments' ss mempty + removeDotSegments' (".." : ss) os@[""] = removeDotSegments' ss os + removeDotSegments' (".." : ss) os = removeDotSegments' ss (init os) + removeDotSegments' (s : ss) os = removeDotSegments' ss (os <> [s]) merge ba (Path bp) (Path rp) | isJust ba && bp == mempty = "/" <> rp - | otherwise = T.dropWhileEnd (/= '/') bp <> rp - + | otherwise = T.dropWhileEnd (/= '/') bp <> rp -- IRI = scheme ":" ihier-part [ "?" iquery ] [ "#" ifragment ] iriParser :: Parser IRIRef @@ -198,10 +223,10 @@ iriParser = do -- / ipath-empty ihierPartParser :: Parser (Maybe Authority, Path) ihierPartParser = - iauthWithPathParser <|> - ipathAbsoluteParser <|> - ipathRootlessParser <|> - ipathEmptyParser + iauthWithPathParser + <|> ipathAbsoluteParser + <|> ipathRootlessParser + <|> ipathEmptyParser -- IRI-reference = IRI / irelative-ref -- [TODO] @@ -223,28 +248,30 @@ irelativeRefParser = do -- / ipath-empty irelativePartParser :: Parser (Maybe Authority, Path) irelativePartParser = - iauthWithPathParser <|> - ipathAbsoluteParser <|> - ipathNoSchemeParser <|> - ipathEmptyParser + iauthWithPathParser + <|> ipathAbsoluteParser + <|> ipathNoSchemeParser + <|> ipathEmptyParser -- iauthority = [ iuserinfo "@" ] ihost [ ":" port ] iauthorityParser :: Parser Authority iauthorityParser = Authority <$> optional (iuserInfoParser <* P.string "@") - <*> ihostParser - <*> optional (P.string ":" *> portParser) - "Authority" + <*> ihostParser + <*> optional (P.string ":" *> portParser) + "Authority" -- iuserinfo = *( iunreserved / pct-encoded / sub-delims / ":" ) iuserInfoParser :: Parser UserInfo iuserInfoParser = UserInfo . mconcat <$> P.many1 iuserInfoP - where iuserInfoP = iunreservedP <|> pctEncodedParser <|> subDelimsP <|> P.string ":" + where + iuserInfoP = iunreservedP <|> pctEncodedParser <|> subDelimsP <|> P.string ":" -- ihost = IP-literal / IPv4address / ireg-name ihostParser :: Parser Host -ihostParser = Host <$> (ipLiteralParser <|> ipV4AddressParser <|> iregNameParser) - "Host" +ihostParser = + Host <$> (ipLiteralParser <|> ipV4AddressParser <|> iregNameParser) + "Host" -- ireg-name = *( iunreserved / pct-encoded / sub-delims ) iregNameParser :: Parser Text @@ -306,20 +333,21 @@ isegmentNzParser = mconcat <$> (P.many1 ipcharParser) -- ; non-zero-length segment without any colon ":" isegmentNzNcParser :: Parser Text isegmentNzNcParser = mconcat <$> (P.many1 _isegmentNzNcParser) - where _isegmentNzNcParser = iunreservedP <|> pctEncodedParser <|> subDelimsP <|> P.string "@" + where + _isegmentNzNcParser = iunreservedP <|> pctEncodedParser <|> subDelimsP <|> P.string "@" -- ipchar = iunreserved / pct-encoded / sub-delims / ":" / "@" ipcharParser :: Parser Text ipcharParser = iunreservedP <|> pctEncodedParser <|> subDelimsP <|> P.string ":" <|> P.string "@" -- iquery = *( ipchar / iprivate / "/" / "?" ) -iqueryParser :: Parser Query -iqueryParser = Query <$> iqueryParser' +iqueryParser :: Parser IRIQuery +iqueryParser = IRIQuery <$> iqueryParser' iqueryParser' :: Parser Text iqueryParser' = P.char '?' *> (mconcat <$> P.many' (ipcharParser <|> iprivateParser <|> P.string "/" <|> P.string "?")) - "Query" + "Query" -- ifragment = *( ipchar / "/" / "?" ) ifragmentParser :: Parser Fragment @@ -328,7 +356,7 @@ ifragmentParser = Fragment <$> ifragmentParser' ifragmentParser' :: Parser Text ifragmentParser' = P.char '#' *> (mconcat <$> P.many' (ipcharParser <|> P.string "/" <|> P.string "?")) - "Fragment" + "Fragment" -- iunreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" / ucschar iunreservedP :: Parser Text @@ -344,32 +372,34 @@ isIunreserved c = isUnreserved c || isUcsChar c -- / %xA0000-AFFFD / %xB0000-BFFFD / %xC0000-CFFFD -- / %xD0000-DFFFD / %xE1000-EFFFD isUcsChar :: Char -> Bool -isUcsChar c = ('\x000A0' <= c && c <= '\x0D7FF') - || ('\x0F900' <= c && c <= '\x0FDCF') - || ('\x0FDF0' <= c && c <= '\x0FFEF') - || ('\x10000' <= c && c <= '\x1FFFD') - || ('\x20000' <= c && c <= '\x2FFFD') - || ('\x30000' <= c && c <= '\x3FFFD') - || ('\x40000' <= c && c <= '\x4FFFD') - || ('\x50000' <= c && c <= '\x5FFFD') - || ('\x60000' <= c && c <= '\x6FFFD') - || ('\x70000' <= c && c <= '\x7FFFD') - || ('\x80000' <= c && c <= '\x8FFFD') - || ('\x90000' <= c && c <= '\x9FFFD') - || ('\xA0000' <= c && c <= '\xAFFFD') - || ('\xB0000' <= c && c <= '\xBFFFD') - || ('\xC0000' <= c && c <= '\xCFFFD') - || ('\xD0000' <= c && c <= '\xDFFFD') - || ('\xE1000' <= c && c <= '\xEFFFD') +isUcsChar c = + ('\x000A0' <= c && c <= '\x0D7FF') + || ('\x0F900' <= c && c <= '\x0FDCF') + || ('\x0FDF0' <= c && c <= '\x0FFEF') + || ('\x10000' <= c && c <= '\x1FFFD') + || ('\x20000' <= c && c <= '\x2FFFD') + || ('\x30000' <= c && c <= '\x3FFFD') + || ('\x40000' <= c && c <= '\x4FFFD') + || ('\x50000' <= c && c <= '\x5FFFD') + || ('\x60000' <= c && c <= '\x6FFFD') + || ('\x70000' <= c && c <= '\x7FFFD') + || ('\x80000' <= c && c <= '\x8FFFD') + || ('\x90000' <= c && c <= '\x9FFFD') + || ('\xA0000' <= c && c <= '\xAFFFD') + || ('\xB0000' <= c && c <= '\xBFFFD') + || ('\xC0000' <= c && c <= '\xCFFFD') + || ('\xD0000' <= c && c <= '\xDFFFD') + || ('\xE1000' <= c && c <= '\xEFFFD') -- iprivate = %xE000-F8FF / %xF0000-FFFFD / %x100000-10FFFD iprivateParser :: Parser Text iprivateParser = T.singleton <$> P.satisfy isIPrivate isIPrivate :: Char -> Bool -isIPrivate c = ('\x00E000' <= c && c <= '\x00F8FF') - || ('\x0F0000' <= c && c <= '\x0FFFFD') - || ('\x100000' <= c && c <= '\x10FFFD') +isIPrivate c = + ('\x00E000' <= c && c <= '\x00F8FF') + || ('\x0F0000' <= c && c <= '\x0FFFFD') + || ('\x100000' <= c && c <= '\x10FFFD') -- scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) schemeParser :: Parser Scheme @@ -379,8 +409,12 @@ schemeParser = where schemeHead = P.satisfy isAlpha "Scheme head" schemeRest = P.takeWhile isSchemeTailChar "Scheme tail" - isSchemeTailChar c = isAlphaNum c - || c == '+' || c == '.' || c == '_' || c == '-' + isSchemeTailChar c = + isAlphaNum c + || c == '+' + || c == '.' + || c == '_' + || c == '-' -- port = *DIGIT portParser :: Parser Port @@ -396,12 +430,15 @@ ipLiteralParser = P.string "[" *> (ipV6AddressParser <|> ipFutureParser) <* P.st -- IPvFuture = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" ) ipFutureParser :: Parser Text ipFutureParser = - mconcat <$> sequence [ - P.string "v", - P.takeWhile1 isHexaDigit, - P.string ".", - P.takeWhile1 isValidFinalChar] - where isValidFinalChar c = isUnreserved c || isSubDelims c || c == ':' + mconcat + <$> sequence + [ P.string "v", + P.takeWhile1 isHexaDigit, + P.string ".", + P.takeWhile1 isValidFinalChar + ] + where + isValidFinalChar c = isUnreserved c || isSubDelims c || c == ':' -- IPv6address = 6( h16 ":" ) ls32 -- / "::" 5( h16 ":" ) ls32 @@ -413,19 +450,20 @@ ipFutureParser = -- / [ *5( h16 ":" ) h16 ] "::" h16 -- / [ *6( h16 ":" ) h16 ] "::" ipV6AddressParser :: Parser Text -ipV6AddressParser = do - l <- leadingP - t <- trailingP l - joinParts l t - "IPV6" +ipV6AddressParser = + do + l <- leadingP + t <- trailingP l + joinParts l t + "IPV6" where leadingP = h16 `P.sepBy` ":" trailingP = (id &&& length) >>> \l -> ipNotElided l <|> ipElided l joinParts leading trailing = pure $ (T.intercalate ":" leading) <> trailing h16 = parseBetween 1 4 (P.takeWhile isHexaDigit) ipNotElided (leading, lengthL) = - guard (lengthL == 7 && isDecOctet (last leading)) *> partialIpV4 <|> - (guard (lengthL == 8) $> mempty) + guard (lengthL == 7 && isDecOctet (last leading)) *> partialIpV4 + <|> (guard (lengthL == 8) $> mempty) ipElided (_, lengthL) = do guard $ lengthL <= 8 elision <- P.string "::" @@ -434,8 +472,8 @@ ipV6AddressParser = do let lengthTotal = lengthL + lengthT guard $ lengthT < 8 embeddedIpV4 <- - guard (lengthT > 0 && lengthTotal < 7 && isDecOctet (last trailing)) *> partialIpV4 <|> - pure mempty + guard (lengthT > 0 && lengthTotal < 7 && isDecOctet (last trailing)) *> partialIpV4 + <|> pure mempty pure $ mconcat [elision, (T.intercalate ":" trailing), embeddedIpV4] partialIpV4 = mconcat <$> sequence [dotP, decOctetP, dotP, decOctetP, dotP, decOctetP] @@ -449,7 +487,6 @@ ipV6AddressParser = do ipV4AddressParser :: Parser Text ipV4AddressParser = mconcat <$> sequence [decOctetP, dotP, decOctetP, dotP, decOctetP, dotP, decOctetP] - -- dec-octet = DIGIT ; 0-9 -- / %x31-39 DIGIT ; 10-99 -- / "1" 2DIGIT ; 100-199 @@ -464,19 +501,24 @@ decOctetP = do isDecOctet :: Text -> Bool isDecOctet s = len > 0 && T.all isDigit s && (len < 3 || (len == 3 && s <= "255")) - where len = T.length s + where + len = T.length s -- pct-encoded = "%" HEXDIG HEXDIG pctEncodedParser :: Parser Text pctEncodedParser = T.cons <$> P.char '%' - <*> (T.pack . fmap toUpper <$> (P.count 2 (P.satisfy isHexaDigit))) - "Percent encoding" + <*> (T.pack . fmap toUpper <$> (P.count 2 (P.satisfy isHexaDigit))) + "Percent encoding" -- unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" isUnreserved :: Char -> Bool -isUnreserved c = isAlphaNum c - || c == '-' || c == '.' || c == '_' || c == '~' +isUnreserved c = + isAlphaNum c + || c == '-' + || c == '.' + || c == '_' + || c == '~' -- reserved = gen-delims / sub-delims -- [TODO] @@ -498,9 +540,10 @@ iauthWithPathParser = do curry (first Just) <$> iauthorityParser <*> ipathAbEmptyParser isHexaDigit :: Char -> Bool -isHexaDigit c = (isDigit c) || - (c >= 'a' && c <= 'f') || - (c >= 'A' && c <= 'F') +isHexaDigit c = + (isDigit c) + || (c >= 'a' && c <= 'f') + || (c >= 'A' && c <= 'F') dotP :: Parser Text dotP = P.string "."