From 423e6af32dde7c406adcdcf3689521018f467c81 Mon Sep 17 00:00:00 2001 From: Bodigrim Date: Sat, 18 Nov 2023 11:59:53 +0000 Subject: [PATCH] Check that filenames are ASCII instead of silent corruption (see #6) --- Codec/Archive/Tar/Index/Internal.hs | 3 ++- Codec/Archive/Tar/PackAscii.hs | 14 ++++++++++++++ Codec/Archive/Tar/Types.hs | 10 ++++++---- changelog.md | 1 + tar.cabal | 1 + 5 files changed, 24 insertions(+), 5 deletions(-) create mode 100644 Codec/Archive/Tar/PackAscii.hs diff --git a/Codec/Archive/Tar/Index/Internal.hs b/Codec/Archive/Tar/Index/Internal.hs index 5064283..a1e4ae0 100644 --- a/Codec/Archive/Tar/Index/Internal.hs +++ b/Codec/Archive/Tar/Index/Internal.hs @@ -67,6 +67,7 @@ import qualified Codec.Archive.Tar.Index.StringTable as StringTable import Codec.Archive.Tar.Index.StringTable (StringTable, StringTableBuilder) import qualified Codec.Archive.Tar.Index.IntTrie as IntTrie import Codec.Archive.Tar.Index.IntTrie (IntTrie, IntTrieBuilder) +import Codec.Archive.Tar.PackAscii import qualified System.FilePath.Posix as FilePath import Data.Monoid (Monoid(..)) @@ -174,7 +175,7 @@ toComponentIds table = lookupComponents [] . filter (/= BS.Char8.singleton '.') . splitDirectories - . BS.Char8.pack + . packAscii where lookupComponents cs' [] = Just (reverse cs') lookupComponents cs' (c:cs) = case StringTable.lookup table c of diff --git a/Codec/Archive/Tar/PackAscii.hs b/Codec/Archive/Tar/PackAscii.hs new file mode 100644 index 0000000..795f378 --- /dev/null +++ b/Codec/Archive/Tar/PackAscii.hs @@ -0,0 +1,14 @@ +module Codec.Archive.Tar.PackAscii + ( packAscii + ) where + +import qualified Data.ByteString.Char8 as BS.Char8 +import Data.Char +import GHC.Stack + +-- | We should really migrate to 'OsPath' from 'filepath', +-- but for now let's not corrupt data silently. +packAscii :: HasCallStack => FilePath -> BS.Char8.ByteString +packAscii xs + | all isAscii xs = BS.Char8.pack xs + | otherwise = error $ "packAscii: only ASCII filenames are supported, but got " ++ xs diff --git a/Codec/Archive/Tar/Types.hs b/Codec/Archive/Tar/Types.hs index 260bbdb..fcc67b1 100644 --- a/Codec/Archive/Tar/Types.hs +++ b/Codec/Archive/Tar/Types.hs @@ -74,6 +74,8 @@ import qualified System.FilePath.Windows as FilePath.Windows import System.Posix.Types ( FileMode ) +import Codec.Archive.Tar.PackAscii + type FileSize = Int64 -- | The number of seconds since the UNIX epoch type EpochTime = Int64 @@ -357,14 +359,14 @@ splitLongPath :: FilePath -> Either String TarPath splitLongPath path = case packName nameMax (reverse (FilePath.Posix.splitPath path)) of Left err -> Left err - Right (name, []) -> Right $! TarPath (BS.Char8.pack name) + Right (name, []) -> Right $! TarPath (packAscii name) BS.empty Right (name, first:rest) -> case packName prefixMax remainder of Left err -> Left err Right (_ , (_:_)) -> Left $ "Filename " ++ path ++ " too long (cannot split)" - Right (prefix, []) -> Right $! TarPath (BS.Char8.pack name) - (BS.Char8.pack prefix) + Right (prefix, []) -> Right $! TarPath (packAscii name) + (packAscii prefix) where -- drop the '/' between the name and prefix: remainder = init first : rest @@ -399,7 +401,7 @@ instance NFData LinkTarget where -- characters. -- toLinkTarget :: FilePath -> Maybe LinkTarget -toLinkTarget path | length path <= 100 = Just $! LinkTarget (BS.Char8.pack path) +toLinkTarget path | length path <= 100 = Just $! LinkTarget (packAscii path) | otherwise = Nothing -- | Convert a tar 'LinkTarget' to a native 'FilePath'. diff --git a/changelog.md b/changelog.md index 5701c3b..199aed7 100644 --- a/changelog.md +++ b/changelog.md @@ -7,6 +7,7 @@ See also http://pvp.haskell.org/faq * Speed up `fromTarPathToPosixPath` * Set permissions on extracted files * Handle > 8 GB files + * Prohibit non-ASCII file names instead of silent corruption 0.5.1.1 Herbert Valerio Riedel August 2019 diff --git a/tar.cabal b/tar.cabal index 559e1a2..45b863f 100644 --- a/tar.cabal +++ b/tar.cabal @@ -62,6 +62,7 @@ library tar-internal Codec.Archive.Tar.Read Codec.Archive.Tar.Write Codec.Archive.Tar.Pack + Codec.Archive.Tar.PackAscii Codec.Archive.Tar.Unpack Codec.Archive.Tar.Index.StringTable Codec.Archive.Tar.Index.IntTrie