From ecd27e70aa1de83a02f1f3e743b13bc77a681e57 Mon Sep 17 00:00:00 2001 From: Peter Philipp Date: Wed, 20 Dec 2023 18:39:09 +0100 Subject: [PATCH 1/7] Add support for streams in FinfoMimeTypeDetector::detectMimeType() --- src/FinfoMimeTypeDetector.php | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/FinfoMimeTypeDetector.php b/src/FinfoMimeTypeDetector.php index 8084f92..bf54b82 100644 --- a/src/FinfoMimeTypeDetector.php +++ b/src/FinfoMimeTypeDetector.php @@ -53,9 +53,12 @@ public function __construct( public function detectMimeType(string $path, $contents): ?string { - $mimeType = is_string($contents) - ? (@$this->finfo->buffer($this->takeSample($contents)) ?: null) - : null; + $mimeType = null; + if (is_string($contents)) { + $mimeType = @$this->finfo->buffer($this->takeSample($contents)); + } elseif (is_resource($contents) && get_resource_type($contents) === 'stream') { + $mimeType = @$this->finfo->buffer(stream_get_contents($contents, $this->bufferSampleSize, 0)); + } if ($mimeType !== null && ! in_array($mimeType, $this->inconclusiveMimetypes)) { return $mimeType; From c647850344ff54d6a5d0da8439b9708e67ccfedb Mon Sep 17 00:00:00 2001 From: Peter Philipp Date: Wed, 20 Dec 2023 19:13:52 +0100 Subject: [PATCH 2/7] Adjusted FinfoMimeTypeDetectorTest to test behavior with valid and "invalid" resource. --- src/FinfoMimeTypeDetectorTest.php | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/src/FinfoMimeTypeDetectorTest.php b/src/FinfoMimeTypeDetectorTest.php index 5e3fbd6..42b2a56 100644 --- a/src/FinfoMimeTypeDetectorTest.php +++ b/src/FinfoMimeTypeDetectorTest.php @@ -101,7 +101,7 @@ public function detecting_from_a_file_location(): void /** * @test */ - public function detecting_uses_extensions_when_a_resource_is_presented(): void + public function detecting_uses_extensions_when_a_invalid_resource_is_presented(): void { /** @var resource $handle */ $handle = fopen(__DIR__ . '/../test_files/flysystem.svg', 'r+'); @@ -111,4 +111,18 @@ public function detecting_uses_extensions_when_a_resource_is_presented(): void $this->assertEquals('image/svg+xml', $mimeType); } + + /** + * @test + */ + public function detecting_uses_stream_contents_when_a_valid_resource_is_presented(): void + { + /** @var resource $handle */ + $handle = fopen(__DIR__ . '/../test_files/flysystem.svg', 'r+'); + + $mimeType = $this->detector->detectMimeType('flysystem.unknown', $handle); + fclose($handle); + + $this->assertEquals('image/svg+xml', $mimeType); + } } From 302d7c28947bf35b20ebfd65f6278c69a6943df9 Mon Sep 17 00:00:00 2001 From: Peter Philipp Date: Wed, 20 Dec 2023 19:16:03 +0100 Subject: [PATCH 3/7] Ensure FinfoMimeTypeDetector::detectMimeType() doesn't modify stream pointers. --- src/FinfoMimeTypeDetector.php | 2 ++ src/FinfoMimeTypeDetectorTest.php | 14 ++++++++++++++ 2 files changed, 16 insertions(+) diff --git a/src/FinfoMimeTypeDetector.php b/src/FinfoMimeTypeDetector.php index bf54b82..a415d97 100644 --- a/src/FinfoMimeTypeDetector.php +++ b/src/FinfoMimeTypeDetector.php @@ -57,7 +57,9 @@ public function detectMimeType(string $path, $contents): ?string if (is_string($contents)) { $mimeType = @$this->finfo->buffer($this->takeSample($contents)); } elseif (is_resource($contents) && get_resource_type($contents) === 'stream') { + $streamPosition = ftell($contents); $mimeType = @$this->finfo->buffer(stream_get_contents($contents, $this->bufferSampleSize, 0)); + fseek($contents, $streamPosition); } if ($mimeType !== null && ! in_array($mimeType, $this->inconclusiveMimetypes)) { diff --git a/src/FinfoMimeTypeDetectorTest.php b/src/FinfoMimeTypeDetectorTest.php index 42b2a56..185440a 100644 --- a/src/FinfoMimeTypeDetectorTest.php +++ b/src/FinfoMimeTypeDetectorTest.php @@ -125,4 +125,18 @@ public function detecting_uses_stream_contents_when_a_valid_resource_is_presente $this->assertEquals('image/svg+xml', $mimeType); } + + /** + * @test + */ + public function detecting_keeps_stream_contents_positions_unchanged(): void + { + /** @var resource $handle */ + $handle = fopen(__DIR__ . '/../test_files/flysystem.svg', 'r+'); + fseek($handle, 10); + $mimeType = $this->detector->detectMimeType('flysystem.unknown', $handle); + $this->assertEquals('image/svg+xml', $mimeType); + $this->assertEquals(10, ftell($handle)); + fclose($handle); + } } From b0eb1662ccf464979085906ff36aa04b4265dc00 Mon Sep 17 00:00:00 2001 From: Peter Philipp Date: Thu, 21 Dec 2023 09:07:16 +0100 Subject: [PATCH 4/7] Optimize handling if non-seekable resources and attempt to minimize memory footprint. --- src/FinfoMimeTypeDetector.php | 62 ++++++++++++++++++++++++++++--- src/FinfoMimeTypeDetectorTest.php | 32 ++++++++++++++++ 2 files changed, 89 insertions(+), 5 deletions(-) diff --git a/src/FinfoMimeTypeDetector.php b/src/FinfoMimeTypeDetector.php index a415d97..6df144d 100644 --- a/src/FinfoMimeTypeDetector.php +++ b/src/FinfoMimeTypeDetector.php @@ -39,16 +39,28 @@ class FinfoMimeTypeDetector implements MimeTypeDetector, ExtensionLookup */ private $inconclusiveMimetypes; + /** + * @var bool + */ + private $ignoreNonSeekableStreams; + + /** + * Buffer size to read from streams if no other bufferSampleSize is defined. + */ + const STREAM_BUFFER_SAMPLE_SIZE_DEFAULT = 4100; + public function __construct( string $magicFile = '', ExtensionToMimeTypeMap $extensionMap = null, ?int $bufferSampleSize = null, - array $inconclusiveMimetypes = self::INCONCLUSIVE_MIME_TYPES + array $inconclusiveMimetypes = self::INCONCLUSIVE_MIME_TYPES, + bool $ignoreNonSeekableStreams = true ) { $this->finfo = new finfo(FILEINFO_MIME_TYPE, $magicFile); $this->extensionMap = $extensionMap ?: new GeneratedExtensionToMimeTypeMap(); $this->bufferSampleSize = $bufferSampleSize; $this->inconclusiveMimetypes = $inconclusiveMimetypes; + $this->ignoreNonSeekableStreams = $ignoreNonSeekableStreams; } public function detectMimeType(string $path, $contents): ?string @@ -56,10 +68,8 @@ public function detectMimeType(string $path, $contents): ?string $mimeType = null; if (is_string($contents)) { $mimeType = @$this->finfo->buffer($this->takeSample($contents)); - } elseif (is_resource($contents) && get_resource_type($contents) === 'stream') { - $streamPosition = ftell($contents); - $mimeType = @$this->finfo->buffer(stream_get_contents($contents, $this->bufferSampleSize, 0)); - fseek($contents, $streamPosition); + } elseif (is_resource($contents)) { + $mimeType = @$this->finfo->buffer($this->takeResourceSample($contents)); } if ($mimeType !== null && ! in_array($mimeType, $this->inconclusiveMimetypes)) { @@ -95,6 +105,48 @@ private function takeSample(string $contents): string return (string) substr($contents, 0, $this->bufferSampleSize); } + /** + * Fetches a sample of a resource while maintaining its pointer. + * + * Non-seekable resources are skipped by default because their pointer can't + * be maintained. + */ + private function takeResourceSample($contents): string + { + if (is_resource($contents) && get_resource_type($contents) === 'stream') { + $streamMetaData = stream_get_meta_data($contents); + if ($streamMetaData['seekable']) { + $streamPosition = ftell($contents); + } elseif ($this->ignoreNonSeekableStreams) { + return ''; + } + + // Memory optimization: given a length stream_get_contents() + // immediately allocates an internal buffer. + // However, stream_copy_to_stream() reads up to the defined length + // without pre-allocating any extra buffer. + // Given the relatively large STREAM_BUFFER_SAMPLE_SIZE_DEFAULT this + // avoids unnecessary memory hogging. + $streamContentBuffer = fopen("php://memory", "w+b"); + $sampleSize = $this->bufferSampleSize ?? self::STREAM_BUFFER_SAMPLE_SIZE_DEFAULT; + stream_copy_to_stream( + $contents, + $streamContentBuffer, + $sampleSize, + 0 + ); + rewind($contents); + rewind($streamContentBuffer); + $streamSample = stream_get_contents($streamContentBuffer); + fclose($streamContentBuffer); + if (isset($streamPosition)) { + fseek($contents, $streamPosition); + } + return $streamSample; + } + return ''; + } + public function lookupExtension(string $mimetype): ?string { return $this->extensionMap instanceof ExtensionLookup diff --git a/src/FinfoMimeTypeDetectorTest.php b/src/FinfoMimeTypeDetectorTest.php index 185440a..0e141e9 100644 --- a/src/FinfoMimeTypeDetectorTest.php +++ b/src/FinfoMimeTypeDetectorTest.php @@ -139,4 +139,36 @@ public function detecting_keeps_stream_contents_positions_unchanged(): void $this->assertEquals(10, ftell($handle)); fclose($handle); } + + /** + * @test + */ + public function detecting_non_seekable_streams(): void + { + /** @var resource $handle */ + $handle = fopen('https://github.com/thephpleague/mime-type-detection', 'r'); + $mimeType = $this->detector->detectMimeType('flysystem.unknown', $handle); + $this->assertEquals(null, $mimeType); + $this->assertEquals(0, ftell($handle)); + + $nonSeekableDetector = new FinfoMimeTypeDetector( + '', + null, + null, [ + 'application/x-empty', + 'text/plain', + 'text/x-asm', + 'application/octet-stream', + 'inode/x-empty', + ], + false + ); + $mimeType = $nonSeekableDetector->detectMimeType('flysystem.unknown', $handle); + $this->assertEquals('text/html', $mimeType); + // Because this stream is non seekable it's position will be where the + // detectors buffer is. + $this->assertEquals(FinfoMimeTypeDetector::STREAM_BUFFER_SAMPLE_SIZE_DEFAULT, ftell($handle)); + + fclose($handle); + } } From 019c26c1991ab179c9e3861a07ce07100e7ff32c Mon Sep 17 00:00:00 2001 From: Peter Philipp Date: Thu, 21 Dec 2023 09:18:53 +0100 Subject: [PATCH 5/7] Rewind contents stream before stream_copy_to_stream() as it seems to start the copy from current pointer. --- src/FinfoMimeTypeDetector.php | 1 + 1 file changed, 1 insertion(+) diff --git a/src/FinfoMimeTypeDetector.php b/src/FinfoMimeTypeDetector.php index 6df144d..434573f 100644 --- a/src/FinfoMimeTypeDetector.php +++ b/src/FinfoMimeTypeDetector.php @@ -129,6 +129,7 @@ private function takeResourceSample($contents): string // avoids unnecessary memory hogging. $streamContentBuffer = fopen("php://memory", "w+b"); $sampleSize = $this->bufferSampleSize ?? self::STREAM_BUFFER_SAMPLE_SIZE_DEFAULT; + rewind($contents); stream_copy_to_stream( $contents, $streamContentBuffer, From 9b61e60b91caf356e972749c779c55a168c2451e Mon Sep 17 00:00:00 2001 From: Peter Philipp Date: Thu, 21 Dec 2023 09:19:48 +0100 Subject: [PATCH 6/7] Use php://temp stream with custom memory / file switch for resource sampling --- src/FinfoMimeTypeDetector.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/FinfoMimeTypeDetector.php b/src/FinfoMimeTypeDetector.php index 434573f..65f1512 100644 --- a/src/FinfoMimeTypeDetector.php +++ b/src/FinfoMimeTypeDetector.php @@ -127,7 +127,7 @@ private function takeResourceSample($contents): string // without pre-allocating any extra buffer. // Given the relatively large STREAM_BUFFER_SAMPLE_SIZE_DEFAULT this // avoids unnecessary memory hogging. - $streamContentBuffer = fopen("php://memory", "w+b"); + $streamContentBuffer = fopen('php://temp/maxmemory:' . self::STREAM_BUFFER_SAMPLE_SIZE_DEFAULT, 'w+b'); $sampleSize = $this->bufferSampleSize ?? self::STREAM_BUFFER_SAMPLE_SIZE_DEFAULT; rewind($contents); stream_copy_to_stream( From 873c27f92d3def6a75315a70282024d7719d9823 Mon Sep 17 00:00:00 2001 From: Peter Philipp Date: Thu, 21 Dec 2023 09:27:10 +0100 Subject: [PATCH 7/7] Only use seekable streams for content sampling. --- src/FinfoMimeTypeDetector.php | 36 ++++++++++--------------------- src/FinfoMimeTypeDetectorTest.php | 21 +----------------- 2 files changed, 12 insertions(+), 45 deletions(-) diff --git a/src/FinfoMimeTypeDetector.php b/src/FinfoMimeTypeDetector.php index 65f1512..c4093ae 100644 --- a/src/FinfoMimeTypeDetector.php +++ b/src/FinfoMimeTypeDetector.php @@ -39,11 +39,6 @@ class FinfoMimeTypeDetector implements MimeTypeDetector, ExtensionLookup */ private $inconclusiveMimetypes; - /** - * @var bool - */ - private $ignoreNonSeekableStreams; - /** * Buffer size to read from streams if no other bufferSampleSize is defined. */ @@ -53,14 +48,12 @@ public function __construct( string $magicFile = '', ExtensionToMimeTypeMap $extensionMap = null, ?int $bufferSampleSize = null, - array $inconclusiveMimetypes = self::INCONCLUSIVE_MIME_TYPES, - bool $ignoreNonSeekableStreams = true + array $inconclusiveMimetypes = self::INCONCLUSIVE_MIME_TYPES ) { $this->finfo = new finfo(FILEINFO_MIME_TYPE, $magicFile); $this->extensionMap = $extensionMap ?: new GeneratedExtensionToMimeTypeMap(); $this->bufferSampleSize = $bufferSampleSize; $this->inconclusiveMimetypes = $inconclusiveMimetypes; - $this->ignoreNonSeekableStreams = $ignoreNonSeekableStreams; } public function detectMimeType(string $path, $contents): ?string @@ -68,7 +61,12 @@ public function detectMimeType(string $path, $contents): ?string $mimeType = null; if (is_string($contents)) { $mimeType = @$this->finfo->buffer($this->takeSample($contents)); - } elseif (is_resource($contents)) { + } elseif ( + is_resource($contents) + && get_resource_type($contents) === 'stream' + && ($streamMetaData = stream_get_meta_data($contents)) + && !empty($streamMetaData['seekable']) + ) { $mimeType = @$this->finfo->buffer($this->takeResourceSample($contents)); } @@ -107,20 +105,10 @@ private function takeSample(string $contents): string /** * Fetches a sample of a resource while maintaining its pointer. - * - * Non-seekable resources are skipped by default because their pointer can't - * be maintained. */ private function takeResourceSample($contents): string { if (is_resource($contents) && get_resource_type($contents) === 'stream') { - $streamMetaData = stream_get_meta_data($contents); - if ($streamMetaData['seekable']) { - $streamPosition = ftell($contents); - } elseif ($this->ignoreNonSeekableStreams) { - return ''; - } - // Memory optimization: given a length stream_get_contents() // immediately allocates an internal buffer. // However, stream_copy_to_stream() reads up to the defined length @@ -128,21 +116,19 @@ private function takeResourceSample($contents): string // Given the relatively large STREAM_BUFFER_SAMPLE_SIZE_DEFAULT this // avoids unnecessary memory hogging. $streamContentBuffer = fopen('php://temp/maxmemory:' . self::STREAM_BUFFER_SAMPLE_SIZE_DEFAULT, 'w+b'); - $sampleSize = $this->bufferSampleSize ?? self::STREAM_BUFFER_SAMPLE_SIZE_DEFAULT; + + $streamPosition = ftell($contents); rewind($contents); stream_copy_to_stream( $contents, $streamContentBuffer, - $sampleSize, + $this->bufferSampleSize ?? self::STREAM_BUFFER_SAMPLE_SIZE_DEFAULT, 0 ); - rewind($contents); rewind($streamContentBuffer); $streamSample = stream_get_contents($streamContentBuffer); fclose($streamContentBuffer); - if (isset($streamPosition)) { - fseek($contents, $streamPosition); - } + fseek($contents, $streamPosition); return $streamSample; } return ''; diff --git a/src/FinfoMimeTypeDetectorTest.php b/src/FinfoMimeTypeDetectorTest.php index 0e141e9..ea8ee2d 100644 --- a/src/FinfoMimeTypeDetectorTest.php +++ b/src/FinfoMimeTypeDetectorTest.php @@ -143,32 +143,13 @@ public function detecting_keeps_stream_contents_positions_unchanged(): void /** * @test */ - public function detecting_non_seekable_streams(): void + public function detecting_non_seekable_streams_are_not_sampling(): void { /** @var resource $handle */ $handle = fopen('https://github.com/thephpleague/mime-type-detection', 'r'); $mimeType = $this->detector->detectMimeType('flysystem.unknown', $handle); $this->assertEquals(null, $mimeType); $this->assertEquals(0, ftell($handle)); - - $nonSeekableDetector = new FinfoMimeTypeDetector( - '', - null, - null, [ - 'application/x-empty', - 'text/plain', - 'text/x-asm', - 'application/octet-stream', - 'inode/x-empty', - ], - false - ); - $mimeType = $nonSeekableDetector->detectMimeType('flysystem.unknown', $handle); - $this->assertEquals('text/html', $mimeType); - // Because this stream is non seekable it's position will be where the - // detectors buffer is. - $this->assertEquals(FinfoMimeTypeDetector::STREAM_BUFFER_SAMPLE_SIZE_DEFAULT, ftell($handle)); - fclose($handle); } }