From 5b02df6b01c55095fdeaa2b40532536b8d787513 Mon Sep 17 00:00:00 2001 From: Paul Mitchum Date: Thu, 21 Dec 2023 12:03:02 -0800 Subject: [PATCH 1/3] first steps --- src/Processor/Remote.php | 12 +++++++++++- test/Processor/RemoteTest.php | 28 +++++++++++++++++++++++++--- 2 files changed, 36 insertions(+), 4 deletions(-) diff --git a/src/Processor/Remote.php b/src/Processor/Remote.php index d073e1f..da1e920 100644 --- a/src/Processor/Remote.php +++ b/src/Processor/Remote.php @@ -3,6 +3,7 @@ namespace FileFetcher\Processor; use GuzzleHttp\Client; +use GuzzleHttp\Exception\GuzzleException; use Procrastinator\Result; class Remote extends ProcessorBase implements ProcessorInterface @@ -32,8 +33,14 @@ public function setupState(array $state): array public function copy(array $state, Result $result, int $timeLimit = PHP_INT_MAX): array { + $state['source'] = $state['source'] ?? ''; $client = $this->getClient(); try { + // Use a HEAD request to discover whether the source URL is valid. + // If the HTTP client throws an exception, then we can't/shouldn't + // transfer the file. See the subclass hierarchy of + // GuzzleException for all the cases this handles. + $client->head($state['source']); $fout = fopen($state['destination'], "w"); $client->get($state['source'], ['sink' => $fout]); $result->setStatus(Result::DONE); @@ -49,7 +56,10 @@ public function copy(array $state, Result $result, int $timeLimit = PHP_INT_MAX) protected function getFileSize($path): int { clearstatcache(); - return filesize($path); + if ($size = @filesize($path) !== false) { + return $size; + } + return 0; } protected function getClient(): Client diff --git a/test/Processor/RemoteTest.php b/test/Processor/RemoteTest.php index 5d96b89..6e5a85c 100644 --- a/test/Processor/RemoteTest.php +++ b/test/Processor/RemoteTest.php @@ -6,6 +6,8 @@ use FileFetcher\FileFetcher; use FileFetcher\Processor\Remote; use FileFetcherTests\Mock\FakeRemote; +use GuzzleHttp\Client; +use GuzzleHttp\Exception\ClientException; use org\bovigo\vfs\vfsStream; use PHPUnit\Framework\TestCase; use Procrastinator\Result; @@ -19,10 +21,10 @@ class RemoteTest extends TestCase public function testCopyAFileWithRemoteProcessor() { $config = [ - "filePath" => 'http://notreal.blah/notacsv.csv', - "processors" => [FakeRemote::class] + 'filePath' => 'http://notreal.blah/notacsv.csv', + 'processors' => [FakeRemote::class] ]; - $fetcher = FileFetcher::get("1", new Memory(), $config); + $fetcher = FileFetcher::get('1', new Memory(), $config); $fetcher->setTimeLimit(1); @@ -72,4 +74,24 @@ public function testCopyException() $this->assertSame(Result::ERROR, $result->getStatus()); $this->assertStringContainsString('ailed to open stream', $result->getError()); } + + public function test404DoesNotCreateFile() + { + $root = vfsStream::setup('test404'); + $root_url = $root->url() . '/bad_file.csv'; + $state = [ + 'source' => 'http://example.com/bad_file.csv', + 'destination' => $root_url, + ]; + + $remote = new Remote(); + $result = new Result(); + $remote->copy($state, $result); + + // Assert that there was a 404 error. + $this->assertSame(Result::ERROR, $result->getStatus()); + $this->assertStringContainsString('resulted in a `404 Not Found` response', $result->getError()); + // Assert that the file was not created. + $this->assertFileDoesNotExist($root_url); + } } From 04e53ebbe672d316f9bbc79703c8e77f9b12cb23 Mon Sep 17 00:00:00 2001 From: Paul Mitchum Date: Thu, 21 Dec 2023 14:16:22 -0800 Subject: [PATCH 2/3] test-only version --- src/Processor/Remote.php | 2 +- test/Processor/RemoteTest.php | 6 ++++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/src/Processor/Remote.php b/src/Processor/Remote.php index da1e920..a36aa1a 100644 --- a/src/Processor/Remote.php +++ b/src/Processor/Remote.php @@ -40,7 +40,7 @@ public function copy(array $state, Result $result, int $timeLimit = PHP_INT_MAX) // If the HTTP client throws an exception, then we can't/shouldn't // transfer the file. See the subclass hierarchy of // GuzzleException for all the cases this handles. - $client->head($state['source']); +// $client->head($state['source']); $fout = fopen($state['destination'], "w"); $client->get($state['source'], ['sink' => $fout]); $result->setStatus(Result::DONE); diff --git a/test/Processor/RemoteTest.php b/test/Processor/RemoteTest.php index 6e5a85c..18c78b8 100644 --- a/test/Processor/RemoteTest.php +++ b/test/Processor/RemoteTest.php @@ -6,14 +6,13 @@ use FileFetcher\FileFetcher; use FileFetcher\Processor\Remote; use FileFetcherTests\Mock\FakeRemote; -use GuzzleHttp\Client; -use GuzzleHttp\Exception\ClientException; use org\bovigo\vfs\vfsStream; use PHPUnit\Framework\TestCase; use Procrastinator\Result; /** * @covers \FileFetcher\Processor\Remote + * @coversDefaultClass \FileFetcher\Processor\Remote */ class RemoteTest extends TestCase { @@ -75,6 +74,9 @@ public function testCopyException() $this->assertStringContainsString('ailed to open stream', $result->getError()); } + /** + * Ensure that 404 responses do not write content to disk. + */ public function test404DoesNotCreateFile() { $root = vfsStream::setup('test404'); From 4d9e41b0a2c612aef1292c981318dffe592b396e Mon Sep 17 00:00:00 2001 From: Paul Mitchum Date: Thu, 21 Dec 2023 14:49:43 -0800 Subject: [PATCH 3/3] use HEAD request to discover 4xx/5xx --- src/Processor/Remote.php | 8 ++++---- test/Mock/FakeRemote.php | 3 +++ test/Processor/RemoteTest.php | 17 ++++++++++------- 3 files changed, 17 insertions(+), 11 deletions(-) diff --git a/src/Processor/Remote.php b/src/Processor/Remote.php index a36aa1a..aff9834 100644 --- a/src/Processor/Remote.php +++ b/src/Processor/Remote.php @@ -3,7 +3,6 @@ namespace FileFetcher\Processor; use GuzzleHttp\Client; -use GuzzleHttp\Exception\GuzzleException; use Procrastinator\Result; class Remote extends ProcessorBase implements ProcessorInterface @@ -38,9 +37,10 @@ public function copy(array $state, Result $result, int $timeLimit = PHP_INT_MAX) try { // Use a HEAD request to discover whether the source URL is valid. // If the HTTP client throws an exception, then we can't/shouldn't - // transfer the file. See the subclass hierarchy of - // GuzzleException for all the cases this handles. -// $client->head($state['source']); + // allow the contents to be written to destination. See the + // subclass hierarchy of GuzzleException for all the cases this + // handles. + $client->head($state['source']); $fout = fopen($state['destination'], "w"); $client->get($state['source'], ['sink' => $fout]); $result->setStatus(Result::DONE); diff --git a/test/Mock/FakeRemote.php b/test/Mock/FakeRemote.php index ce1b1cb..7d887ef 100644 --- a/test/Mock/FakeRemote.php +++ b/test/Mock/FakeRemote.php @@ -19,6 +19,9 @@ protected function getClient(): Client private function getMockHandler() { $mock = new MockHandler([ + // Multiple copies because Remote can end up making more than one + // request. + new Response(200, ['X-Foo' => 'Bar'], 'Hello, World'), new Response(200, ['X-Foo' => 'Bar'], 'Hello, World'), ]); return $mock; diff --git a/test/Processor/RemoteTest.php b/test/Processor/RemoteTest.php index 18c78b8..9341234 100644 --- a/test/Processor/RemoteTest.php +++ b/test/Processor/RemoteTest.php @@ -50,6 +50,8 @@ public function provideIsServerCompatible(): array /** * Test the \FileFetcher\Processor\Remote::isServerCompatible() method. * + * @covers ::isServerCompatible + * * @dataProvider provideIsServerCompatible */ public function testIsServerCompatible($expected, $source) @@ -58,20 +60,21 @@ public function testIsServerCompatible($expected, $source) $this->assertSame($expected, $processor->isServerCompatible(['source' => $source])); } + /** + * Ensure the status object contains the message from an exception. + */ public function testCopyException() { - // Ensure the status object contains the message from an exception. - // We'll use vfsstream to mock a file system with no permissions to - // throw an error. - $root = vfsStream::setup('root', 0000); - $state = ['destination' => $root->url()]; + $root = vfsStream::setup('no-write', 0000); $remote = new Remote(); $result = new Result(); - $remote->copy($state, $result); + + // State does not have a source property, so there should be a curl error. + $remote->copy(['destination' => $root->url() . 'file.txt'], $result); $this->assertSame(Result::ERROR, $result->getStatus()); - $this->assertStringContainsString('ailed to open stream', $result->getError()); + $this->assertStringContainsString('cURL error 3', $result->getError()); } /**