Skip to content

Commit

Permalink
Version 1.6.0 (#34)
Browse files Browse the repository at this point in the history
* Updated to supported Symfony and Doctrine versions only

* Implemented maxDurationInSeconds()

* Updated Coding Style implementation

* Updated to PHP 8 features using Rector <3

* Fixed missing docs

* Update CI

* Update CI to include PHP 8.3
  • Loading branch information
Toflar authored Nov 9, 2023
1 parent 9a7b2c9 commit cca4a97
Show file tree
Hide file tree
Showing 24 changed files with 394 additions and 480 deletions.
17 changes: 10 additions & 7 deletions .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,23 +18,26 @@ jobs:
- name: Setup PHP
uses: shivammathur/setup-php@v2
with:
php-version: 7.4
php-version: 8.2
coverage: none
tools: php-cs-fixer

- name: Checkout
uses: actions/checkout@v2

- name: Checkout
uses: actions/checkout@v2

- name: Install the dependencies
run: composer install --no-interaction --no-suggest

- name: Run the CS fixer
run: php-cs-fixer fix
run: composer cs

tests:
name: PHP ${{ matrix.php }}
runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
php: [7.4, 8.0, 8.1, 8.2]
php: [8.1, 8.2, 8.3]
steps:
- name: Setup PHP
uses: shivammathur/setup-php@v2
Expand All @@ -58,7 +61,7 @@ jobs:
strategy:
fail-fast: false
matrix:
php: [7.4, 8.0, 8.1, 8.2]
php: [8.1, 8.2, 8.3]
steps:
- name: Setup PHP
uses: shivammathur/setup-php@v2
Expand Down
48 changes: 0 additions & 48 deletions .php-cs-fixer.dist.php

This file was deleted.

8 changes: 7 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -211,7 +211,7 @@ There are `2` other interfaces which you might want to integrate but you don't h
#### Tags

Sometimes you may want to add meta information to any `CrawlUri` instance so you can let other subscribers decide
what they want to do with this information or it may be relevant during another request.
what they want to do with this information, or it may be relevant during another request.
The `RobotsSubscriber`, for instance, tags `CrawlUri` instances when they contain a `<meta name="robots" content="nofollow">`
in the body or the corresponding `X-Robots-Tag` header was set. All the links found on this URI are then not followed,
which happens during the next `shouldRequest()` call.
Expand Down Expand Up @@ -438,6 +438,12 @@ There are different configurations you can apply to the `Escargot` instance:

Returns a clone of the `Escargot` instance with a maximum number of total requests that are going to be executed. It can be
useful if you have limited resources and only want to execute e.g. `100` requests in this run and continue later on.

* `Escargot::withMaxDurationInSeconds(int $maxDurationInSeconds): Escargot`

Returns a clone of the `Escargot` instance with a maximum number of seconds Escargot is going to be running. It can be
useful if you have limited resources and only want to execute the crawl process for e.g. `30` seconds in this run
and continue later on.

* `Escargot::withUserAgent(string $userAgent): Escargot`

Expand Down
21 changes: 13 additions & 8 deletions composer.json
Original file line number Diff line number Diff line change
Expand Up @@ -16,27 +16,32 @@
"source": "https://github.com/terminal42/escargot"
},
"require": {
"php": "^7.4 || ^8.0",
"php": "^8.1",
"ext-simplexml": "*",
"nyholm/psr7": "^1.1",
"psr/http-message": "^1.0 || ^2.0",
"psr/log": "^1.1 || ^2.0 || ^3.0",
"symfony/dom-crawler": "^4.4 || ^5.0 || ^6.0",
"symfony/event-dispatcher": "^4.4 || ^5.0 || ^6.0",
"symfony/http-client": "^4.4 || ^5.0 || ^6.0",
"symfony/clock": "^6.2",
"symfony/dom-crawler": "^5.4 || ^6.0",
"symfony/event-dispatcher": "^5.4 || ^6.0",
"symfony/http-client": "^5.4 || ^6.0",
"terminal42/contao-build-tools": "@dev",
"webignition/robots-txt-file": "^3.0"
},
"require-dev": {
"doctrine/dbal": "^2.13 || ^3.0",
"symfony/finder": "^4.4 || ^5.0 || ^6.0",
"symfony/phpunit-bridge": "^5.1.8 || ^6.0",
"doctrine/dbal": "^3.6",
"symfony/finder": "^5.4|| ^6.0",
"symfony/phpunit-bridge": "^5.4 || ^6.0",
"fig/log-test": "^1.0"
},
"config": {
"preferred-install": {
"*": "dist"
},
"sort-packages": true
"sort-packages": true,
"allow-plugins": {
"terminal42/contao-build-tools": true
}
},
"autoload": {
"psr-4": {
Expand Down
63 changes: 0 additions & 63 deletions phpinsights.php

This file was deleted.

8 changes: 1 addition & 7 deletions src/BaseUriCollection.php
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ final class BaseUriCollection implements \IteratorAggregate, \Countable
/**
* @var array<UriInterface>
*/
private $baseUris = [];
private array $baseUris = [];

/**
* @param array<UriInterface> $baseUris
Expand Down Expand Up @@ -80,17 +80,11 @@ public function all(): array
return array_values($this->baseUris);
}

/**
* {@inheritdoc}
*/
public function getIterator(): \Traversable
{
return new \ArrayIterator($this->all());
}

/**
* {@inheritdoc}
*/
public function count(): int
{
return \count($this->all());
Expand Down
60 changes: 19 additions & 41 deletions src/CrawlUri.php
Original file line number Diff line number Diff line change
Expand Up @@ -14,43 +14,23 @@

use Psr\Http\Message\UriInterface;

final class CrawlUri
final class CrawlUri implements \Stringable
{
/**
* @var UriInterface
*/
private $uri;

/**
* @var int
*/
private $level;

/**
* @var bool
*/
private $processed = false;

/**
* @var bool
*/
private $wasMarkedProcessed = false;

/**
* @var UriInterface|null
*/
private $foundOn = null;

/**
* @var array
*/
private $tags = [];

public function __construct(UriInterface $uri, int $level, bool $processed = false, ?UriInterface $foundOn = null)
{
private readonly UriInterface $uri;

private bool $wasMarkedProcessed = false;

private UriInterface|null $foundOn = null;

private array $tags = [];

public function __construct(
UriInterface $uri,
private readonly int $level,
private bool $processed = false,
UriInterface|null $foundOn = null,
) {
$this->uri = self::normalizeUri($uri);
$this->level = $level;
$this->processed = $processed;

if (null !== $foundOn) {
$this->foundOn = self::normalizeUri($foundOn);
Expand All @@ -64,7 +44,7 @@ public function __toString(): string
$this->getLevel(),
$this->isProcessed() ? 'yes' : 'no',
(string) ($this->getFoundOn() ?: 'root'),
$this->getTags() ? implode(', ', $this->getTags()) : 'none'
$this->getTags() ? implode(', ', $this->getTags()) : 'none',
);
}

Expand Down Expand Up @@ -96,7 +76,7 @@ public function wasMarkedProcessed(): bool
return $this->wasMarkedProcessed;
}

public function getFoundOn(): ?UriInterface
public function getFoundOn(): UriInterface|null
{
return $this->foundOn;
}
Expand All @@ -108,7 +88,7 @@ public function getTags(): array

public function addTag(string $tag): self
{
if (false !== strpos($tag, ',')) {
if (str_contains($tag, ',')) {
throw new \InvalidArgumentException('Cannot use commas in tags.');
}

Expand Down Expand Up @@ -139,8 +119,6 @@ public static function normalizeUri(UriInterface $uri): UriInterface
$uri = $uri->withPath('/');
}

$uri = $uri->withFragment('');

return $uri;
return $uri->withFragment('');
}
}
Loading

0 comments on commit cca4a97

Please sign in to comment.