From c4386de9c8400cb3f9ba8a1db48643f9a35f41c9 Mon Sep 17 00:00:00 2001 From: Cam Kemshal-Bell Date: Fri, 9 Aug 2024 20:54:24 +1000 Subject: [PATCH 01/29] feat(link-previews): Add meta data parser service --- app/Services/MetaData.php | 192 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 192 insertions(+) create mode 100644 app/Services/MetaData.php diff --git a/app/Services/MetaData.php b/app/Services/MetaData.php new file mode 100644 index 000000000..76ec8b07c --- /dev/null +++ b/app/Services/MetaData.php @@ -0,0 +1,192 @@ + + */ +final readonly class MetaData +{ + /** + * The Open Graph data. + * + * @var MetaDataCollection + */ + private Collection $data; + + /** + * Fetch the Open Graph data for a given URL. + */ + public function __construct( + private string $url + ) { + $this->data = Cache::get( + $this->url, + fn () => $this->getData() + ); + } + + /** + * Get the open graph data for a given external URL. + * + * @return MetaDataCollection + */ + public static function fetch(string $url): Collection + { + return (new self($url))->data; + } + + /** + * Get the Open Graph data. + * + * @return MetaDataCollection + */ + public function getData(): Collection + { + $response = Http::get($this->url); + + if ($response->ok()) { + $opg = $this->parse($response->body()); + //Cache::put($this->url, $opg, now()->addDay()); + dump($opg); + + return $opg; + } + + return collect(); + } + + // ensure fb, twitter card, open graph, and oembed are all parsed, + // so we can use them in the view to generate a preview + + /** + * Parse the response body for MetaData. + * + * @return Collection + */ + private function parse(string $content): Collection + { + $doc = new DOMDocument(); + @$doc->loadHTML($content); + + $interested_in = ['og', 'fb', 'twitter']; + + $data = collect(); + + // Open graph + $metas = $doc->getElementsByTagName('meta'); + if ($metas->length > 0) { + dump($metas); + for ($n = 0; $n < $metas->length; $n++) { + $meta = $metas->item($n); + + collect(['name', 'property'])->each(function ($name) use ($meta, $interested_in, $data) { + $meta_bits = explode(':', $meta->getAttribute($name)); + //dump($meta_bits); + if (in_array($meta_bits[0], $interested_in)) { + if ($data->has($meta->getAttribute($name)) && ! is_array($data->get($meta->getAttribute($name)))) { + $data->put($meta_bits[0], [$data->get($meta->getAttribute($name)), $meta->getAttribute('content')]); + } elseif ($data->has($meta->getAttribute($name)) && is_array($data->get($meta->getAttribute($name)))) { + $data->push($meta->getAttribute('content')); + } else { + $data->put($meta_bits[0], $meta->getAttribute('content')); + } + } + }); + } + } + + // OEmbed + $metas = $doc->getElementsByTagName('link'); + if ($metas->length > 0) { + for ($n = 0; $n < $metas->length; $n++) { + $meta = $metas->item($n); + + if (mb_strtolower($meta->getAttribute('rel')) === 'alternate') { + if (mb_strtolower($meta->getAttribute('type')) === 'application/json+oembed') { + $data->put('oembed.json', $meta->getAttribute('href')); + } + if (mb_strtolower($meta->getAttribute('type')) === 'text/json+oembed') { + $data->put('oembed.json', $meta->getAttribute('href')); + } + if (mb_strtolower($meta->getAttribute('type')) === 'text/xml+oembed') { + $data->put('oembed.xml', $meta->getAttribute('href')); + } + } + } + + $data = $this->parseTwitterOEmbed(collect($metas), $data); + } + + // Basics + $basic = 'title'; + if (preg_match("#<$basic>(.*?)#siu", $content, $matches)) { + $data->put($basic, trim($matches[1], " \n")); + } + $metas = $doc->getElementsByTagName('meta'); + if ($metas->length > 0) { + for ($n = 0; $n < $metas->length; $n++) { + $meta = $metas->item($n); + + if (mb_strtolower($meta->getAttribute('name')) === 'description') { + $data->put('description', $meta->getAttribute('content')); + } + if (mb_strtolower($meta->getAttribute('name')) === 'keywords') { + $data->put('keywords', $meta->getAttribute('content')); + } + } + } + + return $data; + } + + /** + * Parse Twitter OEmbed data. + * + * @param Collection $metas + * @param Collection> $data + * @return Collection + */ + private function parseTwitterOEmbed(Collection $metas, Collection $data): Collection + { + if ($data->has('oembed.jsonp')) { + return $data; + } + + $canonicalLinks = $metas->filter(function ($meta) { + $canonicalLinks = collect(iterator_to_array($meta->attributes)) + ->filter(fn ($attr) => $attr->name === 'rel' && $attr->value === 'canonical'); + + return $canonicalLinks->isNotEmpty(); + }); + + if ($canonicalLinks->isNotEmpty()) { + $firstCanonicalLink = $canonicalLinks->first()->getAttribute('href'); + + if (! empty(trim($firstCanonicalLink)) && preg_match('#^https://(www\.|mobile\.)?twitter\.com#i', $firstCanonicalLink) === 1) { + $data->put('oembed.jsonp', [ + 'https://publish.twitter.com/oembed?url='.$firstCanonicalLink.'&align=center', + ]); + } + } + + return $data; + } +} From 1ae84dcc6878a000bd6d78129971d89ec1aabe12 Mon Sep 17 00:00:00 2001 From: Cam Kemshal-Bell Date: Fri, 9 Aug 2024 20:56:57 +1000 Subject: [PATCH 02/29] feat(link-previews): init link preview card view --- .../views/components/link-preview-card.blade.php | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 resources/views/components/link-preview-card.blade.php diff --git a/resources/views/components/link-preview-card.blade.php b/resources/views/components/link-preview-card.blade.php new file mode 100644 index 000000000..0690170da --- /dev/null +++ b/resources/views/components/link-preview-card.blade.php @@ -0,0 +1,14 @@ +
+
+ @if(isset($ogData['image'])) +
+ OpenGraph Image +
+ @endif +
+
{{ $ogData['site_name'] ?? 'Link Preview' }}
+ {{ $ogData['title'] ?? $url }} +

{{ $ogData['description'] ?? '' }}

+
+
+
From 9159bfd542c48639907801183bb337a3f829f920 Mon Sep 17 00:00:00 2001 From: Cam Kemshal-Bell Date: Fri, 9 Aug 2024 20:57:33 +1000 Subject: [PATCH 03/29] feat(link-previews): fetch metadata in the link parser --- .../LinkProviderParsable.php | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/app/Services/ParsableContentProviders/LinkProviderParsable.php b/app/Services/ParsableContentProviders/LinkProviderParsable.php index a2b24106e..929044b71 100644 --- a/app/Services/ParsableContentProviders/LinkProviderParsable.php +++ b/app/Services/ParsableContentProviders/LinkProviderParsable.php @@ -5,6 +5,7 @@ namespace App\Services\ParsableContentProviders; use App\Contracts\Services\ParsableContentProvider; +use App\Services\MetaData; final readonly class LinkProviderParsable implements ParsableContentProvider { @@ -26,6 +27,17 @@ function (array $matches): string { $url = $isMail ? 'mailto:'.$humanUrl : $url; + if (! $isMail && $url) { + $metadata = MetaData::fetch($url); + + if ($metadata->isNotEmpty()) { + return view('components.link-preview-card', [ + 'data' => $metadata, + 'url' => $url, + ])->render(); + } + } + return ''.$humanUrl.''; }, str_replace('&', '&', $content) From 3cedd7f515641ba1b7f0ac46744d73eb4c9fc2d9 Mon Sep 17 00:00:00 2001 From: Cam Kemshal-Bell Date: Sun, 22 Sep 2024 13:02:58 +1000 Subject: [PATCH 04/29] feat(previews): format updates to link preview card --- .../components/link-preview-card.blade.php | 40 ++++++++++++++----- 1 file changed, 29 insertions(+), 11 deletions(-) diff --git a/resources/views/components/link-preview-card.blade.php b/resources/views/components/link-preview-card.blade.php index 0690170da..8be556687 100644 --- a/resources/views/components/link-preview-card.blade.php +++ b/resources/views/components/link-preview-card.blade.php @@ -1,14 +1,32 @@ -
-
- @if(isset($ogData['image'])) -
- OpenGraph Image +@if($data->has('image')) +
+
+ + {{ $data->get('title') ?? $url }} + + +
+

{{ $data->get('title') ?? $url }}

- @endif -
-
{{ $ogData['site_name'] ?? 'Link Preview' }}
- {{ $ogData['title'] ?? $url }} -

{{ $ogData['description'] ?? '' }}

-
+ +
+ From {{ parse_url($url)['host'] }} +
+@elseif ($data->has('html')) +
+ {!! $data->get('html') !!} +
+@endif From 8244654e3b59ad9479c4bfe6fc68a3cff5087f19 Mon Sep 17 00:00:00 2001 From: Cam Kemshal-Bell Date: Sun, 22 Sep 2024 13:07:39 +1000 Subject: [PATCH 05/29] feat(previews): cleanup the MetaData service --- app/Services/MetaData.php | 173 ++++++------------ .../LinkProviderParsable.php | 1 + 2 files changed, 57 insertions(+), 117 deletions(-) diff --git a/app/Services/MetaData.php b/app/Services/MetaData.php index 76ec8b07c..a63f41f19 100644 --- a/app/Services/MetaData.php +++ b/app/Services/MetaData.php @@ -5,48 +5,35 @@ namespace App\Services; use DOMDocument; -use DOMElement; use Illuminate\Support\Collection; use Illuminate\Support\Facades\Cache; use Illuminate\Support\Facades\Http; +use Illuminate\Support\Str; -/** - * @phpstan-type MetaData = array{ - * title: string, - * type: string, - * image: string, - * url: string, - * description: string, - * site_name: string, - * locale: string, - * } - * @phpstan-type MetaDataCollection = Collection - */ final readonly class MetaData { /** * The Open Graph data. - * - * @var MetaDataCollection */ private Collection $data; /** * Fetch the Open Graph data for a given URL. */ - public function __construct( - private string $url - ) { - $this->data = Cache::get( - $this->url, + public function __construct(private string $url) + { + $this->data = Cache::remember( + Str::of($url)->slug()->prepend('preview_')->value(), + now()->addYear(), + // NOTE: check why this data is not being cached, we are just caching a Collection in the DB & I can't see the data in the DB fn () => $this->getData() ); } /** - * Get the open graph data for a given external URL. + * Fetch the parsed meta-data for a given URL. * - * @return MetaDataCollection + * @return Collection */ public static function fetch(string $url): Collection { @@ -54,96 +41,60 @@ public static function fetch(string $url): Collection } /** - * Get the Open Graph data. + * Get the meta-data for a given URL. * - * @return MetaDataCollection + * @return Collection */ public function getData(): Collection { $response = Http::get($this->url); if ($response->ok()) { - $opg = $this->parse($response->body()); - //Cache::put($this->url, $opg, now()->addDay()); - dump($opg); - - return $opg; + // TODO: add unit test for this service + return $this->parse($response->body()); } return collect(); } - // ensure fb, twitter card, open graph, and oembed are all parsed, - // so we can use them in the view to generate a preview + /** + * Fetch Twitter oEmbed data for a given tweet URL. + * + * @return Collection + */ + private function fetchTwitterOEmbed(string $tweetUrl): Collection + { + $oEmbedUrl = 'https://publish.twitter.com/oembed?url='.urlencode($tweetUrl); + + $response = Http::get($oEmbedUrl); + + if ($response->ok()) { + return collect($response->json()); + } + + return collect(); + } /** * Parse the response body for MetaData. * - * @return Collection + * @return Collection */ private function parse(string $content): Collection { $doc = new DOMDocument(); @$doc->loadHTML($content); - $interested_in = ['og', 'fb', 'twitter']; - + $interested_in = ['og', 'twitter']; $data = collect(); - - // Open graph $metas = $doc->getElementsByTagName('meta'); - if ($metas->length > 0) { - dump($metas); - for ($n = 0; $n < $metas->length; $n++) { - $meta = $metas->item($n); - - collect(['name', 'property'])->each(function ($name) use ($meta, $interested_in, $data) { - $meta_bits = explode(':', $meta->getAttribute($name)); - //dump($meta_bits); - if (in_array($meta_bits[0], $interested_in)) { - if ($data->has($meta->getAttribute($name)) && ! is_array($data->get($meta->getAttribute($name)))) { - $data->put($meta_bits[0], [$data->get($meta->getAttribute($name)), $meta->getAttribute('content')]); - } elseif ($data->has($meta->getAttribute($name)) && is_array($data->get($meta->getAttribute($name)))) { - $data->push($meta->getAttribute('content')); - } else { - $data->put($meta_bits[0], $meta->getAttribute('content')); - } - } - }); - } - } - // OEmbed - $metas = $doc->getElementsByTagName('link'); - if ($metas->length > 0) { - for ($n = 0; $n < $metas->length; $n++) { - $meta = $metas->item($n); - - if (mb_strtolower($meta->getAttribute('rel')) === 'alternate') { - if (mb_strtolower($meta->getAttribute('type')) === 'application/json+oembed') { - $data->put('oembed.json', $meta->getAttribute('href')); - } - if (mb_strtolower($meta->getAttribute('type')) === 'text/json+oembed') { - $data->put('oembed.json', $meta->getAttribute('href')); - } - if (mb_strtolower($meta->getAttribute('type')) === 'text/xml+oembed') { - $data->put('oembed.xml', $meta->getAttribute('href')); - } + if ($metas->count() > 0) { + foreach ($metas as $meta) { + // basic meta tags + if (mb_strtolower($meta->getAttribute('name')) === 'title') { + $data->put('title', $meta->getAttribute('content')); } - } - - $data = $this->parseTwitterOEmbed(collect($metas), $data); - } - - // Basics - $basic = 'title'; - if (preg_match("#<$basic>(.*?)#siu", $content, $matches)) { - $data->put($basic, trim($matches[1], " \n")); - } - $metas = $doc->getElementsByTagName('meta'); - if ($metas->length > 0) { - for ($n = 0; $n < $metas->length; $n++) { - $meta = $metas->item($n); if (mb_strtolower($meta->getAttribute('name')) === 'description') { $data->put('description', $meta->getAttribute('content')); @@ -151,42 +102,30 @@ private function parse(string $content): Collection if (mb_strtolower($meta->getAttribute('name')) === 'keywords') { $data->put('keywords', $meta->getAttribute('content')); } - } - } - return $data; - } - - /** - * Parse Twitter OEmbed data. - * - * @param Collection $metas - * @param Collection> $data - * @return Collection - */ - private function parseTwitterOEmbed(Collection $metas, Collection $data): Collection - { - if ($data->has('oembed.jsonp')) { - return $data; + // og & twitter meta tags + collect(['name', 'property']) + ->map(fn ($name) => $meta->getAttribute($name)) + ->filter(fn ($attribute) => in_array(explode(':', $attribute)[0], $interested_in)) + ->each(function ($attribute) use ($data, $meta) { + $key = explode(':', $attribute)[1]; + if (! $data->has($key)) { + $data->put($key, $meta->getAttribute('content')); + } + }); + } } - $canonicalLinks = $metas->filter(function ($meta) { - $canonicalLinks = collect(iterator_to_array($meta->attributes)) - ->filter(fn ($attr) => $attr->name === 'rel' && $attr->value === 'canonical'); - - return $canonicalLinks->isNotEmpty(); - }); - - if ($canonicalLinks->isNotEmpty()) { - $firstCanonicalLink = $canonicalLinks->first()->getAttribute('href'); - - if (! empty(trim($firstCanonicalLink)) && preg_match('#^https://(www\.|mobile\.)?twitter\.com#i', $firstCanonicalLink) === 1) { - $data->put('oembed.jsonp', [ - 'https://publish.twitter.com/oembed?url='.$firstCanonicalLink.'&align=center', - ]); + // if the title is x.com, fetch twitter oEmbed & add to data + if ($data->has('site_name') && $data->get('site_name') === 'X (formerly Twitter)') { + $x = $this->fetchTwitterOEmbed($this->url); + if ($x->isNotEmpty()) { + foreach ($x as $key => $value) { + $data->put($key, $value); + } } } - return $data; + return $data->unique(); } } diff --git a/app/Services/ParsableContentProviders/LinkProviderParsable.php b/app/Services/ParsableContentProviders/LinkProviderParsable.php index bd03a7e96..1535d0254 100644 --- a/app/Services/ParsableContentProviders/LinkProviderParsable.php +++ b/app/Services/ParsableContentProviders/LinkProviderParsable.php @@ -35,6 +35,7 @@ function (array $matches): string { $metadata = MetaData::fetch($url); if ($metadata->isNotEmpty()) { + // TODO: add tests for this logic to the parser unit test return view('components.link-preview-card', [ 'data' => $metadata, 'url' => $url, From 482f0dd14a5bb7c1b7a71a5e719b61bb9f0192ce Mon Sep 17 00:00:00 2001 From: Cam Kemshal-Bell Date: Tue, 15 Oct 2024 23:23:16 +1100 Subject: [PATCH 06/29] feat: style updates to link preview card --- .../components/link-preview-card.blade.php | 51 +++++++++++-------- 1 file changed, 29 insertions(+), 22 deletions(-) diff --git a/resources/views/components/link-preview-card.blade.php b/resources/views/components/link-preview-card.blade.php index 8be556687..aeb310d91 100644 --- a/resources/views/components/link-preview-card.blade.php +++ b/resources/views/components/link-preview-card.blade.php @@ -1,30 +1,37 @@ @if($data->has('image')) -
-
- + @php($shortUrl = parse_url($url)['host']) +