Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Handle providers listed with subdomains, fix test timeouts, fix task timeouts #26

Open
wants to merge 6 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .formatter.exs
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
[
import_deps: [],
inputs: ["*.{ex,exs}", "{config,lib,test}/**/*.{ex,exs}"],
subdirectories: []
]
2 changes: 1 addition & 1 deletion benchmark.exs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
## Parsers

### Vimeo
vimeo = File.read! "./test/fixtures/vimeo.html"
vimeo = File.read!("./test/fixtures/vimeo.html")

Benchee.run(%{
"facebook" => fn -> Furlex.Parser.Facebook.parse(vimeo) end,
Expand Down
94 changes: 49 additions & 45 deletions lib/furlex.ex
Original file line number Diff line number Diff line change
Expand Up @@ -12,28 +12,33 @@ defmodule Furlex do
alias Furlex.Parser.{Facebook, HTML, JsonLD, Twitter}

defstruct [
:canonical_url, :oembed, :facebook, :twitter, :json_ld, :other, :status_code
:canonical_url,
:oembed,
:facebook,
:twitter,
:json_ld,
:other,
:status_code
]

@type t :: %__MODULE__{
canonical_url: String.t,
oembed: nil | Map.t,
facebook: Map.t,
twitter: Map.t,
json_ld: List.t,
other: Map.t,
status_code: Integer.t,
}
canonical_url: String.t(),
oembed: nil | Map.t(),
facebook: Map.t(),
twitter: Map.t(),
json_ld: List.t(),
other: Map.t(),
status_code: Integer.t()
}

@doc false
def start(_type, _args) do
import Supervisor.Spec

opts = [strategy: :one_for_one, name: Furlex.Supervisor]
children = [
worker(Furlex.Oembed, [[name: Furlex.Oembed]]),
{Furlex.Oembed, [name: Furlex.Oembed]}
]

opts = [strategy: :one_for_one, name: Furlex.Supervisor]

Supervisor.start_link(children, opts)
end

Expand All @@ -45,54 +50,53 @@ defmodule Furlex do

unfurl/2 also accepts a keyword list that will be passed to HTTPoison.
"""
@spec unfurl(String.t, Keyword.t) :: {:ok, __MODULE__.t} | {:error, Atom.t}
@spec unfurl(String.t(), Keyword.t()) :: {:ok, __MODULE__.t()} | {:error, Atom.t()}
def unfurl(url, opts \\ []) do
with {:ok, {body, status_code}, oembed} <- fetch(url, opts),
{:ok, results} <- parse(body)
do
{:ok, %__MODULE__{
canonical_url: Parser.extract_canonical(body),
oembed: oembed,
facebook: results.facebook,
twitter: results.twitter,
json_ld: results.json_ld,
other: results.other,
status_code: status_code,
}}
{:ok, results} <- parse(body) do
{:ok,
%__MODULE__{
canonical_url: Parser.extract_canonical(body),
oembed: oembed,
facebook: results.facebook,
twitter: results.twitter,
json_ld: results.json_ld,
other: results.other,
status_code: status_code
}}
end
end

defp fetch(url, opts) do
fetch = Task.async Fetcher, :fetch, [ url, opts ]
fetch_oembed = Task.async Fetcher, :fetch_oembed, [ url, opts ]
yield = Task.yield_many [fetch, fetch_oembed]
fetch = Task.async(Fetcher, :fetch, [url, opts])
fetch_oembed = Task.async(Fetcher, :fetch_oembed, [url, opts])
yield = Task.yield_many([fetch, fetch_oembed], 10_000)

with [ fetch, fetch_oembed ] <- yield,
{_fetch, {:ok, {:ok, body, status_code}}} <- fetch,
{_fetch_oembed, {:ok, {:ok, oembed}}} <- fetch_oembed
do
with [fetch, fetch_oembed] <- yield,
{_fetch, {:ok, {:ok, body, status_code}}} <- fetch,
{_fetch_oembed, {:ok, {:ok, oembed}}} <- fetch_oembed do
{:ok, {body, status_code}, oembed}
else
_ -> {:error, :fetch_error}
end
end

defp parse(body) do
parse = &Task.async(&1, :parse, [ body ])
parse = &Task.async(&1, :parse, [body])
tasks = Enum.map([Facebook, Twitter, JsonLD, HTML], parse)

with [ facebook, twitter, json_ld, other ] <- Task.yield_many(tasks),
{_facebook, {:ok, {:ok, facebook}}} <- facebook,
{_twitter, {:ok, {:ok, twitter}}} <- twitter,
{_json_ld, {:ok, {:ok, json_ld}}} <- json_ld,
{_other, {:ok, {:ok, other}}} <- other
do
{:ok, %{
facebook: facebook,
twitter: twitter,
json_ld: json_ld,
other: other
}}
with [facebook, twitter, json_ld, other] <- Task.yield_many(tasks, 18_000),
{_facebook, {:ok, {:ok, facebook}}} <- facebook,
{_twitter, {:ok, {:ok, twitter}}} <- twitter,
{_json_ld, {:ok, {:ok, json_ld}}} <- json_ld,
{_other, {:ok, {:ok, other}}} <- other do
{:ok,
%{
facebook: facebook,
twitter: twitter,
json_ld: json_ld,
other: other
}}
else
_ -> {:error, :parse_error}
end
Expand Down
15 changes: 7 additions & 8 deletions lib/furlex/fetcher.ex
Original file line number Diff line number Diff line change
Expand Up @@ -12,32 +12,31 @@ defmodule Furlex.Fetcher do
@doc """
Fetches a url and extracts the body
"""
@spec fetch(String.t, List.t) :: {:ok, String.t, Integer.t} | {:error, Atom.t}
@spec fetch(String.t(), List.t()) :: {:ok, String.t(), Integer.t()} | {:error, Atom.t()}
def fetch(url, opts \\ []) do
case HTTPoison.get(url, [], opts) do
{:ok, %{body: body, status_code: status_code}} -> {:ok, body, status_code}
other -> other
other -> other
end
end

@doc """
Fetches oembed data for the given url
"""
@spec fetch_oembed(String.t, List.t) :: {:ok, String.t} | {:ok, nil} | {:error, Atom.t}
@spec fetch_oembed(String.t(), List.t()) :: {:ok, String.t()} | {:ok, nil} | {:error, Atom.t()}
def fetch_oembed(url, opts \\ []) do
with {:ok, endpoint} <- Oembed.endpoint_from_url(url),
params = %{"url" => url},
opts = Keyword.put(opts, :params, params),
params = %{"url" => url},
opts = Keyword.put(opts, :params, params),
{:ok, response} <- HTTPoison.get(endpoint, [], opts),
{:ok, body} <- @json_library.decode(response.body)
do
{:ok, body} <- @json_library.decode(response.body) do
{:ok, body}
else
{:error, :no_oembed_provider} ->
{:ok, nil}

other ->
"Could not fetch oembed for #{inspect url}: #{inspect other}"
"Could not fetch oembed for #{inspect(url)}: #{inspect(other)}"
|> Logger.error()

{:ok, nil}
Expand Down
33 changes: 17 additions & 16 deletions lib/furlex/oembed.ex
Original file line number Diff line number Diff line change
Expand Up @@ -16,22 +16,24 @@ defmodule Furlex.Oembed do
Soft fetch will fetch cached providers. Hard fetch requests
providers from oembed.com and purges the cache.
"""
@spec fetch_providers(Atom.t) :: {:ok, List.t} | {:error, Atom.t}
@spec fetch_providers(Atom.t()) :: {:ok, List.t()} | {:error, Atom.t()}
def fetch_providers(type \\ :soft)

def fetch_providers(:hard) do
case get("/providers.json") do
{:ok, %{body: providers}} ->
GenServer.cast __MODULE__, {:providers, providers}
GenServer.cast(__MODULE__, {:providers, providers})
{:ok, providers}

other ->
Logger.error "Could not fetch providers: #{inspect other}"
other ->
Logger.error("Could not fetch providers: #{inspect(other)}")
{:error, :fetch_error}
end
end

def fetch_providers(_soft) do
case GenServer.call(__MODULE__, :providers) do
nil -> fetch_providers(:hard)
nil -> fetch_providers(:hard)
providers -> {:ok, providers}
end
end
Expand All @@ -47,10 +49,10 @@ defmodule Furlex.Oembed do
iex> Oembed.endpoint_from_url "https://vimeo.com/88856141", %{"format" => "xml"}
{:ok, "https://vimeo.com/api/oembed.xml"}
"""
@spec endpoint_from_url(String.t, Map.t) :: {:ok, String.t} | {:error, Atom.t}
@spec endpoint_from_url(String.t(), Map.t()) :: {:ok, String.t()} | {:error, Atom.t()}
def endpoint_from_url(url, params \\ %{"format" => "json"}, opts \\ []) do
case provider_from_url(url, opts) do
nil ->
nil ->
{:error, :no_oembed_provider}

provider ->
Expand All @@ -60,39 +62,38 @@ defmodule Furlex.Oembed do

# Maps a url to a provider, or returns nil if no such provider exists
defp provider_from_url(url, opts) do
fetch_type =
if Keyword.get(opts, :skip_cache?, false), do: :hard, else: :soft
fetch_type = if Keyword.get(opts, :skip_cache?, false), do: :hard, else: :soft

{:ok, providers} = fetch_providers(fetch_type)

case URI.parse(url) do
%URI{host: nil} ->
%URI{host: nil} ->
nil

%URI{host: host} ->
Enum.find providers, &host_matches?(host, &1)
Enum.find(providers, &host_matches?(host, &1))
end
end

defp endpoint_from_provider(provider, params) do
[ endpoint | _] = provider["endpoints"]
[endpoint | _] = provider["endpoints"]

url = endpoint["url"]
url = endpoint["url"]
regex = ~r/{(.*?)}/
url = Regex.replace regex, url, fn _, key -> params[key] end
url = Regex.replace(regex, url, fn _, key -> params[key] end)

{:ok, url}
end

defp host_matches?(host, %{"provider_url" => provider_url}) do
Regex.match? ~r/https?:\/\/#{host}/, provider_url
Regex.match?(~r/https?:\/\/([a-zA-Z0-9]+\.)?#{host}/, provider_url)
end

## GenServer callbacks

@doc false
def start_link(opts \\ []) do
GenServer.start_link __MODULE__, nil, opts
GenServer.start_link(__MODULE__, nil, opts)
end

def init(state) do
Expand Down
Loading