diff --git a/config/config.exs b/config/config.exs index 249f185..80bd770 100644 --- a/config/config.exs +++ b/config/config.exs @@ -14,6 +14,35 @@ config :search, # Add types added by the pgvector-elixir extension to Postgrex config :search, Search.Repo, types: Search.PostgrexTypes +# Register embedding providers +config :search, :embedding_providers, + paraphrase_l3: { + Search.Embeddings.BumblebeeProvider, + serving_name: Search.Embeddings.ParaphraseL3, + model: {:hf, "sentence-transformers/paraphrase-MiniLM-L3-v2"}, + embedding_size: 384, + load_model_opts: [ + backend: EXLA.Backend + ], + serving_opts: [ + compile: [batch_size: 16, sequence_length: 512], + defn_options: [compiler: EXLA] + ] + }, + paraphrase_albert_small: { + Search.Embeddings.BumblebeeProvider, + serving_name: Search.Embeddings.ParaphraseAlbertSmall, + model: {:hf, "sentence-transformers/paraphrase-albert-small-v2"}, + embedding_size: 768, + load_model_opts: [ + backend: EXLA.Backend + ], + serving_opts: [ + compile: [batch_size: 16, sequence_length: 100], + defn_options: [compiler: EXLA] + ] + } + # Configures the endpoint config :search, SearchWeb.Endpoint, url: [host: "localhost"], @@ -56,7 +85,7 @@ config :logger, :console, config :phoenix, :json_library, Jason # Configure the EXLA backend for Nx -config :nx, :default_backend, EXLA.Backend +config :nx, :default_backend, {EXLA.Backend, client: :host} # Import environment specific config. This must remain at the bottom # of this file so it overrides the configuration defined above. diff --git a/lib/mix/tasks/search.add.ex b/lib/mix/tasks/search.add.ex new file mode 100644 index 0000000..45b3326 --- /dev/null +++ b/lib/mix/tasks/search.add.ex @@ -0,0 +1,37 @@ +defmodule Mix.Tasks.Search.Add do + alias Search.Packages + alias Search.HexClient + + @moduledoc """ + Usage: mix #{Mix.Task.task_name(__MODULE__)} [] + + Fetches the documentation for the given package from Hex. Does not embed it yet. + + If the version is ommitted, it will choose the newest release. + """ + @shortdoc "Adds a package's documentation to the index" + + use Mix.Task + + @requirements ["app.start"] + + @impl Mix.Task + def run(args) do + [package | args_tail] = args + + package_or_release = + case args_tail do + [version] -> + version = Version.parse!(version) + %HexClient.Release{package_name: package, version: version} + + [] -> + package + end + + case Packages.add_package(package_or_release) do + {:ok, package} -> Mix.shell().info("Package #{package.name}@#{package.version} added.") + {:error, err} -> Mix.shell().error("Error: #{err}") + end + end +end diff --git a/lib/mix/tasks/search.embed.ex b/lib/mix/tasks/search.embed.ex new file mode 100644 index 0000000..994cf95 --- /dev/null +++ b/lib/mix/tasks/search.embed.ex @@ -0,0 +1,31 @@ +defmodule Mix.Tasks.Search.Embed do + @moduledoc """ + Usage: mix #{Mix.Task.task_name(__MODULE__)} + + Embeds the unembedded docs using the model registered in the config + """ + @shortdoc "Embeds the unembedded doc fragments" + + use Mix.Task + + @requirements ["app.start"] + + defp callback({total, done}) do + ProgressBar.render(done, total) + end + + @impl Mix.Task + def run([model_name]) do + embedding_models = + Search.Application.embedding_models() + |> Keyword.keys() + |> Enum.map(&Atom.to_string/1) + + if Enum.member?(embedding_models, model_name) do + Search.Embeddings.embed(String.to_existing_atom(model_name), &callback/1) + Mix.shell().info("Done.") + else + Mix.shell().error("Expected model name to be one of: #{Enum.join(embedding_models, ", ")}.") + end + end +end diff --git a/lib/mix/tasks/search/index.ex b/lib/mix/tasks/search/index.ex deleted file mode 100644 index cfcb899..0000000 --- a/lib/mix/tasks/search/index.ex +++ /dev/null @@ -1,53 +0,0 @@ -defmodule Mix.Tasks.Search.Index do - @moduledoc """ - Usage: mix #{Mix.Task.task_name(__MODULE__)} [] - - Fetches the documentation for the given package from Hex and indexes it using the embedding model. - - If the version is ommitted, it will choose the newest release. - """ - @shortdoc "Indexes a package's documentation" - - use Mix.Task - - @requirements ["app.start"] - - @impl Mix.Task - def run(args) do - [package | args_tail] = args - {:ok, releases} = Search.HexClient.get_releases(package) - - release = - case args_tail do - [version] -> - version = Version.parse!(version) - Enum.find(releases, &(&1.version == version)) - - [] -> - Enum.max_by(releases, & &1.version, Version, fn -> nil end) - end - - if release do - {:ok, tarball} = Search.HexClient.get_docs_tarball(release) - {:ok, docs} = Search.ExDocParser.extract_search_data(tarball) - docs = Enum.map(docs, & &1["doc"]) - docs_len = length(docs) - - ProgressBar.render(0, docs_len) - - docs - |> Stream.with_index(1) - |> Enum.each(fn {doc, i} -> - %{embedding: embedding} = Nx.Serving.batched_run(Search.Embedding, doc) - - ProgressBar.render(i, docs_len) - - fragment = %Search.Fragment{doc_text: doc, embedding: embedding} - - Search.Repo.insert!(fragment) - end) - else - Mix.shell().error("Release not found.") - end - end -end diff --git a/lib/search/application.ex b/lib/search/application.ex index c4ef71b..52c7b46 100644 --- a/lib/search/application.ex +++ b/lib/search/application.ex @@ -5,18 +5,24 @@ defmodule Search.Application do use Application + def embedding_models do + Application.fetch_env!(:search, :embedding_providers) + end + @impl true def start(_type, _args) do - children = [ - SearchWeb.Telemetry, - Search.Repo, - {DNSCluster, query: Application.get_env(:search, :dns_cluster_query) || :ignore}, - {Phoenix.PubSub, name: Search.PubSub}, - # Start a worker by calling: Search.Worker.start_link(arg) - {Search.Embedding, name: Search.Embedding}, - # Start to serve requests, typically the last entry - SearchWeb.Endpoint - ] + children = + [ + SearchWeb.Telemetry, + Search.Repo, + {DNSCluster, query: Application.get_env(:search, :dns_cluster_query) || :ignore}, + {Phoenix.PubSub, name: Search.PubSub} + ] ++ + Enum.map(embedding_models(), fn {_, {provider, opts}} -> provider.child_spec(opts) end) ++ + [ + # Start to serve requests, typically the last entry + SearchWeb.Endpoint + ] # See https://hexdocs.pm/elixir/Supervisor.html # for other strategies and supported options diff --git a/lib/search/embedding.ex b/lib/search/embedding.ex deleted file mode 100644 index 9e92d33..0000000 --- a/lib/search/embedding.ex +++ /dev/null @@ -1,23 +0,0 @@ -defmodule Search.Embedding do - @moduledoc """ - Provides text embedding capabilities. Currently uses Bumblebee with Sentence Transformers paraphrase L3 model - """ - - @embedding_size 384 - @model_repo {:hf, "sentence-transformers/paraphrase-MiniLM-L3-v2"} - - def embedding_size, do: @embedding_size - - def child_spec(opts) do - opts - |> Keyword.merge(serving: load_model()) - |> Nx.Serving.child_spec() - end - - defp load_model() do - {:ok, model_info} = Bumblebee.load_model(@model_repo) - {:ok, tokenizer} = Bumblebee.load_tokenizer(@model_repo) - - Bumblebee.Text.text_embedding(model_info, tokenizer) - end -end diff --git a/lib/search/embeddings.ex b/lib/search/embeddings.ex new file mode 100644 index 0000000..9bcb64e --- /dev/null +++ b/lib/search/embeddings.ex @@ -0,0 +1,107 @@ +defmodule Search.Embeddings do + @moduledoc """ + The Embeddings context. + """ + + import Ecto.Query + import Pgvector.Ecto.Query + require Logger + alias Search.{Embeddings, Repo, Packages} + + @doc """ + + Embeds any doc fragments which do not have an embedding yet. + + Recieves an optional callback, + which is called to notify about the embedding progress with the tuple {total, done} as its argument. + """ + def embed(model_name, progress_callback \\ &Function.identity/1) do + {provider, config} = + Application.fetch_env!(:search, :embedding_providers) + |> Keyword.fetch!(model_name) + + table_name = table_name(model_name) + + fragments = + from f in Packages.DocFragment, + left_join: e in ^{table_name, Embeddings.Embedding}, + on: e.doc_fragment_id == f.id, + where: is_nil(e) + + fragments = Repo.all(fragments) + texts = Enum.map(fragments, & &1.text) + + embeddings = provider.embed(texts, progress_callback, config) + + now = DateTime.utc_now(:second) + + embeddings_params = + Stream.zip(fragments, embeddings) + |> Enum.map(fn {fragment, embedding} -> + %{ + doc_fragment_id: fragment.id, + embedding: embedding, + updated_at: now, + inserted_at: now + } + end) + + Repo.transaction_with(fn -> + {inserted_count, inserted_embeddings} = + Repo.insert_all({table_name, Embeddings.Embedding}, embeddings_params, returning: true) + + if inserted_count == length(embeddings) do + {:ok, inserted_embeddings} + else + {:error, "Could not insert all embeddings."} + end + end) + end + + def embedding_size(model_name), do: get_config(model_name, :embedding_size) + def table_name(model_name), do: "embeddings__#{model_name}" + + def embed_one(model_name, text) do + {provider, config} = + Application.fetch_env!(:search, :embedding_providers) + |> Keyword.fetch!(model_name) + + provider.embed_one(text, config) + end + + def knn_query(model_name, query_vector, opts \\ []) do + table_name = table_name(model_name) + + %{metric: metric, k: k} = + opts + |> Keyword.validate!(metric: :cosine, k: nil) + |> Map.new() + + query = + from e in {table_name, Embeddings.Embedding}, + preload: [doc_fragment: [doc_item: :package]], + select: e, + limit: ^k + + query = + case metric do + :cosine -> + from e in query, + order_by: cosine_distance(e.embedding, ^query_vector) + + :l2 -> + from e in query, + order_by: l2_distance(e.embedding, ^query_vector) + end + + Repo.all(query) + end + + defp get_config(model_name, key) do + {_provider, config} = + Application.fetch_env!(:search, :embedding_providers) + |> Keyword.fetch!(model_name) + + Keyword.fetch!(config, key) + end +end diff --git a/lib/search/embeddings/bumblebee_provider.ex b/lib/search/embeddings/bumblebee_provider.ex new file mode 100644 index 0000000..33c0526 --- /dev/null +++ b/lib/search/embeddings/bumblebee_provider.ex @@ -0,0 +1,85 @@ +defmodule Search.Embeddings.BumblebeeProvider do + alias Search.Embeddings + + @behaviour Embeddings.Provider + + @impl true + def child_spec(opts) do + %{serving_name: serving_name} = parse_opts(opts) + + {Nx.Serving, serving: load_model(opts), name: serving_name} + end + + @impl true + def embed_one(text, opts) do + %{serving_name: serving_name} = parse_opts(opts) + + Nx.Serving.batched_run(serving_name, text).embedding + end + + @impl true + def embed([] = _text_list, _progress_callback, _opts), do: [] + @impl true + def embed(text_list, progress_callback, opts) do + %{serving_name: serving_name} = + parse_opts(opts) + + texts_count = length(text_list) + + batch_size = + get_batch_size(opts) + + progress_callback.({texts_count, 0}) + + text_list + |> Stream.chunk_every(batch_size) + |> Stream.with_index(1) + |> Stream.map(fn {texts, batch_num} -> + embeddings = + Nx.Serving.batched_run(serving_name, texts) + |> Enum.map(& &1.embedding) + + progress_callback.({texts_count, min(texts_count, batch_num * batch_size)}) + + embeddings + end) + |> Enum.flat_map(&Function.identity/1) + end + + defp get_batch_size(opts) do + opts + |> Keyword.fetch!(:serving_opts) + |> Keyword.fetch!(:compile) + |> Keyword.fetch!(:batch_size) + end + + defp parse_opts(opts) do + opts + |> Keyword.validate!([ + :model, + :serving_name, + :embedding_size, + :serving_opts, + load_model_opts: [], + load_tokenizer_opts: [] + ]) + |> Map.new() + end + + defp load_model(opts) do + %{ + serving_opts: serving_opts, + model: model_repo, + load_model_opts: load_model_opts, + load_tokenizer_opts: load_tokenizer_opts + } = + parse_opts(opts) + + {:ok, model_info} = Bumblebee.load_model(model_repo, load_model_opts) + + {:ok, tokenizer} = + Bumblebee.load_tokenizer(model_repo, load_tokenizer_opts) + + Bumblebee.Text.text_embedding(model_info, tokenizer, serving_opts) + end +end diff --git a/lib/search/embeddings/embedding.ex b/lib/search/embeddings/embedding.ex new file mode 100644 index 0000000..4a9f7c0 --- /dev/null +++ b/lib/search/embeddings/embedding.ex @@ -0,0 +1,22 @@ +defmodule Search.Embeddings.Embedding do + @type t :: %__MODULE__{} + + use Ecto.Schema + import Ecto.Changeset + alias Pgvector.Ecto.Vector + alias Search.Packages + + schema "" do + field :embedding, Vector + belongs_to :doc_fragment, Packages.DocFragment + + timestamps(type: :utc_datetime) + end + + def changeset(embedding, attrs) do + embedding + |> cast(attrs, [:embedding]) + |> cast_assoc(:doc_fragment, required: true) + |> validate_required([:embedding]) + end +end diff --git a/lib/search/embeddings/provider.ex b/lib/search/embeddings/provider.ex new file mode 100644 index 0000000..c113fc1 --- /dev/null +++ b/lib/search/embeddings/provider.ex @@ -0,0 +1,12 @@ +defmodule Search.Embeddings.Provider do + @callback child_spec(config :: keyword()) :: atom() | {atom(), term()} | Supervisor.child_spec() + + @callback embed( + stream :: [String.t()], + progress_callback :: ({total :: integer(), done :: integer()} -> any()), + config :: keyword() + ) :: + [Nx.Tensor.t()] + + @callback embed_one(text :: binary(), config :: keyword()) :: Nx.Tensor.t() +end diff --git a/lib/search/fragment.ex b/lib/search/fragment.ex deleted file mode 100644 index f3349a1..0000000 --- a/lib/search/fragment.ex +++ /dev/null @@ -1,53 +0,0 @@ -defmodule Search.Fragment do - @moduledoc """ - Context for indexed documentation fragments - each fragment has associated with it an embedding vector, upon which - kNN lookup can be performed. - """ - - alias Search.{Fragment, Repo} - use Ecto.Schema - import Ecto.{Changeset, Query} - import Pgvector.Ecto.Query - - # Pgvector cannot handle inner product heuristic in ascending order, making it useless here - @metrics [:cosine, :l2] - - schema "fragments" do - field :doc_text, :string - field :embedding, Pgvector.Ecto.Vector - - timestamps(type: :utc_datetime) - end - - def metrics, do: @metrics - - def knn_lookup(query_tensor, opts \\ []) do - opts = Keyword.validate!(opts, metric: :cosine, k: nil) - metric = opts[:metric] - k = opts[:k] - - query = - case metric do - :cosine -> - from f in Fragment, - order_by: cosine_distance(f.embedding, ^query_tensor), - limit: ^k, - select: f - - :l2 -> - from f in Fragment, - order_by: l2_distance(f.embedding, ^query_tensor), - limit: ^k, - select: f - end - - Repo.all(query) - end - - @doc false - def changeset(fragment, attrs) do - fragment - |> cast(attrs, [:doc_text, :embedding]) - |> validate_required([:doc_text, :embedding]) - end -end diff --git a/lib/search/hex_client.ex b/lib/search/hex_client.ex index febefd9..167e7e8 100644 --- a/lib/search/hex_client.ex +++ b/lib/search/hex_client.ex @@ -1,18 +1,16 @@ defmodule Search.HexClient do - @api_url "https://hex.pm/api" @repo_url "https://repo.hex.pm" alias Search.HexClient def get_releases(package_name) when is_binary(package_name) do - case get("#{@api_url}/packages/#{package_name}") do - {:ok, %{status: 200, body: %{"releases" => releases}}} -> + case get("packages/#{package_name}") do + {:ok, %{status: 200, body: releases}} -> res = - for %{"has_docs" => has_docs, "version" => version} <- releases do + for %{version: version} <- releases do %HexClient.Release{ package_name: package_name, - version: Version.parse!(version), - has_docs: has_docs + version: Version.parse!(version) } end @@ -21,27 +19,39 @@ defmodule Search.HexClient do {:ok, %{status: status}} -> {:error, "HTTP #{status}"} + {:error, ex} when is_exception(ex) -> + {:error, Exception.message(ex)} + err -> err end end def get_docs_tarball( - %HexClient.Release{has_docs: has_docs, package_name: package_name, version: version} = + %HexClient.Release{package_name: package_name, version: version} = _release ) do - if has_docs do - case get("#{@repo_url}/docs/#{package_name}-#{version}.tar.gz") do - {:ok, %{status: 200, body: body}} -> {:ok, body} - {:ok, %{status: status}} -> {:error, "HTTP #{status}"} - err -> err - end - else - {:error, "Package release has no documentation."} + case get("docs/#{package_name}-#{version}.tar.gz") do + {:ok, %{status: 200, body: body}} -> + {:ok, body} + + {:ok, %{status: status}} -> + {:error, "HTTP #{status}"} + + {:error, ex} when is_exception(ex) -> + {:error, Exception.message(ex)} + + err -> + err end end - defp get(url) do - Req.get(url, Application.get_env(:search, :hex_client_req_options, [])) + defp get(resource, opts \\ []) do + opts + |> Keyword.merge(Application.get_env(:search, :hex_client_req_options, [])) + |> Keyword.merge(url: "#{@repo_url}/#{resource}") + |> Req.new() + |> ReqHex.attach() + |> Req.request() end end diff --git a/lib/search/hex_client/release.ex b/lib/search/hex_client/release.ex index b666a19..9e0e1b0 100644 --- a/lib/search/hex_client/release.ex +++ b/lib/search/hex_client/release.ex @@ -1,9 +1,22 @@ defmodule Search.HexClient.Release do - defstruct [:package_name, :version, :has_docs] + defstruct [:package_name, :version] @type t :: %__MODULE__{ package_name: String.t(), - version: Version.t(), - has_docs: boolean() + version: Version.t() } + + def latest(releases) do + releases |> Enum.sort_by(& &1.version, {:desc, Version}) |> latest(nil) + end + + defp latest([%__MODULE__{} = head | tail], latest_prerelease) do + if head.version.pre == [] do + head + else + latest(tail, latest_prerelease || head) + end + end + + defp latest([], latest_prerelease), do: latest_prerelease end diff --git a/lib/search/packages.ex b/lib/search/packages.ex new file mode 100644 index 0000000..82b60b7 --- /dev/null +++ b/lib/search/packages.ex @@ -0,0 +1,105 @@ +defmodule Search.Packages do + import Ecto.Query, warn: false + alias Search.Repo + + alias Search.Packages.{Package, DocItem, DocFragment} + alias Search.{HexClient, ExDocParser} + + @doc """ + Adds the package to be indexed by the application. + + If given a package name, adds the latest version of the package to the app. If given a `%HexClient.Release{}` adds + the specified release. Does not embed it yet. + """ + def add_package(package_name) when is_binary(package_name) do + case HexClient.get_releases(package_name) do + {:ok, releases} -> + latest = HexClient.Release.latest(releases) + add_package(latest) + + err -> + err + end + end + + def add_package(%HexClient.Release{package_name: package_name, version: version} = release) do + version = Version.to_string(version) + + with {:ok, docs} <- HexClient.get_docs_tarball(release), + {:ok, search_data} <- ExDocParser.extract_search_data(docs) do + Repo.transaction_with(fn -> + package = + case Repo.get_by(Package, name: package_name) do + nil -> + %Package{name: package_name, version: version} + + existing -> + existing + end + |> Repo.preload(:doc_items) + |> Package.changeset(%{ + version: version + }) + |> Ecto.Changeset.put_assoc(:doc_items, []) + + with {:ok, package} <- Repo.insert_or_update(package), + :ok <- create_items_from_package(package, search_data) do + {:ok, package} + end + end) + end + end + + defp create_items_from_package(%Package{} = _package, []), do: :ok + + defp create_items_from_package(%Package{} = package, [search_data_head | search_data_tail]) do + %{"doc" => doc, "title" => title, "ref" => ref, "type" => type} = search_data_head + + with {:ok, item} <- + create_doc_item(package, %{doc: doc, title: title, ref: ref, type: type}), + {:ok, _fragment} <- + create_doc_fragment(item, %{ + text: "# #{title}\n\n#{doc}" + }) do + create_items_from_package(package, search_data_tail) + end + end + + def create_doc_fragment(%DocItem{id: item_id} = _doc_item, attrs) do + %DocFragment{doc_item_id: item_id} + |> DocFragment.changeset(attrs) + |> Repo.insert() + end + + def create_doc_item(%Package{id: package_id} = _package, attrs) do + %DocItem{package_id: package_id} + |> DocItem.changeset(attrs) + |> Repo.insert() + end + + def list_packages do + Repo.all(Package) + end + + def get_package!(id), do: Repo.get!(Package, id) + + def create_package(attrs \\ %{}) do + %Package{} + |> Package.changeset(attrs) + |> Repo.insert() + end + + def update_package(%Package{} = package, attrs) do + package + |> Package.changeset(attrs) + |> Repo.update() + end + + def delete_package(%Package{} = package) do + Repo.delete(package) + end + + def change_package(%Package{} = package, attrs \\ %{}) do + Package.changeset(package, attrs) + end +end diff --git a/lib/search/packages/doc_fragment.ex b/lib/search/packages/doc_fragment.ex new file mode 100644 index 0000000..77a116a --- /dev/null +++ b/lib/search/packages/doc_fragment.ex @@ -0,0 +1,20 @@ +defmodule Search.Packages.DocFragment do + alias Search.Packages + use Ecto.Schema + import Ecto.Changeset + + schema "doc_fragments" do + field :text, :string + belongs_to :doc_item, Packages.DocItem + + timestamps(type: :utc_datetime) + end + + @doc false + def changeset(doc_fragment, attrs) do + doc_fragment + |> cast(attrs, [:text]) + |> cast_assoc(:doc_item) + |> validate_required([:text]) + end +end diff --git a/lib/search/packages/doc_item.ex b/lib/search/packages/doc_item.ex new file mode 100644 index 0000000..5e97cf7 --- /dev/null +++ b/lib/search/packages/doc_item.ex @@ -0,0 +1,25 @@ +defmodule Search.Packages.DocItem do + alias Search.Packages + use Ecto.Schema + import Ecto.Changeset + + schema "doc_items" do + field :type, :string + field :title, :string + field :ref, :string + field :doc, :string + belongs_to :package, Packages.Package + has_many :doc_fragments, Packages.DocFragment, on_replace: :delete + + timestamps(type: :utc_datetime) + end + + @doc false + def changeset(doc_item, attrs) do + doc_item + |> cast(attrs, [:ref, :type, :title, :doc]) + |> cast_assoc(:package) + |> cast_assoc(:doc_fragments) + |> validate_required([:ref, :type, :title]) + end +end diff --git a/lib/search/packages/package.ex b/lib/search/packages/package.ex new file mode 100644 index 0000000..581f4e1 --- /dev/null +++ b/lib/search/packages/package.ex @@ -0,0 +1,21 @@ +defmodule Search.Packages.Package do + alias Search.Packages + use Ecto.Schema + import Ecto.Changeset + + schema "packages" do + field :name, :string + field :version, :string + has_many :doc_items, Packages.DocItem, on_replace: :delete + + timestamps(type: :utc_datetime) + end + + @doc false + def changeset(package, attrs) do + package + |> cast(attrs, [:name, :version]) + |> cast_assoc(:doc_items) + |> validate_required([:name, :version]) + end +end diff --git a/lib/search/repo.ex b/lib/search/repo.ex index 25ae38b..8e8e7a1 100644 --- a/lib/search/repo.ex +++ b/lib/search/repo.ex @@ -2,4 +2,23 @@ defmodule Search.Repo do use Ecto.Repo, otp_app: :search, adapter: Ecto.Adapters.Postgres + + def transaction_with(fun, options \\ []) when is_function(fun, 0) and is_list(options) do + transaction( + fn -> + case fun.() do + {:ok, result} -> + result + + {:error, reason} -> + rollback(reason) + + other -> + raise ArgumentError, + "expected to return {:ok, _} or {:error, _}, got: #{inspect(other)}" + end + end, + options + ) + end end diff --git a/lib/search_web/controllers/page_controller.ex b/lib/search_web/controllers/page_controller.ex index d260396..86303ab 100644 --- a/lib/search_web/controllers/page_controller.ex +++ b/lib/search_web/controllers/page_controller.ex @@ -1,13 +1,26 @@ defmodule SearchWeb.PageController do use SearchWeb, :controller + @empty_form Phoenix.Component.to_form(%{ + "search_text" => nil, + "k" => nil, + "embedding_model" => nil + }) + + @embedding_model_opts Search.Application.embedding_models() + |> Keyword.keys() + def home(conn, _params) do # The home page is often custom made, # so skip the default app layout. - render(conn, :home, form: Phoenix.Component.to_form(%{"search_text" => nil, "k" => nil})) + render(conn, :home, form: @empty_form, embedding_models: @embedding_model_opts) end - def search(conn, %{"k" => k, "search_text" => search_text} = params) do + def search( + conn, + %{"k" => k, "search_text" => search_text, "embedding_model" => embedding_model} = params + ) do + embedding_model = String.to_existing_atom(embedding_model) k = String.to_integer(k) search_text = String.trim(search_text) @@ -19,10 +32,14 @@ defmodule SearchWeb.PageController do end if errors == [] do - %{embedding: query_tensor} = Nx.Serving.batched_run(Search.Embedding, search_text) - fragments = Search.Fragment.knn_lookup(query_tensor, k: k) + query_tensor = Search.Embeddings.embed_one(embedding_model, search_text) + + items = + Search.Embeddings.knn_query(embedding_model, query_tensor, k: k) + |> Stream.map(& &1.doc_fragment.doc_item) + |> Enum.uniq_by(& &1.id) - render(conn, :search, fragments: fragments) + render(conn, :search, items: items) else render(conn, :home, form: Phoenix.Component.to_form(params, errors: errors)) end diff --git a/lib/search_web/controllers/page_html/home.html.heex b/lib/search_web/controllers/page_html/home.html.heex index e4931eb..7f38a16 100644 --- a/lib/search_web/controllers/page_html/home.html.heex +++ b/lib/search_web/controllers/page_html/home.html.heex @@ -1,6 +1,13 @@ <.simple_form for={@form} class="bg-transparent" action="/search" method="get"> <.input field={@form[:search_text]} label="Search prompt" required /> <.input field={@form[:k]} type="number" label="Number of entries" required min="1" value="10" /> + <.input + field={@form[:embedding_model]} + label="Embedding model" + type="select" + options={@embedding_models} + required + /> <:actions> <.button>Search diff --git a/lib/search_web/controllers/page_html/search.html.heex b/lib/search_web/controllers/page_html/search.html.heex index d2ecb4a..7a79cc5 100644 --- a/lib/search_web/controllers/page_html/search.html.heex +++ b/lib/search_web/controllers/page_html/search.html.heex @@ -1,3 +1,6 @@ -
- <%= raw(Earmark.as_html!(fragment.doc_text)) %> +
+

<%= item.title %>

+ <%= if item.doc do %> + <%= raw(Earmark.as_html!(item.doc)) %> + <% end %>
diff --git a/mix.exs b/mix.exs index 808b54a..8538989 100644 --- a/mix.exs +++ b/mix.exs @@ -58,9 +58,10 @@ defmodule Search.MixProject do {:pgvector, "~> 0.2.0"}, {:bumblebee, "~> 0.5.3"}, {:exla, ">= 0.0.0"}, - {:req, "~> 0.4.0"}, + {:req, "~> 0.5.0"}, {:progress_bar, "> 0.0.0"}, - {:earmark, "~> 1.4.46"} + {:earmark, "~> 1.4.46"}, + {:req_hex, "~> 0.2.0"} ] end diff --git a/mix.lock b/mix.lock index efc3bbd..8b31699 100644 --- a/mix.lock +++ b/mix.lock @@ -1,59 +1,60 @@ %{ "axon": {:hex, :axon, "0.6.1", "1d042fdba1c1b4413a3d65800524feebd1bc8ed218f8cdefe7a97510c3f427f3", [:mix], [{:kino, "~> 0.7", [hex: :kino, repo: "hexpm", optional: true]}, {:kino_vega_lite, "~> 0.1.7", [hex: :kino_vega_lite, repo: "hexpm", optional: true]}, {:nx, "~> 0.6.0 or ~> 0.7.0", [hex: :nx, repo: "hexpm", optional: false]}, {:polaris, "~> 0.1", [hex: :polaris, repo: "hexpm", optional: false]}, {:table_rex, "~> 3.1.1", [hex: :table_rex, repo: "hexpm", optional: true]}], "hexpm", "d6b0ae2f0dd284f6bf702edcab71e790d6c01ca502dd06c4070836554f5a48e1"}, - "bandit": {:hex, :bandit, "1.2.3", "a98d664a96fec23b68e776062296d76a94b4459795b38209f4ae89cb4225709c", [:mix], [{:hpax, "~> 0.1.1", [hex: :hpax, repo: "hexpm", optional: false]}, {:plug, "~> 1.14", [hex: :plug, repo: "hexpm", optional: false]}, {:telemetry, "~> 0.4 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}, {:thousand_island, "~> 1.0", [hex: :thousand_island, repo: "hexpm", optional: false]}, {:websock, "~> 0.5", [hex: :websock, repo: "hexpm", optional: false]}], "hexpm", "3e29150245a9b5f56944434e5240966e75c917dad248f689ab589b32187a81af"}, + "bandit": {:hex, :bandit, "1.5.2", "ed0a41c43a9e529c670d0fd48371db4027e7b80d43b1942893e17deb8bed0540", [:mix], [{:hpax, "~> 0.1.1", [hex: :hpax, repo: "hexpm", optional: false]}, {:plug, "~> 1.14", [hex: :plug, repo: "hexpm", optional: false]}, {:telemetry, "~> 0.4 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}, {:thousand_island, "~> 1.0", [hex: :thousand_island, repo: "hexpm", optional: false]}, {:websock, "~> 0.5", [hex: :websock, repo: "hexpm", optional: false]}], "hexpm", "35ddbdce7e8a2a3c6b5093f7299d70832a43ed2f4a1852885a61d334cab1b4ad"}, "bumblebee": {:hex, :bumblebee, "0.5.3", "151c215fd6014958dbfc322fe5f31b44d170293f69cfdca419936c81e39b1f64", [:mix], [{:axon, "~> 0.6.1", [hex: :axon, repo: "hexpm", optional: false]}, {:castore, "~> 0.1 or ~> 1.0", [hex: :castore, repo: "hexpm", optional: false]}, {:jason, "~> 1.4.0", [hex: :jason, repo: "hexpm", optional: false]}, {:nx, "~> 0.7.0", [hex: :nx, repo: "hexpm", optional: false]}, {:nx_image, "~> 0.1.0", [hex: :nx_image, repo: "hexpm", optional: false]}, {:nx_signal, "~> 0.2.0", [hex: :nx_signal, repo: "hexpm", optional: false]}, {:progress_bar, "~> 3.0", [hex: :progress_bar, repo: "hexpm", optional: false]}, {:safetensors, "~> 0.1.3", [hex: :safetensors, repo: "hexpm", optional: false]}, {:tokenizers, "~> 0.4", [hex: :tokenizers, repo: "hexpm", optional: false]}, {:unpickler, "~> 0.1.0", [hex: :unpickler, repo: "hexpm", optional: false]}, {:unzip, "~> 0.10.0", [hex: :unzip, repo: "hexpm", optional: false]}], "hexpm", "5518f11e424c431a9cbedc80e0d26525368f0b6e50572a674ff247ec3b26bdd7"}, - "castore": {:hex, :castore, "1.0.5", "9eeebb394cc9a0f3ae56b813459f990abb0a3dedee1be6b27fdb50301930502f", [:mix], [], "hexpm", "8d7c597c3e4a64c395980882d4bca3cebb8d74197c590dc272cfd3b6a6310578"}, + "castore": {:hex, :castore, "1.0.7", "b651241514e5f6956028147fe6637f7ac13802537e895a724f90bf3e36ddd1dd", [:mix], [], "hexpm", "da7785a4b0d2a021cd1292a60875a784b6caef71e76bf4917bdee1f390455cf5"}, "complex": {:hex, :complex, "0.5.0", "af2d2331ff6170b61bb738695e481b27a66780e18763e066ee2cd863d0b1dd92", [:mix], [], "hexpm", "2683bd3c184466cfb94fad74cbfddfaa94b860e27ad4ca1bffe3bff169d91ef1"}, "db_connection": {:hex, :db_connection, "2.6.0", "77d835c472b5b67fc4f29556dee74bf511bbafecdcaf98c27d27fa5918152086", [:mix], [{:telemetry, "~> 0.4 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "c2f992d15725e721ec7fbc1189d4ecdb8afef76648c746a8e1cad35e3b8a35f3"}, "decimal": {:hex, :decimal, "2.1.1", "5611dca5d4b2c3dd497dec8f68751f1f1a54755e8ed2a966c2633cf885973ad6", [:mix], [], "hexpm", "53cfe5f497ed0e7771ae1a475575603d77425099ba5faef9394932b35020ffcc"}, "dns_cluster": {:hex, :dns_cluster, "0.1.3", "0bc20a2c88ed6cc494f2964075c359f8c2d00e1bf25518a6a6c7fd277c9b0c66", [:mix], [], "hexpm", "46cb7c4a1b3e52c7ad4cbe33ca5079fbde4840dedeafca2baf77996c2da1bc33"}, "earmark": {:hex, :earmark, "1.4.46", "8c7287bd3137e99d26ae4643e5b7ef2129a260e3dcf41f251750cb4563c8fb81", [:mix], [], "hexpm", "798d86db3d79964e759ddc0c077d5eb254968ed426399fbf5a62de2b5ff8910a"}, - "ecto": {:hex, :ecto, "3.11.1", "4b4972b717e7ca83d30121b12998f5fcdc62ba0ed4f20fd390f16f3270d85c3e", [:mix], [{:decimal, "~> 2.0", [hex: :decimal, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: true]}, {:telemetry, "~> 0.4 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "ebd3d3772cd0dfcd8d772659e41ed527c28b2a8bde4b00fe03e0463da0f1983b"}, - "ecto_sql": {:hex, :ecto_sql, "3.11.1", "e9abf28ae27ef3916b43545f9578b4750956ccea444853606472089e7d169470", [:mix], [{:db_connection, "~> 2.4.1 or ~> 2.5", [hex: :db_connection, repo: "hexpm", optional: false]}, {:ecto, "~> 3.11.0", [hex: :ecto, repo: "hexpm", optional: false]}, {:myxql, "~> 0.6.0", [hex: :myxql, repo: "hexpm", optional: true]}, {:postgrex, "~> 0.16.0 or ~> 0.17.0 or ~> 1.0", [hex: :postgrex, repo: "hexpm", optional: true]}, {:tds, "~> 2.1.1 or ~> 2.2", [hex: :tds, repo: "hexpm", optional: true]}, {:telemetry, "~> 0.4.0 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "ce14063ab3514424276e7e360108ad6c2308f6d88164a076aac8a387e1fea634"}, - "elixir_make": {:hex, :elixir_make, "0.7.8", "505026f266552ee5aabca0b9f9c229cbb496c689537c9f922f3eb5431157efc7", [:mix], [{:castore, "~> 0.1 or ~> 1.0", [hex: :castore, repo: "hexpm", optional: true]}, {:certifi, "~> 2.0", [hex: :certifi, repo: "hexpm", optional: true]}], "hexpm", "7a71945b913d37ea89b06966e1342c85cfe549b15e6d6d081e8081c493062c07"}, + "ecto": {:hex, :ecto, "3.11.2", "e1d26be989db350a633667c5cda9c3d115ae779b66da567c68c80cfb26a8c9ee", [:mix], [{:decimal, "~> 2.0", [hex: :decimal, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: true]}, {:telemetry, "~> 0.4 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "3c38bca2c6f8d8023f2145326cc8a80100c3ffe4dcbd9842ff867f7fc6156c65"}, + "ecto_sql": {:hex, :ecto_sql, "3.11.2", "c7cc7f812af571e50b80294dc2e535821b3b795ce8008d07aa5f336591a185a8", [:mix], [{:db_connection, "~> 2.4.1 or ~> 2.5", [hex: :db_connection, repo: "hexpm", optional: false]}, {:ecto, "~> 3.11.0", [hex: :ecto, repo: "hexpm", optional: false]}, {:myxql, "~> 0.6.0", [hex: :myxql, repo: "hexpm", optional: true]}, {:postgrex, "~> 0.16 or ~> 1.0", [hex: :postgrex, repo: "hexpm", optional: true]}, {:tds, "~> 2.1.1 or ~> 2.2", [hex: :tds, repo: "hexpm", optional: true]}, {:telemetry, "~> 0.4.0 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "73c07f995ac17dbf89d3cfaaf688fcefabcd18b7b004ac63b0dc4ef39499ed6b"}, + "elixir_make": {:hex, :elixir_make, "0.8.4", "4960a03ce79081dee8fe119d80ad372c4e7badb84c493cc75983f9d3bc8bde0f", [:mix], [{:castore, "~> 0.1 or ~> 1.0", [hex: :castore, repo: "hexpm", optional: true]}, {:certifi, "~> 2.0", [hex: :certifi, repo: "hexpm", optional: true]}], "hexpm", "6e7f1d619b5f61dfabd0a20aa268e575572b542ac31723293a4c1a567d5ef040"}, "esbuild": {:hex, :esbuild, "0.8.1", "0cbf919f0eccb136d2eeef0df49c4acf55336de864e63594adcea3814f3edf41", [:mix], [{:castore, ">= 0.0.0", [hex: :castore, repo: "hexpm", optional: false]}, {:jason, "~> 1.4", [hex: :jason, repo: "hexpm", optional: false]}], "hexpm", "25fc876a67c13cb0a776e7b5d7974851556baeda2085296c14ab48555ea7560f"}, - "exla": {:hex, :exla, "0.7.0", "27fac40a580f0d3816fe3bf35c50dfc2f99597d26ac7e2aca4a3c62b89bb427f", [:make, :mix], [{:elixir_make, "~> 0.6", [hex: :elixir_make, repo: "hexpm", optional: false]}, {:nx, "~> 0.7.0", [hex: :nx, repo: "hexpm", optional: false]}, {:telemetry, "~> 0.4.0 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}, {:xla, "~> 0.6.0", [hex: :xla, repo: "hexpm", optional: false]}], "hexpm", "d3bfc622deb52cec95efc9d76063891afc7cd33e38eddbb01f3385c53e043c40"}, - "file_system": {:hex, :file_system, "0.2.10", "fb082005a9cd1711c05b5248710f8826b02d7d1784e7c3451f9c1231d4fc162d", [:mix], [], "hexpm", "41195edbfb562a593726eda3b3e8b103a309b733ad25f3d642ba49696bf715dc"}, + "exla": {:hex, :exla, "0.7.2", "8ac573093df8e5e6b36845beeb3f5a0ea92b05082bf2fa4678f80170cfc887f6", [:make, :mix], [{:elixir_make, "~> 0.6", [hex: :elixir_make, repo: "hexpm", optional: false]}, {:nimble_pool, "~> 1.0", [hex: :nimble_pool, repo: "hexpm", optional: false]}, {:nx, "~> 0.7.1", [hex: :nx, repo: "hexpm", optional: false]}, {:telemetry, "~> 0.4.0 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}, {:xla, "~> 0.6.0", [hex: :xla, repo: "hexpm", optional: false]}], "hexpm", "d061ea87858415e5585cbd4b7bdae5489000339519a2c6a7f51eb0defd73b588"}, + "file_system": {:hex, :file_system, "1.0.0", "b689cc7dcee665f774de94b5a832e578bd7963c8e637ef940cd44327db7de2cd", [:mix], [], "hexpm", "6752092d66aec5a10e662aefeed8ddb9531d79db0bc145bb8c40325ca1d8536d"}, "finch": {:hex, :finch, "0.18.0", "944ac7d34d0bd2ac8998f79f7a811b21d87d911e77a786bc5810adb75632ada4", [:mix], [{:castore, "~> 0.1 or ~> 1.0", [hex: :castore, repo: "hexpm", optional: false]}, {:mime, "~> 1.0 or ~> 2.0", [hex: :mime, repo: "hexpm", optional: false]}, {:mint, "~> 1.3", [hex: :mint, repo: "hexpm", optional: false]}, {:nimble_options, "~> 0.4 or ~> 1.0", [hex: :nimble_options, repo: "hexpm", optional: false]}, {:nimble_pool, "~> 0.2.6 or ~> 1.0", [hex: :nimble_pool, repo: "hexpm", optional: false]}, {:telemetry, "~> 0.4 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "69f5045b042e531e53edc2574f15e25e735b522c37e2ddb766e15b979e03aa65"}, - "floki": {:hex, :floki, "0.35.4", "cc947b446024732c07274ac656600c5c4dc014caa1f8fb2dfff93d275b83890d", [:mix], [], "hexpm", "27fa185d3469bd8fc5947ef0f8d5c4e47f0af02eb6b070b63c868f69e3af0204"}, + "floki": {:hex, :floki, "0.36.2", "a7da0193538c93f937714a6704369711998a51a6164a222d710ebd54020aa7a3", [:mix], [], "hexpm", "a8766c0bc92f074e5cb36c4f9961982eda84c5d2b8e979ca67f5c268ec8ed580"}, "heroicons": {:git, "https://github.com/tailwindlabs/heroicons.git", "88ab3a0d790e6a47404cba02800a6b25d2afae50", [tag: "v2.1.1", sparse: "optimized"]}, + "hex_core": {:hex, :hex_core, "0.10.2", "1a80c487d0ebf57e6371721335f37689773f4df477e04bc2e962226f32d280a7", [:rebar3], [], "hexpm", "e4de8b979548637ae9e2e4d6363a1ca114488617cf7e11af3dd98aa333d596d2"}, "hpax": {:hex, :hpax, "0.1.2", "09a75600d9d8bbd064cdd741f21fc06fc1f4cf3d0fcc335e5aa19be1a7235c84", [:mix], [], "hexpm", "2c87843d5a23f5f16748ebe77969880e29809580efdaccd615cd3bed628a8c13"}, "jason": {:hex, :jason, "1.4.1", "af1504e35f629ddcdd6addb3513c3853991f694921b1b9368b0bd32beb9f1b63", [:mix], [{:decimal, "~> 1.0 or ~> 2.0", [hex: :decimal, repo: "hexpm", optional: true]}], "hexpm", "fbb01ecdfd565b56261302f7e1fcc27c4fb8f32d56eab74db621fc154604a7a1"}, - "json": {:hex, :json, "1.4.1", "8648f04a9439765ad449bc56a3ff7d8b11dd44ff08ffcdefc4329f7c93843dfa", [:mix], [], "hexpm", "9abf218dbe4ea4fcb875e087d5f904ef263d012ee5ed21d46e9dbca63f053d16"}, "mime": {:hex, :mime, "2.0.5", "dc34c8efd439abe6ae0343edbb8556f4d63f178594894720607772a041b04b02", [:mix], [], "hexpm", "da0d64a365c45bc9935cc5c8a7fc5e49a0e0f9932a761c55d6c52b142780a05c"}, - "mint": {:hex, :mint, "1.5.2", "4805e059f96028948870d23d7783613b7e6b0e2fb4e98d720383852a760067fd", [:mix], [{:castore, "~> 0.1.0 or ~> 1.0", [hex: :castore, repo: "hexpm", optional: true]}, {:hpax, "~> 0.1.1", [hex: :hpax, repo: "hexpm", optional: false]}], "hexpm", "d77d9e9ce4eb35941907f1d3df38d8f750c357865353e21d335bdcdf6d892a02"}, - "nimble_options": {:hex, :nimble_options, "1.1.0", "3b31a57ede9cb1502071fade751ab0c7b8dbe75a9a4c2b5bbb0943a690b63172", [:mix], [], "hexpm", "8bbbb3941af3ca9acc7835f5655ea062111c9c27bcac53e004460dfd19008a99"}, - "nimble_ownership": {:hex, :nimble_ownership, "0.2.1", "3e44c72ebe8dd213db4e13aff4090aaa331d158e72ce1891d02e0ffb05a1eb2d", [:mix], [], "hexpm", "bf38d2ef4fb990521a4ecf112843063c1f58a5c602484af4c7977324042badee"}, - "nimble_pool": {:hex, :nimble_pool, "1.0.0", "5eb82705d138f4dd4423f69ceb19ac667b3b492ae570c9f5c900bb3d2f50a847", [:mix], [], "hexpm", "80be3b882d2d351882256087078e1b1952a28bf98d0a287be87e4a24a710b67a"}, - "nx": {:hex, :nx, "0.7.1", "5f6376e3d18408116e8a84b8f4ac851fb07dfe61764a5410ebf0b5dcb69c1b7e", [:mix], [{:complex, "~> 0.5", [hex: :complex, repo: "hexpm", optional: false]}, {:telemetry, "~> 0.4.0 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "e3ddd6a3f2a9bac79c67b3933368c25bb5ec814a883fc68aba8fd8a236751777"}, + "mint": {:hex, :mint, "1.6.1", "065e8a5bc9bbd46a41099dfea3e0656436c5cbcb6e741c80bd2bad5cd872446f", [:mix], [{:castore, "~> 0.1.0 or ~> 1.0", [hex: :castore, repo: "hexpm", optional: true]}, {:hpax, "~> 0.1.1 or ~> 0.2.0", [hex: :hpax, repo: "hexpm", optional: false]}], "hexpm", "4fc518dcc191d02f433393a72a7ba3f6f94b101d094cb6bf532ea54c89423780"}, + "nimble_options": {:hex, :nimble_options, "1.1.1", "e3a492d54d85fc3fd7c5baf411d9d2852922f66e69476317787a7b2bb000a61b", [:mix], [], "hexpm", "821b2470ca9442c4b6984882fe9bb0389371b8ddec4d45a9504f00a66f650b44"}, + "nimble_ownership": {:hex, :nimble_ownership, "0.3.1", "99d5244672fafdfac89bfad3d3ab8f0d367603ce1dc4855f86a1c75008bce56f", [:mix], [], "hexpm", "4bf510adedff0449a1d6e200e43e57a814794c8b5b6439071274d248d272a549"}, + "nimble_pool": {:hex, :nimble_pool, "1.1.0", "bf9c29fbdcba3564a8b800d1eeb5a3c58f36e1e11d7b7fb2e084a643f645f06b", [:mix], [], "hexpm", "af2e4e6b34197db81f7aad230c1118eac993acc0dae6bc83bac0126d4ae0813a"}, + "nx": {:hex, :nx, "0.7.2", "7f6f6584585e49ffbf81769e7ccc2d01c5639074e399c1f94adc2b509869673e", [:mix], [{:complex, "~> 0.5", [hex: :complex, repo: "hexpm", optional: false]}, {:telemetry, "~> 0.4.0 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "e2c0680066eec5af8b8ef00c99e9bf40a0d08d8b2bbba77f59f801ec54a3f90e"}, "nx_image": {:hex, :nx_image, "0.1.2", "0c6e3453c1dc30fc80c723a54861204304cebc8a89ed3b806b972c73ee5d119d", [:mix], [{:nx, "~> 0.4", [hex: :nx, repo: "hexpm", optional: false]}], "hexpm", "9161863c42405ddccb6dbbbeae078ad23e30201509cc804b3b3a7c9e98764b81"}, "nx_signal": {:hex, :nx_signal, "0.2.0", "e1ca0318877b17c81ce8906329f5125f1e2361e4c4235a5baac8a95ee88ea98e", [:mix], [{:nx, "~> 0.6", [hex: :nx, repo: "hexpm", optional: false]}], "hexpm", "7247e5e18a177a59c4cb5355952900c62fdeadeb2bad02a9a34237b68744e2bb"}, "pgvector": {:hex, :pgvector, "0.2.1", "dc707ce6065ac0e82e5716bc17f9c6a97f92aca23994e5cceef7dfc48bb57eed", [:mix], [{:ecto, "~> 3.0", [hex: :ecto, repo: "hexpm", optional: true]}, {:nx, "~> 0.5", [hex: :nx, repo: "hexpm", optional: true]}, {:postgrex, ">= 0.0.0", [hex: :postgrex, repo: "hexpm", optional: false]}], "hexpm", "ed86c560af2f85b31d79f119192ce98f3342b4d06ceac63824a8686fe07e59b6"}, - "phoenix": {:hex, :phoenix, "1.7.11", "1d88fc6b05ab0c735b250932c4e6e33bfa1c186f76dcf623d8dd52f07d6379c7", [:mix], [{:castore, ">= 0.0.0", [hex: :castore, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: true]}, {:phoenix_pubsub, "~> 2.1", [hex: :phoenix_pubsub, repo: "hexpm", optional: false]}, {:phoenix_template, "~> 1.0", [hex: :phoenix_template, repo: "hexpm", optional: false]}, {:phoenix_view, "~> 2.0", [hex: :phoenix_view, repo: "hexpm", optional: true]}, {:plug, "~> 1.14", [hex: :plug, repo: "hexpm", optional: false]}, {:plug_cowboy, "~> 2.7", [hex: :plug_cowboy, repo: "hexpm", optional: true]}, {:plug_crypto, "~> 1.2 or ~> 2.0", [hex: :plug_crypto, repo: "hexpm", optional: false]}, {:telemetry, "~> 0.4 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}, {:websock_adapter, "~> 0.5.3", [hex: :websock_adapter, repo: "hexpm", optional: false]}], "hexpm", "b1ec57f2e40316b306708fe59b92a16b9f6f4bf50ccfa41aa8c7feb79e0ec02a"}, - "phoenix_ecto": {:hex, :phoenix_ecto, "4.4.3", "86e9878f833829c3f66da03d75254c155d91d72a201eb56ae83482328dc7ca93", [:mix], [{:ecto, "~> 3.5", [hex: :ecto, repo: "hexpm", optional: false]}, {:phoenix_html, "~> 2.14.2 or ~> 3.0 or ~> 4.0", [hex: :phoenix_html, repo: "hexpm", optional: true]}, {:plug, "~> 1.9", [hex: :plug, repo: "hexpm", optional: false]}], "hexpm", "d36c401206f3011fefd63d04e8ef626ec8791975d9d107f9a0817d426f61ac07"}, - "phoenix_html": {:hex, :phoenix_html, "4.0.0", "4857ec2edaccd0934a923c2b0ba526c44a173c86b847e8db725172e9e51d11d6", [:mix], [], "hexpm", "cee794a052f243291d92fa3ccabcb4c29bb8d236f655fb03bcbdc3a8214b8d13"}, + "phoenix": {:hex, :phoenix, "1.7.12", "1cc589e0eab99f593a8aa38ec45f15d25297dd6187ee801c8de8947090b5a9d3", [:mix], [{:castore, ">= 0.0.0", [hex: :castore, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: true]}, {:phoenix_pubsub, "~> 2.1", [hex: :phoenix_pubsub, repo: "hexpm", optional: false]}, {:phoenix_template, "~> 1.0", [hex: :phoenix_template, repo: "hexpm", optional: false]}, {:phoenix_view, "~> 2.0", [hex: :phoenix_view, repo: "hexpm", optional: true]}, {:plug, "~> 1.14", [hex: :plug, repo: "hexpm", optional: false]}, {:plug_cowboy, "~> 2.7", [hex: :plug_cowboy, repo: "hexpm", optional: true]}, {:plug_crypto, "~> 1.2 or ~> 2.0", [hex: :plug_crypto, repo: "hexpm", optional: false]}, {:telemetry, "~> 0.4 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}, {:websock_adapter, "~> 0.5.3", [hex: :websock_adapter, repo: "hexpm", optional: false]}], "hexpm", "d646192fbade9f485b01bc9920c139bfdd19d0f8df3d73fd8eaf2dfbe0d2837c"}, + "phoenix_ecto": {:hex, :phoenix_ecto, "4.6.1", "96798325fab2fed5a824ca204e877b81f9afd2e480f581e81f7b4b64a5a477f2", [:mix], [{:ecto, "~> 3.5", [hex: :ecto, repo: "hexpm", optional: false]}, {:phoenix_html, "~> 2.14.2 or ~> 3.0 or ~> 4.1", [hex: :phoenix_html, repo: "hexpm", optional: true]}, {:plug, "~> 1.9", [hex: :plug, repo: "hexpm", optional: false]}, {:postgrex, "~> 0.17", [hex: :postgrex, repo: "hexpm", optional: true]}], "hexpm", "0ae544ff99f3c482b0807c5cec2c8289e810ecacabc04959d82c3337f4703391"}, + "phoenix_html": {:hex, :phoenix_html, "4.1.1", "4c064fd3873d12ebb1388425a8f2a19348cef56e7289e1998e2d2fa758aa982e", [:mix], [], "hexpm", "f2f2df5a72bc9a2f510b21497fd7d2b86d932ec0598f0210fed4114adc546c6f"}, "phoenix_live_dashboard": {:hex, :phoenix_live_dashboard, "0.8.3", "7ff51c9b6609470f681fbea20578dede0e548302b0c8bdf338b5a753a4f045bf", [:mix], [{:ecto, "~> 3.6.2 or ~> 3.7", [hex: :ecto, repo: "hexpm", optional: true]}, {:ecto_mysql_extras, "~> 0.5", [hex: :ecto_mysql_extras, repo: "hexpm", optional: true]}, {:ecto_psql_extras, "~> 0.7", [hex: :ecto_psql_extras, repo: "hexpm", optional: true]}, {:ecto_sqlite3_extras, "~> 1.1.7 or ~> 1.2.0", [hex: :ecto_sqlite3_extras, repo: "hexpm", optional: true]}, {:mime, "~> 1.6 or ~> 2.0", [hex: :mime, repo: "hexpm", optional: false]}, {:phoenix_live_view, "~> 0.19 or ~> 1.0", [hex: :phoenix_live_view, repo: "hexpm", optional: false]}, {:telemetry_metrics, "~> 0.6 or ~> 1.0", [hex: :telemetry_metrics, repo: "hexpm", optional: false]}], "hexpm", "f9470a0a8bae4f56430a23d42f977b5a6205fdba6559d76f932b876bfaec652d"}, - "phoenix_live_reload": {:hex, :phoenix_live_reload, "1.4.1", "2aff698f5e47369decde4357ba91fc9c37c6487a512b41732818f2204a8ef1d3", [:mix], [{:file_system, "~> 0.2.1 or ~> 0.3", [hex: :file_system, repo: "hexpm", optional: false]}, {:phoenix, "~> 1.4", [hex: :phoenix, repo: "hexpm", optional: false]}], "hexpm", "9bffb834e7ddf08467fe54ae58b5785507aaba6255568ae22b4d46e2bb3615ab"}, - "phoenix_live_view": {:hex, :phoenix_live_view, "0.20.9", "46d5d436d3f8ff97f066b6c45528fd842a711fd3875b2d3f706b2e769ea07c51", [:mix], [{:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: true]}, {:phoenix, "~> 1.6.15 or ~> 1.7.0", [hex: :phoenix, repo: "hexpm", optional: false]}, {:phoenix_html, "~> 3.3 or ~> 4.0", [hex: :phoenix_html, repo: "hexpm", optional: false]}, {:phoenix_template, "~> 1.0", [hex: :phoenix_template, repo: "hexpm", optional: false]}, {:phoenix_view, "~> 2.0", [hex: :phoenix_view, repo: "hexpm", optional: true]}, {:plug, "~> 1.15", [hex: :plug, repo: "hexpm", optional: false]}, {:telemetry, "~> 0.4.2 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "694388615ece21b70c523910cba1c633132b08a270caaf60100dd4eaf331885d"}, + "phoenix_live_reload": {:hex, :phoenix_live_reload, "1.5.3", "f2161c207fda0e4fb55165f650f7f8db23f02b29e3bff00ff7ef161d6ac1f09d", [:mix], [{:file_system, "~> 0.3 or ~> 1.0", [hex: :file_system, repo: "hexpm", optional: false]}, {:phoenix, "~> 1.4", [hex: :phoenix, repo: "hexpm", optional: false]}], "hexpm", "b4ec9cd73cb01ff1bd1cac92e045d13e7030330b74164297d1aee3907b54803c"}, + "phoenix_live_view": {:hex, :phoenix_live_view, "0.20.14", "70fa101aa0539e81bed4238777498f6215e9dda3461bdaa067cad6908110c364", [:mix], [{:floki, "~> 0.36", [hex: :floki, repo: "hexpm", optional: true]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: true]}, {:phoenix, "~> 1.6.15 or ~> 1.7.0", [hex: :phoenix, repo: "hexpm", optional: false]}, {:phoenix_html, "~> 3.3 or ~> 4.0", [hex: :phoenix_html, repo: "hexpm", optional: false]}, {:phoenix_template, "~> 1.0", [hex: :phoenix_template, repo: "hexpm", optional: false]}, {:phoenix_view, "~> 2.0", [hex: :phoenix_view, repo: "hexpm", optional: true]}, {:plug, "~> 1.15", [hex: :plug, repo: "hexpm", optional: false]}, {:telemetry, "~> 0.4.2 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "82f6d006c5264f979ed5eb75593d808bbe39020f20df2e78426f4f2d570e2402"}, "phoenix_pubsub": {:hex, :phoenix_pubsub, "2.1.3", "3168d78ba41835aecad272d5e8cd51aa87a7ac9eb836eabc42f6e57538e3731d", [:mix], [], "hexpm", "bba06bc1dcfd8cb086759f0edc94a8ba2bc8896d5331a1e2c2902bf8e36ee502"}, "phoenix_template": {:hex, :phoenix_template, "1.0.4", "e2092c132f3b5e5b2d49c96695342eb36d0ed514c5b252a77048d5969330d639", [:mix], [{:phoenix_html, "~> 2.14.2 or ~> 3.0 or ~> 4.0", [hex: :phoenix_html, repo: "hexpm", optional: true]}], "hexpm", "2c0c81f0e5c6753faf5cca2f229c9709919aba34fab866d3bc05060c9c444206"}, - "plug": {:hex, :plug, "1.15.3", "712976f504418f6dff0a3e554c40d705a9bcf89a7ccef92fc6a5ef8f16a30a97", [:mix], [{:mime, "~> 1.0 or ~> 2.0", [hex: :mime, repo: "hexpm", optional: false]}, {:plug_crypto, "~> 1.1.1 or ~> 1.2 or ~> 2.0", [hex: :plug_crypto, repo: "hexpm", optional: false]}, {:telemetry, "~> 0.4.3 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "cc4365a3c010a56af402e0809208873d113e9c38c401cabd88027ef4f5c01fd2"}, - "plug_crypto": {:hex, :plug_crypto, "2.0.0", "77515cc10af06645abbfb5e6ad7a3e9714f805ae118fa1a70205f80d2d70fe73", [:mix], [], "hexpm", "53695bae57cc4e54566d993eb01074e4d894b65a3766f1c43e2c61a1b0f45ea9"}, + "plug": {:hex, :plug, "1.16.0", "1d07d50cb9bb05097fdf187b31cf087c7297aafc3fed8299aac79c128a707e47", [:mix], [{:mime, "~> 1.0 or ~> 2.0", [hex: :mime, repo: "hexpm", optional: false]}, {:plug_crypto, "~> 1.1.1 or ~> 1.2 or ~> 2.0", [hex: :plug_crypto, repo: "hexpm", optional: false]}, {:telemetry, "~> 0.4.3 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "cbf53aa1f5c4d758a7559c0bd6d59e286c2be0c6a1fac8cc3eee2f638243b93e"}, + "plug_crypto": {:hex, :plug_crypto, "2.1.0", "f44309c2b06d249c27c8d3f65cfe08158ade08418cf540fd4f72d4d6863abb7b", [:mix], [], "hexpm", "131216a4b030b8f8ce0f26038bc4421ae60e4bb95c5cf5395e1421437824c4fa"}, "polaris": {:hex, :polaris, "0.1.0", "dca61b18e3e801ecdae6ac9f0eca5f19792b44a5cb4b8d63db50fc40fc038d22", [:mix], [{:nx, "~> 0.5", [hex: :nx, repo: "hexpm", optional: false]}], "hexpm", "13ef2b166650e533cb24b10e2f3b8ab4f2f449ba4d63156e8c569527f206e2c2"}, - "postgrex": {:hex, :postgrex, "0.17.4", "5777781f80f53b7c431a001c8dad83ee167bcebcf3a793e3906efff680ab62b3", [:mix], [{:db_connection, "~> 2.1", [hex: :db_connection, repo: "hexpm", optional: false]}, {:decimal, "~> 1.5 or ~> 2.0", [hex: :decimal, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: true]}, {:table, "~> 0.1.0", [hex: :table, repo: "hexpm", optional: true]}], "hexpm", "6458f7d5b70652bc81c3ea759f91736c16a31be000f306d3c64bcdfe9a18b3cc"}, + "postgrex": {:hex, :postgrex, "0.18.0", "f34664101eaca11ff24481ed4c378492fed2ff416cd9b06c399e90f321867d7e", [:mix], [{:db_connection, "~> 2.1", [hex: :db_connection, repo: "hexpm", optional: false]}, {:decimal, "~> 1.5 or ~> 2.0", [hex: :decimal, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: true]}, {:table, "~> 0.1.0", [hex: :table, repo: "hexpm", optional: true]}], "hexpm", "a042989ba1bc1cca7383ebb9e461398e3f89f868c92ce6671feb7ef132a252d1"}, "progress_bar": {:hex, :progress_bar, "3.0.0", "f54ff038c2ac540cfbb4c2bfe97c75e7116ead044f3c2b10c9f212452194b5cd", [:mix], [{:decimal, "~> 2.0", [hex: :decimal, repo: "hexpm", optional: false]}], "hexpm", "6981c2b25ab24aecc91a2dc46623658e1399c21a2ae24db986b90d678530f2b7"}, - "req": {:hex, :req, "0.4.11", "cb19f87d5251e7de30cfc67d1899696b290711092207c6b2e8fc2294f237fcdc", [:mix], [{:aws_signature, "~> 0.3.2", [hex: :aws_signature, repo: "hexpm", optional: true]}, {:brotli, "~> 0.3.1", [hex: :brotli, repo: "hexpm", optional: true]}, {:ezstd, "~> 1.0", [hex: :ezstd, repo: "hexpm", optional: true]}, {:finch, "~> 0.17", [hex: :finch, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: false]}, {:mime, "~> 1.6 or ~> 2.0", [hex: :mime, repo: "hexpm", optional: false]}, {:nimble_csv, "~> 1.0", [hex: :nimble_csv, repo: "hexpm", optional: true]}, {:nimble_ownership, "~> 0.2.0", [hex: :nimble_ownership, repo: "hexpm", optional: false]}, {:plug, "~> 1.0", [hex: :plug, repo: "hexpm", optional: true]}], "hexpm", "bbf4f2393c649fa4146a3b8470e2a7e8c9b23e4100a16c75f5e7d1d3d33144f3"}, + "req": {:hex, :req, "0.5.0", "6d8a77c25cfc03e06a439fb12ffb51beade53e3fe0e2c5e362899a18b50298b3", [:mix], [{:brotli, "~> 0.3.1", [hex: :brotli, repo: "hexpm", optional: true]}, {:ezstd, "~> 1.0", [hex: :ezstd, repo: "hexpm", optional: true]}, {:finch, "~> 0.17", [hex: :finch, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: false]}, {:mime, "~> 1.6 or ~> 2.0", [hex: :mime, repo: "hexpm", optional: false]}, {:nimble_csv, "~> 1.0", [hex: :nimble_csv, repo: "hexpm", optional: true]}, {:plug, "~> 1.0", [hex: :plug, repo: "hexpm", optional: true]}], "hexpm", "dda04878c1396eebbfdec6db6f3d4ca609e5c8846b7ee88cc56eb9891406f7a3"}, + "req_hex": {:hex, :req_hex, "0.2.1", "f7566a61afde1f3f31977fdb230c2b9e801742da5de0b66a7b1e14febb304be7", [:mix], [{:hex_core, "~> 0.10.0", [hex: :hex_core, repo: "hexpm", optional: false]}, {:req, "~> 0.4.0 or ~> 0.5.0", [hex: :req, repo: "hexpm", optional: false]}], "hexpm", "f132012071714cfc277ff52ea5a80b793a4bebd79675d75820e02b425d897461"}, "rustler_precompiled": {:hex, :rustler_precompiled, "0.7.1", "ecadf02cc59a0eccbaed6c1937303a5827fbcf60010c541595e6d3747d3d0f9f", [:mix], [{:castore, "~> 0.1 or ~> 1.0", [hex: :castore, repo: "hexpm", optional: false]}, {:rustler, "~> 0.23", [hex: :rustler, repo: "hexpm", optional: true]}], "hexpm", "b9e4657b99a1483ea31502e1d58c464bedebe9028808eda45c3a429af4550c66"}, "safetensors": {:hex, :safetensors, "0.1.3", "7ff3c22391e213289c713898481d492c9c28a49ab1d0705b72630fb8360426b2", [:mix], [{:jason, "~> 1.4", [hex: :jason, repo: "hexpm", optional: false]}, {:nx, "~> 0.5", [hex: :nx, repo: "hexpm", optional: false]}], "hexpm", "fe50b53ea59fde4e723dd1a2e31cfdc6013e69343afac84c6be86d6d7c562c14"}, "tailwind": {:hex, :tailwind, "0.2.2", "9e27288b568ede1d88517e8c61259bc214a12d7eed271e102db4c93fcca9b2cd", [:mix], [{:castore, ">= 0.0.0", [hex: :castore, repo: "hexpm", optional: false]}], "hexpm", "ccfb5025179ea307f7f899d1bb3905cd0ac9f687ed77feebc8f67bdca78565c4"}, "telemetry": {:hex, :telemetry, "1.2.1", "68fdfe8d8f05a8428483a97d7aab2f268aaff24b49e0f599faa091f1d4e7f61c", [:rebar3], [], "hexpm", "dad9ce9d8effc621708f99eac538ef1cbe05d6a874dd741de2e689c47feafed5"}, "telemetry_metrics": {:hex, :telemetry_metrics, "0.6.2", "2caabe9344ec17eafe5403304771c3539f3b6e2f7fb6a6f602558c825d0d0bfb", [:mix], [{:telemetry, "~> 0.4 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "9b43db0dc33863930b9ef9d27137e78974756f5f198cae18409970ed6fa5b561"}, - "telemetry_poller": {:hex, :telemetry_poller, "1.0.0", "db91bb424e07f2bb6e73926fcafbfcbcb295f0193e0a00e825e589a0a47e8453", [:rebar3], [{:telemetry, "~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "b3a24eafd66c3f42da30fc3ca7dda1e9d546c12250a2d60d7b81d264fbec4f6e"}, + "telemetry_poller": {:hex, :telemetry_poller, "1.1.0", "58fa7c216257291caaf8d05678c8d01bd45f4bdbc1286838a28c4bb62ef32999", [:rebar3], [{:telemetry, "~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "9eb9d9cbfd81cbd7cdd24682f8711b6e2b691289a0de6826e58452f28c103c8f"}, "thousand_island": {:hex, :thousand_island, "1.3.5", "6022b6338f1635b3d32406ff98d68b843ba73b3aa95cfc27154223244f3a6ca5", [:mix], [{:telemetry, "~> 0.4 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "2be6954916fdfe4756af3239fb6b6d75d0b8063b5df03ba76fd8a4c87849e180"}, - "tokenizers": {:hex, :tokenizers, "0.4.0", "140283ca74a971391ddbd83cd8cbdb9bd03736f37a1b6989b82d245a95e1eb97", [:mix], [{:castore, "~> 0.1 or ~> 1.0", [hex: :castore, repo: "hexpm", optional: false]}, {:rustler, ">= 0.0.0", [hex: :rustler, repo: "hexpm", optional: true]}, {:rustler_precompiled, "~> 0.6", [hex: :rustler_precompiled, repo: "hexpm", optional: false]}], "hexpm", "ef1a9824f5a893cd3b831c0e5b3d72caa250d2ec462035cc6afef6933b13a82e"}, + "tokenizers": {:hex, :tokenizers, "0.5.0", "9944bba07d0b92bbfb0b8f3eef5d3694e8582a84f4154f1c447ca091a303b82d", [:mix], [{:castore, "~> 0.1 or ~> 1.0", [hex: :castore, repo: "hexpm", optional: false]}, {:rustler, ">= 0.0.0", [hex: :rustler, repo: "hexpm", optional: true]}, {:rustler_precompiled, "~> 0.6", [hex: :rustler_precompiled, repo: "hexpm", optional: false]}], "hexpm", "9791879ce694f6ddd0df004d4dfa598ba406c516f8a7ad2162c84cb0f0b7a62f"}, "unpickler": {:hex, :unpickler, "0.1.0", "c2262c0819e6985b761e7107546cef96a485f401816be5304a65fdd200d5bd6a", [:mix], [], "hexpm", "e2b3f61e62406187ac52afead8a63bfb4e49394028993f3c4c42712743cab79e"}, "unzip": {:hex, :unzip, "0.10.0", "374e0059e48e982076f3fd22cd4817ab11016c1bae3f09421511901ddda95c5c", [:mix], [], "hexpm", "101c06b0fa97a858a83beb618f4bc20370624f73ab3954f756d9b52194056de6"}, "websock": {:hex, :websock, "0.5.3", "2f69a6ebe810328555b6fe5c831a851f485e303a7c8ce6c5f675abeb20ebdadc", [:mix], [], "hexpm", "6105453d7fac22c712ad66fab1d45abdf049868f253cf719b625151460b8b453"}, - "websock_adapter": {:hex, :websock_adapter, "0.5.5", "9dfeee8269b27e958a65b3e235b7e447769f66b5b5925385f5a569269164a210", [:mix], [{:bandit, ">= 0.6.0", [hex: :bandit, repo: "hexpm", optional: true]}, {:plug, "~> 1.14", [hex: :plug, repo: "hexpm", optional: false]}, {:plug_cowboy, "~> 2.6", [hex: :plug_cowboy, repo: "hexpm", optional: true]}, {:websock, "~> 0.5", [hex: :websock, repo: "hexpm", optional: false]}], "hexpm", "4b977ba4a01918acbf77045ff88de7f6972c2a009213c515a445c48f224ffce9"}, + "websock_adapter": {:hex, :websock_adapter, "0.5.6", "0437fe56e093fd4ac422de33bf8fc89f7bc1416a3f2d732d8b2c8fd54792fe60", [:mix], [{:bandit, ">= 0.6.0", [hex: :bandit, repo: "hexpm", optional: true]}, {:plug, "~> 1.14", [hex: :plug, repo: "hexpm", optional: false]}, {:plug_cowboy, "~> 2.6", [hex: :plug_cowboy, repo: "hexpm", optional: true]}, {:websock, "~> 0.5", [hex: :websock, repo: "hexpm", optional: false]}], "hexpm", "e04378d26b0af627817ae84c92083b7e97aca3121196679b73c73b99d0d133ea"}, "xla": {:hex, :xla, "0.6.0", "67bb7695efa4a23b06211dc212de6a72af1ad5a9e17325e05e0a87e4c241feb8", [:make, :mix], [{:elixir_make, "~> 0.4", [hex: :elixir_make, repo: "hexpm", optional: false]}], "hexpm", "dd074daf942312c6da87c7ed61b62fb1a075bced157f1cc4d47af2d7c9f44fb7"}, } diff --git a/priv/repo/migrations/20240223110820_create_vector_extension.exs b/priv/repo/migrations/20240223110820_create_vector_extension.exs deleted file mode 100644 index 5b12f7a..0000000 --- a/priv/repo/migrations/20240223110820_create_vector_extension.exs +++ /dev/null @@ -1,11 +0,0 @@ -defmodule Search.Repo.Migrations.CreateVectorExtension do - use Ecto.Migration - - def up do - execute "CREATE EXTENSION IF NOT EXISTS vector" - end - - def down do - execute "DROP EXTENSION vector" - end -end diff --git a/priv/repo/migrations/20240226120519_create_fragments.exs b/priv/repo/migrations/20240226120519_create_fragments.exs deleted file mode 100644 index f5f1de6..0000000 --- a/priv/repo/migrations/20240226120519_create_fragments.exs +++ /dev/null @@ -1,12 +0,0 @@ -defmodule Search.Repo.Migrations.CreateFragments do - use Ecto.Migration - - def change do - create table(:fragments) do - add :doc_text, :text, null: false - add :embedding, :vector, size: Search.Embedding.embedding_size(), null: false - - timestamps(type: :utc_datetime) - end - end -end diff --git a/priv/repo/migrations/20240411191321_create_schema.exs b/priv/repo/migrations/20240411191321_create_schema.exs new file mode 100644 index 0000000..10fcc2b --- /dev/null +++ b/priv/repo/migrations/20240411191321_create_schema.exs @@ -0,0 +1,45 @@ +defmodule Search.Repo.Migrations.CreateSchema do + use Ecto.Migration + + def change do + execute "CREATE EXTENSION IF NOT EXISTS vector", "DROP EXTENSION vector" + + create table(:packages) do + add :name, :string, null: false + add :version, :string, null: false + + timestamps(type: :utc_datetime) + end + + create table(:doc_items) do + add :ref, :string, null: false + add :type, :string, null: false + add :title, :string, null: false + add :doc, :text + add :package_id, references("packages", on_delete: :delete_all), null: false + + timestamps(type: :utc_datetime) + end + + create table(:doc_fragments) do + add :text, :text, null: false + add :doc_item_id, references("doc_items", on_delete: :delete_all), null: false + + timestamps(type: :utc_datetime) + end + + create table(:embeddings__paraphrase_l3) do + add :embedding, :vector, size: 384, null: false + add :doc_fragment_id, references("doc_fragments", on_delete: :delete_all), null: false + + timestamps(type: :utc_datetime) + end + + create table(:embeddings__paraphrase_albert_small) do + add :embedding, :vector, size: 768, null: false + add :doc_fragment_id, references("doc_fragments", on_delete: :delete_all), null: false + + timestamps(type: :utc_datetime) + end + end +end diff --git a/test/search/embedding_test.exs b/test/search/embedding_test.exs deleted file mode 100644 index 1f6e560..0000000 --- a/test/search/embedding_test.exs +++ /dev/null @@ -1,22 +0,0 @@ -defmodule Search.EmbeddingTest do - use ExUnit.Case, async: true - - alias Search.Embedding - - test "creates embedding tensor of correct shape for a single input" do - %{embedding: embedding} = Nx.Serving.batched_run(Embedding, "The cat chases the mouse") - - assert Nx.shape(embedding) == {Embedding.embedding_size()} - end - - test "creates embedding tensor of correct shape for batched inputs" do - [%{embedding: embedding1}, %{embedding: embedding2}] = - Nx.Serving.batched_run(Embedding, [ - "The cat chases the mouse", - "Lorem ipsum dolor sit amet" - ]) - - assert Nx.shape(embedding1) == {Embedding.embedding_size()} - assert Nx.shape(embedding2) == {Embedding.embedding_size()} - end -end diff --git a/test/search/embeddings_test.exs b/test/search/embeddings_test.exs new file mode 100644 index 0000000..c199cf4 --- /dev/null +++ b/test/search/embeddings_test.exs @@ -0,0 +1,134 @@ +defmodule Search.EmbeddingsTest do + use Search.DataCase, async: true + + alias Search.{Embeddings, Repo, PackagesFixtures} + + import Nx.Defn + import Ecto.Query + + setup do + {_, config} = + Application.fetch_env!(:search, :embedding_providers) |> Keyword.fetch!(:paraphrase_l3) + + %{embedding_size: embedding_size} = + Map.new(config) + + table_name = Embeddings.table_name(:paraphrase_l3) + + {embeddings, rng_key} = + Nx.Random.normal(Nx.Random.key(42), shape: {4, embedding_size}) + + fragments = PackagesFixtures.doc_fragments_fixture(4) + + now = DateTime.utc_now(:second) + + embeddings = + for {fragment, i} <- Enum.with_index(fragments) do + %{ + doc_fragment_id: fragment.id, + embedding: embeddings[i], + inserted_at: now, + updated_at: now + } + end + + Repo.insert_all({table_name, Embeddings.Embedding}, embeddings) + + {query, _} = Nx.Random.normal(rng_key, shape: {embedding_size}) + + embeddings = Repo.all(from e in {table_name, Embeddings.Embedding}, select: e) + + {:ok, %{embeddings: embeddings, query: query, fragments: Repo.reload!(fragments)}} + end + + describe "embed_one/2" do + test "creates embedding tensor of correct shape for a single input", _ctx do + embedding = + Embeddings.embed_one(:paraphrase_l3, "The cat chases the mouse") + + assert Nx.shape(embedding) == {Embeddings.embedding_size(:paraphrase_l3)} + end + + test "fails for nonexistent model", _ctx do + assert_raise KeyError, fn -> + Embeddings.embed_one(:no_such_model, "The cat chases the mouse") + end + end + end + + describe "embed" do + test "creates an embedding entity for each fragment to be embedded", %{ + fragments: fragments + } do + {:ok, new_embeddings} = Embeddings.embed(:paraphrase_albert_small) + + embeddings_num = Repo.aggregate(Embeddings.table_name(:paraphrase_albert_small), :count) + + assert embeddings_num == length(fragments) + assert length(new_embeddings) == length(fragments) + end + + test "calls the progress callback if provided", %{ + fragments: fragments + } do + self_pid = self() + callback = fn arg -> send(self_pid, arg) end + + {:ok, _} = Embeddings.embed(:paraphrase_albert_small, callback) + + fragments_count = length(fragments) + + assert_received {^fragments_count, done} when is_integer(done) + end + + test "does no work for already embedded fragments", _ctx do + self_pid = self() + callback = fn _ -> send(self_pid, :never_receive) end + + assert {:ok, []} = Embeddings.embed(:paraphrase_l3, callback) + + refute_received :never_receive + end + end + + describe "knn_query/3" do + test "when given no options, performs the kNN lookup on the entire repo using cosine distance", + %{embeddings: embeddings, query: query} do + knn_result = Embeddings.knn_query(:paraphrase_l3, query) + + sorted_embeddings = sort_embeddings(embeddings, query, &manual_cosine_distance/2) + + assert Enum.map(knn_result, & &1.id) == Enum.map(sorted_embeddings, & &1.id) + end + + test "when given [metric: :l2], performs the kNN lookup on the entire repo usin l2 distance", + %{embeddings: embeddings, query: query} do + knn_result = Embeddings.knn_query(:paraphrase_l3, query, metric: :l2) + + sorted_embeddings = sort_embeddings(embeddings, query, &manual_l2_distance/2) + + assert Enum.map(knn_result, & &1.id) == Enum.map(sorted_embeddings, & &1.id) + end + + test "when given value for :k option, returns only the top k results", %{query: query} do + knn_result = Embeddings.knn_query(:paraphrase_l3, query, k: 3) + + assert length(knn_result) == 3 + end + end + + defp sort_embeddings(embeddings, query, dist_fn) do + Enum.sort(embeddings, fn a, b -> + Nx.to_number(dist_fn.(query, Pgvector.to_tensor(a.embedding))) <= + Nx.to_number(dist_fn.(query, Pgvector.to_tensor(b.embedding))) + end) + end + + defnp manual_cosine_distance(a, b) do + 1 - Nx.dot(a, b) / Nx.sqrt(Nx.sum(Nx.pow(a, 2)) * Nx.sum(Nx.pow(b, 2))) + end + + defnp manual_l2_distance(a, b) do + Nx.sum(Nx.pow(a - b, 2)) + end +end diff --git a/test/search/fragment_test.exs b/test/search/fragment_test.exs deleted file mode 100644 index 3232a6d..0000000 --- a/test/search/fragment_test.exs +++ /dev/null @@ -1,63 +0,0 @@ -defmodule Search.FragmentTest do - use Search.DataCase, async: true - - alias Search.{Fragment, Embedding} - - import Nx.Defn - - setup do - {embeddings, rng_key} = - Nx.Random.normal(Nx.Random.key(42), shape: {10, Embedding.embedding_size()}) - - for i <- 0..9 do - Repo.insert!(%Fragment{doc_text: "Text #{i}", embedding: embeddings[i]}) - end - - {query, _} = Nx.Random.normal(rng_key, shape: {Embedding.embedding_size()}) - - fragments = Repo.all(from f in Fragment, select: f) - - {:ok, %{fragments: fragments, query: query}} - end - - describe "knn_query/2" do - test "when given no options, performs the kNN lookup on the entire repo using cosine distance", - %{fragments: fragments, query: query} do - knn_result = Fragment.knn_lookup(query) - - sorted_fragments = sort_fragments(fragments, query, &manual_cosine_distance/2) - - assert Enum.map(knn_result, & &1.id) == Enum.map(sorted_fragments, & &1.id) - end - - test "when given [metric: :l2], performs the kNN lookup on the entire repo usin l2 distance", - %{fragments: fragments, query: query} do - knn_result = Fragment.knn_lookup(query, metric: :l2) - - sorted_fragments = sort_fragments(fragments, query, &manual_l2_distance/2) - - assert Enum.map(knn_result, & &1.id) == Enum.map(sorted_fragments, & &1.id) - end - - test "when given value for :k option, returns only the top k results", %{query: query} do - knn_result = Fragment.knn_lookup(query, k: 5) - - assert length(knn_result) == 5 - end - end - - defp sort_fragments(fragments, query, dist_fn) do - Enum.sort(fragments, fn a, b -> - Nx.to_number(dist_fn.(query, Pgvector.to_tensor(a.embedding))) <= - Nx.to_number(dist_fn.(query, Pgvector.to_tensor(b.embedding))) - end) - end - - defnp manual_cosine_distance(a, b) do - 1 - Nx.dot(a, b) / Nx.sqrt(Nx.sum(Nx.pow(a, 2)) * Nx.sum(Nx.pow(b, 2))) - end - - defnp manual_l2_distance(a, b) do - Nx.sum(Nx.pow(a - b, 2)) - end -end diff --git a/test/search/hex_client_test.exs b/test/search/hex_client_test.exs index 34973b0..ecc4d53 100644 --- a/test/search/hex_client_test.exs +++ b/test/search/hex_client_test.exs @@ -22,34 +22,6 @@ defmodule Search.HexClientTest do end describe "get_releases/1" do - test "when given a well-formed package JSON, successfuly parses the releases" do - Req.Test.stub(HexClient, fn conn -> - Req.Test.json(conn, %{ - "releases" => [ - %{"version" => "1.2.3", "has_docs" => true}, - %{"version" => "1.1.25", "has_docs" => false} - ] - }) - end) - - package_name = "test_package" - - assert HexClient.get_releases(package_name) == - {:ok, - [ - %HexClient.Release{ - package_name: package_name, - version: Version.parse!("1.2.3"), - has_docs: true - }, - %HexClient.Release{ - package_name: package_name, - version: Version.parse!("1.1.25"), - has_docs: false - } - ]} - end - test "when getting a response other than 200 OK, should fail gracefully" do Req.Test.stub(HexClient, fn conn -> Plug.Conn.send_resp(conn, 403, "Forbidden") @@ -73,23 +45,12 @@ defmodule Search.HexClientTest do rel = %HexClient.Release{ package_name: "test_package", - version: Version.parse!("1.2.3"), - has_docs: true + version: Version.parse!("1.2.3") } assert HexClient.get_docs_tarball(rel) == {:ok, test_tar_contents} end - test "when given a release with no documentation, should return error" do - rel = %HexClient.Release{ - package_name: "test_package", - version: Version.parse!("1.2.3"), - has_docs: false - } - - assert HexClient.get_docs_tarball(rel) == {:error, "Package release has no documentation."} - end - test "when getting a response other than 200 OK, should fail gracefully" do Req.Test.stub(HexClient, fn conn -> Plug.Conn.send_resp(conn, 403, "Forbidden") @@ -97,8 +58,7 @@ defmodule Search.HexClientTest do rel = %HexClient.Release{ package_name: "test_package", - version: Version.parse!("1.2.3"), - has_docs: true + version: Version.parse!("1.2.3") } assert HexClient.get_docs_tarball(rel) == {:error, "HTTP 403"} diff --git a/test/search/hex_client_test/release_test.exs b/test/search/hex_client_test/release_test.exs new file mode 100644 index 0000000..bd5f7c2 --- /dev/null +++ b/test/search/hex_client_test/release_test.exs @@ -0,0 +1,41 @@ +defmodule Search.HexClientTest.ReleaseTest do + use ExUnit.Case, async: true + + alias Search.HexClient.Release + + describe "latest/1" do + test "when given no prerelease entries, returns the release with largest version" do + releases = [ + %Release{version: Version.parse!("1.2.3")}, + %Release{version: Version.parse!("2.1.3")}, + %Release{version: Version.parse!("1.4.3")} + ] + + assert Release.latest(releases).version == Version.parse!("2.1.3") + end + + test "when given only prerelease entries, returns the prerelease with largest version" do + releases = [ + %Release{version: Version.parse!("1.2.3-rc1")}, + %Release{version: Version.parse!("2.1.3-rc2")}, + %Release{version: Version.parse!("1.4.3-rc1")} + ] + + assert Release.latest(releases).version == Version.parse!("2.1.3-rc2") + end + + test "when given mixed release and prerelease entries, returns the release with largest version" do + releases = [ + %Release{version: Version.parse!("1.2.3")}, + %Release{version: Version.parse!("2.1.3")}, + %Release{version: Version.parse!("4.4.3-rc1")} + ] + + assert Release.latest(releases).version == Version.parse!("2.1.3") + end + + test "when given an empty list, returns nil" do + assert is_nil(Release.latest([])) + end + end +end diff --git a/test/search/packages_test.exs b/test/search/packages_test.exs new file mode 100644 index 0000000..b286e0f --- /dev/null +++ b/test/search/packages_test.exs @@ -0,0 +1,115 @@ +defmodule Search.PackagesTest do + use Search.DataCase + + alias Search.Packages + + import Search.PackagesFixtures + + describe "doc_fragments" do + alias Search.Packages.DocFragment + + test "create_doc_fragment/2 with valid data creates an item" do + [item] = doc_items_fixture(1) + + valid_attrs = %{ + text: "Some text" + } + + assert {:ok, %DocFragment{} = fragment} = Packages.create_doc_fragment(item, valid_attrs) + assert fragment.text == valid_attrs.text + fragment = Repo.preload(fragment, :doc_item) + assert fragment.doc_item.id == item.id + end + + test "create_doc_fragment/2 with invalid data returns error changeset" do + [item] = doc_items_fixture(1) + + assert {:error, %Ecto.Changeset{}} = + Packages.create_doc_fragment(item, %{text: nil}) + end + end + + describe "doc_items" do + alias Search.Packages.DocItem + + test "create_doc_item/2 with valid data creates an item" do + package = package_fixture() + + valid_attrs = %{ + title: "Some title", + type: "module", + doc: "Some doc", + ref: "Some ref" + } + + assert {:ok, %DocItem{} = item} = Packages.create_doc_item(package, valid_attrs) + assert item.title == valid_attrs.title + assert item.type == valid_attrs.type + assert item.doc == valid_attrs.doc + assert item.ref == valid_attrs.ref + item = Repo.preload(item, :package) + assert item.package.id == package.id + end + + test "create_doc_item/2 with invalid data returns error changeset" do + package = package_fixture() + + assert {:error, %Ecto.Changeset{}} = + Packages.create_doc_item(package, %{title: nil, type: nil, doc: nil, ref: nil}) + end + end + + describe "packages" do + alias Search.Packages.Package + + @invalid_attrs %{name: nil, version: nil} + + test "list_packages/0 returns all packages" do + package = package_fixture() + assert Packages.list_packages() == [package] + end + + test "get_package!/1 returns the package with given id" do + package = package_fixture() + assert Packages.get_package!(package.id) == package + end + + test "create_package/1 with valid data creates a package" do + valid_attrs = %{name: "some name", version: "some version"} + + assert {:ok, %Package{} = package} = Packages.create_package(valid_attrs) + assert package.name == "some name" + assert package.version == "some version" + end + + test "create_package/1 with invalid data returns error changeset" do + assert {:error, %Ecto.Changeset{}} = Packages.create_package(@invalid_attrs) + end + + test "update_package/2 with valid data updates the package" do + package = package_fixture() + update_attrs = %{name: "some updated name", version: "some updated version"} + + assert {:ok, %Package{} = package} = Packages.update_package(package, update_attrs) + assert package.name == "some updated name" + assert package.version == "some updated version" + end + + test "update_package/2 with invalid data returns error changeset" do + package = package_fixture() + assert {:error, %Ecto.Changeset{}} = Packages.update_package(package, @invalid_attrs) + assert package == Packages.get_package!(package.id) + end + + test "delete_package/1 deletes the package" do + package = package_fixture() + assert {:ok, %Package{}} = Packages.delete_package(package) + assert_raise Ecto.NoResultsError, fn -> Packages.get_package!(package.id) end + end + + test "change_package/1 returns a package changeset" do + package = package_fixture() + assert %Ecto.Changeset{} = Packages.change_package(package) + end + end +end diff --git a/test/support/fixtures/packages_fixtures.ex b/test/support/fixtures/packages_fixtures.ex new file mode 100644 index 0000000..c24bf84 --- /dev/null +++ b/test/support/fixtures/packages_fixtures.ex @@ -0,0 +1,46 @@ +defmodule Search.PackagesFixtures do + @moduledoc """ + This module defines test helpers for creating + entities via the `Search.Packages` context. + """ + + @doc """ + Generate a package. + """ + def package_fixture(attrs \\ %{}) do + {:ok, package} = + attrs + |> Enum.into(%{ + name: "some name", + version: "1.2.3" + }) + |> Search.Packages.create_package() + + package + end + + def doc_items_fixture(num_items) do + package = package_fixture() + + for i <- 1..num_items do + Search.Repo.insert!(%Search.Packages.DocItem{ + title: "Module doc title", + ref: "Test ref", + doc: "Text #{i}", + type: "module", + package: package + }) + end + end + + def doc_fragments_fixture(num_fragments) do + items = doc_items_fixture(num_fragments) + + for item <- items do + Search.Repo.insert!(%Search.Packages.DocFragment{ + text: "Preprocessed text: #{item.doc}", + doc_item: item + }) + end + end +end