diff --git a/Artifacts.toml b/Artifacts.toml index 8110db3..3dc3b50 100644 --- a/Artifacts.toml +++ b/Artifacts.toml @@ -5,3 +5,51 @@ lazy = true [[juliaextra.download]] sha256 = "61133afa7e06fda133f07164c57190a5b922f8f2a1aa17c3f8a628b5cf752512" url = "https://github.com/svilupp/AIHelpMeArtifacts/raw/main/artifacts/juliaextra__v1.10.0__ada1.0.tar.gz" + +["julia__textembedding3large-0-Float32"] +git-tree-sha1 = "a105a2482296fa0a80ce0c76677cc9ef673be70e" +lazy = true + + [["julia__textembedding3large-0-Float32".download]] + sha256 = "ff4e91908fb54b7919aad9d6a2ac5045124d43eb864fe9f96a7a68d304d4e0a2" + url = "https://github.com/svilupp/AIHelpMeArtifacts/raw/main/artifacts/julia__v1.10.2__textembedding3large-0-Float32__v1.0.tar.gz" + +["julia__textembedding3large-1024-Bool"] +git-tree-sha1 = "7eef82f15c72712b4f5fff2449ebf3ed64b56b14" +lazy = true + + [["julia__textembedding3large-1024-Bool".download]] + sha256 = "27186886d19ea4c3f1710b4bc70e8e809d906069d5de8c992c948d97d0f454da" + url = "https://github.com/svilupp/AIHelpMeArtifacts/raw/main/artifacts/julia__v1.10.2__textembedding3large-1024-Bool__v1.0.tar.gz" + +["tidier__textembedding3large-0-Float32"] +git-tree-sha1 = "680c7035e512844fd2b9af1757b02b931dfadaa5" +lazy = true + + [["tidier__textembedding3large-0-Float32".download]] + sha256 = "59eb6fef198e32d238c11d3a95e5201d18cb83c5d42eae753706614c0f72db9e" + url = "https://github.com/svilupp/AIHelpMeArtifacts/raw/main/artifacts/tidier__v20240407__textembedding3large-0-Float32__v1.0.tar.gz" + +["tidier__textembedding3large-1024-Bool"] +git-tree-sha1 = "44d861977d663a9c4615023ae38828e0ef88036e" +lazy = true + + [["tidier__textembedding3large-1024-Bool".download]] + sha256 = "226cadd2805abb6ab6e561330aca97466e0a2cb1e1eb171be661d9dea9dcacdc" + url = "https://github.com/svilupp/AIHelpMeArtifacts/raw/main/artifacts/tidier__v20240407__textembedding3large-1024-Bool__v1.0.tar.gz" + +["makie__textembedding3large-0-Float32"] +git-tree-sha1 = "30c29c10d9b2b160b43f358fad9f4f6fe83ce378" +lazy = true + + [["makie__textembedding3large-0-Float32".download]] + sha256 = "ee15489022df191fbede93adf1bd7cc1ceb1f84185229026a5e38ae9a3fab737" + url = "https://github.com/svilupp/AIHelpMeArtifacts/raw/main/artifacts/makie__v20240330__textembedding3large-0-Float32__v1.0.tar.gz" + +["makie__textembedding3large-1024-Bool"] +git-tree-sha1 = "a49a86949f86f6cf4c29bdc9559c05064b49c801" +lazy = true + + [["makie__textembedding3large-1024-Bool".download]] + sha256 = "135f36effc0d29ed20e9bc877f727e4d9d8366bcae4bf4d13f998529d1091324" + url = "https://github.com/svilupp/AIHelpMeArtifacts/raw/main/artifacts/makie__v20240330__textembedding3large-1024-Bool__v1.0.tar.gz" \ No newline at end of file diff --git a/CHANGELOG.md b/CHANGELOG.md index 73b67eb..d024d66 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] ### Added +- (Preliminary) Knowledge packs available for Julia docs (`:julia`), Tidier ecosystem (`:tidier`), Makie ecosystem (`:makie`). Load with `load_index!(:julia)` or several with `load_index!([:julia, :tidier])`. + +### Changed +- Bumped up PromptingTools to v0.20 (brings new RAG capabilities, pretty-printing, etc.) +- Changed default model to be GPT-4 Turbo to improve answer quality ### Fixed - Fixed wrong initiation of `CONV_HISTORY` and other globals that led to UndefVarError. 
Moved several globals to `const Ref{}` pattern to ensure type stability, but it means that from now it always needs to be dereferenced with `[]` (eg, `MAIN_INDEX[]` instead of `MAIN_INDEX`). diff --git a/Project.toml b/Project.toml index ef1ad51..5e6e3c4 100644 --- a/Project.toml +++ b/Project.toml @@ -4,8 +4,11 @@ authors = ["J S <49557684+svilupp@users.noreply.github.com> and contributors"] version = "0.0.1-DEV" [deps] +HDF5 = "f67ccb44-e63f-5c2f-98bd-6dc0ccc4ba2f" LazyArtifacts = "4af54fe1-eca0-43a8-85a7-787d91b784e3" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" +Logging = "56ddb016-857b-54e1-b83d-db4d58db5568" +PrecompileTools = "aea7be01-6a6a-4083-8856-8a6e6704d82a" Preferences = "21216c6a-2e73-6563-6e65-726566657250" PromptingTools = "670122d1-24a8-4d70-bfce-740807c42192" REPL = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb" @@ -20,7 +23,7 @@ JSON3 = "1" LazyArtifacts = "<0.0.1, 1" LinearAlgebra = "<0.0.1, 1" Preferences = "1" -PromptingTools = "0.9" +PromptingTools = "0.20" REPL = "1" SHA = "0.7" Serialization = "<0.0.1, 1" diff --git a/README.md b/README.md index e98eced..abb4583 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,8 @@ AIHelpMe harnesses the power of Julia's extensive documentation and advanced AI models to provide tailored coding guidance. By integrating with PromptingTools.jl, it offers a unique, AI-assisted approach to answering your coding queries directly in Julia's environment. -Note: This is only a proof-of-concept. If there is enough interest, we will fine-tune the RAG pipeline for better performance. +> [!CAUTION] +> This is only a proof-of-concept. If there is enough interest, we will fine-tune the RAG pipeline for better performance. ## Features @@ -27,7 +28,8 @@ Pkg.add(url="https://github.com/svilupp/AIHelpMe.jl") - Julia (version 1.10 or later). - Internet connection for API access. -- OpenAI and Cohere API keys (recommended for optimal performance). See [How to Obtain API Keys](#how-to-obtain-api-keys). +- OpenAI API keys with available credits. See [How to Obtain API Keys](#how-to-obtain-api-keys). +- For optimal performance, get also Cohere API key (free for community use) and Tavily API key (free for community use). All setup should take less than 5 minutes! @@ -40,10 +42,43 @@ All setup should take less than 5 minutes! ``` ```plaintext - [ Info: Done generating response. Total cost: $0.001 + [ Info: Done generating response. Total cost: $0.015 AIMessage("To implement quicksort in Julia, you can use the `sort` function with the `alg=QuickSort` argument.") ``` + Note: As a default, we load only the Julia documentation and docstrings for standard libraries. The default model used is GPT-4 Turbo. + + You can pretty-print the answer using `pprint` if you return the full RAGResult (`return_all=true`): + ```julia + using AIHelpMe: pprint + + result = aihelp("How do I implement quicksort in Julia?", return_all=true) + pprint(result) + ``` + + ```plaintext + -------------------- + QUESTION(s) + -------------------- + - How do I implement quicksort in Julia? + + -------------------- + ANSWER + -------------------- + To implement quicksort in Julia, you can use the [5,1.0]`sort`[1,1.0] function with the [1,1.0]`alg=QuickSort`[1,1.0] argument.[2,1.0] + + -------------------- + SOURCES + -------------------- + 1. https://docs.julialang.org/en/v1.10.2/base/sort/index.html::Sorting and Related Functions/Sorting Functions + 2. https://docs.julialang.org/en/v1.10.2/base/sort/index.html::Sorting and Related Functions/Sorting Functions + 3. 
https://docs.julialang.org/en/v1.10.2/base/sort/index.html::Sorting and Related Functions/Sorting Algorithms
+    4. SortingAlgorithms::/README.md::0::SortingAlgorithms
+    5. AIHelpMe::/README.md::0::AIHelpMe
+    ```
+
+    Note: You can see the model cheated because it can see this very documentation...
+
 2. **`aihelp` Macro**:
    ```julia
    aihelp"how to implement quicksort in Julia?"
    ```
@@ -56,11 +91,12 @@ All setup should take less than 5 minutes!
    Note: The `!` is required for follow-up questions. `aihelp!` does not add new context/more information - to do that, you need to ask a new question.

-4. **Pick stronger models**:
-   Eg, "gpt4t" is an alias for GPT-4 Turbo:
+4. **Pick faster models**:
+   Eg, for simple questions, GPT 3.5 might be enough, so use the alias "gpt3t":
    ```julia
-   aihelp"Elaborate on the `sort` function and quicksort algorithm"gpt4t
+   aihelp"Elaborate on the `sort` function and quicksort algorithm"gpt3t
    ```
+
    ```plaintext
    [ Info: Done generating response. Total cost: $0.002 -->
    AIMessage("The `sort` function in programming languages, including Julia.... continues for a while!
@@ -69,22 +105,36 @@ All setup should take less than 5 minutes!
 5. **Debugging**: How did you come up with that answer? Check the "context" provided to the AI model (ie, the documentation snippets that were used to generate the answer):
    ```julia
-   const AHM = AIHelpMe
-   AHM.preview_context()
+   AIHelpMe.pprint(AIHelpMe.LAST_RESULT[])
    # Output: Pretty-printed Question + Context + Answer with color highlights
    ```
+    The color highlights show you which words were NOT supported by the provided context (magenta = completely new, blue = partially new).
+    It's a quick and intuitive way to see which function names or variables are made up versus which ones were in the context.
+
+    You can change the kwargs of `pprint` to hide the annotations or potentially even show the underlying context (snippets from the documentation):
+
+    ```julia
+    AIHelpMe.pprint(AIHelpMe.LAST_RESULT[]; add_context = true, add_scores = false)
+    ```
+
 ## How to Obtain API Keys

 ### OpenAI API Key:
 1. Visit [OpenAI's API portal](https://openai.com/api/).
 2. Sign up and generate an API Key.
-3. Set it as an environment variable or a local preference in PromptingTools.jl. See the [instructions](https://svilupp.github.io/PromptingTools.jl/dev/frequently_asked_questions/#Configuring-the-Environment-Variable-for-API-Key).
+3. Charge some credits ($5 minimum, but that will last you a long time).
+4. Set it as an environment variable or a local preference in PromptingTools.jl. See the [instructions](https://siml.earth/PromptingTools.jl/dev/frequently_asked_questions#Configuring-the-Environment-Variable-for-API-Key).

 ### Cohere API Key:
 1. Sign up at [Cohere's registration page](https://dashboard.cohere.com/welcome/register).
 2. After registering, visit the [API keys section](https://dashboard.cohere.com/api-keys) to obtain a free, rate-limited Trial key.
-3. Set it as an environment variable or a local preference in PromptingTools.jl. See the [instructions](https://svilupp.github.io/PromptingTools.jl/dev/frequently_asked_questions/#Configuring-the-Environment-Variable-for-API-Key).
+3. Set it as an environment variable or a local preference in PromptingTools.jl. See the [instructions](https://siml.earth/PromptingTools.jl/dev/frequently_asked_questions#Configuring-the-Environment-Variable-for-API-Key).
+
+### Tavily API Key:
+1. Sign up at [Tavily](https://app.tavily.com/sign-in).
+2.
After registering, generate an API key on the [Overview page](https://app.tavily.com/home). You can get a free, rate-limited Trial key. +3. Set it as an environment variable or a local preference in PromptingTools.jl. See the [instructions](https://siml.earth/PromptingTools.jl/dev/frequently_asked_questions#Configuring-the-Environment-Variable-for-API-Key). ## Usage diff --git a/docs/src/index.md b/docs/src/index.md index acee464..08ae006 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -24,14 +24,15 @@ To install AIHelpMe, use the Julia package manager and the address of the reposi ```julia using Pkg -Pkg.add("https://github.com/svilupp/AIHelpMe.jl") +Pkg.add(url="https://github.com/svilupp/AIHelpMe.jl") ``` **Prerequisites:** - Julia (version 1.10 or later). - Internet connection for API access. -- OpenAI and Cohere API keys (recommended for optimal performance). See [How to Obtain API Keys](#how-to-obtain-api-keys). +- OpenAI API keys with available credits. See [How to Obtain API Keys](#how-to-obtain-api-keys). +- For optimal performance, get also Cohere API key (free for community use) and Tavily API key (free for community use). All setup should take less than 5 minutes! @@ -44,10 +45,43 @@ All setup should take less than 5 minutes! ``` ```plaintext - [ Info: Done generating response. Total cost: $0.001 + [ Info: Done generating response. Total cost: $0.015 AIMessage("To implement quicksort in Julia, you can use the `sort` function with the `alg=QuickSort` argument.") ``` + Note: As a default, we load only the Julia documentation and docstrings for standard libraries. The default model used is GPT-4 Turbo. + + You can pretty-print the answer using `pprint` if you return the full RAGResult (`return_all=true`): + ```julia + using AIHelpMe: pprint + + result = aihelp("How do I implement quicksort in Julia?", return_all=true) + pprint(result) + ``` + + ```plaintext + -------------------- + QUESTION(s) + -------------------- + - How do I implement quicksort in Julia? + + -------------------- + ANSWER + -------------------- + To implement quicksort in Julia, you can use the [5,1.0]`sort`[1,1.0] function with the [1,1.0]`alg=QuickSort`[1,1.0] argument.[2,1.0] + + -------------------- + SOURCES + -------------------- + 1. https://docs.julialang.org/en/v1.10.2/base/sort/index.html::Sorting and Related Functions/Sorting Functions + 2. https://docs.julialang.org/en/v1.10.2/base/sort/index.html::Sorting and Related Functions/Sorting Functions + 3. https://docs.julialang.org/en/v1.10.2/base/sort/index.html::Sorting and Related Functions/Sorting Algorithms + 4. SortingAlgorithms::/README.md::0::SortingAlgorithms + 5. AIHelpMe::/README.md::0::AIHelpMe + ``` + + Note: You can see the model cheated because it can see this very documentation... + 2. **`aihelp` Macro**: ```julia aihelp"how to implement quicksort in Julia?" @@ -60,11 +94,12 @@ All setup should take less than 5 minutes! Note: The `!` is required for follow-up questions. `aihelp!` does not add new context/more information - to do that, you need to ask a new question. -4. **Pick stronger models**: - Eg, "gpt4t" is an alias for GPT-4 Turbo: +4. **Pick faster models**: + Eg, for simple questions, GPT 3.5 might be enough, so use the alias "gpt3t": ```julia - aihelp"Elaborate on the `sort` function and quicksort algorithm"gpt4t + aihelp"Elaborate on the `sort` function and quicksort algorithm"gpt3t ``` + ```plaintext [ Info: Done generating response. 
Total cost: $0.002 -->
    AIMessage("The `sort` function in programming languages, including Julia.... continues for a while!
@@ -73,22 +108,36 @@ All setup should take less than 5 minutes!
 5. **Debugging**: How did you come up with that answer? Check the "context" provided to the AI model (ie, the documentation snippets that were used to generate the answer):
    ```julia
-   const AHM = AIHelpMe
-   AHM.preview_context()
+   AIHelpMe.pprint(AIHelpMe.LAST_RESULT[])
    # Output: Pretty-printed Question + Context + Answer with color highlights
    ```
+    The color highlights show you which words were NOT supported by the provided context (magenta = completely new, blue = partially new).
+    It's a quick and intuitive way to see which function names or variables are made up versus which ones were in the context.
+
+    You can change the kwargs of `pprint` to hide the annotations or potentially even show the underlying context (snippets from the documentation):
+
+    ```julia
+    AIHelpMe.pprint(AIHelpMe.LAST_RESULT[]; add_context = true, add_scores = false)
+    ```
+
 ## How to Obtain API Keys

 ### OpenAI API Key:
 1. Visit [OpenAI's API portal](https://openai.com/api/).
 2. Sign up and generate an API Key.
-3. Set it as an environment variable or a local preference in PromptingTools.jl. See the [instructions](https://svilupp.github.io/PromptingTools.jl/dev/frequently_asked_questions/#Configuring-the-Environment-Variable-for-API-Key).
+3. Charge some credits ($5 minimum, but that will last you a long time).
+4. Set it as an environment variable or a local preference in PromptingTools.jl. See the [instructions](https://siml.earth/PromptingTools.jl/dev/frequently_asked_questions#Configuring-the-Environment-Variable-for-API-Key).

 ### Cohere API Key:
 1. Sign up at [Cohere's registration page](https://dashboard.cohere.com/welcome/register).
 2. After registering, visit the [API keys section](https://dashboard.cohere.com/api-keys) to obtain a free, rate-limited Trial key.
-3. Set it as an environment variable or a local preference in PromptingTools.jl. See the [instructions](https://svilupp.github.io/PromptingTools.jl/dev/frequently_asked_questions/#Configuring-the-Environment-Variable-for-API-Key).
+3. Set it as an environment variable or a local preference in PromptingTools.jl. See the [instructions](https://siml.earth/PromptingTools.jl/dev/frequently_asked_questions#Configuring-the-Environment-Variable-for-API-Key).
+
+### Tavily API Key:
+1. Sign up at [Tavily](https://app.tavily.com/sign-in).
+2. After registering, generate an API key on the [Overview page](https://app.tavily.com/home). You can get a free, rate-limited Trial key.
+3. Set it as an environment variable or a local preference in PromptingTools.jl. See the [instructions](https://siml.earth/PromptingTools.jl/dev/frequently_asked_questions#Configuring-the-Environment-Variable-for-API-Key).
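If you prefer the environment-variable route, a minimal sketch for the current Julia session could look like this (the variable names below follow the `PT.*_API_KEY` globals referenced in this PR and are assumptions about your setup; for a permanent setup, use the local-preference instructions linked above):

```julia
# Set the keys for this session only (replace the placeholders with your real keys)
ENV["OPENAI_API_KEY"] = "<your-openai-key>"   # required
ENV["COHERE_API_KEY"] = "<your-cohere-key>"   # optional: enables re-ranking
ENV["TAVILY_API_KEY"] = "<your-tavily-key>"   # optional: enables web-search refinement
```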
 ## Usage

diff --git a/src/AIHelpMe.jl b/src/AIHelpMe.jl
index ef2acc5..8f1ba31 100644
--- a/src/AIHelpMe.jl
+++ b/src/AIHelpMe.jl
@@ -4,35 +4,46 @@ using Preferences, Serialization, LinearAlgebra, SparseArrays
 using LazyArtifacts
 using Base.Docs: DocStr, MultiDoc, doc, meta
 using REPL: stripmd
+using HDF5
 using PromptingTools
+using PromptingTools: pprint
 using PromptingTools.Experimental.RAGTools
+using PromptingTools.Experimental.RAGTools: AbstractRAGConfig, getpropertynested,
+    setpropertynested, merge_kwargs_nested
 using SHA: sha256, bytes2hex
+using Logging, PrecompileTools

 const PT = PromptingTools
-const RAG = PromptingTools.Experimental.RAGTools
+const RT = PromptingTools.Experimental.RAGTools

-## export load_index!, last_context, update_index!
 ## export remove_pkgdir, annotate_source, find_new_chunks
 include("utils.jl")

+## Globals and types are defined here
+include("pipeline_defaults.jl")
+
 ## export docdata_to_source, docextract, build_index
 include("preparation.jl")

+## export load_index!, update_index!
+include("loading.jl")
+
 export aihelp
 include("generation.jl")

 export @aihelp_str, @aihelp!_str
 include("macros.jl")

-## Globals
-const CONV_HISTORY = Vector{Vector{PT.AbstractMessage}}()
-const CONV_HISTORY_LOCK = ReentrantLock()
-const MAX_HISTORY_LENGTH = 1
-const LAST_CONTEXT = Ref{Union{Nothing, RAG.RAGContext}}(nothing)
-const MAIN_INDEX = Ref{Union{Nothing, RAG.AbstractChunkIndex}}(nothing)
 function __init__()
-    ## Load index
-    MAIN_INDEX[] = load_index!()
+    ## Set the active configuration
+    update_pipeline!(:bronze)
+    ## Load index - auto-loads into MAIN_INDEX
+    load_index!(:julia)
+end
+
+# Enable precompilation to reduce start time, with logging disabled
+with_logger(NullLogger()) do
+    @compile_workload include("precompilation.jl")
 end

 end
diff --git a/src/generation.jl b/src/generation.jl
index 2124996..1145897 100644
--- a/src/generation.jl
+++ b/src/generation.jl
@@ -1,156 +1,120 @@
 """
-    aihelp([index::RAG.AbstractChunkIndex,]
+    aihelp([cfg::RT.AbstractRAGConfig, index::RT.AbstractChunkIndex,]
         question::AbstractString;
-        rag_template::Symbol = :RAGAnswerFromContext,
-        top_k::Int = 100, top_n::Int = 5,
-        minimum_similarity::AbstractFloat = -1.0,
-        maximum_cross_similarity::AbstractFloat = 1.0,
-        rerank_strategy::RAG.RerankingStrategy = (!isempty(PT.COHERE_API_KEY) ?
-                                                  RAG.CohereRerank() : RAG.Passthrough()),
-        annotate_sources::Bool = true,
-        model_embedding::String = PT.MODEL_EMBEDDING, model_chat::String = PT.MODEL_CHAT,
-        chunks_window_margin::Tuple{Int, Int} = (1, 1),
-        return_context::Bool = false, verbose::Integer = 1,
-        rerank_kwargs::NamedTuple = NamedTuple(),
-        api_kwargs::NamedTuple = NamedTuple(),
-        kwargs...)
+        verbose::Integer = 1,
+        model = MODEL_CHAT,
+        return_all::Bool = false)
+
+Generates a response for a given question using a Retrieval-Augmented Generation (RAG) approach over Julia documentation (or any other knowledge pack).
+
+If you return the RAGResult (`return_all=true`), you can use `AIHelpMe.pprint` to pretty-print the result and see the sources/"support" scores for each chunk of the answer.
+
+The answer will depend on the knowledge packs loaded, see `?load_index!`.

-Generates a response for a given question using a Retrieval-Augmented Generation (RAG) approach over Julia documentation.
+You can also add docstrings from any package you have loaded (or all of them), see `?update_index`, and make sure to provide your new, updated index explicitly!

 # Arguments
+- `cfg::AbstractRAGConfig`: The RAG configuration.
- `index::AbstractChunkIndex`: The chunk index (contains chunked and embedded documentation). - `question::AbstractString`: The question to be answered. -- `rag_template::Symbol`: Template for the RAG model, defaults to `:RAGAnswerFromContext`. -- `top_k::Int`: Number of top candidates to retrieve based on embedding similarity. -- `top_n::Int`: Number of candidates to return after reranking. This is how many will be sent to the LLM model. -- `minimum_similarity::AbstractFloat`: Minimum similarity threshold (between -1 and 1) for filtering chunks based on embedding similarity. Defaults to -1.0. -- `maximum_cross_similarity::AbstractFloat`: Maximum cross-similarity threshold to avoid sending duplicate documents. NOT IMPLEMENTED YET -- `rerank_strategy::RerankingStrategy`: Strategy for reranking the retrieved chunks. Defaults to `Passthrough()` or `CohereRerank` depending on whether `COHERE_API_KEY` is set. -- `model_embedding::String`: Model used for embedding the question, default is `PT.MODEL_EMBEDDING`. -- `model_chat::String`: Model used for generating the final response, default is `PT.MODEL_CHAT`. -- `chunks_window_margin::Tuple{Int,Int}`: The window size around each chunk to consider for context building. See `?build_context` for more information. -- `return_context::Bool`: If `true`, returns the context used for RAG along with the response. -- `return_all::Bool`: If `true`, returns all messages in the conversation (helpful to continue conversation later). -- `verbose::Bool`: If `true`, enables verbose logging. -- `rerank_kwargs`: Reranking parameters that will be forwarded to the reranking strategy -- `api_kwargs`: API parameters that will be forwarded to the API calls +- `model::String`: A chat/generation model used for generating the final response, default is `MODEL_CHAT`. +- `return_all::Bool`: If `true`, returns a `RAGResult` (provides details of the pipeline + allows pretty-printing with `pprint(result)`). +- `search::Union{Nothing, Bool}`: If `true`, uses TavilySearchRefiner to add web search results to the context. See `?PromptingTools.Experimental.RAGTools.TavilySearchRefiner` for details. +- `rerank::Union{Nothing, Bool}`: If `true`, uses CohereReranker to rerank the chunks. See `?PromptingTools.Experimental.RAGTools.CohereReranker` for details. # Returns -- If `return_context` is `false`, returns the generated message (`msg`). -- If `return_context` is `true`, returns a tuple of the generated message (`msg`) and the RAG context (`rag_context`). +- If `return_all` is `false`, returns the generated message (`msg`). +- If `return_all` is `true`, returns a `RAGResult` (provides details of the pipeline + allows pretty-printing with `pprint(result)`) # Notes -- The function first finds the closest chunks of documentation to the question (via `embeddings`). -- It reranks the candidates and builds a "context" for the RAG model (ie, information to be provided to the LLM model together with the user question). -- The `chunks_window_margin` allows including surrounding chunks for richer context, considering they are from the same source. -- The function currently supports only single `ChunkIndex`. -- Function always saves the last context in global `LAST_CONTEXT` for inspection of sources/context regardless of `return_context` value. +- Function always saves the last context in global `LAST_RESULT` for inspection of sources/context regardless of `return_all` value. # Examples Using `aihelp` to get a response for a question: ```julia -index = build_index(...) 
# create an index that contains Makie.jl documentation +using AIHelpMe: build_index + +index = build_index(...) # create an index that contains Makie.jl documentation (or any loaded package that you have) + question = "How to make a barplot in Makie.jl?" msg = aihelp(index, question) +``` -# or simply -msg = aihelp(index; question) +If you want a pretty-printed answer with highlighted sources, you can use the `return_all` argument and `pprint` utility: +```julia +using AIHelpMe: pprint + +result = aihelp(index, question; return_all = true) +pprint(result) +``` + +If you loaded a knowledge pack, you do not have to provide the index. +```julia +# Load Makie knowledge pack +AIHelpMe.load_index!(:makie) + +question = "How to make a barplot in Makie.jl?" +msg = aihelp(question) +``` + +If you know it's a hard question, you can use the `search` and `rerank` arguments to add web search results to the context and rerank the chunks. +```julia +using AIHelpMe: pprint + +question = "How to make a barplot in Makie.jl?" +result = aihelp(question; search = true, rerank = true, return_all = true) +pprint(result) # nicer display with sources for each chunk/sentences (look for square brackets) ``` """ -function aihelp(index::RAG.AbstractChunkIndex, +function aihelp(cfg_orig::RT.AbstractRAGConfig, index::RT.AbstractChunkIndex, question::AbstractString; - rag_template::Symbol = :RAGAnswerFromContext, - top_k::Int = 100, top_n::Int = 5, - minimum_similarity::AbstractFloat = -1.0, - maximum_cross_similarity::AbstractFloat = 1.0, - rerank_strategy::RAG.RerankingStrategy = (!isempty(PT.COHERE_API_KEY) ? - RAG.CohereRerank() : RAG.Passthrough()), - annotate_sources::Bool = true, - model_embedding::String = PT.MODEL_EMBEDDING, model_chat::String = PT.MODEL_CHAT, - chunks_window_margin::Tuple{Int, Int} = (1, 1), - return_context::Bool = false, verbose::Integer = 1, - rerank_kwargs::NamedTuple = NamedTuple(), - api_kwargs::NamedTuple = NamedTuple(), - kwargs...) - ## Note: Supports only single ChunkIndex for now - global LAST_CONTEXT, CONV_HISTORY_LOCK - - ## Checks - @assert top_k>0 "top_k must be positive" - @assert top_n>0 "top_n must be positive" - @assert top_k>=top_n "top_k must be greater than or equal to top_n" - @assert minimum_similarity >= -1.0&&minimum_similarity <= 1.0 "minimum_similarity must be between -1 and 1" - @assert maximum_cross_similarity >= -1.0&&maximum_cross_similarity <= 1.0 "maximum_cross_similarity must be between -1 and 1" - ## TODO: implement maximum_cross_similarity filter - - @assert chunks_window_margin[1] >= 0&&chunks_window_margin[2] >= 0 "Both `chunks_window_margin` values must be non-negative" - placeholders = only(aitemplates(rag_template)).variables # only one template should be found - @assert (:question in placeholders)&&(:context in placeholders) "Provided RAG Template $(rag_template) is not suitable. It must have placeholders: `question` and `context`." - - cost_tracker = Threads.Atomic{Float64}(0.0) - - ## Embedding step - msg = aiembed(question, - RAG._normalize; - verbose = (verbose > 1), - model = model_embedding, - api_kwargs) - Threads.atomic_add!(cost_tracker, PT.call_cost(msg, model_embedding)) # track costs - question_emb = msg.content .|> Float32 # no need for Float64 - emb_candidates = RAG.find_closest(index, question_emb; top_k, minimum_similarity) - - filtered_candidates = emb_candidates - reranked_candidates = RAG.rerank(rerank_strategy, - index, - question, - filtered_candidates; - verbose = (verbose > 1), top_n, rerank_kwargs...) 
- - ## Build the context - sources = RAG.sources(index)[reranked_candidates.positions] - context = RAG.build_context(index, reranked_candidates; chunks_window_margin) - if annotate_sources - context = [annotate_chunk_with_source(chunk, src) - for (chunk, src) in zip(context, sources)] + verbose::Integer = 1, + model = MODEL_CHAT, + return_all::Bool = false, search::Union{Nothing, Bool} = nothing, rerank::Union{ + Nothing, Bool} = nothing) + global LAST_RESULT, CONV_HISTORY_LOCK, RAG_KWARGS + + ## Grab the active kwargs + kwargs = RAG_KWARGS[] + # Update chat model + setpropertynested(kwargs, + [:rephraser_kwargs, :tagger_kwargs, :answerer_kwargs, :refiner_kwargs], + :model, model) + + ## Adjust config as requested, permanent adjustment can be done with `update_pipeline!` + cfg = deepcopy(cfg_orig) + if !isnothing(rerank) && rerank + ## Use Cohere reranking model + @assert !isempty(PT.COHERE_API_KEY) "COHERE_API_KEY is not set! Cannot use the reranker functionality." + cfg.retriever.reranker = RT.CohereReranker() + end + if !isnothing(search) && search + ##set TavilySearchRefiner - Requires TAVILY_API_KEY + @assert !isempty(PT.TAVILY_API_KEY) "TAVILY_API_KEY is not set! Cannot use the web search refinement functionality." + cfg.generator.refiner = RT.TavilySearchRefiner() end - ## LLM call - msg = aigenerate(rag_template; question, - context = join(context, "\n\n"), model = model_chat, - verbose = (verbose > 1), - api_kwargs, - kwargs...) - last_msg = msg isa PT.AIMessage ? msg : last(msg) - Threads.atomic_add!(cost_tracker, PT.call_cost(last_msg, model_chat)) # track costs - (verbose >= 1) && - @info "Done generating response. Total cost: \$$(round(cost_tracker[],digits=3))" - - ## Always create and save the context to global LAST_CONTEXT (for reference) - rag_context = RAG.RAGContext(; - question, - answer = last_msg.content, - context, - sources, - emb_candidates, - tag_candidates = nothing, - filtered_candidates, - reranked_candidates) + ## Run the RAG pipeline + result = airag(cfg, index; question, verbose, return_all = true, kwargs...) lock(CONV_HISTORY_LOCK) do - LAST_CONTEXT[] = rag_context + LAST_RESULT[] = result end + return return_all ? result : PT.last_message(result) +end - if return_context # for evaluation - return msg, rag_context - else - return msg - end +function aihelp(index::RT.AbstractChunkIndex, question::AbstractString; + kwargs...) + global RAG_CONFIG + ## default kwargs and models are injected inside of main aihelp function + aihelp(RAG_CONFIG[], index, question; kwargs...) end function aihelp(question::AbstractString; kwargs...) - global MAIN_INDEX - @assert !isnothing(MAIN_INDEX[]) "MAIN_INDEX is not loaded. Use `load_index!` to load an index." - aihelp(MAIN_INDEX[], question; kwargs...) + global MAIN_INDEX, RAG_CONFIG + @assert !isnothing(MAIN_INDEX[]) "MAIN_INDEX is not loaded. Use `AIHelpMe.load_index!()` to load an index." + ## default kwargs and models are injected inside of main aihelp function + aihelp(RAG_CONFIG[], MAIN_INDEX[], question; kwargs...) end diff --git a/src/loading.jl b/src/loading.jl new file mode 100644 index 0000000..093f7e9 --- /dev/null +++ b/src/loading.jl @@ -0,0 +1,167 @@ + +""" + load_index!(index::RT.AbstractChunkIndex; + verbose::Bool = 1, kwargs...) + +Loads the provided `index` into the global variable `MAIN_INDEX`. 
+
+If you don't have an `index` yet, use `build_index` to build one from your currently loaded packages (see `?build_index`).
+
+# Example
+```julia
+# build an index from some modules, keep empty to embed all loaded modules (eg, `build_index()`)
+index = AIH.build_index([DataFramesMeta, DataFrames, CSV])
+AIH.load_index!(index)
+```
+"""
+function load_index!(index::RT.AbstractChunkIndex;
+        verbose::Bool = true, kwargs...)
+    global MAIN_INDEX
+    MAIN_INDEX[] = index
+    verbose && @info "Loaded index into MAIN_INDEX"
+    return index
+end
+"""
+    load_index!(file_path::AbstractString;
+        verbose::Bool = true, kwargs...)
+
+Loads the serialized index in `file_path` into the global variable `MAIN_INDEX`.
+
+Supports `.jls` (serialized Julia object) and `.hdf5` (HDF5.jl) files.
+"""
+function load_index!(file_path::AbstractString;
+        verbose::Bool = true, kwargs...)
+    global MAIN_INDEX
+    @assert endswith(file_path, ".jls")||endswith(file_path, ".hdf5") "Provided file path must end with `.jls` (serialized Julia object) or `.hdf5` (see HDF5.jl)."
+    if endswith(file_path, ".jls")
+        index = deserialize(file_path)
+    elseif endswith(file_path, ".hdf5")
+        index = load_index_hdf5(file_path)
+    end
+    @assert index isa RT.AbstractChunkIndex "Provided file path must point to a serialized RAG index (Deserialized type: $(typeof(index)))."
+    verbose && @info "Loaded index from file $(file_path) into MAIN_INDEX"
+    MAIN_INDEX[] = index
+
+    return index
+end
+
+"""
+    load_index!(packs::Vector{Symbol}; verbose::Bool = true, kwargs...)
+    load_index!(pack::Symbol; verbose::Bool = true, kwargs...)
+
+Loads one or more `packs` into the main index from our pre-built artifacts.
+
+Availability of packs might vary depending on your pipeline configuration (ie, whether we have the correct embeddings for it).
+See `AIHelpMe.ALLOWED_PACKS`.
+
+# Example
+```julia
+load_index!(:julia)
+```
+
+Or multiple packs:
+```julia
+load_index!([:julia, :makie, :tidier])
+```
+"""
+function load_index!(packs::Vector{Symbol}; verbose::Bool = true, kwargs...)
+    global ALLOWED_PACKS, RAG_CONFIG, RAG_KWARGS
+    @assert all(x -> x in ALLOWED_PACKS, packs) "Invalid pack(s): $(setdiff(packs, ALLOWED_PACKS)). Allowed packs: $(ALLOWED_PACKS)"
+
+    config_key = get_config_key(RAG_CONFIG[], RAG_KWARGS[])
+    indices = RT.ChunkIndex[]
+    for pack in packs
+        artifact_path = @artifact_str("$(pack)__$(config_key)")
+        index = load_index_hdf5(joinpath(artifact_path, "pack.hdf5"); verbose = false)
+        push!(indices, index)
+    end
+    # TODO: dedupe the index
+    joined_index = reduce(vcat, indices)
+    MAIN_INDEX[] = joined_index
+    verbose && @info "Loaded index from packs: $(join(packs,", ")) into MAIN_INDEX"
+    return joined_index
+end
+
+# Default load index
+load_index!(pack::Symbol) = load_index!([pack])
+load_index!() = load_index!(:julia)
+
+"""
+    update_index(index::RT.AbstractChunkIndex = MAIN_INDEX[],
+        modules::Vector{Module} = Base.Docs.modules;
+        verbose::Integer = 1,
+        kwargs...)
+
+Updates the provided `index` with the documentation of the provided `modules`.
+
+Deduplicates against the `index.sources` and embeds only the new document chunks (as measured by a hash).
+
+Returns the updated `index` (new instance).
+
+For available configurations and customizations, see the corresponding modules and functions of `PromptingTools.Experimental.RAGTools` (eg, `build_index`).
+ +# Example +If you loaded some new packages and want to add them to your MAIN_INDEX (or any `index` you use), run: +```julia +# To update the MAIN_INDEX as well +AIHelpMe.update_index() |> AHM.load_index! + +# To update an explicit index +index = AIHelpMe.update_index(index) +``` +""" +function update_index(index::RT.AbstractChunkIndex = MAIN_INDEX[], + modules::Vector{Module} = Base.Docs.modules; + verbose::Integer = 1, + kwargs...) + ## + global RAG_CONFIG, RAG_KWARGS + ## + cost_tracker = Threads.Atomic{Float64}(0.0) + ## Extract docs + all_docs, all_sources = docextract(modules) + + ## Build the new index -- E2E process disabled as it would duplicate a lot the docs we already have + ## + ## new_index = RT.build_index(RAG_CONFIG[].indexer, all_docs, ; + ## embedder_kwargs, chunker = TextChunker(), chunker_kwargs, + ## verbose, kwargs... + ## ) + + ## Chunking + chunker_kwargs_ = (; sources = all_sources) + chunker_kwargs = haskey(kwargs, :chunker_kwargs) ? + merge(kwargs.chunker_kwargs, chunker_kwargs_) : chunker_kwargs_ + output_chunks, output_sources = RT.get_chunks( + RT.TextChunker(), all_docs; chunker_kwargs..., + verbose = (verbose > 1)) + + ## identify new items + mask = find_new_chunks(index.chunks, output_chunks) + + ## Embed new items + embedder = RAG_CONFIG[].retriever.embedder + embedder_kwargs_ = RT.getpropertynested( + RAG_KWARGS[], [:retriever_kwargs], :embedder_kwargs, nothing) + embedder_kwargs = haskey(kwargs, :embedder_kwargs) ? + merge(kwargs.embedder_kwargs, embedder_kwargs_) : embedder_kwargs_ + embeddings = RT.get_embeddings(embedder, output_chunks[mask]; + embedder_kwargs..., + verbose = (verbose > 1), + cost_tracker) + ## match eltype + embeddings = convert(typeof(index.embeddings), embeddings) + + ## TODO: add tagging in the future! + + ## Update index + @assert size(embeddings, 2)==sum(mask) "Number of embeddings must match the number of new chunks (mask: $(sum(mask)), embeddings: $(size(embeddings,2)))" + new_index = ChunkIndex(; index.id, + chunks = vcat(index.chunks, output_chunks[mask]), + sources = vcat(index.sources, output_sources[mask]), + embeddings = hcat(index.embeddings, embeddings), + index.tags, index.tags_vocab) + + (verbose > 0) && @info "Index built! (cost: \$$(round(cost_tracker[], digits=3)))" + return new_index +end \ No newline at end of file diff --git a/src/macros.jl b/src/macros.jl index fb858f8..f1f89fe 100644 --- a/src/macros.jl +++ b/src/macros.jl @@ -26,21 +26,25 @@ result = aihelp"What is `\$a+\$a`?" # AIMessage("The sum of `1+1` is `2`.") ``` -If you want to use a different model, eg, GPT-4, you can provide its alias as a flag: +If you want to use a different model, eg, GPT-3.5 Turbo, you can provide its alias as a flag: ```julia -result = aihelp"What is `1.23 * 100 + 1`?"gpt4t +result = aihelp"What is `1.23 * 100 + 1`?"gpt3t # AIMessage("The answer is 124.") ``` """ macro aihelp_str(user_question, flags...) - global CONV_HISTORY, MAX_HISTORY_LENGTH, MAIN_INDEX - model = isempty(flags) ? PT.MODEL_CHAT : only(flags) + global CONV_HISTORY, MAX_HISTORY_LENGTH, RAG_CONFIG, MAIN_INDEX, MODEL_CHAT + model = isempty(flags) ? MODEL_CHAT : only(flags) prompt = Meta.parse("\"$(escape_string(user_question))\"") quote - conv = aihelp($(esc(MAIN_INDEX[])), $(esc(prompt)); + result = aihelp($(esc(RAG_CONFIG[])), $(esc(MAIN_INDEX[])), $(esc(prompt)); model = $(esc(model)), return_all = true) - PT.push_conversation!($(esc(CONV_HISTORY)), conv, $(esc(MAX_HISTORY_LENGTH))) + conv = haskey(result.conversations, :final_answer) ? 
+        result.conversations[:final_answer] :
+        result.conversations[:answer]
+    PT.push_conversation!(
+        $(esc(CONV_HISTORY)), conv, $(esc(MAX_HISTORY_LENGTH)))
         last(conv)
     end
 end
@@ -84,8 +88,8 @@ aihelp!"Can you create it from named tuple?"gpt4t
 Ensure that the conversation history is not too long to maintain relevancy and coherence in the AI's responses. The history length is managed by `MAX_HISTORY_LENGTH`.
 """
 macro aihelp!_str(user_question, flags...)
-    global CONV_HISTORY, MAIN_INDEX
-    model = isempty(flags) ? PT.MODEL_CHAT : only(flags)
+    global CONV_HISTORY, MAIN_INDEX, MODEL_CHAT
+    model = isempty(flags) ? MODEL_CHAT : only(flags)
     prompt = Meta.parse("\"$(escape_string(user_question))\"")
     quote
         @assert !isempty($(esc(CONV_HISTORY))) "No conversation history found. Please use `aihelp\"\"` instead."
diff --git a/src/pipeline_defaults.jl b/src/pipeline_defaults.jl
new file mode 100644
index 0000000..57da7ad
--- /dev/null
+++ b/src/pipeline_defaults.jl
@@ -0,0 +1,197 @@
+### Globals for history, replies, etc.
+const CONV_HISTORY = Vector{Vector{PT.AbstractMessage}}()
+const CONV_HISTORY_LOCK = ReentrantLock()
+const MAX_HISTORY_LENGTH = 1
+const LAST_RESULT = Ref{Union{Nothing, RT.AbstractRAGResult}}(nothing)
+const MAIN_INDEX = Ref{Union{Nothing, RT.AbstractChunkIndex}}(nothing)
+"""
+    ALLOWED_PACKS
+
+Currently available packs are:
+- `:julia` - Julia documentation, standard library docstrings and a few extras (for Julia v1.10)
+- `:tidier` - Tidier.jl organization documentation (as of 7th April 2024)
+- `:makie` - Makie.jl organization documentation and a few extras (as of 30th March 2024)
+"""
+const ALLOWED_PACKS = [:julia, :tidier, :makie]
+
+### Globals for configuration
+# These serve as reference models to be injected in the absence of inputs,
+# but the actual model used for a query is provided to `aihelp` directly or via the active RAG_KWARGS
+const MODEL_CHAT = "gpt4t"
+const MODEL_EMBEDDING = "text-embedding-3-large"
+
+# Loaded up with `update_pipeline!` later once RAG_CONFIGURATIONS is populated
+const RAG_KWARGS = Ref{NamedTuple}()
+const RAG_CONFIG = Ref{RT.AbstractRAGConfig}()
+const LOADED_CONFIG_KEY = Ref{String}("") # get the current config key
+
+"""
+    RAG_CONFIGURATIONS
+
+A dictionary of RAG configurations, keyed by a unique symbol (eg, `bronze`).
+Each entry contains a dictionary with keys `:config` and `:kwargs`,
+where `:config` is the RAG configuration object (`AbstractRAGConfig`) and `:kwargs` the NamedTuple of corresponding kwargs.
+
+Available Options:
+- `:bronze`: A simple configuration for a bronze pipeline, using truncated binary embeddings (dimensionality: 1024) and no re-ranking or refinement.
+- `:silver`: A simple configuration for a silver pipeline, using truncated binary embeddings (dimensionality: 1024), but it also enables re-ranking and refinement with a web-search.
+- `:gold`: A more complex configuration, similar to `:silver`, but using standard embeddings (dimensionality: 3072, type: Float32). It also leverages re-ranking and refinement with a web-search.
+""" +const RAG_CONFIGURATIONS = let MODEL_CHAT = MODEL_CHAT, MODEL_EMBEDDING = MODEL_EMBEDDING + RAG_CONFIGURATIONS = Dict{Symbol, Dict{Symbol, Any}}() + ## Bronze + RAG_CONFIGURATIONS[:bronze] = Dict{Symbol, Any}( + :config => RT.RAGConfig(; + retriever = RT.SimpleRetriever(; embedder = RT.BinaryBatchEmbedder())), + :kwargs => ( + retriever_kwargs = (; + top_k = 100, + top_n = 5, + rephraser_kwargs = (; + model = MODEL_CHAT), + embedder_kwargs = (; + truncate_dimension = 1024, + model = MODEL_EMBEDDING), + tagger_kwargs = (; + model = MODEL_CHAT)), + generator_kwargs = (; + answerer_kwargs = (; + model = MODEL_CHAT), + embedder_kwargs = (; + truncate_dimension = 1024, + model = MODEL_EMBEDDING), + refiner_kwargs = (; + model = MODEL_CHAT)))) + ## Silver - reranking + web-search + RAG_CONFIGURATIONS[:silver] = Dict{Symbol, Any}( + :config => RT.RAGConfig(; + retriever = RT.SimpleRetriever(; + embedder = RT.BinaryBatchEmbedder(), reranker = RT.CohereReranker()), + generator = RT.SimpleGenerator(; refiner = RT.TavilySearchRefiner())), + :kwargs => ( + retriever_kwargs = (; + top_k = 100, + top_n = 5, + rephraser_kwargs = (; + model = MODEL_CHAT), + embedder_kwargs = (; + truncate_dimension = 1024, + model = MODEL_EMBEDDING), + tagger_kwargs = (; + model = MODEL_CHAT)), + generator_kwargs = (; + answerer_kwargs = (; + model = MODEL_CHAT), + embedder_kwargs = (; + truncate_dimension = 1024, + model = MODEL_EMBEDDING), + refiner_kwargs = (; + model = MODEL_CHAT)))) + ## Gold - reranking + web-search + RAG_CONFIGURATIONS[:gold] = Dict{Symbol, Any}( + :config => RT.RAGConfig(; + retriever = RT.SimpleRetriever(; + embedder = RT.BatchEmbedder(), reranker = RT.CohereReranker()), + generator = RT.SimpleGenerator(; refiner = RT.TavilySearchRefiner())), + :kwargs => ( + retriever_kwargs = (; + top_k = 100, + top_n = 5, + rephraser_kwargs = (; + model = MODEL_CHAT), + embedder_kwargs = (; + model = MODEL_EMBEDDING), + tagger_kwargs = (; + model = MODEL_CHAT)), + generator_kwargs = (; + answerer_kwargs = (; + model = MODEL_CHAT), + embedder_kwargs = (; + model = MODEL_EMBEDDING), + refiner_kwargs = (; + model = MODEL_CHAT)))) + + RAG_CONFIGURATIONS +end + +"Returns the configuration key for the given `cfg` and `kwargs` to use the relevant artifacts." +function get_config_key(cfg::AbstractRAGConfig, kwargs::NamedTuple) + emb_model = getpropertynested(kwargs, [:embedder_kwargs], :model) + emb_dim = getpropertynested(kwargs, [:embedder_kwargs], :truncate_dimension, 0) + emb_eltype = RT.EmbedderEltype(cfg.retriever.embedder) + string(replace(emb_model, "-" => ""), "-", + emb_dim, "-", emb_eltype) +end + +""" + update_pipeline!(option::Symbol = :bronze; model_chat = MODEL_CHAT, + model_embedding = MODEL_EMBEDDING, verbose::Bool = true, truncate_dimension::Union{ + Nothing, Integer} = nothing) + +Updates the default RAG pipeline to one of the pre-configuration options and sets the requested chat and embedding models. + +This is a good way to update model types to change between OpenAI models and Ollama models. + +See available pipeline options via `keys(RAG_CONFIGURATIONS)`. + +Logic: +- Updates the global `MODEL_CHAT` and `MODEL_EMBEDDING` to the requested models. +- Updates the global `RAG_CONFIG` and `RAG_KWARGS` to the requested `option`. +- Updates the global `LOADED_CONFIG_KEY` to the configuration key for the given `option` and `kwargs` (used by the artifact system to download the correct knowledge packs). 
+
+# Example
+```julia
+update_pipeline!(:bronze; model_chat = "gpt4t")
+```
+You don't need to re-load your index if you just change the chat model.
+
+You can switch the pipeline to Ollama models:
+Note: only 1 Ollama model is supported for embeddings now! You must select "nomic-embed-text" and, if you do, set `truncate_dimension=0` (maximum dimension available).
+```julia
+update_pipeline!(:bronze; model_chat = "mistral:7b-instruct-v0.2-q4_K_M", model_embedding = "nomic-embed-text", truncate_dimension = 0)
+
+# You must download the corresponding knowledge packs via `load_index!` (because you changed the embedding model)
+load_index!(:julia) # or whichever other packs you want!
+```
+"""
+function update_pipeline!(option::Symbol = :bronze; model_chat = MODEL_CHAT,
+        model_embedding = MODEL_EMBEDDING, verbose::Bool = true, truncate_dimension::Union{
+            Nothing, Integer} = nothing)
+    global RAG_CONFIGURATIONS, RAG_CONFIG, RAG_KWARGS, MODEL_CHAT, MODEL_EMBEDDING, LOADED_CONFIG_KEY
+    @assert haskey(RAG_CONFIGURATIONS, option) "Invalid option: $option. Select one of: $(join(keys(RAG_CONFIGURATIONS),", "))"
+    @assert truncate_dimension in [nothing, 0, 1024, 3072] "Invalid truncate_dimension: $(truncate_dimension). Supported: 0, 1024, 3072. See the available artifacts."
+
+    ## Update model references
+    MODEL_CHAT = model_chat
+    MODEL_EMBEDDING = model_embedding
+
+    config = RAG_CONFIGURATIONS[option][:config]
+    kwargs = RAG_CONFIGURATIONS[option][:kwargs]
+    # update models in kwargs to the ones requested
+    kwargs = setpropertynested(
+        kwargs, [:rephraser_kwargs, :tagger_kwargs, :answerer_kwargs, :refiner_kwargs],
+        :model, model_chat
+    )
+    kwargs = setpropertynested(
+        kwargs, [:embedder_kwargs],
+        :model, model_embedding
+    )
+    ## change truncate_embeddings
+    if !isnothing(truncate_dimension)
+        kwargs = setpropertynested(
+            kwargs, [:embedder_kwargs], :truncate_dimension, truncate_dimension)
+    end
+
+    ## Set the options
+    config_key = get_config_key(config, kwargs)
+    ## detect significant changes
+    !isempty(LOADED_CONFIG_KEY[]) && LOADED_CONFIG_KEY[] != config_key &&
+        @warn "Core RAG pipeline configuration has changed! You must re-build your index with `AIHelpMe.load_index!()`!"
+    LOADED_CONFIG_KEY[] = config_key
+    RAG_KWARGS[] = kwargs
+    RAG_CONFIG[] = config
+
+    verbose &&
+        @info "Updated RAG pipeline to `:$option` (Configuration key: \"$config_key\")."
+    return nothing
+end
\ No newline at end of file
diff --git a/src/precompilation.jl b/src/precompilation.jl
new file mode 100644
index 0000000..e69de29
diff --git a/src/preparation.jl b/src/preparation.jl
index d301b59..8ad13fb 100644
--- a/src/preparation.jl
+++ b/src/preparation.jl
@@ -84,36 +84,54 @@ function docextract(modules::Vector{Module} = Base.Docs.modules)
 end

 """
-    RAG.build_index(mod::Module; verbose::Int = 1, kwargs...)
+    RT.build_index(mod::Module; verbose::Int = 1, kwargs...)

 Build `index` from the documentation of a given module `mod`.
 """
-function RAG.build_index(mod::Module; verbose::Int = 1, kwargs...)
-    docs, sources = docextract(mod)
-    RAG.build_index(docs; reader = :docs,
-        sources,
-        extract_metadata = false, verbose,
-        index_id = nameof(mod), kwargs...)
+function RT.build_index(mod::Module; verbose::Int = 1, kwargs...)
+    global RAG_CONFIG, RAG_KWARGS
+
+    ## Extract docstrings
+    all_docs, all_sources = docextract(mod)
+
+    ## Extract current configuration
+    chunker_kwargs_ = (; sources = all_sources)
+    chunker_kwargs = haskey(kwargs, :chunker_kwargs) ?
+        merge(kwargs.chunker_kwargs, chunker_kwargs_) : chunker_kwargs_
+
+    embedder_kwargs_ = RT.getpropertynested(
+        RAG_KWARGS[], [:retriever_kwargs], :embedder_kwargs, nothing)
+    embedder_kwargs = haskey(kwargs, :embedder_kwargs) ?
+        merge(kwargs.embedder_kwargs, embedder_kwargs_) : embedder_kwargs_
+
+    new_index = RT.build_index(RAG_CONFIG[].indexer, all_docs;
+        embedder_kwargs, chunker = TextChunker(), chunker_kwargs,
+        verbose, index_id = nameof(mod), kwargs...)
 end

 """
-    RAG.build_index(modules::Vector{Module} = Base.Docs.modules; verbose::Int = 1,
-        separators = ["\n\n", ". ", "\n"], max_length::Int = 256,
+    RT.build_index(modules::Vector{Module} = Base.Docs.modules; verbose::Int = 1,
         kwargs...)

 Build index from the documentation of the currently loaded modules.
 If `modules` is empty, it will use all currently loaded modules.
 """
-function RAG.build_index(modules::Vector{Module} = Base.Docs.modules; verbose::Int = 1,
-        separators = ["\n\n", ". ", "\n"], max_length::Int = 256,
+function RT.build_index(modules::Vector{Module} = Base.Docs.modules; verbose::Int = 1,
         kwargs...)
+    global RAG_CONFIG, RAG_KWARGS
+
     all_docs, all_sources = docextract(modules)
-    RAG.build_index(all_docs;
-        separators,
-        max_length,
-        reader = :docs,
-        extract_metadata = false,
-        verbose,
-        index_id = :all_index,
-        sources = all_sources)
+    ## Extract current configuration
+    chunker_kwargs_ = (; sources = all_sources)
+    chunker_kwargs = haskey(kwargs, :chunker_kwargs) ?
+        merge(kwargs.chunker_kwargs, chunker_kwargs_) : chunker_kwargs_
+
+    embedder_kwargs_ = RT.getpropertynested(
+        RAG_KWARGS[], [:retriever_kwargs], :embedder_kwargs, nothing)
+    embedder_kwargs = haskey(kwargs, :embedder_kwargs) ?
+        merge(kwargs.embedder_kwargs, embedder_kwargs_) : embedder_kwargs_
+
+    new_index = RT.build_index(RAG_CONFIG[].indexer, all_docs;
+        embedder_kwargs, chunker = TextChunker(), chunker_kwargs,
+        verbose, index_id = :all_index, kwargs...)
 end
diff --git a/src/utils.jl b/src/utils.jl
index e539b76..6b03cb5 100644
--- a/src/utils.jl
+++ b/src/utils.jl
@@ -29,6 +29,7 @@ function find_new_chunks(old_chunks::AbstractVector{<:AbstractString},
     return new_items
 end

+# TODO: maybe remove
 function annotate_chunk_with_source(chunk::AbstractString, src::AbstractString)
     # parts: module, filepath, line, function
     parts = split(src, "::")
@@ -39,164 +40,24 @@ $chunk
 end

 """
-    last_context()
+    last_result()

-Returns the RAGContext from the last `aihelp` call.
+Returns the RAGResult from the last `aihelp` call.
 It can be useful to see the sources/references used by the AI model to generate the response.

-If you're using `aihelp()` make sure to set `return_context = true` to return the context.
+If you're using `aihelp()` make sure to set `return_all = true` to return the RAGResult.
 """
-last_context() = LAST_CONTEXT[]
+last_result() = LAST_RESULT[]

-struct ContextPreview
-    question::AbstractString
-    context::Vector{AbstractString}
-    answer::AbstractString
-end
-
-"""
-    preview_context(context = last_context())
-
-Preview the context of the last `aihelp` call.
-It will pretty-print the question, context and answer in the REPL.
-""" -function preview_context(context = last_context()) - ContextPreview(context.question, context.context, context.answer) -end - -function Base.show(io::IO, context::ContextPreview) - print(io, "\n") - printstyled(io, "QUESTION: ", bold = true, color = :magenta) - print(io, "\n") - printstyled(io, context.question, color = :magenta) - print(io, "\n\n") - printstyled(io, "CONTEXT: ", bold = true, color = :blue) - print(io, "\n") - for ctx in context.context - parts = split(ctx, "\n") - if length(parts) < 3 - println(io, ctx) - continue - end - printstyled(io, parts[begin], color = :blue) - print(io, "\n") - body = parts[(begin + 1):(end - 1)] - printstyled(io, join(body, "\n")) - print(io, "\n") - printstyled(io, parts[end], color = :blue) - print(io, "\n") - end - print(io, "\n\n") - printstyled(io, "ANSWER: ", bold = true, color = :light_green) - print(io, "\n") - printstyled(io, context.answer, color = :light_green) - print(io, "\n") -end - -""" - load_index!(index::RAG.AbstractChunkIndex; - verbose::Bool = 1, kwargs...) - -Loads the provided `index` into the global variable `MAIN_INDEX`. - -If you don't have an `index` yet, use `build_index` to build one from your currently loaded packages (see `?build_index`) - -# Example -```julia -# build an index from some modules, keep empty to embed all loaded modules (eg, `build_index()`) -index = AIH.build_index([DataFramesMeta, DataFrames, CSV]) -AIH.load_index!(index) -``` -""" -function load_index!(index::RAG.AbstractChunkIndex; - verbose::Bool = true, kwargs...) - global MAIN_INDEX - MAIN_INDEX[] = index - verbose && @info "Loaded index into MAIN_INDEX" - return index -end -""" - load_index!(file_path::Union{Nothing, AbstractString} = nothing; - verbose::Bool = true, kwargs...) - -Loads the serialized index in `file_path` into the global variable `MAIN_INDEX`. -If not provided, it will download the latest index from the AIHelpMe.jl repository (more cost-efficient). -""" -function load_index!(file_path::Union{Nothing, AbstractString} = nothing; - verbose::Bool = true, kwargs...) - global MAIN_INDEX - if !isnothing(file_path) - @assert endswith(file_path, ".jls") "Provided file path must end with `.jls` (serialized Julia object)." - file_str = " from a file $(file_path) " - else - artifact_path = artifact"juliaextra" - file_path = joinpath(artifact_path, "docs-index.jls") - file_str = " from an artifact " - end - index = deserialize(file_path) - @assert index isa RAG.AbstractChunkIndex "Provided file path must point to a serialized RAG index (Deserialized type: $(typeof(index)))." - verbose && @info "Loaded index$(file_str)into MAIN_INDEX" - MAIN_INDEX[] = index +"Hacky function to load a HDF5 file into a ChunkIndex object. Only bare-bone ChunkIndex is supported right now." +function load_index_hdf5(path::AbstractString; verbose::Bool = true) + @assert isfile(path) "Index path does not exist! (Provided: $path)" + verbose && @info "Loading index from $path" + fid = h5open(path, "r") + @assert all(x -> haskey(fid, x), ["chunks", "sources", "embeddings"]) "Index is missing fields! (Required: chunks, sources, embeddings)" + index = RT.ChunkIndex(; id = gensym("index"), chunks = read(fid["chunks"]), + sources = read(fid["sources"]), embeddings = read(fid["embeddings"])) + close(fid) return index -end - -""" - update_index(index::RAG.AbstractChunkIndex = MAIN_INDEX, - modules::Vector{Module} = Base.Docs.modules; - verbose::Integer = 1, - separators = ["\\n\\n", ". 
", "\\n"], max_length::Int = 256, - model::AbstractString = PT.MODEL_EMBEDDING, - kwargs...) - modules::Vector{Module} = Base.Docs.modules; - verbose::Bool = true, kwargs...) - -Updates the provided `index` with the documentation of the provided `modules`. - -Deduplicates against the `index.sources` and embeds only the new document chunks (as measured by a hash). - -Returns the updated `index` (new instance). - -# Example -If you loaded some new packages and want to add them to your MAIN_INDEX (or any `index` you use), run: -```julia -# To update the MAIN_INDEX -AHM.update_index() |> AHM.load_index! - -# To update an explicit index -index = AHM.update_index(index) -``` -""" -function update_index(index::RAG.AbstractChunkIndex = MAIN_INDEX[], - modules::Vector{Module} = Base.Docs.modules; - verbose::Integer = 1, - separators = ["\n\n", ". ", "\n"], max_length::Int = 256, - model::AbstractString = PT.MODEL_EMBEDDING, - kwargs...) - ## - cost_tracker = Threads.Atomic{Float64}(0.0) - ## Extract docs - all_docs, all_sources = docextract(modules) - ## Split into chunks - output_chunks, output_sources = RAG.get_chunks(all_docs; - reader = :docs, sources = all_sources, separators, max_length) - ## identify new items - mask = find_new_chunks(index.chunks, output_chunks) - ## Embed new items - embeddings = RAG.get_embeddings(output_chunks[mask]; - verbose = (verbose > 1), - cost_tracker, - model, - kwargs...) - - ## Update index - @assert size(embeddings, 2)==sum(mask) "Number of embeddings must match the number of new chunks (mask: $(sum(mask)), embeddings: $(size(embeddings,2)))" - new_index = ChunkIndex(; index.id, - chunks = vcat(index.chunks, output_chunks[mask]), - sources = vcat(index.sources, output_sources[mask]), - embeddings = hcat(index.embeddings, embeddings), - index.tags, index.tags_vocab) - - (verbose > 0) && @info "Index built! (cost: \$$(round(cost_tracker[], digits=3)))" - return new_index -end +end \ No newline at end of file diff --git a/test/generation.jl b/test/generation.jl new file mode 100644 index 0000000..e69de29 diff --git a/test/runtests.jl b/test/runtests.jl index bc4a996..a46de47 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,4 +1,8 @@ using AIHelpMe +using PromptingTools +using PromptingTools.Experimental.RAGTools +const PT = PromptingTools +const RT = PromptingTools.Experimental.RAGTools using Test using Aqua @@ -8,5 +12,5 @@ using Aqua end @testset "AIHelpMe.jl" begin - # Write your tests here. + include("generation.jl") end