diff --git a/.JuliaFormatter.toml b/.JuliaFormatter.toml new file mode 100644 index 0000000..5657bd0 --- /dev/null +++ b/.JuliaFormatter.toml @@ -0,0 +1,2 @@ +# See https://domluna.github.io/JuliaFormatter.jl/stable/ for a list of options +style = "sciml" diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 0000000..700707c --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,7 @@ +# https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates +version: 2 +updates: + - package-ecosystem: "github-actions" + directory: "/" # Location of package manifests + schedule: + interval: "weekly" diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml new file mode 100644 index 0000000..0d3c43b --- /dev/null +++ b/.github/workflows/CI.yml @@ -0,0 +1,73 @@ +name: CI +on: + push: + branches: + - main + tags: ['*'] + pull_request: + workflow_dispatch: +concurrency: + # Skip intermediate builds: always. + # Cancel intermediate builds: only if it is a pull request build. + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: ${{ startsWith(github.ref, 'refs/pull/') }} +jobs: + test: + name: Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }} + runs-on: ${{ matrix.os }} + timeout-minutes: 60 + permissions: # needed to allow julia-actions/cache to proactively delete old caches that it has created + actions: write + contents: read + strategy: + fail-fast: false + matrix: + version: + - '1.10' + os: + - ubuntu-latest + arch: + - x64 + steps: + - uses: actions/checkout@v4 + - uses: julia-actions/setup-julia@v1 + with: + version: ${{ matrix.version }} + arch: ${{ matrix.arch }} + - uses: julia-actions/cache@v1 + - uses: julia-actions/julia-buildpkg@v1 + - uses: julia-actions/julia-runtest@v1 + - uses: julia-actions/julia-processcoverage@v1 + - uses: codecov/codecov-action@v3 + with: + files: lcov.info + docs: + name: Documentation + runs-on: ubuntu-latest + permissions: + actions: write # needed to allow julia-actions/cache to proactively delete old caches that it has created + contents: write + statuses: write + steps: + - uses: actions/checkout@v4 + - uses: julia-actions/setup-julia@v1 + with: + version: '1' + - uses: julia-actions/cache@v1 + - name: Configure doc environment + shell: julia --project=docs --color=yes {0} + run: | + using Pkg + Pkg.develop(PackageSpec(path=pwd())) + Pkg.instantiate() + - uses: julia-actions/julia-buildpkg@v1 + - uses: julia-actions/julia-docdeploy@v1 + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + - name: Run doctests + shell: julia --project=docs --color=yes {0} + run: | + using Documenter: DocMeta, doctest + using AIHelpMe + DocMeta.setdocmeta!(AIHelpMe, :DocTestSetup, :(using AIHelpMe); recursive=true) + doctest(AIHelpMe) diff --git a/.github/workflows/CompatHelper.yml b/.github/workflows/CompatHelper.yml new file mode 100644 index 0000000..d48734a --- /dev/null +++ b/.github/workflows/CompatHelper.yml @@ -0,0 +1,16 @@ +name: CompatHelper +on: + schedule: + - cron: 0 0 1 * * + workflow_dispatch: +jobs: + CompatHelper: + runs-on: ubuntu-latest + steps: + - name: Pkg.add("CompatHelper") + run: julia -e 'using Pkg; Pkg.add("CompatHelper")' + - name: CompatHelper.main() + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + COMPATHELPER_PRIV: ${{ secrets.DOCUMENTER_KEY }} + run: julia -e 'using CompatHelper; CompatHelper.main()' diff --git a/.github/workflows/TagBot.yml b/.github/workflows/TagBot.yml new file mode 100644 index 
0000000..2bacdb8 --- /dev/null +++ b/.github/workflows/TagBot.yml @@ -0,0 +1,31 @@ +name: TagBot +on: + issue_comment: + types: + - created + workflow_dispatch: + inputs: + lookback: + default: 3 +permissions: + actions: read + checks: read + contents: write + deployments: read + issues: read + discussions: read + packages: read + pages: read + pull-requests: read + repository-projects: read + security-events: read + statuses: read +jobs: + TagBot: + if: github.event_name == 'workflow_dispatch' || github.actor == 'JuliaTagBot' + runs-on: ubuntu-latest + steps: + - uses: JuliaRegistries/TagBot@v1 + with: + token: ${{ secrets.GITHUB_TOKEN }} + ssh: ${{ secrets.DOCUMENTER_KEY }} diff --git a/.gitignore b/.gitignore index 29126e4..40f731f 100644 --- a/.gitignore +++ b/.gitignore @@ -1,24 +1,9 @@ -# Files generated by invoking Julia with --code-coverage -*.jl.cov *.jl.*.cov - -# Files generated by invoking Julia with --track-allocation +*.jl.cov *.jl.mem +/Manifest.toml +/docs/Manifest.toml +/docs/build/ -# System-specific files and directories generated by the BinaryProvider and BinDeps packages -# They contain absolute paths specific to the host computer, and so should not be committed -deps/deps.jl -deps/build.log -deps/downloads/ -deps/usr/ -deps/src/ - -# Build artifacts for creating documentation generated by the Documenter package -docs/build/ -docs/site/ - -# File generated by Pkg, the package manager, based on a corresponding Project.toml -# It records a fixed state of all packages used by the project. As such, it should not be -# committed for packages, but should be committed for applications that require a static -# environment. -Manifest.toml +**/.DS_Store +**/.vscode \ No newline at end of file diff --git a/Artifacts.toml b/Artifacts.toml new file mode 100644 index 0000000..8110db3 --- /dev/null +++ b/Artifacts.toml @@ -0,0 +1,7 @@ +[juliaextra] +git-tree-sha1 = "66e744fbf39bab71c2f63b6877a506fee8fbc9a5" +lazy = true + + [[juliaextra.download]] + sha256 = "61133afa7e06fda133f07164c57190a5b922f8f2a1aa17c3f8a628b5cf752512" + url = "https://github.com/svilupp/AIHelpMeArtifacts/raw/main/artifacts/juliaextra__v1.10.0__ada1.0.tar.gz" diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..986f4e0 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,17 @@ +# Changelog +All notable changes to this project will be documented in this file. + +The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), +and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 
+
+## [Unreleased]
+
+### Added
+
+### Fixed
\ No newline at end of file
diff --git a/LICENSE b/LICENSE
index 63b262c..737dc3f 100644
--- a/LICENSE
+++ b/LICENSE
@@ -1,6 +1,6 @@
 MIT License
 
-Copyright (c) 2024 J S
+Copyright (c) 2024 J S <49557684+svilupp@users.noreply.github.com> and contributors
 
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
diff --git a/Project.toml b/Project.toml
new file mode 100644
index 0000000..ef1ad51
--- /dev/null
+++ b/Project.toml
@@ -0,0 +1,38 @@
+name = "AIHelpMe"
+uuid = "01402e1f-dc83-4213-a98b-42887d758baa"
+authors = ["J S <49557684+svilupp@users.noreply.github.com> and contributors"]
+version = "0.0.1-DEV"
+
+[deps]
+LazyArtifacts = "4af54fe1-eca0-43a8-85a7-787d91b784e3"
+LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
+Preferences = "21216c6a-2e73-6563-6e65-726566657250"
+PromptingTools = "670122d1-24a8-4d70-bfce-740807c42192"
+REPL = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb"
+SHA = "ea8e919c-243c-51af-8825-aaa63cd721ce"
+Serialization = "9e88b42a-f829-5b0c-bbe9-9e923198166b"
+SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
+
+[compat]
+Aqua = "0.7"
+HTTP = "1"
+JSON3 = "1"
+LazyArtifacts = "<0.0.1, 1"
+LinearAlgebra = "<0.0.1, 1"
+Preferences = "1"
+PromptingTools = "0.9"
+REPL = "1"
+SHA = "0.7"
+Serialization = "<0.0.1, 1"
+SparseArrays = "<0.0.1, 1"
+Test = "<0.0.1, 1"
+julia = "1.10"
+
+[extras]
+Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595"
+HTTP = "cd3eb016-35fb-5094-929b-558a96fad6f3"
+JSON3 = "0f8b85d8-7281-11e9-16c2-39a750bddbf1"
+Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
+
+[targets]
+test = ["Aqua", "HTTP", "JSON3", "Test"]
diff --git a/README.md b/README.md
index 52fe661..0b21cdd 100644
--- a/README.md
+++ b/README.md
@@ -1 +1,127 @@
-# AIHelpers
\ No newline at end of file
+# AIHelpMe [![Stable](https://img.shields.io/badge/docs-stable-blue.svg)](https://svilupp.github.io/AIHelpMe.jl/stable/) [![Dev](https://img.shields.io/badge/docs-dev-blue.svg)](https://svilupp.github.io/AIHelpMe.jl/dev/) [![Build Status](https://github.com/svilupp/AIHelpMe.jl/actions/workflows/CI.yml/badge.svg?branch=main)](https://github.com/svilupp/AIHelpMe.jl/actions/workflows/CI.yml?query=branch%3Amain) [![Coverage](https://codecov.io/gh/svilupp/AIHelpMe.jl/branch/main/graph/badge.svg)](https://codecov.io/gh/svilupp/AIHelpMe.jl) [![Aqua](https://raw.githubusercontent.com/JuliaTesting/Aqua.jl/master/badge.svg)](https://github.com/JuliaTesting/Aqua.jl)
+
+# AIHelpMe: Enhanced Coding Assistance for Julia
+
+AIHelpMe harnesses the power of Julia's extensive documentation and advanced AI models to provide tailored coding guidance. By integrating with PromptingTools.jl, it offers a unique, AI-assisted approach to answering your coding queries directly in Julia's environment.
+
+Note: This is only a proof-of-concept. If there is enough interest, we will fine-tune the RAG pipeline for better performance.
+
+## Features
+
+- **AI-Powered Assistance**: Get context-aware answers to your coding questions.
+- **Easy-to-Use Interface**: Simple function and macro to input your questions.
+- **Flexible Querying**: Use different AI models for varied insights and performance versus cost trade-offs.
+- **Cost-Effective**: Download pre-embedded documentation to save on API calls.
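+
+For example, the pre-embedded documentation index from the bullet above ships as a lazy artifact (see `Artifacts.toml`) and can be loaded with a single call. A minimal sketch (`load_index!` is not exported yet, so the module prefix is needed):
+
+```julia
+using AIHelpMe
+
+# downloads the `juliaextra` artifact on first use and loads the serialized
+# `docs-index.jls` into the global `MAIN_INDEX` that `aihelp` queries
+AIHelpMe.load_index!()
+```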
+
+## Installation
+
+To install AIHelpMe, use the Julia package manager and the address of the repository (it's not yet registered):
+
+```julia
+using Pkg
+Pkg.add(url = "https://github.com/svilupp/AIHelpMe.jl")
+```
+
+**Prerequisites:**
+
+- Julia (version 1.10 or later).
+- Internet connection for API access.
+- OpenAI and Cohere API keys (recommended for optimal performance). See [How to Obtain API Keys](#how-to-obtain-api-keys).
+
+All setup should take less than 5 minutes!
+
+## Quick Start Guide
+
+1. **Basic Usage**:
+   ```julia
+   using AIHelpMe
+   aihelp("How do I implement quicksort in Julia?")
+   ```
+
+   ```plaintext
+   [ Info: Done generating response. Total cost: $0.001
+   AIMessage("To implement quicksort in Julia, you can use the `sort` function with the `alg=QuickSort` argument.")
+   ```
+
+2. **`aihelp` Macro**:
+   ```julia
+   aihelp"how to implement quicksort in Julia?"
+   ```
+
+3. **Follow-up Questions**:
+   ```julia
+   aihelp!"Can you elaborate on the `sort` function?"
+   ```
+   Note: The `!` is required for follow-up questions.
+   `aihelp!` does not add new context/more information - to do that, you need to ask a new question with `aihelp""`.
+
+4. **Pick stronger models**:
+   Eg, "gpt4t" is an alias for GPT-4 Turbo:
+   ```julia
+   aihelp"Elaborate on the `sort` function and quicksort algorithm"gpt4t
+   ```
+   ```plaintext
+   [ Info: Done generating response. Total cost: $0.002
+   AIMessage("The `sort` function in programming languages, including Julia, ...") # truncated, it continues for a while!
+   ```
+
+## How to Obtain API Keys
+
+### OpenAI API Key:
+1. Visit [OpenAI's API portal](https://openai.com/api/).
+2. Sign up or log in.
+3. Follow the instructions to generate an API key.
+
+### Cohere API Key:
+1. Go to [Cohere's platform](https://cohere.ai/).
+2. Create an account.
+3. Navigate to the API section to get your key.
+
+## Usage
+
+**Formulating Questions**:
+- Be clear and specific for the best results.
+
+**Example Queries**:
+- Simple question: `aihelp"What is a DataFrame in Julia?"`
+- Using a model: `aihelp"best practices for error handling in Julia"gpt4t`
+- Follow-up: `aihelp!"Could you provide an example?"`
+- Debug errors (use `err` REPL variable):
+```julia
+## define mock function to trigger method error
+f(x::Int) = x^2
+f(Int8(2))
+# we get: ERROR: MethodError: no method matching f(::Int8)
+
+# Help is here:
+aihelp"What does this error mean? $err" # Note the $err to interpolate the stacktrace
+```
+
+```plaintext
+[ Info: Done generating response. Total cost: $0.003
+
+AIMessage("The error message "MethodError: no method matching f(::Int8)" means that there is no method defined for function `f` that accepts an argument of type `Int8`. The error message also provides the closest candidate methods that were found, which are `f(::Any, !Matched::Any)` and `f(!Matched::Int64)` in the specified file `embed_all.jl` at lines 45 and 61, respectively.")
+```
+
+## How it works
+
+AIHelpMe leverages [PromptingTools.jl](https://github.com/svilupp/PromptingTools.jl) to communicate with the AI models.
+
+We apply a Retrieval-Augmented Generation (RAG) pattern, ie,
+- we pre-process all available documentation (and "embed it" to convert text snippets into numbers)
+- when a question is asked, we look up the most relevant documentation snippets
+- we feed the question and the documentation snippets to the AI model
+- the AI model generates the answer
+
+This ensures that the answers are not only based on general AI knowledge but are also specifically tailored to Julia's ecosystem and best practices.
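+
+In code, the whole pattern condenses to two calls (an illustrative sketch only; building an index embeds the documentation of the modules you pass, so it requires an `OPENAI_API_KEY` and incurs a small embedding cost):
+
+```julia
+using AIHelpMe
+
+# 1) pre-process: chunk and embed the documentation of one or more loaded modules
+#    (the `Module` methods of `build_index` live in `src/preparation.jl`)
+index = AIHelpMe.RAG.build_index(AIHelpMe)
+
+# 2)-4) retrieval, re-ranking, and answer generation are all wrapped by `aihelp`
+msg = aihelp(index, "How do I load the pre-built documentation index?")
+```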
+
+## FAQs
+
+**Q: How accurate are the answers?**
+A: As accurate as any other Generative AI answer, ie, it depends and you should always double-check.
+
+**Q: Can I use it without the internet?**
+A: Not at the moment. It might be possible in the future, as PromptingTools.jl supports local LLMs.
+
+**Q: Why do we need a Cohere API key?**
+A: Cohere's API is used to re-rank the best matching snippets from the documentation. It's free to use in limited quantities (ie, ~a thousand requests per month), which should be enough for most users. Re-ranking improves the quality and accuracy of the answers.
diff --git a/docs/Project.toml b/docs/Project.toml
new file mode 100644
index 0000000..57005f7
--- /dev/null
+++ b/docs/Project.toml
@@ -0,0 +1,3 @@
+[deps]
+AIHelpMe = "01402e1f-dc83-4213-a98b-42887d758baa"
+Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4"
diff --git a/docs/make.jl b/docs/make.jl
new file mode 100644
index 0000000..cda088a
--- /dev/null
+++ b/docs/make.jl
@@ -0,0 +1,23 @@
+using AIHelpMe
+using Documenter
+
+DocMeta.setdocmeta!(AIHelpMe, :DocTestSetup, :(using AIHelpMe); recursive=true)
+
+makedocs(;
+    modules=[AIHelpMe],
+    authors="J S <49557684+svilupp@users.noreply.github.com> and contributors",
+    sitename="AIHelpMe.jl",
+    format=Documenter.HTML(;
+        canonical="https://svilupp.github.io/AIHelpMe.jl",
+        edit_link="main",
+        assets=String[],
+    ),
+    pages=[
+        "Home" => "index.md",
+    ],
+)
+
+deploydocs(;
+    repo="github.com/svilupp/AIHelpMe.jl",
+    devbranch="main",
+)
diff --git a/docs/src/index.md b/docs/src/index.md
new file mode 100644
index 0000000..da05d27
--- /dev/null
+++ b/docs/src/index.md
@@ -0,0 +1,14 @@
+```@meta
+CurrentModule = AIHelpMe
+```
+
+# AIHelpMe
+
+Documentation for [AIHelpMe](https://github.com/svilupp/AIHelpMe.jl).
+
+```@index
+```
+
+```@autodocs
+Modules = [AIHelpMe]
+```
diff --git a/src/AIHelpMe.jl b/src/AIHelpMe.jl
new file mode 100644
index 0000000..0749f7a
--- /dev/null
+++ b/src/AIHelpMe.jl
@@ -0,0 +1,38 @@
+module AIHelpMe
+
+using Preferences, Serialization, LinearAlgebra, SparseArrays
+using LazyArtifacts
+using Base.Docs: DocStr, MultiDoc, doc, meta
+using REPL: stripmd
+
+using PromptingTools
+using PromptingTools.Experimental.RAGTools
+using SHA: sha256, bytes2hex
+const PT = PromptingTools
+const RAG = PromptingTools.Experimental.RAGTools
+
+## Globals (defined at module scope, not inside `__init__`, so that the macros and
+## `aihelp` can reference them; inside a function, `x::T = ...` would only create locals)
+const CONV_HISTORY = Vector{Vector{<:Any}}()
+const CONV_HISTORY_LOCK = ReentrantLock()
+MAX_HISTORY_LENGTH::Int = 1
+MAIN_INDEX::Union{Nothing, RAG.AbstractChunkIndex} = nothing
+LAST_CONTEXT::Union{Nothing, RAG.RAGContext} = nothing
+
+## export load_index!, last_context, update_index!
+## export remove_pkgdir, annotate_source, find_new_chunks
+include("utils.jl")
+
+## export docdata_to_source, docextract, build_index
+include("preparation.jl")
+
+export aihelp
+include("generation.jl")
+
+export @aihelp_str, @aihelp!_str
+include("macros.jl")
+
+function __init__()
+    ## Load index
+    # TODO: load_index!()
+end
+
+end
diff --git a/src/generation.jl b/src/generation.jl
new file mode 100644
index 0000000..bbaee48
--- /dev/null
+++ b/src/generation.jl
@@ -0,0 +1,156 @@
+"""
+    aihelp([index::RAG.AbstractChunkIndex,]
+        question::AbstractString;
+        rag_template::Symbol = :RAGAnswerFromContext,
+        top_k::Int = 100, top_n::Int = 5,
+        minimum_similarity::AbstractFloat = -1.0,
+        maximum_cross_similarity::AbstractFloat = 1.0,
+        rerank_strategy::RAG.RerankingStrategy = (!isempty(PT.COHERE_API_KEY) ?
+                                                  RAG.CohereRerank() : RAG.Passthrough()),
+        annotate_sources::Bool = true,
+        model_embedding::String = PT.MODEL_EMBEDDING, model_chat::String = PT.MODEL_CHAT,
+        chunks_window_margin::Tuple{Int, Int} = (1, 1),
+        return_context::Bool = false, verbose::Integer = 1,
+        rerank_kwargs::NamedTuple = NamedTuple(),
+        api_kwargs::NamedTuple = NamedTuple(),
+        kwargs...)
+
+Generates a response for a given question using a Retrieval-Augmented Generation (RAG) approach over Julia documentation.
+
+# Arguments
+- `index::AbstractChunkIndex`: The chunk index (contains chunked and embedded documentation).
+- `question::AbstractString`: The question to be answered.
+- `rag_template::Symbol`: Template for the RAG model, defaults to `:RAGAnswerFromContext`.
+- `top_k::Int`: Number of top candidates to retrieve based on embedding similarity.
+- `top_n::Int`: Number of candidates to return after reranking. This is how many will be sent to the LLM model.
+- `minimum_similarity::AbstractFloat`: Minimum similarity threshold (between -1 and 1) for filtering chunks based on embedding similarity. Defaults to -1.0.
+- `maximum_cross_similarity::AbstractFloat`: Maximum cross-similarity threshold to avoid sending duplicate documents. NOT IMPLEMENTED YET
+- `rerank_strategy::RerankingStrategy`: Strategy for reranking the retrieved chunks. Defaults to `Passthrough()` or `CohereRerank` depending on whether `COHERE_API_KEY` is set.
+- `annotate_sources::Bool`: If `true`, annotates each context chunk with its source before sending it to the model.
+- `model_embedding::String`: Model used for embedding the question, default is `PT.MODEL_EMBEDDING`.
+- `model_chat::String`: Model used for generating the final response, default is `PT.MODEL_CHAT`.
+- `chunks_window_margin::Tuple{Int,Int}`: The window size around each chunk to consider for context building. See `?build_context` for more information.
+- `return_context::Bool`: If `true`, returns the context used for RAG along with the response.
+- `return_all::Bool`: If `true`, returns all messages in the conversation (helpful to continue conversation later).
+- `verbose::Integer`: Verbosity level: `0` is silent, `1` logs the total cost, values above `1` also log the intermediate steps.
+- `rerank_kwargs`: Reranking parameters that will be forwarded to the reranking strategy
+- `api_kwargs`: API parameters that will be forwarded to the API calls
+
+# Returns
+- If `return_context` is `false`, returns the generated message (`msg`).
+- If `return_context` is `true`, returns a tuple of the generated message (`msg`) and the RAG context (`rag_context`).
+
+# Notes
+- The function first finds the closest chunks of documentation to the question (via `embeddings`).
+- It reranks the candidates and builds a "context" for the RAG model (ie, information to be provided to the LLM model together with the user question).
+- The `chunks_window_margin` allows including surrounding chunks for richer context, considering they are from the same source.
+- The function currently supports only a single `ChunkIndex`.
+- The function always saves the last context in the global `LAST_CONTEXT` for inspection of sources/context, regardless of the `return_context` value.
+
+# Examples
+
+Using `aihelp` to get a response for a question:
+```julia
+index = build_index(...) # create an index that contains Makie.jl documentation
+question = "How to make a barplot in Makie.jl?"
+msg = aihelp(index, question)
+
+# or, once the index is loaded as the main index (see `?load_index!`), simply
+msg = aihelp(question)
+```
+
+"""
+function aihelp(index::RAG.AbstractChunkIndex,
+        question::AbstractString;
+        rag_template::Symbol=:RAGAnswerFromContext,
+        top_k::Int=100, top_n::Int=5,
+        minimum_similarity::AbstractFloat=-1.0,
+        maximum_cross_similarity::AbstractFloat=1.0,
+        rerank_strategy::RAG.RerankingStrategy=(!isempty(PT.COHERE_API_KEY) ?
                                                RAG.CohereRerank() : RAG.Passthrough()),
+        annotate_sources::Bool=true,
+        model_embedding::String=PT.MODEL_EMBEDDING, model_chat::String=PT.MODEL_CHAT,
+        chunks_window_margin::Tuple{Int,Int}=(1, 1),
+        return_context::Bool=false, verbose::Integer=1,
+        rerank_kwargs::NamedTuple=NamedTuple(),
+        api_kwargs::NamedTuple=NamedTuple(),
+        kwargs...)
+    ## Note: Supports only a single ChunkIndex for now
+    global LAST_CONTEXT, CONV_HISTORY_LOCK
+
+    ## Checks
+    @assert top_k > 0 "top_k must be positive"
+    @assert top_n > 0 "top_n must be positive"
+    @assert top_k >= top_n "top_k must be greater than or equal to top_n"
+    @assert minimum_similarity >= -1.0 && minimum_similarity <= 1.0 "minimum_similarity must be between -1 and 1"
+    @assert maximum_cross_similarity >= -1.0 && maximum_cross_similarity <= 1.0 "maximum_cross_similarity must be between -1 and 1"
+    ## TODO: implement maximum_cross_similarity filter
+
+    @assert chunks_window_margin[1] >= 0 && chunks_window_margin[2] >= 0 "Both `chunks_window_margin` values must be non-negative"
+    placeholders = only(aitemplates(rag_template)).variables # only one template should be found
+    @assert (:question in placeholders) && (:context in placeholders) "Provided RAG Template $(rag_template) is not suitable. It must have placeholders: `question` and `context`."
+
+    cost_tracker = Threads.Atomic{Float64}(0.0)
+
+    ## Embedding step
+    msg = aiembed(question,
+        RAG._normalize;
+        verbose=(verbose > 1),
+        model=model_embedding,
+        api_kwargs)
+    Threads.atomic_add!(cost_tracker, PT.call_cost(msg, model_embedding)) # track costs
+    question_emb = msg.content .|> Float32 # no need for Float64
+    emb_candidates = RAG.find_closest(index, question_emb; top_k, minimum_similarity)
+
+    filtered_candidates = emb_candidates
+    reranked_candidates = RAG.rerank(rerank_strategy,
+        index,
+        question,
+        filtered_candidates;
+        verbose=(verbose > 1), top_n, rerank_kwargs...)
+
+    ## Build the context
+    sources = RAG.sources(index)[reranked_candidates.positions]
+    context = RAG.build_context(index, reranked_candidates; chunks_window_margin)
+    if annotate_sources
+        context = [annotate_chunk_with_source(chunk, src)
+                   for (chunk, src) in zip(context, sources)]
+    end
+
+    ## LLM call
+    msg = aigenerate(rag_template; question,
+        context=join(context, "\n\n"), model=model_chat,
+        verbose=(verbose > 1),
+        api_kwargs,
+        kwargs...)
+    last_msg = msg isa PT.AIMessage ? msg : last(msg)
+    Threads.atomic_add!(cost_tracker, PT.call_cost(last_msg, model_chat)) # track costs
+    (verbose >= 1) &&
+        @info "Done generating response. Total cost: \$$(round(cost_tracker[],digits=3))"
+
+    ## Always create and save the context to the global LAST_CONTEXT (for reference)
+    rag_context = RAG.RAGContext(;
+        question,
+        answer=last_msg.content,
+        context,
+        sources,
+        emb_candidates,
+        tag_candidates=nothing,
+        filtered_candidates,
+        reranked_candidates)
+    lock(CONV_HISTORY_LOCK) do
+        # update this module's own global (not `PT.LAST_CONTEXT`; assigning into
+        # another module's global is not allowed)
+        global LAST_CONTEXT = rag_context
+    end
+
+    if return_context # for evaluation
+        return msg, rag_context
+    else
+        return msg
+    end
+end
+
+function aihelp(question::AbstractString;
+        kwargs...)
+    global MAIN_INDEX
+    @assert !isnothing(MAIN_INDEX) "MAIN_INDEX is not loaded. Use `load_index!` to load an index."
+    aihelp(MAIN_INDEX, question; kwargs...)
+end
\ No newline at end of file
diff --git a/src/macros.jl b/src/macros.jl
new file mode 100644
index 0000000..cdd906c
--- /dev/null
+++ b/src/macros.jl
@@ -0,0 +1,102 @@
+"""
+    aihelp"user_question"[model_alias] -> AIMessage
+
+The `aihelp""` string macro generates an AI response to a given user question by using `aihelp` under the hood.
+It will automatically try to provide the most relevant bits of the documentation (from the index) to the LLM to answer the question.
+
+See also `aihelp!""` if you want to reply to the provided message / continue the conversation.
+
+## Arguments
+- `user_question` (String): The question to be answered by the AI model.
+- `model_alias` (optional, any): Provide model alias of the AI model (see `MODEL_ALIASES`).
+
+## Returns
+`AIMessage` corresponding to the input prompt.
+
+## Example
+```julia
+result = aihelp"Hello, how are you?"
+# AIMessage("Hello! I'm an AI assistant, so I don't have feelings, but I'm here to help you. How can I assist you today?")
+```
+
+If you want to interpolate some variables or additional context, simply use string interpolation:
+```julia
+a=1
+result = aihelp"What is `\$a+\$a`?"
+# AIMessage("The sum of `1+1` is `2`.")
+```
+
+If you want to use a different model, eg, GPT-4, you can provide its alias as a flag:
+```julia
+result = aihelp"What is `1.23 * 100 + 1`?"gpt4t
+# AIMessage("The answer is 124.")
+```
+"""
+macro aihelp_str(user_question, flags...)
+    global CONV_HISTORY, MAX_HISTORY_LENGTH, MAIN_INDEX
+    model = isempty(flags) ? PT.MODEL_CHAT : only(flags)
+    prompt = Meta.parse("\"$(escape_string(user_question))\"")
+    quote
+        conv = aihelp($(esc(MAIN_INDEX)), $(esc(prompt));
+            model = $(esc(model)),
+            return_all = true)
+        PT.push_conversation!($(esc(CONV_HISTORY)), conv, $(esc(MAX_HISTORY_LENGTH)))
+        last(conv)
+    end
+end
+
+"""
+    aihelp!"user_question"[model_alias] -> AIMessage
+
+The `aihelp!""` string macro is used to continue a previous conversation with the AI model.
+
+It appends the new user prompt to the last conversation in the tracked history (in `AIHelpMe.CONV_HISTORY`) and generates a response based on the entire conversation context.
+If you want to see the previous conversation, you can access it via `AIHelpMe.CONV_HISTORY`, which keeps at most the last `AIHelpMe.MAX_HISTORY_LENGTH` conversations.
+
+It does NOT provide new context from the documentation. To do that, start a new conversation with `aihelp""`.
+
+## Arguments
+- `user_question` (String): The follow up question to be added to the existing conversation.
+- `model_alias` (optional, any): Specify the model alias of the AI model to be used (see `PT.MODEL_ALIASES`). If not provided, the default model is used.
+
+## Returns
+`AIMessage` corresponding to the new user prompt, considering the entire conversation history.
+
+## Example
+To continue a conversation:
+```julia
+# start conversation as normal
+aihelp"How to create a dictionary?"
+
+# ... wait for reply and then react to it:
+
+# continue the conversation (notice that you can change the model, eg, to more powerful one for better answer)
+aihelp!"Can you create it from named tuple?"gpt4t
+# AIMessage("Yes, you can create a dictionary from a named tuple ...")
+```
+
+## Usage Notes
+- This macro should be used when you want to maintain the context of an ongoing conversation (ie, the last `aihelp""` message).
+- It automatically accesses and updates the global conversation history.
+- If no conversation history is found, it raises an assertion error, suggesting to initiate a new conversation using `aihelp""` instead.
+
+## Important
+Ensure that the conversation history is not too long to maintain relevancy and coherence in the AI's responses. The history length is managed by `MAX_HISTORY_LENGTH`.
+"""
+macro aihelp!_str(user_question, flags...)
+    global CONV_HISTORY, LAST_CONTEXT, MAIN_INDEX
+    model = isempty(flags) ? PT.MODEL_CHAT : only(flags)
+    prompt = Meta.parse("\"$(escape_string(user_question))\"")
+    quote
+        @assert !isempty($(esc(CONV_HISTORY))) "No conversation history found. Please use `aihelp\"\"` instead."
+        # grab the last conversation, drop system messages
+        old_conv = $(esc(CONV_HISTORY))[end]
+        conv = aigenerate(vcat(old_conv, [PT.UserMessage($(esc(prompt)))]);
+            model = $(esc(model)),
+            return_all = true)
+        # replace the last conversation with the new one
+        $(esc(CONV_HISTORY))[end] = conv
+        #
+        last(conv)
+    end
+end
\ No newline at end of file
diff --git a/src/preparation.jl b/src/preparation.jl
new file mode 100644
index 0000000..20db4e5
--- /dev/null
+++ b/src/preparation.jl
@@ -0,0 +1,119 @@
+"""
+    docdata_to_source(data::AbstractDict)
+
+Creates a source path from a given DocStr record.
+"""
+function docdata_to_source(data::AbstractDict)
+    linenumber = get(data, :linenumber, 0)
+    mod = get(data, :module, "-") |> string
+    func = get(data, :binding, "-") |> string
+    path = get(data, :path, "-") |> string
+    string(mod, "::", path, "::", linenumber, "::", func)
+end
+
+"""
+    docextract(d::DocStr, sep::AbstractString = "\\n\\n")
+
+Extracts the documentation from a DocStr record.
+Separates the individual docs within `DocStr` with `sep`.
+"""
+function docextract(d::DocStr, sep::AbstractString = "\n\n")
+    io = IOBuffer()
+    for part in d.text
+        if part isa String
+            write(io, part, sep)
+        # else
+        #     @info d.data[:binding] typeof(part)
+        #     @info part
+        end
+    end
+    docs = String(take!(io))
+    source = hasproperty(d, :data) ? docdata_to_source(d.data) : ""
+    return docs, source
+end
+
+"""
+    docextract(d::MultiDoc, sep::AbstractString = "\\n\\n")
+
+Extracts the documentation from a MultiDoc record (separates the individual docs within `DocStr` with `sep`).
+"""
+function docextract(d::MultiDoc, sep::AbstractString = "\n\n")
+    docs, sources = String[], String[]
+    for v in values(d.docs)
+        doc, source = docextract(v, sep)
+        push!(docs, doc)
+        push!(sources, source)
+    end
+    return docs, sources
+end
+
+"""
+    docextract(mod::Module)
+
+Extracts the documentation from a given (loaded) module.
+"""
+function docextract(mod::Module)
+    all_docs, all_sources = String[], String[]
+    # Module doc might be in README.md instead of the META dict
+    push!(all_docs, doc(mod) |> stripmd)
+    push!(all_sources, string(nameof(mod), "::", "/README.md", "::", 0, "::", nameof(mod)))
+    dict = meta(mod; autoinit = false)
+    isnothing(dict) && return all_docs, all_sources
+    for (k, v) in dict
+        docs, sources = docextract(v)
+        append!(all_docs, docs)
+        sources = !isnothing(pkgdir(mod)) ? remove_pkgdir.(sources, Ref(mod)) : sources
+        append!(all_sources, sources)
+    end
+    all_docs, all_sources
+end
+
+"""
+    docextract(modules::Vector{Module} = Base.Docs.modules)
+
+Extracts the documentation from a vector of `modules`.
+""" +function docextract(modules::Vector{Module} = Base.Docs.modules) + all_docs, all_sources = String[], String[] + for mod in modules + docs, sources = docextract(mod) + append!(all_docs, docs) + append!(all_sources, sources) + end + all_docs, all_sources +end + +""" + RAG.build_index(mod::Module; verbose::Int = 1, kwargs...) + +Build `index` from the documentation of a given module `mod`. +""" +function RAG.build_index(mod::Module; verbose::Int = 1, kwargs...) + docs, sources = docextract(mod) + RAG.build_index(docs; reader = :docs, + sources, + extract_metadata = false, verbose, + index_id = nameof(mod), kwargs...) +end + +""" + RAG.build_index(modules::Vector{Module} = Base.Docs.modules; verbose::Int = 1, + separators = ["\n\n", ". ", "\n"], max_length::Int = 256, + kwargs...) + +Build index from the documentation of the currently loaded modules. +If `modules` is empty, it will use all currently loaded modules. +""" +function RAG.build_index(modules::Vector{Module} = Base.Docs.modules; verbose::Int = 1, + separators = ["\n\n", ". ", "\n"], max_length::Int = 256, + kwargs...) + all_docs, all_sources = docextract(modules) + RAG.build_index(all_docs; + separators, + max_length, + reader = :docs, + extract_metadata = false, + verbose, + index_id = :all_index, + sources = all_sources) +end \ No newline at end of file diff --git a/src/utils.jl b/src/utils.jl new file mode 100644 index 0000000..03a7113 --- /dev/null +++ b/src/utils.jl @@ -0,0 +1,155 @@ +remove_pkgdir(filepath::AbstractString, mod::Module) = replace(filepath, pkgdir(mod) => "") + +""" + find_new_chunks(old_chunks::AbstractVector{<:AbstractString}, + new_chunks::AbstractVector{<:AbstractString}) + +Identifies the new chunks in `new_chunks` that are not present in `old_chunks`. + +Returns a mask of chunks that are new (not present in `old_chunks`). + +Uses SHA256 hashes to dedupe the strings quickly and effectively. +""" +function find_new_chunks(old_chunks::AbstractVector{<:AbstractString}, + new_chunks::AbstractVector{<:AbstractString}) + ## hash the chunks for easier search + old = bytes2hex.(sha256.(old_chunks)) |> sort + new = bytes2hex.(sha256.(new_chunks)) + + new_items = falses(length(new_chunks)) + for i in eachindex(new, new_items) + idx = searchsortedfirst(old, new[i]) + # check if idx is a genuine match, if not, it's a new item + if idx > lastindex(old) + new_items[i] = true + elseif old[idx] != new[i] + new_items[i] = true + end + end + return new_items +end + +function annotate_chunk_with_source(chunk::AbstractString, src::AbstractString) + # parts: module, filepath, line, function + parts = split(src, "::") + return """ +-- Source: Documentation of $(parts[end]) -- +$chunk +-- End of Source --""" +end + +""" + last_context() + +Returns the RAGContext from the last `aihelp` call. +It can be useful to see the sources/references used by the AI model to generate the response. + +If you're using `aihelp()` make sure to set `return_context = true` to return the context. +""" +last_context() = PT.LAST_CONTEXT + +""" + load_index!(index::RAG.AbstractChunkIndex; + verbose::Bool = 1, kwargs...) + +Loads the provided `index` into the global variable `MAIN_INDEX`. 
+
+If you don't have an `index` yet, use `build_index` to build one from your currently loaded packages (see `?build_index`)
+
+# Example
+```julia
+# build an index from some modules, keep empty to embed all loaded modules (eg, `build_index()`)
+index = AIHelpMe.build_index([DataFramesMeta, DataFrames, CSV])
+AIHelpMe.load_index!(index)
+```
+"""
+function load_index!(index::RAG.AbstractChunkIndex;
+        verbose::Bool = true, kwargs...)
+    global MAIN_INDEX
+    MAIN_INDEX = index
+    verbose && @info "Loaded index into MAIN_INDEX"
+    return index
+end
+"""
+    load_index!(file_path::Union{Nothing, AbstractString} = nothing;
+        verbose::Bool = true, kwargs...)
+
+Loads the serialized index in `file_path` into the global variable `MAIN_INDEX`.
+If not provided, it will download the latest index from the AIHelpMe.jl repository (more cost-efficient).
+"""
+function load_index!(file_path::Union{Nothing, AbstractString} = nothing;
+        verbose::Bool = true, kwargs...)
+    global MAIN_INDEX
+    if !isnothing(file_path)
+        @assert endswith(file_path, ".jls") "Provided file path must end with `.jls` (serialized Julia object)."
+    else
+        artifact_path = artifact"juliaextra"
+        file_path = joinpath(artifact_path, "docs-index.jls")
+    end
+    index = deserialize(file_path)
+    @assert index isa RAG.AbstractChunkIndex "Provided file path must point to a serialized RAG index (Deserialized type: $(typeof(index)))."
+    verbose && @info "Loaded index in $file_path into MAIN_INDEX"
+    MAIN_INDEX = index
+
+    return index
+end
+
+"""
+    update_index(index::RAG.AbstractChunkIndex = MAIN_INDEX,
+        modules::Vector{Module} = Base.Docs.modules;
+        verbose::Integer = 1,
+        separators = ["\\n\\n", ". ", "\\n"], max_length::Int = 256,
+        model::AbstractString = PT.MODEL_EMBEDDING,
+        kwargs...)
+
+Updates the provided `index` with the documentation of the provided `modules`.
+
+Deduplicates against the existing `index.chunks` (via content hashes) and embeds only the new document chunks.
+
+Returns the updated `index` (new instance).
+
+# Example
+If you loaded some new packages and want to add them to your MAIN_INDEX (or any `index` you use), run:
+```julia
+# To update the MAIN_INDEX
+AIHelpMe.update_index() |> AIHelpMe.load_index!
+
+# To update an explicit index
+index = AIHelpMe.update_index(index)
+```
+"""
+function update_index(index::RAG.AbstractChunkIndex = MAIN_INDEX,
+        modules::Vector{Module} = Base.Docs.modules;
+        verbose::Integer = 1,
+        separators = ["\n\n", ". ", "\n"], max_length::Int = 256,
+        model::AbstractString = PT.MODEL_EMBEDDING,
+        kwargs...)
+    ##
+    cost_tracker = Threads.Atomic{Float64}(0.0)
+    ## Extract docs
+    all_docs, all_sources = docextract(modules)
+    ## Split into chunks
+    output_chunks, output_sources = RAG.get_chunks(all_docs;
+        reader = :docs, sources = all_sources, separators, max_length)
+    ## identify new items
+    mask = find_new_chunks(index.chunks, output_chunks)
+    ## Embed new items
+    embeddings = RAG.get_embeddings(output_chunks[mask];
+        verbose = (verbose > 1),
+        cost_tracker,
+        model,
+        kwargs...)
+
+    ## Update index
+    @assert size(embeddings, 2)==sum(mask) "Number of embeddings must match the number of new chunks (mask: $(sum(mask)), embeddings: $(size(embeddings,2)))"
+    new_index = RAG.ChunkIndex(; index.id,
+        chunks = vcat(index.chunks, output_chunks[mask]),
+        sources = vcat(index.sources, output_sources[mask]),
+        embeddings = hcat(index.embeddings, embeddings),
+        index.tags, index.tags_vocab)
+
+    (verbose > 0) && @info "Index built! (cost: \$$(round(cost_tracker[], digits=3)))"
+    return new_index
+end
\ No newline at end of file
diff --git a/test/runtests.jl b/test/runtests.jl
new file mode 100644
index 0000000..bc4a996
--- /dev/null
+++ b/test/runtests.jl
@@ -0,0 +1,12 @@
+using AIHelpMe
+using Test
+using Aqua
+
+@testset "Code quality (Aqua.jl)" begin
+    ## disable the piracy check -- we intentionally pirate `RAG.build_index`
+    ## (extended for `Module` arguments in `src/preparation.jl` and not exported here)
+    Aqua.test_all(AIHelpMe; piracy = false)
+end
+
+@testset "AIHelpMe.jl" begin
+    # Write your tests here.
+end
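+
+## A possible first test for the deduplication helper in `src/utils.jl`
+## (a sketch; it assumes only the documented behavior of `find_new_chunks`,
+## which is pure and can therefore be tested offline, without any API keys):
+@testset "find_new_chunks" begin
+    old_chunks = ["chunk A", "chunk B"]
+    new_chunks = ["chunk B", "chunk C"]
+    # only "chunk C" is absent from `old_chunks`, so the mask flags the second position
+    @test AIHelpMe.find_new_chunks(old_chunks, new_chunks) == [false, true]
+end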