Infomaniak compatibility #67

Draft · wants to merge 6 commits into base: main
1 change: 1 addition & 0 deletions app/../Gemfile — Gemfile

```diff
@@ -87,6 +87,7 @@ gem "passwordless"
 gem "pundit"
 gem "reverse_markdown"
 gem "rss"
+gem "ruby-openai", github: "nosia-ai/ruby-openai"
 gem "sequel", "~> 5.68.0"
 gem "solid_queue"
 gem "thruster"
```
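The dependency is pulled from the nosia-ai fork of ruby-openai rather than the rubygems.org release. Assuming the fork preserves upstream ruby-openai's client interface, pointing the client at an OpenAI-compatible endpoint looks roughly like this (the base-URL and token env vars are illustrative placeholders, not values from this PR):

```ruby
require "openai"

# Sketch only: assumes the nosia-ai fork keeps upstream ruby-openai's API.
client = OpenAI::Client.new(
  access_token: ENV.fetch("LLM_API_KEY"),  # placeholder env var
  uri_base: ENV.fetch("LLM_BASE_URL")      # e.g. an OpenAI-compatible Infomaniak endpoint
)

response = client.chat(
  parameters: {
    model: ENV.fetch("LLM_MODEL"),
    messages: [ { role: "user", content: "Hello!" } ],
    temperature: 0.1
  }
)

puts response.dig("choices", 0, "message", "content")
```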
30 changes: 22 additions & 8 deletions Gemfile.lock

```diff
@@ -1,3 +1,12 @@
+GIT
+  remote: https://github.com/nosia-ai/ruby-openai.git
+  revision: 6db7453d12b1d562866c2d78ceb1cf7cf3696dc8
+  specs:
+    ruby-openai (7.3.1)
+      event_stream_parser (>= 0.3.0, < 2.0.0)
+      faraday (>= 1)
+      faraday-multipart (>= 1)
+
 GIT
   remote: https://github.com/patterns-ai-core/langchainrb.git
   revision: 5403cc5ac66aa0d99114827cb4ce15233551e540
@@ -162,12 +171,15 @@ GEM
     erubi (1.13.0)
     et-orbi (1.2.11)
       tzinfo
-    faraday (2.12.0)
-      faraday-net_http (>= 2.0, < 3.4)
+    event_stream_parser (1.0.0)
+    faraday (2.12.2)
+      faraday-net_http (>= 2.0, < 3.5)
       json
       logger
-    faraday-net_http (3.3.0)
-      net-http
+    faraday-multipart (1.0.4)
+      multipart-post (~> 2)
+    faraday-net_http (3.4.0)
+      net-http (>= 0.5.0)
     feedjira (3.2.3)
       loofah (>= 2.3.1, < 3)
       sax-machine (>= 1.0, < 2)
@@ -193,7 +205,7 @@ GEM
     jbuilder (2.13.0)
       actionview (>= 5.0.0)
       activesupport (>= 5.0.0)
-    json (2.7.5)
+    json (2.9.0)
     json-schema (4.3.1)
       addressable (>= 2.8)
     kamal (2.3.0)
@@ -208,7 +220,7 @@ GEM
       thor (~> 1.3)
       zeitwerk (>= 2.6.18, < 3.0)
     language_server-protocol (3.17.0.3)
-    logger (1.6.1)
+    logger (1.6.3)
     loofah (2.23.1)
       crass (~> 1.0.2)
       nokogiri (>= 1.12.0)
@@ -226,9 +238,10 @@ GEM
     mini_mime (1.1.5)
     minitest (5.25.1)
     msgpack (1.7.3)
+    multipart-post (2.4.1)
     neighbor (0.5.0)
       activerecord (>= 7)
-    net-http (0.4.1)
+    net-http (0.6.0)
       uri
     net-imap (0.5.1)
       date
@@ -428,7 +441,7 @@ GEM
     tzinfo (2.0.6)
       concurrent-ruby (~> 1.0)
     unicode-display_width (2.6.0)
-    uri (1.0.1)
+    uri (1.0.2)
     useragent (0.16.10)
     web-console (4.2.1)
       actionview (>= 6.0.0)
@@ -487,6 +500,7 @@ DEPENDENCIES
   reverse_markdown
   rss
   rubocop-rails-omakase
+  ruby-openai!
   selenium-webdriver
   sequel (~> 5.68.0)
   solid_queue
```
25 changes: 25 additions & 0 deletions app/controllers/api/v1/application_controller.rb

```diff
@@ -0,0 +1,25 @@
+# frozen_string_literal: true
+
+module Api
+  module V1
+    class ApplicationController < ApplicationController
+      allow_unauthenticated_access
+      skip_before_action :verify_authenticity_token
+      before_action :verify_api_key
+
+      private
+
+      def verify_api_key
+        authenticate_or_request_with_http_token do |token, _options|
+          api_token = ApiToken.find_by(token:)
+          @user = api_token&.user
+          if params[:user].present?
+            @account = @user.accounts.create_with(owner: @user).find_or_create_by(uid: params[:user])
+          else
+            @account = api_token&.account
+          end
+        end
+      end
+    end
+  end
+end
```
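The net effect of `verify_api_key`: the bearer token resolves `@user`, and an optional `user` request parameter scopes the call to a per-end-user account, created on first use via `find_or_create_by(uid:)`; without it, the token's own account is used. A minimal sketch of a matching request (host, token env var, and uid are illustrative placeholders, not values from this PR):

```ruby
require "json"
require "net/http"

uri = URI("https://nosia.localhost/api/v1/completions")  # placeholder host

request = Net::HTTP::Post.new(uri)
request["Authorization"] = "Bearer #{ENV.fetch("NOSIA_API_TOKEN")}"  # placeholder env var
request["Content-Type"] = "application/json"
request.body = {
  user: "end-user-42",  # optional: becomes Account#uid via find_or_create_by
  messages: [ { role: "user", content: "Hello" } ]
}.to_json

response = Net::HTTP.start(uri.host, uri.port, use_ssl: true) { |http| http.request(request) }
puts response.body
```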
247 changes: 68 additions & 179 deletions app/controllers/api/v1/completions_controller.rb

```diff
@@ -5,153 +5,93 @@ module V1
     class CompletionsController < ApplicationController
       include ActionController::Live
 
-      allow_unauthenticated_access only: [ :create ]
-      skip_before_action :verify_authenticity_token
-      before_action :verify_api_key
-      before_action :parse_params, only: [ :create ]
-
       def create
-        @llm = LangchainrbRails.config.vectorsearch.llm
-        @uuid = SecureRandom.uuid
-
-        if @stream
-          stream_response
-        else
-          non_stream_response
-        end
-      rescue StandardError => e
-        handle_error(e)
-      end
-
-      private
-
-      def build_check_llm
-        Langchain::LLM::Ollama.new(
-          url: ENV.fetch("OLLAMA_BASE_URL", "http://localhost:11434"),
-          api_key: ENV.fetch("OLLAMA_API_KEY", ""),
-          default_options: {
-            chat_completion_model_name: ENV.fetch("CHECK_MODEL", "bespoke-minicheck"),
-            completion_model_name: ENV.fetch("CHECK_MODEL", "bespoke-minicheck"),
-            temperature: ENV.fetch("LLM_TEMPERATURE", 0.1).to_f,
-            num_ctx: ENV.fetch("LLM_NUM_CTX", 4_096).to_i
-          }
-        )
-      end
-
-      def build_context(checked_chunks)
-        checked_chunks.map(&:content).join("\n")
-      end
-
-      def build_messages(question, context)
-        system_message = {
-          role: "system",
-          content: ENV.fetch("RAG_SYSTEM_TEMPLATE", "You are Nosia. You are a helpful assistant.")
-        }
-
-        user_content = if context.present?
-          template = ENV.fetch(
-            "QUERY_PROMPT_TEMPLATE",
-            "Nosia helpful content: {context}\n\n---\n\nNow here is the question you need to answer.\n\nQuestion: {question}"
-          )
-          template.gsub("{context}", context).gsub("{question}", question)
-        else
-          question
-        end
-
-        user_message = { role: "user", content: user_content }
-
-        [ system_message, user_message ]
-      end
-
-      def check_context(question)
-        k = ENV.fetch("RETRIEVAL_FETCH_K", 4)
-
-        check_llm = build_check_llm
-        checked_chunks = []
-
-        search_results = Chunk.similarity_search(question, k:)
-        search_results.each do |search_result|
-          context_to_check = search_result.content
-          check_message = [ { role: "user", content: "Document: #{context_to_check}\nClaim: #{question}" } ]
-
-          check_llm.chat(messages: check_message, top_k: @top_k, top_p: @top_p) do |stream|
-            check_response = stream.raw_response.dig("message", "content")
-
-            if check_response && check_response.eql?("Yes")
-              checked_chunks << search_result
-            end
-          end
-        end
-
-        checked_chunks
-      rescue StandardError => e
-        handle_error(e)
-        []
-      end
-
-      def handle_error(e)
-        error_message = {
-          error: {
-            message: e.message,
-            backtrace: Rails.env.development? ? e.backtrace : nil
-          }
-        }
-
-        if @stream
-          response.stream.write("data: #{error_message.to_json}\n\n")
-          response.stream.write("data: [DONE]\n\n")
-        else
-          render json: error_message, status: :internal_server_error
-        end
-      ensure
-        response.stream.close if @stream
-      end
-
-      def non_stream_response
-        checked_chunks = check_context(@question)
-        context = build_context(checked_chunks)
-        messages = build_messages(@question, context)
-
-        content = ""
-        data = {}
-
-        @llm.chat(messages:, top_k: @top_k, top_p: @top_p) do |stream|
-          stream_content = stream.raw_response.dig("message", "content")
-          content += stream_content if stream_content
-
-          done = !!stream.raw_response["done"]
-
-          if done
-            data = {
-              choices: [
-                finish_reason: "stop",
-                index: 0,
-                message: {
-                  content: content,
-                  role: "assistant"
-                }
-              ],
-              created: Time.now.to_i,
-              id: "chatcmpl-#{@uuid}",
-              model: "nosia:#{ENV["LLM_MODEL"]}",
-              object: "chat.completion",
-              system_fingerprint: "fp_nosia"
-            }
-          end
-        end
-
-        render json: data
-      end
+        @chat = @user.chats.create(account: @account)
+
+        max_tokens = completion_params[:max_tokens]&.to_i
+        model = completion_params[:model]
+        temperature = completion_params[:temperature]&.to_f
+        top_k = completion_params[:top_k]&.to_f
+        top_p = completion_params[:top_p]&.to_f
+
+        if completion_params[:messages].present?
+          completion_params[:messages].each do |message_params|
+            @chat.messages.create(
+              content: message_params[:content],
+              response_number: @chat.messages.count,
+              role: message_params[:role]
+            )
+          end
+        elsif completion_params[:prompt].present?
+          @chat.messages.create(
+            content: completion_params[:prompt],
+            response_number: @chat.messages.count,
+            role: "user"
+          )
+        end
+
+        stream_response = ActiveModel::Type::Boolean.new.cast(params[:stream]) || false
+
+        if stream_response
+          @chat.complete(model:, temperature:, top_k:, top_p:, max_tokens:) do |stream|
+            stream_content = stream.dig("delta", "content")
+            next unless stream_content
+
+            done = !!stream.dig("finish_reason")
+
+            if done
+              response.stream.write("data: [DONE]\n\n")
+            else
+              data = {
+                choices: [
+                  delta: {
+                    content: stream_content,
+                    role: "assistant"
+                  },
+                  finish_reason: done ? "stop" : nil,
+                  index: 0
+                ],
+                created: Time.now.to_i,
+                id: "chatcmpl-#{@chat.id}",
+                model: "nosia:#{ENV["LLM_MODEL"]}",
+                object: "chat.completion.chunk",
+                system_fingerprint: "fp_nosia"
+              }
+
+              response.stream.write("data: #{data.to_json}\n\n")
+            end
+          end
+        else
+          chat_response = @chat.complete(model:, temperature:, top_k:, top_p:, max_tokens:)
+
+          render json: {
+            choices: [
+              finish_reason: "stop",
+              index: 0,
+              message: {
+                content: chat_response.content,
+                role: "assistant"
+              }
+            ],
+            created: Time.now.to_i,
+            id: "chatcmpl-#{@chat.id}",
+            model: "nosia:#{ENV["LLM_MODEL"]}",
+            object: "chat.completion",
+            system_fingerprint: "fp_nosia"
+          }
+        end
+      ensure
+        response.stream.close if response.stream.respond_to?(:close)
+      end
+
+      private
 
-      def parse_params
+      def completion_params
         params.permit(
+          :max_tokens,
           :model,
           :prompt,
           :stream,
-          :top_p,
           :top_k,
+          :top_p,
+          :temperature,
+          :user,
           messages: [
             :content,
             :role
@@ -160,57 +100,6 @@ def parse_params
           ],
           chat: {},
           completion: {},
         )
-
-        @question = params[:prompt] || params.dig(:messages, 0, :content)
-        @stream = ActiveModel::Type::Boolean.new.cast(params[:stream]) || false
-        @top_p = params[:top_p].to_f || ENV.fetch("LLM_TOP_P", 0.9).to_f
-        @top_k = params[:top_k].to_i || ENV.fetch("LLM_TOP_K", 40)
       end
-
-      def stream_response
-        checked_chunks = check_context(@question)
-        context = build_context(checked_chunks)
-        messages = build_messages(@question, context)
-
-        response.headers["Content-Type"] = "text/event-stream"
-
-        @llm.chat(messages:, top_k: @top_k, top_p: @top_p) do |stream|
-          stream_content = stream.raw_response.dig("message", "content")
-          next unless stream_content
-
-          done = !!stream.raw_response["done"]
-
-          if done
-            response.stream.write("data: [DONE]\n\n")
-          else
-            data = {
-              choices: [
-                delta: {
-                  content: stream_content,
-                  role: "assistant"
-                },
-                finish_reason: done ? "stop" : nil,
-                index: 0
-              ],
-              created: Time.now.to_i,
-              id: "chatcmpl-#{@uuid}",
-              model: "nosia:#{ENV["LLM_MODEL"]}",
-              object: "chat.completion.chunk",
-              system_fingerprint: "fp_nosia"
-            }
-
-            response.stream.write("data: #{data.to_json}\n\n")
-          end
-        end
-      ensure
-        response.stream.close if response.stream.respond_to?(:close)
-      end
-
-      def verify_api_key
-        authenticate_or_request_with_http_token do |token, _options|
-          api_token = ApiToken.find_by(token:)
-          @user = api_token&.user
-        end
-      end
     end
   end
 end
```
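Since the endpoint mirrors OpenAI's chat-completions shape, a client can consume the streaming branch as server-sent events: each frame is a `data: <json>` line carrying a `choices[0].delta.content` chunk, terminated by `data: [DONE]`. A sketch of reading that stream (host and token env var are placeholders, as above):

```ruby
require "json"
require "net/http"

uri = URI("https://nosia.localhost/api/v1/completions")  # placeholder host

request = Net::HTTP::Post.new(uri)
request["Authorization"] = "Bearer #{ENV.fetch("NOSIA_API_TOKEN")}"  # placeholder env var
request["Content-Type"] = "application/json"
request.body = { stream: true, messages: [ { role: "user", content: "What is Nosia?" } ] }.to_json

Net::HTTP.start(uri.host, uri.port, use_ssl: true) do |http|
  http.request(request) do |response|
    buffer = +""
    response.read_body do |chunk|
      buffer << chunk
      # SSE frames are "data: <payload>" lines separated by a blank line.
      while (frame = buffer.slice!(/\Adata: .*\n\n/))
        payload = frame.delete_prefix("data: ").strip
        break if payload == "[DONE]"
        print JSON.parse(payload).dig("choices", 0, "delta", "content")
      end
    end
  end
end
```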