Infomaniak compatibility #67

Draft · wants to merge 6 commits into base: main
1 change: 1 addition & 0 deletions app/../Gemfile — Gemfile

```diff
@@ -87,6 +87,7 @@ gem "passwordless"
 gem "pundit"
 gem "reverse_markdown"
 gem "rss"
+gem "ruby-openai", github: "nosia-ai/ruby-openai"
 gem "sequel", "~> 5.68.0"
 gem "solid_queue"
 gem "thruster"
```
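The dependency is pulled from the nosia-ai fork of ruby-openai rather than the rubygems.org release. Assuming the fork preserves upstream ruby-openai's client interface, pointing the client at an OpenAI-compatible endpoint looks roughly like this (the base-URL and token env vars are illustrative placeholders, not values from this PR):

```ruby
require "openai"

# Sketch only: assumes the nosia-ai fork keeps upstream ruby-openai's API.
client = OpenAI::Client.new(
  access_token: ENV.fetch("LLM_API_KEY"),  # placeholder env var
  uri_base: ENV.fetch("LLM_BASE_URL")      # e.g. an OpenAI-compatible Infomaniak endpoint
)

response = client.chat(
  parameters: {
    model: ENV.fetch("LLM_MODEL"),
    messages: [ { role: "user", content: "Hello!" } ],
    temperature: 0.1
  }
)

puts response.dig("choices", 0, "message", "content")
```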
30 changes: 22 additions & 8 deletions Gemfile.lock

```diff
@@ -1,3 +1,12 @@
+GIT
+  remote: https://github.com/nosia-ai/ruby-openai.git
+  revision: 6db7453d12b1d562866c2d78ceb1cf7cf3696dc8
+  specs:
+    ruby-openai (7.3.1)
+      event_stream_parser (>= 0.3.0, < 2.0.0)
+      faraday (>= 1)
+      faraday-multipart (>= 1)
+
 GIT
   remote: https://github.com/patterns-ai-core/langchainrb.git
   revision: 5403cc5ac66aa0d99114827cb4ce15233551e540
@@ -162,12 +171,15 @@ GEM
     erubi (1.13.0)
     et-orbi (1.2.11)
       tzinfo
-    faraday (2.12.0)
-      faraday-net_http (>= 2.0, < 3.4)
+    event_stream_parser (1.0.0)
+    faraday (2.12.2)
+      faraday-net_http (>= 2.0, < 3.5)
       json
       logger
-    faraday-net_http (3.3.0)
-      net-http
+    faraday-multipart (1.0.4)
+      multipart-post (~> 2)
+    faraday-net_http (3.4.0)
+      net-http (>= 0.5.0)
     feedjira (3.2.3)
       loofah (>= 2.3.1, < 3)
       sax-machine (>= 1.0, < 2)
@@ -193,7 +205,7 @@ GEM
     jbuilder (2.13.0)
       actionview (>= 5.0.0)
       activesupport (>= 5.0.0)
-    json (2.7.5)
+    json (2.9.0)
     json-schema (4.3.1)
       addressable (>= 2.8)
     kamal (2.3.0)
@@ -208,7 +220,7 @@ GEM
       thor (~> 1.3)
       zeitwerk (>= 2.6.18, < 3.0)
     language_server-protocol (3.17.0.3)
-    logger (1.6.1)
+    logger (1.6.3)
     loofah (2.23.1)
       crass (~> 1.0.2)
       nokogiri (>= 1.12.0)
@@ -226,9 +238,10 @@ GEM
     mini_mime (1.1.5)
     minitest (5.25.1)
     msgpack (1.7.3)
+    multipart-post (2.4.1)
     neighbor (0.5.0)
       activerecord (>= 7)
-    net-http (0.4.1)
+    net-http (0.6.0)
       uri
     net-imap (0.5.1)
       date
@@ -428,7 +441,7 @@ GEM
     tzinfo (2.0.6)
       concurrent-ruby (~> 1.0)
     unicode-display_width (2.6.0)
-    uri (1.0.1)
+    uri (1.0.2)
     useragent (0.16.10)
     web-console (4.2.1)
       actionview (>= 6.0.0)
@@ -487,6 +500,7 @@ DEPENDENCIES
   reverse_markdown
   rss
   rubocop-rails-omakase
+  ruby-openai!
   selenium-webdriver
   sequel (~> 5.68.0)
   solid_queue
```
25 changes: 25 additions & 0 deletions app/controllers/api/v1/application_controller.rb

```diff
@@ -0,0 +1,25 @@
+# frozen_string_literal: true
+
+module Api
+  module V1
+    class ApplicationController < ApplicationController
+      allow_unauthenticated_access
+      skip_before_action :verify_authenticity_token
+      before_action :verify_api_key
+
+      private
+
+      def verify_api_key
+        authenticate_or_request_with_http_token do |token, _options|
+          api_token = ApiToken.find_by(token:)
+          @user = api_token&.user
+          if params[:user].present?
+            @account = @user.accounts.create_with(owner: @user).find_or_create_by(uid: params[:user])
+          else
+            @account = api_token&.account
+          end
+        end
+      end
+    end
+  end
+end
```
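The net effect of `verify_api_key`: the bearer token resolves `@user`, and an optional `user` request parameter scopes the call to a per-end-user account, created on first use via `find_or_create_by(uid:)`; without it, the token's own account is used. A minimal sketch of a matching request (host, token env var, and uid are illustrative placeholders, not values from this PR):

```ruby
require "json"
require "net/http"

uri = URI("https://nosia.localhost/api/v1/completions")  # placeholder host

request = Net::HTTP::Post.new(uri)
request["Authorization"] = "Bearer #{ENV.fetch("NOSIA_API_TOKEN")}"  # placeholder env var
request["Content-Type"] = "application/json"
request.body = {
  user: "end-user-42",  # optional: becomes Account#uid via find_or_create_by
  messages: [ { role: "user", content: "Hello" } ]
}.to_json

response = Net::HTTP.start(uri.host, uri.port, use_ssl: true) { |http| http.request(request) }
puts response.body
```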
247 changes: 68 additions & 179 deletions app/controllers/api/v1/completions_controller.rb

```diff
@@ -5,153 +5,93 @@ module V1
     class CompletionsController < ApplicationController
       include ActionController::Live
 
-      allow_unauthenticated_access only: [ :create ]
-      skip_before_action :verify_authenticity_token
-      before_action :verify_api_key
-      before_action :parse_params, only: [ :create ]
-
       def create
-        @llm = LangchainrbRails.config.vectorsearch.llm
-        @uuid = SecureRandom.uuid
-
-        if @stream
-          stream_response
-        else
-          non_stream_response
-        end
-      rescue StandardError => e
-        handle_error(e)
-      end
-
-      private
-
-      def build_check_llm
-        Langchain::LLM::Ollama.new(
-          url: ENV.fetch("OLLAMA_BASE_URL", "http://localhost:11434"),
-          api_key: ENV.fetch("OLLAMA_API_KEY", ""),
-          default_options: {
-            chat_completion_model_name: ENV.fetch("CHECK_MODEL", "bespoke-minicheck"),
-            completion_model_name: ENV.fetch("CHECK_MODEL", "bespoke-minicheck"),
-            temperature: ENV.fetch("LLM_TEMPERATURE", 0.1).to_f,
-            num_ctx: ENV.fetch("LLM_NUM_CTX", 4_096).to_i
-          }
-        )
-      end
-
-      def build_context(checked_chunks)
-        checked_chunks.map(&:content).join("\n")
-      end
-
-      def build_messages(question, context)
-        system_message = {
-          role: "system",
-          content: ENV.fetch("RAG_SYSTEM_TEMPLATE", "You are Nosia. You are a helpful assistant.")
-        }
-
-        user_content = if context.present?
-          template = ENV.fetch(
-            "QUERY_PROMPT_TEMPLATE",
-            "Nosia helpful content: {context}\n\n---\n\nNow here is the question you need to answer.\n\nQuestion: {question}"
-          )
-          template.gsub("{context}", context).gsub("{question}", question)
-        else
-          question
-        end
-
-        user_message = { role: "user", content: user_content }
-
-        [ system_message, user_message ]
-      end
-
-      def check_context(question)
-        k = ENV.fetch("RETRIEVAL_FETCH_K", 4)
-
-        check_llm = build_check_llm
-        checked_chunks = []
-
-        search_results = Chunk.similarity_search(question, k:)
-        search_results.each do |search_result|
-          context_to_check = search_result.content
-          check_message = [ { role: "user", content: "Document: #{context_to_check}\nClaim: #{question}" } ]
-
-          check_llm.chat(messages: check_message, top_k: @top_k, top_p: @top_p) do |stream|
-            check_response = stream.raw_response.dig("message", "content")
-
-            if check_response && check_response.eql?("Yes")
-              checked_chunks << search_result
-            end
-          end
-        end
-
-        checked_chunks
-      rescue StandardError => e
-        handle_error(e)
-        []
-      end
-
-      def handle_error(e)
-        error_message = {
-          error: {
-            message: e.message,
-            backtrace: Rails.env.development? ? e.backtrace : nil
-          }
-        }
-
-        if @stream
-          response.stream.write("data: #{error_message.to_json}\n\n")
-          response.stream.write("data: [DONE]\n\n")
-        else
-          render json: error_message, status: :internal_server_error
-        end
-      ensure
-        response.stream.close if @stream
-      end
-
-      def non_stream_response
-        checked_chunks = check_context(@question)
-        context = build_context(checked_chunks)
-        messages = build_messages(@question, context)
-
-        content = ""
-        data = {}
-
-        @llm.chat(messages:, top_k: @top_k, top_p: @top_p) do |stream|
-          stream_content = stream.raw_response.dig("message", "content")
-          content += stream_content if stream_content
-
-          done = !!stream.raw_response["done"]
-
-          if done
-            data = {
-              choices: [
-                finish_reason: "stop",
-                index: 0,
-                message: {
-                  content: content,
-                  role: "assistant"
-                }
-              ],
-              created: Time.now.to_i,
-              id: "chatcmpl-#{@uuid}",
-              model: "nosia:#{ENV["LLM_MODEL"]}",
-              object: "chat.completion",
-              system_fingerprint: "fp_nosia"
-            }
-          end
-        end
-
-        render json: data
-      end
+        @chat = @user.chats.create(account: @account)
+
+        max_tokens = completion_params[:max_tokens]&.to_i
+        model = completion_params[:model]
+        temperature = completion_params[:temperature]&.to_f
+        top_k = completion_params[:top_k]&.to_f
+        top_p = completion_params[:top_p]&.to_f
+
+        if completion_params[:messages].present?
+          completion_params[:messages].each do |message_params|
+            @chat.messages.create(
+              content: message_params[:content],
+              response_number: @chat.messages.count,
+              role: message_params[:role]
+            )
+          end
+        elsif completion_params[:prompt].present?
+          @chat.messages.create(
+            content: completion_params[:prompt],
+            response_number: @chat.messages.count,
+            role: "user"
+          )
+        end
+
+        stream_response = ActiveModel::Type::Boolean.new.cast(params[:stream]) || false
+
+        if stream_response
+          @chat.complete(model:, temperature:, top_k:, top_p:, max_tokens:) do |stream|
+            stream_content = stream.dig("delta", "content")
+            next unless stream_content
+
+            done = !!stream.dig("finish_reason")
+
+            if done
+              response.stream.write("data: [DONE]\n\n")
+            else
+              data = {
+                choices: [
+                  delta: {
+                    content: stream_content,
+                    role: "assistant"
+                  },
+                  finish_reason: done ? "stop" : nil,
+                  index: 0
+                ],
+                created: Time.now.to_i,
+                id: "chatcmpl-#{@chat.id}",
+                model: "nosia:#{ENV["LLM_MODEL"]}",
+                object: "chat.completion.chunk",
+                system_fingerprint: "fp_nosia"
+              }
+
+              response.stream.write("data: #{data.to_json}\n\n")
+            end
+          end
+        else
+          chat_response = @chat.complete(model:, temperature:, top_k:, top_p:, max_tokens:)
+
+          render json: {
+            choices: [
+              finish_reason: "stop",
+              index: 0,
+              message: {
+                content: chat_response.content,
+                role: "assistant"
+              }
+            ],
+            created: Time.now.to_i,
+            id: "chatcmpl-#{@chat.id}",
+            model: "nosia:#{ENV["LLM_MODEL"]}",
+            object: "chat.completion",
+            system_fingerprint: "fp_nosia"
+          }
+        end
+      ensure
+        response.stream.close if response.stream.respond_to?(:close)
+      end
+
+      private
 
-      def parse_params
+      def completion_params
         params.permit(
+          :max_tokens,
           :model,
           :prompt,
           :stream,
-          :top_p,
           :top_k,
+          :top_p,
+          :temperature,
+          :user,
           messages: [
             :content,
             :role
@@ -160,57 +100,6 @@ def parse_params
           ],
           chat: {},
           completion: {},
         )
-
-        @question = params[:prompt] || params.dig(:messages, 0, :content)
-        @stream = ActiveModel::Type::Boolean.new.cast(params[:stream]) || false
-        @top_p = params[:top_p].to_f || ENV.fetch("LLM_TOP_P", 0.9).to_f
-        @top_k = params[:top_k].to_i || ENV.fetch("LLM_TOP_K", 40)
       end
-
-      def stream_response
-        checked_chunks = check_context(@question)
-        context = build_context(checked_chunks)
-        messages = build_messages(@question, context)
-
-        response.headers["Content-Type"] = "text/event-stream"
-
-        @llm.chat(messages:, top_k: @top_k, top_p: @top_p) do |stream|
-          stream_content = stream.raw_response.dig("message", "content")
-          next unless stream_content
-
-          done = !!stream.raw_response["done"]
-
-          if done
-            response.stream.write("data: [DONE]\n\n")
-          else
-            data = {
-              choices: [
-                delta: {
-                  content: stream_content,
-                  role: "assistant"
-                },
-                finish_reason: done ? "stop" : nil,
-                index: 0
-              ],
-              created: Time.now.to_i,
-              id: "chatcmpl-#{@uuid}",
-              model: "nosia:#{ENV["LLM_MODEL"]}",
-              object: "chat.completion.chunk",
-              system_fingerprint: "fp_nosia"
-            }
-
-            response.stream.write("data: #{data.to_json}\n\n")
-          end
-        end
-      ensure
-        response.stream.close if response.stream.respond_to?(:close)
-      end
-
-      def verify_api_key
-        authenticate_or_request_with_http_token do |token, _options|
-          api_token = ApiToken.find_by(token:)
-          @user = api_token&.user
-        end
-      end
     end
   end
 end
```
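Since the endpoint mirrors OpenAI's chat-completions shape, a client can consume the streaming branch as server-sent events: each frame is a `data: <json>` line carrying a `choices[0].delta.content` chunk, terminated by `data: [DONE]`. A sketch of reading that stream (host and token env var are placeholders, as above):

```ruby
require "json"
require "net/http"

uri = URI("https://nosia.localhost/api/v1/completions")  # placeholder host

request = Net::HTTP::Post.new(uri)
request["Authorization"] = "Bearer #{ENV.fetch("NOSIA_API_TOKEN")}"  # placeholder env var
request["Content-Type"] = "application/json"
request.body = { stream: true, messages: [ { role: "user", content: "What is Nosia?" } ] }.to_json

Net::HTTP.start(uri.host, uri.port, use_ssl: true) do |http|
  http.request(request) do |response|
    buffer = +""
    response.read_body do |chunk|
      buffer << chunk
      # SSE frames are "data: <payload>" lines separated by a blank line.
      while (frame = buffer.slice!(/\Adata: .*\n\n/))
        payload = frame.delete_prefix("data: ").strip
        break if payload == "[DONE]"
        print JSON.parse(payload).dig("choices", 0, "delta", "content")
      end
    end
  end
end
```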