diff --git a/Cargo.lock b/Cargo.lock
index f08cdaa..dddf63b 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -798,6 +798,7 @@ dependencies = [
  "atomic",
  "pear",
  "serde",
+ "toml",
  "uncased",
  "version_check",
 ]
diff --git a/Cargo.toml b/Cargo.toml
index 5990fcd..b7c38f0 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -25,7 +25,7 @@ axum-tracing-opentelemetry = "0.18.1"
 bytemuck = "1.16.0"
 bytes = "1.6.0"
 clap = { version = "4.5.4", features = ["derive"] }
-figment = { version = "0.10.19", features = ["env"] }
+figment = { version = "0.10.19", features = ["env", "toml"] }
 openai_dive = { version = "0.4.8", default-features = false, features = ["rustls-tls", "stream", "tokio", "tokio-util"] }
 opentelemetry = { version = "0.23.0", features = ["metrics"] }
 opentelemetry-jaeger-propagator = "0.2.0"
diff --git a/README.md b/README.md
index 2bb5529..bde9f26 100644
--- a/README.md
+++ b/README.md
@@ -19,6 +19,7 @@ Point your clients at AI Router and use any combination of Triton Inference Serv
 - Low system resource utilization (Rust FTW).
 - Streaming support with fixups for Triton Inference Server (required ATM).
 - Support mix of client stream request/stream to backend.
+- Extend/override config using environment variables.
 - More to come!
 
 ### Supported Inference Types vs Backend Types
@@ -33,6 +34,103 @@ Point your clients at AI Router and use any combination of Triton Inference Serv
 | Images | :x: | :x: |
 | Legacy Completions | :x: | :white_check_mark: |
 
+### Extend or Override Config Using Environment Variables
+
+You might not want to define sensitive data like API keys in a plain-text file. With AI Router, you can leave sensitive data out of the TOML config and set it using environment variables instead.
+
+Imagine the following TOML config in `test.toml`:
+
+```toml
+title = "test backend api_key from env"
+
+[daemon]
+listen_ip = "0.0.0.0"
+listen_port = 3000
+
+[backends]
+
+[backends.openai]
+type = "openai"
+base_url = "https://api.openai.com/v1"
+
+[models]
+
+[models.chat_completions.gpt-4-1106-preview]
+backend = "openai"
+```
+
+Starting ai-router with this config would fail with this error:
+
+`OpenAI backend openai-api-key-from-env is missing API key`
+
+However, if there is an environment variable `AI_ROUTER_BACKENDS_OPENAI`
+with the value `{api_key=some_api_key}`, AI Router will use `some_api_key` as the
+backend's API key. This way, there is no need to keep API keys in plain-text files.
+
+Here is a demonstration using the `--dump-config` flag:
+
+```
+❯ target/debug/ai-router --config-file test.toml --dump-config
+title = "test backend api_key from env"
+
+[backends.openai]
+type = "openai"
+base_url = "https://api.openai.com/v1"
+
+[daemon]
+api_key = []
+instance_id = "fff0673e-c34b-4a6e-8034-8591df1845c5"
+listen_ip = "0.0.0.0"
+listen_port = 3000
+template_dir = "/etc/ai-router/templates"
+
+[models.chat_completions.gpt-4-1106-preview]
+backend = "openai"
+
+➜ AI_ROUTER_BACKENDS_OPENAI='{api_key=very_secret_api_key}' target/debug/ai-router --config-file test.toml --dump-config
+title = "test backend api_key from env"
+
+[backends.openai]
+api_key = "very_secret_api_key"
+type = "openai"
+base_url = "https://api.openai.com/v1"
+
+[daemon]
+api_key = []
+instance_id = "af6c5187-7aee-4b5f-9567-e19a7690d292"
+listen_ip = "0.0.0.0"
+listen_port = 3000
+template_dir = "/etc/ai-router/templates"
+
+[models.chat_completions.gpt-4-1106-preview]
+backend = "openai"
+```
+
+You could also define an entire backend that doesn't exist in the config file at all:
+```
+➜ AI_ROUTER_BACKENDS_ANYSCALE='{api_key=very_secret_api_key,type=openai,base_url=https://api.endpoints.anyscale.com/v1}' target/debug/ai-router --config-file test.toml --dump-config
+title = "test backend api_key from env"
+
+[backends.openai]
+type = "openai"
+base_url = "https://api.openai.com/v1"
+
+[backends.anyscale]
+api_key = "very_secret_api_key"
+type = "openai"
+base_url = "https://api.endpoints.anyscale.com/v1"
+
+[daemon]
+api_key = []
+instance_id = "524dba5a-5736-4168-b64b-814569b91217"
+listen_ip = "0.0.0.0"
+listen_port = 3000
+template_dir = "/etc/ai-router/templates"
+
+[models.chat_completions.gpt-4-1106-preview]
+backend = "openai"
+```
+
 ## Usage Example
 
 You have Triton Inference Server, vLLM, HF TEI/TGI, or any other OpenAI compatible local embeddings/LLM model(s) served. You may also have API keys for OpenAI, Mistral Le Platforme, Anyscale, etc. Or all of the above, or not.
diff --git a/src/config.rs b/src/config.rs
index 8cfc179..c37f639 100644
--- a/src/config.rs
+++ b/src/config.rs
@@ -2,6 +2,10 @@ use std::collections::HashMap;
 
 use anyhow::{anyhow, Result};
 use clap::Parser;
+use figment::{
+    providers::{Env, Format, Toml},
+    Figment,
+};
 use serde::{Deserialize, Serialize};
 use serde_with::{formats::PreferMany, serde_as, skip_serializing_none, OneOrMany};
 use uuid::Uuid;
@@ -52,8 +56,10 @@ impl AiRouterConfigFile {
     /// - when file at path cannot be opened
     /// - when file content cannot be deserialized into `AiRouterConfigFile`
     pub fn parse(path: String) -> Result<Self> {
-        let config = std::fs::read_to_string(path)?;
-        let config: Self = toml::from_str(&config)?;
+        let config: Self = Figment::new()
+            .merge(Toml::file(path))
+            .merge(Env::prefixed("AI_ROUTER_").split("_"))
+            .extract()?;
         if let Err(e) = config.validate() {
             return Err(anyhow!("config file validation failed: {e}"));
         }
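
For reference, the sketch below is a minimal, self-contained approximation of what the new `AiRouterConfigFile::parse` does; it is not taken from the repository. The `Config` and `Backend` structs are simplified stand-ins invented for the example (the real types live in `src/config.rs`), but the Figment provider chain matches the one added in the diff above.

```rust
use std::collections::HashMap;

use figment::{
    providers::{Env, Format, Toml},
    Figment,
};
use serde::Deserialize;

// Simplified stand-ins for the real config types in src/config.rs.
#[derive(Debug, Deserialize)]
struct Backend {
    #[serde(rename = "type")]
    backend_type: String,
    base_url: String,
    api_key: Option<String>,
}

#[derive(Debug, Deserialize)]
struct Config {
    title: String,
    backends: HashMap<String, Backend>,
}

fn main() -> Result<(), figment::Error> {
    // Same provider order as in src/config.rs: the TOML file is read first,
    // then AI_ROUTER_* environment variables are merged on top, so e.g.
    // AI_ROUTER_BACKENDS_OPENAI='{api_key=very_secret_api_key}' fills in
    // backends.openai.api_key without it ever appearing in test.toml.
    let config: Config = Figment::new()
        .merge(Toml::file("test.toml"))
        .merge(Env::prefixed("AI_ROUTER_").split("_"))
        .extract()?;

    println!("{}: {:#?}", config.title, config.backends.get("openai"));
    Ok(())
}
```

Run next to the `test.toml` from the README with `AI_ROUTER_BACKENDS_OPENAI='{api_key=very_secret_api_key}'` set, this should print the `openai` backend with the API key filled in, mirroring the `--dump-config` output shown earlier.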