diff --git a/docs/my-website/docs/providers/azure.md b/docs/my-website/docs/providers/azure.md
index afba0483153e..05ea02302d83 100644
--- a/docs/my-website/docs/providers/azure.md
+++ b/docs/my-website/docs/providers/azure.md
@@ -587,6 +587,16 @@ response = completion(
+1. Add relevant env vars
+
+```bash
+export AZURE_TENANT_ID=""
+export AZURE_CLIENT_ID=""
+export AZURE_CLIENT_SECRET=""
+```
+
+2. Setup config.yaml
+
 ```yaml
 model_list:
   - model_name: gpt-3.5-turbo
@@ -598,6 +608,12 @@ litellm_settings:
   enable_azure_ad_token_refresh: true # 👈 KEY CHANGE
 ```
+3. Start proxy
+
+```bash
+litellm --config /path/to/config.yaml
+```
+
diff --git a/litellm/litellm_core_utils/get_supported_openai_params.py b/litellm/litellm_core_utils/get_supported_openai_params.py
index 2ca97e8fdaa5..e251784f4e18 100644
--- a/litellm/litellm_core_utils/get_supported_openai_params.py
+++ b/litellm/litellm_core_utils/get_supported_openai_params.py
@@ -1,6 +1,7 @@
 from typing import Literal, Optional
 
 import litellm
+from litellm import LlmProviders
 from litellm.exceptions import BadRequestError
 
 
@@ -199,5 +200,15 @@ def get_supported_openai_params(  # noqa: PLR0915
                 model=model
             )
         )
+    elif custom_llm_provider in litellm._custom_providers:
+        if request_type == "chat_completion":
+            provider_config = litellm.ProviderConfigManager.get_provider_chat_config(
+                model=model, provider=LlmProviders.CUSTOM
+            )
+            return provider_config.get_supported_openai_params(model=model)
+        elif request_type == "embeddings":
+            return None
+        elif request_type == "transcription":
+            return None
     return None
diff --git a/litellm/proxy/_new_secret_config.yaml b/litellm/proxy/_new_secret_config.yaml
index 6bf00fb16af4..aaffa1c85159 100644
--- a/litellm/proxy/_new_secret_config.yaml
+++ b/litellm/proxy/_new_secret_config.yaml
@@ -30,6 +30,7 @@ model_list:
     model_info:
       input_cost_per_token: 0.0000006
       output_cost_per_token: 0.0000006
+
 
 # litellm_settings:
 #   key_generation_settings:
diff --git a/litellm/utils.py b/litellm/utils.py
index b46707b48f60..494194df9b46 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -1946,16 +1946,15 @@ def register_model(model_cost: Union[str, dict]):  # noqa: PLR0915
     for key, value in loaded_model_cost.items():
         ## get model info ##
         try:
-            existing_model: Union[ModelInfo, dict] = get_model_info(model=key)
+            existing_model: dict = cast(dict, get_model_info(model=key))
             model_cost_key = existing_model["key"]
         except Exception:
             existing_model = {}
             model_cost_key = key
         ## override / add new keys to the existing model cost dictionary
-        litellm.model_cost.setdefault(model_cost_key, {}).update(
-            _update_dictionary(existing_model, value)  # type: ignore
-        )
-        verbose_logger.debug(f"{key} added to model cost map")
+        updated_dictionary = _update_dictionary(existing_model, value)
+        litellm.model_cost.setdefault(model_cost_key, {}).update(updated_dictionary)
+        verbose_logger.debug(f"{model_cost_key} added to model cost map")
         # add new model names to provider lists
         if value.get("litellm_provider") == "openai":
             if key not in litellm.open_ai_chat_completion_models:
diff --git a/tests/local_testing/test_custom_llm.py b/tests/local_testing/test_custom_llm.py
index f21b27c43a21..beb1e3332dda 100644
--- a/tests/local_testing/test_custom_llm.py
+++ b/tests/local_testing/test_custom_llm.py
@@ -397,3 +397,58 @@ async def test_image_generation_async_additional_params():
         mock_client.call_args.kwargs["optional_params"] == {
             "my_custom_param": "my-custom-param"
         }
+
+
+def test_get_supported_openai_params():
+
+    class MyCustomLLM(CustomLLM):
+
+        # This is what `get_supported_openai_params` should be returning:
+        def get_supported_openai_params(self, model: str) -> list[str]:
+            return [
+                "tools",
+                "tool_choice",
+                "temperature",
+                "top_p",
+                "top_k",
+                "min_p",
+                "typical_p",
+                "stop",
+                "seed",
+                "response_format",
+                "max_tokens",
+                "presence_penalty",
+                "frequency_penalty",
+                "repeat_penalty",
+                "tfs_z",
+                "mirostat_mode",
+                "mirostat_tau",
+                "mirostat_eta",
+                "logit_bias",
+            ]
+
+        def completion(self, *args, **kwargs) -> litellm.ModelResponse:
+            return litellm.completion(
+                model="gpt-3.5-turbo",
+                messages=[{"role": "user", "content": "Hello world"}],
+                mock_response="Hi!",
+            )  # type: ignore
+
+    my_custom_llm = MyCustomLLM()
+
+    litellm.custom_provider_map = [  # 👈 KEY STEP - REGISTER HANDLER
+        {"provider": "my-custom-llm", "custom_handler": my_custom_llm}
+    ]
+
+    resp = completion(
+        model="my-custom-llm/my-fake-model",
+        messages=[{"role": "user", "content": "Hello world!"}],
+    )
+
+    assert resp.choices[0].message.content == "Hi!"
+
+    # Get supported openai params
+    from litellm import get_supported_openai_params
+
+    response = get_supported_openai_params(model="my-custom-llm/my-fake-model")
+    assert response is not None
diff --git a/tests/local_testing/test_get_model_info.py b/tests/local_testing/test_get_model_info.py
index 2c313cecad1f..a25b31b671ad 100644
--- a/tests/local_testing/test_get_model_info.py
+++ b/tests/local_testing/test_get_model_info.py
@@ -285,3 +285,28 @@ def completion(self, *args, **kwargs) -> litellm.ModelResponse:
     get_model_info(
         model="my-custom-llm/my-fake-model"
     )  # 💥 "Exception: This model isn't mapped yet." in v1.56.10
+
+
+def test_get_model_info_custom_model_router():
+    from litellm import Router
+    from litellm import get_model_info
+
+    litellm._turn_on_debug()
+
+    router = Router(
+        model_list=[
+            {
+                "model_name": "ma-summary",
+                "litellm_params": {
+                    "api_base": "http://ma-mix-llm-serving.cicero.svc.cluster.local/v1",
+                    "input_cost_per_token": 1,
+                    "output_cost_per_token": 1,
+                    "model": "openai/meta-llama/Meta-Llama-3-8B-Instruct",
+                    "model_id": "c20d603e-1166-4e0f-aa65-ed9c476ad4ca",
+                },
+            }
+        ]
+    )
+    info = get_model_info("openai/meta-llama/Meta-Llama-3-8B-Instruct")
+    print("info", info)
+    assert info is not None
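Note on the `docs/my-website/docs/providers/azure.md` hunks above: the new steps stop at starting the proxy. A minimal sketch of a follow-up request to sanity-check the setup, assuming the proxy is listening on the default `http://0.0.0.0:4000`, that `gpt-3.5-turbo` is the `model_name` from the example config, and that `sk-1234` is a placeholder for whatever key (if any) your proxy requires:

```python
from openai import OpenAI

# Point the OpenAI SDK at the LiteLLM proxy started in step 3.
client = OpenAI(
    api_key="sk-1234",  # placeholder; use your proxy/virtual key if auth is enabled
    base_url="http://0.0.0.0:4000",
)

response = client.chat.completions.create(
    model="gpt-3.5-turbo",  # model_name from the config.yaml above
    messages=[{"role": "user", "content": "Hello from the proxy"}],
)
print(response.choices[0].message.content)
```

A successful response indicates the proxy could authenticate to Azure using the env vars from step 1 rather than a static API key.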
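Note on the `litellm/utils.py` hunk: `register_model` now merges the override into `litellm.model_cost` under the resolved key and logs that key (previously it logged the raw input key). A minimal sketch of the path being exercised, using the public `litellm.register_model` API; the model name and prices here are illustrative only:

```python
import litellm

# Override pricing for an existing cost-map entry; fields mirror litellm's model cost map.
litellm.register_model(
    {
        "gpt-4": {
            "max_tokens": 8192,
            "input_cost_per_token": 0.00003,   # illustrative value
            "output_cost_per_token": 0.00006,  # illustrative value
            "litellm_provider": "openai",
            "mode": "chat",
        }
    }
)

# The merged entry lands under the resolved key ("gpt-4" here),
# which is also the key reported in the debug log after this change.
print(litellm.model_cost["gpt-4"]["input_cost_per_token"])
```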