diff --git a/.default.conf b/.default.conf new file mode 100644 index 00000000..58330b20 --- /dev/null +++ b/.default.conf @@ -0,0 +1,51 @@ +# AWS Variables for S3 Object Storage +# Configure these for AWS access, or use defaults for local development with MinIO. +AWS_ACCESS_KEY_ID=lumigator +AWS_SECRET_ACCESS_KEY=lumigator # pragma: allowlist secret +AWS_DEFAULT_REGION=us-east-2 +# The URL which can be used for S3 operations +AWS_ENDPOINT_URL=http://localhost:9000 +# The name of the S3 bucket to use for storage +S3_BUCKET=lumigator-storage +# Ray Cluster Configuration +# These settings are for the local Ray setup. +# To use an external Ray cluster, you MUST use an external S3-compatible storage +# to ensure the Ray workers can access data from your Lumigator server. +RAY_HEAD_NODE_HOST=ray +RAY_DASHBOARD_PORT=8265 +RAY_WORKER_GPUS=0 +RAY_WORKER_GPUS_FRACTION=0 +NVIDIA_VISIBLE_DEVICES=all +GPU_COUNT=0 +# Cache for HuggingFace models and artifacts (the doubled $ escapes variable resolution until deployment) +HF_HOME=$${HOME}/.cache/huggingface +# Access token to use when attempting to access gated models in HuggingFace +HF_TOKEN= +# Access token to use when attempting to interact with Mistral's API +MISTRAL_API_KEY= +# Access token to use when attempting to interact with OpenAI's API +OPENAI_API_KEY= +# MLFlow Configuration +MLFLOW_TRACKING_URI=http://mlflow:5000 +MLFLOW_DATABASE_URL=sqlite:///mlflow.db +MLFLOW_S3_ROOT_PATH=s3://mlflow +# S3 Configuration (MinIO) +MINIO_ROOT_USER=minioadmin +MINIO_ROOT_PASSWORD=minioadmin # pragma: allowlist secret +MINIO_API_CORS_ALLOW_ORIGIN="*" +DEPLOYMENT_TYPE=local +DATABASE_URL=sqlite:///local.db +# LUMIGATOR_API_CORS_ALLOWED_ORIGINS: +# Array of origins (See: https://developer.mozilla.org/en-US/docs/Glossary/Origin) +# that should be allowed to make Cross-Domain (CORS) API requests to the Lumigator backend API. +# The expected format of each is: scheme + domain + port (if no port is specified then 80 is assumed). +# e.g. "http://localhost:3000,http://lumigator.mydomain.com" +# To allow CORS requests from anywhere, specify "*" as any, or the only, value. +# e.g. - "*" +LUMIGATOR_API_CORS_ALLOWED_ORIGINS="http://localhost,http://localhost:3000" +EVALUATOR_PIP_REQS=/mzai/lumigator/jobs/evaluator/requirements.txt +EVALUATOR_WORK_DIR=/mzai/lumigator/jobs/evaluator +EVALUATOR_LITE_PIP_REQS=/mzai/lumigator/jobs/evaluator_lite/requirements.txt +EVALUATOR_LITE_WORK_DIR=/mzai/lumigator/jobs/evaluator_lite +INFERENCE_PIP_REQS=/mzai/lumigator/jobs/inference/requirements.txt +INFERENCE_WORK_DIR=/mzai/lumigator/jobs/inference diff --git a/.env.template b/.env.template deleted file mode 100644 index 02c36994..00000000 --- a/.env.template +++ /dev/null @@ -1,63 +0,0 @@ -# .env.template -# This file can be used as a template for the .env file. Copy this file to .env and modify the values as needed. - -################ -# Cache for HuggingFace models and artifacts -HF_HOME=${HF_HOME:-${HOME}/.cache/huggingface} -################ - -################ -# Lumigator container control -# Set to "TRUE" if the containers need to be up and running after -# a test target failed (e.g. in CI where containers are inspected -# for logs after failed steps) -KEEP_CONTAINERS_UP="FALSE" -################ - -################ -# Lumigator API configuration -# LUMI_API_CORS_ALLOWED_ORIGINS: -# Comma separated list of origins (See: https://developer.mozilla.org/en-US/docs/Glossary/Origin) -# that should be allowed to make Cross-Domain (CORS) API requests to the Lumigator backend API. 
-# The expected format of each is: scheme + domain + port (ff no port is specified then 80 is assumed). -# e.g. "http://localhost:3000,http://lumigator.mydomain.com" -# To allow CORS requests from anywhere specify "*" as any, or the only value. -# e.g. "*" -LUMI_API_CORS_ALLOWED_ORIGINS=${LUMI_API_CORS_ALLOWED_ORIGINS:-http://localhost,http://localhost:3000} -################ - -################ -# Lumigator database backend. If not set, the Makefile will assign a local SQLite db. -# SQLALCHEMY_DATABASE_URL=... -################ - -################ -# AWS Variables for S3 Object Storage -# Configure these for AWS access, or use defaults for local development with minio. -AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID:-lumigator} -AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY:-lumigator} -AWS_DEFAULT_REGION=${AWS_DEFAULT_REGION:-us-east-2} -# Default is the default api port used by minio -AWS_ENDPOINT_URL=${AWS_ENDPOINT_URL:-http://localhost:9000} -S3_BUCKET=${S3_BUCKET:-lumigator-storage} -################ - -################# -# Ray Cluster Configuration -# These settings are for the local Ray setup. -# To use an external Ray cluster, you MUST use an external S3-compatible storage -# to ensure the Ray workers can access data from your Lumigator server. -RAY_HEAD_NODE_HOST=${RAY_HEAD_NODE_HOST:-ray} -RAY_DASHBOARD_PORT=${RAY_DASHBOARD_PORT:-8265} -# Frontend configuration - -RAY_WORKER_GPUS=${RAY_WORKER_GPUS:-0} -RAY_WORKER_GPUS_FRACTION=${RAY_WORKER_GPUS_FRACTION:-0} -NVIDIA_VISIBLE_DEVICES=${NVIDIA_VISIBLE_DEVICES:-all} -GPU_COUNT=${GPU_COUNT:-0} -################ - -################ -# MLFlow Configuration -MLFLOW_TRACKING_URI=${MLFLOW_TRACKING_URI:-http://mlflow:5000} -################ diff --git a/.gitignore b/.gitignore index 2d45390d..955e60b8 100644 --- a/.gitignore +++ b/.gitignore @@ -175,6 +175,8 @@ cython_debug/ *.db *.db-journal -# env diff files -.env.diff -.env.diff.new +# Config: User supplied overrides +user.conf + +# Config: Temporary build directory +build/ diff --git a/Makefile b/Makefile index 007faa4d..d3974657 100644 --- a/Makefile +++ b/Makefile @@ -1,11 +1,26 @@ -.PHONY: local-up local-down local-logs clean-docker-buildcache clean-docker-images clean-docker-containers start-lumigator-external-services start-lumigator start-lumigator-postgres stop-lumigator test-sdk-unit test-sdk-integration test-sdk-integration-containers test-sdk test-backend-unit test-backend-integration test-backend-integration-containers test-backend test-jobs-evaluation-unit test-jobs-inference-unit test-jobs test-all check-dot-env +.PHONY: local-up local-down local-logs clean-docker-buildcache clean-docker-images clean-docker-containers start-lumigator-external-services start-lumigator start-lumigator-postgres stop-lumigator test-sdk-unit test-sdk-integration test-sdk-integration-containers test-sdk test-backend-unit test-backend-integration test-backend-integration-containers test-backend test-jobs-evaluation-unit test-jobs-inference-unit test-jobs test-all config-clean config-generate-env SHELL:=/bin/bash UNAME:= $(shell uname -o) + +# Binaries required to run this Makefile; if any cannot be found, make will fail with an error. +# uv is only required for local-up (dev). 
+REQUIRED_BINARIES := git docker +$(foreach bin,$(REQUIRED_BINARIES),\ + $(if $(shell command -v $(bin) 2> /dev/null),,$(error Please install `$(bin)`))) + PROJECT_ROOT := $(shell git rev-parse --show-toplevel) CONTAINERS_RUNNING := $(shell docker ps -q --filter "name=lumigator-") -KEEP_CONTAINERS_UP := $(shell grep -E '^KEEP_CONTAINERS_UP=' .env | cut -d'=' -f2 | tr -d '"' || echo "FALSE") +KEEP_CONTAINERS_UP ?= "FALSE" + +# Configuration to identify the input and output config files +# NOTE: Changing CONFIG_BUILD_DIR will require review of .gitignore +CONFIG_BUILD_DIR=build +# Default config prefixed with dot to hide from user +CONFIG_DEFAULT=.default.conf +# User editable config file (optional; must be created manually to override defaults) +CONFIG_OVERRIDE=user.conf # used in docker-compose to choose the right Ray image ARCH := $(shell uname -m) @@ -87,62 +102,47 @@ LOCAL_DOCKERCOMPOSE_FILE:= docker-compose.yaml DEV_DOCKER_COMPOSE_FILE:= .devcontainer/docker-compose.override.yaml POSTGRES_DOCKER_COMPOSE_FILE:= .devcontainer/docker-compose-postgres.override.yaml -check-dot-env: -# Create .env from template if it doesn't exist - @if [ ! -f .env ]; then \ - cp .env.template .env; \ - echo ".env created from .env.template"; \ - fi - - # Generate new diff between template and current .env - @diff .env.template .env > .env.diff.new 2>/dev/null || true - - # Check if files are out of sync and show warning - @if [ -f .env ] && [ -f .env.template ] && ! cmp -s .env.diff .env.diff.new; then \ - echo -e "\033[1;31m====================================================================\033[0m"; \ - echo -e "\033[1;31mWARNING: .env and .env.template are out of sync. Please review changes\033[0m"; \ - echo -e "\033[1;31m====================================================================\033[0m"; \ - fi - - # Update diff file for next comparison - @mv .env.diff.new .env.diff 2>/dev/null || true +define remove_config_dir + @echo "Cleaning up temporary config directory: '$(CONFIG_BUILD_DIR)'..." 
+ @rm -rf $(CONFIG_BUILD_DIR) + @echo "Cleanup complete" +endef # Launches Lumigator in 'development' mode (all services running locally, code mounted in) -local-up: check-dot-env +local-up: config-generate-env uv run pre-commit install - RAY_ARCH_SUFFIX=$(RAY_ARCH_SUFFIX) COMPUTE_TYPE=$(COMPUTE_TYPE) docker compose --profile local $(GPU_COMPOSE) -f $(LOCAL_DOCKERCOMPOSE_FILE) -f ${DEV_DOCKER_COMPOSE_FILE} up --watch --build + RAY_ARCH_SUFFIX=$(RAY_ARCH_SUFFIX) COMPUTE_TYPE=$(COMPUTE_TYPE) docker compose --env-file "$(CONFIG_BUILD_DIR)/.env" --profile local $(GPU_COMPOSE) -f $(LOCAL_DOCKERCOMPOSE_FILE) -f $(DEV_DOCKER_COMPOSE_FILE) up --watch --build -local-down: - docker compose --profile local $(GPU_COMPOSE) -f $(LOCAL_DOCKERCOMPOSE_FILE) -f ${DEV_DOCKER_COMPOSE_FILE} down +local-down: config-generate-env + docker compose --env-file "$(CONFIG_BUILD_DIR)/.env" --profile local $(GPU_COMPOSE) -f $(LOCAL_DOCKERCOMPOSE_FILE) -f ${DEV_DOCKER_COMPOSE_FILE} down + $(call remove_config_dir) local-logs: docker compose -f $(LOCAL_DOCKERCOMPOSE_FILE) logs # Launches lumigator in 'user-local' mode (All services running locally, using latest docker container, no code mounted in) - postgres version -start-lumigator-postgres: check-dot-env - RAY_ARCH_SUFFIX=$(RAY_ARCH_SUFFIX) COMPUTE_TYPE=$(COMPUTE_TYPE) docker compose --profile local $(GPU_COMPOSE) -f $(LOCAL_DOCKERCOMPOSE_FILE) -f $(POSTGRES_DOCKER_COMPOSE_FILE) up -d - -stop-lumigator-postgres: - RAY_ARCH_SUFFIX=$(RAY_ARCH_SUFFIX) COMPUTE_TYPE=$(COMPUTE_TYPE) docker compose --profile local $(GPU_COMPOSE) -f $(LOCAL_DOCKERCOMPOSE_FILE) -f $(POSTGRES_DOCKER_COMPOSE_FILE) down +start-lumigator-postgres: config-generate-env + RAY_ARCH_SUFFIX=$(RAY_ARCH_SUFFIX) COMPUTE_TYPE=$(COMPUTE_TYPE) docker compose --env-file "$(CONFIG_BUILD_DIR)/.env" --profile local $(GPU_COMPOSE) -f $(LOCAL_DOCKERCOMPOSE_FILE) -f $(POSTGRES_DOCKER_COMPOSE_FILE) up -d # Launches lumigator in 'user-local' mode (All services running locally, using latest docker container, no code mounted in) -start-lumigator: check-dot-env - RAY_ARCH_SUFFIX=$(RAY_ARCH_SUFFIX) COMPUTE_TYPE=$(COMPUTE_TYPE) docker compose --profile local $(GPU_COMPOSE) -f $(LOCAL_DOCKERCOMPOSE_FILE) up -d +start-lumigator: config-generate-env + RAY_ARCH_SUFFIX=$(RAY_ARCH_SUFFIX) COMPUTE_TYPE=$(COMPUTE_TYPE) docker compose --env-file "$(CONFIG_BUILD_DIR)/.env" --profile local $(GPU_COMPOSE) -f $(LOCAL_DOCKERCOMPOSE_FILE) up -d # Launches lumigator with no code mounted in, and forces build of containers (used in CI for integration tests) -start-lumigator-build: check-dot-env - RAY_ARCH_SUFFIX=$(RAY_ARCH_SUFFIX) COMPUTE_TYPE=$(COMPUTE_TYPE) docker compose --profile local $(GPU_COMPOSE) -f $(LOCAL_DOCKERCOMPOSE_FILE) up -d --build +start-lumigator-build: config-generate-env + RAY_ARCH_SUFFIX=$(RAY_ARCH_SUFFIX) COMPUTE_TYPE=$(COMPUTE_TYPE) docker compose --env-file "$(CONFIG_BUILD_DIR)/.env" --profile local $(GPU_COMPOSE) -f $(LOCAL_DOCKERCOMPOSE_FILE) up -d --build # Launches lumigator with no code mounted in, and forces build of containers (used in CI for integration tests) -start-lumigator-build-postgres: check-dot-env - RAY_ARCH_SUFFIX=$(RAY_ARCH_SUFFIX) COMPUTE_TYPE=$(COMPUTE_TYPE) docker compose --profile local $(GPU_COMPOSE) -f $(LOCAL_DOCKERCOMPOSE_FILE) -f $(POSTGRES_DOCKER_COMPOSE_FILE) up -d --build +start-lumigator-build-postgres: config-generate-env + RAY_ARCH_SUFFIX=$(RAY_ARCH_SUFFIX) COMPUTE_TYPE=$(COMPUTE_TYPE) docker compose --env-file "$(CONFIG_BUILD_DIR)/.env" --profile local $(GPU_COMPOSE) -f 
$(LOCAL_DOCKERCOMPOSE_FILE) -f $(POSTGRES_DOCKER_COMPOSE_FILE) up -d --build # Launches lumigator without local dependencies (ray, S3) -start-lumigator-external-services: check-dot-env - docker compose $(GPU_COMPOSE) -f $(LOCAL_DOCKERCOMPOSE_FILE) up -d +start-lumigator-external-services: config-generate-env + docker compose --env-file "$(CONFIG_BUILD_DIR)/.env" $(GPU_COMPOSE) -f $(LOCAL_DOCKERCOMPOSE_FILE) up -d -stop-lumigator: - RAY_ARCH_SUFFIX=$(RAY_ARCH_SUFFIX) COMPUTE_TYPE=$(COMPUTE_TYPE) docker compose --profile local $(GPU_COMPOSE) -f $(LOCAL_DOCKERCOMPOSE_FILE) down +stop-lumigator: config-generate-env + RAY_ARCH_SUFFIX=$(RAY_ARCH_SUFFIX) COMPUTE_TYPE=$(COMPUTE_TYPE) docker compose --env-file "$(CONFIG_BUILD_DIR)/.env" --profile local $(GPU_COMPOSE) -f $(LOCAL_DOCKERCOMPOSE_FILE) -f $(POSTGRES_DOCKER_COMPOSE_FILE) down + $(call remove_config_dir) clean-docker-buildcache: docker builder prune --all -f @@ -157,7 +157,7 @@ clean-docker-images: clean-docker-all: clean-docker-containers clean-docker-buildcache clean-docker-images -clean-all: clean-docker-buildcache clean-docker-containers +clean-all: clean-docker-buildcache clean-docker-containers config-clean # SDK tests @@ -244,3 +244,21 @@ test-jobs-unit: test-jobs-evaluation-unit test-jobs-inference-unit # test everything test-all: test-sdk test-backend test-jobs-unit + +# config-clean: removes any generated config files from the build directory (including the directory itself). +config-clean: + $(call remove_config_dir) + +# config-generate-env: merges the default config file with any user overrides and outputs a .env file ready for use in Docker. +config-generate-env: config-clean + @mkdir -p $(CONFIG_BUILD_DIR) + + @if [ -f $(CONFIG_OVERRIDE) ]; then \ echo "Found user configuration: '$(CONFIG_OVERRIDE)', overrides will be applied"; \ scripts/config_generate_env.sh $(CONFIG_DEFAULT) $(CONFIG_OVERRIDE) > "$(CONFIG_BUILD_DIR)/.env"; \ else \ echo "No user configuration found, default will be used: '$(CONFIG_DEFAULT)'"; \ cp $(CONFIG_DEFAULT) "$(CONFIG_BUILD_DIR)/.env"; \ fi + + @echo "Config file generated: '$(CONFIG_BUILD_DIR)/.env'" diff --git a/docker-compose.gpu.override.yaml b/docker-compose.gpu.override.yaml index 773b2ee8..8c20ee8c 100644 --- a/docker-compose.gpu.override.yaml +++ b/docker-compose.gpu.override.yaml @@ -13,9 +13,9 @@ services: count: $GPU_COUNT capabilities: [gpu] environment: - - NVIDIA_VISIBLE_DEVICES=$NVIDIA_VISIBLE_DEVICES + - NVIDIA_VISIBLE_DEVICES backend: environment: - - RAY_WORKER_GPUS=$RAY_WORKER_GPUS - - RAY_WORKER_GPUS_FRACTION=$RAY_WORKER_GPUS_FRACTION + - RAY_WORKER_GPUS + - RAY_WORKER_GPUS_FRACTION diff --git a/docker-compose.yaml b/docker-compose.yaml index 4c80c245..6fa57029 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -16,9 +16,9 @@ services: timeout: 20s retries: 18 environment: - - MINIO_ROOT_USER=minioadmin - - MINIO_ROOT_PASSWORD=minioadmin - - MINIO_API_CORS_ALLOW_ORIGIN=* + - MINIO_ROOT_USER + - MINIO_ROOT_PASSWORD + - MINIO_API_CORS_ALLOW_ORIGIN volumes: # - ${HOME}/minio/data:/data - minio-data:/data @@ -37,7 +37,7 @@ services: - -c - | set -ex - mc alias set lumigator_s3 http://minio:9000 minioadmin minioadmin + mc alias set lumigator_s3 http://minio:9000 ${MINIO_ROOT_USER} ${MINIO_ROOT_PASSWORD} mc admin user add lumigator_s3 lumigator lumigator mc admin policy attach lumigator_s3 readwrite --user lumigator mc mb -p lumigator_s3/lumigator-storage @@ -70,7 +70,7 @@ services: condition: service_healthy ports: - "6379:6379" - - "8265:8265" + - 
"${RAY_DASHBOARD_PORT}:${RAY_DASHBOARD_PORT}" - "10001:10001" # https://docs.ray.io/en/releases-2.30.0/cluster/cli.html#ray-start for more info about the command # Apparently dead head nodes can be selected unless @@ -88,7 +88,7 @@ services: # a shared dir, permissions need to be setup # ... || true allows this to fail (-e is set) sudo chmod -R 777 /tmp/ray_pip_cache/ || true - RAY_JOB_ALLOW_DRIVER_ON_WORKER_NODES=1 RAY_REDIS_ADDRESS=redis:6379 ray start --head --dashboard-port=8265 --port=6379 --dashboard-host=0.0.0.0 --ray-client-server-port 10001 + RAY_JOB_ALLOW_DRIVER_ON_WORKER_NODES=1 RAY_REDIS_ADDRESS=redis:6379 ray start --head --dashboard-port=${RAY_DASHBOARD_PORT} --port=6379 --dashboard-host=0.0.0.0 --ray-client-server-port 10001 mkdir -p /tmp/ray/session_latest/runtime_resources/pip rmdir /tmp/ray/session_latest/runtime_resources/pip/ && ln -s /tmp/ray_pip_cache /tmp/ray/session_latest/runtime_resources/pip sleep infinity @@ -109,6 +109,7 @@ services: - MISTRAL_API_KEY - OPENAI_API_KEY - HF_TOKEN + - HF_HOME - AWS_ACCESS_KEY_ID - AWS_SECRET_ACCESS_KEY - AWS_DEFAULT_REGION @@ -148,31 +149,31 @@ services: ports: - 8000:8000 environment: - - DEPLOYMENT_TYPE=local + - DEPLOYMENT_TYPE # The local file needs to be available through a mount, # if persistence is needed - - SQLALCHEMY_DATABASE_URL=${SQLALCHEMY_DATABASE_URL:-sqlite:////tmp/local.db} + - SQLALCHEMY_DATABASE_URL=${DATABASE_URL} - S3_ENDPOINT_URL=${AWS_ENDPOINT_URL} - AWS_ACCESS_KEY_ID - AWS_SECRET_ACCESS_KEY - AWS_DEFAULT_REGION - AWS_ENDPOINT_URL - S3_BUCKET - - EVALUATOR_PIP_REQS=/mzai/lumigator/jobs/evaluator/requirements.txt - - EVALUATOR_WORK_DIR=/mzai/lumigator/jobs/evaluator + - EVALUATOR_PIP_REQS + - EVALUATOR_WORK_DIR # TODO: the following two rows should be renamed to EVALUATOR_* # and the two above should be removed when we depreate evaluator - - EVALUATOR_LITE_PIP_REQS=/mzai/lumigator/jobs/evaluator_lite/requirements.txt - - EVALUATOR_LITE_WORK_DIR=/mzai/lumigator/jobs/evaluator_lite - - INFERENCE_PIP_REQS=/mzai/lumigator/jobs/inference/requirements.txt - - INFERENCE_WORK_DIR=/mzai/lumigator/jobs/inference + - EVALUATOR_LITE_PIP_REQS + - EVALUATOR_LITE_WORK_DIR + - INFERENCE_PIP_REQS + - INFERENCE_WORK_DIR - RAY_DASHBOARD_PORT - RAY_HEAD_NODE_HOST - - MISTRAL_API_KEY=$MISTRAL_API_KEY - - OPENAI_API_KEY=$OPENAI_API_KEY - - RAY_WORKER_GPUS=$RAY_WORKER_GPUS - - RAY_WORKER_GPUS_FRACTION=$RAY_WORKER_GPUS_FRACTION - - LUMI_API_CORS_ALLOWED_ORIGINS + - MISTRAL_API_KEY + - OPENAI_API_KEY + - RAY_WORKER_GPUS + - RAY_WORKER_GPUS_FRACTION + - LUMIGATOR_API_CORS_ALLOWED_ORIGINS - MLFLOW_TRACKING_URI # NOTE: to keep AWS_ENDPOINT_URL as http://localhost:9000 both on the host system # and inside containers, we map localhost to the host gateway IP. 
@@ -213,18 +214,18 @@ services: ai.mozilla.product_name: lumigator image: ghcr.io/mlflow/mlflow:v2.20.1 environment: - - MLFLOW_TRACKING_URI=${MLFLOW_TRACKING_URI} - - AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID} - - AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY} - - AWS_DEFAULT_REGION=${AWS_DEFAULT_REGION} - - BACKEND_STORE_URI=sqlite:///mlflow.db - - ARTIFACT_ROOT=s3://mlflow` + - AWS_ACCESS_KEY_ID + - AWS_SECRET_ACCESS_KEY + - AWS_DEFAULT_REGION + - MLFLOW_TRACKING_URI + - MLFLOW_DATABASE_URL + - MLFLOW_S3_ROOT_PATH ports: - "8001:5000" depends_on: minio: condition: service_healthy - command: mlflow server --backend-store-uri ${BACKEND_STORE_URI} --default-artifact-root ${ARTIFACT_ROOT} --host 0.0.0.0 + command: mlflow server --backend-store-uri ${MLFLOW_DATABASE_URL} --default-artifact-root ${MLFLOW_S3_ROOT_PATH} --host 0.0.0.0 extra_hosts: - "localhost:host-gateway" profiles: diff --git a/docs/source/get-started/installation.md b/docs/source/get-started/installation.md index c1cb5823..e7675d10 100644 --- a/docs/source/get-started/installation.md +++ b/docs/source/get-started/installation.md @@ -9,18 +9,20 @@ In this guide, we'll show you how to get started with a local deployment. Before you start, make sure you have the following: - A working installation of [Docker](https://docs.docker.com/engine/install/) - - On MAC, Docker Desktop >= 4.3, and `docker-compose` >= 1.28. + - On Mac, Docker Desktop >= 4.37, and `docker-compose` >= 2.31. - On Linux, please also complete the [post-installation steps](https://docs.docker.com/engine/install/linux-postinstall/). +- The [`uv`](https://github.com/astral-sh/uv?tab=readme-ov-file#installation) Python package manager installed. - The system Python; no version manager, such as `uv`, should be active. ## Local Deployment -You can run and develop Lumigator locally using `docker-compose.` This creates four container +You can run and develop Lumigator locally using `docker-compose`. This creates multiple container services networked together to make up all the components of the Lumigator application: - `minio`: Local storage for datasets that mimics S3-API compatible functionality. - `backend`: Lumigator’s FastAPI REST API. - `ray`: A Ray cluster for submitting several types of jobs. +- `mlflow`: Used to track experiments and metrics. - `frontend`: Lumigator's Web UI ```{note} diff --git a/docs/source/operations-guide/configuration.md b/docs/source/operations-guide/configuration.md new file mode 100644 index 00000000..76871bbf --- /dev/null +++ b/docs/source/operations-guide/configuration.md @@ -0,0 +1,84 @@ +# Configuring Lumigator + +> [!NOTE] +> This guide only covers configuring Lumigator when deployed using Docker. + +Lumigator comes with sensible defaults that allow you to [start using it](../get-started/installation.md) without modification using [`Docker Compose`](https://docs.docker.com/compose/). + +This guide explains how configuration works in Lumigator and how you can make changes to settings if required. + +## Where are the config files? + +The default Lumigator settings are found in the repository root under {{ '[`.default.conf`](https://github.com/mozilla-ai/lumigator/blob/{}/.default.conf)'.format(commit_id) }}. + +They are specified in key=value format within the config file. + +## How are these settings used? + +When you start Lumigator using commands like `make local-up` or `make start-lumigator`, configuration steps are automatically run that do the following: + +1. Any temporary config files used for deployment are removed +1. 
Default and user settings (if present) are combined, with user settings preferred (see below for information on using your own settings) +1. The generated config file (`.env`) is placed under the `build` directory in the repository root +1. Docker Compose is supplied with the environment file path to the generated `.env` file + +From there the `.env` file variables are used in Lumigator's application or supplied to components (such as Ray, MinIO, MLFlow). + +When you stop Lumigator using commands like `make local-down` or `make stop-lumigator`, the temporary files stored under `build` are removed. While Lumigator is running, they remain present if you wish to examine their contents. + +> [!NOTE] +> The `build` directory and the user-defined config file are both listed in `.gitignore` + +## How should I set my own settings? + +User-specific configuration can be stored in a file named `user.conf`. This file is listed in `.gitignore` and will never be committed to version control. + +`user.conf` must be created manually when required; **only** add key/value pairs for the settings you explicitly wish to change from the defaults. + +Any settings not included in `user.conf` will automatically fall back to the default settings when running Lumigator. + +Please review `.default.conf` for the format, setting names and default values (also see below for a quick reference). + +## Can I configure everything? + +Not currently. Many settings are available in `.default.conf`, but some values remain fixed; for example, you cannot yet change the URL that the backend exposes via FastAPI from http://localhost:8000. + +## Settings + +The following section documents the available settings: + +> [!NOTE] +> To use an external Ray cluster, you **must** use external S3-compatible storage and ensure the Ray workers can access data from your Lumigator server. + +| Name | Type | Description | |------------------------------------|---------|----------------------------------------------------------------------------------------------------------------------------| | AWS_ACCESS_KEY_ID | string | AWS credential, used for auth with S3 services (e.g. MinIO) | | AWS_SECRET_ACCESS_KEY | string | Sensitive AWS secret, used for auth with S3 services (e.g. MinIO) | | AWS_DEFAULT_REGION | string | AWS default region name | | AWS_ENDPOINT_URL | string | URL used to contact S3 services (e.g. 
MinIO) | | S3_BUCKET | string | The S3 bucket name to store Lumigator data under | | RAY_HEAD_NODE_HOST | string | The hostname of the head Ray node that Lumigator should contact | | RAY_DASHBOARD_PORT | integer | The dashboard port for the Ray cluster that Lumigator is interacting with | | RAY_WORKER_GPUS | integer | The number of worker GPUs available to Ray that should be used for jobs | | RAY_WORKER_GPUS_FRACTION | float | If not `0`, the fraction of the worker GPUs that should be used by a Ray job | | NVIDIA_VISIBLE_DEVICES | string | Specifies which NVIDIA devices should be visible to Ray (defaults to 'all') | | GPU_COUNT | integer | The number of GPUs | | HF_HOME | string | The home directory for HuggingFace (used for caching) | | HF_TOKEN | string | Sensitive API token used to access gated models in HuggingFace | | MISTRAL_API_KEY | string | Sensitive API key used to access Mistral | | OPENAI_API_KEY | string | Sensitive API key used to access OpenAI | | MLFLOW_TRACKING_URI | string | The URL used to access MLFlow | | MLFLOW_DATABASE_URL | string | DB connection string/URL used for MLFlow | | MLFLOW_S3_ROOT_PATH | string | S3-style URL path to the root where MLFlow should store artifacts, e.g. s3://mlflow | | MINIO_ROOT_USER | string | The root user name for accessing MinIO | | MINIO_ROOT_PASSWORD | string | Sensitive secret for accessing MinIO as root | | MINIO_API_CORS_ALLOW_ORIGIN | string | Allowed origins for CORS requests to MinIO (defaults to "*") | | DEPLOYMENT_TYPE | string | Allows the user to define which environment Lumigator is deployed in: 'local', 'development', 'staging' or 'production' | | DATABASE_URL | string | DB connection string/URL used for Lumigator's local DB storage | | LUMIGATOR_API_CORS_ALLOWED_ORIGINS | string | A comma-separated list of URLs which should be allowed origins for CORS requests; "*" can be supplied to allow all | | INFERENCE_PIP_REQS | string | Path within the container to the requirements.txt file for inference jobs | | INFERENCE_WORK_DIR | string | Path within the container to the working directory that is zipped and sent to Ray as an inference job | | EVALUATOR_PIP_REQS | string | Path within the container to the requirements.txt file for evaluation jobs | | EVALUATOR_WORK_DIR | string | Path within the container to the working directory that is zipped and sent to Ray as an evaluation job | | EVALUATOR_LITE_PIP_REQS | string | Path within the container to the requirements.txt file for evaluation (lite) jobs | | EVALUATOR_LITE_WORK_DIR | string | Path within the container to the working directory that is zipped and sent to Ray as an evaluation (lite) job | diff --git a/docs/source/operations-guide/configure-S3.md b/docs/source/operations-guide/configure-S3.md index 63a74fbf..cc392f92 100644 --- a/docs/source/operations-guide/configure-S3.md +++ b/docs/source/operations-guide/configure-S3.md @@ -6,7 +6,7 @@ This guide will walk you through the process of configuring your S3-compatible s To use Lumigator with S3 or S3-compatible storage, you need to configure CORS (Cross-Origin Resource Sharing) for your bucket. Below is the recommended CORS configuration: -```console +```xml @@ -22,14 +22,14 @@ To use Lumigator with S3 or S3-compatible storage, you need to configure CORS (C ``` -You can specify your origin instead of * in to restrict access, but a wildcard (*) will also work. Configuring this is necessary for Lumigator to function correctly with your S3 bucket. 
+You can specify your origin instead of `*` to restrict access, but a wildcard (`*`) will also work. Configuring this is necessary for Lumigator to function correctly with your S3 bucket. ## Applying CORS configuration In order to apply that CORS configuration to the bucket, if you want to do it with the AWS CLI tool (our suggested method), you have to write the CORS configuration in JSON, and store it in a file: -```console +```json { "CORSRules": [ { @@ -45,5 +45,7 @@ write the CORS configuration in JSON, and store it in a file: After that, you can apply it with the AWS CLI tool: ```console -user@host:~$ aws s3api put-bucket-cors --bucket --cors-configuration file://cors-config.json -``` \ No newline at end of file +user@host:~$ aws s3api put-bucket-cors \ + --bucket <your-bucket-name> \ + --cors-configuration file://cors-config.json +``` diff --git a/docs/source/operations-guide/dev.md b/docs/source/operations-guide/dev.md index f9246551..93a5b0d8 100644 --- a/docs/source/operations-guide/dev.md +++ b/docs/source/operations-guide/dev.md @@ -14,12 +14,13 @@ command: user@host:~/lumigator$ make local-up ``` -This creates four container services networked together to make up all the components of the +This creates multiple container services networked together to make up all the components of the Lumigator application: - `minio`: Local storage for datasets that mimics S3-API compatible functionality. - `backend`: Lumigator’s FastAPI REST API. - `ray`: A Ray cluster for submitting several types of jobs. +- `mlflow`: Used to track experiments and metrics. - `frontend`: Lumigator's Web UI The `local-up` make target will also set a watch on the backend codebase, so that any changes you diff --git a/lumigator/backend/backend/settings.py b/lumigator/backend/backend/settings.py index 634ff69d..ca2924ee 100644 --- a/lumigator/backend/backend/settings.py +++ b/lumigator/backend/backend/settings.py @@ -16,7 +16,7 @@ class BackendSettings(BaseSettings): MAX_DATASET_SIZE: ByteSize = MAX_DATASET_SIZE_HUMAN_READABLE # Backend API env vars - _api_cors_allowed_origins: str = os.environ.get("LUMI_API_CORS_ALLOWED_ORIGINS", "") + _api_cors_allowed_origins: str = os.environ.get("LUMIGATOR_API_CORS_ALLOWED_ORIGINS", "") # AWS S3_ENDPOINT_URL: str | None = None diff --git a/scripts/config_generate_env.sh b/scripts/config_generate_env.sh new file mode 100755 index 00000000..2d93f865 --- /dev/null +++ b/scripts/config_generate_env.sh @@ -0,0 +1,30 @@ +#!/bin/bash + +merge_conf_files() { + temp_file=$(mktemp) + keys_order=() + + while IFS='=' read -r key value || [[ -n "$key" ]]; do + [[ -z "$key" || "$key" =~ ^#.*$ ]] && continue + # Remove existing key entry if found + sed -i '' "/^$key=/d" "$temp_file" 2>/dev/null || sed -i "/^$key=/d" "$temp_file" + # Append new value + echo "$key=$value" >> "$temp_file" + # Store key order if it's the first occurrence + [[ " ${keys_order[*]} " == *" $key "* ]] || keys_order+=("$key") + done < <(cat "$@") + + # Print keys in original order with latest values + for key in "${keys_order[@]}"; do + grep "^$key=" "$temp_file" + done + + rm "$temp_file" +} + +if [[ $# -lt 1 || $# -gt 2 ]]; then + echo "Usage: $0 <conf_file> [conf_file_override]" + exit 1 +fi + +merge_conf_files "$@"
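
For illustration, here is a minimal sketch of the merge semantics implemented by `scripts/config_generate_env.sh` and the `config-generate-env` target: user values win, default key ordering is preserved, and any key omitted from `user.conf` falls back to `.default.conf`. The `user.conf` contents and values below are hypothetical examples only:

```console
user@host:~/lumigator$ cat user.conf
# Hypothetical overrides; every key omitted here falls back to .default.conf
S3_BUCKET=my-custom-bucket
RAY_WORKER_GPUS=1
user@host:~/lumigator$ scripts/config_generate_env.sh .default.conf user.conf | grep -E '^(S3_BUCKET|RAY_WORKER_GPUS)='
S3_BUCKET=my-custom-bucket
RAY_WORKER_GPUS=1
```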
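To sanity-check what Docker Compose will actually resolve from the generated file before starting any services, the standard `docker compose config` command can be pointed at the same `--env-file`. This is a sketch; when run by hand rather than via the Makefile, variables the Makefile normally passes (such as `RAY_ARCH_SUFFIX` and `COMPUTE_TYPE`) will be unset and may produce warnings:

```console
user@host:~/lumigator$ make config-generate-env
user@host:~/lumigator$ docker compose --env-file build/.env --profile local -f docker-compose.yaml config
```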