diff --git a/Dockerfile b/Dockerfile index 300068f..47179ea 100644 --- a/Dockerfile +++ b/Dockerfile @@ -16,14 +16,14 @@ RUN apt-get update \ && rm -rf /var/lib/{apt,dpkg,cache,log} # Copy the current directory contents into the container at /app -COPY app/requirements.txt requirements.txt +COPY src/finetuningresearch/requirements.txt requirements.txt # Install any needed packages specified in requirements.txt RUN pip install --upgrade pip && \ pip install --no-cache-dir -r requirements.txt && \ rm requirements.txt -COPY app /app +COPY src/finetuningresearch /app # Set the working directory in the container to /app WORKDIR /app diff --git a/pyproject.toml b/pyproject.toml index 01e6b13..b7a7440 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,5 +1,5 @@ [project] -name = "fine-tuning-research" +name = "finetuningresearch" version = "0.1.0" description = "Open source research on fine-tuning LLMs" authors = [ @@ -14,6 +14,23 @@ classifiers = [ "Programming Language :: Python :: 3.11", "License :: OSI Approved :: MIT License", ] +dependencies = [ + "transformers>=4.35.0", + "peft>=0.5.0", + "bitsandbytes>=0.41.1", + "accelerate>=0.25.0", + "trl>=0.7.2", + "pydantic-settings>=2.0.3", + "scipy>=1.11.3", + "PyYAML>=6.0.1", + "datasets>=2.14.6", + "einops>=0.7.0", + "wandb>=0.15.12", + "python-dotenv", + "minio>=7.2.0", + "fire", + "types-PyYAML" +] [tool.pytest.ini_options] addopts = "-vvv" diff --git a/src/finetuningresearch/__init__.py b/src/finetuningresearch/__init__.py new file mode 100644 index 0000000..f655489 --- /dev/null +++ b/src/finetuningresearch/__init__.py @@ -0,0 +1,4 @@ +"""Helps finetune models.""" +from .sft import start_train + +__all__ = ["start_train"] diff --git a/app/default_config.yaml b/src/finetuningresearch/default_config.yaml similarity index 100% rename from app/default_config.yaml rename to src/finetuningresearch/default_config.yaml diff --git a/app/requirements.txt b/src/finetuningresearch/requirements.txt similarity index 100% rename 
from app/requirements.txt rename to src/finetuningresearch/requirements.txt diff --git a/app/sft.py b/src/finetuningresearch/sft.py similarity index 99% rename from app/sft.py rename to src/finetuningresearch/sft.py index c983aaf..366864f 100644 --- a/app/sft.py +++ b/src/finetuningresearch/sft.py @@ -14,7 +14,7 @@ from trl import SFTTrainer from wandb import Table, finish -from utils import DatasetMover, dump_envs, load_config, peft_module_casting_to_bf16 +from .utils import DatasetMover, dump_envs, load_config, peft_module_casting_to_bf16 CHECKPOINT_DIR = "/mnt/checkpoint" DATASET_DIR = "/mnt/dataset" @@ -400,7 +400,7 @@ def save_model(model: Any, tokenizer: Any, config: dict[str, Any]) -> None: raise NotImplementedError("S3 support not implemented yet") -def main() -> None: +def start_train(config: dict[str, str] | None = None) -> None: """Execute the main training loop.""" dump_envs() config = load_config() @@ -426,4 +426,4 @@ def main() -> None: if __name__ == "__main__": - Fire(main) + Fire(start_train) diff --git a/app/utils.py b/src/finetuningresearch/utils.py similarity index 100% rename from app/utils.py rename to src/finetuningresearch/utils.py