diff --git a/Dockerfile b/Dockerfile index 300068f..47179ea 100644 --- a/Dockerfile +++ b/Dockerfile @@ -16,14 +16,14 @@ RUN apt-get update \ && rm -rf /var/lib/{apt,dpkg,cache,log} # Copy the current directory contents into the container at /app -COPY app/requirements.txt requirements.txt +COPY src/finetuningresearch/requirements.txt requirements.txt # Install any needed packages specified in requirements.txt RUN pip install --upgrade pip && \ pip install --no-cache-dir -r requirements.txt && \ rm requirements.txt -COPY app /app +COPY src/finetuningresearch /app # Set the working directory in the container to /app WORKDIR /app diff --git a/README.md b/README.md index cbf9696..19ffef4 100644 --- a/README.md +++ b/README.md @@ -19,3 +19,21 @@ Use a tag versioning by date / user as needed. For example, docker build . -t rparundekar/fine_tune_research:20230110_01 docker push rparundekar/fine_tune_research:20230110_01 ``` + +## Library +To use this fine-tuning library as a Python package, install it with pip directly from GitHub. This should install all dependencies as well. + +```sh +pip install -v git+https://github.com/shankarg87/training_research@main +``` + +Then use it normally in your Python code. 
+ +```python +from finetuningresearch import execute + +config = """ + +""" +execute(config) +``` diff --git a/pyproject.toml b/pyproject.toml index f77f481..b7a7440 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,12 +1,12 @@ [project] -name = "Fine-Tuning Reserch" +name = "finetuningresearch" version = "0.1.0" description = "Open source research on fine-tuning LLMs" -authors = ["Rahul Parundekar ", "Shankar Ganesan "] -license = "MIT" +authors = [ + {name = "Rahul Parundekar", email= "rahul@aihero.studio" }, + {name = "Shankar Ganesan", email = "gshankar.87@gmail.com" } +] readme = "README.md" -homepage = "https://aihero.studio" -repository = "https://github.com/ai-hero/fine_tune_research" classifiers = [ "Programming Language :: Python", "Programming Language :: Python :: 3.9", @@ -14,6 +14,23 @@ classifiers = [ "Programming Language :: Python :: 3.11", "License :: OSI Approved :: MIT License", ] +dependencies = [ + "transformers>=4.35.0", + "peft>=0.5.0", + "bitsandbytes>=0.41.1", + "accelerate>=0.25.0", + "trl>=0.7.2", + "pydantic-settings>=2.0.3", + "scipy>=1.11.3", + "PyYAML>=6.0.1", + "datasets>=2.14.6", + "einops>=0.7.0", + "wandb>=0.15.12", + "python-dotenv", + "minio>=7.2.0", + "fire", + "types-PyYAML" +] [tool.pytest.ini_options] addopts = "-vvv" diff --git a/src/finetuningresearch/__init__.py b/src/finetuningresearch/__init__.py new file mode 100644 index 0000000..7b2849d --- /dev/null +++ b/src/finetuningresearch/__init__.py @@ -0,0 +1,4 @@ +"""Helps finetune models.""" +from .sft import execute + +__all__ = ["execute"] diff --git a/app/default_config.yaml b/src/finetuningresearch/default_config.yaml similarity index 100% rename from app/default_config.yaml rename to src/finetuningresearch/default_config.yaml diff --git a/app/requirements.txt b/src/finetuningresearch/requirements.txt similarity index 100% rename from app/requirements.txt rename to src/finetuningresearch/requirements.txt diff --git a/app/sft.py b/src/finetuningresearch/sft.py 
similarity index 99% rename from app/sft.py rename to src/finetuningresearch/sft.py index 557862b..bf0ea01 100644 --- a/app/sft.py +++ b/src/finetuningresearch/sft.py @@ -16,7 +16,7 @@ from trl import SFTTrainer from wandb import Table, finish -from utils import DatasetMover, dump_envs, load_config, peft_module_casting_to_bf16 +from .utils import DatasetMover, dump_envs, load_config, peft_module_casting_to_bf16 CHECKPOINT_DIR = "/mnt/checkpoint" DATASET_DIR = "/mnt/dataset" @@ -647,10 +647,11 @@ def save_model(model: Any, tokenizer: Any, config: dict[str, Any]) -> None: raise NotImplementedError("S3 support not implemented yet") -def main() -> None: +def execute(config: dict[str, Any] = {}) -> None: """Execute the main training loop.""" dump_envs() - config = load_config() + if not config: + config = load_config() # Check if "training" is in config or "batch_inference" is in config, but not both. if "training" not in config and "batch_inference" not in config: @@ -694,4 +695,4 @@ def main() -> None: if __name__ == "__main__": - Fire(main) + Fire(execute) diff --git a/app/utils.py b/src/finetuningresearch/utils.py similarity index 100% rename from app/utils.py rename to src/finetuningresearch/utils.py