Skip to content

Commit

Permalink
Create finetuningresearch python package, allowing it to be pip insta…
Browse files Browse the repository at this point in the history
…lled
  • Loading branch information
shankarg87 committed Feb 14, 2024
1 parent e6b4a15 commit 163d193
Show file tree
Hide file tree
Showing 8 changed files with 51 additions and 11 deletions.
4 changes: 2 additions & 2 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -16,14 +16,14 @@ RUN apt-get update \
&& rm -rf /var/lib/{apt,dpkg,cache,log}

# Copy the current directory contents into the container at /app
COPY app/requirements.txt requirements.txt
COPY src/finetuningresearch/requirements.txt requirements.txt

# Install any needed packages specified in requirements.txt
RUN pip install --upgrade pip && \
pip install --no-cache-dir -r requirements.txt && \
rm requirements.txt

COPY app /app
COPY src/finetuningresearch /app

# Set the working directory in the container to /app
WORKDIR /app
Expand Down
18 changes: 18 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,3 +19,21 @@ Use a tag versioning by date / user as needed. For example,
docker build . -t rparundekar/fine_tune_research:20230110_01
docker push rparundekar/fine_tune_research:20230110_01
```

## Library
To use this fine-tuning library as a Python package, install it with pip directly from GitHub. This installs all dependencies as well.

```sh
pip install -v git+https://github.com/shankarg87/training_research@main
```

Then use it in your Python code as usual.

```python
from finetuningresearch import execute

config = """
<config multiline yaml here>
"""
execute(config)
```
27 changes: 22 additions & 5 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,19 +1,36 @@
[project]
name = "Fine-Tuning Reserch"
name = "finetuningresearch"
version = "0.1.0"
description = "Open source research on fine-tuning LLMs"
authors = ["Rahul Parundekar <[email protected]>", "Shankar Ganesan <[email protected]>"]
license = "MIT"
authors = [
{name = "Rahul Parundekar", email= "[email protected]" },
{name = "Shankar Ganesan", email = "[email protected]" }
]
readme = "README.md"
homepage = "https://aihero.studio"
repository = "https://github.com/ai-hero/fine_tune_research"
classifiers = [
"Programming Language :: Python",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"License :: OSI Approved :: MIT License",
]
dependencies = [
"transformers>=4.35.0",
"peft>=0.5.0",
"bitsandbytes>=0.41.1",
"accelerate>=0.25.0",
"trl>=0.7.2",
"pydantic-settings>=2.0.3",
"scipy>=1.11.3",
"PyYAML>=6.0.1",
"datasets>=2.14.6",
"einops>=0.7.0",
"wandb>=0.15.12",
"python-dotenv",
"minio>=7.2.0",
"fire",
"types-PyYAML"
]

[tool.pytest.ini_options]
addopts = "-vvv"
Expand Down
4 changes: 4 additions & 0 deletions src/finetuningresearch/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
"""Top-level package for finetuningresearch.

Re-exports :func:`execute` (the fine-tuning entry point defined in
``sft.py``) so callers can write ``from finetuningresearch import execute``
as shown in the README.
"""
from .sft import execute  # re-export the training entry point from sft.py

__all__ = ["execute"]  # public API: only `execute` is intended for external use
File renamed without changes.
File renamed without changes.
9 changes: 5 additions & 4 deletions app/sft.py → src/finetuningresearch/sft.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
from trl import SFTTrainer
from wandb import Table, finish

from utils import DatasetMover, dump_envs, load_config, peft_module_casting_to_bf16
from .utils import DatasetMover, dump_envs, load_config, peft_module_casting_to_bf16

CHECKPOINT_DIR = "/mnt/checkpoint"
DATASET_DIR = "/mnt/dataset"
Expand Down Expand Up @@ -647,10 +647,11 @@ def save_model(model: Any, tokenizer: Any, config: dict[str, Any]) -> None:
raise NotImplementedError("S3 support not implemented yet")


def main() -> None:
def execute(config: dict[str, Any] = {}) -> None:
"""Execute the main training loop."""
dump_envs()
config = load_config()
if not config:
config = load_config()

# Check if "training" is in config or "batch_inference" is in config, but not both.
if "training" not in config and "batch_inference" not in config:
Expand Down Expand Up @@ -694,4 +695,4 @@ def main() -> None:


if __name__ == "__main__":
Fire(main)
Fire(execute)
File renamed without changes.

0 comments on commit 163d193

Please sign in to comment.