Skip to content

Commit

Permalink
add ruff, change formats, modify README.md
Browse files Browse the repository at this point in the history
  • Loading branch information
youran-qi committed Nov 3, 2024
1 parent fd3b4de commit 2f6de8b
Show file tree
Hide file tree
Showing 8 changed files with 65 additions and 17 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -197,7 +197,7 @@ docker run -it --rm \

### Step 3. Submit the request to get model response

Run the following command to submit a request to the fine-tuned model and get model response. Note that the port for our inference service is `5001`, not `5000`.
Run the following command to submit a request to the fine-tuned model and get model response. Note that this inference service is designed to be similar to [Cohere's Chat API](https://docs.cohere.com/v1/reference/chat), and the port for this inference service is `5001`, not `5000`.
```commandline
curl --request POST http://localhost:5001/inference \
--header "Content-Type: application/json" \
Expand Down
29 changes: 28 additions & 1 deletion pdm.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

26 changes: 24 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ dependencies = [
"pytest>=8.3.3",
"pytest-cov>=5.0.0",
"pytest-xdist>=3.6.1",
"ruff>=0.7.2",
]
requires-python = ">=3.10,<4.0"
readme = "README.md"
Expand All @@ -31,6 +32,27 @@ build-backend = "pdm.backend"
distribution = true

[tool.pytest.ini_options]
pythonpath = [
    "src/cohere_finetune"
]
pythonpath = ["src/cohere_finetune"]

[tool.ruff]
src = ["src/cohere_finetune"]
line-length = 119
extend-exclude = ["*.ipynb"]

[tool.ruff.lint]
extend-select = [
"C", # Complexity
"E", # PEP8 errors
"F", # PEP8 formatting
"I", # Import sorting
"UP", # Pyupgrade upgrades
"W", # PEP8 warnings
"PT009", # Pytest assertions
]
ignore = [
"C901", # Function too complex
"E501", # Line length (handled by ruff-format)
"I001", # Import block is un-sorted or un-formatted
"UP007", # X | Y style Unions
"UP015", # Unnecessary open mode parameters
]
6 changes: 3 additions & 3 deletions src/cohere_finetune/chat_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,9 @@ def chat_to_str(chat: dict) -> str:
"""
# Create a normalized chat, where the order of keys in each message is always: "role" and then "content" (if the
# only difference between two chats is the order of keys in each message, we want to regard them as the same chat)
normalized_chat = {"messages": [
{"role": message["role"], "content": message["content"]} for message in chat["messages"]
]}
normalized_chat = {
"messages": [{"role": message["role"], "content": message["content"]} for message in chat["messages"]]
}
return json.dumps(normalized_chat)


Expand Down
7 changes: 4 additions & 3 deletions src/cohere_finetune/cohere_finetune_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ def finetune(self) -> None:
trc = traceback.format_exc()
self._format_error(
logging_msg=f"Missing files during dataset creation: {trc}",
error_msg=f"Missing files during dataset creation: {e}"
error_msg=f"Missing files during dataset creation: {e}",
)
return
except ValueError as e:
Expand Down Expand Up @@ -128,7 +128,7 @@ def finetune(self) -> None:
trc = traceback.format_exc()
self._format_error(
logging_msg=f"Exception during preprocessing: {trc}",
error_msg=f"Exception during preprocessing: {e}"
error_msg=f"Exception during preprocessing: {e}",
)
return

Expand All @@ -145,7 +145,8 @@ def finetune(self) -> None:
trc = traceback.format_exc()
self._format_error(
logging_msg=f"Exception during max sequence length calculation: {trc}",
error_msg=f"Exception during max sequence length calculation: {e}")
error_msg=f"Exception during max sequence length calculation: {e}",
)
return
except Exception:
# If the above procedure fails for any other reason, use the default max_sequence_length in hyperparameters
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from dataclasses import dataclass, field
from transformers import TrainingArguments
from transformers.trainer_utils import IntervalStrategy, SchedulerType
from typing import List, Union
from typing import Union


@dataclass
Expand Down Expand Up @@ -208,7 +208,7 @@ class TrainingArgumentsDefaultChanged(TrainingArguments):
default=1,
metadata={"help": "Total number of training epochs to perform"},
)
report_to: Union[None, str, List[str]] = field(
report_to: Union[None, str, list[str]] = field(
default="none",
metadata={"help": "The list of integrations to report the results and logs to"},
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -81,9 +81,7 @@ def create_and_prepare_model(
See https://huggingface.co/docs/transformers/main/en/main_classes/model#transformers.PreTrainedModel.from_pretrained
"""
torch_dtype = (
quant_storage_dtype
if quant_storage_dtype and quant_storage_dtype.is_floating_point
else torch.bfloat16
quant_storage_dtype if quant_storage_dtype and quant_storage_dtype.is_floating_point else torch.bfloat16
)
if os.environ.get("ACCELERATE_USE_FSDP", "false") == "true" or os.environ.get("ACCELERATE_USE_DEEPSPEED", "false") == "true":
# Can't use device_map if you use fsdp or deepspeed
Expand Down
4 changes: 2 additions & 2 deletions src/cohere_finetune/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,14 +123,14 @@ def save_file(x: Any, path: str, overwrite_ok: bool = False) -> None:
for entry in x:
assert isinstance(entry, dict)
json.dump(entry, f)
f.write('\n')
f.write("\n")
elif ext in {".txt"}:
if isinstance(x, list):
with open(path, "w") as f:
for entry in x:
assert isinstance(entry, str)
f.write(entry)
f.write('\n')
f.write("\n")
else:
assert isinstance(x, str)
with open(path, "w") as f:
Expand Down

0 comments on commit 2f6de8b

Please sign in to comment.