Skip to content

Commit

Permalink
support upload folder (#66)
Browse files Browse the repository at this point in the history
* support upload folder

* support upload file to folder
  • Loading branch information
SeanHH86 authored Jan 3, 2025
1 parent 113eeb4 commit bb123d1
Show file tree
Hide file tree
Showing 8 changed files with 192 additions and 64 deletions.
38 changes: 33 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -91,11 +91,17 @@ csghub-cli download wanghh2000/myprivate1
# download dataset
csghub-cli download wanghh2000/myds1 -t dataset

# upload a single file
csghub-cli upload wanghh2000/myprivate1 abc/3.txt
# upload a single file to folder1
csghub-cli upload wanghh2000/myprivate1 abc/3.txt folder1

# upload files
csghub-cli upload wanghh2000/myds1 abc/4.txt abc/5.txt -t dataset
# upload local folder '/Users/hhwang/temp/jsonl' to the root path of repo 'wanghh2000/m01' on the default branch
csghub-cli upload wanghh2000/m01 /Users/hhwang/temp/jsonl

# upload local folder '/Users/hhwang/temp/jsonl' to path 'test/files' of repo 'wanghh2000/m01' on branch v1
csghub-cli upload wanghh2000/m01 /Users/hhwang/temp/jsonl test/files --revision v1

# upload local folder '/Users/hhwang/temp/jsonl' to path 'test/files' of repo 'wanghh2000/m01' using token 'xxxxxx'
csghub-cli upload wanghh2000/m01 /Users/hhwang/temp/jsonl test/files -k xxxxxx
```

Download location is `~/.cache/csg/` by default.
Expand Down Expand Up @@ -184,7 +190,7 @@ for item in repo_files:
http_upload_file(repo_id=repo_id, repo_type=repo_type, file_path=item, endpoint=endpoint, token=token)
```

### Upload repo
### Upload the local path to repo

Before starting, please make sure you have Git-LFS installed (see [here](https://git-lfs.github.com/) for installation instructions).

Expand All @@ -204,6 +210,28 @@ r = Repository(
r.upload()
```

### Upload the local path to the specified path in the repo

Before starting, please make sure you have Git-LFS installed (see [here](https://git-lfs.github.com/) for installation instructions).

```python
from pycsghub.repository import Repository

token = "your access token"

r = Repository(
repo_id="wanghh2000/model01",
upload_path="/Users/hhwang/temp/jsonl",
path_in_repo="test/abc",
user_name="wanghh2000",
token=token,
repo_type="model",
branch_name="v1",
)

r.upload()
```

### Model loading compatible with huggingface

The transformers library supports directly inputting the repo_id from Hugging Face to download and load related models, as shown below:
Expand Down
38 changes: 33 additions & 5 deletions README_cn.md
Original file line number Diff line number Diff line change
Expand Up @@ -90,11 +90,17 @@ csghub-cli download wanghh2000/myprivate1
# 数据集下载
csghub-cli download wanghh2000/myds1 -t dataset

# 上传单个文件
csghub-cli upload wanghh2000/myprivate1 abc/3.txt
# 上传单个文件到仓库目录folder1
csghub-cli upload wanghh2000/myprivate1 abc/3.txt folder1

# 上传多个文件
csghub-cli upload wanghh2000/myds1 abc/4.txt abc/5.txt -t dataset
# 上传本地目录'/Users/hhwang/temp/jsonl'到仓库'wanghh2000/m01'的默认分支根目录下
csghub-cli upload wanghh2000/m01 /Users/hhwang/temp/jsonl

# 上传本地目录'/Users/hhwang/temp/jsonl'到仓库'wanghh2000/m01'的v1分支的'test/files'目录下
csghub-cli upload wanghh2000/m01 /Users/hhwang/temp/jsonl test/files --revision v1

# 上传本地目录'/Users/hhwang/temp/jsonl'到仓库'wanghh2000/m01'的默认分支'test/files'目录下并使用指定token
csghub-cli upload wanghh2000/m01 /Users/hhwang/temp/jsonl test/files -k xxxxxx
```

文件默认下载路径为`~/.cache/csg/`
Expand Down Expand Up @@ -184,7 +190,7 @@ for item in repo_files:
http_upload_file(repo_id=repo_id, repo_type=repo_type, file_path=item, endpoint=endpoint, token=token)
```

### 上传仓库
### 上传本地目录到仓库

在开始之前,请确保您已安装 Git-LFS(安装说明请参见 [这里](https://git-lfs.github.com/))。

Expand All @@ -204,6 +210,28 @@ r = Repository(
r.upload()
```

### 上传本地目录到仓库的指定目录

在开始之前,请确保您已安装 Git-LFS(安装说明请参见 [这里](https://git-lfs.github.com/))。

```python
from pycsghub.repository import Repository

token = "your access token"

r = Repository(
repo_id="wanghh2000/model01",
upload_path="/Users/hhwang/temp/jsonl",
path_in_repo="test/abc",
user_name="wanghh2000",
token=token,
repo_type="model",
branch_name="v1",
)

r.upload()
```

### 兼容huggingface的模型加载

huggingface的transformers库支持直接输入huggingface上的repo_id以下载并读取相关模型,如下列所示:
Expand Down
40 changes: 30 additions & 10 deletions pycsghub/cli.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import typer
import os
from typing import Annotated, List, Optional
from pycsghub.cmd import repo
from pycsghub.cmd.repo_types import RepoType
Expand All @@ -15,11 +16,13 @@ def version_callback(value: bool):

OPTIONS = {
"repoID": typer.Argument(help="The ID of the repo. (e.g. `username/repo-name`)."),
"repoFiles": typer.Argument(help="Local path to the file or files to upload. Defaults to the relative path of the file of repo of OpenCSG Hub."),
"localPath": typer.Argument(help="Local path to the file or folder to upload. Defaults to the relative path of the file of repo of OpenCSG Hub."),
"pathInRepo": typer.Argument(help="Path of the folder in the repo. Defaults to the relative path of the file or folder."),
"repoType": typer.Option("-t", "--repo-type", help="Specify the repository type."),
"revision": typer.Option("-r", "--revision", help="An optional Git revision id which can be a branch name"),
"cache_dir": typer.Option("-cd", "--cache-dir", help="Path to the directory where to save the downloaded files."),
"endpoint": typer.Option("-e", "--endpoint", help="The address of the request to be sent."),
"username": typer.Option("-u", "--username", help="Logon account of OpenCSG Hub."),
"token": typer.Option("-k", "--token", help="A User Access Token generated from https://opencsg.com/settings/access-token"),
"version": typer.Option(None, "-V", "--version", callback=version_callback, is_eager=True, help="Show the version and exit."),
}
Expand All @@ -45,20 +48,37 @@ def download(
@app.command(name="upload", help="Upload repository files to opencsg.com.")
def upload(
repo_id: Annotated[str, OPTIONS["repoID"]],
repo_files: Annotated[List[str], OPTIONS["repoFiles"]],
local_path: Annotated[str, OPTIONS["localPath"]],
path_in_repo: Annotated[str, OPTIONS["pathInRepo"]] = "",
repo_type: Annotated[RepoType, OPTIONS["repoType"]] = RepoType.MODEL,
revision: Annotated[Optional[str], OPTIONS["revision"]] = DEFAULT_REVISION,
endpoint: Annotated[Optional[str], OPTIONS["endpoint"]] = DEFAULT_CSGHUB_DOMAIN,
token: Annotated[Optional[str], OPTIONS["token"]] = None,
user_name: Annotated[Optional[str], OPTIONS["username"]] = "",
):
repo.upload(
repo_id=repo_id,
repo_type=repo_type,
repo_files=repo_files,
revision=revision,
endpoint=endpoint,
token=token
)
# File upload
if os.path.isfile(local_path):
repo.upload_files(
repo_id=repo_id,
repo_type=repo_type,
repo_file=local_path,
path_in_repo=path_in_repo,
revision=revision,
endpoint=endpoint,
token=token
)
# Folder upload
else:
repo.upload_folder(
repo_id=repo_id,
repo_type=repo_type,
local_path=local_path,
path_in_repo=path_in_repo,
revision=revision,
endpoint=endpoint,
token=token,
user_name=user_name
)

@app.callback(invoke_without_command=True)
def main(version: bool = OPTIONS["version"]):
Expand Down
57 changes: 46 additions & 11 deletions pycsghub/cmd/repo.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from typing import Optional, Union, List
from pycsghub.constants import DEFAULT_REVISION
import requests
from pycsghub.repository import Repository

def download(
repo_id: str,
Expand All @@ -22,20 +23,54 @@ def download(
token=token,
)

def upload(
def upload_files(
repo_id: str,
repo_type: str,
repo_files: List[str],
repo_file: str,
path_in_repo: Optional[str] = "",
revision: Optional[str] = DEFAULT_REVISION,
endpoint: Optional[str] = None,
token: Optional[str] = None
):
for item in repo_files:
http_upload_file(
repo_id=repo_id,
repo_type=repo_type,
file_path=item,
revision=revision,
endpoint=endpoint,
token=token,
)
http_upload_file(
repo_id=repo_id,
repo_type=repo_type,
file_path=repo_file,
path_in_repo=path_in_repo,
revision=revision,
endpoint=endpoint,
token=token,
)

def upload_folder(
    repo_id: str,
    repo_type: str,
    local_path: str,
    path_in_repo: Optional[str] = "",
    work_dir: Optional[str] = "/tmp/csg",
    nickname: Optional[str] = "",
    description: Optional[str] = "",
    license: Optional[str] = "apache-2.0",
    revision: Optional[str] = DEFAULT_REVISION,
    endpoint: Optional[str] = None,
    user_name: Optional[str] = "",
    token: Optional[str] = None,
    auto_create: Optional[bool] = True,
):
    """Upload a local path to a repo by delegating to ``Repository``.

    All arguments are forwarded unchanged to the ``Repository``
    constructor, after which ``Repository.upload()`` is invoked. Two
    arguments are renamed on the way through:

    * ``local_path`` is passed as ``upload_path``.
    * ``revision`` is passed as ``branch_name``.

    Nothing is returned; the result of ``Repository.upload()`` is discarded.
    """
    # Collect the constructor arguments first so the renamed keywords
    # (upload_path / branch_name) are easy to spot.
    repository_kwargs = {
        "repo_id": repo_id,
        "upload_path": local_path,
        "path_in_repo": path_in_repo,
        "work_dir": work_dir,
        "repo_type": repo_type,
        "nickname": nickname,
        "description": description,
        "license": license,
        "branch_name": revision,
        "endpoint": endpoint,
        "user_name": user_name,
        "token": token,
        "auto_create": auto_create,
    }
    Repository(**repository_kwargs).upload()

3 changes: 3 additions & 0 deletions pycsghub/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,3 +86,6 @@


S3_INTERNAL = os.environ.get("S3_INTERNAL", '')

GIT_HIDDEN_DIR = ".git"
GIT_ATTRIBUTES_FILE = ".gitattributes"
4 changes: 3 additions & 1 deletion pycsghub/file_upload.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,19 +8,21 @@ def http_upload_file(
repo_id: str,
repo_type: Optional[str] = None,
file_path: str = None,
path_in_repo: Optional[str] = "",
revision: Optional[str] = DEFAULT_REVISION,
endpoint: Optional[str] = None,
token: Optional[str] = None,
):
if not os.path.exists(file_path):
raise ValueError(f"file '{file_path}' does not exist")
destination_path = os.path.join(path_in_repo, os.path.basename(file_path)) if path_in_repo else file_path
http_endpoint = endpoint if endpoint is not None else get_endpoint()
if not http_endpoint.endswith("/"):
http_endpoint += "/"
http_url = http_endpoint + "api/v1/" + repo_type + "s/" + repo_id + "/upload_file"
post_headers = build_csg_headers(token=token)
file_data = {'file': open(file_path, 'rb')}
form_data = {'file_path': file_path, 'branch': revision, 'message': 'upload' + file_path}
form_data = {'file_path': destination_path, 'branch': revision, 'message': 'upload' + file_path}
response = requests.post(http_url, headers=post_headers, data=form_data, files=file_data)
if response.status_code == 200:
print(f"file '{file_path}' upload successfully.")
Expand Down
Loading

0 comments on commit bb123d1

Please sign in to comment.