From 705bd6f2823654baa0f6589277c01314b8cba245 Mon Sep 17 00:00:00 2001 From: yanghua Date: Wed, 4 Sep 2024 11:18:41 +0800 Subject: [PATCH] Add fsspec compatibility test cases --- .github/workflows/compatibilitytest.yml | 32 +++++ Makefile | 29 +++-- tosfs/tests/conftest.py | 12 +- tosfs/tests/test_fsspec.py | 148 ++++++++---------------- 4 files changed, 103 insertions(+), 118 deletions(-) create mode 100644 .github/workflows/compatibilitytest.yml diff --git a/.github/workflows/compatibilitytest.yml b/.github/workflows/compatibilitytest.yml new file mode 100644 index 0000000..e772de1 --- /dev/null +++ b/.github/workflows/compatibilitytest.yml @@ -0,0 +1,32 @@ +name: CompatibilityTest + +on: [push] + +jobs: + build: + runs-on: ubuntu-latest + strategy: + matrix: + python-version: ["3.9", "3.10", "3.11", "3.12"] + steps: + - uses: actions/checkout@v4 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + set -x + python -m pip install --upgrade pip + pip install --user poetry + make install + - name: Prepare Env + run: | + echo "Preparing environment variables" + echo "TOS_ACCESS_KEY=${{ secrets.TOS_ACCESS_KEY }}" >> $GITHUB_ENV + echo "TOS_SECRET_KEY=${{ secrets.TOS_SECRET_KEY }}" >> $GITHUB_ENV + echo "TOS_REGION=${{ vars.TOS_REGION }}" >> $GITHUB_ENV + echo "TOS_ENDPOINT=${{ vars.TOS_ENDPOINT }}" >> $GITHUB_ENV + echo "TOSFS_LOGGING_LEVEL=${{ vars.TOSFS_LOGGING_LEVEL }}" >> $GITHUB_ENV + - name: Run compatibility tests + run: make test_compatibility diff --git a/Makefile b/Makefile index 73a7fb2..590a2cc 100644 --- a/Makefile +++ b/Makefile @@ -7,17 +7,18 @@ help: ## Show the help. @echo "Usage: make " @echo "" @echo "Targets:" - @echo "help: ## Show the help." - @echo "show: ## Show the current environment." - @echo "install: ## Install the project in dev mode." - @echo "fmt: ## Format code using black & isort." - @echo "lint: ## Run pep8, black, mypy linters." - @echo "test: lint ## Run tests and generate coverage report." - @echo "watch: ## Run tests on every change." - @echo "clean: ## Clean unused files." - @echo "release: ## Create a new tag for release." - @echo "docs: ## Build the documentation." - @echo "release_wheel: ## Release wheel for python client." + @echo "help: ## Show the help." + @echo "show: ## Show the current environment." + @echo "install: ## Install the project in dev mode." + @echo "fmt: ## Format code using black & isort." + @echo "lint: ## Run pep8, black, mypy linters." + @echo "test: lint ## Run tests and generate coverage report." + @echo "test_compatibility: ## Run compatibility tests." + @echo "watch: ## Run tests on every change." + @echo "clean: ## Clean unused files." + @echo "release: ## Create a new tag for release." + @echo "docs: ## Build the documentation." + @echo "release_wheel: ## Release wheel for python client." .PHONY: show show: ## Show the current environment. @@ -47,7 +48,11 @@ lint: ## Run pep8, black, mypy linters. .PHONY: test test: ## Run tests and generate coverage report. - $(ENV_PREFIX)pytest -vv -s --cov-config .coveragerc --cov=tosfs -l --tb=short --maxfail=1 ${TEST_DIR} + $(ENV_PREFIX)pytest -vv -s --cov-config .coveragerc --cov=tosfs -l --tb=short --maxfail=1 ${TEST_DIR} --ignore=${TEST_DIR}/test_fsspec.py + +.PHONY: test_compatibility +test_compatibility: ## Run compatibility tests. + $(ENV_PREFIX)pytest -vv -s -l --tb=short --maxfail=1 ${TEST_DIR} ${TEST_DIR}/test_fsspec.py .PHONY: watch watch: ## Run tests on every change. diff --git a/tosfs/tests/conftest.py b/tosfs/tests/conftest.py index 990f70b..8ffd0fa 100644 --- a/tosfs/tests/conftest.py +++ b/tosfs/tests/conftest.py @@ -41,20 +41,20 @@ def tosfs(_tosfs_env_prepare: None) -> TosFileSystem: ) return tosfs + @pytest.fixture(scope="module") def fsspecfs(_tosfs_env_prepare: None) -> Any: - known_implementations["tos"] = { - "class": "tosfs.core.TosFileSystem" - } + known_implementations["tos"] = {"class": "tosfs.core.TosFileSystem"} fsspecfs, _ = fsspec.core.url_to_fs( "tos://", - endpoint_url = os.environ.get("TOS_ENDPOINT"), - region = os.environ.get("TOS_REGION"), - credentials_provider=EnvCredentialsProvider() + endpoint_url=os.environ.get("TOS_ENDPOINT"), + region=os.environ.get("TOS_REGION"), + credentials_provider=EnvCredentialsProvider(), ) return fsspecfs + @pytest.fixture(scope="module") def bucket() -> str: return os.environ.get("TOS_BUCKET", "proton-ci") diff --git a/tosfs/tests/test_fsspec.py b/tosfs/tests/test_fsspec.py index d140b13..7bdee9c 100644 --- a/tosfs/tests/test_fsspec.py +++ b/tosfs/tests/test_fsspec.py @@ -29,9 +29,7 @@ def test_ls(fsspecfs: Any, bucket: str, temporary_workspace: str): def test_copy(fsspecfs: Any, bucket: str, temporary_workspace: str): # Create a temporary directory and files - dir_name = "".join( - random.choices(string.ascii_letters + string.digits, k=10) - ) + dir_name = "".join(random.choices(string.ascii_letters + string.digits, k=10)) subdir_path = f"{bucket}/{temporary_workspace}/{dir_name}" fsspecfs.mkdir(subdir_path) file1_path = f"{subdir_path}/file1.txt" @@ -100,7 +98,8 @@ def test_info(fsspecfs: Any, bucket: str, temporary_workspace: str): assert file_info["name"] == fsspecfs._strip_protocol( file_path ), "Incorrect file name" - assert file_info["size"] == 13, "Incorrect file size" + expected_file_size = 13 + assert file_info["size"] == expected_file_size, "Incorrect file size" assert file_info["type"] == "file", "Incorrect type for file" # Test directory info @@ -112,21 +111,17 @@ def test_info(fsspecfs: Any, bucket: str, temporary_workspace: str): # Some FS might not support 'size' for directories, so it's not strictly checked # Test non-existent path - with pytest.raises(IOError): + with pytest.raises(FileNotFoundError): fsspecfs.info(f"{bucket}/{temporary_workspace}/non_existent") # Test protocol stripping protocol_included_path = fsspecfs._strip_protocol(file_path) protocol_info = fsspecfs.info(protocol_included_path) - assert ( - protocol_info["name"] == protocol_included_path - ), "Protocol stripping failed" + assert protocol_info["name"] == protocol_included_path, "Protocol stripping failed" def test_write_and_read_bytes(fsspecfs: Any, bucket: str, temporary_workspace: str): - file_name = "".join( - random.choices(string.ascii_letters + string.digits, k=10) - ) + file_name = "".join(random.choices(string.ascii_letters + string.digits, k=10)) path_to_write = f"{bucket}/{temporary_workspace}/{file_name}.bin" data_to_write = b"Hello, World!" @@ -144,9 +139,7 @@ def test_write_and_read_bytes(fsspecfs: Any, bucket: str, temporary_workspace: s def test_write_and_read_text(fsspecfs: Any, bucket: str, temporary_workspace: str): - file_name = "".join( - random.choices(string.ascii_letters + string.digits, k=10) - ) + file_name = "".join(random.choices(string.ascii_letters + string.digits, k=10)) path_to_write = f"{bucket}/{temporary_workspace}/{file_name}.txt" data_to_write = "Hello, World!" @@ -164,26 +157,24 @@ def test_write_and_read_text(fsspecfs: Any, bucket: str, temporary_workspace: st def test_with_size(fsspecfs: Any, bucket: str, temporary_workspace: str): - file_name = "".join( - random.choices(string.ascii_letters + string.digits, k=10) - ) + file_name = "".join(random.choices(string.ascii_letters + string.digits, k=10)) path = f"{bucket}/{temporary_workspace}/{file_name}.txt" - data = b"a" * (10 * 1**10) + expected_file_size = 10 + + data = b"a" * (expected_file_size * 1**10) with fsspecfs.open(path, "wb") as f: f.write(data) with fsspecfs.open(path, "rb", size=10) as f: - assert f.size == 10 + assert f.size == expected_file_size out = f.read() - assert len(out) == 10 + assert len(out) == expected_file_size def test_simple(fsspecfs: Any, bucket: str, temporary_workspace: str): - file_name = "".join( - random.choices(string.ascii_letters + string.digits, k=10) - ) + file_name = "".join(random.choices(string.ascii_letters + string.digits, k=10)) path = f"{bucket}/{temporary_workspace}/{file_name}.txt" data = b"a" * (10 * 1**10) @@ -197,9 +188,7 @@ def test_simple(fsspecfs: Any, bucket: str, temporary_workspace: str): def test_write_large(fsspecfs: Any, bucket: str, temporary_workspace: str): - file_name = "".join( - random.choices(string.ascii_letters + string.digits, k=10) - ) + file_name = "".join(random.choices(string.ascii_letters + string.digits, k=10)) path = f"{bucket}/{temporary_workspace}/{file_name}.txt" mb = 2**20 payload_size = int(2.5 * 1 * mb) @@ -212,9 +201,7 @@ def test_write_large(fsspecfs: Any, bucket: str, temporary_workspace: str): def test_write_limit(fsspecfs: Any, bucket: str, temporary_workspace: str): - file_name = "".join( - random.choices(string.ascii_letters + string.digits, k=10) - ) + file_name = "".join(random.choices(string.ascii_letters + string.digits, k=10)) path = f"{bucket}/{temporary_workspace}/{file_name}.txt" mb = 2**20 block_size = 1 * mb @@ -228,9 +215,7 @@ def test_write_limit(fsspecfs: Any, bucket: str, temporary_workspace: str): def test_readline(fsspecfs: Any, bucket: str, temporary_workspace: str): - file_name = "".join( - random.choices(string.ascii_letters + string.digits, k=10) - ) + file_name = "".join(random.choices(string.ascii_letters + string.digits, k=10)) path = f"{bucket}/{temporary_workspace}/{file_name}.txt" lines_to_write = [b"First line\n", b"Second line\n", b"Third line"] @@ -246,15 +231,11 @@ def test_readline(fsspecfs: Any, bucket: str, temporary_workspace: str): read_line == expected_line ), f"Expected {expected_line}, got {read_line}" - assert ( - f.readline() == b"" - ), "Expected empty string when reading past the end" + assert f.readline() == b"", "Expected empty string when reading past the end" def test_readline_empty(fsspecfs: Any, bucket: str, temporary_workspace: str): - file_name = "".join( - random.choices(string.ascii_letters + string.digits, k=10) - ) + file_name = "".join(random.choices(string.ascii_letters + string.digits, k=10)) path = f"{bucket}/{temporary_workspace}/{file_name}.txt" data = b"" with fsspecfs.open(path, "wb") as f: @@ -265,9 +246,7 @@ def test_readline_empty(fsspecfs: Any, bucket: str, temporary_workspace: str): def test_readline_blocksize(fsspecfs: Any, bucket: str, temporary_workspace: str): - file_name = "".join( - random.choices(string.ascii_letters + string.digits, k=10) - ) + file_name = "".join(random.choices(string.ascii_letters + string.digits, k=10)) path = f"{bucket}/{temporary_workspace}/{file_name}.txt" data = b"ab\n" + b"a" * (1 * 2**20) + b"\nab" with fsspecfs.open(path, "wb") as f: @@ -287,9 +266,7 @@ def test_readline_blocksize(fsspecfs: Any, bucket: str, temporary_workspace: str def test_next(fsspecfs: Any, bucket: str, temporary_workspace: str): - file_name = "".join( - random.choices(string.ascii_letters + string.digits, k=10) - ) + file_name = "".join(random.choices(string.ascii_letters + string.digits, k=10)) path = f"{bucket}/{temporary_workspace}/{file_name}.csv" csv_content = b"name,amount,id\nAlice,100,1\nBob,200,2\nCharlie,300,3\n" @@ -307,9 +284,7 @@ def test_next(fsspecfs: Any, bucket: str, temporary_workspace: str): def test_iterable(fsspecfs: Any, bucket: str, temporary_workspace: str): - file_name = "".join( - random.choices(string.ascii_letters + string.digits, k=10) - ) + file_name = "".join(random.choices(string.ascii_letters + string.digits, k=10)) path = f"{bucket}/{temporary_workspace}/{file_name}" data = b"abc\n123" with fsspecfs.open(path, "wb") as f: @@ -332,11 +307,9 @@ def test_iterable(fsspecfs: Any, bucket: str, temporary_workspace: str): def test_write_read_without_protocol( - fsspecfs: Any, bucket: str, temporary_workspace: str + fsspecfs: Any, bucket: str, temporary_workspace: str ): - file_name = "".join( - random.choices(string.ascii_letters + string.digits, k=10) - ) + file_name = "".join(random.choices(string.ascii_letters + string.digits, k=10)) path_to_write = f"{bucket}/{temporary_workspace}/{file_name}.bin" path_without_protocol = fsspecfs._strip_protocol(path_to_write) data_to_write = b"Hello, World!" @@ -357,9 +330,7 @@ def test_write_read_without_protocol( def test_walk(fsspecfs: Any, bucket: str, temporary_workspace: str): - sub_dir_name = "".join( - random.choices(string.ascii_letters + string.digits, k=10) - ) + sub_dir_name = "".join(random.choices(string.ascii_letters + string.digits, k=10)) temp_folder = f"{bucket}/{temporary_workspace}/{sub_dir_name}" nested_dir_1 = f"{bucket}/{temporary_workspace}/nested_dir_1" nested_dir_2 = f"{nested_dir_1}/nested_dir_2" @@ -410,17 +381,14 @@ def test_walk(fsspecfs: Any, bucket: str, temporary_workspace: str): assert result == expected, f"Expected {expected}, got {result}" # Test walk with detail=True - result = list( - fsspecfs.walk(temp_folder, maxdepth=None, topdown=True, detail=True) - ) - assert len(result) == 3, f"Expected 3 directories, got {len(result)}" - for path, dirs, files in result: - assert isinstance( - dirs, dict - ), f"Expected dirs to be dict, got {type(dirs)}" - assert isinstance( - files, dict - ), f"Expected files to be dict, got {type(files)}" + result = list(fsspecfs.walk(temp_folder, maxdepth=None, topdown=True, detail=True)) + expected_dir_num = 3 + assert ( + len(result) == expected_dir_num + ), f"Expected {expected_dir_num} directories, got {len(result)}" + for _, dirs, files in result: + assert isinstance(dirs, dict), f"Expected dirs to be dict, got {type(dirs)}" + assert isinstance(files, dict), f"Expected files to be dict, got {type(files)}" def test_find(fsspecfs: Any, bucket: str, temporary_workspace: str): @@ -431,9 +399,7 @@ def remove_last_modification_time_ms(data): del data[key]["last_modification_time_ms"] return data - sub_dir_name = "".join( - random.choices(string.ascii_letters + string.digits, k=10) - ) + sub_dir_name = "".join(random.choices(string.ascii_letters + string.digits, k=10)) temp_folder = f"{bucket}/{temporary_workspace}/{sub_dir_name}" file1_path = f"{bucket}/{temporary_workspace}/file1" file2_path = f"{bucket}/{temporary_workspace}/file2" @@ -486,13 +452,15 @@ def remove_last_modification_time_ms(data): expected = { fsspecfs._strip_protocol(f"{bucket}/{temporary_workspace}/dir1/file3"): { "name": fsspecfs._strip_protocol( - f"{bucket}/{temporary_workspace}/dir1/file3"), + f"{bucket}/{temporary_workspace}/dir1/file3" + ), "type": "file", "size": 0, }, fsspecfs._strip_protocol(f"{bucket}/{temporary_workspace}/dir2/file4"): { "name": fsspecfs._strip_protocol( - f"{bucket}/{temporary_workspace}/dir2/file4"), + f"{bucket}/{temporary_workspace}/dir2/file4" + ), "type": "file", "size": 0, }, @@ -516,9 +484,7 @@ def remove_last_modification_time_ms(data): def test_du(fsspecfs: Any, bucket: str, temporary_workspace: str): - sub_dir_name = "".join( - random.choices(string.ascii_letters + string.digits, k=10) - ) + sub_dir_name = "".join(random.choices(string.ascii_letters + string.digits, k=10)) temp_folder = f"{bucket}/{temporary_workspace}/{sub_dir_name}" dir_path = f"{bucket}/{temporary_workspace}/test_dir" nested_dir_path = f"{dir_path}/nested_dir" @@ -545,9 +511,7 @@ def test_du(fsspecfs: Any, bucket: str, temporary_workspace: str): fsspecfs._strip_protocol(file_path): 13, fsspecfs._strip_protocol(nested_file_path): 14, } - assert ( - sizes == expected_sizes - ), f"Expected sizes {expected_sizes}, got {sizes}" + assert sizes == expected_sizes, f"Expected sizes {expected_sizes}, got {sizes}" # Test maxdepth sizes_maxdepth_1 = fsspecfs.du(temp_folder, total=False, maxdepth=2) @@ -578,12 +542,8 @@ def test_du(fsspecfs: Any, bucket: str, temporary_workspace: str): def test_isdir(fsspecfs: Any, bucket: str, temporary_workspace: str): # Setup - dir_name = "".join( - random.choices(string.ascii_letters + string.digits, k=10) - ) - file_name = "".join( - random.choices(string.ascii_letters + string.digits, k=10) - ) + dir_name = "".join(random.choices(string.ascii_letters + string.digits, k=10)) + file_name = "".join(random.choices(string.ascii_letters + string.digits, k=10)) dir_path = f"{bucket}/{temporary_workspace}/{dir_name}" file_path = f"{bucket}/{temporary_workspace}/{file_name}.txt" fsspecfs.mkdir(dir_path) @@ -605,12 +565,8 @@ def test_isdir(fsspecfs: Any, bucket: str, temporary_workspace: str): def test_isfile(fsspecfs: Any, bucket: str, temporary_workspace: str): # Setup - dir_name = "".join( - random.choices(string.ascii_letters + string.digits, k=10) - ) - file_name = "".join( - random.choices(string.ascii_letters + string.digits, k=10) - ) + dir_name = "".join(random.choices(string.ascii_letters + string.digits, k=10)) + file_name = "".join(random.choices(string.ascii_letters + string.digits, k=10)) dir_path = f"{bucket}/{temporary_workspace}/{dir_name}" file_path = f"{bucket}/{temporary_workspace}/{file_name}.txt" fsspecfs.mkdir(dir_path) @@ -649,9 +605,7 @@ def test_rm(fsspecfs: Any, bucket: str, temporary_workspace: str): assert not fsspecfs.exists(path), f"Path {path} still exists after removal" # Remove scheme from paths and test removal again - paths_without_scheme = [ - fsspecfs._strip_protocol(path) for path in paths_to_remove - ] + paths_without_scheme = [fsspecfs._strip_protocol(path) for path in paths_to_remove] for path in paths_without_scheme: with fsspecfs.open(path, "wb") as f: @@ -664,17 +618,13 @@ def test_rm(fsspecfs: Any, bucket: str, temporary_workspace: str): def test_cat_file(fsspecfs: Any, bucket: str, temporary_workspace: str): - file_name = "".join( - random.choices(string.ascii_letters + string.digits, k=10) - ) + file_name = "".join(random.choices(string.ascii_letters + string.digits, k=10)) path_to_write = f"{bucket}/{temporary_workspace}/{file_name}.bin" content = b"Hello, World! This is a test file." with fsspecfs.open(path_to_write, "wb") as f: f.write(content) - assert ( - fsspecfs.cat_file(path_to_write) == content - ), "Failed to read the entire file" + assert fsspecfs.cat_file(path_to_write) == content, "Failed to read the entire file" start = 7 assert ( @@ -692,9 +642,7 @@ def test_cat_file(fsspecfs: Any, bucket: str, temporary_workspace: str): def test_cat(fsspecfs: Any, bucket: str, temporary_workspace: str): - dir_name = "".join( - random.choices(string.ascii_letters + string.digits, k=10) - ) + dir_name = "".join(random.choices(string.ascii_letters + string.digits, k=10)) subdir_path = f"{bucket}/{temporary_workspace}/{dir_name}" fsspecfs.mkdir(subdir_path) file1_path = f"{subdir_path}/file1.txt"