Skip to content

Commit

Permalink
Mark some tests as integration tests to address space issues
Browse files: browse the repository at this point in the history
  • Loading branch information
alanakbik committed Jul 23, 2024
1 parent a4f7a80 commit 2f3e82e
Showing 1 changed file with 7 additions and 0 deletions.
7 changes: 7 additions & 0 deletions tests/test_datasets.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -418,6 +418,7 @@ def test_load_universal_dependencies_conllu_corpus(tasks_base_path):
_assert_universal_dependencies_conllu_dataset(corpus.train)


@pytest.mark.integration()
def test_hipe_2022_corpus(tasks_base_path):
# This test covers the complete HIPE 2022 dataset.
# https://github.com/hipe-eval/HIPE-2022-data
Expand Down Expand Up @@ -681,6 +682,7 @@ def test_hipe_2022(dataset_version="v2.1", add_document_separator=True):
test_hipe_2022(dataset_version="v2.1", add_document_separator=False)


@pytest.mark.integration()
def test_icdar_europeana_corpus(tasks_base_path):
# This test covers the complete ICDAR Europeana corpus:
# https://github.com/stefan-it/historic-domain-adaptation-icdar
Expand All @@ -698,6 +700,7 @@ def check_number_sentences(reference: int, actual: int, split_name: str):
check_number_sentences(len(corpus.test), gold_stats[language]["test"], "test")


@pytest.mark.integration()
def test_masakhane_corpus(tasks_base_path):
# This test covers the complete MasakhaNER dataset, including support for v1 and v2.
supported_versions = ["v1", "v2"]
Expand Down Expand Up @@ -781,6 +784,7 @@ def check_number_sentences(reference: int, actual: int, split_name: str, languag
check_number_sentences(len(corpus.test), gold_stats["test"], "test", language, version)


@pytest.mark.integration()
def test_nermud_corpus(tasks_base_path):
# This test covers the NERMuD dataset. Official stats can be found here:
# https://github.com/dhfbk/KIND/tree/main/evalita-2023
Expand Down Expand Up @@ -808,6 +812,7 @@ def test_german_ler_corpus(tasks_base_path):
assert len(corpus.test) == 6673, "Mismatch in number of sentences for test split"


@pytest.mark.integration()
def test_masakha_pos_corpus(tasks_base_path):
# This test covers the complete MasakhaPOS dataset.
supported_versions = ["v1"]
Expand Down Expand Up @@ -876,6 +881,7 @@ def check_number_sentences(reference: int, actual: int, split_name: str, languag
check_number_sentences(len(corpus.test), gold_stats["test"], "test", language, version)


@pytest.mark.integration()
def test_german_mobie(tasks_base_path):
corpus = flair.datasets.NER_GERMAN_MOBIE()

Expand Down Expand Up @@ -960,6 +966,7 @@ def test_jsonl_corpus_loads_metadata(tasks_base_path):
assert dataset.sentences[2].get_metadata("from") == 125


@pytest.mark.integration()
def test_ontonotes_download():
from urllib.parse import urlparse

Expand Down

0 comments on commit 2f3e82e

Please sign in to comment.