From 5ffe4c45fd8145c835a1ec69cc893659bb5bcafc Mon Sep 17 00:00:00 2001
From: Alan Akbik <alan.akbik@gmail.com>
Date: Sun, 21 Jul 2024 23:40:27 +0200
Subject: [PATCH 1/8] Move error message to main load function

---
 flair/file_utils.py |  7 -------
 flair/nn/model.py   | 12 +++++++++++-
 2 files changed, 11 insertions(+), 8 deletions(-)

diff --git a/flair/file_utils.py b/flair/file_utils.py
index dfb0049b78..f7f20a20f3 100644
--- a/flair/file_utils.py
+++ b/flair/file_utils.py
@@ -171,13 +171,6 @@ def hf_download(model_name: str) -> str:
         )
     except HTTPError:
         # output information
-        logger.error("-" * 80)
-        logger.error(
-            f"ERROR: The key '{model_name}' was neither found on the ModelHub nor is this a valid path to a file on your system!"
-        )
-        logger.error(" -> Please check https://huggingface.co/models?filter=flair for all available models.")
-        logger.error(" -> Alternatively, point to a model file on your local drive.")
-        logger.error("-" * 80)
         Path(flair.cache_root / "models" / model_folder).rmdir()  # remove folder again if not valid
         raise
 
diff --git a/flair/nn/model.py b/flair/nn/model.py
index 96b2c2d925..88f51f443b 100644
--- a/flair/nn/model.py
+++ b/flair/nn/model.py
@@ -151,7 +151,17 @@ def load(cls, model_path: Union[str, Path, Dict[str, Any]]) -> "Model":
                     continue
 
             # if the model cannot be fetched, load as a file
-            state = model_path if isinstance(model_path, dict) else load_torch_state(str(model_path))
+            try:
+                state = model_path if isinstance(model_path, dict) else load_torch_state(str(model_path))
+            except Exception:
+                log.error("-" * 80)
+                log.error(
+                    f"ERROR: The key '{model_path}' was neither found on the ModelHub nor is this a valid path to a file on your system!"
+                )
+                log.error(" -> Please check https://huggingface.co/models?filter=flair for all available models.")
+                log.error(" -> Alternatively, point to a model file on your local drive.")
+                log.error("-" * 80)
+                raise ValueError(f"Could not find any model with name '{model_path}'")
 
             # try to get model class from state
             cls_name = state.pop("__cls__", None)

From a4f7a80b6c376dbe8c05aee9c02bd7856f0ed171 Mon Sep 17 00:00:00 2001
From: Alan Akbik <alan.akbik@gmail.com>
Date: Mon, 22 Jul 2024 00:11:54 +0200
Subject: [PATCH 2/8] Remove type: ignore[union-attr] comment to make mypy happy

---
 flair/trainers/trainer.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/flair/trainers/trainer.py b/flair/trainers/trainer.py
index 6d9c3ec54b..fb8590841b 100644
--- a/flair/trainers/trainer.py
+++ b/flair/trainers/trainer.py
@@ -473,7 +473,7 @@ def train_custom(
             if inspect.isclass(sampler):
                 sampler = sampler()
             # set dataset to sample from
-            sampler.set_dataset(train_data)  # type: ignore[union-attr]
+            sampler.set_dataset(train_data)
             shuffle = False
 
         # this field stores the names of all dynamic embeddings in the model (determined after first forward pass)

From 2f3e82e5e3e0d50e24abbf2cc60c7eb2b63c06d7 Mon Sep 17 00:00:00 2001
From: Alan Akbik <alan.akbik@gmail.com>
Date: Tue, 23 Jul 2024 07:42:24 +0200
Subject: [PATCH 3/8] Mark some tests as integration tests to address space
 issues

---
 tests/test_datasets.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/tests/test_datasets.py b/tests/test_datasets.py
index 52fec1c5ea..2d0391b264 100644
--- a/tests/test_datasets.py
+++ b/tests/test_datasets.py
@@ -418,6 +418,7 @@ def test_load_universal_dependencies_conllu_corpus(tasks_base_path):
     _assert_universal_dependencies_conllu_dataset(corpus.train)
 
 
+@pytest.mark.integration()
 def test_hipe_2022_corpus(tasks_base_path):
     # This test covers the complete HIPE 2022 dataset.
     # https://github.com/hipe-eval/HIPE-2022-data
@@ -681,6 +682,7 @@ def test_hipe_2022(dataset_version="v2.1", add_document_separator=True):
     test_hipe_2022(dataset_version="v2.1", add_document_separator=False)
 
 
+@pytest.mark.integration()
 def test_icdar_europeana_corpus(tasks_base_path):
     # This test covers the complete ICDAR Europeana corpus:
     # https://github.com/stefan-it/historic-domain-adaptation-icdar
@@ -698,6 +700,7 @@ def check_number_sentences(reference: int, actual: int, split_name: str):
         check_number_sentences(len(corpus.test), gold_stats[language]["test"], "test")
 
 
+@pytest.mark.integration()
 def test_masakhane_corpus(tasks_base_path):
     # This test covers the complete MasakhaNER dataset, including support for v1 and v2.
     supported_versions = ["v1", "v2"]
@@ -781,6 +784,7 @@ def check_number_sentences(reference: int, actual: int, split_name: str, languag
             check_number_sentences(len(corpus.test), gold_stats["test"], "test", language, version)
 
 
+@pytest.mark.integration()
 def test_nermud_corpus(tasks_base_path):
     # This test covers the NERMuD dataset. Official stats can be found here:
     # https://github.com/dhfbk/KIND/tree/main/evalita-2023
@@ -808,6 +812,7 @@ def test_german_ler_corpus(tasks_base_path):
     assert len(corpus.test) == 6673, "Mismatch in number of sentences for test split"
 
 
+@pytest.mark.integration()
 def test_masakha_pos_corpus(tasks_base_path):
     # This test covers the complete MasakhaPOS dataset.
     supported_versions = ["v1"]
@@ -876,6 +881,7 @@ def check_number_sentences(reference: int, actual: int, split_name: str, languag
             check_number_sentences(len(corpus.test), gold_stats["test"], "test", language, version)
 
 
+@pytest.mark.integration()
 def test_german_mobie(tasks_base_path):
     corpus = flair.datasets.NER_GERMAN_MOBIE()
 
@@ -960,6 +966,7 @@ def test_jsonl_corpus_loads_metadata(tasks_base_path):
     assert dataset.sentences[2].get_metadata("from") == 125
 
 
+@pytest.mark.integration()
 def test_ontonotes_download():
     from urllib.parse import urlparse
 

From c1ef3d372f230531920e0a90d2e169bffb5099e7 Mon Sep 17 00:00:00 2001
From: Alan Akbik <alan.akbik@gmail.com>
Date: Sun, 21 Jul 2024 21:59:12 +0200
Subject: [PATCH 4/8] Update nl-ner-rnn model, prune hu_model_map entries

---
 flair/models/sequence_tagger_model.py | 27 +--------------------------
 1 file changed, 1 insertion(+), 26 deletions(-)

diff --git a/flair/models/sequence_tagger_model.py b/flair/models/sequence_tagger_model.py
index 1f2a93c686..5daa74a245 100644
--- a/flair/models/sequence_tagger_model.py
+++ b/flair/models/sequence_tagger_model.py
@@ -696,32 +696,9 @@ def _fetch_model(model_name) -> str:
 
         hu_model_map = {
             # English NER models
-            "ner": "/".join([hu_path, "ner", "en-ner-conll03-v0.4.pt"]),
             "ner-pooled": "/".join([hu_path, "ner-pooled", "en-ner-conll03-pooled-v0.5.pt"]),
-            "ner-fast": "/".join([hu_path, "ner-fast", "en-ner-fast-conll03-v0.4.pt"]),
-            "ner-ontonotes": "/".join([hu_path, "ner-ontonotes", "en-ner-ontonotes-v0.4.pt"]),
-            "ner-ontonotes-fast": "/".join([hu_path, "ner-ontonotes-fast", "en-ner-ontonotes-fast-v0.4.pt"]),
-            # Multilingual NER models
-            "ner-multi": "/".join([hu_path, "multi-ner", "quadner-large.pt"]),
-            "multi-ner": "/".join([hu_path, "multi-ner", "quadner-large.pt"]),
-            "ner-multi-fast": "/".join([hu_path, "multi-ner-fast", "ner-multi-fast.pt"]),
-            # English POS models
-            "upos": "/".join([hu_path, "upos", "en-pos-ontonotes-v0.4.pt"]),
-            "upos-fast": "/".join([hu_path, "upos-fast", "en-upos-ontonotes-fast-v0.4.pt"]),
-            "pos": "/".join([hu_path, "pos", "en-pos-ontonotes-v0.5.pt"]),
-            "pos-fast": "/".join([hu_path, "pos-fast", "en-pos-ontonotes-fast-v0.5.pt"]),
-            # Multilingual POS models
-            "pos-multi": "/".join([hu_path, "multi-pos", "pos-multi-v0.1.pt"]),
-            "multi-pos": "/".join([hu_path, "multi-pos", "pos-multi-v0.1.pt"]),
-            "pos-multi-fast": "/".join([hu_path, "multi-pos-fast", "pos-multi-fast.pt"]),
-            "multi-pos-fast": "/".join([hu_path, "multi-pos-fast", "pos-multi-fast.pt"]),
             # English SRL models
-            "frame": "/".join([hu_path, "frame", "en-frame-ontonotes-v0.4.pt"]),
-            "frame-fast": "/".join([hu_path, "frame-fast", "en-frame-ontonotes-fast-v0.4.pt"]),
             "frame-large": "/".join([hu_path, "frame-large", "frame-large.pt"]),
-            # English chunking models
-            "chunk": "/".join([hu_path, "chunk", "en-chunk-conll2000-v0.4.pt"]),
-            "chunk-fast": "/".join([hu_path, "chunk-fast", "en-chunk-conll2000-fast-v0.4.pt"]),
             # Danish models
             "da-pos": "/".join([hu_path, "da-pos", "da-pos-v0.1.pt"]),
             "da-ner": "/".join([hu_path, "NER-danish", "da-ner-v0.1.pt"]),
@@ -730,13 +707,11 @@ def _fetch_model(model_name) -> str:
             "de-pos-tweets": "/".join([hu_path, "de-pos-tweets", "de-pos-twitter-v0.1.pt"]),
             "de-ner": "/".join([hu_path, "de-ner", "de-ner-conll03-v0.4.pt"]),
             "de-ner-germeval": "/".join([hu_path, "de-ner-germeval", "de-ner-germeval-0.4.1.pt"]),
-            "de-ler": "/".join([hu_path, "de-ner-legal", "de-ner-legal.pt"]),
-            "de-ner-legal": "/".join([hu_path, "de-ner-legal", "de-ner-legal.pt"]),
             # French models
             "fr-ner": "/".join([hu_path, "fr-ner", "fr-ner-wikiner-0.4.pt"]),
             # Dutch models
             "nl-ner": "/".join([hu_path, "nl-ner", "nl-ner-bert-conll02-v0.8.pt"]),
-            "nl-ner-rnn": "/".join([hu_path, "nl-ner-rnn", "nl-ner-conll02-v0.5.pt"]),
+            "nl-ner-rnn": "/".join([hu_path, "nl-ner-rnn", "nl-ner-conll02-v0.14.0.pt"]),
             # Malayalam models
             "ml-pos": "https://raw.githubusercontent.com/qburst/models-repository/master/FlairMalayalamModels/malayalam-xpos-model.pt",
             "ml-upos": "https://raw.githubusercontent.com/qburst/models-repository/master/FlairMalayalamModels/malayalam-upos-model.pt",

From 17da8b388c9369ab172b5b21613a55b0acf10b00 Mon Sep 17 00:00:00 2001
From: Alan Akbik <alan.akbik@gmail.com>
Date: Sun, 21 Jul 2024 22:18:54 +0200
Subject: [PATCH 5/8] Update models for v0.14.0

---
 flair/models/sequence_tagger_model.py | 18 ++++++------------
 1 file changed, 6 insertions(+), 12 deletions(-)

diff --git a/flair/models/sequence_tagger_model.py b/flair/models/sequence_tagger_model.py
index 5daa74a245..fad853bcaf 100644
--- a/flair/models/sequence_tagger_model.py
+++ b/flair/models/sequence_tagger_model.py
@@ -691,7 +691,6 @@ def _fetch_model(model_name) -> str:
         }
 
         hu_path: str = "https://nlp.informatik.hu-berlin.de/resources/models"
-        hunflair_paper_path = hu_path + "/hunflair_smallish_models"
         hunflair_main_path = hu_path + "/hunflair_allcorpus_models"
 
         hu_model_map = {
@@ -725,18 +724,13 @@ def _fetch_model(model_name) -> str:
             ),
             # Keyphase models
             "keyphrase": "/".join([hu_path, "keyphrase", "keyphrase-en-scibert.pt"]),
-            "negation-speculation": "/".join([hu_path, "negation-speculation", "negation-speculation-model.pt"]),
+            "negation-speculation": "/".join([hu_path, "negation-speculation", "negation-speculation-v0.14.0.pt"]),
             # Biomedical models
-            "hunflair-paper-cellline": "/".join([hunflair_paper_path, "cellline", "hunflair-celline-v1.0.pt"]),
-            "hunflair-paper-chemical": "/".join([hunflair_paper_path, "chemical", "hunflair-chemical-v1.0.pt"]),
-            "hunflair-paper-disease": "/".join([hunflair_paper_path, "disease", "hunflair-disease-v1.0.pt"]),
-            "hunflair-paper-gene": "/".join([hunflair_paper_path, "gene", "hunflair-gene-v1.0.pt"]),
-            "hunflair-paper-species": "/".join([hunflair_paper_path, "species", "hunflair-species-v1.0.pt"]),
-            "hunflair-cellline": "/".join([hunflair_main_path, "cellline", "hunflair-celline-v1.0.pt"]),
-            "hunflair-chemical": "/".join([hunflair_main_path, "huner-chemical", "hunflair-chemical-full-v1.0.pt"]),
-            "hunflair-disease": "/".join([hunflair_main_path, "huner-disease", "hunflair-disease-full-v1.0.pt"]),
-            "hunflair-gene": "/".join([hunflair_main_path, "huner-gene", "hunflair-gene-full-v1.0.pt"]),
-            "hunflair-species": "/".join([hunflair_main_path, "huner-species", "hunflair-species-full-v1.1.pt"]),
+            "hunflair-cellline": "/".join([hunflair_main_path, "huner-cellline", "hunflair-celline.pt"]),
+            "hunflair-chemical": "/".join([hunflair_main_path, "huner-chemical", "hunflair-chemical.pt"]),
+            "hunflair-disease": "/".join([hunflair_main_path, "huner-disease", "hunflair-disease.pt"]),
+            "hunflair-gene": "/".join([hunflair_main_path, "huner-gene", "hunflair-gene.pt"]),
+            "hunflair-species": "/".join([hunflair_main_path, "huner-species", "hunflair-species.pt"]),
         }
 
         cache_dir = Path("models")

From 873cbe4f16b22500e42afc9e2a32bdc503d8c329 Mon Sep 17 00:00:00 2001
From: Alan Akbik <alan.akbik@gmail.com>
Date: Tue, 23 Jul 2024 09:50:53 +0200
Subject: [PATCH 6/8] Update models for GH-3493 compatibility

---
 flair/models/sequence_tagger_model.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/flair/models/sequence_tagger_model.py b/flair/models/sequence_tagger_model.py
index fad853bcaf..070fcb4930 100644
--- a/flair/models/sequence_tagger_model.py
+++ b/flair/models/sequence_tagger_model.py
@@ -724,9 +724,9 @@ def _fetch_model(model_name) -> str:
             ),
             # Keyphase models
             "keyphrase": "/".join([hu_path, "keyphrase", "keyphrase-en-scibert.pt"]),
-            "negation-speculation": "/".join([hu_path, "negation-speculation", "negation-speculation-v0.14.0.pt"]),
+            "negation-speculation": "/".join([hu_path, "negation-speculation-v14", "negation-speculation-v0.14.0.pt"]),
             # Biomedical models
-            "hunflair-cellline": "/".join([hunflair_main_path, "huner-cellline", "hunflair-celline.pt"]),
+            "hunflair-cellline": "/".join([hunflair_main_path, "huner-cellline", "hunflair-cellline.pt"]),
             "hunflair-chemical": "/".join([hunflair_main_path, "huner-chemical", "hunflair-chemical.pt"]),
             "hunflair-disease": "/".join([hunflair_main_path, "huner-disease", "hunflair-disease.pt"]),
             "hunflair-gene": "/".join([hunflair_main_path, "huner-gene", "hunflair-gene.pt"]),

From 7ae9607ec9639c41411a904bad0ada4032e2b24e Mon Sep 17 00:00:00 2001
From: Alan Akbik <alan.akbik@gmail.com>
Date: Tue, 23 Jul 2024 10:04:01 +0200
Subject: [PATCH 7/8] Remove keyphrase model

---
 flair/models/sequence_tagger_model.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/flair/models/sequence_tagger_model.py b/flair/models/sequence_tagger_model.py
index 070fcb4930..604f73c96b 100644
--- a/flair/models/sequence_tagger_model.py
+++ b/flair/models/sequence_tagger_model.py
@@ -722,8 +722,6 @@ def _fetch_model(model_name) -> str:
                     "pucpr-flair-clinical-pos-tagging-best-model.pt",
                 ]
             ),
-            # Keyphase models
-            "keyphrase": "/".join([hu_path, "keyphrase", "keyphrase-en-scibert.pt"]),
             "negation-speculation": "/".join([hu_path, "negation-speculation-v14", "negation-speculation-v0.14.0.pt"]),
             # Biomedical models
             "hunflair-cellline": "/".join([hunflair_main_path, "huner-cellline", "hunflair-cellline.pt"]),

From fc94c513d0c111eee88559908514d25c9ba471d7 Mon Sep 17 00:00:00 2001
From: Alan Akbik <alan.akbik@gmail.com>
Date: Tue, 23 Jul 2024 10:35:19 +0200
Subject: [PATCH 8/8] Point Arabic models (ar-ner, ar-pos) to HU-hosted files

---
 flair/models/sequence_tagger_model.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/flair/models/sequence_tagger_model.py b/flair/models/sequence_tagger_model.py
index 604f73c96b..e2ac009020 100644
--- a/flair/models/sequence_tagger_model.py
+++ b/flair/models/sequence_tagger_model.py
@@ -677,8 +677,6 @@ def _fetch_model(model_name) -> str:
             "chunk": "flair/chunk-english",
             "chunk-fast": "flair/chunk-english-fast",
             # Language-specific NER models
-            "ar-ner": "megantosh/flair-arabic-multi-ner",
-            "ar-pos": "megantosh/flair-arabic-dialects-codeswitch-egy-lev",
             "da-ner": "flair/ner-danish",
             "de-ner": "flair/ner-german",
             "de-ler": "flair/ner-german-legal",
@@ -706,6 +704,9 @@ def _fetch_model(model_name) -> str:
             "de-pos-tweets": "/".join([hu_path, "de-pos-tweets", "de-pos-twitter-v0.1.pt"]),
             "de-ner": "/".join([hu_path, "de-ner", "de-ner-conll03-v0.4.pt"]),
             "de-ner-germeval": "/".join([hu_path, "de-ner-germeval", "de-ner-germeval-0.4.1.pt"]),
+            # Arabic models
+            "ar-ner": "/".join([hu_path, "arabic", "ar-ner.pt"]),
+            "ar-pos": "/".join([hu_path, "arabic", "ar-pos.pt"]),
             # French models
             "fr-ner": "/".join([hu_path, "fr-ner", "fr-ner-wikiner-0.4.pt"]),
             # Dutch models