Skip to content

Commit

Permalink
fix null error
Browse files (browse the repository at this point in the history)
  • Loading branch information
capjamesg authored Jun 11, 2024
1 parent 25545d0 commit 7717b43
Showing 1 changed file with 3 additions and 1 deletion.
4 changes: 3 additions & 1 deletion autodistill_distilbert/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,11 +41,13 @@ def preprocess_function(examples):
dataset = dataset.rename_column("content", "text")
dataset = dataset.train_test_split(test_size=0.2)
tokenized_dataset = dataset.map(preprocess_function, batched=True)
+ # drop null labels
+ tokenized_dataset = tokenized_dataset.filter(lambda example: example["classification"] is not None)

  # assign ids to each unique label
  labels = list(set(tokenized_dataset["train"]["classification"]))
  # order alphabetically
- labels = sorted(labels)
+ labels = sorted([i for i in labels if i is not None])

id2label = {i: label for i, label in enumerate(labels)}
label2id = {v: k for k, v in id2label.items()}
Expand Down

0 comments on commit 7717b43

Please sign in to comment.