ch06/03 fixes (rasbt#336)
* fixed bash commands

* fixed help docstrings

* added missing logreg bash cmd

* Update train_bert_hf.py

* Update train_bert_hf_spam.py

* Update README.md

---------

Co-authored-by: Sebastian Raschka <[email protected]>
d-kleine and rasbt authored Aug 27, 2024
1 parent 91cdfe3 commit c7267c3
Showing 3 changed files with 23 additions and 18 deletions.
13 changes: 9 additions & 4 deletions ch06/03_bonus_imdb-classification/README.md
@@ -56,7 +56,7 @@ Test accuracy: 91.88%
A 340M parameter encoder-style [BERT](https://arxiv.org/abs/1810.04805) model:

```bash
-python train_bert_hf --trainable_layers "all" --num_epochs 1 --model "bert"
+python train_bert_hf.py --trainable_layers "all" --num_epochs 1 --model "bert"
```

```
@@ -114,7 +114,7 @@ A 355M parameter encoder-style [RoBERTa](https://arxiv.org/abs/1907.11692) model


```bash
-python train_bert_hf.py --trainable_layers "last_block" --num_epochs 1 --bert_model "roberta"
+python train_bert_hf.py --trainable_layers "last_block" --num_epochs 1 --model "roberta"
```

```
@@ -140,7 +140,12 @@ Test accuracy: 92.95%

<br>

-A scikit-learn logistic regression classifier as a baseline.
+A scikit-learn logistic regression classifier as a baseline:
+
+
+```bash
+python train_sklearn_logreg.py
+```

```
Dummy classifier:
@@ -153,4 +158,4 @@ Logistic regression classifier:
Training Accuracy: 99.80%
Validation Accuracy: 88.62%
Test Accuracy: 88.85%
-```
\ No newline at end of file
+```
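
Note: train_sklearn_logreg.py itself is not part of this diff, so the following is only a minimal sketch of what such a dummy-vs-logistic-regression baseline typically looks like; the file names, column names, and vectorizer choice are assumptions, not the script's actual code.

```python
# Hedged sketch of a bag-of-words baseline matching the "Dummy classifier"
# and "Logistic regression classifier" outputs quoted in the README above.
import pandas as pd
from sklearn.dummy import DummyClassifier
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# Assumed CSV layout: a "text" column and a 0/1 "label" column.
train_df = pd.read_csv("train.csv")
val_df = pd.read_csv("validation.csv")
test_df = pd.read_csv("test.csv")

# Bag-of-words features, fit on the training split only.
vectorizer = CountVectorizer()
X_train = vectorizer.fit_transform(train_df["text"])
X_val = vectorizer.transform(val_df["text"])
X_test = vectorizer.transform(test_df["text"])

# Majority-class baseline ("Dummy classifier" in the README output).
dummy = DummyClassifier(strategy="most_frequent")
dummy.fit(X_train, train_df["label"])
print(f"Dummy test accuracy: {accuracy_score(test_df['label'], dummy.predict(X_test)):.2%}")

# Logistic regression on the same features.
logreg = LogisticRegression(max_iter=1000)
logreg.fit(X_train, train_df["label"])
for name, X, y in [("Training", X_train, train_df["label"]),
                   ("Validation", X_val, val_df["label"]),
                   ("Test", X_test, test_df["label"])]:
    print(f"{name} Accuracy: {accuracy_score(y, logreg.predict(X)):.2%}")
```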
14 changes: 7 additions & 7 deletions ch06/03_bonus_imdb-classification/train_bert_hf.py
@@ -189,15 +189,15 @@ def train_classifier_simple(model, train_loader, val_loader, optimizer, device,
type=str,
default="true",
help=(
"Whether to use a attention mask for padding tokens. Options: 'true', 'false'"
"Whether to use a attention mask for padding tokens. Options: 'true', 'false'."
)
)
parser.add_argument(
"--bert_model",
"--model",
type=str,
default="distilbert",
help=(
"Which model to train. Options: 'distilbert', 'bert'."
"Which model to train. Options: 'distilbert', 'bert', 'roberta'."
)
)
parser.add_argument(
@@ -223,7 +223,7 @@ def train_classifier_simple(model, train_loader, val_loader, optimizer, device,
###############################

torch.manual_seed(123)
-if args.bert_model == "distilbert":
+if args.model == "distilbert":

model = AutoModelForSequenceClassification.from_pretrained(
"distilbert-base-uncased", num_labels=2
@@ -247,7 +247,7 @@ def train_classifier_simple(model, train_loader, val_loader, optimizer, device,

tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")

-elif args.bert_model == "bert":
+elif args.model == "bert":

model = AutoModelForSequenceClassification.from_pretrained(
"bert-base-uncased", num_labels=2
@@ -272,7 +272,7 @@ def train_classifier_simple(model, train_loader, val_loader, optimizer, device,
raise ValueError("Invalid --trainable_layers argument.")

tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
-elif args.bert_model == "roberta":
+elif args.model == "roberta":

model = AutoModelForSequenceClassification.from_pretrained(
"FacebookAI/roberta-large", num_labels=2
@@ -296,7 +296,7 @@ def train_classifier_simple(model, train_loader, val_loader, optimizer, device,

tokenizer = AutoTokenizer.from_pretrained("FacebookAI/roberta-large")
else:
raise ValueError("Selected --bert_model not supported.")
raise ValueError("Selected --model {args.model} not supported.")

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
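
Taken together, the changes above rename the CLI flag from --bert_model to --model and extend the dispatch to RoBERTa. A condensed sketch of the resulting selection logic, paraphrased from the diff rather than copied verbatim (the dict-based dispatch is a restructuring for brevity; note the f-string prefix, which the diff's replacement line omitted):

```python
# Condensed paraphrase of the post-commit --model handling in train_bert_hf.py.
import argparse

from transformers import AutoModelForSequenceClassification, AutoTokenizer

# Hugging Face checkpoint for each supported --model value (all appear in the diff).
CHECKPOINTS = {
    "distilbert": "distilbert-base-uncased",
    "bert": "bert-base-uncased",
    "roberta": "FacebookAI/roberta-large",
}

parser = argparse.ArgumentParser()
parser.add_argument(
    "--model",
    type=str,
    default="distilbert",
    help="Which model to train. Options: 'distilbert', 'bert', 'roberta'.",
)
args = parser.parse_args()

if args.model not in CHECKPOINTS:
    # f-string so the offending value is interpolated into the message.
    raise ValueError(f"Selected --model {args.model} not supported.")

model = AutoModelForSequenceClassification.from_pretrained(
    CHECKPOINTS[args.model], num_labels=2
)
tokenizer = AutoTokenizer.from_pretrained(CHECKPOINTS[args.model])
```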
14 changes: 7 additions & 7 deletions ch06/03_bonus_imdb-classification/train_bert_hf_spam.py
@@ -280,15 +280,15 @@ def train_classifier_simple(model, train_loader, val_loader, optimizer, device,
type=str,
default="true",
help=(
"Whether to use a attention mask for padding tokens. Options: 'true', 'false'"
"Whether to use a attention mask for padding tokens. Options: 'true', 'false'."
)
)
parser.add_argument(
"--bert_model",
"--model",
type=str,
default="distilbert",
help=(
"Which model to train. Options: 'distilbert', 'bert'."
"Which model to train. Options: 'distilbert', 'bert', 'roberta'."
)
)
parser.add_argument(
@@ -314,7 +314,7 @@ def train_classifier_simple(model, train_loader, val_loader, optimizer, device,
###############################

torch.manual_seed(123)
-if args.bert_model == "distilbert":
+if args.model == "distilbert":

model = AutoModelForSequenceClassification.from_pretrained(
"distilbert-base-uncased", num_labels=2
@@ -338,7 +338,7 @@ def train_classifier_simple(model, train_loader, val_loader, optimizer, device,

tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")

-elif args.bert_model == "bert":
+elif args.model == "bert":

model = AutoModelForSequenceClassification.from_pretrained(
"bert-base-uncased", num_labels=2
@@ -363,7 +363,7 @@ def train_classifier_simple(model, train_loader, val_loader, optimizer, device,
raise ValueError("Invalid --trainable_layers argument.")

tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
-elif args.bert_model == "roberta":
+elif args.model == "roberta":

model = AutoModelForSequenceClassification.from_pretrained(
"FacebookAI/roberta-large", num_labels=2
@@ -387,7 +387,7 @@ def train_classifier_simple(model, train_loader, val_loader, optimizer, device,

tokenizer = AutoTokenizer.from_pretrained("FacebookAI/roberta-large")
else:
raise ValueError("Selected --bert_model not supported.")
raise ValueError("Selected --model {args.model} not supported.")

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
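
Both scripts also expose the --trainable_layers flag used in the README commands ("all", "last_block"); its implementation is outside this diff, so the following is only an illustrative sketch of the usual freezing pattern, with hypothetical attribute paths that differ across DistilBERT, BERT, and RoBERTa:

```python
# Illustrative only: the attribute names below (model.classifier,
# model.bert.encoder.layer) are assumptions, not the scripts' actual code.
def set_trainable_layers(model, trainable_layers: str) -> None:
    if trainable_layers == "all":
        for param in model.parameters():
            param.requires_grad = True
    elif trainable_layers == "last_block":
        # Freeze everything, then unfreeze the classification head
        # plus the final transformer block.
        for param in model.parameters():
            param.requires_grad = False
        for param in model.classifier.parameters():  # hypothetical name
            param.requires_grad = True
        for param in model.bert.encoder.layer[-1].parameters():  # hypothetical path
            param.requires_grad = True
    else:
        raise ValueError("Invalid --trainable_layers argument.")
```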
