# |‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾|
# | tutorial |
# |_____________________________|
import torch
import torch.nn.functional as F
from transformers import AutoModelForSequenceClassification, AutoTokenizer, pipeline
model_name = 'distilbert-base-uncased-finetuned-sst-2-english'
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False)
model = AutoModelForSequenceClassification.from_pretrained(
    model_name)  # reused below, both inside the pipeline and for manual inference
pl = pipeline("sentiment-analysis", model=model,
              tokenizer=tokenizer, framework='pt')
results = pl(
    ["We are very happy to show you the hugging face transformers library.",
     "It's so doomed!"])
# each result is a dict with 'label' and 'score' keys
# for result in results:
#     print(result)
tokens = tokenizer.tokenize(
    "We are very happy to show you the hugging face transformers library.")
token_ids = tokenizer.convert_tokens_to_ids(tokens)
# calling the tokenizer directly also adds the model's special tokens
token_ids2 = tokenizer(
    "We are very happy to show you the hugging face transformers library.")
# ['we', 'are', 'very', 'happy', 'to', 'show', 'you', 'the', 'hugging', 'face', 'transformers', 'library', '.']
# print(tokens)
# [2057, 2024, 2200, 3407, 2000, 2265, 2017, 1996, 17662, 2227, 19081, 3075, 1012]
# print(token_ids)
# [101, 2057, 2024, 2200, 3407, 2000, 2265, 2017, 1996, 17662, 2227, 19081, 3075, 1012, 102]
# print(token_ids2['input_ids'])  # 101 and 102 are the [CLS] and [SEP] special tokens
X_train = ["We are very happy to show you the hugging face transformers library.",
           "I hate it here!"]
# tokenize the whole batch at once, padded/truncated to a common length
batch = tokenizer(X_train, padding=True, truncation=True,
                  max_length=512, return_tensors='pt')
with torch.no_grad():
    # labels: 1 = POSITIVE for the first sentence, 0 = NEGATIVE for the second
    outputs = model(**batch, labels=torch.tensor([1, 0]))
print(outputs)
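# because labels were passed, the output carries a cross-entropy loss
# alongside the logits (added note, handy while experimenting):
# print(outputs.loss)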
predictions = F.softmax(outputs.logits, dim=1)  # logits -> probabilities
print(predictions)
labels = torch.argmax(predictions, dim=1)  # index of the most likely class
print(labels)
labels = [model.config.id2label[label_id] for label_id in labels.tolist()]
print(labels)  # e.g. ['POSITIVE', 'NEGATIVE']
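# cross-check (added sketch): running the same two sentences through the
# pipeline should yield the same labels as the manual path above
# print(pl(X_train))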
# persist the tokenizer and model, then reload both from disk
save_dir = "saved"
tokenizer.save_pretrained(save_dir)
model.save_pretrained(save_dir)
tokenizer = AutoTokenizer.from_pretrained(save_dir)
model = AutoModelForSequenceClassification.from_pretrained(save_dir)
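# sanity check (added sketch): the reloaded pair should behave exactly like
# the originals, e.g. when wrapped in a fresh pipeline
pl2 = pipeline("sentiment-analysis", model=model,
               tokenizer=tokenizer, framework='pt')
print(pl2("It's so doomed!"))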