diff --git a/stanza/models/common/bert_embedding.py b/stanza/models/common/bert_embedding.py
index 77ade22813..bad9f0ddaa 100644
--- a/stanza/models/common/bert_embedding.py
+++ b/stanza/models/common/bert_embedding.py
@@ -413,7 +413,14 @@ def convert_to_position_list(sentence, offsets):
         # this uses the last token piece for any offset by overwriting the previous value
         list_offsets[offset+1] = pos
     list_offsets[0] = 0
-    list_offsets[-1] = list_offsets[-2] + 1
+    for offset in list_offsets[-2::-1]:
+        # count backwards in case the last position was
+        # a word or character that got erased by the tokenizer
+        # this loop should eventually find something...
+        # after all, we just set the first one to be 0
+        if offset is not None:
+            list_offsets[-1] = offset + 1
+            break
     return list_offsets
 
 def extract_base_embeddings(model_name, tokenizer, model, data, device, keep_endpoints, num_layers, detach):
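
For reference, below is a minimal, self-contained sketch of the patched function and the edge case it guards against. The setup lines (allocating `list_offsets` and enumerating the token-piece offsets while skipping `None`) are assumptions reconstructed from the hunk context, not shown in the diff itself, and the trailing `"\u200b"` word is a hypothetical example of a character a tokenizer might erase. Before this change, such a word left `list_offsets[-2]` as `None`, so `list_offsets[-2] + 1` raised a `TypeError`.

```python
def convert_to_position_list(sentence, offsets):
    # one slot per word, plus the two sentence endpoints (assumed from context)
    list_offsets = [None] * (len(sentence) + 2)
    # offsets maps each token piece to the index of the word it came from
    # (loop header assumed from context; the body appears in the diff)
    for pos, offset in enumerate(offsets):
        if offset is None:
            continue
        # this uses the last token piece for any offset by overwriting the previous value
        list_offsets[offset + 1] = pos
    list_offsets[0] = 0
    # the fix: count backwards past any trailing None slots left by words
    # the tokenizer erased, instead of blindly reading list_offsets[-2]
    for offset in list_offsets[-2::-1]:
        if offset is not None:
            list_offsets[-1] = offset + 1
            break
    return list_offsets

# hypothetical input: the zero-width space produces no token pieces, so its slot stays None
# old code: TypeError (None + 1); new code: [0, 1, 2, None, 3]
print(convert_to_position_list(["unban", "mox", "\u200b"], [0, 0, 1]))
```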