Skip to content

Commit

Permalink
Add query/document marker tokens
Browse files Browse the repository at this point in the history
  • Loading branch information
NohTow committed Aug 2, 2024
1 parent a0c64e3 commit 6fe1cfb
Showing 1 changed file with 4 additions and 0 deletions.
4 changes: 4 additions & 0 deletions giga_cherche/models/colbert.py
Original file line number Diff line number Diff line change
Expand Up @@ -367,6 +367,10 @@ def __init__(
# Pass the model to the model card data for later use in generating a model card upon saving this model
self.model_card_data.register_model(self)

# Currently disabled: if uncommented, the next two lines would add the query and document prefixes to the tokenizer vocabulary (if they are not already present) and resize the embeddings accordingly.
# self.tokenizer.add_tokens([self.query_prefix, self.document_prefix])
# self._first_module().auto_model.resize_token_embeddings(len(self.tokenizer))

self.document_prefix_id = self.tokenizer.convert_tokens_to_ids(
self.document_prefix
)
Expand Down

0 comments on commit 6fe1cfb

Please sign in to comment.