Implement DistilBert and add Fill Mask examples for DistilBert and RoBERTa #200

Open · wants to merge 12 commits into base: master
2 changes: 1 addition & 1 deletion docs/src/huggingface_dev.md
@@ -220,4 +220,4 @@ end

### Validation

After implementing the model, we use the same [script](https://github.com/chengchingwen/Transformers.jl/tree/master/example/HuggingFaceValidation) mentioned in the tokenizer part to check if our model perform the same computation as Python.
After implementing the model, we use the same [script](https://github.com/chengchingwen/Transformers.jl/tree/master/example/HuggingFaceValidation) mentioned in the tokenizer part to check that our model performs the same computation as the Python one. Before running the script, remember to `dev` your local Transformers package with `Pkg.develop` in the HuggingFaceValidation environment.
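
A minimal sketch of that dev step, assuming you start Julia from the repository root (the paths here are assumptions, not part of this PR):

using Pkg
Pkg.activate("example/HuggingFaceValidation")  # enter the validation environment
Pkg.develop(path=".")  # use the local Transformers.jl checkout instead of the registered release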
25 changes: 25 additions & 0 deletions example/DistilBert_FillMask/fill_mask.jl
@@ -0,0 +1,25 @@
using Transformers
using FuncPipelines, TextEncodeBase
using TextEncodeBase: nested2batch, nestedcall
using Flux, StatsBase

tkr = Transformers.HuggingFace.load_tokenizer("distilbert/distilbert-base-cased")
# Extend the stock encoder pipeline with a step that records which token
# positions hold "[MASK]", then collect only the fields the model needs.
tkr = Transformers.TextEncoders.BertTextEncoder(tkr) do e
    e.process[1:5] |>
        Pipeline{:masked_position}(nested2batch ∘ nestedcall(isequal("[MASK]")), :token) |>
        e.process[6:end-1] |>
        PipeGet{(:token, :attention_mask, :masked_position)}()
end

model = Transformers.HuggingFace.load_model("distilbert/distilbert-base-cased", :ForMaskedLM)

query = "[MASK] is the Capital of France"
input = Transformers.TextEncoders.encode(tkr, query)

# Boolean mask marking where "[MASK]" sits in the tokenized input.
masked_position = input.masked_position

model_output = model(input)

# Vocabulary logits for every position of the first (and only) sequence.
mask_logits = model_output.logit[:, :, 1]

mask_probabilities = softmax(mask_logits, dims=1)
predicted_token_id = map(argmax, eachcol(mask_probabilities))

# Decode all positions, then keep only the prediction at the masked position.
predicted_token = Transformers.TextEncoders.decode(tkr, predicted_token_id)[masked_position]
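Running the script in the REPL leaves the result in predicted_token; for this query the model is expected to fill the masked slot with a city name such as "Paris".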
27 changes: 27 additions & 0 deletions example/Roberta_FillMask/fill_mask.jl
@@ -0,0 +1,27 @@
using Transformers
using FuncPipelines, TextEncodeBase
using TextEncodeBase: nested2batch, nestedcall
using Flux, StatsBase

tkr = Transformers.HuggingFace.load_tokenizer("distilbert/distilroberta-base")
model = Transformers.HuggingFace.load_model("distilbert/distilroberta-base", :ForMaskedLM)

# Extend the encoder pipeline with a step that records which token positions
# hold RoBERTa's "<mask>" token, then collect the fields the model needs.
new_tkr = Transformers.TextEncoders.BertTextEncoder(tkr) do e
    e.process[1:5] |>
        Pipeline{:masked_position}(nested2batch ∘ nestedcall(isequal("<mask>")), :token) |>
        e.process[6:end-1] |>
        PipeGet{(:token, :segment, :attention_mask, :masked_position)}()
end

query = "Paris is the<mask> of France."

input = Transformers.TextEncoders.encode(new_tkr, query)

# Boolean mask marking where "<mask>" sits in the tokenized input.
masked_position = input.masked_position

model_output = model(input)

# Vocabulary logits for every position of the first (and only) sequence.
mask_logits = model_output.logit[:, :, 1]

mask_probabilities = softmax(mask_logits, dims=1)
predicted_token_id = map(argmax, eachcol(mask_probabilities))

# Decode all positions; the prediction at the masked slot can be selected
# with predicted_token[masked_position].
predicted_token = Transformers.TextEncoders.decode(new_tkr, predicted_token_id)
29 changes: 29 additions & 0 deletions src/huggingface/implementation/distilbert/config.jl
@@ -0,0 +1,29 @@
@hgfcfg :distilbert struct HGFDistilBertConfig
    vocab_size::Int64 = 28996
    max_position_embeddings::Int64 = 512
    sinusoidal_pos_embds::Bool = false
    [n_layers, num_hidden_layers]::Int = 6
    [n_heads, num_attention_heads]::Int = 12
    [dim, hidden_size]::Int64 = 768
    hidden_dim::Int64 = 3072
    dropout::Float64 = 0.1
    attention_dropout::Float64 = 0.1
    activation::String = "gelu"
    initializer_range::Float64 = 0.02
    qa_dropout::Float64 = 0.1
    seq_classif_dropout::Float64 = 0.2
    pad_token_id::Int64 = 0
    tie_weights_::Bool = true
    output_past::Bool = true
    type_vocab_size::Int = 2
    hidden_dropout_prob::Float64 = 0.1
    architectures::Vector{String} = ["DistilBertForMaskedLM"]
    intermediate_size::Int = 3072
    hidden_act::String = "gelu"
    attention_probs_dropout_prob::Float64 = 0.1
    layer_norm_eps::Float32 = 1e-12
    bos_token_id::Int = 0
    eos_token_id::Int = 2
    position_embedding_type::String = "absolute"
    classifier_dropout::Nothing = nothing
end
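
For context, a hedged usage sketch (not part of the diff): load_config is the existing Transformers.jl loader, and the bracketed field pairs above register alias names, so either spelling should resolve to the same value.

using Transformers.HuggingFace: load_config

cfg = load_config("distilbert/distilbert-base-cased")
cfg.dim      # hidden size of the base model (768)
cfg.n_heads  # number of attention heads (12); num_attention_heads is its alias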
3 changes: 3 additions & 0 deletions src/huggingface/implementation/distilbert/distilbert.jl
@@ -0,0 +1,3 @@
include("./config.jl")
include("./load.jl")
include("./tokenizer.jl")