Skip to content

Commit

Permalink
update position_ids
Browse files Browse the repository at this point in the history
  • Loading branch information
Blaizzy committed May 2, 2024
1 parent 4a751dc commit 34a3dd1
Showing 1 changed file with 6 additions and 3 deletions.
9 changes: 6 additions & 3 deletions mlx_vlm/models/idefics2/vision.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ class VisionConfig:
num_attention_heads: int
image_size: int
patch_size: int
- layer_norm_eps: float = 1e-6
+ layer_norm_eps: float
num_channels: int = 3

@classmethod
Expand Down Expand Up @@ -163,7 +163,10 @@ def __call__(self, x: mx.array, mask: Optional[mx.array] = None) -> mx.array:
H // self.patch_size,
W // self.patch_size,
)
- position_ids = np.full((B, max_nb_patches_h * max_nb_patches_w), fill_value=0)
+ sequence = np.arange(max_nb_patches_h * max_nb_patches_w)
+
+ # Tile the sequence to repeat it B times, each time as a new row
+ position_ids = np.tile(sequence, (B, 1))

embeddings = patch_embeddings
embeddings += self.position_embedding(mx.array(position_ids))
Expand Down Expand Up @@ -214,7 +217,7 @@ def __call__(
if output_hidden_states:
encoder_states = encoder_states + (x,)

- pooler_output = self.post_layernorm(x[:, 0, :])
+ pooler_output = self.post_layernorm(x[:, -1, :])

return pooler_output, x, encoder_states

Expand Down

0 comments on commit 34a3dd1

Please sign in to comment.