Rename neva datamodule #12121

Merged · 7 commits · Feb 11, 2025
8 changes: 4 additions & 4 deletions nemo/collections/vlm/__init__.py
@@ -26,7 +26,7 @@
 from nemo.collections.vlm.llava_next.model.llava_next import LlavaNextConfig7B, LlavaNextConfig13B, LlavaNextModel

 # MLLAMA
-from nemo.collections.vlm.mllama.data import MLlamaLazyDataModule, MLlamaMockDataModule
+from nemo.collections.vlm.mllama.data import MLlamaMockDataModule, MLlamaPreloadedDataModule
 from nemo.collections.vlm.mllama.model.base import (
     CrossAttentionTextConfig,
     CrossAttentionVisionConfig,
@@ -46,8 +46,8 @@
     ImageDataConfig,
     ImageToken,
     MultiModalToken,
-    NevaLazyDataModule,
     NevaMockDataModule,
+    NevaPreloadedDataModule,
     VideoDataConfig,
     VideoToken,
 )
@@ -77,9 +77,9 @@
     "HFDatasetDataModule",
     "HFAutoModelForImageTextToText",
     "NevaMockDataModule",
-    "NevaLazyDataModule",
+    "NevaPreloadedDataModule",
     "MLlamaMockDataModule",
-    "MLlamaLazyDataModule",
+    "MLlamaPreloadedDataModule",
     "DataConfig",
     "ImageDataConfig",
     "VideoDataConfig",
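Since the change is a pure rename, downstream code updates mechanically. A minimal sketch of the renamed public API in use, assuming only the names and constructor arguments visible in this diff (the data path is a placeholder, not from the PR):

```python
from nemo.collections.vlm import NevaMockDataModule, NevaPreloadedDataModule

# Mock module: synthetic batches, handy for smoke-testing a training loop.
mock_data = NevaMockDataModule(seq_length=4096, global_batch_size=16, micro_batch_size=2)

# Preloaded module (formerly NevaLazyDataModule), fed from real annotations.
# "/data/train.json" is a hypothetical path used only for illustration.
data = NevaPreloadedDataModule(
    paths="/data/train.json",
    seq_length=4096,
    global_batch_size=16,
    micro_batch_size=2,
)
```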
4 changes: 2 additions & 2 deletions nemo/collections/vlm/mllama/data/__init__.py
@@ -12,10 +12,10 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-from nemo.collections.vlm.mllama.data.lazy import MLlamaLazyDataModule
 from nemo.collections.vlm.mllama.data.mock import MockDataModule as MLlamaMockDataModule
+from nemo.collections.vlm.mllama.data.preloaded import MLlamaPreloadedDataModule

 __all__ = [
     "MLlamaMockDataModule",
-    "MLlamaLazyDataModule",
+    "MLlamaPreloadedDataModule",
 ]
nemo/collections/vlm/mllama/data/{lazy.py → preloaded.py} (renamed)

@@ -11,6 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+# pylint: disable=C0115,C0116

 import json
 import logging
@@ -28,7 +29,7 @@
 from nemo.collections.nlp.modules.common.megatron.utils import get_ltor_masks_and_position_ids
 from nemo.collections.vlm.mllama.model.utils import create_vision_mask_tensor
 from nemo.collections.vlm.neva.data.config import DataConfig, ImageDataConfig
-from nemo.collections.vlm.neva.data.lazy import IGNORE_INDEX, LazySupervisedDataset
+from nemo.collections.vlm.neva.data.preloaded import IGNORE_INDEX, LazySupervisedDataset
 from nemo.lightning.pytorch.plugins import MegatronDataSampler
@@ -170,7 +171,7 @@ def collate_fn(self, instances: Sequence[Dict]) -> Dict[str, torch.Tensor]:
         return batch


-class MLlamaLazyDataModule(pl.LightningDataModule):
+class MLlamaPreloadedDataModule(pl.LightningDataModule):
     def __init__(
         self,
         paths: str | List[str],
@@ -223,7 +224,7 @@ def __init__(

         if tokenizer is None or image_processor is None:
             logging.warning(
-                f"Processor and tokenizer are not provided! Fall back to `meta-llama/Llama-3.2-11B-Vision-Instruct`."
+                "Processor and tokenizer are not provided! Fall back to `meta-llama/Llama-3.2-11B-Vision-Instruct`."
             )
             from transformers import AutoProcessor

@@ -246,7 +247,8 @@ def setup(self, stage: str = "") -> None:
         else:
             # TODO:
             # rng = torch.Generator().manual_seed(self.seed)
-            # train_dataset, val_dataset, test_dataset = random_split(dataset, [train_size, val_size, test_size], generator=rng)
+            # train_dataset, val_dataset, test_dataset =
+            # random_split(dataset, [train_size, val_size, test_size], generator=rng)
             self._train_ds = MLlamaDataset(
                 self.paths[0], self.data_config, self.tokenizer, self.image_processor, self.seq_length
             )
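The reflowed TODO above describes a seeded train/val/test split. For reference, the pattern it sketches is standard `torch.utils.data.random_split`; the dataset and split sizes below are illustrative stand-ins, not NeMo code:

```python
import torch
from torch.utils.data import TensorDataset, random_split

# Stand-in dataset; the TODO would split the loaded MLlama dataset instead.
dataset = TensorDataset(torch.arange(100))

train_size, val_size, test_size = 80, 10, 10
rng = torch.Generator().manual_seed(42)  # fixed seed -> reproducible split
train_ds, val_ds, test_ds = random_split(
    dataset, [train_size, val_size, test_size], generator=rng
)
print(len(train_ds), len(val_ds), len(test_ds))  # 80 10 10
```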
4 changes: 2 additions & 2 deletions nemo/collections/vlm/neva/data/__init__.py
@@ -13,12 +13,12 @@
 # limitations under the License.

 from nemo.collections.vlm.neva.data.config import DataConfig, ImageDataConfig, VideoDataConfig
-from nemo.collections.vlm.neva.data.lazy import NevaLazyDataModule
 from nemo.collections.vlm.neva.data.mock import MockDataModule as NevaMockDataModule
 from nemo.collections.vlm.neva.data.multimodal_tokens import ImageToken, MultiModalToken, VideoToken
+from nemo.collections.vlm.neva.data.preloaded import NevaPreloadedDataModule

 __all__ = [
-    "NevaLazyDataModule",
+    "NevaPreloadedDataModule",
     "NevaMockDataModule",
     "DataConfig",
     "ImageDataConfig",
10 changes: 6 additions & 4 deletions nemo/collections/vlm/neva/data/api.py
@@ -14,16 +14,18 @@

 import lightning.pytorch as pl

-from nemo.collections.vlm.neva.data.lazy import NevaLazyDataModule
 from nemo.collections.vlm.neva.data.mock import MockDataModule
+from nemo.collections.vlm.neva.data.preloaded import NevaPreloadedDataModule


 def mock() -> pl.LightningDataModule:
+    """Mock Neva Data Module"""
     return MockDataModule(seq_length=4096, global_batch_size=16, micro_batch_size=2)


-def lazy() -> pl.LightningDataModule:
-    return NevaLazyDataModule(seq_length=4096, global_batch_size=16, micro_batch_size=2)
+def preloaded() -> pl.LightningDataModule:
+    """Preloaded Llava-like Data Module"""
+    return NevaPreloadedDataModule(seq_length=4096, global_batch_size=16, micro_batch_size=2)


-__all__ = ["mock", "lazy"]
+__all__ = ["mock", "preloaded"]
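The renamed factory is used the same way as the old `lazy()`; a sketch relying only on the defaults shown above:

```python
from nemo.collections.vlm.neva.data.api import mock, preloaded

# Both return a LightningDataModule with the defaults hard-coded in api.py
# (seq_length=4096, global_batch_size=16, micro_batch_size=2).
mock_dm = mock()
preloaded_dm = preloaded()
```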
nemo/collections/vlm/neva/data/{lazy.py → preloaded.py} (renamed)

@@ -489,7 +489,7 @@ def collate_fn(self, instances: Sequence[Dict]) -> Dict[str, torch.Tensor]:
         return batch


-class NevaLazyDataModule(pl.LightningDataModule):
+class NevaPreloadedDataModule(pl.LightningDataModule):
     def __init__(
         self,
         paths: str | List[str],
14 changes: 8 additions & 6 deletions nemo/collections/vlm/vision/intern_vit.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-from dataclasses import dataclass
+from dataclasses import dataclass, field
 from functools import partial
 from pathlib import Path
 from typing import Callable
@@ -337,7 +337,7 @@ class InternViTConfig(CLIPViTConfig):
     normalization: str = 'RMSNorm'
     layernorm_epsilon: float = 1e-6
     apply_rope_fusion: bool = False
-    transformer_layer_spec: ModuleSpec = get_internvit_layer_spec(use_te=True)
+    transformer_layer_spec: ModuleSpec = field(default_factory=lambda: get_internvit_layer_spec(use_te=True))


 @dataclass
@@ -363,10 +363,12 @@ class InternViT_300M_448px_Config(InternViTConfig):
     attention_dropout: float = 0.0
     ffn_hidden_size: int = 4096
     normalization: str = 'LayerNorm'
-    transformer_layer_spec: ModuleSpec = get_internvit_layer_spec(
-        use_te=True,
-        add_qk_norm=False,
-        norm_type='LayerNorm',
+    transformer_layer_spec: ModuleSpec = field(
+        default_factory=lambda: get_internvit_layer_spec(
+            use_te=True,
+            add_qk_norm=False,
+            norm_type='LayerNorm',
+        )
     )
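Besides the rename-related changes, this file wraps eagerly evaluated dataclass defaults in `field(default_factory=...)`. That is the standard fix for mutable or eagerly constructed defaults on dataclass fields; a minimal, NeMo-independent sketch of the pattern:

```python
from dataclasses import dataclass, field


@dataclass
class Config:
    # A bare mutable default (`layers: list = []`) raises ValueError at class
    # definition time, and an eager call would build one object shared by all
    # instances. default_factory defers construction to instantiation time:
    layers: list = field(default_factory=lambda: ["attention", "mlp"])


a, b = Config(), Config()
a.layers.append("norm")
assert b.layers == ["attention", "mlp"]  # b keeps its own, unshared default
```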
4 changes: 2 additions & 2 deletions scripts/vlm/mllama_finetune.py
@@ -22,7 +22,7 @@
 from nemo import lightning as nl
 from nemo.collections import llm, vlm
 from nemo.collections.vlm import ImageDataConfig
-from nemo.collections.vlm.mllama.data.lazy import MLlamaLazyDataModule
+from nemo.collections.vlm.mllama.data.preloaded import MLlamaPreloadedDataModule
 from nemo.lightning.pytorch.optim import CosineAnnealingScheduler
 from nemo.lightning.pytorch.optim.megatron import MegatronOptimizerModule
 from nemo.utils.exp_manager import TimingCallback
@@ -71,7 +71,7 @@ def main(args):
     )

     # Data module setup
-    data = MLlamaLazyDataModule(
+    data = MLlamaPreloadedDataModule(
         paths=args.data_path,
         data_config=data_config,
         seq_length=seq_length,
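Filled in with concrete values, the call this script now makes looks roughly like the sketch below. The `ImageDataConfig` field name, the batch-size keywords, and every path or size are illustrative assumptions, not values from the PR:

```python
from nemo.collections.vlm import ImageDataConfig
from nemo.collections.vlm.mllama.data.preloaded import MLlamaPreloadedDataModule

# Hypothetical config; the image_folder field name is assumed, not verified.
data_config = ImageDataConfig(image_folder="/data/images")

data = MLlamaPreloadedDataModule(
    paths="/data/annotations.json",  # placeholder for args.data_path
    data_config=data_config,
    seq_length=6404,                 # placeholder for the script's seq_length
    global_batch_size=16,
    micro_batch_size=2,
)
```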
2 changes: 1 addition & 1 deletion scripts/vlm/neva_finetune.py
@@ -79,7 +79,7 @@ def main(args):
     )

     # Data module setup
-    data = vlm.NevaLazyDataModule(
+    data = vlm.NevaPreloadedDataModule(
         paths=args.data_path,
         data_config=data_config,
         seq_length=decoder_seq_length,