diff --git a/examples/fairseq/tasks/data/utils.py b/examples/fairseq/tasks/data/utils.py index 9511061b..d8c0f0c0 100644 --- a/examples/fairseq/tasks/data/utils.py +++ b/examples/fairseq/tasks/data/utils.py @@ -62,7 +62,7 @@ def close(self): pass -class WeightIterator(object): +class WeightIterator: def __init__(self, weights, seed): self.weights = weights self.seed = seed diff --git a/torchscale/architecture/config.py b/torchscale/architecture/config.py index 0d2e9bee..5e7ac92d 100644 --- a/torchscale/architecture/config.py +++ b/torchscale/architecture/config.py @@ -2,7 +2,7 @@ # Licensed under The MIT License [see LICENSE for details] -class EncoderConfig(object): +class EncoderConfig: def __init__(self, **kwargs): self.encoder_embed_dim = kwargs.pop("encoder_embed_dim", 768) self.encoder_attention_heads = kwargs.pop("encoder_attention_heads", 12) @@ -71,7 +71,7 @@ def override(self, args): self.__dict__[hp] = getattr(args, hp, None) -class DecoderConfig(object): +class DecoderConfig: def __init__(self, **kwargs): self.decoder_embed_dim = kwargs.pop("decoder_embed_dim", 768) self.decoder_attention_heads = kwargs.pop("decoder_attention_heads", 12) @@ -135,7 +135,7 @@ def override(self, args): self.__dict__[hp] = getattr(args, hp, None) -class EncoderDecoderConfig(object): +class EncoderDecoderConfig: def __init__(self, **kwargs): self.encoder_embed_dim = kwargs.pop("encoder_embed_dim", 768) self.encoder_attention_heads = kwargs.pop("encoder_attention_heads", 12) diff --git a/torchscale/component/feedforward_network.py b/torchscale/component/feedforward_network.py index cc187a8a..8e970d28 100644 --- a/torchscale/component/feedforward_network.py +++ b/torchscale/component/feedforward_network.py @@ -13,7 +13,7 @@ from .xmoe.global_groups import get_moe_group -class set_torch_seed(object): +class set_torch_seed: def __init__(self, seed): assert isinstance(seed, int) self.rng_state = self.get_rng_state()