From e8b1def8d20297e3568e1f4260b40b4cbfaf8c70 Mon Sep 17 00:00:00 2001 From: Mehdi Mirza Date: Tue, 15 Mar 2016 14:05:43 -0400 Subject: [PATCH 1/7] managing initilizations via a a role scheme dictionary --- blocks/bricks/interfaces.py | 72 +++++++++++++++++++++---------------- blocks/bricks/simple.py | 1 + 2 files changed, 43 insertions(+), 30 deletions(-) diff --git a/blocks/bricks/interfaces.py b/blocks/bricks/interfaces.py index 531fb2aa..25b43acb 100644 --- a/blocks/bricks/interfaces.py +++ b/blocks/bricks/interfaces.py @@ -127,45 +127,55 @@ class Initializable(RNGMixin, Brick): ``True``. use_bias : :obj:`bool`, optional Whether to use a bias. Defaults to `True`. Required by - :meth:`~.Brick.initialize`. Only supported by bricks for which - :attr:`has_biases` is ``True``. + :meth:`~.Brick.initialize`. rng : :class:`numpy.random.RandomState` - Attributes - ---------- - has_biases : bool - ``False`` if the brick does not support biases, and only has - :attr:`weights_init`. For an example of this, see - :class:`.Bidirectional`. If this is ``False``, the brick does not - support the arguments ``biases_init`` or ``use_bias``. - """ - has_biases = True @lazy() - def __init__(self, weights_init=None, biases_init=None, use_bias=None, + def __init__(self, initialization_schemes=None, use_bias=True, seed=None, **kwargs): - super(Initializable, self).__init__(**kwargs) - self.weights_init = weights_init - if self.has_biases: - self.biases_init = biases_init - elif biases_init is not None or not use_bias: - raise ValueError("This brick does not support biases config") - if use_bias is not None: - self.use_bias = use_bias + self.use_bias = use_bias self.seed = seed + self.initialization_schemes = initialization_schemes + self.parameter_roles = set([]) + if self.initialization_schemes is None: + self.initialization_schemes = {} + + kwargs_ = {} + for key in kwargs: + if key[-5:] == "_init": + if key in self.initialization_schemes: + raise ValueError("All initializations are accepted either" + "through initialization_schemes or " + "correspodong attribute but not both") + else: + self.initialization_schemes[key] = kwargs[key] + else: + kwargs_[key] = kwargs[key] + + super(Initializable, self).__init__(**kwargs_) + self._collect_roles() def _push_initialization_config(self): + for child in self.children: + if (isinstance(child, Initializable) and + hasattr(child, 'initialization_schemes')): + for role in child.initialization_schemes: + if role not in self.parameter_roles: + raise ValueError("The parameter role: " + + "{} is not defined in".format(role) + + "in the class parameter_roles") + for child in self.children: if isinstance(child, Initializable): child.rng = self.rng - if self.weights_init: - child.weights_init = self.weights_init - if hasattr(self, 'biases_init') and self.biases_init: - for child in self.children: - if (isinstance(child, Initializable) and - hasattr(child, 'biases_init')): - child.biases_init = self.biases_init + child.initialization_schemes = self.initialization_schemes + + def _collect_roles(self): + for child in self.children: + if isinstance(child, Initializable): + self.parameter_roles.update(child.parameter_roles) class LinearLike(Initializable): @@ -196,9 +206,11 @@ def b(self): def _initialize(self): # Use self.parameters[] references in case W and b are overridden # to return non-shared-variables. - if getattr(self, 'use_bias', True): - self.biases_init.initialize(self.parameters[1], self.rng) - self.weights_init.initialize(self.parameters[0], self.rng) + if self.use_bias: + self.initialization_schemes['biases_init'].initialize( + self.parameters[1], self.rng) + self.initialization_schemes['weights_init'].initialize( + self.parameters[0], self.rng) class Random(Brick): diff --git a/blocks/bricks/simple.py b/blocks/bricks/simple.py index 1243d3ea..084529c0 100644 --- a/blocks/bricks/simple.py +++ b/blocks/bricks/simple.py @@ -43,6 +43,7 @@ def __init__(self, input_dim, output_dim, **kwargs): super(Linear, self).__init__(**kwargs) self.input_dim = input_dim self.output_dim = output_dim + self.parameter_roles = set(['weights_init', 'biases_init']) def _allocate(self): W = shared_floatx_nans((self.input_dim, self.output_dim), name='W') From f1ef6abdf7a4bd341b9454ae9063fa67f8a87797 Mon Sep 17 00:00:00 2001 From: Mehdi Mirza Date: Tue, 12 Apr 2016 14:58:27 -0400 Subject: [PATCH 2/7] reorganize initialization --- blocks/bricks/interfaces.py | 77 +++++++++++++++--------- blocks/bricks/lookup.py | 3 - blocks/bricks/simple.py | 5 -- tests/bricks/test_attention.py | 4 +- tests/bricks/test_recurrent.py | 6 +- tests/bricks/test_sequence_generators.py | 2 +- 6 files changed, 53 insertions(+), 44 deletions(-) diff --git a/blocks/bricks/interfaces.py b/blocks/bricks/interfaces.py index 25b43acb..56ebbb06 100644 --- a/blocks/bricks/interfaces.py +++ b/blocks/bricks/interfaces.py @@ -5,6 +5,7 @@ from ..config import config from .base import _Brick, Brick, lazy +from blocks.roles import WEIGHT, BIAS, FILTER, INITIAL_STATE class ActivationDocumentation(_Brick): @@ -132,6 +133,8 @@ class Initializable(RNGMixin, Brick): """ + initializable_roles = ['WEIGHT', 'BIAS', 'FILTER', 'INITIAL_STATE'] + @lazy() def __init__(self, initialization_schemes=None, use_bias=True, seed=None, **kwargs): @@ -142,41 +145,63 @@ def __init__(self, initialization_schemes=None, use_bias=True, if self.initialization_schemes is None: self.initialization_schemes = {} - kwargs_ = {} - for key in kwargs: + + initialization_to_role = {"weights_init": 'WEIGHT', 'biases_init': 'BIAS', + 'initial_state_init': 'INITIAL_STATE'} + for key in list(kwargs.keys()): if key[-5:] == "_init": - if key in self.initialization_schemes: + if initialization_to_role[key] in self.initialization_schemes.keys(): raise ValueError("All initializations are accepted either" - "through initialization_schemes or " - "correspodong attribute but not both") + "through initialization schemes or " + "corresponding attribute but not both") else: - self.initialization_schemes[key] = kwargs[key] - else: - kwargs_[key] = kwargs[key] + self.initialization_schemes[initialization_to_role[key]] = kwargs[key] + kwargs.pop(key) + + for key in self.initialization_schemes: + if key not in self.initializable_roles: + raise ValueError("{} is not member of ".format(str(key)) + + "initializable_roles") + + super(Initializable, self).__init__(**kwargs) + + + def _validate_roles_schmes(self): + for role in self.parameter_roles: + if role not in self.initialization_schemes.keys(): + found = False + for init_role in list(self.initialization_schemes.keys()): + if isinstance(eval(role), type(eval(init_role))): + self.initialization_schemes[role] = self.initialization_schemes[init_role] + found = True + if not found: + raise ValueError("There is no initialization_schemes" + " defined for {}".format(role)) - super(Initializable, self).__init__(**kwargs_) - self._collect_roles() def _push_initialization_config(self): + self._collect_roles() + self._validate_roles_schmes() for child in self.children: if (isinstance(child, Initializable) and hasattr(child, 'initialization_schemes')): - for role in child.initialization_schemes: - if role not in self.parameter_roles: - raise ValueError("The parameter role: " + - "{} is not defined in".format(role) + - "in the class parameter_roles") - - for child in self.children: - if isinstance(child, Initializable): child.rng = self.rng - child.initialization_schemes = self.initialization_schemes + for role, scheme in self.initialization_schemes.items(): + child.initialization_schemes[role] = scheme + def _collect_roles(self): - for child in self.children: - if isinstance(child, Initializable): - self.parameter_roles.update(child.parameter_roles) + if hasattr(self, 'parameters'): + for param in self.parameters: + for role in param.tag.roles: + if str(role) in self.initializable_roles: + self.parameter_roles.update(set([str(role)])) + def _initialize(self): + for param in self.parameters: + for role in param.tag.roles: + if str(role) in self.initializable_roles: + self.initialization_schemes[str(role)].initialize(param, self.rng) class LinearLike(Initializable): """Initializable subclass with logic for :class:`Linear`-like classes. @@ -203,14 +228,6 @@ def b(self): else: raise AttributeError('use_bias is False') - def _initialize(self): - # Use self.parameters[] references in case W and b are overridden - # to return non-shared-variables. - if self.use_bias: - self.initialization_schemes['biases_init'].initialize( - self.parameters[1], self.rng) - self.initialization_schemes['weights_init'].initialize( - self.parameters[0], self.rng) class Random(Brick): diff --git a/blocks/bricks/lookup.py b/blocks/bricks/lookup.py index 2fd20ba4..01b0b648 100644 --- a/blocks/bricks/lookup.py +++ b/blocks/bricks/lookup.py @@ -41,9 +41,6 @@ def _allocate(self): name='W')) add_role(self.parameters[-1], WEIGHT) - def _initialize(self): - self.weights_init.initialize(self.W, self.rng) - @application(inputs=['indices'], outputs=['output']) def apply(self, indices): """Perform lookup. diff --git a/blocks/bricks/simple.py b/blocks/bricks/simple.py index 084529c0..0fe6a0bc 100644 --- a/blocks/bricks/simple.py +++ b/blocks/bricks/simple.py @@ -43,7 +43,6 @@ def __init__(self, input_dim, output_dim, **kwargs): super(Linear, self).__init__(**kwargs) self.input_dim = input_dim self.output_dim = output_dim - self.parameter_roles = set(['weights_init', 'biases_init']) def _allocate(self): W = shared_floatx_nans((self.input_dim, self.output_dim), name='W') @@ -96,10 +95,6 @@ def _allocate(self): add_role(b, BIAS) self.parameters.append(b) - def _initialize(self): - b, = self.parameters - self.biases_init.initialize(b, self.rng) - @application(inputs=['input_'], outputs=['output']) def apply(self, input_): """Apply the linear transformation. diff --git a/tests/bricks/test_attention.py b/tests/bricks/test_attention.py index 148774d4..36afaabf 100644 --- a/tests/bricks/test_attention.py +++ b/tests/bricks/test_attention.py @@ -73,8 +73,8 @@ def test_attention_recurrent(): state_names=wrapped.apply.states, attended_dim=attended_dim, match_dim=attended_dim) recurrent = AttentionRecurrent(wrapped, attention, seed=1234) - recurrent.weights_init = IsotropicGaussian(0.5) - recurrent.biases_init = Constant(0) + recurrent.initialization_schemes['WEIGHT'] = IsotropicGaussian(0.5) + recurrent.initialization_schemes['BIAS'] = Constant(0) recurrent.initialize() attended = tensor.tensor3("attended") diff --git a/tests/bricks/test_recurrent.py b/tests/bricks/test_recurrent.py index f7fc45cb..9d8dcc33 100644 --- a/tests/bricks/test_recurrent.py +++ b/tests/bricks/test_recurrent.py @@ -502,7 +502,7 @@ def setUp(self): dim=3, activation=Tanh())) self.simple = SimpleRecurrent(dim=3, weights_init=Orthogonal(), activation=Tanh(), seed=1) - self.bidir.allocate() + self.bidir.initialize() self.simple.initialize() self.bidir.children[0].parameters[0].set_value( self.simple.parameters[0].get_value()) @@ -542,8 +542,8 @@ def setUp(self): for _ in range(3)] self.stack = RecurrentStack(self.layers) for fork in self.stack.forks: - fork.weights_init = Identity(1) - fork.biases_init = Constant(0) + fork.initialization_schemes['WEIGHT'] = Identity(1) + fork.initialization_schemes['BIAS'] = Constant(0) self.stack.initialize() self.x_val = 0.1 * numpy.asarray( diff --git a/tests/bricks/test_sequence_generators.py b/tests/bricks/test_sequence_generators.py index 4ed5e06b..cc7edd04 100644 --- a/tests/bricks/test_sequence_generators.py +++ b/tests/bricks/test_sequence_generators.py @@ -160,7 +160,7 @@ def test_integer_sequence_generator(): assert outputs_val.shape == (n_steps, batch_size) assert outputs_val.dtype == 'int64' assert costs_val.shape == (n_steps, batch_size) - assert_allclose(states_val.sum(), -17.854, rtol=1e-5) + assert_allclose(states_val.sum(), -17.889, rtol=1e-5) assert_allclose(costs_val.sum(), 482.868, rtol=1e-5) assert outputs_val.sum() == 629 From 2f09a35f93464064065aae9629b02f069ec86e13 Mon Sep 17 00:00:00 2001 From: Mehdi Mirza Date: Wed, 13 Apr 2016 14:26:13 -0400 Subject: [PATCH 3/7] support weight_init, biases_init --- blocks/bricks/interfaces.py | 55 ++++++++++++++++++++++------------ blocks/roles.py | 3 ++ tests/bricks/test_attention.py | 4 +-- tests/bricks/test_recurrent.py | 4 +-- 4 files changed, 43 insertions(+), 23 deletions(-) diff --git a/blocks/bricks/interfaces.py b/blocks/bricks/interfaces.py index 56ebbb06..6c8c6cf0 100644 --- a/blocks/bricks/interfaces.py +++ b/blocks/bricks/interfaces.py @@ -133,7 +133,7 @@ class Initializable(RNGMixin, Brick): """ - initializable_roles = ['WEIGHT', 'BIAS', 'FILTER', 'INITIAL_STATE'] + initializable_roles = [WEIGHT, BIAS, FILTER, INITIAL_STATE] @lazy() def __init__(self, initialization_schemes=None, use_bias=True, @@ -145,39 +145,39 @@ def __init__(self, initialization_schemes=None, use_bias=True, if self.initialization_schemes is None: self.initialization_schemes = {} - - initialization_to_role = {"weights_init": 'WEIGHT', 'biases_init': 'BIAS', - 'initial_state_init': 'INITIAL_STATE'} + initialization_to_role = {"weights_init": WEIGHT, 'biases_init': BIAS, + 'initial_state_init': INITIAL_STATE} for key in list(kwargs.keys()): if key[-5:] == "_init": - if initialization_to_role[key] in self.initialization_schemes.keys(): + if initialization_to_role[key] in \ + self.initialization_schemes.keys(): raise ValueError("All initializations are accepted either" "through initialization schemes or " "corresponding attribute but not both") else: - self.initialization_schemes[initialization_to_role[key]] = kwargs[key] + self.initialization_schemes[initialization_to_role[ + key]] = kwargs[key] kwargs.pop(key) for key in self.initialization_schemes: if key not in self.initializable_roles: - raise ValueError("{} is not member of ".format(str(key)) + - "initializable_roles") + raise ValueError("{} is not member of ".format(key) + + "initializable_roles") super(Initializable, self).__init__(**kwargs) - def _validate_roles_schmes(self): for role in self.parameter_roles: if role not in self.initialization_schemes.keys(): found = False for init_role in list(self.initialization_schemes.keys()): - if isinstance(eval(role), type(eval(init_role))): - self.initialization_schemes[role] = self.initialization_schemes[init_role] + if isinstance(role, type(init_role)): + self.initialization_schemes[role] = \ + self.initialization_schemes[init_role] found = True if not found: raise ValueError("There is no initialization_schemes" - " defined for {}".format(role)) - + " defined for {}".format(role)) def _push_initialization_config(self): self._collect_roles() @@ -189,19 +189,37 @@ def _push_initialization_config(self): for role, scheme in self.initialization_schemes.items(): child.initialization_schemes[role] = scheme - def _collect_roles(self): if hasattr(self, 'parameters'): for param in self.parameters: for role in param.tag.roles: - if str(role) in self.initializable_roles: - self.parameter_roles.update(set([str(role)])) + if role in self.initializable_roles: + self.parameter_roles.update(set([role])) def _initialize(self): for param in self.parameters: for role in param.tag.roles: - if str(role) in self.initializable_roles: - self.initialization_schemes[str(role)].initialize(param, self.rng) + if role in self.initializable_roles: + self.initialization_schemes[role].initialize(param, + self.rng) + + def __getattr__(self, name): + if name == "weights_init": + if WEIGHT in self.initialization_schemes: + return self.initialization_schemes[WEIGHT] + elif name == "biases_init": + if BIAS in self.initialization_schemes: + return self.initialization_schemes[BIAS] + raise AttributeError("Attribute {} not found".format(name)) + + def __setattr__(self, name, value): + if name == 'weights_init': + self.initialization_schemes[WEIGHT] = value + elif name == 'biases_init': + self.initialization_schemes[BIAS] = value + else: + super(Initializable, self).__setattr__(name, value) + class LinearLike(Initializable): """Initializable subclass with logic for :class:`Linear`-like classes. @@ -229,7 +247,6 @@ def b(self): raise AttributeError('use_bias is False') - class Random(Brick): """A mixin class for Bricks which need Theano RNGs. diff --git a/blocks/roles.py b/blocks/roles.py index d672189c..6fd3e4bb 100644 --- a/blocks/roles.py +++ b/blocks/roles.py @@ -71,6 +71,9 @@ def __repr__(self): return re.sub(r'(?!^)([A-Z]+)', r'_\1', self.__class__.__name__[:-4]).upper() + def __hash__(self): + return hash(str(self)) + class InputRole(VariableRole): pass diff --git a/tests/bricks/test_attention.py b/tests/bricks/test_attention.py index 36afaabf..148774d4 100644 --- a/tests/bricks/test_attention.py +++ b/tests/bricks/test_attention.py @@ -73,8 +73,8 @@ def test_attention_recurrent(): state_names=wrapped.apply.states, attended_dim=attended_dim, match_dim=attended_dim) recurrent = AttentionRecurrent(wrapped, attention, seed=1234) - recurrent.initialization_schemes['WEIGHT'] = IsotropicGaussian(0.5) - recurrent.initialization_schemes['BIAS'] = Constant(0) + recurrent.weights_init = IsotropicGaussian(0.5) + recurrent.biases_init = Constant(0) recurrent.initialize() attended = tensor.tensor3("attended") diff --git a/tests/bricks/test_recurrent.py b/tests/bricks/test_recurrent.py index 9d8dcc33..ab5a7624 100644 --- a/tests/bricks/test_recurrent.py +++ b/tests/bricks/test_recurrent.py @@ -542,8 +542,8 @@ def setUp(self): for _ in range(3)] self.stack = RecurrentStack(self.layers) for fork in self.stack.forks: - fork.initialization_schemes['WEIGHT'] = Identity(1) - fork.initialization_schemes['BIAS'] = Constant(0) + fork.weights_init = Identity(1) + fork.biases_init = Constant(0) self.stack.initialize() self.x_val = 0.1 * numpy.asarray( From 84dff782f1c52f715ce256395c3685494e108d67 Mon Sep 17 00:00:00 2001 From: Mehdi Mirza Date: Mon, 30 May 2016 16:55:05 -0400 Subject: [PATCH 4/7] get rid of initializable_roles --- blocks/bricks/conv.py | 4 +- blocks/bricks/interfaces.py | 76 ++++++++++++++++++++++--------------- 2 files changed, 49 insertions(+), 31 deletions(-) diff --git a/blocks/bricks/conv.py b/blocks/bricks/conv.py index 5c804e01..cae5436a 100644 --- a/blocks/bricks/conv.py +++ b/blocks/bricks/conv.py @@ -72,7 +72,9 @@ class Convolutional(LinearLike): def __init__(self, filter_size, num_filters, num_channels, batch_size=None, image_size=(None, None), step=(1, 1), border_mode='valid', tied_biases=False, **kwargs): - super(Convolutional, self).__init__(**kwargs) + parameter_roles = set([FILTER, BIAS]) + super(Convolutional, self).__init__(parameter_roles=parameter_roles, + **kwargs) self.filter_size = filter_size self.num_filters = num_filters diff --git a/blocks/bricks/interfaces.py b/blocks/bricks/interfaces.py index 6c8c6cf0..64faaeee 100644 --- a/blocks/bricks/interfaces.py +++ b/blocks/bricks/interfaces.py @@ -133,24 +133,32 @@ class Initializable(RNGMixin, Brick): """ - initializable_roles = [WEIGHT, BIAS, FILTER, INITIAL_STATE] - @lazy() - def __init__(self, initialization_schemes=None, use_bias=True, - seed=None, **kwargs): + def __init__(self, initialization_schemes=None, parameter_roles=None, + use_bias=True, seed=None, **kwargs): self.use_bias = use_bias self.seed = seed self.initialization_schemes = initialization_schemes - self.parameter_roles = set([]) if self.initialization_schemes is None: self.initialization_schemes = {} + if parameter_roles: + self.parameter_roles = parameter_roles + else: + self.parameter_roles = set([WEIGHT]) + if use_bias: + self.parameter_roles.update(set([BIAS])) + initialization_to_role = {"weights_init": WEIGHT, 'biases_init': BIAS, 'initial_state_init': INITIAL_STATE} for key in list(kwargs.keys()): if key[-5:] == "_init": + if key not in initialization_to_role: + raise ValueError("The initlization scheme: {}".format(key), + "is not defined by default, pass it" + "via initialization_schemes") if initialization_to_role[key] in \ - self.initialization_schemes.keys(): + self.initialization_schemes.keys(): raise ValueError("All initializations are accepted either" "through initialization schemes or " "corresponding attribute but not both") @@ -159,47 +167,47 @@ def __init__(self, initialization_schemes=None, use_bias=True, key]] = kwargs[key] kwargs.pop(key) - for key in self.initialization_schemes: - if key not in self.initializable_roles: - raise ValueError("{} is not member of ".format(key) + - "initializable_roles") - super(Initializable, self).__init__(**kwargs) + self._collect_roles() - def _validate_roles_schmes(self): + def _validate_roles(self): + high_level_roles = [] for role in self.parameter_roles: if role not in self.initialization_schemes.keys(): - found = False - for init_role in list(self.initialization_schemes.keys()): - if isinstance(role, type(init_role)): + for key in list(self.initialization_schemes.keys()): + if isinstance(role, type(key)): self.initialization_schemes[role] = \ - self.initialization_schemes[init_role] - found = True - if not found: - raise ValueError("There is no initialization_schemes" - " defined for {}".format(role)) + self.initialization_schemes[key] + high_level_roles.append(key) + + for key in high_level_roles: + if key not in self.parameter_roles: + self.initialization_schemes.pop(key) + + for key in self.initialization_schemes: + if key not in self.parameter_roles: + raise ValueError("{} is not member of ".format(key) + + "parameter_roles") def _push_initialization_config(self): - self._collect_roles() - self._validate_roles_schmes() + self._validate_roles() for child in self.children: if (isinstance(child, Initializable) and hasattr(child, 'initialization_schemes')): child.rng = self.rng for role, scheme in self.initialization_schemes.items(): - child.initialization_schemes[role] = scheme + if role in child.parameter_roles: + child.initialization_schemes[role] = scheme def _collect_roles(self): - if hasattr(self, 'parameters'): - for param in self.parameters: - for role in param.tag.roles: - if role in self.initializable_roles: - self.parameter_roles.update(set([role])) + for child in self.children: + if isinstance(child, Initializable): + self.parameter_roles.update(child.parameter_roles) def _initialize(self): for param in self.parameters: for role in param.tag.roles: - if role in self.initializable_roles: + if role in self.parameter_roles: self.initialization_schemes[role].initialize(param, self.rng) @@ -210,7 +218,7 @@ def __getattr__(self, name): elif name == "biases_init": if BIAS in self.initialization_schemes: return self.initialization_schemes[BIAS] - raise AttributeError("Attribute {} not found".format(name)) + super(Initializable, self).__getattr__(name) def __setattr__(self, name, value): if name == 'weights_init': @@ -235,6 +243,14 @@ class LinearLike(Initializable): first and biases (if ``use_bias`` is True) coming second. """ + + def __init__(self, **kwargs): + if 'parameter_roles' in kwargs: + kwargs['parameter_roles'].update(set([WEIGHT, BIAS])) + else: + kwargs['parameter_roles'] = set([WEIGHT, BIAS]) + super(LinearLike, self).__init__(**kwargs) + @property def W(self): return self.parameters[0] From ecb758f1b3a25a26234021abbc71cebec5a1106d Mon Sep 17 00:00:00 2001 From: Mehdi Mirza Date: Thu, 2 Jun 2016 14:25:43 -0400 Subject: [PATCH 5/7] collect parameter roles from tag --- blocks/bricks/conv.py | 4 +--- blocks/bricks/interfaces.py | 29 +++++++++++++---------------- tests/bricks/test_recurrent.py | 4 +--- 3 files changed, 15 insertions(+), 22 deletions(-) diff --git a/blocks/bricks/conv.py b/blocks/bricks/conv.py index cae5436a..5c804e01 100644 --- a/blocks/bricks/conv.py +++ b/blocks/bricks/conv.py @@ -72,9 +72,7 @@ class Convolutional(LinearLike): def __init__(self, filter_size, num_filters, num_channels, batch_size=None, image_size=(None, None), step=(1, 1), border_mode='valid', tied_biases=False, **kwargs): - parameter_roles = set([FILTER, BIAS]) - super(Convolutional, self).__init__(parameter_roles=parameter_roles, - **kwargs) + super(Convolutional, self).__init__(**kwargs) self.filter_size = filter_size self.num_filters = num_filters diff --git a/blocks/bricks/interfaces.py b/blocks/bricks/interfaces.py index 64faaeee..a39a42b3 100644 --- a/blocks/bricks/interfaces.py +++ b/blocks/bricks/interfaces.py @@ -134,7 +134,7 @@ class Initializable(RNGMixin, Brick): """ @lazy() - def __init__(self, initialization_schemes=None, parameter_roles=None, + def __init__(self, initialization_schemes=None, use_bias=True, seed=None, **kwargs): self.use_bias = use_bias self.seed = seed @@ -142,13 +142,6 @@ def __init__(self, initialization_schemes=None, parameter_roles=None, if self.initialization_schemes is None: self.initialization_schemes = {} - if parameter_roles: - self.parameter_roles = parameter_roles - else: - self.parameter_roles = set([WEIGHT]) - if use_bias: - self.parameter_roles.update(set([BIAS])) - initialization_to_role = {"weights_init": WEIGHT, 'biases_init': BIAS, 'initial_state_init': INITIAL_STATE} for key in list(kwargs.keys()): @@ -168,7 +161,6 @@ def __init__(self, initialization_schemes=None, parameter_roles=None, kwargs.pop(key) super(Initializable, self).__init__(**kwargs) - self._collect_roles() def _validate_roles(self): high_level_roles = [] @@ -190,6 +182,7 @@ def _validate_roles(self): "parameter_roles") def _push_initialization_config(self): + self._collect_roles() self._validate_roles() for child in self.children: if (isinstance(child, Initializable) and @@ -200,8 +193,19 @@ def _push_initialization_config(self): child.initialization_schemes[role] = scheme def _collect_roles(self): + def get_param_roles(obj): + all_roles = [] + for param in obj.parameters: + roles = param.tag.roles + # TODO do something smarter + if len(roles) > 0: + all_roles.append(roles[0]) + return all_roles + + self.parameter_roles = set(get_param_roles(self)) for child in self.children: if isinstance(child, Initializable): + child._collect_roles() self.parameter_roles.update(child.parameter_roles) def _initialize(self): @@ -244,13 +248,6 @@ class LinearLike(Initializable): """ - def __init__(self, **kwargs): - if 'parameter_roles' in kwargs: - kwargs['parameter_roles'].update(set([WEIGHT, BIAS])) - else: - kwargs['parameter_roles'] = set([WEIGHT, BIAS]) - super(LinearLike, self).__init__(**kwargs) - @property def W(self): return self.parameters[0] diff --git a/tests/bricks/test_recurrent.py b/tests/bricks/test_recurrent.py index ab5a7624..78c22ed1 100644 --- a/tests/bricks/test_recurrent.py +++ b/tests/bricks/test_recurrent.py @@ -146,8 +146,7 @@ def test_many_steps(self): class TestLSTM(unittest.TestCase): def setUp(self): - self.lstm = LSTM(dim=3, weights_init=Constant(2), - biases_init=Constant(0)) + self.lstm = LSTM(dim=3, weights_init=Constant(2)) self.lstm.initialize() def test_one_step(self): @@ -244,7 +243,6 @@ def setUp(self): self.stack2 = RecurrentStack(transitions, weights_init=Constant(2), - biases_init=Constant(0), skip_connections=True) self.stack2.initialize() From e4bbab9c208464d02318072525cbb1967d5c55d1 Mon Sep 17 00:00:00 2001 From: Mehdi Mirza Date: Thu, 2 Jun 2016 14:50:25 -0400 Subject: [PATCH 6/7] rebase --- blocks/bricks/recurrent/architectures.py | 2 ++ tests/bricks/test_sequence_generators.py | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/blocks/bricks/recurrent/architectures.py b/blocks/bricks/recurrent/architectures.py index 1367a4df..1ebc259c 100644 --- a/blocks/bricks/recurrent/architectures.py +++ b/blocks/bricks/recurrent/architectures.py @@ -6,6 +6,7 @@ from ..simple import Initializable, Logistic, Tanh from ...roles import add_role, WEIGHT, INITIAL_STATE from ...utils import shared_floatx_nans, shared_floatx_zeros +from ...initialization import Constant from .base import BaseRecurrent, recurrent @@ -32,6 +33,7 @@ def __init__(self, dim, activation, **kwargs): self.dim = dim children = [activation] kwargs.setdefault('children', []).extend(children) + kwargs.setdefault('initial_state_init', Constant(0.)) super(SimpleRecurrent, self).__init__(**kwargs) @property diff --git a/tests/bricks/test_sequence_generators.py b/tests/bricks/test_sequence_generators.py index cc7edd04..4ed5e06b 100644 --- a/tests/bricks/test_sequence_generators.py +++ b/tests/bricks/test_sequence_generators.py @@ -160,7 +160,7 @@ def test_integer_sequence_generator(): assert outputs_val.shape == (n_steps, batch_size) assert outputs_val.dtype == 'int64' assert costs_val.shape == (n_steps, batch_size) - assert_allclose(states_val.sum(), -17.889, rtol=1e-5) + assert_allclose(states_val.sum(), -17.854, rtol=1e-5) assert_allclose(costs_val.sum(), 482.868, rtol=1e-5) assert outputs_val.sum() == 629 From f3fca5343847c265abe7b7c2424d585485476b76 Mon Sep 17 00:00:00 2001 From: Mehdi Mirza Date: Mon, 6 Jun 2016 12:53:52 -0400 Subject: [PATCH 7/7] move search in role heiarachy to initilaize --- blocks/bricks/interfaces.py | 45 ++++++++++++++++++++++++------------- 1 file changed, 29 insertions(+), 16 deletions(-) diff --git a/blocks/bricks/interfaces.py b/blocks/bricks/interfaces.py index a39a42b3..8edf918b 100644 --- a/blocks/bricks/interfaces.py +++ b/blocks/bricks/interfaces.py @@ -1,5 +1,6 @@ """Bricks that are interfaces and/or mixins.""" import numpy +import inspect from six import add_metaclass from theano.sandbox.rng_mrg import MRG_RandomStreams @@ -162,24 +163,23 @@ def __init__(self, initialization_schemes=None, super(Initializable, self).__init__(**kwargs) + def get_scheme(role, schemes): + for key in schemes: + if role == type(key): + return key + for key in schemes: + if isinstance(role, type(key)): + return key + def _validate_roles(self): - high_level_roles = [] + all_parent_roles = [] for role in self.parameter_roles: - if role not in self.initialization_schemes.keys(): - for key in list(self.initialization_schemes.keys()): - if isinstance(role, type(key)): - self.initialization_schemes[role] = \ - self.initialization_schemes[key] - high_level_roles.append(key) - - for key in high_level_roles: - if key not in self.parameter_roles: - self.initialization_schemes.pop(key) + all_parent_roles += list(inspect.getmro(type(role))) for key in self.initialization_schemes: - if key not in self.parameter_roles: - raise ValueError("{} is not member of ".format(key) + - "parameter_roles") + if type(key) not in all_parent_roles: + raise ValueError("There is no parameter role" + "for initlization sheme {}".format(key)) def _push_initialization_config(self): self._collect_roles() @@ -209,11 +209,24 @@ def get_param_roles(obj): self.parameter_roles.update(child.parameter_roles) def _initialize(self): + def get_scheme(role, schemes): + if role in schemes: + return role + for key in schemes: + if role == type(key): + return key + for key in schemes: + if isinstance(role, type(key)): + return key + for param in self.parameters: for role in param.tag.roles: if role in self.parameter_roles: - self.initialization_schemes[role].initialize(param, - self.rng) + key = get_scheme(role, self.initialization_schemes.keys()) + if key is not None: + self.initialization_schemes[key].initialize(param, + self.rng) + continue def __getattr__(self, name): if name == "weights_init":