Facilitate adding custom modules, criteria, or optimizers #597

Merged
1 change: 1 addition & 0 deletions CHANGES.md
@@ -10,6 +10,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Added

- Added the `event_name` argument for `LRScheduler` for optional recording of LR changes inside `net.history`. NOTE: Supported only in PyTorch>=1.4
- Make it easier to add custom modules, criteria, or optimizers to a neural net class by automatically registering them where necessary and by making them available to `set_params`

### Changed

89 changes: 83 additions & 6 deletions docs/user/neuralnet.rst
@@ -430,9 +430,10 @@ Subclassing NeuralNet
---------------------

 Apart from the :class:`.NeuralNet` base class, we provide
-:class:`.NeuralNetClassifier` and :class:`.NeuralNetRegressor` for
-typical classification and regressions tasks. They should work as
-drop-in replacements for sklearn classifiers and regressors.
+:class:`.NeuralNetClassifier`, :class:`.NeuralNetBinaryClassifier`,
+and :class:`.NeuralNetRegressor` for typical classification, binary
+classification, and regression tasks. They should work as drop-in
+replacements for sklearn classifiers and regressors.

The :class:`.NeuralNet` class is a little less opinionated about the
incoming data, e.g. it does not determine a loss function by default.
@@ -442,9 +443,9 @@ case, you would typically subclass from :class:`.NeuralNet`.
skorch aims at making subclassing as easy as possible, so that it
doesn't stand in your way. For instance, all components (``module``,
``optimizer``, etc.) have their own initialization method
-(``initialize_module``, ``initialize_optimizer``, etc.). That way, if
-you want to modify the initialization of a component, you can easily
-do so.
+(:meth:`.initialize_module`, :meth:`.initialize_optimizer`,
+etc.). That way, if you want to modify the initialization of a
+component, you can easily do so.

Additionally, :class:`.NeuralNet` has a couple of ``get_*`` methods for
when a component is retrieved repeatedly. E.g.,
@@ -467,3 +468,79 @@ total loss:

.. note:: This example also regularizes the biases, which you typically
don't need to do.
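
A sketch of what such a loss customization can look like (the
``lambda1`` penalty weight and the L1 term are assumptions for
illustration, not necessarily the exact example in the collapsed hunk
above):

.. code:: python

    class RegularizedNet(NeuralNet):
        def __init__(self, *args, lambda1=0.01, **kwargs):
            super().__init__(*args, **kwargs)
            self.lambda1 = lambda1

        def get_loss(self, y_pred, y_true, X=None, training=False):
            loss = super().get_loss(y_pred, y_true, X=X, training=training)
            # sum over *all* parameters, i.e. including the biases
            loss += self.lambda1 * sum(w.abs().sum() for w in self.module_.parameters())
            return loss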

It is possible to add your own criterion, module, or optimizer to your
customized neural net class. You should follow a few rules when you do
so:

1. Set the attribute for the new component inside the corresponding
   ``initialize_*`` method. E.g., when adding an optimizer, set it
   inside :meth:`.initialize_optimizer`.
2. Inside the initialization method, use :meth:`.get_params_for` (or,
if dealing with an optimizer, :meth:`.get_params_for_optimizer`) to
retrieve the arguments for the constructor.
3. The attribute name should contain the substring ``"module"`` if
it's a module, ``"criterion"`` if a criterion, and ``"optimizer"``
if an optimizer. This way, skorch knows if a change in
parameters (say, because :meth:`.set_params` was called) should
trigger re-initialization.

Following these rules ensures that your added components are amenable
to :meth:`.set_params` and hence to things like grid search.

Here is an example of what this could look like in practice:

.. code:: python

    class MyNet(NeuralNet):
        def initialize_criterion(self, *args, **kwargs):
            super().initialize_criterion(*args, **kwargs)

            # add an additional criterion
            params = self.get_params_for('other_criterion')
            self.other_criterion_ = nn.BCELoss(**params)
            return self

        def initialize_module(self, *args, **kwargs):
            super().initialize_module(*args, **kwargs)

            # add an additional module called 'mymodule'
            params = self.get_params_for('mymodule')
            self.mymodule_ = MyModule(**params)
            return self

        def initialize_optimizer(self, *args, **kwargs):
            super().initialize_optimizer(*args, **kwargs)

            # add an additional optimizer called 'optimizer2' that is
            # responsible for 'mymodule'
            named_params = self.mymodule_.named_parameters()
            pgroups, params = self.get_params_for_optimizer('optimizer2', named_params)
            self.optimizer2_ = torch.optim.SGD(*pgroups, **params)
            return self

        ...  # additional changes


    net = MyNet(
        ...,
        other_criterion__reduction='sum',
        mymodule__num_units=123,
        optimizer2__lr=0.1,
    )
    net.fit(X, y)

    # set_params works
    net.set_params(optimizer2__lr=0.05)
    net.partial_fit(X, y)

    # grid search et al. works
    search = GridSearchCV(net, {'mymodule__num_units': [10, 50, 100]}, ...)
    search.fit(X, y)

In this example, a new criterion, a new module, and a new optimizer
were added. Of course, further changes need to be made to the net so
that those new components are actually used for something, but this
example should illustrate how to start. Since the rules outlined
above are followed, we can use grid search on our custom-defined
components.
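
What those additional changes look like depends on the use case. One
minimal, hypothetical possibility is to fold the extra criterion into
the loss; this assumes that ``y_pred`` and ``y_true`` have shapes and
dtypes that ``nn.BCELoss`` accepts:

.. code:: python

    class MyNet(NeuralNet):
        ...  # the initialize_* methods from above

        def get_loss(self, y_pred, y_true, X=None, training=False):
            loss = super().get_loss(y_pred, y_true, X=X, training=training)
            # simply adding the two losses is just one possible choice
            return loss + self.other_criterion_(y_pred, y_true)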
2 changes: 1 addition & 1 deletion notebooks/Advanced_Usage.ipynb
@@ -685,7 +685,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"Error: Stratified CV requires explicitely passing a suitable y.\n"
"Error: Stratified CV requires explicitly passing a suitable y.\n"
]
}
],
2 changes: 1 addition & 1 deletion skorch/dataset.py
@@ -312,7 +312,7 @@ def _is_regular(self, x):

    def __call__(self, dataset, y=None, groups=None):
        bad_y_error = ValueError(
-           "Stratified CV requires explicitely passing a suitable y.")
+           "Stratified CV requires explicitly passing a suitable y.")
        if (y is None) and self.stratified:
            raise bad_y_error

114 changes: 109 additions & 5 deletions skorch/net.py
@@ -34,6 +34,16 @@
from skorch.utils import to_tensor


PYTORCH_COMPONENTS = {'criterion', 'module', 'optimizer'}
"""Special names that mark pytorch components.

These special names are used to recognize whether an attribute that is
being set in the net should be added to ``prefixes_`` and
``cuda_dependent_attributes_``.

"""


# pylint: disable=too-many-instance-attributes
class NeuralNet:
    # pylint: disable=anomalous-backslash-in-string

@@ -1436,7 +1446,7 @@ def _check_kwargs(self, kwargs):
        tmpl = ("__init__() got unexpected argument(s) {}. "
                "Either you made a typo, or you added new arguments "
                "in a subclass; if that is the case, the subclass "
-               "should deal with the new arguments explicitely.")
+               "should deal with the new arguments explicitly.")
        msg = tmpl.format(', '.join(sorted(unexpected_kwargs)))
        msgs.append(msg)

@@ -1497,17 +1507,17 @@ def set_params(self, **kwargs):
        self.initialize_callbacks()
        self._set_params_callback(**cb_params)

-       if any(key.startswith('criterion') for key in special_params):
+       if any('criterion' in key.split('__', 1)[0] for key in special_params):
            self.initialize_criterion()

        module_triggers_optimizer_reinit = False
-       if any(key.startswith('module') for key in special_params):
+       if any('module' in key.split('__', 1)[0] for key in special_params):
            self.initialize_module()
            module_triggers_optimizer_reinit = True

        optimizer_changed = (
-           any(key.startswith('optimizer') for key in special_params) or
-           'lr' in normal_params
+           any('optimizer' in key.split('__', 1)[0] for key in special_params)
+           or 'lr' in normal_params
        )
        if module_triggers_optimizer_reinit or optimizer_changed:
            # Model selectors such as GridSearchCV will set the
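
The switch from ``startswith`` to testing the part before ``'__'``
matters for custom components; a small illustration with hypothetical
parameter keys:

    key = 'other_criterion__reduction'
    key.split('__', 1)[0]             # -> 'other_criterion'
    'criterion' in 'other_criterion'  # -> True: criterion is re-initialized
    key.startswith('criterion')       # -> False: the old check missed this case
    # taking only the part before '__' also avoids false positives when a
    # component name merely appears in the parameter part of the key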
@@ -1599,6 +1609,100 @@ def __setstate__(self, state):

        self.__dict__.update(state)

    def _register_attribute(
            self,
            name,
            prefixes=True,
            cuda_dependent_attributes=True,
    ):
        """Add attribute name to prefixes_ and
        cuda_dependent_attributes_.

        The first is to take care that the attribute works correctly
        with set_params, e.g. when it comes to re-initialization.

        The second is to make sure that nets trained with CUDA can be
        loaded without CUDA.

        This method takes care of not mutating the lists.

        Parameters
        ----------
        prefixes : bool (default=True)
          Whether to add to prefixes_.

        cuda_dependent_attributes : bool (default=True)
          Whether to add to cuda_dependent_attributes_.

        """
        # copy the lists to avoid mutation
        if prefixes:
            self.prefixes_ = self.prefixes_[:] + [name]

        if cuda_dependent_attributes:
            self.cuda_dependent_attributes_ = (
                self.cuda_dependent_attributes_[:] + [name + '_'])

[Review thread on ``prefixes=True``]
Member: Do we call _register_attribute with prefixes=False? We also
always call this with cuda_dependent_attributes=True. I guess there are
no torch components that are not also cuda_dependent_attributes?
Collaborator (author): Yes, it could be that this is one of these cases
of YAGNI. My idea was to provide a more general "setter" for prefixes
and cuda-dependent attributes that could be used in other places as
well, where it might not make sense to change both at the same time.

[Review thread on the docstring]
Member: This should include the purpose of registering an attribute.
Collaborator (author): done
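
To make the effect of registering concrete, a small sketch of what it
does to the two lists (manual calls shown for illustration only;
normally ``__setattr__`` below takes care of this, and the attribute
name is made up):

    net._register_attribute('mymodule')
    assert 'mymodule' in net.prefixes_
    assert 'mymodule_' in net.cuda_dependent_attributes_
    net._unregister_attribute('mymodule')   # undoes both additions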

    def _unregister_attribute(
            self,
            name,
            prefixes=True,
            cuda_dependent_attributes=True,
    ):
        """Remove attribute name from prefixes_ and
        cuda_dependent_attributes_.

        Use this to remove PyTorch components that are not needed
        anymore. This is mostly a clean up job, so as to not leave
        unnecessary prefixes or cuda-dependent attributes.

        This method takes care of not mutating the lists.

        Parameters
        ----------
        prefixes : bool (default=True)
          Whether to remove from prefixes_.

        cuda_dependent_attributes : bool (default=True)
          Whether to remove from cuda_dependent_attributes_.

        """
        # copy the lists to avoid mutation
        if prefixes:
            self.prefixes_ = [p for p in self.prefixes_[:] if p != name]

        if cuda_dependent_attributes:
            self.cuda_dependent_attributes_ = [
                a for a in self.cuda_dependent_attributes_ if a != name + '_']

[Review thread on the docstring]
Member: This should include the consequence of un-registering an
attribute.
Collaborator (author): done

[Review thread on ``prefixes : bool (default=True)``]
Member: I do not see a use case where one would set prefixes=False or
cuda_dependent_attributes=False for _unregister_attribute. Even if
_register_attribute was called with prefixes=False,
_unregister_attribute would do the right thing with prefixes set either
way.
Collaborator (author): The only use would be a microscopic gain in
speed, I guess. Overall, I believe that we should keep it consistent
between register and unregister, so either both have the option or
neither. I don't have a strong opinion on that.
Member: Okay, let's keep it.

[Review thread on the final list comprehension]
Member: hm

  Suggested change:
  -                a for a in self.cuda_dependent_attributes_ if a != name + '_']
  +                a for a in self.cuda_dependent_attributes_[:] if a != name + '_']

Collaborator (author): I don't think we need to copy here since we
create a new list anyway, right?
Member: Yea, it's okay here.

    def __setattr__(self, name, attr):
        """Set an attribute on the net

        When a custom net with additional torch modules or optimizers
        is created, those attributes are added to ``prefixes_`` and
        ``cuda_dependent_attributes_`` automatically.

        """
        # If it's a
        # 1. known attribute or
        # 2. special param like module__num_units or
        # 3. not a torch module/optimizer instance or class,
        # just setattr as usual.
        # For a discussion of why we chose this implementation, see here:
        # https://github.com/skorch-dev/skorch/pull/597
        is_known = name.endswith('_') or (name in self.prefixes_)
        is_special_param = '__' in name
        is_torch_component = any(c in name for c in PYTORCH_COMPONENTS)

        if not (is_known or is_special_param) and is_torch_component:
            self._register_attribute(name)
        super().__setattr__(name, attr)

    def __delattr__(self, name):
        # take extra precautions to undo the changes made in __setattr__
        self._unregister_attribute(name)
        super().__delattr__(name)
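
Taken together, the two methods give roughly the following behavior;
the attribute names are hypothetical and MyModule stands in for any
torch module:

    net.mymodule2 = MyModule()   # contains 'module', no '__', not known
                                 # -> registered in prefixes_ and
                                 #    cuda_dependent_attributes_
    net.module__num_units = 10   # special param ('__' in name) -> not registered
    net.initialized_ = True      # ends on '_' -> known attribute, not registered
    del net.mymodule2            # __delattr__ unregisters the name again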

    def save_params(
            self, f_params=None, f_optimizer=None, f_history=None):
        """Saves the module's parameters, history, and optimizer,
43 changes: 25 additions & 18 deletions skorch/tests/callbacks/test_scoring.py
@@ -74,10 +74,10 @@ def test_scoring_uses_score_when_none(
            max_epochs=5,
            train_split=train_split,
        )
-       net.fit(*data)
+       with patch.object(net, 'score', side_effect=[10, 8, 6, 11, 7]):
+           net.fit(*data)

        result = net.history[:, 'score']
+       # these values are the hard-coded side_effects from net.score
        expected = [10, 8, 6, 11, 7]
        assert result == expected

@@ -104,15 +104,17 @@ def test_scoring_uses_best_score_when_continuing_training(
            # to load best score for this scorer.
            train_split=None,
        )
-       net.fit(*data)
-
-       history_fn = tmpdir.mkdir('skorch').join('history.json')
-       net.save_params(f_history=str(history_fn))
+       with patch.object(net, 'score', side_effect=[10, 8, 6, 11, 7]):
+           net.fit(*data)
+
+           history_fn = tmpdir.mkdir('skorch').join('history.json')
+           net.save_params(f_history=str(history_fn))

-       net.initialize()
-       net.load_params(f_history=str(history_fn))
-       net.max_epochs = 5 - initial_epochs
-       net.partial_fit(*data)
+           net.initialize()
+           net.load_params(f_history=str(history_fn))
+           net.max_epochs = 5 - initial_epochs
+           net.partial_fit(*data)

        is_best = net.history[:, 'score_best']
        assert is_best == expected
@@ -135,7 +137,9 @@ def test_best_score_when_lower_is_better(
            train_split=train_split,
            max_epochs=5,
        )
-       net.fit(*data)
+       with patch.object(net, 'score', side_effect=[10, 8, 6, 11, 7]):
+           net.fit(*data)

        if lower_is_better is not None:
            is_best = net.history[:, 'score_best']
@@ -660,15 +664,17 @@ def test_scoring_uses_best_score_when_continuing_training(
            # to load best score for this scorer.
            train_split=None,
        )
-       net.fit(*data)
-
-       history_fn = tmpdir.mkdir('skorch').join('history.json')
-       net.save_params(f_history=str(history_fn))
+       with patch.object(net, 'score', side_effect=[10, 8, 6, 11, 7]):
+           net.fit(*data)
+
+           history_fn = tmpdir.mkdir('skorch').join('history.json')
+           net.save_params(f_history=str(history_fn))

-       net.max_epochs = 5 - initial_epochs
-       net.initialize()
-       net.load_params(f_history=str(history_fn))
-       net.partial_fit(*data)
+           net.max_epochs = 5 - initial_epochs
+           net.initialize()
+           net.load_params(f_history=str(history_fn))
+           net.partial_fit(*data)

        is_best = net.history[:, 'score_best']
        assert is_best == expected
@@ -691,7 +697,8 @@ def test_best_score_when_lower_is_better(
            train_split=train_split,
            max_epochs=5,
        )
-       net.fit(*data)
+       with patch.object(net, 'score', side_effect=[10, 8, 6, 11, 7]):
+           net.fit(*data)

        if lower_is_better is not None:
            is_best = net.history[:, 'score_best']
1 change: 0 additions & 1 deletion skorch/tests/conftest.py
@@ -131,7 +131,6 @@ def func(dataset, y):
@pytest.fixture
def net_cls():
    from skorch import NeuralNetRegressor
-   NeuralNetRegressor.score = Mock(side_effect=[10, 8, 6, 11, 7])
    return NeuralNetRegressor


8 changes: 4 additions & 4 deletions skorch/tests/test_classifier.py
@@ -67,9 +67,9 @@ def test_predict_and_predict_proba(self, net_fit, data):
        y_pred = net_fit.predict(X)
        assert np.allclose(np.argmax(y_proba, 1), y_pred, rtol=1e-5)

-   def test_score(self, net, data):
+   def test_score(self, net_fit, data):
        X, y = data
-       accuracy = net.score(X, y)
+       accuracy = net_fit.score(X, y)
        assert 0. <= accuracy <= 1.
# classifier-specific test
@@ -262,9 +262,9 @@ def test_predict_predict_proba(self, net, data, threshold):
        y_pred_actual = net.predict(X)
        assert np.allclose(y_pred_exp, y_pred_actual)

-   def test_score(self, net, data):
+   def test_score(self, net_fit, data):
        X, y = data
-       accuracy = net.score(X, y)
+       accuracy = net_fit.score(X, y)
        assert 0. <= accuracy <= 1.

def test_target_2d_raises(self, net, data):