Skip to content
This repository was archived by the owner on Nov 17, 2023. It is now read-only.

Commit 145f82d

Browse files
ThomasDelteil authored and szha committed
Updating SymbolBlock.imports to support different dtypes (#15230)
* updating symbol block for different dtypes * remove logging * update test and fix lint issues * lint * fix initializer
1 parent eb48370 commit 145f82d

3 files changed

Lines changed: 119 additions & 29 deletions

File tree

python/mxnet/gluon/block.py

Lines changed: 22 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,8 @@
2626
import re
2727
from collections import OrderedDict
2828

29-
from ..base import mx_real_t
29+
30+
from ..base import mx_real_t, MXNetError
3031
from .. import symbol, ndarray, initializer
3132
from ..symbol import Symbol
3233
from ..ndarray import NDArray
@@ -354,7 +355,7 @@ def save_params(self, filename):
354355
'save_parameters may resolve this error.'%e.message)
355356

356357
def load_parameters(self, filename, ctx=None, allow_missing=False,
357-
ignore_extra=False, cast_dtype=False):
358+
ignore_extra=False, cast_dtype=False, dtype_source='current'):
358359
"""Load parameters from file previously saved by `save_parameters`.
359360
360361
Parameters
@@ -371,7 +372,10 @@ def load_parameters(self, filename, ctx=None, allow_missing=False,
371372
cast_dtype : bool, default False
372373
Cast the data type of the NDArray loaded from the checkpoint to the dtype
373374
provided by the Parameter if any.
374-
375+
dtype_source : str, default 'current'
376+
must be in {'current', 'saved'}
377+
Only valid if cast_dtype=True, specify the source of the dtype for casting
378+
the parameters
375379
References
376380
----------
377381
`Saving and Loading Gluon Models \
@@ -386,7 +390,8 @@ def load_parameters(self, filename, ctx=None, allow_missing=False,
386390
# legacy loading
387391
del loaded
388392
self.collect_params().load(
389-
filename, ctx, allow_missing, ignore_extra, self.prefix, cast_dtype=cast_dtype)
393+
filename, ctx, allow_missing, ignore_extra, self.prefix,
394+
cast_dtype=cast_dtype, dtype_source=dtype_source)
390395
return
391396

392397
if not allow_missing:
@@ -402,7 +407,7 @@ def load_parameters(self, filename, ctx=None, allow_missing=False,
402407
"which contains parameters %s. Set ignore_extra=True to ignore. "%(
403408
name, filename, _brief_print_list(self._params.keys())))
404409
if name in params:
405-
params[name]._load_init(loaded[name], ctx, cast_dtype=cast_dtype)
410+
params[name]._load_init(loaded[name], ctx, cast_dtype=cast_dtype, dtype_source=dtype_source)
406411

407412
def load_params(self, filename, ctx=None, allow_missing=False,
408413
ignore_extra=False):
@@ -1021,10 +1026,15 @@ def imports(symbol_file, input_names, param_file=None, ctx=None):
10211026
sym = symbol.load(symbol_file)
10221027
if isinstance(input_names, str):
10231028
input_names = [input_names]
1024-
inputs = [symbol.var(i) for i in input_names]
1029+
if param_file is None:
1030+
# Get a valid type inference by using fp32
1031+
inputs = [symbol.var(i, dtype=mx_real_t) for i in input_names]
1032+
else:
1033+
# Do not specify type, rely on saved params type instead
1034+
inputs = [symbol.var(i) for i in input_names]
10251035
ret = SymbolBlock(sym, inputs)
10261036
if param_file is not None:
1027-
ret.collect_params().load(param_file, ctx=ctx)
1037+
ret.collect_params().load(param_file, ctx=ctx, cast_dtype=True, dtype_source='saved')
10281038
return ret
10291039

10301040
def __repr__(self):
@@ -1156,7 +1166,11 @@ def _infer_param_types(in_params, out_params, arg_params, aux_params, default_dt
11561166
# Try to infer types of other parameters.
11571167
if can_infer_input_type:
11581168
params = {k:v for k, v in zip(input_sym_names, input_sym_arg_types)}
1159-
arg_types, _, aux_types = out_params.infer_type(**params)
1169+
try:
1170+
arg_types, _, aux_types = out_params.infer_type(**params)
1171+
except MXNetError:
1172+
# Cannot infer type with current input
1173+
arg_types, aux_types = None, None
11601174

11611175
if arg_types is None or len(arg_types) != len(arg_params):
11621176
arg_types = []

python/mxnet/gluon/parameter.py

Lines changed: 46 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,7 @@ def __init__(self, name, grad_req='write', shape=None, dtype=mx_real_t,
117117
shape = (shape,)
118118
self._shape = shape
119119
self.name = name
120-
self.dtype = dtype
120+
self._dtype = dtype
121121
self.lr_mult = lr_mult
122122
self.wd_mult = wd_mult
123123
self.grad_req = grad_req
@@ -155,6 +155,18 @@ def grad_req(self, req):
155155
elif self._data is not None:
156156
self._init_grad()
157157

158+
@property
159+
def dtype(self):
160+
"""The type of the parameter.
161+
162+
Setting the dtype value is equivalent to casting the value of the parameter
163+
"""
164+
return self._dtype
165+
166+
@dtype.setter
167+
def dtype(self, dtype):
168+
self.cast(dtype)
169+
158170
@property
159171
def shape(self):
160172
"""The shape of the parameter.
@@ -241,8 +253,24 @@ def _get_row_sparse(self, arr_list, ctx, row_id):
241253
self._trainer._row_sparse_pull(self, results, row_id)
242254
return results
243255

244-
def _load_init(self, data, ctx, cast_dtype=False):
245-
"""(Re)initializes by loading from data."""
256+
def _load_init(self, data, ctx, cast_dtype=False, dtype_source='current'):
257+
"""
258+
(Re)initializes by loading from data.
259+
Parameters
260+
----------
261+
data : NDArray
262+
The data to load
263+
ctx : Context or list of Context
264+
Context(s) initialize loaded parameters on.
265+
cast_dtype : bool, default False
266+
Cast the data type of the parameter
267+
dtype_source : str, default 'current'
268+
must be in {'current', 'saved'}
269+
Only valid if cast_dtype=True, specify the source of the dtype for casting
270+
the parameters
271+
"""
272+
if cast_dtype:
273+
assert dtype_source in ['current', 'saved']
246274
if self.shape:
247275
for self_dim, data_dim in zip(self.shape, data.shape):
248276
assert self_dim in (0, data_dim), \
@@ -252,8 +280,12 @@ def _load_init(self, data, ctx, cast_dtype=False):
252280
self.shape = tuple(i if i != 0 else j for i, j in zip(self.shape, data.shape))
253281
if self.dtype:
254282
if cast_dtype and np.dtype(self.dtype).type != data.dtype:
255-
data = data.astype(self.dtype, copy=False)
256-
assert np.dtype(self.dtype).type == data.dtype, \
283+
if dtype_source == 'current':
284+
data = data.astype(self.dtype, copy=False)
285+
elif dtype_source == 'saved':
286+
self.dtype = data.dtype
287+
else:
288+
assert np.dtype(self.dtype).type == data.dtype, \
257289
"Failed loading Parameter '%s' from saved params: " \
258290
"dtype incompatible expected %s vs saved %s. " \
259291
"Set cast_dtype=True to cast the dtype of saved params."%(
@@ -580,7 +612,7 @@ def cast(self, dtype):
580612
dtype : str or numpy.dtype
581613
The new data type.
582614
"""
583-
self.dtype = dtype
615+
self._dtype = dtype
584616
if self._data is None:
585617
return
586618
with autograd.pause():
@@ -894,7 +926,8 @@ def save(self, filename, strip_prefix=''):
894926
ndarray.save(filename, arg_dict)
895927

896928
def load(self, filename, ctx=None, allow_missing=False,
897-
ignore_extra=False, restore_prefix='', cast_dtype=False):
929+
ignore_extra=False, restore_prefix='', cast_dtype=False,
930+
dtype_source="current"):
898931
"""Load parameters from file.
899932
900933
Parameters
@@ -911,8 +944,11 @@ def load(self, filename, ctx=None, allow_missing=False,
911944
restore_prefix : str, default ''
912945
prepend prefix to names of stored parameters before loading.
913946
cast_dtype : bool, default False
914-
Cast the data type of the NDArray loaded from the checkpoint to the dtype
915-
provided by the Parameter if any.
947+
Cast the data type of the parameter
948+
dtype_source : str, default 'current'
949+
must be in {'current', 'saved'}
950+
Only valid if cast_dtype=True, specify the source of the dtype for casting
951+
the parameters
916952
"""
917953
if restore_prefix:
918954
for name in self.keys():
@@ -938,4 +974,4 @@ def load(self, filename, ctx=None, allow_missing=False,
938974
"Please make sure source and target networks have the same prefix."%(
939975
name[lprefix:], filename, _brief_print_list(self._params.keys()))
940976
continue
941-
self[name]._load_init(arg_dict[name], ctx, cast_dtype=cast_dtype)
977+
self[name]._load_init(arg_dict[name], ctx, cast_dtype=cast_dtype, dtype_source=dtype_source)

tests/python/unittest/test_gluon.py

Lines changed: 51 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -94,7 +94,7 @@ def test_parameter_invalid_access():
9494
assertRaises(RuntimeError, p1.list_row_sparse_data, row_id)
9595

9696
@with_seed()
97-
def test_paramdict():
97+
def test_parameter_dict():
9898
ctx = mx.cpu(1)
9999
params0 = gluon.ParameterDict('net_')
100100
params0.get('w0', shape=(10, 10))
@@ -107,15 +107,15 @@ def test_paramdict():
107107
prev_w0 = params0.get('w0').data(ctx)
108108
prev_w1 = params0.get('w1').row_sparse_data(all_row_ids)
109109
# save params
110-
params0.save('test_paramdict.params')
110+
params0.save('test_parameter_dict.params')
111111

112112
# load params
113113
params1 = gluon.ParameterDict('net_')
114114
params1.get('w0', shape=(10, 10))
115115
params1.get('w1', shape=(10, 10), stype='row_sparse')
116-
params1.load('test_paramdict.params', ctx)
116+
params1.load('test_parameter_dict.params', ctx)
117117
trainer1 = mx.gluon.Trainer(params1, 'sgd')
118-
118+
119119
# compare the values before and after save/load
120120
cur_w0 = params1.get('w0').data(ctx)
121121
cur_w1 = params1.get('w1').row_sparse_data(all_row_ids)
@@ -127,13 +127,30 @@ def test_paramdict():
127127
params2 = gluon.ParameterDict('net_')
128128
params2.get('w0', shape=(10, 10))
129129
params2.get('w1', shape=(10, 10))
130-
params2.load('test_paramdict.params', ctx)
130+
params2.load('test_parameter_dict.params', ctx)
131131

132132
# compare the values before and after save/load
133133
cur_w0 = params2.get('w0').data(ctx)
134134
cur_w1 = params2.get('w1').data(ctx)
135135
mx.test_utils.assert_almost_equal(prev_w0.asnumpy(), cur_w0.asnumpy())
136136
mx.test_utils.assert_almost_equal(prev_w1.asnumpy(), cur_w1.asnumpy())
137+
138+
# test the dtype casting functionality
139+
params0 = gluon.ParameterDict('')
140+
params0.get('w0', shape=(10, 10), dtype='float32')
141+
params0.get('w1', shape=(10, 10), dtype='int8')
142+
params0.initialize(mx.init.One(), ctx=ctx)
143+
params0.save('test_parameter_dict.params')
144+
145+
params1 = gluon.ParameterDict('')
146+
params1.get('w0', shape=(10, 10), dtype='float16')
147+
params1.get('w1', shape=(10, 10), dtype='float64')
148+
params1.load('test_parameter_dict.params', cast_dtype=True, dtype_source='current')
149+
assert params1['w0'].data().dtype == np.float16
150+
assert params1['w1'].data().dtype == np.float64
151+
params1.load('test_parameter_dict.params', cast_dtype=True, dtype_source='saved')
152+
assert params1['w0'].data().dtype == np.float32
153+
assert params1['w1'].data().dtype == np.int8
137154

138155

139156
@with_seed()
@@ -242,7 +259,7 @@ def __init__(self, **kwargs):
242259

243260

244261
@with_seed()
245-
def test_collect_paramters():
262+
def test_collect_parameters():
246263
net = nn.HybridSequential(prefix="test_")
247264
with net.name_scope():
248265
net.add(nn.Conv2D(10, 3))
@@ -355,18 +372,30 @@ def hybrid_forward(self, F, x):
355372
net_fp32.forward(data)
356373
net_fp32.export(tmpfile, 0)
357374

358-
# 2. Load the saved model and verify if all the params are loaded correctly.
359-
# and choose one of the param to verify the type if fp64.
360-
sm = mx.sym.load(tmpfile + '-symbol.json')
375+
# 2.a Load the saved model and verify if all the params are loaded correctly.
376+
# and choose one of the params to verify the type is fp64.
377+
sym_file = tmpfile + '-symbol.json'
378+
params_file = tmpfile + '-0000.params'
379+
sm = mx.sym.load(sym_file)
361380
inputs = mx.sym.var('data', dtype='float64')
362381
net_fp64 = mx.gluon.SymbolBlock(sm, inputs)
363-
net_fp64.collect_params().load(tmpfile + '-0000.params', ctx=ctx)
364-
# 3. Get a conv layer's weight parameter name. Conv layer's weight param is
382+
net_fp64.collect_params().load(params_file, ctx=ctx)
383+
# Get a conv layer's weight parameter name. Conv layer's weight param is
365384
# expected to be of dtype casted, fp64.
366385
for param_name in net_fp64.params.keys():
367386
if 'conv' in param_name and 'weight' in param_name:
368387
break
369388
assert np.dtype(net_fp64.params[param_name].dtype) == np.dtype(np.float64)
389+
390+
# 2.b Verify same functionality with the imports API
391+
net_fp_64 = mx.gluon.SymbolBlock.imports(sym_file, 'data', params_file, ctx=ctx)
392+
393+
# Get a conv layer's weight parameter name. Conv layer's weight param is
394+
# expected to be of dtype casted, fp64.
395+
for param_name in net_fp_64.params.keys():
396+
if 'conv' in param_name and 'weight' in param_name:
397+
break
398+
assert np.dtype(net_fp_64.params[param_name].dtype) == np.dtype(np.float64)
370399

371400
# Cast the symbol block to FP32 and try to forward a FP32 data.
372401
# This will verify SymbolBlock.cast() functionality.
@@ -2750,6 +2779,17 @@ def test_gluon_param_load():
27502779
net.cast('float16')
27512780
net.load_parameters('test_gluon_param_load.params', cast_dtype=True)
27522781
mx.nd.waitall()
2782+
2783+
@with_seed()
2784+
def test_gluon_param_load_dtype_source():
2785+
net = mx.gluon.nn.Dense(10, in_units=10)
2786+
net.initialize()
2787+
net.cast('float16')
2788+
net.save_parameters('test_gluon_param_load_dtype_source.params')
2789+
net.cast('float32')
2790+
net.load_parameters('test_gluon_param_load_dtype_source.params', cast_dtype=True, dtype_source="saved")
2791+
assert net.weight.dtype == np.float16
2792+
mx.nd.waitall()
27532793

27542794
if __name__ == '__main__':
27552795
import nose

0 commit comments

Comments
 (0)