Skip to content
This repository was archived by the owner on Nov 17, 2023. It is now read-only.

Commit 0cb2ad6

Browse files
lebegnswamy
authored and committed
Revert "Set correct update on kvstore flag in dist_device_sync mode (v1.3.x) (#13121)" (#13228)
This reverts commit d0b83d4.
1 parent 7fc344c commit 0cb2ad6

2 files changed

Lines changed: 3 additions & 33 deletions

File tree

python/mxnet/gluon/trainer.py

Lines changed: 3 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -194,18 +194,14 @@ def _init_kvstore(self):
194194

195195
if config['update_on_kvstore'] is not None:
196196
update_on_kvstore = config['update_on_kvstore']
197-
198197
if kvstore:
199198
if self._compression_params:
200199
kvstore.set_gradient_compression(self._compression_params)
201200
self._distributed = 'dist' in kvstore.type
202201
if self._distributed:
203202
# kv.pull(row_sparse_grad) is not supported for dist kvstore
204-
# Captures condition for dist_async, dist_device_sync or based on config for
205-
# update_on_kvstore
206203
update_on_kvstore = self._contains_sparse_weight or self._contains_sparse_grad \
207-
or 'device' in kvstore.type or 'async' in kvstore.type \
208-
or config['update_on_kvstore']
204+
or 'async' in kvstore.type
209205
if update_on_kvstore:
210206
# optimizer preferably needs to be set before init for multiprecision
211207
kvstore.set_optimizer(self._optimizer)
@@ -273,20 +269,13 @@ def step(self, batch_size, ignore_stale_grad=False):
273269
If true, ignores Parameters with stale gradient (gradient that has not
274270
been updated by `backward` after last step) and skip update.
275271
"""
276-
rescale_grad = self._scale / batch_size
277-
if self._update_on_kvstore and self._distributed and \
278-
self._optimizer.rescale_grad != rescale_grad:
279-
raise UserWarning('Possible change in the `batch_size` from previous `step` detected.' \
280-
'Optimizer gradient normalizing factor will not change w.r.t new batch_size when ' \
281-
'update_on_kvstore=True and when distributed `kvstore` is used.')
282-
283-
self._optimizer.rescale_grad = rescale_grad
284-
285272
if not self._kv_initialized:
286273
self._init_kvstore()
287274
if self._params_to_init:
288275
self._init_params()
289276

277+
self._optimizer.rescale_grad = self._scale / batch_size
278+
290279
self._allreduce_grads()
291280
self._update(ignore_stale_grad)
292281

tests/nightly/dist_device_sync_kvstore.py

Lines changed: 0 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -90,25 +90,6 @@ def check_init(kv, cur_keys, cur_shape, device=False):
9090
my_rank = kv.rank
9191
print('worker ' + str(my_rank) + ' is initialized')
9292

93-
def test_gluon_trainer_type():
94-
def check_trainer_kv_update(update_on_kv):
95-
params = mx.gluon.ParameterDict()
96-
x = params.get('x', shape=(10,1), lr_mult=1.0)
97-
params.initialize(ctx=[mx.cpu(0), mx.cpu(1)], init='zeros')
98-
try:
99-
trainer = mx.gluon.Trainer(params, 'sgd', {'learning_rate': 0.1}, kvstore=kv, update_on_kvstore=update_on_kv)
100-
trainer._init_kvstore()
101-
assert trainer._kv_initialized
102-
assert trainer._update_on_kvstore is True
103-
except ValueError:
104-
assert update_on_kv is False
105-
106-
check_trainer_kv_update(False)
107-
check_trainer_kv_update(True)
108-
check_trainer_kv_update(None)
109-
my_rank = kv.rank
110-
print('worker ' + str(my_rank) + ' passed test_gluon_trainer_type')
111-
11293
if __name__ == "__main__":
11394
test_sync_init()
11495
test_sync_push_pull()

0 commit comments

Comments (0)