|
6 | 6 | import copy |
7 | 7 | import copy |
8 | 8 | from jax.tree_util import tree_map |
| 9 | + |
9 | 10 | """ |
10 | 11 | Jax default parameter structure: |
11 | 12 | dict_keys(['Dense_0', 'Dense_1', 'Dense_2', 'Dense_3', 'Dense_4', 'Dense_5', 'Dense_6', 'Dense_7', 'embedding_table']) |
@@ -44,11 +45,45 @@ def use_pytorch_weights_inplace(jax_params, file_name=None, replicate=False): |
44 | 45 | for i, j in zip([0, 2, 4, 6, 8], range(3, 8)): |
45 | 46 | jax_params[f'Dense_{j}']['kernel'] = jnp.array(numpy_weights[f'top_mlp.{i}.weight'].T) |
46 | 47 | jax_params[f'Dense_{j}']['bias'] = jnp.array(numpy_weights[f'top_mlp.{i}.bias']) |
47 | | - #jax_params = tree_map(lambda x: jnp.array(x), jax_params) |
| 48 | + |
48 | 49 | del state_dict |
49 | 50 | return jax_params |
50 | 51 |
|
51 | 52 |
|
def use_pytorch_weights_cpu_copy(jax_params, file_name=None, replicate=False,
                                 num_embedding_chunks=4):
    """Return a CPU-resident copy of ``jax_params`` filled with PyTorch weights.

    Unlike the in-place variant, the caller's pytree is left untouched: every
    leaf is first deep-copied onto the host CPU device, then the checkpoint
    weights are written into that copy.

    Args:
        jax_params: Flax-style parameter dict with keys 'Dense_0'..'Dense_7'
            and 'embedding_table'.
        file_name: Path to a PyTorch checkpoint whose state_dict contains
            'embedding_chunk_{i}', 'bot_mlp.{i}.weight'/'bias' and
            'top_mlp.{i}.weight'/'bias' entries.
        replicate: Unused; kept for signature compatibility with the other
            loader helpers in this module.
        num_embedding_chunks: Number of 'embedding_chunk_{i}' entries to
            concatenate into the embedding table. Defaults to 4, the
            previously hard-coded value.

    Returns:
        A new parameter pytree on the CPU device holding the checkpoint
        weights.
    """
    def _deep_copy_to_cpu(pytree):
        # Host-side deep copy so mutating the result can never alias the
        # caller's (possibly device-resident) parameters.
        cpu = jax.devices("cpu")[0]
        return tree_map(
            lambda x: jax.device_put(jnp.array(copy.deepcopy(x)), device=cpu),
            pytree,
        )

    jax_copy = _deep_copy_to_cpu(jax_params)

    # Load the PyTorch state_dict straight onto the CPU, then convert every
    # tensor to a NumPy array for jnp consumption.
    state_dict = torch.load(file_name, map_location='cpu')
    numpy_weights = {k: v.cpu().numpy() for k, v in state_dict.items()}

    # --- Embedding table: reassemble the chunked table ---
    # NOTE(review): axis=0 assumes the table was chunked along rows — confirm
    # against the code that produced the checkpoint.
    embedding_table = np.concatenate(
        [numpy_weights[f'embedding_chunk_{i}'] for i in range(num_embedding_chunks)],
        axis=0,
    )
    jax_copy['embedding_table'] = jnp.array(embedding_table)

    # --- Bot MLP: torch Sequential indices 0/2/4 -> Dense_0..Dense_2 ---
    # PyTorch Linear stores weight as (out, in); Flax Dense expects
    # (in, out), hence the transpose.
    for i, j in zip([0, 2, 4], range(3)):
        jax_copy[f'Dense_{j}']['kernel'] = jnp.array(numpy_weights[f'bot_mlp.{i}.weight'].T)
        jax_copy[f'Dense_{j}']['bias'] = jnp.array(numpy_weights[f'bot_mlp.{i}.bias'])

    # --- Top MLP: torch Sequential indices 0/2/4/6/8 -> Dense_3..Dense_7 ---
    for i, j in zip([0, 2, 4, 6, 8], range(3, 8)):
        jax_copy[f'Dense_{j}']['kernel'] = jnp.array(numpy_weights[f'top_mlp.{i}.weight'].T)
        jax_copy[f'Dense_{j}']['bias'] = jnp.array(numpy_weights[f'top_mlp.{i}.bias'])

    # Release the torch tensors before returning to keep peak memory down.
    del state_dict
    return jax_copy
| 85 | + |
| 86 | + |
52 | 87 | def use_pytorch_weights_inplace_mnist(jax_params, file_name=None, replicate=False): |
53 | 88 | # Load the PyTorch checkpoint |
54 | 89 | ckpt = torch.load(file_name) |
@@ -78,34 +113,6 @@ def use_pytorch_weights_inplace_mnist(jax_params, file_name=None, replicate=Fals |
78 | 113 | return jax_params |
79 | 114 |
|
80 | 115 |
|
81 | | -# def are_weights_equal(params1, params2, atol=1e-6, rtol=1e-6): |
82 | | -# """Compares two JAX PyTrees of weights and prints where they differ.""" |
83 | | -# all_equal = True |
84 | | - |
85 | | -# def compare_fn(p1, p2): |
86 | | -# nonlocal all_equal |
87 | | -# #if not jnp.allclose(p1, p2): |
88 | | -# if not jnp.allclose(p1, p2, atol=atol, rtol=rtol): |
89 | | -# logging.info("❌ Mismatch found:") |
90 | | -# logging.info(f"Shape 1: {p1.shape}, Shape 2: {p2.shape}") |
91 | | -# logging.info(f"Max diff: {jnp.max(jnp.abs(p1 - p2))}") |
92 | | -# all_equal = False |
93 | | -# return jnp.allclose(p1, p2, atol=atol, rtol=rtol) |
94 | | - |
95 | | -# try: |
96 | | -# _ = jax.tree_util.tree_map(compare_fn, params1, params2) |
97 | | -# except Exception as e: |
98 | | -# logging.info("❌ Structure mismatch or error during comparison:", e) |
99 | | -# return False |
100 | | - |
101 | | -# if all_equal: |
102 | | -# logging.info("✅ All weights are equal (within tolerance)") |
103 | | -# return all_equal |
104 | | - |
105 | | -import jax |
106 | | -import jax.numpy as jnp |
107 | | -import logging |
108 | | - |
109 | 116 | def maybe_unreplicate(pytree): |
110 | 117 | """If leading axis matches device count, strip it assuming it's pmap replication.""" |
111 | 118 | num_devices = jax.device_count() |
@@ -150,37 +157,27 @@ def compare_fn(p1, p2): |
150 | 157 |
|
151 | 158 |
|
152 | 159 |
|
153 | | -def use_pytorch_weights2(jax_params, file_name=None, replicate=False): |
154 | | - |
155 | | - def deep_copy_to_cpu(pytree): |
156 | | - return tree_map(lambda x: jax.device_put(jnp.array(copy.deepcopy(x)), device=jax.devices("cpu")[0]), pytree) |
157 | | - |
158 | | - breakpoint() |
159 | | - jax_copy = deep_copy_to_cpu(jax_params) |
160 | | - # Load PyTorch state_dict lazily to CPU |
161 | | - state_dict = torch.load(file_name, map_location='cpu') |
162 | | - print(state_dict.keys()) |
163 | | - # Convert PyTorch tensors to NumPy arrays |
164 | | - numpy_weights = {k: v.cpu().numpy() for k, v in state_dict.items()} |
165 | | - |
166 | | - # --- Embedding Table --- |
167 | | - embedding_table = np.concatenate([ |
168 | | - numpy_weights[f'embedding_chunk_{i}'] for i in range(4) |
169 | | - ], axis=0) # adjust axis depending on chunking direction |
170 | | - |
171 | | - jax_copy['embedding_table'] = jnp.array(embedding_table) |
| 160 | +# def are_weights_equal(params1, params2, atol=1e-6, rtol=1e-6): |
| 161 | +# """Compares two JAX PyTrees of weights and prints where they differ.""" |
| 162 | +# all_equal = True |
172 | 163 |
|
173 | | - # --- Bot MLP: Dense_0 to Dense_2 --- |
174 | | - for i, j in zip([0, 2, 4], range(3)): |
175 | | - jax_copy[f'Dense_{j}']['kernel'] = jnp.array(numpy_weights[f'bot_mlp.{i}.weight'].T) |
176 | | - jax_copy[f'Dense_{j}']['bias'] = jnp.array(numpy_weights[f'bot_mlp.{i}.bias']) |
| 164 | +# def compare_fn(p1, p2): |
| 165 | +# nonlocal all_equal |
| 166 | +# #if not jnp.allclose(p1, p2): |
| 167 | +# if not jnp.allclose(p1, p2, atol=atol, rtol=rtol): |
| 168 | +# logging.info("❌ Mismatch found:") |
| 169 | +# logging.info(f"Shape 1: {p1.shape}, Shape 2: {p2.shape}") |
| 170 | +# logging.info(f"Max diff: {jnp.max(jnp.abs(p1 - p2))}") |
| 171 | +# all_equal = False |
| 172 | +# return jnp.allclose(p1, p2, atol=atol, rtol=rtol) |
177 | 173 |
|
178 | | - # --- Top MLP: Dense_3 to Dense_7 --- |
179 | | - for i, j in zip([0, 2, 4, 6, 8], range(3, 8)): |
180 | | - jax_copy[f'Dense_{j}']['kernel'] = jnp.array(numpy_weights[f'top_mlp.{i}.weight'].T) |
181 | | - jax_copy[f'Dense_{j}']['bias'] = jnp.array(numpy_weights[f'top_mlp.{i}.bias']) |
182 | | - #jax_copy = tree_map(lambda x: jnp.array(x), jax_copy) |
183 | | - del state_dict |
| 174 | +# try: |
| 175 | +# _ = jax.tree_util.tree_map(compare_fn, params1, params2) |
| 176 | +# except Exception as e: |
| 177 | +# logging.info("❌ Structure mismatch or error during comparison:", e) |
| 178 | +# return False |
184 | 179 |
|
185 | | - return jax_copy |
| 180 | +# if all_equal: |
| 181 | +# logging.info("✅ All weights are equal (within tolerance)") |
| 182 | +# return all_equal |
186 | 183 |
|
0 commit comments