Merge branch 'main' into improve-version-mismatch-warning

h-mayorquin · h-mayorquin · commit 8a456457d412 · 2026-04-16T09:46:16.000-06:00
diff --git a/src/spikeinterface/core/waveforms_extractor_backwards_compatibility.py b/src/spikeinterface/core/waveforms_extractor_backwards_compatibility.py
@@ -629,12 +629,16 @@ def _read_old_waveforms_extractor_binary(folder, sorting):
                     pc_all[mask, ...] = pc_one
                 ext.data["pca_projection"] = pc_all
 
-        # update params
-        new_params = ext._set_params()
-        updated_params = make_ext_params_up_to_date(ext, params, new_params)
-        ext.set_params(**updated_params, save=False)
+        # Install raw on-disk params and run compat handler first,
+        # matching what AnalyzerExtension.load does for non-legacy folders.
+        ext.params = dict(params)
         if ext.need_backward_compatibility_on_load:
             ext._handle_backward_compatibility_on_load()
+
+        # Now merge and validate — deprecated names are already migrated.
+        new_params = ext._set_params()
+        updated_params = make_ext_params_up_to_date(ext, ext.params, new_params)
+        ext.set_params(**updated_params, save=False)
         ext.run_info = None
 
         sorting_analyzer.extensions[new_name] = ext
diff --git a/src/spikeinterface/extractors/neoextractors/openephys.py b/src/spikeinterface/extractors/neoextractors/openephys.py
@@ -224,6 +224,8 @@ def __init__(
         experiment_names: str | list | None = None,
         all_annotations: bool = False,
     ):
+        folder_path = Path(folder_path)
+
         # Handle experiment_names deprecation
         if experiment_names is not None:
             warnings.warn(
@@ -336,8 +338,15 @@ def __init__(
                     if sample_shifts is not None:
                         self.set_property("inter_sample_shift", sample_shifts)
 
-            # load synchronized timestamps and set_times to recording
-            recording_folder = Path(folder_path) / record_node
+            # folder_path can point to different levels of the OE folder structure
+            # (root, record node, experiment, or recording). We need to find the root folder
+            # in order to load the sync timestamps and set them as times to the recording.
+            if record_node in folder_path.parts:
+                root_index = len(folder_path.parts) - folder_path.parts.index(record_node) - 1
+                root_folder = folder_path.parents[root_index]
+            else:
+                root_folder = folder_path
+            recording_folder = root_folder / record_node
             stream_folders = []
             for segment_index, rec_id in enumerate(rec_ids):
                 stream_folder = (
diff --git a/src/spikeinterface/extractors/tests/test_neoextractors.py b/src/spikeinterface/extractors/tests/test_neoextractors.py
@@ -156,6 +156,38 @@ def test_non_trivial_wiring(self):
         probe = recording.get_probe()
         np.testing.assert_array_equal(recording.channel_ids, probe.contact_annotations["settings_channel_key"])
 
+    def test_timestamp_loading_multi_level(self):
+        """
+        Test that sync timestamps load identically whether `folder_path` points to the recording folder
+        or to any of its three parent levels (experiment, record node, root).
+        """
+        recording_folder = (
+            local_folder / "openephysbinary/v0.6.x_neuropixels_with_sync/Record Node 104/experiment1/recording1"
+        )
+        stream_name = "Record Node 104#Neuropix-PXI-100.ProbeA-AP"
+        block_index = 0
+
+        recording_from_recording_folder = self.ExtractorClass(
+            recording_folder,
+            stream_name=stream_name,
+            block_index=block_index,
+            load_sync_timestamps=True,
+        )
+        assert recording_from_recording_folder.has_time_vector()
+        timestamps_recording = recording_from_recording_folder.get_times()
+        parent_folder = recording_folder
+        for _ in range(3):
+            parent_folder = parent_folder.parent
+            recording_from_parent = self.ExtractorClass(
+                parent_folder,
+                stream_name=stream_name,
+                block_index=block_index,
+                load_sync_timestamps=True,
+            )
+            assert recording_from_parent.has_time_vector()
+            timestamps_parent = recording_from_parent.get_times()
+            np.testing.assert_array_equal(timestamps_recording, timestamps_parent)
+
 
 class OpenEphysBinaryEventTest(EventCommonTestSuite, unittest.TestCase):
     ExtractorClass = OpenEphysBinaryEventExtractor
diff --git a/src/spikeinterface/postprocessing/principal_component.py b/src/spikeinterface/postprocessing/principal_component.py
@@ -629,31 +629,63 @@ def _all_pc_extractor_chunk(segment_index, start_frame, end_frame, worker_ctx):
     if i0 == i1:
         return
 
+    # The buffer spans [first spike - nbefore, last spike + nafter), so every snippet in the chunk fits inside it
+    # and all waveforms can be extracted in one go (assumes `get_traces` does not clip at segment borders).
     start = int(spike_times[i0] - nbefore)
     end = int(spike_times[i1 - 1] + nafter)
     traces = recording.get_traces(start_frame=start, end_frame=end, segment_index=segment_index)
 
-    for i in range(i0, i1):
-        st = spike_times[i]
-        if st - start - nbefore < 0:
-            continue
-        if st - start + nafter > traces.shape[0]:
-            continue
+    nsamples = nbefore + nafter
 
-        wf = traces[st - start - nbefore : st - start + nafter, :]
+    # Extract all waveforms in the chunk at once
+    spike_times_in_chunk = spike_times[i0:i1]
+    # Index of the first sample of each spike's snippet within the traces buffer
+    spike_times_offset = spike_times_in_chunk - start - nbefore
+    spike_indices = np.arange(i0, i1)
 
-        unit_index = spike_labels[i]
-        chan_inds = unit_channels[unit_index]
+    # Build waveform array: (n_spikes, nsamples, n_channels)
+    # Use fancy indexing to extract all snippets at once
+    sample_indices = spike_times_offset[:, None] + np.arange(nsamples)[None, :]  # (n_spikes, nsamples)
+    all_wfs = traces[sample_indices]  # (n_spikes, nsamples, n_channels)
 
+    # Vectorized PCA: batch by channel across all spikes in the chunk.
+    # For each unique channel, find all spikes that use it (via their unit's
+    # sparsity), extract waveforms, and call transform once.
+    labels_in_chunk = spike_labels[spike_indices]
+
+    # Map each channel to the (local spike indices, channel position) pairs of the units that use it
+    unique_unit_indices = np.unique(labels_in_chunk)
+    chan_info: dict[int, list[tuple[np.ndarray, int]]] = {}
+    for unit_index in unique_unit_indices:
+        chan_inds = unit_channels[unit_index]
+        unit_mask = labels_in_chunk == unit_index
+        unit_local_idxs = np.nonzero(unit_mask)[0]
         for c, chan_ind in enumerate(chan_inds):
-            w = wf[:, chan_ind]
-            if w.size > 0:
-                w = w[None, :]
-                try:
-                    all_pcs[i, :, c] = pca_model[chan_ind].transform(w)
-                except:
-                    # this could happen if len(wfs) is less then n_comp for a channel
-                    pass
+            if chan_ind not in chan_info:
+                chan_info[chan_ind] = []
+            chan_info[chan_ind].append((unit_local_idxs, c))
+
+    for chan_ind, unit_groups in chan_info.items():
+        # Concatenate all spike indices for this channel across units
+        all_local_idxs = np.concatenate([g[0] for g in unit_groups])
+        global_idxs = spike_indices[all_local_idxs]
+
+        # Batch waveforms for this channel: (n_spikes, nsamples)
+        wfs_batch = all_wfs[all_local_idxs, :, chan_ind]
+
+        if wfs_batch.size == 0:
+            continue
+        try:
+            pcs_batch = pca_model[chan_ind].transform(wfs_batch)
+            # Write results back — each unit group has a fixed channel position
+            offset = 0
+            for unit_local_idxs, c_pos in unit_groups:
+                n = len(unit_local_idxs)
+                all_pcs[global_idxs[offset : offset + n], :, c_pos] = pcs_batch[offset : offset + n]
+                offset += n
+        except Exception:
+            # this could happen if len(wfs) is less than n_comp for a channel
+            pass
 
 
 def _init_work_all_pc_extractor(recording, sorting, all_pcs_args, nbefore, nafter, unit_channels, pca_model):