|
| 1 | +import numpy as np |
| 2 | +from mne.stats import permutation_cluster_test |
| 3 | +import matplotlib.pyplot as plt |
| 4 | + |
| 5 | +from plot_funcs import choose_hist_rule |
| 6 | + |
| 7 | + |
def cluster_analysis(X1, X2, n_permutations=1000, p_alpha=0.05, **kwargs):
    """
    Perform a cluster-based permutation test comparing two conditions.

    Parameters
    ----------
    X1, X2 : array-like, shape (n_observations, n_timepoints)
        Data for the two conditions. They are passed to MNE as
        ``[X2, X1]`` (condition 2 first), matching the original call.
    n_permutations : int
        Number of permutations used to build the null distribution.
    p_alpha : float
        Unused here; kept for interface compatibility (callers apply the
        p-value threshold themselves, e.g. in ``plot_clusters``).
    **kwargs
        Unused; accepted for interface compatibility.

    Returns
    -------
    clusters : list of tuples
        All clusters (tuple of slices).
    cluster_p_values : list of float
        p-value of each cluster from permutation test.
    cluster_mass : list of float
        Sum of t-values per cluster (raw cluster-mass); NaN sums are
        replaced with 0.
    normalized_mass : list of float
        Mean t-value per cluster (length-normalized).
    """
    X = [X2, X1]

    # Fixed cluster-forming threshold (t = 2.5), one-tailed test, fixed
    # seed for reproducibility. (A previously-built TFCE parameter dict was
    # never passed to MNE and has been removed as dead code.)
    T_obs, clusters, cluster_p_values, H0 = permutation_cluster_test(
        X, n_permutations=n_permutations, tail=1, n_jobs=1, seed=42,
        threshold=2.5, verbose=False,
    )

    cluster_mass = [T_obs[c].sum() for c in clusters]
    # BUGFIX: the original used `e != np.nan`, which is always True because
    # NaN compares unequal to everything (including itself), so NaN masses
    # were never zeroed. np.isnan is the correct test.
    cluster_mass = [0 if np.isnan(e) else e for e in cluster_mass]
    normalized_mass = [T_obs[c].mean() for c in clusters]

    return clusters, cluster_p_values, cluster_mass, normalized_mass
| 47 | + |
| 48 | + |
def plot_clusters(x_ser, clusters, cluster_p_values,plot,p_alpha=0.05,plot_y=0,plot_kwargs=None):
    """
    Draw a horizontal bar at height ``plot_y`` over each significant cluster.

    A cluster is significant when its p-value is strictly below ``p_alpha``.
    ``plot`` is an existing ``(fig, ax)`` pair; ``plot_kwargs`` are forwarded
    to ``ax.plot``. Returns the same ``(fig, ax)`` pair.
    """
    fig, ax = plot

    extra = {} if plot_kwargs is None else plot_kwargs

    for cluster, p_val in zip(clusters, cluster_p_values):
        if p_val >= p_alpha:
            continue
        # Span from the first to the last sample of the cluster.
        x_span = [x_ser[cluster[0]], x_ser[cluster[-1]]]
        ax.plot(x_span, [plot_y, plot_y], **extra)

    return fig, ax
| 64 | + |
| 65 | + |
def plot_cluster_stats(cluster_mass, shuff_cluster_mass,plot=None, plot_kwargs=None,
                       plot_raw=True, plot_normed=True):
    """
    Plot the total observed cluster mass against the shuffled null distribution.

    Parameters
    ----------
    cluster_mass : sequence of float
        Per-cluster masses for the observed data; summed into one total.
    shuff_cluster_mass : sequence of sequence of float
        Per-cluster masses for each shuffle; each shuffle is summed.
    plot : (fig, ax) tuple | None
        Existing figure/axes to draw on; a new one is created when None.
    plot_kwargs : dict | None
        Currently unused; kept for interface compatibility.
    plot_raw, plot_normed : bool
        Currently unused; kept for interface compatibility.

    Returns
    -------
    fig, ax
        The figure and axes drawn on.
    """
    if plot is None:
        fig, ax = plt.subplots()
    else:
        fig, ax = plot

    if plot_kwargs is None:
        plot_kwargs = {}

    total_mass = np.sum(cluster_mass)
    total_shuff_mass = [np.sum(shuff) for shuff in shuff_cluster_mass]
    # Trim the null distribution to its 1%-99% range for display.
    # Percentiles are hoisted out of the comprehension (the original
    # recomputed both per element, O(n^2)).
    lo, hi = np.percentile(total_shuff_mass, [1, 99])
    total_shuff_mass = [m for m in total_shuff_mass if lo <= m <= hi]

    ax.hist(total_shuff_mass, bins=choose_hist_rule(total_shuff_mass), fc='gray')
    ax.axvline(total_mass, color='goldenrod', ls='-')
    ax.set_ylabel("Frequency", fontdict={'size': 5})
    ax.set_xlabel("Total mass", fontdict={'size': 5})
    ax.yaxis.labelpad = 1
    ax.xaxis.labelpad = 2
    ax.tick_params(axis='both', which='both', pad=2, labelsize=5)

    # Switch to symlog when the observed mass dwarfs the null scale.
    # BUGFIX: the original divided by np.min(total_shuff_mass) without abs(),
    # so a negative minimum made the ratio negative (the check could never
    # fire) and a zero minimum produced a divide-by-zero warning.
    denom = np.min(np.abs(total_shuff_mass))
    if denom > 0 and abs(total_mass) / denom > 20:
        ax.set_xscale('symlog', linthresh=1)
    else:
        # linthresh scales with the spread of the null distribution.
        # (The original set 'linear' here and immediately overrode it with
        # 'symlog'; the dead statement is removed.)
        spread = np.max(total_shuff_mass) - np.min(total_shuff_mass)
        ax.set_xscale('symlog', linthresh=abs(spread) * 0.1 + 0.1)

    ax.set_title("", fontdict={'size': 5})
    ax.locator_params(axis='y', nbins=3)
    fig.tight_layout()
    fig.show()

    return fig, ax
| 108 | + |
| 109 | + |
def split_clusters(T_obs, clusters, cluster_p_values, H0=None,
                   depth_fraction=0.5, min_gap=1):
    """
    Split clusters (arrays of indices) wherever a valley appears inside them.

    A valley is a contiguous run of at least ``min_gap`` samples whose
    absolute t-value falls to or below ``peak * depth_fraction``, where
    ``peak`` is the cluster's maximum absolute t-value. Each child cluster
    inherits its parent's p-value. Output format matches the input exactly.

    Parameters
    ----------
    T_obs : ndarray, shape (n_timepoints,)
        Observed t-values.
    clusters : list of np.ndarray
        Each cluster is an array of indices (sorted, contiguous).
    cluster_p_values : ndarray
        P-values corresponding to clusters.
    H0 : ndarray | None
        Permutation null distribution (returned unchanged).
    depth_fraction : float
        Relative valley depth that triggers a split.
    min_gap : int
        Minimum valley length (in samples) to split on.

    Returns
    -------
    T_obs : ndarray
        Same as input.
    new_clusters : list of np.ndarray
        Same format as input, with splits applied.
    new_cluster_p_values : ndarray
        One entry per output cluster; children inherit the parent's p-value.
    H0 : ndarray | None
        Same as input.
    """
    out_clusters, out_pvals = [], []

    for cluster, p in zip(clusters, cluster_p_values):
        # np.unique both deduplicates and sorts; empty clusters are dropped.
        cluster = np.unique(cluster)
        if cluster.size == 0:
            continue

        abs_t = np.abs(T_obs[cluster])
        peak = np.nanmax(abs_t)
        if peak == 0:
            # Flat cluster — nothing to split on.
            out_clusters.append(cluster)
            out_pvals.append(p)
            continue

        # Positions (relative to the cluster) sitting at or below the valley line.
        valley_pos = np.flatnonzero(abs_t <= peak * depth_fraction)
        if valley_pos.size == 0:
            out_clusters.append(cluster)
            out_pvals.append(p)
            continue

        # Break the valley positions into contiguous runs.
        run_starts = np.flatnonzero(np.diff(valley_pos) > 1) + 1
        cuts = []
        for run in np.split(valley_pos, run_starts):
            if run.size >= min_gap:
                after = run[-1] + 1
                if after < cluster.size:  # cut must stay inside the cluster
                    cuts.append(cluster[after])

        if not cuts:
            out_clusters.append(cluster)
            out_pvals.append(p)
            continue

        # Partition the cluster at the cut indices; children share p.
        bounds = [cluster[0], *cuts, cluster[-1] + 1]
        for left, right in zip(bounds, bounds[1:]):
            piece = cluster[(cluster >= left) & (cluster < right)]
            if piece.size > 0:
                out_clusters.append(piece)
                out_pvals.append(p)

    return T_obs, out_clusters, np.array(out_pvals), H0
0 commit comments