Skip to content

Commit 2c8e65a

Browse files
committed
tutorial update
1 parent bfee73f commit 2c8e65a

1 file changed

Lines changed: 41 additions & 18 deletions

File tree

notebooks/tutorial.ipynb

Lines changed: 41 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -40,7 +40,9 @@
4040
"source": [
4141
"fname = 'indy_20160627_01.mat'\n",
4242
"if not os.path.isfile(fname): # check if file was already downloaded\n",
43-
" urllib.request.urlretrieve('https://zenodo.org/record/583331/files/indy_20160627_01.mat?download=1', fname)"
43+
" tmp = f\"{fname}_tmp\"\n",
44+
" urllib.request.urlretrieve('https://zenodo.org/record/583331/files/indy_20160627_01.mat?download=1', tmp)\n",
45+
" os.rename(tmp, fname)"
4446
]
4547
},
4648
{
@@ -110,7 +112,7 @@
110112
"\n",
111113
"We'll focus on the first 30 dimensions, but you could extend the analysis out to 109. We'll look at the objective of PCA (variance explained) and DCA (PI) as a function of projection dimensionality. This is a purely unsupervised analysis of dimensionality. We can also ask how well the projections (found in an unsupervised manner) can be used to predict behavior for each method.\n",
112114
"\n",
113-
"One weakness of PCA, which motivated the development of DCA, is that it cannot distinguish high variance dynamics from high variance noise. Let's first look at the variance explained by PCA projections and their $R^2$ in predicting the behavioral data."
115+
"One weakness of PCA, which motivated the development of DCA, is that it cannot distinguish high variance dynamics from high variance noise. Let's first look at the variance explained by PCA projections and their $R^2$ in predicting the behavioral data. We'll also plot the max $R^2$ for a fully supervised linear method at each dimensionality."
114116
]
115117
},
116118
{
@@ -135,18 +137,25 @@
135137
"var = np.sum(pca_model.explained_variance_)\n",
136138
"\n",
137139
"pca1_scores = np.zeros(ds.size)\n",
140+
"max_scores = np.zeros(ds.size)\n",
138141
"for ii, d in enumerate(ds):\n",
139142
" Xd = pca_model.transform(X)[:, :d]\n",
140143
" rr_model = RR(alpha=1e-6)\n",
141144
" rr_model.fit(Xd[:-lag], Y[lag:])\n",
142145
" pca1_scores[ii] = r2_score(Y[lag:], rr_model.predict(Xd[:-lag]))\n",
143146
"rr_model = RR(alpha=1e-6)\n",
144147
"rr_model.fit(X[:-lag], Y[lag:])\n",
145-
"max_score = r2_score(Y[lag:], rr_model.predict(X[:-lag]))\n",
148+
"max_scores[:] = r2_score(Y[lag:], rr_model.predict(X[:-lag]))\n",
149+
"u, s, v = np.linalg.svd(rr_model.coef_)\n",
150+
"for ii, d in enumerate(range(1, Y.shape[1])):\n",
151+
" rr_model.coef_ = (u[:, :d] * s[:d]) @ v[:d]\n",
152+
" max_scores[ii] = r2_score(Y[lag:], rr_model.predict(X[:-lag]))\n",
146153
"\n",
147-
"plt.plot(ds, np.cumsum(pca_model.explained_variance_)[:ds.size] / var, label='Var. explained')\n",
148-
"plt.ylim(0, 1)\n",
149-
"plt.plot(ds, pca1_scores / max_score, label=r'$R^2')\n",
154+
"\n",
155+
"plt.plot(ds, np.cumsum(pca_model.explained_variance_)[:ds.size] / var, label='Var. explained', c='C0')\n",
156+
"plt.ylim(0, 1.01)\n",
157+
"plt.plot(ds, pca1_scores / max_scores[-1], label=r'PCA $R^2$', c='C1')\n",
158+
"plt.plot(ds, max_scores / max_scores[-1], label=r'Max $R^2$', c='C3')\n",
150159
"plt.legend(loc='best')\n",
151160
"plt.xlabel('Projected dimensionality')\n",
152161
"plt.ylabel('0-1 normalized metric')"
@@ -202,10 +211,13 @@
202211
"metadata": {},
203212
"outputs": [],
204213
"source": [
205-
"plt.plot(ds, pi / max_pi, label='PI')\n",
206-
"plt.plot(ds, dca_scores / max_score, label=r'$R^2')\n",
207-
"plt.ylim(0, 1)\n",
208-
"plt.legend(loc='best')\n",
214+
"plt.plot(ds, pi / max_pi, label='PI', c='C0')\n",
215+
"plt.plot(ds, pca1_scores / max_scores[-1], label=r'PCA $R^2$', c='C1')\n",
216+
"plt.plot(ds, dca_scores / max_scores[-1], label=r'DCA $R^2$', c='C2')\n",
217+
"plt.plot(ds, max_scores / max_scores[-1], label=r'Max $R^2$', c='C3')\n",
218+
"\n",
219+
"plt.ylim(0, 1.01)\n",
220+
"plt.legend(loc='lower right')\n",
209221
"plt.xlabel('Projected dimensionality')\n",
210222
"plt.ylabel('0-1 normalized metric')"
211223
]
@@ -235,16 +247,25 @@
235247
"var = np.sum(pca_model.explained_variance_)\n",
236248
"\n",
237249
"pca2_scores = np.zeros(ds.size)\n",
250+
"max_scores2 = np.zeros(ds.size)\n",
238251
"for ii, d in enumerate(ds):\n",
239252
" Xd = pca_model.transform(Xn)[:, :d]\n",
240253
" rr_model = RR(alpha=1e-6)\n",
241254
" rr_model.fit(Xd[:-lag], Y[lag:])\n",
242255
" pca2_scores[ii] = r2_score(Y[lag:], rr_model.predict(Xd[:-lag]))\n",
256+
"rr_model = RR(alpha=1e-6)\n",
257+
"rr_model.fit(Xd[:-lag], Y[lag:])\n",
258+
"max_scores2[:] = r2_score(Y[lag:], rr_model.predict(Xd[:-lag]))\n",
259+
"u, s, v = np.linalg.svd(rr_model.coef_)\n",
260+
"for ii, d in enumerate(range(1, Y.shape[1])):\n",
261+
" rr_model.coef_ = (u[:, :d] * s[:d]) @ v[:d]\n",
262+
" max_scores2[ii] = r2_score(Y[lag:], rr_model.predict(Xd[:-lag]))\n",
243263
"\n",
244-
"plt.plot(ds, np.cumsum(pca_model.explained_variance_)[:ds.size] / var, label='Var. explained')\n",
245-
"plt.ylim(0, 1)\n",
246-
"plt.plot(ds, pca2_scores / max_score, label=r'$R^2')\n",
247-
"plt.legend(loc='best')\n",
264+
"plt.plot(ds, np.cumsum(pca_model.explained_variance_)[:ds.size] / var, label='Var. explained', c='C0')\n",
265+
"plt.ylim(0, 1.01)\n",
266+
"plt.plot(ds, pca2_scores / max_scores2[-1], label=r'PCA $R^2$', c='C1')\n",
267+
"plt.plot(ds, max_scores2 / max_scores2[-1], label=r'Max $R^2$', c='C3')\n",
268+
"plt.legend(loc='lower right')\n",
248269
"plt.xlabel('Projected dimensionality')\n",
249270
"plt.ylabel('0-1 normalized metric')"
250271
]
@@ -284,9 +305,11 @@
284305
"outputs": [],
285306
"source": [
286307
"plt.plot(ds, pi / max_pi, label='PI')\n",
287-
"plt.plot(ds, dca_scores / max_score, label=r'$R^2')\n",
288-
"plt.ylim(0, 1)\n",
289-
"plt.legend(loc='best')\n",
308+
"plt.plot(ds, pca2_scores / max_scores2[-1], label=r'PCA $R^2$', c='C1')\n",
309+
"plt.plot(ds, dca_scores / max_scores2[-1], label=r'DCA $R^2$', c='C2')\n",
310+
"plt.plot(ds, max_scores2 / max_scores2[-1], label=r'Max $R^2$', c='C3')\n",
311+
"plt.ylim(0, 1.01)\n",
312+
"plt.legend(loc='lower right')\n",
290313
"plt.xlabel('Projected dimensionality')\n",
291314
"plt.ylabel('0-1 normalized metric')"
292315
]
@@ -326,7 +349,7 @@
326349
"name": "python",
327350
"nbconvert_exporter": "python",
328351
"pygments_lexer": "ipython3",
329-
"version": "3.7.7"
352+
"version": "3.8.8"
330353
}
331354
},
332355
"nbformat": 4,

Comments (0)