Skip to content

Commit 2c8e65a

Browse files
committed
tutorial update
1 parent bfee73f commit 2c8e65a

1 file changed

Lines changed: 41 additions & 18 deletions

File tree

notebooks/tutorial.ipynb

Lines changed: 41 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -40,7 +40,9 @@
4040
"source": [
4141
"fname = 'indy_20160627_01.mat'\n",
4242
"if not os.path.isfile(fname): # check if file was already downloaded\n",
43-
" urllib.request.urlretrieve('https://zenodo.org/record/583331/files/indy_20160627_01.mat?download=1', fname)"
43+
" tmp = f\"{fname}_tmp\"\n",
44+
" urllib.request.urlretrieve('https://zenodo.org/record/583331/files/indy_20160627_01.mat?download=1', tmp)\n",
45+
" os.rename(tmp, fname)"
4446
]
4547
},
4648
{
@@ -110,7 +112,7 @@
110112
"\n",
111113
"We'll focus on the first 30 dimensions, but you could extend the analysis out to 109. We'll look at the objective of PCA (variance explained) and DCA (PI) as a function of projection dimensionality. This is a purely unsupervised analysis of dimensionality. We can also ask how well the projections (found in an unsupervised manner) can be used to predict behavior for each method.\n",
112114
"\n",
113-
"One weakness of PCA, which motivated the development of DCA, is that it cannot distinguish high variance dynamics from high variance noise. Let's first look at the variance explained by PCA projections and their $R^2$ in predicting the behavioral data."
115+
"One weakness of PCA, which motivated the development of DCA, is that it cannot distinguish high variance dynamics from high variance noise. Let's first look at the variance explained by PCA projections and their $R^2$ in predicting the behavioral data. We'll also plot the max $R^2$ for a fully supervised linear method at each dimensionality."
114116
]
115117
},
116118
{
@@ -135,18 +137,25 @@
135137
"var = np.sum(pca_model.explained_variance_)\n",
136138
"\n",
137139
"pca1_scores = np.zeros(ds.size)\n",
140+
"max_scores = np.zeros(ds.size)\n",
138141
"for ii, d in enumerate(ds):\n",
139142
" Xd = pca_model.transform(X)[:, :d]\n",
140143
" rr_model = RR(alpha=1e-6)\n",
141144
" rr_model.fit(Xd[:-lag], Y[lag:])\n",
142145
" pca1_scores[ii] = r2_score(Y[lag:], rr_model.predict(Xd[:-lag]))\n",
143146
"rr_model = RR(alpha=1e-6)\n",
144147
"rr_model.fit(X[:-lag], Y[lag:])\n",
145-
"max_score = r2_score(Y[lag:], rr_model.predict(X[:-lag]))\n",
148+
"max_scores[:] = r2_score(Y[lag:], rr_model.predict(X[:-lag]))\n",
149+
"u, s, v = np.linalg.svd(rr_model.coef_)\n",
150+
"for ii, d in enumerate(range(1, Y.shape[1])):\n",
151+
" rr_model.coef_ = (u[:, :d] * s[:d]) @ v[:d]\n",
152+
" max_scores[ii] = r2_score(Y[lag:], rr_model.predict(X[:-lag]))\n",
146153
"\n",
147-
"plt.plot(ds, np.cumsum(pca_model.explained_variance_)[:ds.size] / var, label='Var. explained')\n",
148-
"plt.ylim(0, 1)\n",
149-
"plt.plot(ds, pca1_scores / max_score, label=r'$R^2')\n",
154+
"\n",
155+
"plt.plot(ds, np.cumsum(pca_model.explained_variance_)[:ds.size] / var, label='Var. explained', c='C0')\n",
156+
"plt.ylim(0, 1.01)\n",
157+
"plt.plot(ds, pca1_scores / max_scores[-1], label=r'PCA $R^2$', c='C1')\n",
158+
"plt.plot(ds, max_scores / max_scores[-1], label=r'Max $R^2$', c='C3')\n",
150159
"plt.legend(loc='best')\n",
151160
"plt.xlabel('Projected dimensionality')\n",
152161
"plt.ylabel('0-1 normalized metric')"
@@ -202,10 +211,13 @@
202211
"metadata": {},
203212
"outputs": [],
204213
"source": [
205-
"plt.plot(ds, pi / max_pi, label='PI')\n",
206-
"plt.plot(ds, dca_scores / max_score, label=r'$R^2')\n",
207-
"plt.ylim(0, 1)\n",
208-
"plt.legend(loc='best')\n",
214+
"plt.plot(ds, pi / max_pi, label='PI', c='C0')\n",
215+
"plt.plot(ds, pca1_scores / max_scores[-1], label=r'PCA $R^2$', c='C1')\n",
216+
"plt.plot(ds, dca_scores / max_scores[-1], label=r'DCA $R^2$', c='C2')\n",
217+
"plt.plot(ds, max_scores / max_scores[-1], label=r'Max $R^2$', c='C3')\n",
218+
"\n",
219+
"plt.ylim(0, 1.01)\n",
220+
"plt.legend(loc='lower right')\n",
209221
"plt.xlabel('Projected dimensionality')\n",
210222
"plt.ylabel('0-1 normalized metric')"
211223
]
@@ -235,16 +247,25 @@
235247
"var = np.sum(pca_model.explained_variance_)\n",
236248
"\n",
237249
"pca2_scores = np.zeros(ds.size)\n",
250+
"max_scores2 = np.zeros(ds.size)\n",
238251
"for ii, d in enumerate(ds):\n",
239252
" Xd = pca_model.transform(Xn)[:, :d]\n",
240253
" rr_model = RR(alpha=1e-6)\n",
241254
" rr_model.fit(Xd[:-lag], Y[lag:])\n",
242255
" pca2_scores[ii] = r2_score(Y[lag:], rr_model.predict(Xd[:-lag]))\n",
256+
"rr_model = RR(alpha=1e-6)\n",
257+
"rr_model.fit(Xd[:-lag], Y[lag:])\n",
258+
"max_scores2[:] = r2_score(Y[lag:], rr_model.predict(Xd[:-lag]))\n",
259+
"u, s, v = np.linalg.svd(rr_model.coef_)\n",
260+
"for ii, d in enumerate(range(1, Y.shape[1])):\n",
261+
" rr_model.coef_ = (u[:, :d] * s[:d]) @ v[:d]\n",
262+
" max_scores2[ii] = r2_score(Y[lag:], rr_model.predict(Xd[:-lag]))\n",
243263
"\n",
244-
"plt.plot(ds, np.cumsum(pca_model.explained_variance_)[:ds.size] / var, label='Var. explained')\n",
245-
"plt.ylim(0, 1)\n",
246-
"plt.plot(ds, pca2_scores / max_score, label=r'$R^2')\n",
247-
"plt.legend(loc='best')\n",
264+
"plt.plot(ds, np.cumsum(pca_model.explained_variance_)[:ds.size] / var, label='Var. explained', c='C0')\n",
265+
"plt.ylim(0, 1.01)\n",
266+
"plt.plot(ds, pca2_scores / max_scores2[-1], label=r'PCA $R^2$', c='C1')\n",
267+
"plt.plot(ds, max_scores2 / max_scores2[-1], label=r'Max $R^2$', c='C3')\n",
268+
"plt.legend(loc='lower right')\n",
248269
"plt.xlabel('Projected dimensionality')\n",
249270
"plt.ylabel('0-1 normalized metric')"
250271
]
@@ -284,9 +305,11 @@
284305
"outputs": [],
285306
"source": [
286307
"plt.plot(ds, pi / max_pi, label='PI')\n",
287-
"plt.plot(ds, dca_scores / max_score, label=r'$R^2')\n",
288-
"plt.ylim(0, 1)\n",
289-
"plt.legend(loc='best')\n",
308+
"plt.plot(ds, pca2_scores / max_scores2[-1], label=r'PCA $R^2$', c='C1')\n",
309+
"plt.plot(ds, dca_scores / max_scores2[-1], label=r'DCA $R^2$', c='C2')\n",
310+
"plt.plot(ds, max_scores2 / max_scores2[-1], label=r'Max $R^2$', c='C3')\n",
311+
"plt.ylim(0, 1.01)\n",
312+
"plt.legend(loc='lower right')\n",
290313
"plt.xlabel('Projected dimensionality')\n",
291314
"plt.ylabel('0-1 normalized metric')"
292315
]
@@ -326,7 +349,7 @@
326349
"name": "python",
327350
"nbconvert_exporter": "python",
328351
"pygments_lexer": "ipython3",
329-
"version": "3.7.7"
352+
"version": "3.8.8"
330353
}
331354
},
332355
"nbformat": 4,

Comments (0)