@@ -84,22 +84,32 @@ class MovingAveragePCA:
8484 n_features_ : int
8585 Number of features in the training data.
8686 n_samples_ : int
87- Number of samples in the training data.
88- noise_variance_ : float
89- The estimated noise covariance following the Probabilistic PCA model
90- from Tipping and Bishop 1999.
91- See “Pattern Recognition and Machine Learning” by C. Bishop, 12.2.1 p. 574
92- or http://www.miketipping.com/papers/met-mppca.pdf.
93- It is required to compute the estimated data covariance and score samples.
94-
95- Equal to the average of (min(n_features, n_samples) - n_components) smallest
96- eigenvalues of the covariance matrix of X.
97- aic_ : :obj:`numpy.ndarray`, shape (n_components)
98- The Akaike Information Criterion optimization curve.
99- kic_ : :obj:`numpy.ndarray`, shape (n_components)
100- The Kullback-Leibler Information Criterion optimization curve.
101- mdl_ : :obj:`numpy.ndarray`, shape (n_components)
102- The Minimum Description Length optimization curve.
87+ Number of samples in the training data.
88+ aic_ : dict
89+ Dictionary containing the Akaike Information Criterion results:
90+ - 'n_components': The number of components chosen by the AIC criterion.
91+ - 'value': The AIC curve values.
92+ - 'explained_variance_total': The total explained variance of the components.
93+ kic_ : dict
94+ Dictionary containing the Kullback-Leibler Information Criterion results:
95+ - 'n_components': The number of components chosen by the KIC criterion.
96+ - 'value': The KIC curve values.
97+ - 'explained_variance_total': The total explained variance of the components.
98+ mdl_ : dict
99+ Dictionary containing the Minimum Description Length results:
100+ - 'n_components': The number of components chosen by the MDL criterion.
101+ - 'value': The MDL curve values.
102+ - 'explained_variance_total': The total explained variance of the components.
103+ varexp_90_ : dict
104+ Dictionary containing the 90% variance explained results:
105+ - 'n_components': The number of components chosen by the 90% variance explained
106+ criterion.
107+ - 'explained_variance_total': The total explained variance of the components.
108+ varexp_95_ : dict
109+ Dictionary containing the 95% variance explained results:
110+ - 'n_components': The number of components chosen by the 95% variance explained
111+ criterion.
112+ - 'explained_variance_total': The total explained variance of the components.
103113
104114 References
105115 ----------
@@ -240,66 +250,104 @@ def _fit(self, img, mask):
240250
241251 LGR .info ("Estimating the dimensionality ..." )
242252 p = n_timepoints
243- self . aic_ = np .zeros (p - 1 )
244- self . kic_ = np .zeros (p - 1 )
245- self . mdl_ = np .zeros (p - 1 )
253+ aic = np .zeros (p - 1 )
254+ kic = np .zeros (p - 1 )
255+ mdl = np .zeros (p - 1 )
246256
247257 for k_idx , k in enumerate (np .arange (1 , p )):
248258 LH = np .log (np .prod (np .power (eigenvalues [k :], 1 / (p - k ))) / np .mean (eigenvalues [k :]))
249259 mlh = 0.5 * N * (p - k ) * LH
250260 df = 1 + 0.5 * k * (2 * p - k + 1 )
251- self . aic_ [k_idx ] = (- 2 * mlh ) + (2 * df )
252- self . kic_ [k_idx ] = (- 2 * mlh ) + (3 * df )
253- self . mdl_ [k_idx ] = - mlh + (0.5 * df * np .log (N ))
261+ aic [k_idx ] = (- 2 * mlh ) + (2 * df )
262+ kic [k_idx ] = (- 2 * mlh ) + (3 * df )
263+ mdl [k_idx ] = - mlh + (0.5 * df * np .log (N ))
254264
255- itc = np .row_stack ([self . aic_ , self . kic_ , self . mdl_ ])
265+ itc = np .row_stack ([aic , kic , mdl ])
256266
257267 dlap = np .diff (itc , axis = 1 )
258268
269+ # Calculate optimal number of components with each criterion
259270 # AIC
260271 a_aic = np .where (dlap [0 , :] > 0 )[0 ] + 1
261272 if a_aic .size == 0 :
262- self . n_aic_ = itc [0 , :].shape [0 ]
273+ n_aic = itc [0 , :].shape [0 ]
263274 else :
264- self . n_aic_ = a_aic [0 ]
275+ n_aic = a_aic [0 ]
265276
266277 # KIC
267278 a_kic = np .where (dlap [1 , :] > 0 )[0 ] + 1
268279 if a_kic .size == 0 :
269- self . n_kic_ = itc [1 , :].shape [0 ]
280+ n_kic = itc [1 , :].shape [0 ]
270281 else :
271- self . n_kic_ = a_kic [0 ]
282+ n_kic = a_kic [0 ]
272283
273284 # MDL
274285 a_mdl = np .where (dlap [2 , :] > 0 )[0 ] + 1
275286 if a_mdl .size == 0 :
276- self . n_mdl_ = itc [2 , :].shape [0 ]
287+ n_mdl = itc [2 , :].shape [0 ]
277288 else :
278- self . n_mdl_ = a_mdl [0 ]
289+ n_mdl = a_mdl [0 ]
279290
280291 if self .criterion == "aic" :
281- n_components = self . n_aic_
292+ n_components = n_aic
282293 elif self .criterion == "kic" :
283- n_components = self . n_kic_
294+ n_components = n_kic
284295 elif self .criterion == "mdl" :
285- n_components = self . n_mdl_
296+ n_components = n_mdl
286297
287- LGR .info ("Estimated number of components is %d" % n_components )
298+ LGR .info ("Performing PCA" )
288299
289- # PCA with estimated number of components
290- ppca = PCA (n_components = n_components , svd_solver = "full" , copy = False , whiten = False )
300+ # PCA with all possible components (the estimated selection is made after)
301+ ppca = PCA (n_components = None , svd_solver = "full" , copy = False , whiten = False )
291302 ppca .fit (X )
292303
304+ # Get cumulative explained variance as components are added
305+ cumsum_varexp = np .cumsum (ppca .explained_variance_ratio_ )
306+
307+ # Calculate number of components for 90% varexp
308+ n_comp_varexp_90 = np .where (cumsum_varexp >= 0.9 )[0 ][0 ] + 1
309+
310+ # Calculate number of components for 95% varexp
311+ n_comp_varexp_95 = np .where (cumsum_varexp >= 0.95 )[0 ][0 ] + 1
312+
313+ LGR .info ("Estimated number of components is %d" % n_components )
314+
315+ # Save results of each criterion into dictionaries
316+ self .aic_ = {
317+ "n_components" : n_aic ,
318+ "value" : aic ,
319+ "explained_variance_total" : cumsum_varexp [n_aic - 1 ],
320+ }
321+ self .kic_ = {
322+ "n_components" : n_kic ,
323+ "value" : kic ,
324+ "explained_variance_total" : cumsum_varexp [n_kic - 1 ],
325+ }
326+ self .mdl_ = {
327+ "n_components" : n_mdl ,
328+ "value" : mdl ,
329+ "explained_variance_total" : cumsum_varexp [n_mdl - 1 ],
330+ }
331+ self .varexp_90_ = {
332+ "n_components" : n_comp_varexp_90 ,
333+ "explained_variance_total" : cumsum_varexp [n_comp_varexp_90 - 1 ],
334+ }
335+ self .varexp_95_ = {
336+ "n_components" : n_comp_varexp_95 ,
337+ "explained_variance_total" : cumsum_varexp [n_comp_varexp_95 - 1 ],
338+ }
339+
293340 # Assign attributes from model
294- self .components_ = ppca .components_
295- self .explained_variance_ = ppca .explained_variance_
296- self .explained_variance_ratio_ = ppca .explained_variance_ratio_
297- self .singular_values_ = ppca .singular_values_
341+ self .components_ = ppca .components_ [: n_components , :]
342+ self .explained_variance_ = ppca .explained_variance_ [: n_components ]
343+ self .explained_variance_ratio_ = ppca .explained_variance_ratio_ [: n_components ]
344+ self .singular_values_ = ppca .singular_values_ [: n_components ]
298345 self .mean_ = ppca .mean_
299- self .n_components_ = ppca . n_components_
346+ self .n_components_ = n_components
300347 self .n_features_ = ppca .n_features_
301348 self .n_samples_ = ppca .n_samples_
302- self .noise_variance_ = ppca .noise_variance_
349+ # noise_variance_ is intentionally not stored: its value depends on which
350+ # components are retained, so it would be misleading after the truncation above.
351+ # self.noise_variance_ = ppca.noise_variance_
303351 component_maps = np .dot (
304352 np .dot (X , self .components_ .T ), np .diag (1.0 / self .explained_variance_ )
305353 )
0 commit comments