Skip to content

Commit f1ffc7a

Browse files
authored
Merge pull request #49 from JesseLivezey/docs
docs update
2 parents e5393b3 + f20cf2e commit f1ffc7a

7 files changed

Lines changed: 208 additions & 180 deletions

File tree

codecov.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,3 +3,5 @@ ignore:
33
- "tests"
44
- "dca/plotting.py"
55
- "dca/style.py"
6+
- "dca/knn.py"
7+
- "dca/data_util.py"

dca/__init__.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,9 @@
22
SlowFeatureAnalysis,
33
ForecastableComponentsAnalysis)
44
from .dca import (DynamicalComponentsAnalysis,
5-
DynamicalComponentsAnalysisKNN,
65
DynamicalComponentsAnalysisFFT)
76

87
__all__ = ['DynamicalComponentsAnalysis',
9-
'DynamicalComponentsAnalysisKNN',
108
'DynamicalComponentsAnalysisFFT',
119
'GaussianProcessFactorAnalysis',
1210
'SlowFeatureAnalysis',

dca/cov_util.py

Lines changed: 53 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,54 @@
33
import collections
44
import torch
55
import functools
6+
from numpy.lib.stride_tricks import as_strided
67

7-
from .data_util import form_lag_matrix
88
from sklearn.utils.extmath import randomized_svd
99

1010

11+
def form_lag_matrix(X, T, stride=1, stride_tricks=True, writeable=False):
12+
"""Form the data matrix with `T` lags.
13+
14+
Parameters
15+
----------
16+
X : ndarray (n_time, N)
17+
Timeseries with no lags.
18+
T : int
19+
Number of lags.
20+
stride : int
21+
Number of original samples to move between lagged samples.
22+
stride_tricks : bool
23+
Whether to use numpy stride tricks to form the lagged matrix or create
24+
a new array. Using numpy stride tricks can can lower memory usage, especially for
25+
large `T`. If `False`, a new array is created.
26+
writeable : bool
27+
For testing. You should not need to set this to True. This function uses stride tricks
28+
to form the lag matrix which means writing to the array will have confusing behavior.
29+
If `stride_tricks` is `False`, this flag does nothing.
30+
31+
Returns
32+
-------
33+
X_with_lags : ndarray (n_lagged_time, N * T)
34+
Timeseries with lags.
35+
"""
36+
if not isinstance(stride, int) or stride < 1:
37+
raise ValueError('stride should be an int and greater than or equal to 1.')
38+
N = X.shape[1]
39+
n_lagged_samples = (len(X) - T) // stride + 1
40+
if n_lagged_samples < 1:
41+
raise ValueError('T is too long for a timeseries of length {}.'.format(len(X)))
42+
if stride_tricks:
43+
X = np.asarray(X, dtype=float, order='C')
44+
shape = (n_lagged_samples, N * T)
45+
strides = (X.strides[0] * stride,) + (X.strides[-1],)
46+
X_with_lags = as_strided(X, shape=shape, strides=strides, writeable=writeable)
47+
else:
48+
X_with_lags = np.zeros((n_lagged_samples, T * N))
49+
for i in range(n_lagged_samples):
50+
X_with_lags[i, :] = X[i * stride:i * stride + T, :].flatten()
51+
return X_with_lags
52+
53+
1154
def rectify_spectrum(cov, epsilon=1e-6, verbose=False):
1255
"""Rectify the spectrum of a covariance matrix.
1356
@@ -299,15 +342,15 @@ def calc_cov_from_cross_cov_mats(cross_cov_mats):
299342
return cov
300343

301344

302-
def calc_pi_from_data(X, T):
303-
"""Calculates the mutual information ("predictive information"
304-
or "PI") between variables {1,...,T_pi} and {T_pi+1,...,2*T_pi}, which
305-
are jointly Gaussian with covariance matrix cov_2_T_pi.
345+
def calc_pi_from_data(X, T, proj=None):
346+
"""Calculates the Gaussian Predictive Information between variables
347+
{1,...,T_pi} and {T_pi+1,...,2*T_pi}..
306348
307349
Parameters
308350
----------
309-
cov_2_T_pi : np.ndarray, shape (2*T_pi, 2*T_pi)
310-
Covariance matrix.
351+
T : int
352+
This T should be 2 * T_pi. This T sets the joint window length not the
353+
past or future window length.
311354
312355
Returns
313356
-------
@@ -316,13 +359,12 @@ def calc_pi_from_data(X, T):
316359
"""
317360
ccms = calc_cross_cov_mats_from_data(X, T)
318361

319-
return calc_pi_from_cross_cov_mats(ccms)
362+
return calc_pi_from_cross_cov_mats(ccms, proj=proj)
320363

321364

322365
def calc_pi_from_cov(cov_2_T_pi):
323-
"""Calculates the mutual information ("predictive information"
324-
or "PI") between variables {1,...,T_pi} and {T_pi+1,...,2*T_pi}, which
325-
are jointly Gaussian with covariance matrix cov_2_T_pi.
366+
"""Calculates the Gaussian Predictive Information between variables
367+
{1,...,T_pi} and {T_pi+1,...,2*T_pi} with covariance matrix cov_2_T_pi.
326368
327369
Parameters
328370
----------

dca/data_util.py

Lines changed: 2 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -4,50 +4,8 @@
44
from scipy.interpolate import interp1d
55
from scipy.signal import resample
66
from scipy.ndimage import convolve1d
7-
from numpy.lib.stride_tricks import as_strided
8-
9-
10-
def form_lag_matrix(X, T, stride=1, stride_tricks=True, writeable=False):
11-
"""Form the data matrix with `T` lags.
12-
13-
Parameters
14-
----------
15-
X : ndarray (n_time, N)
16-
Timeseries with no lags.
17-
T : int
18-
Number of lags.
19-
stride : int
20-
Number of original samples to move between lagged samples.
21-
stride_tricks : bool
22-
Whether to use numpy stride tricks to form the lagged matrix or create
23-
a new array. Using numpy stride tricks can can lower memory usage, especially for
24-
large `T`. If `False`, a new array is created.
25-
writeable : bool
26-
For testing. You should not need to set this to True. This function uses stride tricks
27-
to form the lag matrix which means writing to the array will have confusing behavior.
28-
If `stride_tricks` is `False`, this flag does nothing.
29-
30-
Returns
31-
-------
32-
X_with_lags : ndarray (n_lagged_time, N * T)
33-
Timeseries with lags.
34-
"""
35-
if not isinstance(stride, int) or stride < 1:
36-
raise ValueError('stride should be an int and greater than or equal to 1.')
37-
N = X.shape[1]
38-
n_lagged_samples = (len(X) - T) // stride + 1
39-
if n_lagged_samples < 1:
40-
raise ValueError('T is too long for a timeseries of length {}.'.format(len(X)))
41-
if stride_tricks:
42-
X = np.asarray(X, dtype=float, order='C')
43-
shape = (n_lagged_samples, N * T)
44-
strides = (X.strides[0] * stride,) + (X.strides[-1],)
45-
X_with_lags = as_strided(X, shape=shape, strides=strides, writeable=writeable)
46-
else:
47-
X_with_lags = np.zeros((n_lagged_samples, T * N))
48-
for i in range(n_lagged_samples):
49-
X_with_lags[i, :] = X[i * stride:i * stride + T, :].flatten()
50-
return X_with_lags
7+
8+
from .cov_util import form_lag_matrix # noqa:F401
519

5210

5311
def sum_over_chunks(X, stride):

dca/dca.py

Lines changed: 29 additions & 121 deletions
Original file line numberDiff line numberDiff line change
@@ -8,11 +8,10 @@
88
import torch.nn.functional as F
99

1010
from .cov_util import (calc_cross_cov_mats_from_data, calc_pi_from_cross_cov_mats,
11-
form_lag_matrix, calc_pi_from_cross_cov_mats_block_toeplitz)
11+
calc_pi_from_cross_cov_mats_block_toeplitz)
1212

1313
__all__ = ['DynamicalComponentsAnalysis',
1414
'DynamicalComponentsAnalysisFFT',
15-
'DynamicalComponentsAnalysisKNN',
1615
'ortho_reg_fn',
1716
'build_loss',
1817
'init_coef']
@@ -181,8 +180,9 @@ class DynamicalComponentsAnalysis(object):
181180
"""Dynamical Components Analysis.
182181
183182
Runs DCA on multidimensional timeseries data X to discover a projection
184-
onto a d-dimensional subspace which maximizes the complexity, as defined by the Gaussian
185-
Predictive Information (PI) of the d-dimensional dynamics over windows of length T.
183+
onto a d-dimensional subspace of an N-dimensional space which maximizes the complexity, as
184+
defined by the Gaussian Predictive Information (PI) of the d-dimensional dynamics over windows
185+
of length T.
186186
187187
Parameters
188188
----------
@@ -226,6 +226,18 @@ class DynamicalComponentsAnalysis(object):
226226
227227
Attributes
228228
----------
229+
T : int
230+
Default T used for PI.
231+
T_fit : int
232+
T used for last cross covariance estimation.
233+
d : int
234+
Default d used for fitting the projection.
235+
d_fit : int
236+
d used for last projection fit.
237+
cross covs : torch tensor
238+
Cross covariance matrices from the last covariance estimation.
239+
coef_ : ndarray (N, d)
240+
Projection matrix from fit.
229241
230242
"""
231243
def __init__(self, d=None, T=None, init="random_ortho", n_init=1, stride=1, tol=1e-6,
@@ -278,7 +290,7 @@ def estimate_cross_covariance(self, X, T=None, regularization=None, reg_ops=None
278290
X : ndarray or list of ndarrays
279291
Data to estimate the cross covariance matrix.
280292
T : int
281-
T for PI calculation (optional.)
293+
T for PI calculation (optional).
282294
regularization : str
283295
Whether to regularize cross covariance estimation.
284296
reg_ops : dict
@@ -313,6 +325,10 @@ def fit_projection(self, d=None, T=None, n_init=None):
313325
----------
314326
d : int
315327
Dimensionality of the projection (optional.)
328+
T : int
329+
T for PI calculation (optional). Default is `self.T`. If `T` is set here
330+
it must be less than or equal to `self.T` or self.estimate_cross_covariance() must
331+
be called with a larger `T`.
316332
n_init : int
317333
Number of random restarts (optional.)
318334
"""
@@ -340,6 +356,10 @@ def _fit_projection(self, d=None, T=None, record_V=False):
340356
----------
341357
d : int
342358
Dimensionality of the projection (optional.)
359+
T : int
360+
T for PI calculation (optional). Default is `self.T`. If `T` is set here
361+
it must be less than or equal to `self.T` or self.estimate_cross_covariance() must
362+
be called with a larger `T`.
343363
record_V : bool
344364
If True, saves a copy of V at each optimization step. Default is False.
345365
"""
@@ -350,13 +370,15 @@ def _fit_projection(self, d=None, T=None, record_V=False):
350370
self.d_fit = d
351371
if T is None:
352372
T = self.T
373+
if T < 1:
374+
raise ValueError
353375
if (2 * T) > self.cross_covs.shape[0]:
354376
raise ValueError('T must less than or equal to the value when ' +
355-
'`estimate_cross_covariance` was called.')
377+
'`estimate_cross_covariance()` was called.')
356378
self.T_fit = T
357379

358380
if self.cross_covs is None:
359-
raise ValueError('Call estimate_cross_covariance() first.')
381+
raise ValueError('Call `estimate_cross_covariance()` first.')
360382

361383
c = self.cross_covs[:2 * T]
362384
N = c.shape[1]
@@ -706,117 +728,3 @@ def score(self, X):
706728
X : ndarray or list
707729
"""
708730
return pi_fft(X, self.coef_, self.T)
709-
710-
711-
class DynamicalComponentsAnalysisKNN(object):
712-
"""Dynamical Components Analysis using MI estimation using the
713-
k-nearest neighbors methos of Kraskov, et al. This estimator is not
714-
differentiable and so numerical gradients are taken (very slow!).
715-
716-
WARNING: This code has not been used or tested and is still being developed.
717-
718-
Runs DCA on multidimensional timeseries data X to discover a projection
719-
onto a d-dimensional subspace which maximizes the complexity of the d-dimensional
720-
dynamics.
721-
722-
Parameters
723-
----------
724-
d : int
725-
Number of basis vectors onto which the data X are projected.
726-
T : int
727-
Size of time windows across which to compute mutual information.
728-
init : string
729-
Options: "random", "PCA"
730-
Method for initializing the projection matrix.
731-
"""
732-
def __init__(self, d=None, T=None, init="random", n_init=1, tol=1e-6,
733-
ortho_lambda=10., verbose=False, use_scipy=True):
734-
self.d = d
735-
self.T = T
736-
self.init = init
737-
self.n_init = n_init
738-
self.tol = tol
739-
self.ortho_lambda = ortho_lambda
740-
self.verbose = verbose
741-
self.coef_ = None
742-
743-
def fit(self, X, d=None, T=None, n_init=None):
744-
self.mean_ = X.mean(axis=0, keepdims=True)
745-
X -= self.mean_
746-
if n_init is None:
747-
n_init = self.n_init
748-
pis = []
749-
coefs = []
750-
for ii in range(n_init):
751-
coef, pi = self._fit_projection(X, d=d)
752-
pis.append(pi)
753-
coefs.append(coef)
754-
idx = np.argmax(pis)
755-
self.coef_ = coefs[idx]
756-
return self
757-
758-
def _fit_projection(self, X, d=None):
759-
from info_measures.continuous import kraskov_stoegbauer_grassberger as ksg
760-
if d is None:
761-
d = self.d
762-
763-
N = X.shape[1]
764-
if type(self.init) == str:
765-
if self.init == "random":
766-
V_init = np.random.normal(0, 1, (N, d))
767-
elif self.init == "random_ortho":
768-
V_init = scipy.stats.ortho_group.rvs(N)[:, :d]
769-
elif self.init == "uniform":
770-
V_init = np.ones((N, d)) / np.sqrt(N)
771-
V_init = V_init + np.random.normal(0, 1e-3, V_init.shape)
772-
else:
773-
raise ValueError
774-
else:
775-
raise ValueError
776-
V_init /= np.linalg.norm(V_init, axis=0, keepdims=True)
777-
778-
callback = None
779-
if self.verbose:
780-
781-
def callback(v_flat):
782-
v = v_flat.reshape(N, d)
783-
X_lag = form_lag_matrix(X.dot(v), 2 * self.T)
784-
mi = ksg.MutualInformation(X_lag[:, :self.T * d],
785-
X_lag[:, self.T * d:])
786-
pi = mi.mutual_information()
787-
reg_val = ortho_reg_fn(v, self.ortho_lambda)
788-
print("PI: {} bits, reg: {}".format(str(np.round(pi, 4)),
789-
str(np.round(reg_val, 4))))
790-
callback(V_init)
791-
792-
def f(v_flat):
793-
v = v_flat.reshape(N, d)
794-
X_lag = form_lag_matrix(X.dot(v), 2 * self.T)
795-
mi = ksg.MutualInformation(X_lag[:, :self.T], X_lag[:, self.T:])
796-
pi = mi.mutual_information()
797-
reg_val = ortho_reg_fn(v, self.ortho_lambda)
798-
loss = -pi + reg_val
799-
return loss
800-
opt = minimize(f, V_init.ravel(), method='L-BFGS-B', callback=callback)
801-
v = opt.x.reshape(N, d)
802-
803-
# Orthonormalize the basis prior to returning it
804-
V_opt = scipy.linalg.orth(v)
805-
final_pi = self.score(X, V_opt)
806-
return V_opt, final_pi
807-
808-
def transform(self, X):
809-
return (X - self.mean_).dot(self.coef_)
810-
811-
def fit_transform(self, X, d=None, T=None):
812-
self.fit(X, d=d, T=T)
813-
return self.transform(X)
814-
815-
def score(self, X, coef=None):
816-
if coef is None:
817-
coef = self.coef_
818-
from info_measures.continuous import kraskov_stoegbauer_grassberger as ksg
819-
X_lag = form_lag_matrix(X.dot(coef), 2 * self.T)
820-
mi = ksg.MutualInformation(X_lag[:, :self.T], X_lag[:, self.T:])
821-
pi = mi.mutual_information()
822-
return pi

0 commit comments

Comments
 (0)