initial commit

mne/stats/__init__.py (new file, 8 lines)
@@ -0,0 +1,8 @@
# Authors: The MNE-Python contributors.
# License: BSD-3-Clause
# Copyright the MNE-Python contributors.

"""Functions for statistical analysis."""
import lazy_loader as lazy

(__getattr__, __dir__, __all__) = lazy.attach_stub(__name__, __file__)

mne/stats/__init__.pyi (new file, 44 lines)
@@ -0,0 +1,44 @@
__all__ = [
    "_ci",
    "_parametric_ci",
    "_st_mask_from_s_inds",
    "bonferroni_correction",
    "bootstrap_confidence_interval",
    "combine_adjacency",
    "erp",
    "f_mway_rm",
    "f_oneway",
    "f_threshold_mway_rm",
    "fdr_correction",
    "linear_regression",
    "linear_regression_raw",
    "permutation_cluster_1samp_test",
    "permutation_cluster_test",
    "permutation_t_test",
    "spatio_temporal_cluster_1samp_test",
    "spatio_temporal_cluster_test",
    "summarize_clusters_stc",
    "ttest_1samp_no_p",
    "ttest_ind_no_p",
]
from . import erp
from ._adjacency import combine_adjacency
from .cluster_level import (
    _st_mask_from_s_inds,
    permutation_cluster_1samp_test,
    permutation_cluster_test,
    spatio_temporal_cluster_1samp_test,
    spatio_temporal_cluster_test,
    summarize_clusters_stc,
)
from .multi_comp import bonferroni_correction, fdr_correction
from .parametric import (
    _parametric_ci,
    f_mway_rm,
    f_oneway,
    f_threshold_mway_rm,
    ttest_1samp_no_p,
    ttest_ind_no_p,
)
from .permutations import _ci, bootstrap_confidence_interval, permutation_t_test
from .regression import linear_regression, linear_regression_raw

mne/stats/_adjacency.py (new file, 118 lines)
@@ -0,0 +1,118 @@
# Authors: The MNE-Python contributors.
# License: BSD-3-Clause
# Copyright the MNE-Python contributors.

import numpy as np
from scipy import sparse

from ..utils import _check_option, _validate_type
from ..utils.check import int_like


def combine_adjacency(*structure):
    """Create a sparse binary adjacency/neighbors matrix.

    Parameters
    ----------
    *structure : list
        The adjacency along each dimension. Each entry can be:

        - ndarray or scipy.sparse.sparray
          A square binary adjacency matrix for the given dimension.
          For example created by :func:`mne.channels.find_ch_adjacency`.
        - int
          The number of elements along the given dimension. A lattice
          adjacency will be generated, which is a binary matrix
          reflecting that element N of an array is adjacent to
          elements at indices N - 1 and N + 1.

    Returns
    -------
    adjacency : scipy.sparse.coo_array, shape (n_features, n_features)
        The square adjacency matrix, where the shape ``n_features``
        corresponds to the product of the length of all dimensions.
        For example ``len(times) * len(freqs) * len(chans)``.

    See Also
    --------
    mne.channels.find_ch_adjacency
    mne.channels.read_ch_adjacency

    Notes
    -----
    For 4-dimensional data with shape ``(n_obs, n_times, n_freqs, n_chans)``,
    you can specify **no** connections among elements in a particular
    dimension by passing a matrix of zeros. For example:

    >>> import numpy as np
    >>> from scipy.sparse import diags
    >>> from mne.stats import combine_adjacency
    >>> n_times, n_freqs, n_chans = (50, 7, 16)
    >>> chan_adj = diags([1., 1.], offsets=(-1, 1), shape=(n_chans, n_chans))
    >>> combine_adjacency(
    ...     n_times,  # regular lattice adjacency for times
    ...     np.zeros((n_freqs, n_freqs)),  # no adjacency between freq. bins
    ...     chan_adj,  # custom matrix, or use mne.channels.find_ch_adjacency
    ... )  # doctest: +SKIP
    <5600x5600 sparse array of type '<class 'numpy.float64'>'
        with 27076 stored elements in COOrdinate format>
    """
    structure = list(structure)
    for di, dim in enumerate(structure):
        name = f"structure[{di}]"
        _validate_type(dim, ("int-like", np.ndarray, "sparse"), name)
        if isinstance(dim, int_like):
            # Don't add the diagonal, because we explicitly remove it later
            dim = sparse.dia_array(
                (np.ones((2, dim)), [-1, 1]),
                shape=(dim, dim),
            ).tocoo()
        else:
            _check_option(f"{name}.ndim", dim.ndim, [2])
            if dim.shape[0] != dim.shape[1]:
                raise ValueError(f"{name} must be square, got shape {dim.shape}")
            if not isinstance(dim, sparse.coo_array):
                dim = sparse.coo_array(dim)
            else:
                dim = dim.copy()
            dim.data[dim.row == dim.col] = 0.0  # remove diagonal, will add later
            dim.eliminate_zeros()
            if not (dim.data == 1).all():
                raise ValueError("All adjacency values must be 0 or 1")
        structure[di] = dim
    # list of coo
    assert all(isinstance(dim, sparse.coo_array) for dim in structure)
    shape = np.array([d.shape[0] for d in structure], int)
    n_others = np.array(
        [
            np.prod(np.concatenate([shape[:di], shape[di + 1 :]]))
            for di in range(len(structure))
        ],
        int,
    )
    n_each = np.array([dim.data.size for dim in structure], int) * n_others
    n_off = n_each.sum()  # off-diagonal terms
    n_diag = np.prod(shape)
    vertices = np.arange(n_diag).reshape(shape)
    edges = np.empty((2, n_off + n_diag), int)
    used = np.zeros(n_off, bool)
    weights = np.empty(n_off + n_diag, float)  # even though just 0/1
    offset = 0
    for di, dim in enumerate(structure):
        s_l = [slice(None)] * len(shape)
        s_r = [slice(None)] * len(shape)
        s_l[di] = dim.row
        s_r[di] = dim.col
        assert dim.row.shape == dim.col.shape == dim.data.shape
        sl = slice(offset, offset + n_each[di])
        edges[:, sl] = [vertices[tuple(s_l)].ravel(), vertices[tuple(s_r)].ravel()]
        weights[sl] = np.tile(dim.data, n_others[di])
        offset += n_each[di]
        assert not used[sl].any()
        used[sl] = True
    assert used.all()
    # Handle the diagonal separately at the end to avoid duplicate entries
    edges[:, n_off:] = vertices.ravel()
    weights[n_off:] = 1.0
    graph = sparse.coo_array((weights, edges), (vertices.size, vertices.size))
    return graph
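
A minimal usage sketch (not part of this commit): the combined matrix has one row/column per flattened feature and is then handed to one of the cluster-level permutation tests re-exported above; the ``adjacency`` keyword of those functions is assumed here, not shown in this diff.

import numpy as np
from mne.stats import combine_adjacency

n_times, n_freqs, n_chans = 50, 7, 16
# lattice over time, no adjacency between frequency bins or channels
adj = combine_adjacency(n_times, np.zeros((n_freqs, n_freqs)), np.eye(n_chans))
assert adj.shape == (n_times * n_freqs * n_chans,) * 2
# ``adj`` would then be passed as, e.g.,
# spatio_temporal_cluster_test(X, adjacency=adj, ...)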

mne/stats/cluster_level.py (new file, 1731 lines)
(File diff suppressed because it is too large.)

mne/stats/erp.py (new file, 86 lines)
@@ -0,0 +1,86 @@
"""ERP-related statistics."""

# Authors: The MNE-Python contributors.
# License: BSD-3-Clause
# Copyright the MNE-Python contributors.

import numpy as np

from mne.utils import _validate_type


def compute_sme(epochs, start=None, stop=None):
    """Compute standardized measurement error (SME).

    The standardized measurement error :footcite:`LuckEtAl2021` can be used as a
    universal measure of data quality in ERP studies.

    Parameters
    ----------
    epochs : mne.Epochs
        The epochs containing the data for which to compute the SME.
    start : int | float | None
        Start time (in s) of the time window used for SME computation. If ``None``, use
        the start of the epoch.
    stop : int | float | None
        Stop time (in s) of the time window used for SME computation. If ``None``, use
        the end of the epoch.

    Returns
    -------
    sme : array, shape (n_channels,)
        SME in given time window for each channel.

    Notes
    -----
    Currently, only the mean value in the given time window is supported, meaning that
    the resulting SME is only valid in studies which quantify the amplitude of an ERP
    component as the mean within the time window (as opposed to e.g. the peak, which
    would require bootstrapping).

    References
    ----------
    .. footbibliography::

    Examples
    --------
    Given an :class:`~mne.Epochs` object, the SME for the entire epoch duration can be
    computed as follows:

    >>> compute_sme(epochs)  # doctest: +SKIP

    However, the SME is best used to estimate the precision of a specific ERP measure,
    specifically the mean amplitude of an ERP component in a time window of interest.
    For example, the SME for the mean amplitude of the P3 component in the 300-500 ms
    time window could be computed as follows:

    >>> compute_sme(epochs, start=0.3, stop=0.5)  # doctest: +SKIP

    Usually, it will be more informative to compute the SME for specific conditions
    separately. This can be done by selecting the epochs of interest as follows:

    >>> compute_sme(epochs["oddball"], 0.3, 0.5)  # doctest: +SKIP

    Note that the SME will be reported for each channel separately. If you are only
    interested in a single channel (or a subset of channels), select the channels
    before computing the SME:

    >>> compute_sme(epochs.pick("Pz"), 0.3, 0.5)  # doctest: +SKIP

    Selecting both conditions and channels is also possible:

    >>> compute_sme(epochs["oddball"].pick("Pz"), 0.3, 0.5)  # doctest: +SKIP

    In any case, the output will be a NumPy array with the SME value for each channel.
    """
    _validate_type(start, ("numeric", None), "start", "int or float")
    _validate_type(stop, ("numeric", None), "stop", "int or float")
    start = epochs.tmin if start is None else start
    stop = epochs.tmax if stop is None else stop
    if start < epochs.tmin:
        raise ValueError("start is out of bounds.")
    if stop > epochs.tmax:
        raise ValueError("stop is out of bounds.")

    data = epochs.get_data(tmin=start, tmax=stop)
    return data.mean(axis=2).std(axis=0) / np.sqrt(data.shape[0])

mne/stats/multi_comp.py (new file, 96 lines)
@@ -0,0 +1,96 @@
# Authors: The MNE-Python contributors.
# License: BSD-3-Clause
# Copyright the MNE-Python contributors.

import numpy as np


def _ecdf(x):
    """No frills empirical cdf used in fdrcorrection."""
    nobs = len(x)
    return np.arange(1, nobs + 1) / float(nobs)


def fdr_correction(pvals, alpha=0.05, method="indep"):
    """P-value correction with False Discovery Rate (FDR).

    Correction for multiple comparisons using FDR :footcite:`GenoveseEtAl2002`.

    This covers Benjamini/Hochberg for independent or positively correlated and
    Benjamini/Yekutieli for general or negatively correlated tests.

    Parameters
    ----------
    pvals : array_like
        Set of p-values of the individual tests.
    alpha : float
        Error rate.
    method : 'indep' | 'negcorr'
        If 'indep' it implements Benjamini/Hochberg for independent or if
        'negcorr' it corresponds to Benjamini/Yekutieli.

    Returns
    -------
    reject : array, bool
        True if a hypothesis is rejected, False if not.
    pval_corrected : array
        P-values adjusted for multiple hypothesis testing to limit FDR.

    References
    ----------
    .. footbibliography::
    """
    pvals = np.asarray(pvals)
    shape_init = pvals.shape
    pvals = pvals.ravel()

    pvals_sortind = np.argsort(pvals)
    pvals_sorted = pvals[pvals_sortind]
    sortrevind = pvals_sortind.argsort()

    if method in ["i", "indep", "p", "poscorr"]:
        ecdffactor = _ecdf(pvals_sorted)
    elif method in ["n", "negcorr"]:
        cm = np.sum(1.0 / np.arange(1, len(pvals_sorted) + 1))
        ecdffactor = _ecdf(pvals_sorted) / cm
    else:
        raise ValueError("Method should be 'indep' or 'negcorr'")

    reject = pvals_sorted < (ecdffactor * alpha)
    if reject.any():
        rejectmax = max(np.nonzero(reject)[0])
    else:
        rejectmax = 0
    reject[:rejectmax] = True

    pvals_corrected_raw = pvals_sorted / ecdffactor
    pvals_corrected = np.minimum.accumulate(pvals_corrected_raw[::-1])[::-1]
    pvals_corrected[pvals_corrected > 1.0] = 1.0
    pvals_corrected = pvals_corrected[sortrevind].reshape(shape_init)
    reject = reject[sortrevind].reshape(shape_init)
    return reject, pvals_corrected


def bonferroni_correction(pval, alpha=0.05):
    """P-value correction with Bonferroni method.

    Parameters
    ----------
    pval : array_like
        Set of p-values of the individual tests.
    alpha : float
        Error rate.

    Returns
    -------
    reject : array, bool
        True if a hypothesis is rejected, False if not.
    pval_corrected : array
        P-values adjusted for multiple hypothesis testing to control the
        family-wise error rate.
    """
    pval = np.asarray(pval)
    pval_corrected = pval * float(pval.size)
    # p-values must not be larger than 1.
    pval_corrected = pval_corrected.clip(max=1.0)
    reject = pval_corrected < alpha
    return reject, pval_corrected
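
A quick, hedged illustration (not part of the diff): both corrections take an array of p-values and return a boolean reject mask plus adjusted p-values; the simulated numbers below are arbitrary.

import numpy as np
from mne.stats import bonferroni_correction, fdr_correction

rng = np.random.default_rng(42)
pvals = rng.uniform(size=100)
pvals[:10] /= 1000  # make a few tests clearly "significant"
reject_bonf, p_bonf = bonferroni_correction(pvals, alpha=0.05)
reject_fdr, p_fdr = fdr_correction(pvals, alpha=0.05, method="indep")
print(reject_bonf.sum(), reject_fdr.sum())  # FDR is typically less conservative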

mne/stats/parametric.py (new file, 423 lines)
@@ -0,0 +1,423 @@
# Authors: The MNE-Python contributors.
# License: BSD-3-Clause
# Copyright the MNE-Python contributors.

from functools import reduce
from string import ascii_uppercase

import numpy as np
from scipy import stats
from scipy.signal import detrend

from ..utils import _check_option

# The following function is a rewriting of scipy.stats.f_oneway
# Contrary to the scipy.stats.f_oneway implementation it does not
# copy the data while keeping the inputs unchanged.


def ttest_1samp_no_p(X, sigma=0, method="relative"):
    """Perform one-sample t-test.

    This is a modified version of :func:`scipy.stats.ttest_1samp` that avoids
    a (relatively) time-consuming p-value calculation, and can adjust
    for implausibly small variance values :footcite:`RidgwayEtAl2012`.

    Parameters
    ----------
    X : array
        Array to return t-values for.
    sigma : float
        The variance estimate will be given by ``var + sigma * max(var)`` or
        ``var + sigma``, depending on "method". By default this is 0 (no
        adjustment). See Notes for details.
    method : str
        If 'relative', the minimum variance estimate will be sigma * max(var),
        if 'absolute' the minimum variance estimate will be sigma.

    Returns
    -------
    t : array
        T-values, potentially adjusted using the hat method.

    Notes
    -----
    To use the "hat" adjustment method :footcite:`RidgwayEtAl2012`, a value
    of ``sigma=1e-3`` may be a reasonable choice.

    References
    ----------
    .. footbibliography::
    """
    _check_option("method", method, ["absolute", "relative"])
    var = np.var(X, axis=0, ddof=1)
    if sigma > 0:
        limit = sigma * np.max(var) if method == "relative" else sigma
        var += limit
    return np.mean(X, axis=0) / np.sqrt(var / X.shape[0])
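
A small sketch of the call (not part of the commit); the input follows the (n_observations, n_tests) convention used throughout this module, and the data are simulated.

import numpy as np
from mne.stats import ttest_1samp_no_p

X = np.random.default_rng(0).normal(loc=0.1, size=(30, 500))
t_plain = ttest_1samp_no_p(X)               # no variance adjustment
t_hat = ttest_1samp_no_p(X, sigma=1e-3)     # "hat" value suggested in the Notes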


def ttest_ind_no_p(a, b, equal_var=True, sigma=0.0):
    """Independent samples t-test without p calculation.

    This is a modified version of :func:`scipy.stats.ttest_ind`. It operates
    along the first axis. The ``sigma`` parameter provides an optional "hat"
    adjustment (see :func:`ttest_1samp_no_p` and :footcite:`RidgwayEtAl2012`).

    Parameters
    ----------
    a : array-like
        The first array.
    b : array-like
        The second array.
    equal_var : bool
        Assume equal variance. See :func:`scipy.stats.ttest_ind`.
    sigma : float
        The regularization. See :func:`ttest_1samp_no_p`.

    Returns
    -------
    t : array
        T values.

    References
    ----------
    .. footbibliography::
    """
    v1 = np.var(a, axis=0, ddof=1)
    v2 = np.var(b, axis=0, ddof=1)
    n1 = a.shape[0]
    n2 = b.shape[0]
    if equal_var:
        df = n1 + n2 - 2.0
        var = ((n1 - 1) * v1 + (n2 - 1) * v2) / df
        var = var * (1.0 / n1 + 1.0 / n2)
    else:
        vn1 = v1 / n1
        vn2 = v2 / n2
        with np.errstate(divide="ignore", invalid="ignore"):
            df = (vn1 + vn2) ** 2 / (vn1**2 / (n1 - 1) + vn2**2 / (n2 - 1))

        # If df is undefined, variances are zero (assumes n1 > 0 & n2 > 0).
        # Hence it doesn't matter what df is as long as it's not NaN.
        df = np.where(np.isnan(df), 1, df)
        var = vn1 + vn2
    if sigma > 0:
        var += sigma * np.max(var)
    denom = np.sqrt(var)
    d = np.mean(a, 0) - np.mean(b, 0)
    with np.errstate(divide="ignore", invalid="ignore"):
        t = np.divide(d, denom)
    return t
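
Hedged usage sketch with simulated groups of unequal size; ``equal_var=False`` switches to the Welch-style variance described above.

import numpy as np
from mne.stats import ttest_ind_no_p

rng = np.random.default_rng(1)
a = rng.normal(0.2, 1.0, size=(25, 100))
b = rng.normal(0.0, 1.0, size=(30, 100))
t_pooled = ttest_ind_no_p(a, b)                   # pooled variance
t_welch = ttest_ind_no_p(a, b, equal_var=False)   # Welch-style variance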


def f_oneway(*args):
    """Perform a 1-way ANOVA.

    The one-way ANOVA tests the null hypothesis that 2 or more groups have
    the same population mean. The test is applied to samples from two or
    more groups, possibly with differing sizes :footcite:`Lowry2014`.

    This is a modified version of :func:`scipy.stats.f_oneway` that avoids
    computing the associated p-value.

    Parameters
    ----------
    *args : array_like
        The sample measurements should be given as arguments.

    Returns
    -------
    F-value : float
        The computed F-value of the test.

    Notes
    -----
    The ANOVA test has important assumptions that must be satisfied in order
    for the associated p-value to be valid.

    1. The samples are independent
    2. Each sample is from a normally distributed population
    3. The population standard deviations of the groups are all equal. This
       property is known as homoscedasticity.

    If these assumptions are not true for a given set of data, it may still be
    possible to use the Kruskal-Wallis H-test (:func:`scipy.stats.kruskal`)
    although with some loss of power.

    The algorithm is from Heiman :footcite:`Heiman2002`, pp.394-7.

    References
    ----------
    .. footbibliography::
    """
    n_classes = len(args)
    n_samples_per_class = np.array([len(a) for a in args])
    n_samples = np.sum(n_samples_per_class)
    ss_alldata = reduce(lambda x, y: x + y, [np.sum(a**2, axis=0) for a in args])
    sums_args = [np.sum(a, axis=0) for a in args]
    square_of_sums_alldata = reduce(lambda x, y: x + y, sums_args) ** 2
    square_of_sums_args = [s**2 for s in sums_args]
    sstot = ss_alldata - square_of_sums_alldata / float(n_samples)
    ssbn = 0
    for k, _ in enumerate(args):
        ssbn += square_of_sums_args[k] / n_samples_per_class[k]
    ssbn -= square_of_sums_alldata / float(n_samples)
    sswn = sstot - ssbn
    dfbn = n_classes - 1
    dfwn = n_samples - n_classes
    msb = ssbn / float(dfbn)
    msw = sswn / float(dfwn)
    f = msb / msw
    return f
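
Illustrative only: three simulated groups with a shifted mean in the last one; one F-value is returned per feature (column).

import numpy as np
from mne.stats import f_oneway

rng = np.random.default_rng(2)
groups = [rng.normal(m, 1.0, size=(20, 64)) for m in (0.0, 0.0, 0.5)]
F = f_oneway(*groups)  # shape (64,)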


def _map_effects(n_factors, effects):
    """Map effects to indices."""
    if n_factors > len(ascii_uppercase):
        raise ValueError("Maximum number of factors supported is 26")

    factor_names = list(ascii_uppercase[:n_factors])

    if isinstance(effects, str):
        if "*" in effects and ":" in effects:
            raise ValueError('"*" and ":" must not both appear in effects')
        elif "+" in effects and ":" in effects:
            raise ValueError('"+" and ":" must not both appear in effects')
        elif effects == "all":
            effects = None
        elif len(effects) == 1 or ":" in effects:
            effects = [effects]
        elif "+" in effects:
            # all main effects
            effects = effects.split("+")
        elif "*" in effects:
            pass  # handle later
        else:
            raise ValueError(f'"{effects}" is not a valid option for "effects"')
    if isinstance(effects, list):
        bad_names = [e for e in effects if e not in factor_names]
        if len(bad_names) > 0:
            raise ValueError(
                f"Effect names: {bad_names} are not valid. They should consist of the "
                f"first `n_factors` ({n_factors}) characters from the alphabet"
            )

    indices = list(np.arange(2**n_factors - 1))
    names = list()
    for this_effect in indices:
        contrast_idx = _get_contrast_indices(this_effect + 1, n_factors)
        this_code = (n_factors - 1) - np.where(contrast_idx == 1)[0]
        this_name = [factor_names[e] for e in this_code]
        this_name.sort()
        names.append(":".join(this_name))

    if effects is None or isinstance(effects, str):
        effects_ = names
    else:
        effects_ = effects

    selection = [names.index(sel) for sel in effects_]
    names = [names[sel] for sel in selection]

    if isinstance(effects, str):
        if "*" in effects:
            # hierarchical order of effects
            # the * based effect can be used as stop index
            sel_ind = names.index(effects.replace("*", ":")) + 1
            names = names[:sel_ind]
            selection = selection[:sel_ind]

    return selection, names


def _get_contrast_indices(effect_idx, n_factors):  # noqa: D401
    """Henson's factor coding, see num2binvec."""
    binrepr = np.binary_repr(effect_idx, n_factors)
    return np.array([int(i) for i in binrepr], dtype=int)


def _iter_contrasts(n_subjects, factor_levels, effect_picks):
    """Set up contrasts."""
    sc = []
    n_factors = len(factor_levels)
    # prepare computation of Kronecker products
    for n_levels in factor_levels:
        # for each factor append
        # 1) column vector of length == number of levels,
        # 2) square matrix with diagonal == number of levels

        # main + interaction effects for contrasts
        sc.append([np.ones([n_levels, 1]), detrend(np.eye(n_levels), type="constant")])

    for this_effect in effect_picks:
        contrast_idx = _get_contrast_indices(this_effect + 1, n_factors)
        c_ = sc[0][contrast_idx[n_factors - 1]]
        for i_contrast in range(1, n_factors):
            this_contrast = contrast_idx[(n_factors - 1) - i_contrast]
            c_ = np.kron(c_, sc[i_contrast][this_contrast])
        df1 = np.linalg.matrix_rank(c_)
        df2 = df1 * (n_subjects - 1)
        yield c_, df1, df2


def f_threshold_mway_rm(n_subjects, factor_levels, effects="A*B", pvalue=0.05):
    """Compute F-value thresholds for a two-way ANOVA.

    Parameters
    ----------
    n_subjects : int
        The number of subjects to be analyzed.
    factor_levels : list-like
        The number of levels per factor.
    effects : str
        A string denoting the effect to be returned. The following
        mapping is currently supported:

        * ``'A'``: main effect of A
        * ``'B'``: main effect of B
        * ``'A:B'``: interaction effect
        * ``'A+B'``: both main effects
        * ``'A*B'``: all three effects

    pvalue : float
        The p-value used to compute the threshold.

    Returns
    -------
    F_threshold : list | float
        List of F-values for each effect if the number of effects
        requested > 2, else float.

    See Also
    --------
    f_oneway
    f_mway_rm

    Notes
    -----
    .. versionadded:: 0.10
    """
    effect_picks, _ = _map_effects(len(factor_levels), effects)

    F_threshold = []
    for _, df1, df2 in _iter_contrasts(n_subjects, factor_levels, effect_picks):
        F_threshold.append(stats.f(df1, df2).isf(pvalue))

    return F_threshold if len(F_threshold) > 1 else F_threshold[0]


def f_mway_rm(data, factor_levels, effects="all", correction=False, return_pvals=True):
    """Compute M-way repeated measures ANOVA for fully balanced designs.

    Parameters
    ----------
    data : ndarray
        3D array where the first two dimensions are compliant
        with a subjects X conditions scheme where the first
        factor repeats slowest::

                        A1B1 A1B2 A2B1 A2B2
            subject 1   1.34 2.53 0.97 1.74
            subject ... .... .... .... ....
            subject k   2.45 7.90 3.09 4.76

        The last dimension is assumed to carry the observations
        for mass univariate analysis.
    factor_levels : list-like
        The number of levels per factor.
    effects : str | list
        A string denoting the effect to be returned. The following
        mapping is currently supported (example with 2 factors):

        * ``'A'``: main effect of A
        * ``'B'``: main effect of B
        * ``'A:B'``: interaction effect
        * ``'A+B'``: both main effects
        * ``'A*B'``: all three effects
        * ``'all'``: all effects (equals 'A*B' in a 2 way design)

        If list, effect names are used: ``['A', 'B', 'A:B']``.
    correction : bool
        Whether to apply a sphericity correction if one factor has more than
        two levels. If True, the Greenhouse-Geisser method will be applied.
    return_pvals : bool
        If True, return p-values corresponding to F-values.

    Returns
    -------
    F_vals : ndarray
        An array of F-statistics with length corresponding to the number
        of effects estimated. The shape depends on the number of effects
        estimated.
    p_vals : ndarray
        If not requested via return_pvals, defaults to an empty array.

    See Also
    --------
    f_oneway
    f_threshold_mway_rm

    Notes
    -----
    .. versionadded:: 0.10
    """
    out_reshape = (-1,)
    if data.ndim == 2:  # general purpose support, e.g. behavioural data
        data = data[:, :, np.newaxis]
    elif data.ndim > 3:  # let's allow for some magic here
        out_reshape = data.shape[2:]
        data = data.reshape(data.shape[0], data.shape[1], np.prod(data.shape[2:]))

    effect_picks, _ = _map_effects(len(factor_levels), effects)
    n_obs = data.shape[2]
    n_replications = data.shape[0]

    # put last axis in front to 'iterate' over mass univariate instances.
    data = np.rollaxis(data, 2)
    fvalues, pvalues = [], []
    for c_, df1, df2 in _iter_contrasts(n_replications, factor_levels, effect_picks):
        y = np.dot(data, c_)
        b = np.mean(y, axis=1)[:, np.newaxis, :]
        ss = np.sum(np.sum(y * b, axis=2), axis=1)
        mse = (np.sum(np.sum(y * y, axis=2), axis=1) - ss) / (df2 / df1)
        fvals = ss / mse
        fvalues.append(fvals)
        if correction:
            # sample covariances, leave off "/ (y.shape[1] - 1)" norm because
            # it falls out.
            v = np.array([np.dot(y_.T, y_) for y_ in y])
            v = np.array([np.trace(vv) for vv in v]) ** 2 / (
                df1 * np.sum(np.sum(v * v, axis=2), axis=1)
            )
            eps = v

        df1, df2 = np.zeros(n_obs) + df1, np.zeros(n_obs) + df2
        if correction:
            # numerical imprecision can cause eps=0.99999999999999989
            # even with a single category, so never let our degrees of
            # freedom drop below 1.
            df1, df2 = (np.maximum(d[None, :] * eps, 1.0) for d in (df1, df2))

        if return_pvals:
            pvals = stats.f(df1, df2).sf(fvals)
        else:
            pvals = np.empty(0)
        pvalues.append(pvals)

    # handle single effect returns
    return [
        np.squeeze(np.asarray([v.reshape(out_reshape) for v in vv]))
        for vv in (fvalues, pvalues)
    ]
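
A hedged sketch of the repeated-measures workflow with simulated data for a 2x3 design; the same ``effects`` string drives both the ANOVA and the matching thresholds.

import numpy as np
from mne.stats import f_mway_rm, f_threshold_mway_rm

n_subjects, factor_levels = 15, [2, 3]  # 2x3 repeated-measures design
data = np.random.default_rng(3).normal(size=(n_subjects, 6, 100))
fvals, pvals = f_mway_rm(data, factor_levels, effects="A*B")
thresholds = f_threshold_mway_rm(n_subjects, factor_levels, effects="A*B")
# fvals/pvals and thresholds each contain one entry per effect (A, B, A:B);
# the thresholds are commonly reused as ``threshold`` in cluster permutation tests.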


def _parametric_ci(arr, ci=0.95):
    """Calculate the `ci`% parametric confidence interval for `arr`."""
    mean = arr.mean(0)
    if len(arr) < 2:  # can't compute standard error
        sigma = np.full_like(mean, np.nan)
        return mean, sigma
    sigma = stats.sem(arr, 0)
    return stats.t.interval(ci, loc=mean, scale=sigma, df=arr.shape[0])

mne/stats/permutations.py (new file, 162 lines)
@@ -0,0 +1,162 @@
"""T-test with permutations."""

# Authors: The MNE-Python contributors.
# License: BSD-3-Clause
# Copyright the MNE-Python contributors.

from math import sqrt

import numpy as np

from ..parallel import parallel_func
from ..utils import check_random_state, logger, verbose


def _max_stat(X, X2, perms, dof_scaling):
    """Aux function for permutation_t_test (for parallel comp)."""
    n_samples = len(X)
    mus = np.dot(perms, X) / float(n_samples)
    stds = np.sqrt(X2[None, :] - mus * mus) * dof_scaling  # std with splitting
    max_abs = np.max(np.abs(mus) / (stds / sqrt(n_samples)), axis=1)  # t-max
    return max_abs


@verbose
def permutation_t_test(
    X, n_permutations=10000, tail=0, n_jobs=None, seed=None, verbose=None
):
    """One sample/paired sample permutation test based on a t-statistic.

    This function can perform the test on one variable or
    simultaneously on multiple variables. When applying the test to multiple
    variables, the "tmax" method is used for adjusting the p-values of each
    variable for multiple comparisons. Like Bonferroni correction, this method
    adjusts p-values in a way that controls the family-wise error rate.
    However, the permutation method will be more
    powerful than Bonferroni correction when different variables in the test
    are correlated (see :footcite:`NicholsHolmes2002`).

    Parameters
    ----------
    X : array, shape (n_samples, n_tests)
        Samples (observations) by number of tests (variables).
    n_permutations : int | 'all'
        Number of permutations. If n_permutations is 'all' all possible
        permutations are tested. This is the exact test, which can be
        intractable when the number of samples is large (e.g., > 20).
        If n_permutations >= 2**n_samples then the exact test is performed.
    tail : -1 or 0 or 1 (default = 0)
        If tail is 1, the alternative hypothesis is that the
        mean of the data is greater than 0 (upper tailed test). If tail is 0,
        the alternative hypothesis is that the mean of the data is different
        than 0 (two tailed test). If tail is -1, the alternative hypothesis
        is that the mean of the data is less than 0 (lower tailed test).
    %(n_jobs)s
    %(seed)s
    %(verbose)s

    Returns
    -------
    T_obs : array of shape [n_tests]
        T-statistic observed for all variables.
    p_values : array of shape [n_tests]
        P-values for all the tests (a.k.a. variables).
    H0 : array of shape [n_permutations]
        T-statistic obtained by permutations and t-max trick for multiple
        comparisons.

    Notes
    -----
    If ``n_permutations >= 2 ** (n_samples - (tail == 0))``,
    ``n_permutations`` and ``seed`` will be ignored since an exact test
    (full permutation test) will be performed.

    References
    ----------
    .. footbibliography::
    """
    from .cluster_level import _get_1samp_orders

    n_samples, n_tests = X.shape
    X2 = np.mean(X**2, axis=0)  # precompute moments
    mu0 = np.mean(X, axis=0)
    dof_scaling = sqrt(n_samples / (n_samples - 1.0))
    std0 = np.sqrt(X2 - mu0**2) * dof_scaling  # get std with var splitting
    T_obs = np.mean(X, axis=0) / (std0 / sqrt(n_samples))
    rng = check_random_state(seed)
    orders, _, extra = _get_1samp_orders(n_samples, n_permutations, tail, rng)
    perms = 2 * np.array(orders) - 1  # from 0, 1 -> 1, -1
    logger.info(f"Permuting {len(orders)} times{extra}...")
    parallel, my_max_stat, n_jobs = parallel_func(_max_stat, n_jobs)
    max_abs = np.concatenate(
        parallel(
            my_max_stat(X, X2, p, dof_scaling) for p in np.array_split(perms, n_jobs)
        )
    )
    max_abs = np.concatenate((max_abs, [np.abs(T_obs).max()]))
    H0 = np.sort(max_abs)
    if tail == 0:
        p_values = (H0 >= np.abs(T_obs[:, np.newaxis])).mean(-1)
    elif tail == 1:
        p_values = (H0 >= T_obs[:, np.newaxis]).mean(-1)
    elif tail == -1:
        p_values = (-H0 <= T_obs[:, np.newaxis]).mean(-1)
    return T_obs, p_values, H0
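
A brief usage sketch on simulated data (not part of the commit); the returned p-values are already tmax-corrected across the 60 tests.

import numpy as np
from mne.stats import permutation_t_test

X = np.random.default_rng(4).normal(loc=0.05, size=(25, 60))  # 25 obs, 60 tests
T_obs, p_values, H0 = permutation_t_test(X, n_permutations=1000, tail=0, seed=0)
print((p_values < 0.05).sum())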


def bootstrap_confidence_interval(
    arr, ci=0.95, n_bootstraps=2000, stat_fun="mean", random_state=None
):
    """Get confidence intervals from non-parametric bootstrap.

    Parameters
    ----------
    arr : ndarray, shape (n_samples, ...)
        The input data on which to calculate the confidence interval.
    ci : float
        Level of the confidence interval between 0 and 1.
    n_bootstraps : int
        Number of bootstraps.
    stat_fun : str | callable
        Can be "mean", "median", or a callable operating along ``axis=0``.
    random_state : int | float | array_like | None
        The seed at which to initialize the bootstrap.

    Returns
    -------
    cis : ndarray, shape (2, ...)
        Containing the lower boundary of the CI at ``cis[0, ...]`` and the
        upper boundary of the CI at ``cis[1, ...]``.
    """
    if stat_fun == "mean":

        def stat_fun(x):
            return x.mean(axis=0)

    elif stat_fun == "median":

        def stat_fun(x):
            return np.median(x, axis=0)

    elif not callable(stat_fun):
        raise ValueError("stat_fun must be 'mean', 'median' or callable.")
    n_trials = arr.shape[0]
    indices = np.arange(n_trials, dtype=int)  # BCA would be cool to have too
    rng = check_random_state(random_state)
    boot_indices = rng.choice(indices, replace=True, size=(n_bootstraps, len(indices)))
    stat = np.array([stat_fun(arr[inds]) for inds in boot_indices])
    ci = (((1 - ci) / 2) * 100, (1 - ((1 - ci) / 2)) * 100)
    ci_low, ci_up = np.percentile(stat, ci, axis=0)
    return np.array([ci_low, ci_up])
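
Hedged sketch with simulated trials-by-times data; any callable that reduces along ``axis=0`` can stand in for the built-in "mean"/"median" options.

import numpy as np
from mne.stats import bootstrap_confidence_interval

arr = np.random.default_rng(5).normal(size=(40, 128))  # e.g. trials x times
ci_low, ci_up = bootstrap_confidence_interval(arr, ci=0.95, stat_fun="median")
ci_custom = bootstrap_confidence_interval(arr, stat_fun=lambda x: x.mean(axis=0))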


def _ci(arr, ci=0.95, method="bootstrap", n_bootstraps=2000, random_state=None):
    """Calculate confidence interval. Aux function for plot_compare_evokeds."""
    if method == "bootstrap":
        return bootstrap_confidence_interval(
            arr, ci=ci, n_bootstraps=n_bootstraps, random_state=random_state
        )
    else:
        from .parametric import _parametric_ci

        return _parametric_ci(arr, ci=ci)

mne/stats/regression.py (new file, 433 lines)
@@ -0,0 +1,433 @@
# Authors: The MNE-Python contributors.
# License: BSD-3-Clause
# Copyright the MNE-Python contributors.

from collections import namedtuple
from inspect import isgenerator

import numpy as np
from scipy import linalg, sparse, stats

from .._fiff.pick import _picks_to_idx, pick_info, pick_types
from ..epochs import BaseEpochs
from ..evoked import Evoked, EvokedArray
from ..source_estimate import SourceEstimate
from ..utils import _reject_data_segments, fill_doc, logger, warn


def linear_regression(inst, design_matrix, names=None):
    """Fit Ordinary Least Squares (OLS) regression.

    Parameters
    ----------
    inst : instance of Epochs | iterable of SourceEstimate
        The data to be regressed. Contains all the trials, sensors, and time
        points for the regression. For Source Estimates, accepts either a list
        or a generator object.
    design_matrix : ndarray, shape (n_observations, n_regressors)
        The regressors to be used. Must be a 2d array with as many rows as
        the first dimension of the data. The first column of this matrix will
        typically consist of ones (intercept column).
    names : array-like | None
        Optional parameter to name the regressors (i.e., the columns in the
        design matrix). If provided, the length must correspond to the number
        of columns present in design matrix (including the intercept, if
        present). Otherwise, the default names are ``'x0'``, ``'x1'``,
        ``'x2', …, 'x(n-1)'`` for ``n`` regressors.

    Returns
    -------
    results : dict of namedtuple
        For each regressor (key), a namedtuple is provided with the
        following attributes:

        - ``beta`` : regression coefficients
        - ``stderr`` : standard error of regression coefficients
        - ``t_val`` : t statistics (``beta`` / ``stderr``)
        - ``p_val`` : two-sided p-value of t statistic under the t
          distribution
        - ``mlog10_p_val`` : -log₁₀-transformed p-value.

        The tuple members are numpy arrays. The shape of each numpy array is
        the shape of the data minus the first dimension; e.g., if the shape of
        the original data was ``(n_observations, n_channels, n_timepoints)``,
        then the shape of each of the arrays will be
        ``(n_channels, n_timepoints)``.
    """
    if names is None:
        names = [f"x{i}" for i in range(design_matrix.shape[1])]

    if isinstance(inst, BaseEpochs):
        picks = pick_types(
            inst.info,
            meg=True,
            eeg=True,
            ref_meg=True,
            stim=False,
            eog=False,
            ecg=False,
            emg=False,
            exclude=["bads"],
        )
        if [inst.ch_names[p] for p in picks] != inst.ch_names:
            warn("Fitting linear model to non-data or bad channels. Check picking")
        msg = "Fitting linear model to epochs"
        data = inst.get_data(copy=False)
        out = EvokedArray(np.zeros(data.shape[1:]), inst.info, inst.tmin)
    elif isgenerator(inst):
        msg = "Fitting linear model to source estimates (generator input)"
        out = next(inst)
        data = np.array([out.data] + [i.data for i in inst])
    elif isinstance(inst, list) and isinstance(inst[0], SourceEstimate):
        msg = "Fitting linear model to source estimates (list input)"
        out = inst[0]
        data = np.array([i.data for i in inst])
    else:
        raise ValueError("Input must be epochs or iterable of source estimates")
    logger.info(msg + f", ({np.prod(data.shape[1:])} targets, {len(names)} regressors)")
    lm_params = _fit_lm(data, design_matrix, names)
    lm = namedtuple("lm", "beta stderr t_val p_val mlog10_p_val")
    lm_fits = {}
    for name in names:
        parameters = [p[name] for p in lm_params]
        for ii, value in enumerate(parameters):
            out_ = out.copy()
            if not isinstance(out_, SourceEstimate | Evoked):
                raise RuntimeError("Invalid container.")
            out_._data[:] = value
            parameters[ii] = out_
        lm_fits[name] = lm(*parameters)
    logger.info("Done")
    return lm_fits
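
A hedged sketch of a single-trial OLS fit; ``epochs`` and the per-trial regressor ``rt`` are assumed to exist (they are not defined in this commit), and the intercept column is added by hand as described above.

import numpy as np
from mne.stats import linear_regression

intercept = np.ones((len(epochs), 1))          # epochs: hypothetical mne.Epochs
design = np.hstack([intercept, rt[:, np.newaxis]])  # rt: hypothetical reaction times
res = linear_regression(epochs, design, names=["intercept", "rt"])
res["rt"].beta.plot_joint()           # Evoked of regression coefficients
res["rt"].mlog10_p_val.plot_image()   # -log10 p-values per channel/time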


def _fit_lm(data, design_matrix, names):
    """Aux function."""
    n_samples = len(data)
    n_features = np.prod(data.shape[1:])
    if design_matrix.ndim != 2:
        raise ValueError("Design matrix must be a 2d array")
    n_rows, n_predictors = design_matrix.shape

    if n_samples != n_rows:
        raise ValueError(
            "Number of rows in design matrix must be equal to number of observations"
        )
    if n_predictors != len(names):
        raise ValueError(
            "Number of regressor names must be equal to "
            "number of columns in design matrix"
        )

    y = np.reshape(data, (n_samples, n_features))
    betas, resid_sum_squares, _, _ = linalg.lstsq(a=design_matrix, b=y)

    df = n_rows - n_predictors
    sqrt_noise_var = np.sqrt(resid_sum_squares / df).reshape(data.shape[1:])
    design_invcov = linalg.inv(np.dot(design_matrix.T, design_matrix))
    unscaled_stderrs = np.sqrt(np.diag(design_invcov))
    tiny = np.finfo(np.float64).tiny
    beta, stderr, t_val, p_val, mlog10_p_val = (dict() for _ in range(5))
    for x, unscaled_stderr, predictor in zip(betas, unscaled_stderrs, names):
        beta[predictor] = x.reshape(data.shape[1:])
        stderr[predictor] = sqrt_noise_var * unscaled_stderr
        p_val[predictor] = np.empty_like(stderr[predictor])
        t_val[predictor] = np.empty_like(stderr[predictor])

        stderr_pos = stderr[predictor] > 0
        beta_pos = beta[predictor] > 0
        t_val[predictor][stderr_pos] = (
            beta[predictor][stderr_pos] / stderr[predictor][stderr_pos]
        )
        cdf = stats.t.cdf(np.abs(t_val[predictor][stderr_pos]), df)
        p_val[predictor][stderr_pos] = np.clip((1.0 - cdf) * 2.0, tiny, 1.0)
        # degenerate cases
        mask = ~stderr_pos & beta_pos
        t_val[predictor][mask] = np.inf * np.sign(beta[predictor][mask])
        p_val[predictor][mask] = tiny
        # could do NaN here, but hopefully this is safe enough
        mask = ~stderr_pos & ~beta_pos
        t_val[predictor][mask] = 0
        p_val[predictor][mask] = 1.0
        mlog10_p_val[predictor] = -np.log10(p_val[predictor])

    return beta, stderr, t_val, p_val, mlog10_p_val


@fill_doc
def linear_regression_raw(
    raw,
    events,
    event_id=None,
    tmin=-0.1,
    tmax=1,
    covariates=None,
    reject=None,
    flat=None,
    tstep=1.0,
    decim=1,
    picks=None,
    solver="cholesky",
):
    """Estimate regression-based evoked potentials/fields by linear modeling.

    This models the full M/EEG time course, including correction for
    overlapping potentials and allowing for continuous/scalar predictors.
    Internally, this constructs a predictor matrix X of size
    n_samples * (n_conds * window length), solving the linear system
    ``Y = bX`` and returning ``b`` as evoked-like time series split by
    condition. See :footcite:`SmithKutas2015`.

    Parameters
    ----------
    raw : instance of Raw
        A raw object. Note: be very careful about data that is not
        downsampled, as the resulting matrices can be enormous and easily
        overload your computer. Typically, 100 Hz sampling rate is
        appropriate - or using the decim keyword (see below).
    events : ndarray of int, shape (n_events, 3)
        An array where the first column corresponds to samples in raw
        and the last to integer codes in event_id.
    event_id : dict | None
        As in Epochs; a dictionary where the values may be integers or
        iterables of integers, corresponding to the 3rd column of
        events, and the keys are condition names.
        If None, uses all events in the events array.
    tmin : float | dict
        If float, gives the lower limit (in seconds) for the time window for
        which all event types' effects are estimated. If a dict, can be used to
        specify time windows for specific event types: keys correspond to keys
        in event_id and/or covariates; for missing values, the default (-.1) is
        used.
    tmax : float | dict
        If float, gives the upper limit (in seconds) for the time window for
        which all event types' effects are estimated. If a dict, can be used to
        specify time windows for specific event types: keys correspond to keys
        in event_id and/or covariates; for missing values, the default (1.) is
        used.
    covariates : dict-like | None
        If dict-like (e.g., a pandas DataFrame), values have to be array-like
        and of the same length as the rows in ``events``. Keys correspond
        to additional event types/conditions to be estimated and are matched
        with the time points given by the first column of ``events``. If
        None, only binary events (from event_id) are used.
    reject : None | dict
        For cleaning raw data before the regression is performed: set up
        rejection parameters based on peak-to-peak amplitude in continuously
        selected subepochs. If None, no rejection is done.
        If dict, keys are types ('grad' | 'mag' | 'eeg' | 'eog' | 'ecg')
        and values are the maximal peak-to-peak values to select rejected
        epochs, e.g.::

            reject = dict(grad=4000e-12, # T / m (gradiometers)
                          mag=4e-11, # T (magnetometers)
                          eeg=40e-5, # V (EEG channels)
                          eog=250e-5 # V (EOG channels))

    flat : None | dict
        For cleaning raw data before the regression is performed: set up
        rejection parameters based on flatness of the signal. If None, no
        rejection is done. If a dict, keys are ('grad' | 'mag' |
        'eeg' | 'eog' | 'ecg') and values are minimal peak-to-peak values to
        select rejected epochs.
    tstep : float
        Length of windows for peak-to-peak detection for raw data cleaning.
    decim : int
        Decimate by choosing only a subsample of data points. Highly
        recommended for data recorded at high sampling frequencies, as
        otherwise huge intermediate matrices have to be created and inverted.
    %(picks_good_data)s
    solver : str | callable
        Either a function which takes as its inputs the sparse predictor
        matrix X and the observation matrix Y, and returns the coefficient
        matrix b; or a string.
        X is of shape (n_times, n_predictors * time_window_length).
        y is of shape (n_channels, n_times).
        If str, must be ``'cholesky'``, in which case the solver used is
        ``linalg.solve(dot(X.T, X), dot(X.T, y))``.

    Returns
    -------
    evokeds : dict
        A dict where the keys correspond to conditions and the values are
        Evoked objects with the ER[F/P]s. These can be used exactly like any
        other Evoked object, including e.g. plotting or statistics.

    References
    ----------
    .. footbibliography::
    """
    if isinstance(solver, str):
        if solver not in {"cholesky"}:
            raise ValueError(f"No such solver: {solver}")
        if solver == "cholesky":

            def solver(X, y):
                a = (X.T * X).toarray()  # dot product of sparse matrices
                return linalg.solve(
                    a, X.T * y, assume_a="pos", overwrite_a=True, overwrite_b=True
                ).T

    elif callable(solver):
        pass
    else:
        raise TypeError("The solver must be a str or a callable.")

    # build data
    data, info, events = _prepare_rerp_data(raw, events, picks=picks, decim=decim)

    if event_id is None:
        event_id = {str(v): v for v in set(events[:, 2])}

    # build predictors
    X, conds, cond_length, tmin_s, tmax_s = _prepare_rerp_preds(
        n_samples=data.shape[1],
        sfreq=info["sfreq"],
        events=events,
        event_id=event_id,
        tmin=tmin,
        tmax=tmax,
        covariates=covariates,
    )

    # remove "empty" and contaminated data points
    X, data = _clean_rerp_input(X, data, reject, flat, decim, info, tstep)

    # solve linear system
    coefs = solver(X, data.T)
    if coefs.shape[0] != data.shape[0]:
        raise ValueError(
            f"solver output has unexpected shape {coefs.shape}. Supply a "
            "function that returns coefficients in the form "
            "(n_targets, n_features), where "
            f"n_targets == n_channels == {data.shape[0]}."
        )

    # construct Evoked objects to be returned from output
    evokeds = _make_evokeds(coefs, conds, cond_length, tmin_s, tmax_s, info)

    return evokeds
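
A hedged sketch of the rERP workflow; ``raw`` and ``events`` are assumed to exist (e.g. from mne.io.read_raw_fif and mne.find_events) and are not part of this commit.

from mne.stats import linear_regression_raw

evokeds = linear_regression_raw(
    raw,                       # hypothetical preloaded Raw object
    events,                    # hypothetical events array, shape (n_events, 3)
    event_id={"standard": 1, "oddball": 2},
    tmin=-0.1,
    tmax=0.5,
    decim=4,                   # keep the predictor matrix small
    reject=dict(eeg=100e-6),
)
evokeds["oddball"].plot()      # overlap-corrected regression ERP for one condition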


def _prepare_rerp_data(raw, events, picks=None, decim=1):
    """Prepare events and data, primarily for `linear_regression_raw`."""
    picks = _picks_to_idx(raw.info, picks)
    info = pick_info(raw.info, picks)
    decim = int(decim)
    with info._unlock():
        info["sfreq"] /= decim
    data, times = raw[:]
    data = data[picks, ::decim]
    if len(set(events[:, 0])) < len(events[:, 0]):
        raise ValueError(
            "`events` contains duplicate time points. Make "
            "sure all entries in the first column of `events` "
            "are unique."
        )

    events = events.copy()
    events[:, 0] -= raw.first_samp
    events[:, 0] //= decim
    if len(set(events[:, 0])) < len(events[:, 0]):
        raise ValueError(
            "After decimating, `events` contains duplicate time "
            "points. This means some events are too closely "
            "spaced for the requested decimation factor. Choose "
            "different events, drop close events, or choose a "
            "different decimation factor."
        )

    return data, info, events


def _prepare_rerp_preds(
    n_samples, sfreq, events, event_id=None, tmin=-0.1, tmax=1, covariates=None
):
    """Build predictor matrix and metadata (e.g. condition time windows)."""
    conds = list(event_id)
    if covariates is not None:
        conds += list(covariates)

    # time windows (per event type) are converted to sample points from times
    # int(round()) to be safe and match Epochs constructor behavior
    if isinstance(tmin, float | int):
        tmin_s = {cond: int(round(tmin * sfreq)) for cond in conds}
    else:
        tmin_s = {cond: int(round(tmin.get(cond, -0.1) * sfreq)) for cond in conds}
    if isinstance(tmax, float | int):
        tmax_s = {cond: int(round(tmax * sfreq) + 1) for cond in conds}
    else:
        tmax_s = {cond: int(round(tmax.get(cond, 1.0) * sfreq)) + 1 for cond in conds}

    # Construct predictor matrix
    # We do this by creating one array per event type, shape (lags, samples)
    # (where lags depends on tmin/tmax and can be different for different
    # event types). Columns correspond to predictors, predictors correspond to
    # time lags. Thus, each array is mostly sparse, with one diagonal of 1s
    # per event (for binary predictors).

    cond_length = dict()
    xs = []
    for cond in conds:
        tmin_, tmax_ = tmin_s[cond], tmax_s[cond]
        n_lags = int(tmax_ - tmin_)  # width of matrix
        if cond in event_id:  # for binary predictors
            ids = (
                [event_id[cond]] if isinstance(event_id[cond], int) else event_id[cond]
            )
            onsets = -(events[np.isin(events[:, 2], ids), 0] + tmin_)
            values = np.ones((len(onsets), n_lags))

        else:  # for predictors from covariates, e.g. continuous ones
            covs = covariates[cond]
            if len(covs) != len(events):
                error = (
                    f"Condition {cond} from ``covariates`` is not the same length as "
                    "``events``"
                )
                raise ValueError(error)
            onsets = -(events[np.where(covs != 0), 0] + tmin_)[0]
            v = np.asarray(covs)[np.nonzero(covs)].astype(float)
            values = np.ones((len(onsets), n_lags)) * v[:, np.newaxis]

        cond_length[cond] = len(onsets)
        xs.append(sparse.dia_matrix((values, onsets), shape=(n_samples, n_lags)))

    return sparse.hstack(xs), conds, cond_length, tmin_s, tmax_s


def _clean_rerp_input(X, data, reject, flat, decim, info, tstep):
    """Remove empty and contaminated points from data & predictor matrices."""
    # find only those positions where at least one predictor isn't 0
    has_val = np.unique(X.nonzero()[0])

    # reject positions based on extreme steps in the data
    if reject is not None:
        _, inds = _reject_data_segments(
            data, reject, flat, decim=None, info=info, tstep=tstep
        )
        for t0, t1 in inds:
            has_val = np.setdiff1d(has_val, range(t0, t1))

    return X.tocsr()[has_val], data[:, has_val]


def _make_evokeds(coefs, conds, cond_length, tmin_s, tmax_s, info):
    """Create a dictionary of Evoked objects.

    These will be created from a coefs matrix and condition durations.
    """
    evokeds = dict()
    cumul = 0
    for cond in conds:
        tmin_, tmax_ = tmin_s[cond], tmax_s[cond]
        evokeds[cond] = EvokedArray(
            coefs[:, cumul : cumul + tmax_ - tmin_],
            info=info,
            comment=cond,
            tmin=tmin_ / float(info["sfreq"]),
            nave=cond_length[cond],
            kind="average",
        )  # nave and kind are technically incorrect
        cumul += tmax_ - tmin_
    return evokeds