initial commit
mne/_fiff/utils.py (new file, 331 lines)
@@ -0,0 +1,331 @@
# Authors: The MNE-Python contributors.
# License: BSD-3-Clause
# Copyright the MNE-Python contributors.

import os
import os.path as op
from pathlib import Path

import numpy as np

from .constants import FIFF
from .meas_info import _get_valid_units


def _check_orig_units(orig_units):
    """Check original units from a raw file.

    Units that are close to a valid_unit but not equal can be remapped to fit
    into the valid_units. All other units that are not valid will be replaced
    with "n/a".

    Parameters
    ----------
    orig_units : dict
        Dictionary mapping channel names to their units as specified in
        the header file. Example: {'FC1': 'nV'}

    Returns
    -------
    orig_units_remapped : dict
        Dictionary mapping channel names to their VALID units as specified in
        the header file. Invalid units are now labeled "n/a".
        Example: {'FC1': 'nV', 'Hfp3erz': 'n/a'}
    """
    if orig_units is None:
        return
    valid_units = _get_valid_units()
    valid_units_lowered = [unit.lower() for unit in valid_units]
    orig_units_remapped = dict(orig_units)
    for ch_name, unit in orig_units.items():
        # Be lenient: we ignore case for now.
        if unit.lower() in valid_units_lowered:
            continue

        # Common "invalid units" can be remapped to their valid equivalent
        remap_dict = dict()
        remap_dict["uv"] = "µV"
        remap_dict["μv"] = "µV"  # greek letter mu vs micro sign. use micro
        remap_dict["\x83\xeav"] = "µV"  # for shift-jis mu, use micro
        if unit.lower() in remap_dict:
            orig_units_remapped[ch_name] = remap_dict[unit.lower()]
            continue

        # Some units cannot be saved, they are invalid: assign "n/a"
        orig_units_remapped[ch_name] = "n/a"

    return orig_units_remapped
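A minimal usage sketch (an editor's illustration, not part of the file; it assumes the private helper _get_valid_units resolves as imported above): a close-but-invalid unit is remapped case-insensitively, and anything unrecognized becomes "n/a".

units = {"FC1": "uV", "TRIG": "bogus-unit"}
_check_orig_units(units)
# expected: {'FC1': 'µV', 'TRIG': 'n/a'}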

def _find_channels(ch_names, ch_type="EOG"):
    """Find indices of channels whose names contain a type substring."""
    substrings = (ch_type,)
    substrings = [s.upper() for s in substrings]
    if ch_type == "EOG":
        substrings = ("EOG", "EYE")
    eog_idx = [
        idx
        for idx, ch in enumerate(ch_names)
        if any(substring in ch.upper() for substring in substrings)
    ]
    return eog_idx
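For instance (illustrative values), the EOG special case also matches channel names containing "EYE", while any other ch_type is matched literally:

names = ["Fp1", "VEOG", "eye_track", "ECG"]
_find_channels(names, ch_type="EOG")  # [1, 2]
_find_channels(names, ch_type="ECG")  # [3]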

def _mult_cal_one(data_view, one, idx, cals, mult):
    """Take a chunk of raw data, multiply by mult or cals, and store."""
    one = np.asarray(one, dtype=data_view.dtype)
    assert data_view.shape[1] == one.shape[1], (
        data_view.shape[1],
        one.shape[1],
    )
    if mult is not None:
        assert mult.ndim == one.ndim == 2
        data_view[:] = mult @ one[idx]
    else:
        assert cals is not None
        if isinstance(idx, slice):
            data_view[:] = one[idx]
        else:
            # faster than doing one = one[idx]
            np.take(one, idx, axis=0, out=data_view)
        data_view *= cals
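A small self-contained sketch of the ``cals`` path (hypothetical values): np.take writes the selected channels straight into the output view, which is then scaled in place.

one = np.arange(15.0).reshape(3, 5)  # 3 channels x 5 samples
data_view = np.empty((2, 5))
cals = np.array([[2.0], [0.5]])  # per-channel calibration factors
_mult_cal_one(data_view, one, np.array([0, 2]), cals, mult=None)
# data_view now equals one[[0, 2]] * cals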

def _blk_read_lims(start, stop, buf_len):
    """Deal with indexing in the middle of a data block.

    Parameters
    ----------
    start : int
        Starting index.
    stop : int
        Ending index (exclusive).
    buf_len : int
        Buffer size in samples.

    Returns
    -------
    block_start_idx : int
        The first block to start reading from.
    r_lims : list
        The read limits.
    d_lims : list
        The write limits.

    Notes
    -----
    Consider this example::

        >>> start, stop, buf_len = 2, 27, 10

                        +---------+---------+---------+
        File structure: |  buf0   |  buf1   |  buf2   |
                        +---------+---------+---------+
        File time:      0         10        20        30
        Requested time:    2                         27

                        |                             |
                    blockstart                   blockstop
                           |                        |
                         start                    stop

    We need 27 - 2 = 25 samples (per channel) to store our data, and
    we need to read from 3 buffers (30 samples) to get all of our data.

    On all reads but the first, the data we read starts at
    the first sample of the buffer. On all reads but the last,
    the data we read ends on the last sample of the buffer.

    We call ``this_data`` the variable that stores the current buffer's data,
    and ``data`` the variable that stores the total output.

    On the first read, we need to do this::

        >>> data[0:buf_len-2] = this_data[2:buf_len]  # doctest: +SKIP

    On the second read, we need to do::

        >>> data[1*buf_len-2:2*buf_len-2] = this_data[0:buf_len]  # doctest: +SKIP

    On the final read, we need to do::

        >>> data[2*buf_len-2:3*buf_len-2-3] = this_data[0:buf_len-3]  # doctest: +SKIP

    This function encapsulates this logic to allow a loop over blocks, where
    data is stored using the following limits::

        >>> data[d_lims[ii, 0]:d_lims[ii, 1]] = this_data[r_lims[ii, 0]:r_lims[ii, 1]]  # doctest: +SKIP

    """  # noqa: E501
    # this is used to deal with indexing in the middle of a sampling period
    assert all(isinstance(x, int) for x in (start, stop, buf_len))
    block_start_idx = start // buf_len
    block_start = block_start_idx * buf_len
    last_used_samp = stop - 1
    block_stop = last_used_samp - last_used_samp % buf_len + buf_len
    read_size = block_stop - block_start
    n_blk = read_size // buf_len + (read_size % buf_len != 0)
    start_offset = start - block_start
    end_offset = block_stop - stop
    d_lims = np.empty((n_blk, 2), int)
    r_lims = np.empty((n_blk, 2), int)
    for bi in range(n_blk):
        # Triage start (sidx) and end (eidx) indices for
        # data (d) and read (r)
        if bi == 0:
            d_sidx = 0
            r_sidx = start_offset
        else:
            d_sidx = bi * buf_len - start_offset
            r_sidx = 0
        if bi == n_blk - 1:
            d_eidx = stop - start
            r_eidx = buf_len - end_offset
        else:
            d_eidx = (bi + 1) * buf_len - start_offset
            r_eidx = buf_len
        d_lims[bi] = [d_sidx, d_eidx]
        r_lims[bi] = [r_sidx, r_eidx]
    return block_start_idx, r_lims, d_lims
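Running the docstring example through the function (an editor's check) yields limits matching the three reads described above:

block_start_idx, r_lims, d_lims = _blk_read_lims(2, 27, 10)
block_start_idx  # 0
r_lims.tolist()  # [[2, 10], [0, 10], [0, 7]]
d_lims.tolist()  # [[0, 8], [8, 18], [18, 25]]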

def _file_size(fname):
    """Get the file size in bytes."""
    with open(fname, "rb") as f:
        f.seek(0, os.SEEK_END)
        return f.tell()


def _read_segments_file(
    raw,
    data,
    idx,
    fi,
    start,
    stop,
    cals,
    mult,
    dtype,
    n_channels=None,
    offset=0,
    trigger_ch=None,
):
    """Read a chunk of raw data."""
    if n_channels is None:
        n_channels = raw._raw_extras[fi]["orig_nchan"]

    n_bytes = np.dtype(dtype).itemsize
    # data_offset is in bytes; data_left counts data samples
    # (channels x time points).
    data_offset = n_channels * start * n_bytes + offset
    data_left = (stop - start) * n_channels

    # Read up to 100 MB of data at a time, block_size is in data samples
    block_size = ((int(100e6) // n_bytes) // n_channels) * n_channels
    block_size = min(data_left, block_size)
    with open(raw.filenames[fi], "rb", buffering=0) as fid:
        fid.seek(data_offset)
        # extract data in chunks
        for sample_start in np.arange(0, data_left, block_size) // n_channels:
            count = min(block_size, data_left - sample_start * n_channels)
            block = np.fromfile(fid, dtype, count)
            if block.size != count:
                raise RuntimeError(
                    f"Incorrect number of samples ({block.size} != {count}), please "
                    "report this error to MNE-Python developers"
                )
            block = block.reshape(n_channels, -1, order="F")
            n_samples = block.shape[1]  # = count // n_channels
            sample_stop = sample_start + n_samples
            if trigger_ch is not None:
                stim_ch = trigger_ch[start:stop][sample_start:sample_stop]
                block = np.vstack((block, stim_ch))
            data_view = data[:, sample_start:sample_stop]
            _mult_cal_one(data_view, block, idx, cals, mult)
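The reader needs a full ``raw`` object, but the chunking arithmetic stands alone (illustrative values): the block size is rounded down to a whole number of channels so each chunk reshapes cleanly into (n_channels, n_samples).

n_channels = 64
n_bytes = np.dtype("<f4").itemsize  # float32
block_size = ((int(100e6) // n_bytes) // n_channels) * n_channels
block_size  # 25000000 samples (~100 MB), a multiple of 64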

def read_str(fid, count=1):
    """Read a fixed-width, NUL-terminated ASCII string from a binary file."""
    dtype = np.dtype(f">S{count}")
    string = fid.read(dtype.itemsize)
    data = np.frombuffer(string, dtype=dtype)[0]
    end = data.index(b"\x00") if b"\x00" in data else count
    return data[:end].decode("ascii")
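For example (an editor's sketch), reading a fixed-width field from an in-memory buffer stops at the first NUL byte:

import io
read_str(io.BytesIO(b"MEG\x00padX"), count=8)  # 'MEG'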

def _create_chs(ch_names, cals, ch_coil, ch_kind, eog, ecg, emg, misc):
    """Initialize info['chs'] for eeg channels."""
    chs = list()
    for idx, ch_name in enumerate(ch_names):
        if ch_name in eog or idx in eog:
            coil_type = FIFF.FIFFV_COIL_NONE
            kind = FIFF.FIFFV_EOG_CH
        elif ch_name in ecg or idx in ecg:
            coil_type = FIFF.FIFFV_COIL_NONE
            kind = FIFF.FIFFV_ECG_CH
        elif ch_name in emg or idx in emg:
            coil_type = FIFF.FIFFV_COIL_NONE
            kind = FIFF.FIFFV_EMG_CH
        elif ch_name in misc or idx in misc:
            coil_type = FIFF.FIFFV_COIL_NONE
            kind = FIFF.FIFFV_MISC_CH
        else:
            coil_type = ch_coil
            kind = ch_kind

        chan_info = {
            "cal": cals[idx],
            "logno": idx + 1,
            "scanno": idx + 1,
            "range": 1.0,
            "unit_mul": FIFF.FIFF_UNITM_NONE,
            "ch_name": ch_name,
            "unit": FIFF.FIFF_UNIT_V,
            "coord_frame": FIFF.FIFFV_COORD_HEAD,
            "coil_type": coil_type,
            "kind": kind,
            "loc": np.zeros(12),
        }
        if coil_type == FIFF.FIFFV_COIL_EEG:
            chan_info["loc"][:3] = np.nan
        chs.append(chan_info)
    return chs
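A hedged sketch of a call (hypothetical channel names; the FIFF constants come from the import at the top of this module). Channels listed in one of the override arguments get that kind; everything else falls through to the defaults:

chs = _create_chs(
    ["Fp1", "VEOG"], cals=np.ones(2), ch_coil=FIFF.FIFFV_COIL_EEG,
    ch_kind=FIFF.FIFFV_EEG_CH, eog=["VEOG"], ecg=[], emg=[], misc=[],
)
chs[0]["kind"] == FIFF.FIFFV_EEG_CH  # True: default kind
chs[1]["kind"] == FIFF.FIFFV_EOG_CH  # True: matched by name in eog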

def _construct_bids_filename(base, ext, part_idx, validate=True):
    """Construct a BIDS-compatible filename for split files."""
    # insert index in filename
    dirname = op.dirname(base)
    base = op.basename(base)
    deconstructed_base = base.split("_")
    if len(deconstructed_base) < 2 and validate:
        raise ValueError(
            "Filename base must end with an underscore followed "
            f"by the modality (e.g., _eeg or _meg), got {base}"
        )
    suffix = deconstructed_base[-1]
    base = "_".join(deconstructed_base[:-1])
    use_fname = f"{base}_split-{part_idx + 1:02}_{suffix}{ext}"
    if dirname:
        use_fname = op.join(dirname, use_fname)
    return use_fname
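For example (hypothetical filename), the split index is inserted before the trailing modality suffix:

_construct_bids_filename("sub-01_task-rest_eeg", ".fif", part_idx=0)
# 'sub-01_task-rest_split-01_eeg.fif'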

def _make_split_fnames(fname, n_splits, split_naming):
    """Make a list of split filenames."""
    if n_splits == 1:
        return [Path(fname)]
    res = []
    base, ext = op.splitext(fname)
    for i in range(n_splits):
        if split_naming == "neuromag":
            path = Path(f"{base}-{i:d}{ext}" if i else fname)
            res.append(path)
        else:
            assert split_naming == "bids"
            path = Path(_construct_bids_filename(base, ext, i))
            res.append(path)
    return res
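The two naming schemes place the index differently (hypothetical filename; PosixPath reprs as shown on POSIX systems):

_make_split_fnames("sub-01_eeg.fif", n_splits=2, split_naming="neuromag")
# [PosixPath('sub-01_eeg.fif'), PosixPath('sub-01_eeg-1.fif')]
_make_split_fnames("sub-01_eeg.fif", n_splits=2, split_naming="bids")
# [PosixPath('sub-01_split-01_eeg.fif'), PosixPath('sub-01_split-02_eeg.fif')]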