diff --git a/phys2bids/cli/run.py b/phys2bids/cli/run.py index b1a3dec14..55f007f1d 100644 --- a/phys2bids/cli/run.py +++ b/phys2bids/cli/run.py @@ -164,6 +164,14 @@ def _get_parser(): help="Column header (for json file output).", default=[], ) + optional.add_argument( + "-ddir", + "--dicomdir", + dest="dicomdir", + type=str, + help="full path to MRI dicom folder corresponding to physiological data. Use this in conjunction with SIEMENS log files ONLY", + default="", + ) optional.add_argument( "-yml", "--participant-yml", diff --git a/phys2bids/io.py b/phys2bids/io.py index d4a5e3903..730886c4e 100644 --- a/phys2bids/io.py +++ b/phys2bids/io.py @@ -1,11 +1,13 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- """phys2bids interfaces for loading extension files.""" - import logging +import os import warnings from copy import deepcopy +from glob import glob from itertools import groupby +from pathlib import Path import numpy as np @@ -485,10 +487,6 @@ def load_gep(filename): -------- physio_obj.BlueprintInput """ - import os - from glob import glob - from pathlib import Path - # Initiate lists of column names and units with time and trigger names = ["time", "trigger"] units = ["s", "mV"] # Assuming recording units are mV... @@ -510,7 +508,7 @@ def load_gep(filename): # Calculate time in seconds for first input (starts from -30s) interval = 1 / freq[0] duration = data[0].shape[0] * interval - t_ch = np.ogrid[-30 : duration - 30 : interval] + time_ch = np.ogrid[-30 : duration - 30 : interval] # Find and add additional data files filename = Path(filename) @@ -535,11 +533,234 @@ def load_gep(filename): trigger = np.hstack((np.zeros(int(30 / interval)), np.ones(int((duration - 30) / interval)))) # Create final list of timeseries - timeseries = [t_ch, trigger] + timeseries = [time_ch, trigger] timeseries.extend(data) return BlueprintInput(timeseries, freq, names, units, 1) +def acqtime_to_seconds(acq_time_str): + """ """ + hh = int(acq_time_str[0:2]) + mm = int(acq_time_str[2:4]) + ss = int(acq_time_str[4:6]) + frac = float("0." + acq_time_str.split(".")[1]) if "." in acq_time_str else 0 + return hh * 3600 + mm * 60 + ss + frac + + +def physiologtime_to_seconds(pulse_time): + """ """ + hh = pulse_time.hour + mm = pulse_time.minute + ss = pulse_time.second + frac = pulse_time.microsecond / 1000000 + return hh * 3600 + mm * 60 + ss + frac + + +def read_siemens_channel( + filename, channel_type, names, units, timeseries, freq, starttime, stoptime +): + """ """ + import fmri_physio_log as fpl + + data = fpl.PhysioLog.from_filename(filename) + + if np.all(data.ts != 0): + + if channel_type == "ECG": + freq_mod = round( + len(data.ts[::4]) + / ( + ( + physiologtime_to_seconds(data.mdh.stop_time) + - physiologtime_to_seconds(data.mdh.start_time) + ) + * data.rate + ) + ) + if freq_mod != 1: + LGR.warning( + f"Incoherency found between logged time and frequency by a factor of {freq_mod}. Please check frequency time." + ) + + frequency = data.freq * freq_mod + start = physiologtime_to_seconds(data.mdh.start_time) + stop = start + len(data.ts - 1) * frequency + for i in range(4): + names.append(f"{channel_type}{i+1}") + units.append("") + timeseries.append(data.ts[i::4]) + freq.append(data.rate * freq_mod) + starttime.append(physiologtime_to_seconds(data.mdh.start_time)) + stoptime.append(stop) + + else: + freq_mod = round( + len(data.ts[::4]) + / ( + ( + physiologtime_to_seconds(data.mdh.stop_time) + - physiologtime_to_seconds(data.mdh.start_time) + ) + * data.rate + ) + ) + if freq_mod != 1: + LGR.warning( + f"Incoherency found between logged time and frequency by a factor of {freq_mod}. Please check frequency time." + ) + + frequency = data.freq * freq_mod + start = physiologtime_to_seconds(data.mdh.start_time) + stop = start + len(data.ts - 1) * frequency + names.append(channel_type) + units.append("") + timeseries.append(data.ts) + freq.append(data.rate * freq_mod) + starttime.append(physiologtime_to_seconds(data.mdh.start_time)) + stoptime.append(stop) + + return names, units, timeseries, freq, starttime, stoptime, data + + +def load_siemens(filename, dicomfolder=None): + """ + Populate object phys_input from SIEMENS physiological files. + + Uses the filename that the user provides to find any matching inputs + from other recording types (.puls, .resp, or .ecg). + + Populates physio_obj with all identified recording types. + Takes into account possible differences in start and stop times across channels. + Uses DICOM metadata to find scan start and stop times and create trigger channel. + The modalities are in the order of 'names' entered by the user (after time and trigger). + + Parameters + ---------- + filename: str + path to the SIEMENS scanner physiological file + dicomfolder: str or None + path to the folder containing the DICOM files + + Returns + ------- + BlueprintInput + + Note + ---- + + SIEMENS physiological files do not record a trigger so a column is added at + position 1. This has a value of zero up to the scan start time and then + a value of one for the duration of the scan. + + See Also + -------- + physio_obj.BlueprintInput + """ + import pydicom + + # initialise lists of metadata and data + names = [] + units = [] + freq = [] + timeseries = [] + starttime = [] + stoptime = [] + + # Find and add additional data files + filename = Path(filename) + fnames = sorted(glob(os.path.join(filename.parent, f"*{filename.name}.*"))) + if not len(fnames) == 0: + for fname in fnames: + if fname.endswith(".ecg"): + names, units, timeseries, freq, starttime, stoptime, data = read_siemens_channel( + fname, "ECG", names, units, timeseries, freq, starttime, stoptime + ) + + elif fname.endswith(".ext"): + names, units, timeseries, freq, starttime, stoptime, data = read_siemens_channel( + fname, "EXT", names, units, timeseries, freq, starttime, stoptime + ) + + elif fname.endswith(".ext2"): + names, units, timeseries, freq, starttime, stoptime, data = read_siemens_channel( + fname, "EXT2", names, units, timeseries, freq, starttime, stoptime + ) + + elif fname.endswith(".puls"): + names, units, timeseries, freq, starttime, stoptime, data = read_siemens_channel( + fname, "PPG", names, units, timeseries, freq, starttime, stoptime + ) + + elif fname.endswith(".resp"): + names, units, timeseries, freq, starttime, stoptime, data = read_siemens_channel( + fname, "respiratory", names, units, timeseries, freq, starttime, stoptime + ) + + checkstartlen = np.unique(starttime) + checkstoplen = np.unique(stoptime) + checkfreqlen = np.unique(freq) + + time_ch = np.ogrid[checkstartlen[0] : checkstoplen[-1] : 1 / checkfreqlen[-1] * 1000] + if checkstartlen.size > 1 or checkstoplen.size > 1: + for n, t in enumerate(timeseries): + time = np.ogrid[checkstartlen[0] : checkstoplen[-1] : 1 / freq[n] * 1000] + timeseries[n] = np.ones_like(time) * np.nan + phys_start = int(np.argmax(np.isclose(time, starttime[n]))) + phys_stop = phys_start + len(t) + timeseries[n][phys_start:phys_stop] = t + + # Calculate time in seconds + time_ch = time_ch / 1000 + + # Read the dicom to find start time, if it's given, otherwise assume it's + trigger = np.zeros_like(time_ch) + + try: + trigger_samples = np.asarray(data.trigger_times, dtype=int) + valid = trigger_samples[(trigger_samples >= 0) & (trigger_samples < trigger.size)] + trigger[valid] = 1.0 + LGR.info(f"Found {len(valid)} trigger events in log file.") + except AttributeError: + LGR.warning("Log file trigger events not found — attempting opening DICOM instead.") + + if dicomfolder is not None: + files = sorted( + [ + os.path.join(dicomfolder, f) + for f in os.listdir(dicomfolder) + if os.path.isfile(os.path.join(dicomfolder, f)) + ] + ) + first_dcm = pydicom.dcmread(files[0], stop_before_pixels=True) + last_dcm = pydicom.dcmread(files[-1], stop_before_pixels=True) + + epi_start = acqtime_to_seconds(first_dcm.AcquisitionTime) - data.mdh.start_time / 1000 + epi_stop = ( + acqtime_to_seconds(last_dcm.AcquisitionTime) + - data.mdh.start_time / 1000 + + float(last_dcm.get("RepetitionTime", 0)) / 1000 + ) + + # epi_stop needs to be incremented by TR in seconds + + take_start = int(np.argmax(np.isclose(time_ch, epi_start))) + take_end = int(np.argmax(np.isclose(time_ch, epi_stop))) + + trigger[take_start:take_end] = 1 + else: + LGR.warning( + "DICOM folder not provided. Setting start_time to 0 (although " + "it might be wrong)." + ) + + # Initiate lists of column names and units with time and trigger + names = ["time", "trigger"] + names + units = ["s", ""] + units + freq = [checkfreqlen[-1], checkfreqlen[-1]] + freq + + return BlueprintInput(timeseries, freq, names, units, 1) + + def load_smr(filename, chtrig=0): """Load Spike2 smr file and populate object phys_input. diff --git a/phys2bids/phys2bids.py b/phys2bids/phys2bids.py index 5161792bf..ed9ae99ae 100644 --- a/phys2bids/phys2bids.py +++ b/phys2bids/phys2bids.py @@ -144,6 +144,7 @@ def phys2bids( thr=None, pad=9, ch_name=[], + dicomdir=None, yml="", debug=False, quiet=False, @@ -260,6 +261,10 @@ def phys2bids( from phys2bids.io import load_gep phys_in = load_gep(infile) + elif ftype in ["puls", "resp", "ecg", "ext", "ext2"]: + from phys2bids.io import load_siemens + + phys_in = load_siemens(infile, dicomdir) LGR.info("Checking that units of measure are BIDS compatible") for index, unit in enumerate(phys_in.units): @@ -515,6 +520,10 @@ def phys2bids( phys_out[key].filename = f"{phys_out[key].filename}_{uniq_freq:.0f}Hz" LGR.info(f"Exporting files for take {take} freq {uniq_freq}") + # Check if timeseries has NaNs and save them as n/a + phys_out[key].timeseries = phys_out[key].timeseries.astype(object) + phys_out[key].timeseries[np.isnan(phys_out[key].timeseries)] = "n/a" + np.savetxt( phys_out[key].filename + ".tsv.gz", phys_out[key].timeseries, diff --git a/phys2bids/tests/conftest.py b/phys2bids/tests/conftest.py index 0c43e6dde..92c0ff8c5 100644 --- a/phys2bids/tests/conftest.py +++ b/phys2bids/tests/conftest.py @@ -116,13 +116,13 @@ def ge_one_gep_file(testpath): @pytest.fixture def ge_two_gep_files_ppg(testpath): - tmp = fetch_file("qawjv", testpath, "RESPData_epiRT_0000000000_00_00_000.gep") + _ = fetch_file("qawjv", testpath, "RESPData_epiRT_0000000000_00_00_000.gep") return fetch_file("wb84d", testpath, "PPGData_epiRT_0000000000_00_00_000.gep") @pytest.fixture def ge_two_gep_files_resp(testpath): - tmp = fetch_file("wb84d", testpath, "PPGData_epiRT_0000000000_00_00_000.gep") + _ = fetch_file("wb84d", testpath, "PPGData_epiRT_0000000000_00_00_000.gep") return fetch_file("qawjv", testpath, "RESPData_epiRT_0000000000_00_00_000.gep") @@ -133,17 +133,26 @@ def ge_one_raw_file(testpath): @pytest.fixture def ge_two_raw_files(testpath): - tmp = fetch_file("49xpw", testpath, "RESPData_epiRT_0000000000_00_00_000") + _ = fetch_file("49xpw", testpath, "RESPData_epiRT_0000000000_00_00_000") return fetch_file("u9wsr", testpath, "PPGData_epiRT_0000000000_00_00_000") @pytest.fixture def ge_badfiles(testpath): - tmp = fetch_file("tdmyn", testpath, "PPGData_epiRT_columnscsv_00_00_000") - tmp = fetch_file("b6skq", testpath, "PPGData_epiRT_columnstsv_00_00_000") + _ = fetch_file("tdmyn", testpath, "PPGData_epiRT_columnscsv_00_00_000") + _ = fetch_file("b6skq", testpath, "PPGData_epiRT_columnstsv_00_00_000") return fetch_file("8235b", testpath, "PPGData_epiRT_string0000_00_00_000") +@pytest.fixture +def SIEMENS_files(testpath): + _ = fetch_file(" ", testpath, "275_pulse_part_1.puls") + _ = fetch_file(" ", testpath, "275_pulse_part_1.ecg") + _ = fetch_file(" ", testpath, "275_pulse_part_1.ext") + _ = fetch_file(" ", testpath, "275_pulse_part_1.ext2") + return fetch_file(" ", testpath, "275_resp_part_1.resp") + + @pytest.fixture def spike2_smrx_file(testpath): return fetch_file("7x5qw", testpath, "Test_ppg_pulse_spike2.smrx") diff --git a/phys2bids/tests/test_io.py b/phys2bids/tests/test_io.py index ffed64546..a3299bf9e 100644 --- a/phys2bids/tests/test_io.py +++ b/phys2bids/tests/test_io.py @@ -2,6 +2,7 @@ import os import sys +import fmri_physio_log as fpl import numpy as np import pytest from pytest import raises @@ -217,6 +218,38 @@ def test_load_gep_two_files_resp(ge_two_gep_files_resp, testpath): assert np.array_equal(gep_data2, phys_obj.timeseries[3]) +def test_SIEMENS(SIEMENS_files, testpath): + # Load data + phys_obj = io.load_siemens(SIEMENS_files) + + assert phys_obj.ch_name == [ + "time", + "trigger", + "ECG1", + "ECG2", + "ECG3", + "ECG4", + "PPG", + "respiratory", + ] + + # Check the channel data is as expected + siemens_ecg = fpl.PhysioLog.from_filename(os.path.join(testpath, "275_pulse_part_1.ecg")) + data_ecg1 = np.array(siemens_ecg.ts[::4]) + data_ecg3 = np.array(siemens_ecg.ts[2::4]) + siemens_resp = fpl.PhysioLog.from_filename(SIEMENS_files) + data_resp = np.array(siemens_resp.ts) + siemens_pulse = fpl.PhysioLog.from_filename(os.path.join(testpath, "275_pulse_part_1.puls")) + data_pulse = np.array(siemens_pulse.ts) + assert np.array_equal(data_ecg1, phys_obj.timeseries[2][~np.isnan(phys_obj.timeseries[2])]) + assert np.array_equal(data_ecg3, phys_obj.timeseries[4][~np.isnan(phys_obj.timeseries[4])]) + assert np.array_equal(data_pulse, phys_obj.timeseries[6][~np.isnan(phys_obj.timeseries[6])]) + assert np.array_equal(data_resp, phys_obj.timeseries[7][~np.isnan(phys_obj.timeseries[7])]) + + +# check if data in phys_obj timeseries are padded, padding needs to be undone for the test to succeed + + @pytest.mark.skipif( sys.version_info < (3, 7) or sys.version_info > (3, 9), reason="Requires python between 3.7 and 3.9", diff --git a/phys2bids/utils.py b/phys2bids/utils.py index 05949afa9..0b38fa85d 100644 --- a/phys2bids/utils.py +++ b/phys2bids/utils.py @@ -9,7 +9,7 @@ LGR = logging.getLogger(__name__) -SUPPORTED_FTYPES = ("acq", "txt", "mat", "gep") +SUPPORTED_FTYPES = ("acq", "txt", "mat", "gep", "puls", "resp", "ecg", "ext", "ext2") def check_input_ext(filename, ext): diff --git a/setup.cfg b/setup.cfg index 20204cd93..ccf1e0a02 100644 --- a/setup.cfg +++ b/setup.cfg @@ -49,6 +49,9 @@ doc = sphinx >=2.0 sphinx-argparse sphinx_rtd_theme +siemens = + fmri_physio_log + pydicom style = flake8 >=4.0 black @@ -126,6 +129,7 @@ skip = venvs,.venv,versioneer.py,.git,build,./docs/_build write-changes = count = quiet-level = 3 +ignore-words-list = puls [tool:pytest] doctest_optionflags = NORMALIZE_WHITESPACE