# Basic imports
import numpy as np
import pandas as pd
import textwrap
import copy

from muniverse.utils.bids_routines import (
    BIDSDataset, 
    EMGBIDSRecording, 
    BIDSDecompositionDerivative
)

# Placeholder README text (two headed sections of filler). It is assigned to
# the dataset's `readme` attribute further down; replace it with a real
# description of your dataset.
readme = """
# Header 1
Lorem ipsum dolor sit amet, consectetur adipiscing elit, 
sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. 
Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris 
nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in 
reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. 
Excepteur sint occaecat cupidatat non proident, sunt in culpa 
qui officia deserunt mollit anim id est laborum.

# Header 2
Lorem ipsum dolor sit amet, consectetur adipiscing elit, 
sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. 
Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris 
nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in 
reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. 
Excepteur sint occaecat cupidatat non proident, sunt in culpa 
qui officia deserunt mollit anim id est laborum.

"""

# Dataset-level sidecar; it is handed to the dataset object below under the
# field name "dataset_sidecar".
dataset_sidecar = {
    # The name of your dataset.
    "Name": "FictionalDatasetExample",
    # License this dataset will be available under.
    "License": "CC BY 4.0",
    # "raw" indicates that this is unprocessed data.
    "DatasetType": "raw",
    # Individuals who contributed to the creation/curation of the dataset.
    # Has to be a list even if it has only one entry.
    "Authors": ["alice", "bob"],
    # E.g. the citation of the related publication and the corresponding DOI.
    "ReferencesAndLinks": [
        "citation of related publication as text",
        "related publication as DOI",
    ],
    # Ethics committee approvals of the research protocols and/or protocol
    # identifiers. Has to be a list even if it has only one entry.
    "EthicsApprovals": ["number of ethics approval."],
    # Tools used to generate this dataset (must be a list of objects).
    "GeneratedBy": [{"Name": "MUniverse"}],
}

# Per-participant table: every key is a column, every list holds one value per
# subject (six subjects in total, all lists in the same subject order).
subjects_data = {
    # Unique subject identifier. Every participant_id must start with "sub-".
    "participant_id": [f"sub-{number:02d}" for number in range(1, 7)],
    # Age in years (optional).
    "age": list(range(42, 48)),
    # F: female, M: male, or O: other (optional).
    "sex": ["M", "F", "M", "F", "M", "F"],
    # L: left, R: right (optional).
    "handedness": ["R", "L", "R", "R", "L", "R"],
    # Weight in kg (optional).
    "weight": [70, 68, 66, 64, 62, 60],
    # Height in m (optional).
    "height": [1.7, 1.72, 1.74, 1.76, 1.78, 1.8],
    # T: treatment, P: placebo (optional).
    "group": ["T", "T", "T", "P", "P", "P"],
}

# Description of the custom "group" column of the participants table,
# including the meaning of each level that occurs in it.
subjects_sidecar = {
    "group": {
        "Description": "Group the subject belongs to.",
        "Levels": {"T": "Treatment", "P": "Placebo"},
    },
}

# Entries assigned to the dataset's BIDSIGNORE attribute further down.
BIDSIGNORE = [
    "derivatives/",
]

# Create the dataset object, attach all dataset-level metadata, and write it.
# (i) Initialize the BIDS dataset class
FictionalDatasetExample = BIDSDataset(
    datasetname="FictionalDatasetExample",
    path="./" # Where you want to store the dataset
)
# (ii) Set metadata
FictionalDatasetExample.set_metadata(field_name='subjects_data', source=subjects_data)
FictionalDatasetExample.set_default_participant_sidecar() # Make use of a pre-implemented template
# The custom sidecar is set after the default template.
# NOTE(review): presumably this extends the template rather than replacing it - confirm in BIDSDataset.
FictionalDatasetExample.set_metadata(field_name='subjects_sidecar', source=subjects_sidecar) # Add custom fields
FictionalDatasetExample.set_metadata(field_name='dataset_sidecar', source=dataset_sidecar)
FictionalDatasetExample.readme = readme
FictionalDatasetExample.BIDSIGNORE = BIDSIGNORE
# (iii) Write
FictionalDatasetExample.write()

# Free-text description of how the electrodes are placed. It should:
# - name the anatomical landmarks used for positioning,
# - name the measurement method used for placement,
# - cover the placement of the reference electrode(s),
# - cover the placement of the ground electrode,
# - cover every electrode type when several are used (e.g. by iterating
#   over them, as done here).
placement_description = " ".join(
    [
        "(i) Surface EMG: lorem ipsum.",
        "(ii) Invasive thin film EMG: lorem ipsum.",
        "(iii) Invasive Fine Wire EMG: lorem ipsum.",
        "(iv) Invasive Needle EMG: lorem ipsum.",
    ]
)

# Template for the per-recording EMG sidecar. The task-specific entries
# ("TaskName", "TaskDescription", "Instructions") hold placeholder values here.
emg_sidecar_template = {
    # Must be the keyword "Measured" if electrode locations are defined in
    # coordinate systems.
    "EMGPlacementScheme": "Measured",
    "EMGPlacementSchemeDescription": placement_description,
    "EMGReference": "ChannelSpecific",
    # The name of the ground electrode (as specified in electrodes metadata).
    "EMGGround": "G1",
    # The sampling frequency in Hz.
    "SamplingFrequency": 2048,
    # Main power line frequency in Hz.
    "PowerLineFrequency": 50,
    "RecordingType": "continuous",
    # A json object containing filter parameters. Use "n/a" if no filter was used.
    "HardwareFilters": {
        "Low-pass filter": {"Frequency": 500, "Roll-off": "6dB/Octave"},
        "High-pass filter": {"Frequency": 20, "Roll-off": "6dB/Octave"},
    },
    # A json object containing filter parameters. Use "n/a" if no filter was used.
    "SoftwareFilters": "n/a",
    "EMGChannelCount": 45,
    "TaskName": "restNoise",
    "TaskDescription": "Relaxed muscle for 5 seconds.",
    "Instructions": "Relax your muscle completely.",
    # Amplification built into an EMG bipolar sensor, electrode grid, or other device.
    "Preamplification": 1,
    # Signal gain from an in-line amplifier, applied between the EMG sensor
    # and the acquisition computer.
    "Gain": 100,
    # Manufacturer of the amplifier used to collect the data.
    "Manufacturer": "some amplifier manufacturer",
    # Model name of the amplifier.
    "ManufacturersModelName": "some amplifier model name",
}

# Coordinate-system sidecars, keyed by coordinate-system name.
#
# "forearmCoordSys" is the anatomical parent system (units: percent). Every
# grid has its own local 2D system (units: mm) that is tied to the parent via
# an anchor electrode and that electrode's coordinates.
#
# All grid entries are generated from one table so that they cannot drift
# apart; in particular every grid (including "grid2CoordSys", which previously
# lacked it) names "forearmCoordSys" as its "ParentCoordinateSystem".
coord_sidecars = {
    "forearmCoordSys": {
        "EMGCoordinateSystem": "Other",
        "EMGCoordinateUnits": "percent",
        "EMGCoordinateSystemDescription": (
            "x: Radial Styloid Process (RSP) -> Ulnar Styloid Process (USP); "
            "y: Right-hand rule (limits: Olecranon Process -> Cubital Fossa); "
            "z: midpoint RSP-USP -> Lateral Humerus Epicondyle (LHE)"
        ),
    },
    **{
        coord_name: {
            "EMGCoordinateSystem": "Other",
            "EMGCoordinateUnits": "mm",
            "EMGCoordinateSystemDescription": (
                "The x-axis is left to right; "
                "the y-axis is bottom to top. "
                "Note: the z-axis is not used."
            ),
            "ParentCoordinateSystem": "forearmCoordSys",
            "AnchorElectrode": anchor_electrode,
            "AnchorCoordinates": anchor_coordinates,
        }
        # (coordinate system name, anchor electrode, anchor coordinates)
        for coord_name, anchor_electrode, anchor_coordinates in (
            ("grid1CoordSys", "E001", [30, 50, 80]),
            ("grid2CoordSys", "E009", [30, 50, 60]),
            ("grid3CoordSys", "E017", [30, 50, 40]),
            ("intraGrid1CoordSys", "iE001", [30, 50, 70]),
            ("intraGrid2CoordSys", "iE009", [30, 50, 50]),
        )
    },
}

# Electrode table, loaded from a tab-separated file.
# NOTE(review): the preview of this table shows electrode_surface_area as
# "0,1" (decimal comma), so that column is probably read as text - confirm
# that this is intended.
el_metadata = pd.read_table(
    "fictionalDatasetExampleInputMetadata//electrodes.tsv"
)
el_metadata.head()  # preview of the first rows (only displayed in a notebook)

# Free-text descriptions of the additional electrode columns, keyed by
# column name.
electrodes_sidecar = {
    "interelectrode_distance": (
        "Distance between pairs of electrodes. "
        "In a grid this means distance between neighboring electrodes. "
        "In a fine wire it means distance between the wire tips."
    ),
    "electrode_surface_area": "Surface area of the electrode in mm^2",
    "electrode_diameter": "Diameter of the electrode in mm",
    "electrode_tip_length": "Unisolated length (in mm) of the fine wire tip",
    "cannula_diameter": "Diameter of Cannula in mm",
    "cannula_length": "Length of Cannula in mm",
    "manufacturer": "Name of electrode manufacturer",
    "manufacturers_model_name": "Model name of Electrode",
}

# Channel table, loaded from a tab-separated file.
ch_metadata = pd.read_table(
    "fictionalDatasetExampleInputMetadata//channels.tsv"
)
ch_metadata.head()  # preview of the first rows (only displayed in a notebook)

# One optional, custom column is described here for illustrative purposes.
channels_sidecar = {
    "some_additional_column": "some Description of this column",
}

# Event tables, one per contraction task. The baseline noise recording gets no
# events.tsv file because there are no events that could be written into it.
events_metadata1 = pd.read_table(
    "fictionalDatasetExampleInputMetadata//events30PercentMVC.tsv"
)
events_metadata2 = pd.read_table(
    "fictionalDatasetExampleInputMetadata//events50PercentMVC.tsv"
)

# Task name -> event table, so the right table can be looked up cleanly from
# inside the recording loop.
events_metadata_taskDict = {
    "trapezoidalContraction30PercentMVC": events_metadata1,
    "trapezoidalContraction50PercentMVC": events_metadata2,
}
events_metadata1.head()  # preview of the first rows (only displayed in a notebook)

# Descriptions of the custom columns of the events tables, keyed by column
# name. Numeric columns carry a unit, the categorical "event_type" column
# lists the meaning of each of its levels.
events_sidecar = dict(
    sample=dict(
        Description="Sample index of the event onset (zero-indexing)",
        Unit="samples",
    ),
    mvc_rate=dict(
        Description="Rate at which the torque changes in percent MVC per second",
        Unit="% MVC / s",
    ),
    mvc_level=dict(
        Description="MVC (maximum voluntary contraction) level at the onset of the event",
        Unit="% MVC",
    ),
    event_type=dict(
        Description="Event label.",
        Levels=dict(
            muscle_on="The muscle is activated.",
            muscle_off="The muscle is deactivated.",
            linear_isometric_ramp="The isometric torque changes linearly over time with a fixed rate.",
            steady_isometric="Steady isometric contraction at a fixed MVC level.",
        ),
    ),
    description=dict(
        Description="Free text event description.",
    ),
)

# Task names with their descriptions and participant instructions. Each row
# below is (task name, task description, instructions); the rows are
# transposed into three parallel lists that share the same task order.
tasks, task_descriptions, task_instructions = (
    list(column)
    for column in zip(
        (
            "restNoise",
            "The muscle is fully relaxed.",
            "Do nothing.",
        ),
        (
            "trapezoidalContraction30PercentMVC",
            "A trapezoidal contraction at 30 percent MVC, "
            "consisting of linear ramps up and down performed at 30 percent per second "
            "and a plateau maintained for 1 s.",
            "Follow path provided via visual feedback.",
        ),
        (
            "trapezoidalContraction50PercentMVC",
            "A trapezoidal contraction at 50 percent MVC, "
            "consisting of linear ramps up and down performed at 50 percent per second "
            "and a plateau maintained for 1 s.",
            "Follow path provided via visual feedback.",
        ),
    )
)

samplingFrequency = 2048  # samples per second
n_channels = len(ch_metadata.loc[:, "name"])  # number of rows in the channel table

# Ensure reproducible random numbers
rng = np.random.default_rng(seed=12345)

# Loop over all recordings: one recording per task for every participant.
# Each recording gets random "raw" data, a task-specific EMG sidecar, and the
# shared electrode/channel/coordinate-system metadata, and is then written.
for participant_id in subjects_data["participant_id"]:

    subject_label = participant_id.split("-")[-1]  # e.g. "sub-01" -> "01"
    print(f"Bidsifying data of sub-{subject_label}")  # Print progress

    for task, task_description, task_instruction in zip(
        tasks, task_descriptions, task_instructions
    ):
        # Create a random array of the correct size to be our raw data.
        # The duration is a whole number of seconds (high=8 is exclusive).
        # It is converted to a plain int so that n_samples is an integer:
        # np.ceil would return a float, which is not a valid array dimension.
        recordingLength = int(rng.integers(low=5, high=8))  # in seconds
        n_samples = recordingLength * samplingFrequency
        data = rng.uniform(low=0, high=1, size=(n_channels, n_samples))

        # Update the EMG sidecar. A deep copy keeps the nested entries of the
        # template (e.g. "HardwareFilters") from being shared between recordings.
        emg_sidecar = copy.deepcopy(emg_sidecar_template)
        emg_sidecar["TaskName"] = task
        emg_sidecar["TaskDescription"] = task_description
        emg_sidecar["Instructions"] = task_instruction

        # Init the bids recording class
        emg_recording = EMGBIDSRecording(
            parent_dataset=FictionalDatasetExample,
            subject_label=subject_label,
            task_label=task,
            datatype='emg',
            inherited_metadata=[    # Here is where we define which files to inherit
                "coordsystem.json",
                "electrodes.tsv",
                "electrodes.json",
                "events.json",
            ],
            inherited_level=[       # and here the level each of them is inherited at
                "subject",          # (same order as inherited_metadata)
                "subject",
                "dataset",
                "dataset",
            ],
        )
        # Set data and metadata
        emg_recording.set_metadata(
            field_name='emg_sidecar', source=emg_sidecar
        )
        emg_recording.set_metadata(
            field_name='electrodes_sidecar', source=electrodes_sidecar
        )
        emg_recording.set_metadata(
            field_name='electrodes', source=el_metadata
        )
        emg_recording.set_metadata(
            field_name='channels_sidecar', source=channels_sidecar
        )
        emg_recording.set_metadata(
            field_name='channels', source=ch_metadata
        )
        # Coordinate systems have their own function (as there are multiple files)
        for coord_name, coord_metadata in coord_sidecars.items():
            emg_recording.add_coordinate_system(coord_name, coord_metadata)
        # Add the actual data
        emg_recording.set_data(
            field_name='data', mydata=data, fsamp=samplingFrequency
        )
        # events.tsv is not needed for resting tasks: only tasks that have an
        # event table get events metadata
        if task in events_metadata_taskDict:
            emg_recording.set_metadata(
                field_name="events_sidecar", source=events_sidecar
            )
            emg_recording.set_metadata(
                field_name="events", source=events_metadata_taskDict[task]
            )
        # Write metadata and data
        emg_recording.write(overwrite=True)

Bidsifying data of sub-01
Bidsifying data of sub-02
Bidsifying data of sub-03
Bidsifying data of sub-04
Bidsifying data of sub-05
Bidsifying data of sub-06

# Validate the written dataset. The first two returned values are kept as
# `err` and `warn` (their lengths are reported below); the third is discarded.
err, warn, _ = FictionalDatasetExample.validate(
    print_errors=True,
    print_warnings=True,
    ignored_codes=[
        "EVENTS_TSV_MISSING",   # We provide events.tsv files, but not for restNoise.
                                # To avoid one warning per restNoise recording in the dataset,
                                # we ignore this code.
    ],
    ignored_fields=[            # We ignore some fields that would produce warnings
        "SourceDatasets",       # there isn't one
        "DeviceSerialNumber",   # no hardware was used to record this dataset
        "SoftwareVersions",     # this also pertains to measurement hardware 
        "InstitutionName",      # we also don't want to specify an affiliated institution 
        "InstitutionAddress",
        "InstitutionalDepartmentName",

        "HEDVersion",           # we don't use HED. 
        "StimulusPresentation", # we also ignore this, to keep this notebook uncluttered.

        # We have different ElectrodeManufacturer and ElectrodeManufacturersModelName per electrode, 
        # so we specify them on a per-electrode basis in the electrodes.tsv file, rather than inside
        # of emg.json (as mandated by the BIDS documentation for this case). However, at the time
        # of writing this tutorial, the validator does not account for this,
        # so we ignore the raised warning.
        "ElectrodeManufacturer", 
        "ElectrodeManufacturersModelName", 
    ],
    ignored_files=[],
)

print("The BIDS conversion has completed")
print(f"Your BIDS dataset contains {len(err)} errors and {len(warn)} warnings")

Number of detected errors: 0
[]
Number of detected warnings: 0
[]
The BIDS conversion has completed
Your BIDS dataset contains 0 errors and 0 warnings

# Dataset-level sidecar of the derivative dataset that holds the spike labels.
labels_dataset_sidecar = {
    # "derivative" indicates that this is derived (processed) data.
    "DatasetType": "derivative",
    # Tools that generated the derivative (a list of objects).
    "GeneratedBy": [{
        "Name": "Some Software Tool",
        # The version is a string, not a number: a numeric value would be
        # written as a JSON number and could silently change (3.10 -> 3.1).
        "Version": "3.11",
        "URL": "Link to the repository",
        "Commit": "123456"
    }]
}

# Create the derivative dataset for the manual decomposition labels inside the
# "derivatives/" folder of the raw dataset, attach its sidecar, and write it.
# NOTE(review): the path is built by string concatenation, which assumes that
# FictionalDatasetExample.root is a string ending in a path separator - confirm.
dataset_labels = BIDSDataset(
    datasetname="manual_decomposition",
    path=FictionalDatasetExample.root + "derivatives/"
)
dataset_labels.set_metadata(field_name="dataset_sidecar", source=labels_dataset_sidecar)
dataset_labels.write(overwrite=True)

# Write (random) motor unit spike labels as a decomposition derivative for
# every participant and every task except the resting task.
for participant_id in subjects_data["participant_id"]:

    subject_label = participant_id.split("-")[-1]  # e.g. "sub-01" -> "01"
    print(f"Bidsifying motor unit labels of sub-{subject_label}")  # Print progress

    for task in tasks:

        if task == "restNoise":
            continue  # There are no labels for resting tasks

        # Make some random spike trains: one sorted array of spike times per
        # unit, keyed by unit index. The bounds are one and four times the
        # sampling frequency.
        # NOTE(review): presumably these are sample indices, i.e. spikes
        # between 1 s and 4 s of the recording - confirm against add_spikes.
        n_units = 3
        n_spikes = 20
        spikes_times = {
            unit_idx: np.sort(
                rng.integers(
                    low=samplingFrequency,
                    high=4 * samplingFrequency,
                    size=n_spikes,
                )
            )
            for unit_idx in range(n_units)
        }

        decomposed_recording = BIDSDecompositionDerivative(
            parent_dataset=dataset_labels,
            subject_label=subject_label,
            task_label=task,
            datatype='emg',
            inherited_metadata=["events.json"],
            inherited_level=["dataset"],
        )
        decomposed_recording.add_spikes(spikes=spikes_times, fsamp=samplingFrequency)
        decomposed_recording.write(overwrite=True)

Bidsifying motor unit labels of sub-01
Bidsifying motor unit labels of sub-02
Bidsifying motor unit labels of sub-03
Bidsifying motor unit labels of sub-04
Bidsifying motor unit labels of sub-05
Bidsifying motor unit labels of sub-06

	name	x	y	z	coordinate_system	type	material	group	interelectrode_distance	electrode_surface_area	electrode_diameter	electrode_tip_length	cannula_diameter	cannula_length	manufacturer	manufacturers_model_name
0	E001	0	0	NaN	grid1CoordSys	HDsEMG	gold	Grid1	10	0,1	NaN	NaN	NaN	NaN	manufacturerPlaceholder	manufacturerModelNamePlaceholder
1	E002	10	0	NaN	grid1CoordSys	HDsEMG	gold	Grid1	10	0,1	NaN	NaN	NaN	NaN	manufacturerPlaceholder	manufacturerModelNamePlaceholder
2	E003	20	0	NaN	grid1CoordSys	HDsEMG	gold	Grid1	10	0,1	NaN	NaN	NaN	NaN	manufacturerPlaceholder	manufacturerModelNamePlaceholder
3	E004	30	0	NaN	grid1CoordSys	HDsEMG	gold	Grid1	10	0,1	NaN	NaN	NaN	NaN	manufacturerPlaceholder	manufacturerModelNamePlaceholder
4	E005	0	10	NaN	grid1CoordSys	HDsEMG	gold	Grid1	10	0,1	NaN	NaN	NaN	NaN	manufacturerPlaceholder	manufacturerModelNamePlaceholder

	name	type	units	description	signal_electrode	reference	group	status	low_cutoff	high_cutoff	some_additional_column
0	Ch1	EMG	mV	Monopolar surface EMG	E001	R1	Grid1	good	10.0	500.0	none
1	Ch2	EMG	mV	Monopolar surface EMG	E002	R1	Grid1	good	10.0	500.0	none
2	Ch3	EMG	mV	Monopolar surface EMG	E003	R1	Grid1	good	10.0	500.0	none
3	Ch4	EMG	mV	Monopolar surface EMG	E004	R1	Grid1	good	10.0	500.0	none
4	Ch5	EMG	mV	Monopolar surface EMG	E005	R1	Grid1	bad	10.0	500.0	none

	onset	duration	sample	mvc_rate	mvc_level	event_type	description
0	1.0	0.0	2048	NaN	NaN	muscle_on	Time at which the muscle is activated.
1	1.0	1.0	2048	30.0	0.0	linear_isometric_ramp	Linear ramp (rate: 30 % MVC per s; duration: 1...
2	2.0	1.0	4096	0.0	30.0	steady_isometric	Steady isometric torque at 30% MVC for 15 s
3	3.0	1.0	6144	-30.0	50.0	linear_isometric_ramp	Linear ramp (rate: -30 % MVC per s; duration: ...
4	4.0	0.0	8192	NaN	NaN	muscle_off	Time at which the muscle is deactivated.

Overview

Introduction to BIDS

Why BIDS?

Structure of a BIDS dataset

Electrodes vs Channels

The BIDS-MUniverse utilities

Building an example EMG-BIDS dataset

Global (dataset wide) metadata

Readme File

Dataset sidecar

Subjects data & sidecar

Bidsignore

Storing the dataset level metadata

Local (recording specific) metadata

EMG sidecar

Coordinate system sidecar

Electrode data & sidecar

Channel data & sidecar

Events data & sidecar

Building the dataset

Tasks

Validation

Spike labels