Ver código fonte

Add reduced scripts and configs used to build the resource

Multiple pipelines have been used to put together the
resource. Starting from raw data: organization, migration, curation,
quality control, and flagging have been performed. The scripts for
these pipelines are added to the repository for future reference. To
ensure the data remain anonymized, specific parameters and files used
have been removed.
Girish Mohan 5 meses atrás
pai
commit
f7a7cde4e9

+ 7 - 0
scripts/README.md

@@ -0,0 +1,7 @@
+# Scripts
+
+The scripts are a collection of Python, Bash, and SQL scripts used to
+develop pipelines while building the CALM-Brain resource. In the
+interest of preventing de-anonymization of data, the scripts are a
+reduced form of the ones used while building the resource. The config
+files used in these scripts can be found under the configs directory.

+ 504 - 0
scripts/configs/converters/dcm2bids.json

@@ -0,0 +1,504 @@
+{
+    "post_op": [
+        {
+            "cmd": "pydeface --outfile dst_file src_file",
+            "datatype": "anat",
+            "suffix": [
+                "T1w",
+                "T2w"
+            ]
+        }
+    ],
+    "descriptions": [
+        {
+            "criteria": {
+                "SeriesDescription": "T1w"
+            },
+            "datatype": "anat",
+            "suffix": "T1w"
+        },
+        {
+            "criteria": {
+                "SeriesDescription": "T1w_PSIR"
+            },
+            "custom_entities": "acq-psir",
+            "datatype": "anat",
+            "suffix": "T1w"
+        },
+        {
+            "criteria": {
+                "SeriesDescription": "T2w"
+            },
+            "datatype": "anat",
+            "suffix": "T2w"
+        },
+        {
+            "criteria": {
+                "SeriesDescription": "T2W_FFE"
+            },
+            "custom_entities": "acq-ffe",
+            "datatype": "anat",
+            "suffix": "T2w"
+        },
+        {
+            "criteria": {
+                "SeriesDescription": "T2W_TSE"
+            },
+            "custom_entities": "acq-tse",
+            "datatype": "anat",
+            "suffix": "T2w"
+        },
+        {
+            "criteria": {
+                "SeriesDescription": "FLAIR"
+            },
+            "datatype": "anat",
+            "suffix": "FLAIR"
+        },
+        {
+            "id": "task_rest",
+            "criteria": {
+                "SeriesDescription": {
+                    "any": [
+                        "Task_rest-bold",
+                        "task-rest_bold"
+                    ]
+                }
+            },
+            "custom_entities": "task-rest",
+            "datatype": "func",
+            "suffix": "bold",
+            "sidecar_changes": {
+                "TaskName": "rest"
+            }
+        },
+        {
+            "id": "task_trends",
+            "criteria": {
+                "SeriesDescription": "task-trends_bold"
+            },
+            "custom_entities": "task-trends",
+            "datatype": "func",
+            "suffix": "bold",
+            "sidecar_changes": {
+                "TaskName": "trends"
+            }
+        },
+        {
+            "id": "task_vft",
+            "criteria": {
+                "SeriesDescription": "task-vft_bold"
+            },
+            "custom_entities": "task-vft",
+            "datatype": "func",
+            "suffix": "bold",
+            "sidecar_changes": {
+                "TaskName": "vft"
+            }
+        },
+        {
+            "criteria": {
+                "SeriesDescription": {
+                    "any": [
+                        "REF_REST_SE_EPI_AP_SHORT",
+                        "Ref_rest_AP"
+                    ]
+                }
+            },
+            "custom_entities": "acq-rest_dir-AP",
+            "datatype": "fmap",
+            "suffix": "epi",
+            "sidecar_changes": {
+                "IntendedFor": "task_rest"
+            }
+        },
+        {
+            "criteria": {
+                "SeriesDescription": {
+                    "any": [
+                        "REF_REST_SE_EPI_PA_SHORT",
+                        "Ref_rest_PA"
+                    ]
+                }
+            },
+            "custom_entities": "acq-rest_dir-PA",
+            "datatype": "fmap",
+            "suffix": "epi",
+            "sidecar_changes": {
+                "IntendedFor": "task_rest"
+            }
+        },
+        {
+            "criteria": {
+                "SeriesDescription": {
+                    "any": [
+                        "REF_TRENDS_SE_EPI_AP_SHORT",
+                        "Ref_TRENDS_AP"
+                    ]
+                }
+            },
+            "custom_entities": "acq-trends_dir-AP",
+            "datatype": "fmap",
+            "suffix": "epi",
+            "sidecar_changes": {
+                "IntendedFor": "task_trends"
+            }
+        },
+        {
+            "criteria": {
+                "SeriesDescription": {
+                    "any": [
+                        "REF_TRENDS_SE_EPI_PA_SHORT",
+                        "Ref_TRENDS_PA"
+                    ]
+                }
+            },
+            "custom_entities": "acq-trends_dir-PA",
+            "datatype": "fmap",
+            "suffix": "epi",
+            "sidecar_changes": {
+                "IntendedFor": "task_trends"
+            }
+        },
+        {
+            "criteria": {
+                "SeriesDescription": {
+                    "any": [
+                        "REF_vft_SE_EPI_AP_SHORT",
+                        "Ref_VFT_AP"
+                    ]
+                }
+            },
+            "custom_entities": "acq-vft_dir-AP",
+            "datatype": "fmap",
+            "suffix": "epi",
+            "sidecar_changes": {
+                "IntendedFor": "task_vft"
+            }
+        },
+        {
+            "criteria": {
+                "SeriesDescription": {
+                    "any": [
+                        "REF_vft_SE_EPI_PA_SHORT",
+                        "Ref_VFT_PA"
+                    ]
+                }
+            },
+            "custom_entities": "acq-vft_dir-PA",
+            "datatype": "fmap",
+            "suffix": "epi",
+            "sidecar_changes": {
+                "IntendedFor": "task_vft"
+            }
+        },
+        {
+            "id": "dki",
+            "criteria": {
+                "SeriesDescription": {
+                    "any": [
+                        "DKI",
+                        "DKI_56slices_SHIM_PB_VOLUME"
+                    ]
+                }
+            },
+            "custom_entities": "acq-dki",
+            "datatype": "dwi",
+            "suffix": "dwi"
+        },
+        {
+            "id": "dti",
+            "criteria": {
+                "SeriesDescription": "DTI"
+            },
+            "custom_entities": "acq-dti",
+            "datatype": "dwi",
+            "suffix": "dwi"
+        },
+        {
+            "id": "dti06dir",
+            "criteria": {
+                "SeriesDescription": {
+                    "any": [
+                        "DTI_6dir",
+                        "DTI_B500_6DIR"
+                    ]
+                }
+            },
+            "custom_entities": "acq-dti06dir",
+            "datatype": "dwi",
+            "suffix": "dwi"
+        },
+        {
+            "id": "128dti",
+            "criteria": {
+                "SeriesDescription": "DTI_opt_128"
+            },
+            "custom_entities": "acq-128dti",
+            "datatype": "dwi",
+            "suffix": "dwi"
+        },
+        {
+            "id": "128dti64dir",
+            "criteria": {
+                "SeriesDescription": "DTI_opt_128_64dir"
+            },
+            "custom_entities": "acq-128dti64dir",
+            "datatype": "dwi",
+            "suffix": "dwi"
+        },
+        {
+            "id": "dti_lasi",
+            "criteria": {
+                "SeriesDescription": "DTI_LASI"
+            },
+            "custom_entities": "acq-lasi",
+            "datatype": "dwi",
+            "suffix": "dwi"
+        },
+        {
+            "criteria": {
+                "SeriesDescription": {
+                    "any": [
+                        "REF_DWI_SE_EPI_AP_SHORT",
+                        "Ref_DWI_AP"
+                    ]
+                }
+            },
+            "custom_entities": "acq-dwi_dir-AP",
+            "datatype": "fmap",
+            "suffix": "epi",
+            "sidecar_changes": {
+                "IntendedFor": [
+                    "dki",
+                    "dti",
+                    "dti06dir",
+                    "128dti",
+                    "128dti64dir"
+                ]
+            }
+        },
+        {
+            "criteria": {
+                "SeriesDescription": {
+                    "any": [
+                        "REF_DWI_SE_EPI_PA_SHORT",
+                        "Ref_DWI_PA"
+                    ]
+                }
+            },
+            "custom_entities": "acq-dwi_dir-PA",
+            "datatype": "fmap",
+            "suffix": "epi",
+            "sidecar_changes": {
+                "IntendedFor": [
+                    "dki",
+                    "dti",
+                    "dti06dir",
+                    "128dti",
+                    "128dti64dir"
+                ]
+            }
+        },
+        {
+            "criteria": {
+                "SeriesDescription": "DTI_LASI_AP"
+            },
+            "custom_entities": "acq-lasi_dir-AP",
+            "datatype": "fmap",
+            "suffix": "epi",
+            "sidecar_changes": {
+                "IntendedFor": "dti_lasi"
+            }
+        },
+        {
+            "criteria": {
+                "SeriesDescription": "DTI_LASI_PA"
+            },
+            "custom_entities": "acq-lasi_dir-PA",
+            "datatype": "fmap",
+            "suffix": "epi",
+            "sidecar_changes": {
+                "IntendedFor": "dti_lasi"
+            }
+        },
+        {
+            "criteria": {
+                "EchoNumber": 1,
+                "ImageType": [
+                    "ORIGINAL",
+                    "PRIMARY",
+                    "M",
+                    "FFE",
+                    "M",
+                    "FFE"
+                ],
+                "SeriesDescription": "fieldmap"
+            },
+            "custom_entities": "acq-fieldmap",
+            "datatype": "fmap",
+            "suffix": "magnitude1"
+        },
+        {
+            "criteria": {
+                "EchoNumber": 2,
+                "ImageType": [
+                    "ORIGINAL",
+                    "PRIMARY",
+                    "M",
+                    "FFE",
+                    "M",
+                    "FFE"
+                ],
+                "SeriesDescription": "fieldmap"
+            },
+            "custom_entities": "acq-fieldmap",
+            "datatype": "fmap",
+            "suffix": "magnitude2"
+        },
+        {
+            "criteria": {
+                "EchoNumber": 1,
+                "ImageType": [
+                    "ORIGINAL",
+                    "PRIMARY",
+                    "PHASE MAP",
+                    "P",
+                    "FFE",
+                    "PHASE"
+                ],
+                "SeriesDescription": "fieldmap"
+            },
+            "custom_entities": "acq-fieldmap",
+            "datatype": "fmap",
+            "suffix": "phase1"
+        },
+        {
+            "criteria": {
+                "EchoNumber": 2,
+                "ImageType": [
+                    "ORIGINAL",
+                    "PRIMARY",
+                    "PHASE MAP",
+                    "P",
+                    "FFE",
+                    "PHASE"
+                ],
+                "SeriesDescription": "fieldmap"
+            },
+            "custom_entities": "acq-fieldmap",
+            "datatype": "fmap",
+            "suffix": "phase2"
+        },
+        {
+            "criteria": {
+                "EchoNumber": 1,
+                "ImageType": {
+                    "any": [
+                        [
+                            "ORIGINAL",
+                            "PRIMARY",
+                            "T1",
+                            "MIXED"
+                        ],
+                        [
+                            "ORIGINAL",
+                            "PRIMARY",
+                            "M",
+                            "FFE",
+                            "M",
+                            "FFE"
+                        ]
+                    ]
+                },
+                "SeriesDescription": "B0 map"
+            },
+            "custom_entities": "acq-B0map",
+            "datatype": "fmap",
+            "suffix": "magnitude1"
+        },
+        {
+            "criteria": {
+                "EchoNumber": 2,
+                "ImageType": {
+                    "any": [
+                        [
+                            "ORIGINAL",
+                            "PRIMARY",
+                            "T1",
+                            "MIXED"
+                        ],
+                        [
+                            "ORIGINAL",
+                            "PRIMARY",
+                            "M",
+                            "FFE",
+                            "M",
+                            "FFE"
+                        ]
+                    ]
+                },
+                "SeriesDescription": "B0 map"
+            },
+            "custom_entities": "acq-B0map",
+            "datatype": "fmap",
+            "suffix": "magnitude2"
+        },
+        {
+            "criteria": {
+                "EchoNumber": 1,
+                "ImageType": {
+                    "any": [
+                        [
+                            "ORIGINAL",
+                            "PRIMARY",
+                            "T1",
+                            "MIXED",
+                            "PHASE"
+                        ],
+                        [
+                            "ORIGINAL",
+                            "PRIMARY",
+                            "PHASE MAP",
+                            "P",
+                            "FFE",
+                            "PHASE"
+                        ]
+                    ]
+                },
+                "SeriesDescription": "B0 map"
+            },
+            "custom_entities": "acq-B0map",
+            "datatype": "fmap",
+            "suffix": "phase1"
+        },
+        {
+            "criteria": {
+                "EchoNumber": 2,
+                "ImageType": {
+                    "any": [
+                        [
+                            "ORIGINAL",
+                            "PRIMARY",
+                            "T1",
+                            "MIXED",
+                            "PHASE"
+                        ],
+                        [
+                            "ORIGINAL",
+                            "PRIMARY",
+                            "PHASE MAP",
+                            "P",
+                            "FFE",
+                            "PHASE"
+                        ]
+                    ]
+                },
+                "SeriesDescription": "B0 map"
+            },
+            "custom_entities": "acq-B0map",
+            "datatype": "fmap",
+            "suffix": "phase2"
+        }
+    ]
+}

Diferenças do arquivo suprimidas por serem muito extensas
+ 4417 - 0
scripts/configs/mappings/transfer_db.yaml


+ 141 - 0
scripts/configs/rules/rules.yaml

@@ -0,0 +1,141 @@
+# To organize raw MRI data
+source: ""
+destination: ""
+pattern: "sub-[0-9]+"
+tag_rules:
+  - name: subject
+    pattern: "sub-\\d+(?:\\d{3})(\\d{6})"
+    replace:
+      col: subjectID
+      with: D_number
+      from: ""
+  - name: session
+    pattern: "sub-\\d+(\\d{3})(?:\\d{6})"
+  - name: run
+    value: "01"
+  - name: suffix
+    value: mri
+  - name: sourcetype
+    value: mri
+  - name: extension
+    value: dcm
+---
+# To organize raw EEG data
+source: ""
+destination: ""
+pattern: "EEG[0-9_]+.mff"
+tag_rules:
+  - name: subject
+    pattern: "EEG(?:.+)_(?:\\d{3})?(\\d{6})_"
+    replace:
+      col: subjectID
+      with: D_number
+      from: ""
+  - name: session
+    pattern: "EEG(?:.+)_(\\d{3})?(?:\\d{6})_"
+    default: 101
+  - name: task
+    value: rest
+  - name: run
+    value: "01"
+  - name: suffix
+    value: eeg
+  - name: sourcetype
+    value: eeg
+  - name: extension
+    value: mff
+---
+# To organize raw Eye tracking data
+source: ""
+destination: ""
+pattern: "[0-9]+.edf"
+copy_fellows: True
+tag_rules:
+  - name: subject
+    pattern: "(?:\\d{0,3})(\\d{6}).edf"
+    replace:
+      col: subjectID
+      with: D_number
+      from: ""    
+  - name: session
+    pattern: "(\\d{0,3})(?:\\d{6}).edf"
+    length: 3
+    iffy_prepend: 1
+    default: 101
+  - name: task
+    pattern: "\\d+[-_]([\\w]+)"
+    case: lower
+  - name: run
+    value: "01"
+  - name: suffix
+    value: eyetrack
+  - name: sourcetype
+    value: eyetrack
+  - name: extension
+    value: edf
+---
+# To organize raw fNIRS data
+source: ""
+destination: ""
+pattern: "(\\d{6,9}(_\\d)?_[\\w ]+|\\d+-\\d+-\\d+_\\d+|EO|Resting_EO|Resting EO)"
+overwrite: Yes
+add:
+  - path: "probeInfo.mat"
+    position: content
+  - path: "digpts.txt"
+    position: content
+tag_rules:
+  - name: subject
+    pattern: "(?:\\d{3})?(\\d{6})"
+    replace:
+      col: subjectID
+      with: D_number
+      from: ""
+  - name: session
+    pattern: "(\\d{3})?(?:\\d{6})"
+    default: 101
+  - name: run
+    value: "01"
+  - name: suffix
+    value: nirs
+  - name: task
+    value: rest
+  - name: sourcetype
+    value: nirs
+  - name: extension
+    value: nirx
+---
+# To organize genome sequence aligned reads
+source: ""
+destination: ""
+pattern: ".*bam"
+tag_rules:
+  - name: subject
+    pattern: "Sample_([a-zA-Z0-9]+)_"
+    pad:
+      length: 5
+      character: 0    
+    replace:
+      col: sampleID
+      with: D_number
+      from: ""
+  - name: session
+    pattern: "Sample_([a-zA-Z0-9]+)_"
+    pad:
+      length: 5
+      character: 0    
+    replace:
+      col: sampleID
+      with: sessionID
+      from: ""
+  - name: sample
+    pattern: "Sample_([a-zA-Z0-9]+)_"
+    pad:
+      length: 5
+      character: 0    
+  - name: acquisition
+    value: seq
+  - name: suffix
+    value: exome
+  - name: extension
+    value: bam

+ 41 - 0
scripts/configs/specifications/bids.yaml

@@ -0,0 +1,41 @@
+---
+tags:
+  - name: subject
+    pattern: "[/\\\\]+sub-([a-zA-Z0-9]+)"
+  - name: session
+    pattern: "[_/\\\\]+ses-([a-zA-Z0-9]+)"
+  - name: datatype
+    pattern: "(?:(?<!sourcedata))[/\\\\]+(anat|dwi|eeg|eyetrack|fmap|func|nirs)[/\\\\]+"    
+  - name: task
+    pattern: "[_/\\\\]+task-([a-zA-Z0-9]+)"
+  - name: acquisition
+    pattern: "[_/\\\\]+acq-([a-zA-Z0-9]+)"
+  - name: direction
+    pattern: "[_/\\\\]+dir-([a-zA-Z0-9]+)"    
+  - name: run
+    pattern: "[_/\\\\]+run-([0-9]+)"    
+  - name: space
+    pattern: "[_/\\\\]+space-([a-zA-Z0-9]+)"
+  - name: suffix
+    pattern: "(?:^|[_/\\\\])([a-zA-Z0-9]+)\\.[^/\\\\]+$"
+  - name: extension
+    pattern: "[^./\\\\](\\.[^/\\\\]+)$"
+path_patterns:
+  - "sub-{subject}[/ses-{session}]/{datatype<anat>|anat}/sub-{subject}[_ses-{session}][_task-{task}][_acq-{acquisition}][_run-{run}]_{suffix<T1w|T2w|FLAIR>}{extension<.nii|.nii.gz|.json>|.nii.gz}"
+  - "sub-{subject}[/ses-{session}]/{datatype<func>|func}/sub-{subject}[_ses-{session}]_task-{task}[_acq-{acquisition}][_dir-{direction}][_run-{run}]_{suffix<bold>}{extension<.nii|.nii.gz|.json>|.nii.gz}"
+  - "sub-{subject}[/ses-{session}]/{datatype<dwi>|dwi}/sub-{subject}[_ses-{session}][_acq-{acquisition}]_{suffix<dwi>}{extension<.bval|.bvec|.json|.nii.gz|.nii>|.nii.gz}"
+  - "sub-{subject}[/ses-{session}]/{datatype<fmap>|fmap}/sub-{subject}[_ses-{session}][_acq-{acquisition}][_dir-{direction}][_run-{run}]_{suffix<phasediff|magnitude1|magnitude2|phase1|phase2|fieldmap>}{extension<.nii|.nii.gz|.json>|.nii.gz}"
+  - "sub-{subject}[/ses-{session}]/{datatype<fmap>|fmap}/sub-{subject}[_ses-{session}][_acq-{acquisition}]_dir-{direction}[_run-{run}]_{suffix<epi>}{extension<.nii|.nii.gz|.json>|.nii.gz}"
+  - "sub-{subject}[/ses-{session}]/{datatype<eeg>|eeg}/sub-{subject}[_ses-{session}]_task-{task}[_acq-{acquisition}][_run-{run}]_{suffix<eeg>}{extension<.edf|.vhdr|.vmrk|.eeg|.set|.fdt|.bdf|.json>}"
+  - "sub-{subject}[/ses-{session}]/{datatype<eeg>}/sub-{subject}[_ses-{session}]_task-{task}[_acq-{acquisition}][_run-{run}]_{suffix<channels|events>}{extension<.tsv|.json>|.tsv}"
+  - "sub-{subject}[/ses-{session}]/{datatype<eeg>}/sub-{subject}[_ses-{session}][_acq-{acquisition}][_space-{space}]_{suffix<coordsystem>}{extension<.json>|.json}"
+  - "sub-{subject}[/ses-{session}]/{datatype<eeg>}/sub-{subject}[_ses-{session}][_acq-{acquisition}][_space-{space}]_{suffix<electrodes>}{extension<.tsv|.json>|.tsv}"
+  - "sub-{subject}[/ses-{session}]/{datatype<eeg>}/sub-{subject}[_ses-{session}][_acq-{acquisition}]_{suffix<photo>|photo}{extension<.jpg>}"
+  - "sub-{subject}[/ses-{session}]/{datatype<eeg>|eeg}/sub-{subject}[_ses-{session}]_task-{task}[_acq-{acquisition}][_run-{run}]_{suffix<physio>}{extension<.tsv.gz|.json>}"
+  - "sub-{subject}[/ses-{session}]/{datatype<nirs>|nirs}/sub-{subject}[_ses-{session}]_task-{task}[_acq-{acquisition}][_run-{run}]_{suffix<nirs>}{extension<.snirf|.json>}"
+  - "sub-{subject}[/ses-{session}]/{datatype<nirs>|nirs}/sub-{subject}[_ses-{session}]_task-{task}[_acq-{acquisition}][_run-{run}]_{suffix<events|channels>}{extension<.tsv|.json>}"
+  - "sub-{subject}[/ses-{session}]/{datatype<nirs>|nirs}/sub-{subject}[_ses-{session}][_acq-{acquisition}]_{suffix<optodes>}{extension<.tsv|.json>}"
+  - "sub-{subject}[/ses-{session}]/{datatype<nirs>|nirs}/sub-{subject}[_ses-{session}][_acq-{acquisition}]_{suffix<coordsystem>}{extension<.json>}"
+  - "sub-{subject}[/ses-{session}]/{datatype<eyetrack>|eyetrack}/sub-{subject}[_ses-{session}]_task-{task}[_acq-{acquisition}][_run-{run}]_{suffix<eyetrack>}{extension<.asc>}"
+  - "sub-{subject}[/ses-{session}]/{datatype<eyetrack>|eyetrack}/sub-{subject}[_ses-{session}]_task-{task}[_acq-{acquisition}][_run-{run}]_{suffix<events>}{extension<.tsv|.json>}"
+

+ 21 - 0
scripts/configs/specifications/derivative.yaml

@@ -0,0 +1,21 @@
+---
+tags:
+  - name: pipeline
+    pattern: "derivatives/([a-zA-Z0-9]+)"  
+  - name: subject
+    pattern: "[/\\\\]+sub-([a-zA-Z0-9]+)"
+  - name: session
+    pattern: "[_/\\\\]+ses-([a-zA-Z0-9]+)"
+  - name: filename
+    pattern: "[_/\\\\]+([a-zA-Z0-9_]+)[\\.]"    
+  - name: acquisition
+    pattern: "[_/\\\\]+acq-([a-zA-Z0-9]+)"
+  - name: run
+    pattern: "[_/\\\\]+run-([0-9]+)"
+  - name: suffix
+    pattern: "(?:^|[_/\\\\])([a-zA-Z0-9]+)\\.[^/\\\\]+$"
+  - name: extension
+    pattern: "[^./\\\\](\\.[^/\\\\]+)$"
+path_patterns:
+  # Only the run entity is optional; suffix and extension always end the file name.
+  - "derivatives/{pipeline}/sub-{subject}[/ses-{session}]/sub-{subject}[_ses-{session}][_acq-{acquisition}][_run-{run}]_{suffix}{extension}"
+  - "derivatives/{pipeline}/sub-{subject}[/ses-{session}]/sub-{subject}[_ses-{session}]_{filename}{extension}"

+ 19 - 0
scripts/configs/specifications/gids.yaml

@@ -0,0 +1,19 @@
+---
+tags:
+  - name: subject
+    pattern: "[/\\\\]+sub-([a-zA-Z0-9]+)"
+  - name: session
+    pattern: "[_/\\\\]+ses-([a-zA-Z0-9]+)"
+  - name: datatype
+    pattern: "(?:(?<!sourcedata))[/\\\\]+(genome)[/\\\\]+"
+  - name: sample
+    pattern: "[_/\\\\]+sample-([a-zA-Z0-9]+)"
+  - name: acquisition
+    pattern: "[_/\\\\]+acq-([a-zA-Z0-9]+)"       
+  - name: suffix
+    pattern: "(?:^|[_/\\\\])([a-zA-Z0-9]+)\\.[^/\\\\]+$"
+  - name: extension
+    pattern: "[^./\\\\](\\.[^/\\\\]+)$"
+path_patterns:
+  - "sub-{subject}[/ses-{session}]/{datatype<genome>|genome}/sub-{subject}[_ses-{session}]_sample-{sample}[_acq-{acquisition}]_{suffix<exome>|exome}{extension<.bam>}"
+  - "sub-{subject}[/ses-{session}]/{datatype<genome>|genome}/sub-{subject}[_ses-{session}]_sample-{sample}[_acq-{acquisition}]_{suffix<annotated>|annotated}{extension<.vcf|.txt>}"

+ 23 - 0
scripts/configs/specifications/sourcedata.yaml

@@ -0,0 +1,23 @@
+---
+tags:
+  - name: sourcetype
+    pattern: "(eeg|eyetrack|mri|nirs|genome)"
+  - name: subject
+    pattern: "[/\\\\]+sub-([a-zA-Z0-9]+)"
+  - name: session
+    pattern: "[_/\\\\]+ses-([a-zA-Z0-9]+)"
+  - name: task
+    pattern: "[_/\\\\]+task-([a-zA-Z0-9]+)"
+  - name: acquisition
+    pattern: "[_/\\\\]+acq-([a-zA-Z0-9]+)"
+  - name: run
+    pattern: "[_/\\\\]+run-([0-9]+)"
+  - name: suffix
+    pattern: "(?:^|[_/\\\\])([a-zA-Z0-9]+)\\.[^/\\\\]+$"
+  - name: extension
+    pattern: "[^./\\\\](\\.[^/\\\\]+)$"
+path_patterns:
+  - "{sourcetype<mri>|mri}/sub-{subject}[/ses-{session}]/sub-{subject}[_ses-{session}][_task-{task}][_acq-{acquisition}][_run-{run}]_{suffix<mri>|mri}{extension<.dcm>}"  
+  - "{sourcetype<eeg>|eeg}/sub-{subject}[/ses-{session}]/sub-{subject}[_ses-{session}]_task-{task}[_acq-{acquisition}][_run-{run}]_{suffix<eeg>|eeg}{extension<.bdf|.cnt|.eeg|.edf|.data|.gdf|.mat|.mff|.nxe|.set|.vhdr|.vmrk>}"  
+  - "{sourcetype<nirs>|nirs}/sub-{subject}[/ses-{session}]/sub-{subject}[_ses-{session}]_task-{task}[_acq-{acquisition}][_run-{run}]_{suffix<nirs>|nirs}{extension<.nirx>}"  
+  - "{sourcetype<eyetrack>|eyetrack}/sub-{subject}[/ses-{session}]/sub-{subject}[_ses-{session}]_task-{task}[_acq-{acquisition}][_run-{run}]_{suffix<eyetrack>|eyetrack}{extension<.edf|.dat|.log>}"

+ 47 - 0
scripts/convert/convert_eeg_mff_to_bvcf.py

@@ -0,0 +1,47 @@
+"""Pipeline to convert mff format EEG data to BrainVision format."""
+
+import logging
+
+from almirah import index
+from almirah import Layout
+from almirah import Specification
+from almirah.utils.convert import convert
+# Log at DEBUG level to a file and attach a default StreamHandler as well.
+logging.basicConfig(filename="logs/convert_eeg_to_bvcf.log", level=logging.DEBUG)
+logging.getLogger().addHandler(logging.StreamHandler())
+# Source and destination roots; left blank in this reduced script.
+SRC = ""
+DST = ""
+# Load each specification file, then create the layout that refers to it by name.
+source_spec = Specification.create_from_file("configs/specifications/sourcedata.yaml")
+src_lay = Layout(root=SRC, specification_name="sourcedata")
+
+bids_spec = Specification.create_from_file("configs/specifications/bids.yaml")
+dst_lay = Layout(root=DST, specification_name="bids")
+# Index the eeg folder under SRC and commit the index before querying it.
+src_lay.index(f"{SRC}/eeg")
+index.commit()
+# Select the indexed files tagged sourcetype "eeg" with the .mff extension.
+files = src_lay.query(sourcetype="eeg", extension=".mff")
+# Passed to convert() as tag_map; every tag name maps to the identical name.
+tag_map = {
+    "subject": "subject",
+    "session": "session",
+    "acquisition": "acquisition",
+    "task": "task",
+    "datatype": "datatype",
+    "run": "run",
+    "suffix": "suffix",
+}
+# Passed to convert() as anonymize; "daysback" is blank in this reduced script.
+anonymize = {"daysback": "", "keep_his": False, "keep_source": False}
+# Run the conversion of the selected files to "BrainVision", targeting dst_lay.
+convert(
+    files,
+    "BrainVision",
+    dst_lay,
+    logging="DEBUG",
+    line_freq=50,  # NOTE(review): presumably mains frequency in Hz - confirm
+    tag_map=tag_map,
+    anonymize=anonymize,
+)

+ 26 - 0
scripts/convert/convert_eyetrack_EDF_to_ASCII.py

@@ -0,0 +1,26 @@
+"""Pipeline to convert EDF to ASCII."""
+
+import logging
+
+from almirah import index
+from almirah import Layout
+from almirah import Specification
+from almirah.utils.convert import convert
+# Log at DEBUG level to a file and attach a default StreamHandler as well.
+logging.basicConfig(filename="logs/convert_eyetrack_to_asc.log", level=logging.DEBUG)
+logging.getLogger().addHandler(logging.StreamHandler())
+# Source and destination roots; left blank in this reduced script.
+SRC = ""
+DST = ""
+# Load each specification file, then create the layout that refers to it by name.
+source_spec = Specification.create_from_file("configs/specifications/sourcedata.yaml")
+src_lay = Layout(root=SRC, specification_name="sourcedata")
+
+bids_spec = Specification.create_from_file("configs/specifications/bids.yaml")
+dst_lay = Layout(root=DST, specification_name="bids")
+# Index the eyetrack folder under SRC and commit the index before querying it.
+src_lay.index(f"{SRC}/eyetrack")
+index.commit()
+# Select the indexed .edf eye-tracking files and convert them to "ASCII" in dst_lay.
+files = src_lay.query(sourcetype="eyetrack", extension=".edf")
+convert(files, "ASCII", dst_lay)

+ 32 - 0
scripts/convert/convert_mri_DICOM_to_NIfTY.py

@@ -0,0 +1,32 @@
+"""Pipeline to convert DICOM to NIfTI."""
+
+import logging
+
+from almirah import index
+from almirah import Layout
+from almirah import Specification
+from almirah.utils.convert import convert
+# Log at DEBUG level to a file and attach a default StreamHandler as well.
+logging.basicConfig(filename="logs/convert_mri_to_NIfTI.log", level=logging.DEBUG)
+logging.getLogger().addHandler(logging.StreamHandler())
+# Source and destination roots; left blank in this reduced script.
+SRC = ""
+DST = ""
+# Load each specification file, then create the layout that refers to it by name.
+source_spec = Specification.create_from_file("configs/specifications/sourcedata.yaml")
+src_lay = Layout(root=SRC, specification_name="sourcedata")
+
+bids_spec = Specification.create_from_file("configs/specifications/bids.yaml")
+dst_lay = Layout(root=DST, specification_name="bids")
+# Index the mri folder under SRC and commit the index before querying it.
+src_lay.index(f"{SRC}/mri")
+index.commit()
+# Select the indexed .dcm files and convert them to "NIfTI" in dst_lay.
+files = src_lay.query(sourcetype="mri", extension=".dcm")
+convert(
+    files,
+    "NIfTI",
+    dst_lay,
+    logging="DEBUG",
+    config="configs/converters/dcm2bids.json",  # converter configuration file
+)

+ 27 - 0
scripts/convert/convert_nirs_NIRx_to_SNIRF.py

@@ -0,0 +1,27 @@
+"""Pipeline to convert .nirx to SNIRF."""
+
+import logging
+
+from almirah import index
+from almirah import Layout
+from almirah import Specification
+from almirah.utils.convert import convert
+# Log at DEBUG level to a file and attach a default StreamHandler as well.
+logging.basicConfig(filename="logs/convert_nirs_to_SNIRF.log", level=logging.DEBUG)
+logging.getLogger().addHandler(logging.StreamHandler())
+# Source and destination roots; left blank in this reduced script.
+SRC = ""
+DST = ""
+# Load each specification file, then create the layout that refers to it by name.
+source_spec = Specification.create_from_file("configs/specifications/sourcedata.yaml")
+src_lay = Layout(root=SRC, specification_name="sourcedata")
+
+bids_spec = Specification.create_from_file("configs/specifications/bids.yaml")
+dst_lay = Layout(root=DST, specification_name="bids")
+# Index the nirs folder under SRC and commit the index before querying it.
+src_lay.index(f"{SRC}/nirs")
+index.commit()
+# Select the indexed .nirx files and convert them to "SNIRF" in dst_lay.
+files = src_lay.query(sourcetype="nirs", extension=".nirx")
+anonymize = {"daysback": "", "keep_his": False}  # "daysback" is blank here
+convert(files, "SNIRF", dst_lay, anonymize=anonymize)

+ 38 - 0
scripts/migrate/migrate_from_raw_db.py

@@ -0,0 +1,38 @@
+"""Transform and transfer tables from src to target database."""
+
+import sys
+import logging
+
+from almirah import Database
+from almirah.database import migrate
+
+from almirah.utils.gen import read_multi_yaml
+# Log at DEBUG level to a file and also stream log records to stdout.
+logging.basicConfig(filename="logs/migrate_from_raw_db.log", level=logging.DEBUG)
+logging.getLogger().addHandler(logging.StreamHandler(sys.stdout))
+# Source and target database handles; name, backend and host are blank here.
+src = Database(name="", backend="", host="")
+target = Database(name="", backend="", host="")
+# Connect both handles. NOTE(review): arguments presumably credentials - confirm.
+src.connect("", "")
+target.connect("", "")
+# Path of the mapping YAML, loaded with read_multi_yaml() in the migrate() call.
+mapping = "configs/mappings/transfer_db.yaml"
+# Passed to migrate() as dtype_kws. NOTE(review): meaning of "format" - confirm.
+dtype_kws = {
+    "format": "mixed",
+}
+# Run the migration from src to target with dry_run disabled.
+migrate(
+    src,
+    target,
+    read_multi_yaml(mapping),
+    dry_run=False,
+    na_vals=[","],  # NOTE(review): presumably values read as missing - confirm
+    dtype_kws=dtype_kws,
+    check_dups=True,
+    resolve_dups=False,
+    check_fks=True,
+    resolve_fks=True,
+    insert_ignore=True,
+)

+ 29 - 0
scripts/organize/organize_genetics_data.py

@@ -0,0 +1,29 @@
+"""Pipeline to organize raw genetics data."""
+
+import yaml
+import logging
+
+from almirah import Specification
+
+# Get inputs
+modality = input("Enter modality to organize: ")
+position = input("Enter index position of modality in rules: ")
+
+logging.basicConfig(
+    filename=f"logs/organize_{modality}_detailed.log",
+    format="%(asctime)s %(levelname)s: %(message)s",
+    level=logging.DEBUG,
+)
+
+
+logging.getLogger().addHandler(logging.StreamHandler())
+
+# Read rules yaml
+with open("configs/rules/rules.yaml") as f:
+    rules = yaml.load_all(f, yaml.SafeLoader)
+    spec = Specification.create_from_file("configs/specifications/gids.yaml")
+
+    for pos, rule in enumerate(rules):
+        if pos == int(position):
+            logging.info(f"Organizing by rule: \n {rule}")
+            spec.organize(rule)

+ 29 - 0
scripts/organize/organize_raw_data.py

@@ -0,0 +1,29 @@
+"""Pipeline to organize raw data."""
+
+import yaml
+import logging
+
+from almirah import Specification
+
+# Get inputs
+modality = input("Enter modality to organize: ")
+position = input("Enter index position of modality in rules: ")
+
+logging.basicConfig(
+    filename=f"logs/organize_{modality}_detailed.log",
+    format="%(asctime)s %(levelname)s: %(message)s",
+    level=logging.DEBUG,
+)
+
+
+logging.getLogger().addHandler(logging.StreamHandler())
+
+# Read rules yaml
+with open("configs/rules/rules.yaml") as f:
+    rules = yaml.load_all(f, yaml.SafeLoader)
+    spec = Specification.create_from_file("configs/specifications/sourcedata.yaml")
+
+    for pos, rule in enumerate(rules):
+        if pos == int(position):
+            logging.info(f"Organizing by rule: \n {rule}")
+            spec.organize(rule)