Exploring the CALM Brain Resource with almirah
Load the dataset
from almirah import Dataset
Dataset.options()
[<Dataset name: 'calm-brain'>]
ds = Dataset(name="calm-brain")
ds.components
[<Layout root: '/path/to/data'>,
<Layout root: '/path/to/genome'>,
<Database url: 'request:calm-brain@https://www.calm-brain.ncbs.res.in/db-request/'>]
Querying layouts
lay = ds.components[0]
print(lay)
len(lay.files)
<Layout root: '/path/to/data'>
42652
from almirah import Tag
tags = Tag.options()
len(tags)
1589
tags_names_possible = {tag.name for tag in tags}
tags_names_possible
{'acquisition',
'datatype',
'direction',
'extension',
'run',
'sample',
'session',
'space',
'subject',
'suffix',
'task'}
Tag.options(name="datatype")
[<Tag datatype: 'anat'>,
<Tag datatype: 'dwi'>,
<Tag datatype: 'eeg'>,
<Tag datatype: 'eyetrack'>,
<Tag datatype: 'fmap'>,
<Tag datatype: 'func'>,
<Tag datatype: 'genome'>,
<Tag datatype: 'nirs'>]
files = lay.query(datatype="eeg")
len(files)
15821
file = files[0]
file.rel_path
'sub-D0828/ses-101/eeg/sub-D0828_ses-101_task-auditoryPCP_run-01_events.json'
file.tags
{'datatype': 'eeg', 'extension': '.json', 'run': '01', 'session': '101', 'subject': 'D0828', 'suffix': 'events', 'task': 'auditoryPCP'}
Querying databases
db = ds.components[2]
db
<Database url: 'request:calm-brain@https://www.calm-brain.ncbs.res.in/db-request/'>
db.connect("username", "password")
df = db.query(table="presenting_disorders")
df[["subject", "session", "addiction"]].head()
|   | subject | session | addiction |
|---|---------|---------|-----------|
| 0 | D0019   | 101     | 0         |
| 1 | D0019   | 111     | 0         |
| 2 | D0020   | 101     | 0         |
| 3 | D0020   | 111     | <NA>      |
| 4 | D0021   | 101     | 0         |
Generating summaries
anat_subject_tags = ds.query(returns="subject", datatype="anat")
anat_subjects = {subject for t in anat_subject_tags for subject in t}
len(anat_subjects)
699
eyetrack_subject_tags = ds.query(returns="subject", datatype="eyetrack")
eyetrack_subjects = {subject for t in eyetrack_subject_tags for subject in t}
len(eyetrack_subjects)
1075
df = db.query(table="subjects")
len(df)
2276
df = db.query(table="modified_kuppuswamy_socioeconomic_scale")
len(df)
1444