"""Pipeline to organize raw data.

Interactive script: asks for a modality name (used only to name the log
file) and the zero-based index of a rule document inside
``configs/rules/rules.yaml``, then hands that rule to
``Specification.organize``.
"""

import logging

import yaml
from almirah import Specification

# Get inputs
modality = input("Enter modality to organize: ")
position = input("Enter index position of modality in rules: ")

# Parse the index once, up front: a non-numeric answer raises ValueError
# before any log file is created or any config is read, instead of
# surfacing from inside the rule loop (or never, if the rules file is empty).
rule_index = int(position)

logging.basicConfig(
    filename=f"logs/organize_{modality}_detailed.log",
    format="%(asctime)s %(levelname)s: %(message)s",
    level=logging.DEBUG,
)
# Mirror everything written to the log file onto the console as well.
logging.getLogger().addHandler(logging.StreamHandler())

# Read rules yaml
with open("configs/rules/rules.yaml", encoding="utf-8") as f:
    # yaml.load_all is lazy: documents are parsed as they are iterated, so
    # the loop below has to run while the file is still open.
    rules = yaml.load_all(f, yaml.SafeLoader)
    spec = Specification.create_from_file("configs/specifications/sourcedata.yaml")

    for pos, rule in enumerate(rules):
        if pos == rule_index:
            logging.info("Organizing by rule: \n %s", rule)
            spec.organize(rule)
            # Indices are unique, so there is no need to parse the
            # remaining documents once the requested rule has been handled.
            break
    else:
        # Reached only when the loop finished without a match (index out of
        # range or negative); say so rather than exiting silently.
        logging.warning(
            "No rule found at index position %d; nothing was organized.",
            rule_index,
        )