organize_raw_data.py 799 B

1234567891011121314151617181920212223242526272829
  1. """Pipeline to organize raw data."""
  2. import yaml
  3. import logging
  4. from almirah import Specification
  5. # Get inputs
  6. modality = input("Enter modality to organize: ")
  7. position = input("Enter index position of modality in rules: ")
  8. logging.basicConfig(
  9. filename=f"logs/organize_{modality}_detailed.log",
  10. format="%(asctime)s %(levelname)s: %(message)s",
  11. level=logging.DEBUG,
  12. )
  13. logging.getLogger().addHandler(logging.StreamHandler())
  14. # Read rules yaml
  15. with open("configs/rules/rules.yaml") as f:
  16. rules = yaml.load_all(f, yaml.SafeLoader)
  17. spec = Specification.create_from_file("configs/specifications/sourcedata.yaml")
  18. for pos, rule in enumerate(rules):
  19. if pos == int(position):
  20. logging.info(f"Organizing by rule: \n {rule}")
  21. spec.organize(rule)