EMSL-Computing
diff --git a/‎.gitignore‎
Lines changed: 1 addition & 0 deletions b/‎.gitignore‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎support_code/nmdc/nom/api_info_retriever.py‎
Lines changed: 53 additions & 0 deletions b/‎support_code/nmdc/nom/api_info_retriever.py‎
Lines changed: 53 additions & 0 deletions
diff --git a/‎…pport_code/nmdc/nom/nmdc_metadata_gen.py‎ ‎…om/archived_scripts/nmdc_metadata_gen.py‎support_code/nmdc/nom/nmdc_metadata_gen.py renamed to support_code/nmdc/nom/archived_scripts/nmdc_metadata_gen.py b/‎…pport_code/nmdc/nom/nmdc_metadata_gen.py‎ ‎…om/archived_scripts/nmdc_metadata_gen.py‎support_code/nmdc/nom/nmdc_metadata_gen.py renamed to support_code/nmdc/nom/archived_scripts/nmdc_metadata_gen.py
diff --git a/‎…pport_code/nmdc/nom/nom_grow_workflow.py‎ ‎…om/archived_scripts/nom_grow_workflow.py‎support_code/nmdc/nom/nom_grow_workflow.py renamed to support_code/nmdc/nom/archived_scripts/nom_grow_workflow.py b/‎…pport_code/nmdc/nom/nom_grow_workflow.py‎ ‎…om/archived_scripts/nom_grow_workflow.py‎support_code/nmdc/nom/nom_grow_workflow.py renamed to support_code/nmdc/nom/archived_scripts/nom_grow_workflow.py
diff --git a/‎support_code/nmdc/nom/config.toml‎
Lines changed: 10 additions & 0 deletions b/‎support_code/nmdc/nom/config.toml‎
Lines changed: 10 additions & 0 deletions
diff --git a/‎support_code/nmdc/nom/main.py‎
Lines changed: 74 additions & 0 deletions b/‎support_code/nmdc/nom/main.py‎
Lines changed: 74 additions & 0 deletions
@@ -82,3 +82,4 @@ r00*/
 !ext_lib/dotnet/*.xml
 ext_lib/
 *.egg*
+config.yaml
@@ -0,0 +1,53 @@
+import json
+from typing import Optional
+
+import requests
+
+# TODO: Point DEFAULT_BASE_URL at the regular NMDC API url when berkeley is implemented
+
+class ApiInfoRetriever:
+    """
+    Look up record identifiers in a single NMDC API collection.
+
+    Parameters
+    ----------
+    collection_name : str
+        Name of the ``nmdcschema`` collection to query.
+    base_url : str, optional
+        Root URL of the NMDC API. Defaults to the Berkeley deployment.
+    timeout : float, optional
+        Seconds to wait for the API before giving up. Defaults to 30.
+    """
+
+    DEFAULT_BASE_URL = "https://api-berkeley.microbiomedata.org"
+
+    def __init__(self, collection_name: str, base_url: str = DEFAULT_BASE_URL, timeout: float = 30.0):
+        self.collection_name = collection_name
+        # Drop any trailing slash so the URL built below never contains "//".
+        self.base_url = base_url.rstrip("/")
+        self.timeout = timeout
+
+    def get_id_by_slot_from_collection(self, slot_name: str, slot_field_value: str) -> str:
+        """
+        Retrieve the identifier from a specified collection based on a slot name and field value.
+
+        Parameters
+        ----------
+        slot_name : str
+            The name of the slot to filter by.
+        slot_field_value : str
+            The value of the slot field to filter for. Trailing whitespace will be removed.
+
+        Returns
+        -------
+        str
+            The ``id`` of the first resource matching the specified slot name and field value.
+
+        Raises
+        ------
+        ValueError
+            If the API responds with a non-200 status code or if no resources are found
+            for the given slot name and value.
+        requests.exceptions.RequestException
+            If the request cannot be completed (connection error, timeout, ...).
+        """
+        # trim trailing white spaces
+        slot_field_value = slot_field_value.rstrip()
+
+        # json.dumps escapes quotes and backslashes inside the value, and passing the
+        # query through `params` lets requests URL-encode characters such as '&', '#'
+        # or '+' that would otherwise corrupt a hand-built query string.
+        query = {
+            "filter": json.dumps({slot_name: slot_field_value}),
+            "projection": "id",
+        }
+        url = f"{self.base_url}/nmdcschema/{self.collection_name}"
+
+        # Without a timeout requests waits indefinitely on an unresponsive server.
+        resp = requests.get(url, params=query, timeout=self.timeout)
+
+        # Check if the response status is 200
+        if resp.status_code != 200:
+            raise ValueError(f"Failed to retrieve data from {self.collection_name}, response code: {resp.status_code}")
+
+        # A missing or empty 'resources' list both mean that nothing matched.
+        resources = resp.json().get("resources")
+        if not resources:
+            raise ValueError(f"No resources found for {slot_name} with value {slot_field_value}")
+
+        return resources[0]["id"]
+
@@ -0,0 +1,10 @@
+# Configuration values that main.py forwards to MetadataGenerator.
+# The "path/to/..." entries are placeholders.
+
+# Biosample metadata spreadsheet (.csv, .xlsx, or .xls; include the file extension).
+metadata_file = "path/to/metadata/spreadsheet.csv"
+# Directory where the data lives or will be stored.
+data_dir = "path/to/directory/where/data/lives/"
+# Reference calibration file (include .ref).
+ref_calibration_path = "path/to/reference/calibration/file.ref"
+# One of: "Direct Infusion FT ICR-MS Raw Data", "LC-DDA-MS/MS Raw Data".
+raw_data_object_type = "LC-DDA-MS/MS Raw Data"
+# One of: "FT ICR-MS Analysis Results", "GC-MS Metabolomics Results".
+processed_data_object_type = "FT ICR-MS Analysis Results"
+# Output file the generated data is dumped to (include .json).
+registration_file = "path/to/where/database/dump/lives.json"
+# One of: "EMSL-RZR", "EMSL".
+execution_resource = "EMSL-RZR"
+# Field strength for the NOM analysis; one of 7, 12, 15, 21.
+field_strength = 7
+# Passed to MetadataGenerator as workflow_version.
+workflow_version = "2.2.3"
+# Path to the NMDC minting client configuration file.
+minting_client_config_path = "enviroMS/nmdc_metadata_generation/config.yaml"
@@ -0,0 +1,74 @@
+from metadata_generator import MetadataGenerator
+import toml
+from pathlib import Path
+
+def main(config_path: str = 'enviroMS/nmdc_metadata_generation/config.toml'):
+    """
+    Runs the MetadataGenerator using the configuration provided in a TOML file.
+
+    Parameters
+    ----------
+    config_path : str, optional
+        Path to the TOML configuration file.
+        Defaults to 'enviroMS/nmdc_metadata_generation/config.toml'.
+
+    Raises
+    ------
+    KeyError
+        If one or more required fields are missing from the configuration file.
+
+    The TOML configuration file must include the following fields:
+
+    - metadata_file: str
+        The path to the .csv, .xlsx, or .xls file where the biosample metadata is stored (include file extension).
+        Example: "metadata_file.csv"
+    - data_dir: str
+        The directory path where the data lives or will be stored.
+        Example: "/path/to/data/"
+    - ref_calibration_path: str
+        The path to the .ref file where the reference calibration data is stored (include .ref).
+        Example: "/path/to/calibration_file.ref"
+    - raw_data_object_type: str
+        The raw data object type. Must match one of the following options:
+        - 'Direct Infusion FT ICR-MS Raw Data'
+        - 'LC-DDA-MS/MS Raw Data'
+    - processed_data_object_type: str
+        The processed data object type. Must match one of the following options:
+        - 'FT ICR-MS Analysis Results'
+        - 'GC-MS Metabolomics Results'
+    - registration_file: str
+        The desired name of the output file where the data will be dumped (include .json).
+        Example: "output_file.json"
+    - execution_resource: str
+        The execution resource used for the analysis. Must match one of the following options:
+        - 'EMSL-RZR'
+        - 'EMSL'
+    - field_strength: int
+        The field strength for the NOM analysis. Must match one of the following values:
+        - 7
+        - 12
+        - 15
+        - 21
+    - workflow_version: str
+        The workflow version passed to the MetadataGenerator.
+        Example: "2.2.3"
+
+    The following field is optional:
+
+    - minting_client_config_path: str
+        The path to the NMDC minting client configuration file. Defaults to 'enviroMS/nmdc_metadata_generation/config.yaml'.
+
+    Notes
+    -----
+    This function assumes:
+    - If a biosample_id does not exist for the sample in the metadata_file, a biosample will be generated. If a
+      biosample_id does exist in the spreadsheet, no biosample will be generated and the existing biosample_id
+      will be used to generate all other metadata.
+    - The metadata_file conforms to a predefined structure. See example spreadsheet: https://docs.google.com/spreadsheets/d/1-xHGkkG5Gpw5Pen1iM_JUP2XmphuF19ps2ZYONvtDUs/edit?gid=1112301083#gid=1112301083
+    - Necessary configuration and calibrations are already added to MongoDB. If new configurations are needed, they must be added beforehand.
+    """
+    # Load arguments from TOML file
+    config_data = toml.load(config_path)
+
+    # Report every missing field at once instead of failing on the first lookup.
+    required_keys = (
+        'metadata_file',
+        'data_dir',
+        'ref_calibration_path',
+        'raw_data_object_type',
+        'processed_data_object_type',
+        'registration_file',
+        'execution_resource',
+        'field_strength',
+        'workflow_version',
+    )
+    missing = [key for key in required_keys if key not in config_data]
+    if missing:
+        raise KeyError(f"Missing required field(s) in {config_path}: {', '.join(missing)}")
+
+    generator = MetadataGenerator(
+        metadata_file=config_data['metadata_file'],
+        data_dir=Path(config_data['data_dir']),
+        ref_calibration_path=Path(config_data['ref_calibration_path']),
+        raw_data_object_type=config_data['raw_data_object_type'],
+        processed_data_object_type=config_data['processed_data_object_type'],
+        database_dump_json_path=config_data['registration_file'],
+        execution_resource=config_data['execution_resource'],
+        field_strength=config_data['field_strength'],
+        workflow_version=config_data['workflow_version'],
+        # Honour the documented default when the field is omitted.
+        minting_client_config_path=config_data.get(
+            'minting_client_config_path',
+            'enviroMS/nmdc_metadata_generation/config.yaml',
+        ),
+    )
+
+    generator.run()
+
+# Run the metadata generation only when this file is executed as a script, not when it is imported.
+if __name__ == "__main__":
+    main()