1515
# Lookup tables mapping ENVO term identifiers to their human-readable labels.
#
# NOTE(review): the keys mix two identifier spellings ('ENVO_...' and
# 'ENVO:...'). They are reproduced exactly as written; confirm that every
# lookup against these tables uses the matching form before normalizing.

# Environmental medium terms.
env_mediums = {
    'ENVO_00002042': 'surface water',
    'ENVO_00002007': 'sediment',
    'ENVO:00001998': 'soil',
}

# Local-scale environmental context terms.
env_local_scales = {
    'ENVO_00000022': 'river',
    'ENVO:01000861': 'area of dwarf scrub',
    'ENVO:00000516': 'hummock',
    'ENVO:01000869': 'area of scrub',
    'ENVO:01000887': 'area of sedge- and forb-dominated herbaceous vegetation',
    'ENVO:01001370': 'tundra ecosystem',
}

# Broad-scale environmental context (biome) terms.
env_broad_scales = {
    'ENVO_01000253': 'freshwater river biome',
    'ENVO:00000446': 'terrestrial biome',
}
2130
2231
2332@dataclass
@@ -26,13 +35,15 @@ class NomAnalysisActivity:
2635 cluster_name :str = "EMSL-RZR"
2736 nom_21T_instrument_name : str = "21T_Agilent"
2837 nom_12T_instrument_name : str = "12T_FTICR_B"
38+ nom_7T_instrument_name : str = "7T_FT_ICR_MS"
2939
@dataclass
class OmicsProcessing:
    """Default string constants used when building omics-processing records."""

    # Omics type label and free-text description for NOM runs.
    nom_omics_processing_type: str = "Organic Matter Characterization"
    nom_omics_processing_description: str = "High resolution MS spectra only"

    # Instrument names, one per supported magnet.
    # NOTE(review): the 21T value here uses a space ("21T Agilent"); confirm
    # whether it is meant to match the underscore form used elsewhere.
    nom_21T_instrument_name: str = "21T Agilent"
    nom_12T_instrument_name: str = "12T_FTICR_B"
    nom_7T_instrument_name: str = "7T_FT_ICR_MS"
3647
3748@dataclass
3849class DataObject :
@@ -42,13 +53,13 @@ class DataObject:
4253 nom_dp_data_object_description :str = "EnviroMS FT ICR-MS natural organic matter workflow molecular formula assignment output details"
4354
@dataclass
class Biosample:
    """Empty placeholder dataclass for biosample defaults (no fields yet)."""
4758
4859@dataclass
4960class NMDC_Types :
5061
51- BioSample :str = "nmdc:Biosample"
62+ Biosample :str = "nmdc:Biosample"
5263 OmicsProcessing :str = "nmdc:OmicsProcessing"
5364 NomAnalysisActivity :str = "nmdc:NomAnalysisActivity"
5465 DataObject :str = "nmdc:DataObject"
@@ -68,7 +79,7 @@ def __dict__(self):
6879 @property
6980 def json (self ):
7081 return dumps (self .__dict__ )
71-
82+
7283def mint_nmdc_id (type :NMDC_Types , how_many :int = 1 ) -> List [str ]:
7384
7485 config = yaml .safe_load (open ('./config.yaml' ,'r' ))
@@ -93,7 +104,7 @@ def mint_nmdc_id(type:NMDC_Types, how_many:int = 1) -> List[str]:
93104
94105def get_biosample_object (emsl_metadata :EMSL_Metadata ) -> nmdc .Biosample :
95106
96- nmdc_id = mint_nmdc_id ({'id' : NMDC_Types .BioSample })[0 ]
107+ nmdc_id = mint_nmdc_id ({'id' : NMDC_Types .Biosample })[0 ]
97108
98109 env_medium = {
99110 'has_raw_value' : emsl_metadata .env_medium ,
@@ -121,7 +132,7 @@ def get_biosample_object(emsl_metadata:EMSL_Metadata) -> nmdc.Biosample:
121132 "longitude" : emsl_metadata .longitude ,
122133 }
123134
124- collection_date = { 'has_raw_value' : emsl_metadata .collection_date }
135+ collection_date = {'has_raw_value' : emsl_metadata .collection_date }
125136
126137 geo_loc_name = {'has_raw_value' : emsl_metadata .geo_loc_name }
127138
@@ -166,7 +177,7 @@ def get_data_object(file_path:Path, base_url:str, was_generated_by:str,
166177 "description" : description ,
167178 "type" : "nmdc:DataObject"
168179 }
169-
180+
170181 data_object = nmdc .DataObject (** data_dict )
171182
172183 return data_object
@@ -228,46 +239,45 @@ def create_nmdc_metadata(raw_data_path:Path, data_product_path:Path, base_url:st
228239
229240 if not biosample_id :
230241
231- # biosample_id = mint_nmdc_id({'id': NMDC_Types.BioSample })[0]
242+ biosample_id = mint_nmdc_id ({'id' : NMDC_Types .Biosample })[0 ]
232243 bioSample = get_biosample_object (emsl_metadata )
233244 biosample_id = bioSample .id
234245
235246 else :
236247
237248 ''' needs to finish the logic for creating biosamples, this will fail because it is missing some required fields'''
238- bioSample = nmdc . BioSample ( id = biosample_id )
249+ bioSample = None
239250
240251 omicsProcessing = get_omics_processing (raw_data_path ,
241- OmicsProcessing .nom_12T_instrument_name ,
242- biosample_id , None ,
252+ OmicsProcessing .nom_7T_instrument_name ,
253+ biosample_id , 'nmdc:placeholder' ,
243254 OmicsProcessing .nom_omics_processing_type ,
244255 OmicsProcessing .nom_omics_processing_description ,
245- emsl_metadata .nmdc_study
256+ emsl_metadata .nmdc_study
246257 )
247258
248- rawDataObject = get_data_object (raw_data_path , base_url + 'nom/grow /raw/' ,
259+ rawDataObject = get_data_object (raw_data_path , base_url + 'nom/1000soils /raw/' ,
249260 was_generated_by = omicsProcessing .id ,
250261 data_object_type = DataObject .nom_raw_data_object_type ,
251262 description = DataObject .nom_raw_data_object_description )
252263
253264 nomAnalysisActivity = get_nom_analysis_activity (NomAnalysisActivity .cluster_name ,
254265 NomAnalysisActivity .codebase_url ,
255- rawDataObject .id , None , False ,
266+ rawDataObject .id , 'nmdc:placeholder' , False ,
256267 omicsProcessing .id ,
257- NomAnalysisActivity .nom_12T_instrument_name )
268+ NomAnalysisActivity .nom_7T_instrument_name )
258269
259- dataProductDataObject = get_data_object (data_product_path , base_url + 'nom/grow /results/' ,
270+ dataProductDataObject = get_data_object (data_product_path , base_url + 'nom/1000soils /results/' ,
260271 was_generated_by = nomAnalysisActivity .id ,
261272 data_object_type = DataObject .nom_dp_data_object_type ,
262273 description = DataObject .nom_dp_data_object_description )
263274
264-
265275 #circular dependencies : great!
266- nomAnalysisActivity .has_input = [rawDataObject .id ]
267276 nomAnalysisActivity .has_output = [dataProductDataObject .id ]
268277 omicsProcessing .has_output = [rawDataObject .id ]
269278
270- nom_metadata_db .biosample_set .append (bioSample )
279+ if bioSample :
280+ nom_metadata_db .biosample_set .append (bioSample )
271281 nom_metadata_db .data_object_set .append (rawDataObject )
272282 nom_metadata_db .nom_analysis_activity_set .append (nomAnalysisActivity )
273283 nom_metadata_db .omics_processing_set .append (omicsProcessing )
@@ -276,4 +286,3 @@ def create_nmdc_metadata(raw_data_path:Path, data_product_path:Path, base_url:st
def dump_nmdc_database(ndmc_database: nmdc.Database, output_filepath: str):
    """Write an NMDC database object to ``output_filepath``.

    Thin wrapper that delegates to ``json_dumper.dump``.

    Args:
        ndmc_database: The ``nmdc.Database`` instance to serialize. The
            parameter keeps its original spelling so that existing keyword
            callers continue to work.
        output_filepath: Destination path handed to ``json_dumper.dump``.
    """
    json_dumper.dump(ndmc_database, output_filepath)
279-
0 commit comments