Skip to content

Commit cff24bd

Browse files
committed
Update ingest scripts
1 parent d668447 commit cff24bd

4 files changed

Lines changed: 1111 additions & 9594 deletions

File tree

server/data/ingest.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,7 @@ def getEcosystem(name):
8181
name = data['meta_']['Genome Name / Sample Name'].split(' - ')[1]
8282
latitude = data['meta_']['Lat']
8383
longitude = data['meta_']['Long']
84+
omics = data['meta_']['Transect']
8485
material = getMaterial(data['meta_']['Genome Name / Sample Name'])
8586
ecosystem = getEcosystem(data['meta_']['Genome Name / Sample Name'])
8687
return {
@@ -90,8 +91,8 @@ def getEcosystem(name):
9091
'timestemp': None,
9192
'material': material,
9293
'feature': None,
93-
'biome': None,
94-
'ecosystem': ecosystem,
94+
'biome': ecosystem,
95+
'omics': omics,
9596
'source': 'LLNL'
9697
}
9798

server/data/ingest_1000.py

Lines changed: 18 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -5,14 +5,20 @@
55

66

77
gc = girder_client.GirderClient(apiUrl='http://localhost:8080/api/v1')
8-
gc.authenticate('admin', 'letmein')
9-
parent = gc.resourceLookup('collection/ResonantEco/datasets/LLNL')
8+
gc.authenticate('girder', 'girder')
9+
parent = gc.resourceLookup('collection/ResonantEco/datasets/JGI')
1010

1111
lookup_table = {}
12+
with open('envo_lookup.json', 'r') as f:
13+
lookup_table = json.load(f)
14+
print(lookup_table)
15+
1216

1317
def lookup_envo_number(envo):
18+
if envo in lookup_table:
19+
return lookup_table[envo]
1420
response = requests.get('https://www.ebi.ac.uk/ols/api/select',
15-
params={'q': envo})
21+
params={'q': envo})
1622
json_response = response.json()
1723
try:
1824
label = json_response['response']['docs'][0]['label']
@@ -21,39 +27,35 @@ def lookup_envo_number(envo):
2127
except IndexError:
2228
return None
2329

24-
def get_envo_number(envo):
25-
with open('envo_lookup.json', 'r') as f:
26-
lookup = json.load(f)
27-
try:
28-
return lookup[envo]
29-
except KeyError:
30-
return None
3130

3231
def create_item_from_row(row):
3332
if not row[7]:
3433
return
3534
print("Ingesting {}".format(row[0]))
3635
item = gc.createItem(parent['_id'], row[0], reuseExisting=True)
3736
metadata = {
38-
'latitude': None,
39-
'longitude': None,
37+
'latitude': row[60],
38+
'longitude': row[59],
4039
'source': 'JGI'
4140
}
4241
metadata['timestamp'] = row[48]
4342
metadata['name'] = row[1]
44-
metadata['biome'] = get_envo_number(row[5])
45-
metadata['feature'] = get_envo_number(row[6])
46-
metadata['material'] = get_envo_number(row[7])
43+
metadata['biome'] = lookup_envo_number(row[5])
44+
metadata['feature'] = lookup_envo_number(row[6])
45+
metadata['material'] = lookup_envo_number(row[7])
46+
metadata['omics'] = row[20].lower()
4747
gc.addMetadataToItem(item['_id'], {'meta': metadata})
4848

49+
4950
def create_items_from_csv(path):
5051
with open(path) as csvfile:
5152
reader = csv.reader(csvfile, delimiter=',')
5253
next(reader)
5354
for row in reader:
5455
create_item_from_row(row)
5556

57+
5658
if __name__ == '__main__':
57-
create_items_from_csv('metadata_1K_datasets_curated_JGI.csv')
59+
create_items_from_csv('./jgi_data/NMDC_metadata_datasets - NMDC_datasets_metadata.csv')
5860
# with open("envo_lookup.json", "w") as f:
5961
# f.write(json.dumps(lookup_table))

0 commit comments

Comments
 (0)