Skip to content

Commit 8a2d079

Browse files
author
Doruk Ozturk
committed
Create an envo lookup table for performance
1 parent 2e7d5d9 commit 8a2d079

2 files changed

Lines changed: 23 additions & 6 deletions

File tree

server/data/envo_lookup.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
{"ENVO:00000446": "terrestrial biome", "ENVO:00000292": "watershed", "ENVO:00001998": "soil", "ENVO:00000447": "marine biome", "ENVO:00000032": "bay", "ENVO:00002150": "coastal sea water", "ENVO:00000137": "coastal inlet", "ENVO:01000023": "marine pelagic biome", "ENVO:00002149": "sea water", "ENVO:01000048": "ocean biome", "ENVO:00000137\n": "coastal inlet", "ENVO:00002150\n": "coastal sea water", "ENVO:00002119": "alkaline hot spring", "ENVO:01000008": "microbial mat", "ENVO:01000157": "microbial mat material", "ENVO:01000024": "marine benthic biome", "ENVO:00000208": "marine pelagic zone", "ENVO:00002113": "deep marine sediment", "ENVO:01000065": "marine oxygen minimum zone", "ENVO:01000177": "grassland biome", "ENVO:00005750": "grassland soil", "ENVO:01000219": "anthropogenic terrestrial biome", "ENVO:00000078": "farm", "ENVO:00005749": "farm soil", "ENVO:01000252": "freshwater lake biome", "ENVO:00000021": "freshwater lake", "ENVO:00002011": "fresh water", "ENVO:00002131": "epilimnion", "ENVO:00002268": "sphagnum bog", "ENVO:01001020": "hypersaline lake", "ENVO:00002012": "hypersaline water"}

server/data/ingest_1000.py

Lines changed: 22 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,33 @@
11
import csv
22
import girder_client
33
import requests
4+
import json
5+
46

57
# Client for the Girder REST API on the local development server.
gc = girder_client.GirderClient(
    apiUrl='http://localhost:8080/api/v1',
)
# NOTE(review): credentials are hard-coded here; fine for a throwaway local
# instance, but they should come from configuration anywhere else.
gc.authenticate('admin', 'letmein')
# Girder resource resolved once at import time; presumably the container new
# items are created under -- its use is outside the visible hunk, confirm.
parent = gc.resourceLookup(
    'collection/ResonantEco/datasets/LLNL',
)

# Memo of ENVO id -> label, filled in by lookup_envo_number().
lookup_table = dict()
912

1013
def lookup_envo_number(envo):
    """Resolve an ENVO identifier to its label via the EBI OLS ``select`` API.

    Labels that were resolved successfully are memoised in the module-level
    ``lookup_table`` and reused, so a repeated identifier costs no further
    HTTP request.

    Args:
        envo: Query string sent as the ``q`` parameter (e.g. an ENVO id).

    Returns:
        The ``label`` of the first document in the response, or ``None`` when
        the response carries no usable first document.
    """
    # Answer repeats from the memo instead of going back to the network.
    if envo in lookup_table:
        return lookup_table[envo]

    # NOTE(review): no timeout is passed, so a stalled server blocks the
    # ingest indefinitely; consider adding one once the desired failure
    # behaviour is agreed.
    response = requests.get('https://www.ebi.ac.uk/ols/api/select',
                            params={'q': envo})
    json_response = response.json()
    try:
        label = json_response['response']['docs'][0]['label']
    except (IndexError, KeyError):
        # Empty result list, or a payload without the expected keys: treat
        # both as "no label found" rather than aborting the whole ingest.
        return None
    lookup_table[envo] = label
    return label
23+
24+
# Parsed contents of envo_lookup.json; populated on first use.
_ENVO_LOOKUP_CACHE = None


def _load_envo_lookup():
    """Return the table parsed from ``envo_lookup.json``, reading it only once."""
    global _ENVO_LOOKUP_CACHE
    if _ENVO_LOOKUP_CACHE is None:
        # Path is relative to the current working directory, as before.
        with open('envo_lookup.json', 'r') as f:
            _ENVO_LOOKUP_CACHE = json.load(f)
    return _ENVO_LOOKUP_CACHE


def get_envo_number(envo):
    """Return the label stored for an ENVO identifier in the local table.

    Despite the name, the value returned is the label string held in
    ``envo_lookup.json``, not a number. The file is parsed on the first call
    and kept in memory afterwards, instead of being re-read for every field
    of every ingested row.

    Args:
        envo: Identifier to look up (e.g. ``"ENVO:00001998"``).

    Returns:
        The label for ``envo``, or ``None`` when the table has no entry.

    Raises:
        OSError: If ``envo_lookup.json`` cannot be opened.
        ValueError: If the file does not contain valid JSON.
    """
    lookup = _load_envo_lookup()
    try:
        # Exact match first, so keys stored with stray whitespace still hit.
        return lookup[envo]
    except KeyError:
        pass
    if isinstance(envo, str):
        # Input cells can carry stray whitespace (the table itself contains
        # keys ending in a newline); retry with the cleaned identifier.
        return lookup.get(envo.strip())
    return None
1731

1832
def create_item_from_row(row):
1933
print("Ingesting {}".format(row[0]))
@@ -25,9 +39,9 @@ def create_item_from_row(row):
2539
}
2640
metadata['timestamp'] = row[48]
2741
metadata['name'] = row[1]
28-
metadata['biome'] = lookup_envo_number(row[5])
29-
metadata['feature'] = lookup_envo_number(row[6])
30-
metadata['material'] = lookup_envo_number(row[7])
42+
metadata['biome'] = get_envo_number(row[5])
43+
metadata['feature'] = get_envo_number(row[6])
44+
metadata['material'] = get_envo_number(row[7])
3145
gc.addMetadataToItem(item['_id'], {'meta': metadata})
3246

3347
def create_items_from_csv(path):
@@ -39,3 +53,5 @@ def create_items_from_csv(path):
3953

4054
if __name__ == '__main__':
    # Ingest the curated JGI metadata sheet; the path is resolved against the
    # current working directory.
    create_items_from_csv(path='metadata_1K_datasets_curated_JGI.csv')
    # Disabled: dumps the labels memoised in lookup_table to envo_lookup.json.
    # with open("envo_lookup.json", "w") as f:
    #     f.write(json.dumps(lookup_table))

0 commit comments

Comments
 (0)