Skip to content

Commit d9b9a37

Browse files
authored
Merge pull request #5 from OpenDataAnalytics/ingest-thousand
Ingest thousand dataset
2 parents 2300dcb + 8a2d079 commit d9b9a37

4 files changed

Lines changed: 9635 additions & 0 deletions

File tree

server/data/envo_lookup.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
{"ENVO:00000446": "terrestrial biome", "ENVO:00000292": "watershed", "ENVO:00001998": "soil", "ENVO:00000447": "marine biome", "ENVO:00000032": "bay", "ENVO:00002150": "coastal sea water", "ENVO:00000137": "coastal inlet", "ENVO:01000023": "marine pelagic biome", "ENVO:00002149": "sea water", "ENVO:01000048": "ocean biome", "ENVO:00000137\n": "coastal inlet", "ENVO:00002150\n": "coastal sea water", "ENVO:00002119": "alkaline hot spring", "ENVO:01000008": "microbial mat", "ENVO:01000157": "microbial mat material", "ENVO:01000024": "marine benthic biome", "ENVO:00000208": "marine pelagic zone", "ENVO:00002113": "deep marine sediment", "ENVO:01000065": "marine oxygen minimum zone", "ENVO:01000177": "grassland biome", "ENVO:00005750": "grassland soil", "ENVO:01000219": "anthropogenic terrestrial biome", "ENVO:00000078": "farm", "ENVO:00005749": "farm soil", "ENVO:01000252": "freshwater lake biome", "ENVO:00000021": "freshwater lake", "ENVO:00002011": "fresh water", "ENVO:00002131": "epilimnion", "ENVO:00002268": "sphagnum bog", "ENVO:01001020": "hypersaline lake", "ENVO:00002012": "hypersaline water"}

server/data/ingest_1000.py

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
import csv
2+
import girder_client
3+
import requests
4+
import json
5+
6+
7+
import os

# Connection settings for the Girder server. Each value can be overridden
# through the environment so credentials do not have to live in the source;
# the defaults are the values this script has always used.
gc = girder_client.GirderClient(
    apiUrl=os.environ.get('GIRDER_API_URL', 'http://localhost:8080/api/v1'))
gc.authenticate(os.environ.get('GIRDER_USER', 'admin'),
                os.environ.get('GIRDER_PASSWORD', 'letmein'))

# Girder resource under which every ingested item is created.
parent = gc.resourceLookup('collection/ResonantEco/datasets/LLNL')

# Labels resolved by lookup_envo_number(), keyed by the queried identifier.
lookup_table = {}
12+
13+
def lookup_envo_number(envo):
    """Resolve an identifier to a label through the EBI OLS ``select`` API.

    ``envo`` is sent as the ``q`` query parameter and the ``label`` of the
    first returned document is used. A resolved label is stored in the
    module-level ``lookup_table`` and reused on later calls with the same
    ``envo`` instead of querying the service again.

    Args:
        envo: The query string (presumably an ENVO id such as the keys of
            ``envo_lookup.json`` -- confirm against the caller).

    Returns:
        The label of the first matching document, or ``None`` when the
        response contains no documents or does not have the expected shape.

    Raises:
        requests.exceptions.RequestException: On network failure or when
            the request exceeds the timeout.
        ValueError: If the response body is not valid JSON.
    """
    if envo in lookup_table:
        return lookup_table[envo]

    # Without a timeout requests waits indefinitely on a stalled server.
    response = requests.get('https://www.ebi.ac.uk/ols/api/select',
                            params={'q': envo},
                            timeout=30)
    json_response = response.json()
    try:
        label = json_response['response']['docs'][0]['label']
    except (IndexError, KeyError, TypeError):
        # Empty result list, or a payload missing the expected keys.
        return None
    lookup_table[envo] = label
    return label
23+
24+
# Parsed contents of lookup files, keyed by the path they were read from.
_ENVO_LOOKUP_CACHE = {}


def _load_envo_lookup(path='envo_lookup.json'):
    """Return the parsed JSON mapping at ``path``, reading it only once."""
    if path not in _ENVO_LOOKUP_CACHE:
        with open(path, 'r') as f:
            _ENVO_LOOKUP_CACHE[path] = json.load(f)
    return _ENVO_LOOKUP_CACHE[path]


def get_envo_number(envo):
    """Look up the label stored for ``envo`` in ``envo_lookup.json``.

    The file is resolved relative to the current working directory and is
    parsed on the first call only; later calls reuse the in-memory table, so
    ingesting many rows no longer re-reads the file for every field.

    Args:
        envo: Key to look up; matched exactly, with no normalisation.

    Returns:
        The value stored under ``envo``, or ``None`` if the key is absent.

    Raises:
        OSError: If ``envo_lookup.json`` cannot be opened on first use.
        ValueError: If the file does not contain valid JSON.
    """
    return _load_envo_lookup().get(envo)
31+
32+
def create_item_from_row(row):
    """Create (or reuse) a Girder item for one CSV row and attach metadata.

    The item is named after ``row[0]`` and created under the module-level
    ``parent`` resource. Its metadata takes the timestamp from ``row[48]``,
    the name from ``row[1]`` and the biome/feature/material labels from
    ``get_envo_number`` applied to ``row[5]``, ``row[6]`` and ``row[7]``.
    Latitude and longitude are always written as ``None`` and the source is
    always ``'JGI'``.

    Args:
        row: One parsed CSV record, indexable up to position 48.
    """
    item_name = row[0]
    print("Ingesting {}".format(item_name))
    item = gc.createItem(parent['_id'], item_name, reuseExisting=True)

    metadata = {
        'latitude': None,
        'longitude': None,
        'source': 'JGI',
        'timestamp': row[48],
        'name': row[1],
    }
    # Columns holding the identifiers that are translated to labels.
    for field, column in (('biome', 5), ('feature', 6), ('material', 7)):
        metadata[field] = get_envo_number(row[column])

    # NOTE(review): the metadata is wrapped in a 'meta' key before upload;
    # confirm this nesting is what consumers of the items expect.
    gc.addMetadataToItem(item['_id'], {'meta': metadata})
46+
47+
def create_items_from_csv(path):
    """Ingest every data row of the CSV file at ``path``.

    The first row is treated as a header and skipped; each remaining
    non-empty row is handed to ``create_item_from_row``. An empty file, or a
    file containing only a header, results in no items being created.

    Args:
        path: Location of the comma-delimited file to read.
    """
    with open(path) as csvfile:
        reader = csv.reader(csvfile, delimiter=',')
        # Skip the header; the default keeps an empty file from raising
        # StopIteration.
        next(reader, None)
        for row in reader:
            # csv.reader yields [] for blank lines, which carry no item.
            if not row:
                continue
            create_item_from_row(row)
53+
54+
if __name__ == '__main__':
    # Script entry point: ingest the curated JGI dataset listing, read
    # relative to the current working directory.
    csv_path = 'metadata_1K_datasets_curated_JGI.csv'
    create_items_from_csv(csv_path)

    # Disabled: dump the labels collected in lookup_table to the lookup file.
    # with open("envo_lookup.json", "w") as f:
    #     f.write(json.dumps(lookup_table))

0 commit comments

Comments
 (0)