Skip to content

Commit d42e4c4

Browse files
author
Doruk Ozturk
committed
Update ingest scripts so that they can be used in a Docker environment
1 parent 98937fb commit d42e4c4

2 files changed

Lines changed: 35 additions & 30 deletions

File tree

server/data/ingest.py

Lines changed: 16 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -3,14 +3,10 @@
33
from os import listdir
44
import re
55
import sys
6+
from girder_client import GirderClient
67

7-
from girder.models.user import User
8-
from girder.models.collection import Collection
9-
from girder.models.folder import Folder
10-
from girder.models.item import Item
118

12-
13-
def ingest(directory):
9+
def ingest(directory, gc):
1410
table7Dict = {}
1511
table8Dict = {}
1612
table9Dict = {}
@@ -29,14 +25,12 @@ def ingest(directory):
2925
table9 = parseTable(directory, filename)
3026
table9Dict[table9['taxon_oid']] = table9
3127

32-
admin = User().findOne({"admin": True})
3328
datasetsFolder = findDatasetFolder()
3429
for taxon_oid in metaDict:
3530
data = {"meta_": metaDict[taxon_oid], "summary": summaryDict[taxon_oid],
3631
"table7": table7Dict[taxon_oid], "table8": table8Dict[taxon_oid], "table9": table9Dict[taxon_oid]}
3732
data['meta'] = extractMeta(data)
38-
item = Item().createItem(taxon_oid, admin, datasetsFolder)
39-
Item().setMetadata(item, data)
33+
gc.loadOrCreateItem(taxon_oid, datasetsFolder['_id'], metadata=data)
4034

4135

4236
def parseCSV(directory, filename):
@@ -98,12 +92,19 @@ def getEcosystem(name):
9892

9993

10094
def findDatasetFolder():
101-
collection = Collection().findOne({"name": 'ResonantEco'})
102-
datasets = Folder().findOne({"name": "datasets", "parentId": collection['_id']})
103-
return Folder().findOne({"name": "LLNL", "parentId": datasets['_id']})
95+
collection = gc.resourceLookup('/collection/ResonantEco')
96+
datasets = gc.resourceLookup('/collection/ResonantEco/datasets')
97+
return gc.resourceLookup('/collection/ResonantEco/datasets/LLNL')
10498

10599

106100
if __name__ == '__main__':
107-
if len(sys.argv) < 2:
108-
sys.exit('Missing argument')
109-
ingest(sys.argv[1])
101+
if len(sys.argv) < 5:
102+
sys.exit('Sample call: python ingest.py ./data localhost 8080 admin letmein')
103+
data_dir = sys.argv[1]
104+
host = sys.argv[2]
105+
port = sys.argv[3]
106+
user = sys.argv[4]
107+
password = sys.argv[5]
108+
gc = GirderClient(apiUrl='http://{}:{}/api/v1'.format(host, port))
109+
gc.authenticate(user, password)
110+
ingest(data_dir, gc)

server/data/ingest_1000.py

Lines changed: 19 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,14 @@
11
import csv
2-
import girder_client
2+
from girder_client import GirderClient
33
import requests
44
import json
5-
6-
7-
gc = girder_client.GirderClient(apiUrl='http://localhost:8080/api/v1')
8-
gc.authenticate('girder', 'girder')
9-
parent = gc.resourceLookup('collection/ResonantEco/datasets/JGI')
10-
11-
lookup_table = {}
12-
with open('envo_lookup.json', 'r') as f:
13-
lookup_table = json.load(f)
14-
print(lookup_table)
5+
import sys
156

167

178
def lookup_envo_number(envo):
9+
with open('envo_lookup.json', 'r') as f:
10+
lookup_table = json.load(f)
11+
1812
if envo in lookup_table:
1913
return lookup_table[envo]
2014
response = requests.get('https://www.ebi.ac.uk/ols/api/select',
@@ -28,10 +22,11 @@ def lookup_envo_number(envo):
2822
return None
2923

3024

31-
def create_item_from_row(row):
25+
def create_item_from_row(row, gc):
3226
if not row[7]:
3327
return
3428
print("Ingesting {}".format(row[0]))
29+
parent = gc.resourceLookup('collection/ResonantEco/datasets/JGI')
3530
item = gc.createItem(parent['_id'], row[0], reuseExisting=True)
3631
latitude = None
3732
longitude = None
@@ -54,15 +49,24 @@ def create_item_from_row(row):
5449
gc.addMetadataToItem(item['_id'], {'meta': metadata})
5550

5651

57-
def create_items_from_csv(path):
52+
def create_items_from_csv(path, gc):
5853
with open(path) as csvfile:
5954
reader = csv.reader(csvfile, delimiter=',')
6055
next(reader)
6156
for row in reader:
62-
create_item_from_row(row)
57+
create_item_from_row(row, gc)
6358

6459

6560
if __name__ == '__main__':
66-
create_items_from_csv('./jgi_data/NMDC_metadata_datasets - NMDC_datasets_metadata.csv')
61+
if len(sys.argv) < 5:
62+
sys.exit('Sample call: python ingest.py ./data localhost 8080 admin letmein')
63+
data_dir = sys.argv[1]
64+
host = sys.argv[2]
65+
port = sys.argv[3]
66+
user = sys.argv[4]
67+
password = sys.argv[5]
68+
gc = GirderClient(apiUrl='http://{}:{}/api/v1'.format(host, port))
69+
gc.authenticate(user, password)
70+
create_items_from_csv('./jgi_data/NMDC_metadata_datasets - NMDC_datasets_metadata.csv', gc)
6771
# with open("envo_lookup.json", "w") as f:
6872
# f.write(json.dumps(lookup_table))

0 commit comments

Comments
 (0)