Skip to content

Commit 2300dcb

Browse files
committed
New data ingest with girder Item
1 parent d60a108 commit 2300dcb

1 file changed

Lines changed: 63 additions & 40 deletions

File tree

server/data/ingest.py

Lines changed: 63 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -4,43 +4,45 @@
44
import re
55
import sys
66

7-
from resonanteco_server.model.meta import Meta
8-
from resonanteco_server.model.summary import Summary
9-
from resonanteco_server.model.table7 import Table7
10-
from resonanteco_server.model.table8 import Table8
11-
from resonanteco_server.model.table9 import Table9
7+
from girder.models.user import User
8+
from girder.models.collection import Collection
9+
from girder.models.folder import Folder
10+
from girder.models.item import Item
1211

1312

1413
def ingest(directory):
    """Ingest every dataset found in ``directory`` as one Girder Item each.

    Regular files in ``directory`` are classified by substrings of their
    name: ``meta.txt`` and ``summary.txt`` are parsed with ``parseCSV``,
    while ``Table_7`` / ``Table_8`` / ``Table_9`` are parsed with
    ``parseTable``.  For every ``taxon_oid`` present in the meta records an
    Item named after that id is created in the folder returned by
    ``findDatasetFolder`` and the combined records are stored as its
    metadata.

    :param directory: path of the directory holding the input files.
    :raises ValueError: if no file whose name contains ``meta.txt`` exists.
    :raises KeyError: if a taxon listed in the meta records has no summary
        or table record.
    """
    metaDict = {}
    summaryDict = {}
    tables = {'table7': {}, 'table8': {}, 'table9': {}}
    # Checked in this order; the first marker found in the name wins.
    tableMarkers = (('Table_7', 'table7'), ('Table_8', 'table8'), ('Table_9', 'table9'))
    metaFound = False

    for filename in listdir(directory):
        if not isfile(join(directory, filename)):
            continue
        if 'meta.txt' in filename:
            metaDict.update(parseCSV(directory, filename))
            metaFound = True
        elif 'summary.txt' in filename:
            summaryDict.update(parseCSV(directory, filename))
        else:
            for marker, key in tableMarkers:
                if marker in filename:
                    table = parseTable(directory, filename)
                    # NOTE(review): relies on the parsed table carrying a
                    # 'taxon_oid' entry -- confirm against thatFormatReader.
                    tables[key][table['taxon_oid']] = table
                    break

    if not metaFound:
        # Without this guard the failure is an unhelpful unbound-name error.
        raise ValueError('no meta.txt file found in %r' % (directory,))

    sources = (
        ('summary', summaryDict),
        ('table7', tables['table7']),
        ('table8', tables['table8']),
        ('table9', tables['table9']),
    )

    # Assemble everything first so that an incomplete dataset is reported
    # before any Item has been written.
    datasets = []
    for taxon_oid, meta in metaDict.items():
        data = {"meta_": meta}
        for key, records in sources:
            if taxon_oid not in records:
                raise KeyError('taxon_oid %s has no %s record in %r'
                               % (taxon_oid, key, directory))
            data[key] = records[taxon_oid]
        data['meta'] = extractMeta(data)
        datasets.append((taxon_oid, data))

    admin = User().findOne({"admin": True})
    datasetsFolder = findDatasetFolder()
    for taxon_oid, data in datasets:
        # reuseExisting keeps a second run from creating a duplicate Item
        # for a taxon that was already ingested.
        item = Item().createItem(taxon_oid, admin, datasetsFolder, reuseExisting=True)
        Item().setMetadata(item, data)
3240

33-
def parseMeta(directory, filename):
34-
with open(join(directory, filename), 'r') as myfile:
35-
reader = csv.DictReader(myfile, delimiter='\t')
36-
for obj in reader:
37-
Meta().save(obj)
3841

39-
def parseSummary(directory, filename):
42+
def parseCSV(directory, filename):
    """Read a tab-separated file and index its rows by ``taxon_oid``.

    :param directory: directory containing the file.
    :param filename: name of the file inside ``directory``.
    :returns: dict mapping each row's ``taxon_oid`` value to the row as
        produced by ``csv.DictReader``; a later row replaces an earlier one
        with the same id.
    """
    path = join(directory, filename)
    rowsByTaxon = {}
    with open(path, 'r') as handle:
        for row in csv.DictReader(handle, delimiter='\t'):
            rowsByTaxon[row['taxon_oid']] = row
    return rowsByTaxon
4446

4547

4648
def thatFormatReader(taxon_oid, text):
@@ -58,25 +60,46 @@ def thatFormatReader(taxon_oid, text):
5860
return dic
5961

6062

61-
def parseTable7(directory, filename):
62-
taxon_oid = re.search('([0-9]{2,})', filename).groups()[0]
63-
with open(join(directory, filename), 'r') as myfile:
64-
dic = thatFormatReader(taxon_oid, myfile.read())
65-
Table7().save(dic)
66-
67-
68-
def parseTable8(directory, filename):
69-
taxon_oid = re.search('([0-9]{2,})', filename).groups()[0]
70-
with open(join(directory, filename), 'r') as myfile:
71-
dic = thatFormatReader(taxon_oid, myfile.read())
72-
Table8().save(dic)
73-
74-
75-
def parseTable9(directory, filename):
63+
def parseTable(directory, filename):
    """Parse one table file with ``thatFormatReader``.

    The taxon id is the first run of two or more digits in ``filename``.

    :param directory: directory containing the file.
    :param filename: name of the file inside ``directory``.
    :returns: whatever ``thatFormatReader`` returns for the taxon id and the
        file's full text.
    """
    match = re.search('([0-9]{2,})', filename)
    taxon_oid = match.group(1)
    with open(join(directory, filename), 'r') as handle:
        contents = handle.read()
    return thatFormatReader(taxon_oid, contents)
67+
68+
69+
def extractMeta(data):
    """Derive the normalised ``meta`` record for one dataset.

    :param data: dict whose ``'meta_'`` entry holds the raw meta row; the
        keys ``'Genome Name / Sample Name'``, ``'Lat'`` and ``'Long'`` are
        read from it.
    :returns: dict with the keys ``name``, ``latitude``, ``longitude``,
        ``timestemp``, ``sampleType``, ``omicsType``, ``ecosystem``,
        ``ontology`` and ``source``.  ``sampleType`` and ``ecosystem`` are
        ``None`` when no known keyword occurs in the sample name.
    """
    # (pattern, label) pairs, tested in order; the first match wins.
    sampleTypes = (('soil', 'Soil'), ('water', 'Water'), ('vegetation', 'Vegetation'))
    ecosystems = (('arctic', 'Arctic'),)

    def classify(text, table):
        for pattern, label in table:
            if re.search(pattern, text, re.IGNORECASE):
                return label
        return None

    raw = data['meta_']
    sampleName = raw['Genome Name / Sample Name']
    # The display name is the segment after the first ' - '.  Names without
    # that separator used to raise IndexError; use the whole name instead.
    parts = sampleName.split(' - ')
    name = parts[1] if len(parts) > 1 else sampleName

    return {
        'name': name,
        'latitude': raw['Lat'],
        'longitude': raw['Long'],
        # NOTE(review): 'timestemp' looks like a misspelling of 'timestamp';
        # kept as-is because consumers may read this key -- confirm before
        # renaming.
        'timestemp': None,
        'sampleType': classify(sampleName, sampleTypes),
        'omicsType': None,
        'ecosystem': classify(sampleName, ecosystems),
        'ontology': None,
        'source': 'LLNL'
    }
97+
98+
99+
def findDatasetFolder():
    """Look up the folder that ingested datasets are stored in.

    Starts at the collection named ``ResonantEco`` and descends through the
    folder named ``datasets`` to the folder named ``LLNL``, matching each
    folder by name and by the ``_id`` of its parent.

    :returns: the result of the final ``Folder().findOne`` query.
    """
    # NOTE(review): a lookup that finds nothing presumably yields None, in
    # which case the next ['_id'] access fails -- confirm the hierarchy
    # exists before running.
    parent = Collection().findOne({"name": 'ResonantEco'})
    for folderName in ("datasets", "LLNL"):
        parent = Folder().findOne({"name": folderName, "parentId": parent['_id']})
    return parent
80103

81104

82105
if __name__ == '__main__':

0 commit comments

Comments
 (0)