44import re
55import sys
66
7- from resonanteco_server .model .meta import Meta
8- from resonanteco_server .model .summary import Summary
9- from resonanteco_server .model .table7 import Table7
10- from resonanteco_server .model .table8 import Table8
11- from resonanteco_server .model .table9 import Table9
7+ from girder .models .user import User
8+ from girder .models .collection import Collection
9+ from girder .models .folder import Folder
10+ from girder .models .item import Item
1211
1312
def ingest(directory):
    """Load every dataset file found in *directory* into Girder items.

    Files directly inside *directory* are dispatched on their name:
    names containing ``meta.txt`` or ``summary.txt`` are parsed with
    ``parseCSV`` and names containing ``Table_7``/``Table_8``/``Table_9``
    are parsed with ``parseTable``.  One item named after each
    ``taxon_oid`` found in the meta files is then created in the folder
    returned by ``findDatasetFolder`` and the collected data is attached
    as its metadata.

    Raises:
        KeyError: if a taxon listed in the meta data has no matching
            summary, Table_7, Table_8 or Table_9 entry.
    """
    # Start from empty dicts so a directory lacking a meta/summary file
    # yields "nothing to ingest" instead of an UnboundLocalError below.
    metaDict = {}
    summaryDict = {}
    table7Dict = {}
    table8Dict = {}
    table9Dict = {}
    for filename in listdir(directory):
        if not isfile(join(directory, filename)):
            continue
        if 'meta.txt' in filename:
            # Merge rather than rebind so several meta files accumulate.
            metaDict.update(parseCSV(directory, filename))
        if 'summary.txt' in filename:
            summaryDict.update(parseCSV(directory, filename))
        elif 'Table_7' in filename:
            table7 = parseTable(directory, filename)
            table7Dict[table7['taxon_oid']] = table7
        elif 'Table_8' in filename:
            table8 = parseTable(directory, filename)
            table8Dict[table8['taxon_oid']] = table8
        elif 'Table_9' in filename:
            table9 = parseTable(directory, filename)
            table9Dict[table9['taxon_oid']] = table9

    admin = User().findOne({"admin": True})
    datasetsFolder = findDatasetFolder()
    for taxon_oid in metaDict:
        data = {
            "meta_": metaDict[taxon_oid],
            "summary": summaryDict[taxon_oid],
            "table7": table7Dict[taxon_oid],
            "table8": table8Dict[taxon_oid],
            "table9": table9Dict[taxon_oid],
        }
        # 'meta' is the normalised summary derived from the raw 'meta_' row.
        data['meta'] = extractMeta(data)
        item = Item().createItem(taxon_oid, admin, datasetsFolder)
        Item().setMetadata(item, data)
3240
def parseCSV(directory, filename):
    """Parse a tab-separated file with a header row, keyed by taxon_oid.

    Args:
        directory: directory containing the file.
        filename: name of the file inside *directory*.

    Returns:
        A dict mapping each row's ``taxon_oid`` value to the row itself
        (a mapping of column name to string value).  When several rows
        share a ``taxon_oid`` the last one wins.

    Raises:
        KeyError: if the file has rows but no ``taxon_oid`` column.
    """
    with open(join(directory, filename), 'r') as myfile:
        # csv requires a single-character delimiter; the files are TSV.
        reader = csv.DictReader(myfile, delimiter='\t')
        # Consume the reader while the file is still open.
        return {row['taxon_oid']: row for row in reader}
4446
4547
4648def thatFormatReader (taxon_oid , text ):
@@ -58,25 +60,46 @@ def thatFormatReader(taxon_oid, text):
5860 return dic
5961
6062
def parseTable(directory, filename):
    """Read one Table_7/8/9 file and return its parsed content.

    The taxon id is taken from the first run of two or more digits in
    *filename*; the file text is then handed to ``thatFormatReader``
    together with that id.

    Args:
        directory: directory containing the file.
        filename: name of the table file inside *directory*.

    Returns:
        Whatever ``thatFormatReader`` returns for this taxon and text.

    Raises:
        ValueError: if *filename* contains no run of two or more digits.
    """
    match = re.search('([0-9]{2,})', filename)
    if match is None:
        # Fail with a message naming the file instead of an opaque
        # "'NoneType' object has no attribute 'groups'".
        raise ValueError(
            'cannot find a taxon_oid in table filename: %r' % (filename,))
    taxon_oid = match.group(1)
    with open(join(directory, filename), 'r') as myfile:
        return thatFormatReader(taxon_oid, myfile.read())
67+
68+
def extractMeta(data):
    """Build the normalised metadata record for one dataset.

    Args:
        data: dict whose ``'meta_'`` entry is the raw meta row; the keys
            ``'Genome Name / Sample Name'``, ``'Lat'`` and ``'Long'`` are
            read from it.

    Returns:
        A dict with the keys ``name``, ``latitude``, ``longitude``,
        ``timestemp``, ``sampleType``, ``omicsType``, ``ecosystem``,
        ``ontology`` and ``source``.  Fields that cannot be derived from
        the meta row are ``None``.
    """
    def getSampleType(name):
        # First matching keyword wins; returns None when none is present.
        if re.search('soil', name, re.IGNORECASE):
            return 'Soil'
        elif re.search('water', name, re.IGNORECASE):
            return "Water"
        elif re.search('vegetation', name, re.IGNORECASE):
            return 'Vegetation'
        return None

    def getEcosystem(name):
        if re.search('arctic', name, re.IGNORECASE):
            return 'Arctic'
        return None

    raw = data['meta_']
    fullName = raw['Genome Name / Sample Name']
    # The display name is the segment after the first ' - '.  Fall back to
    # the whole string when the separator is absent, so one oddly named
    # sample does not abort the ingest with an IndexError.
    segments = fullName.split(' - ')
    name = segments[1] if len(segments) > 1 else fullName
    return {
        'name': name,
        'latitude': raw['Lat'],
        'longitude': raw['Long'],
        # NOTE(review): 'timestemp' looks like a typo for 'timestamp'; it
        # is kept because stored items and readers may rely on this key.
        'timestemp': None,
        'sampleType': getSampleType(fullName),
        'omicsType': None,
        'ecosystem': getEcosystem(fullName),
        'ontology': None,
        'source': 'LLNL'
    }
97+
98+
def findDatasetFolder():
    """Return the ``LLNL`` folder under ``datasets`` in ``ResonantEco``.

    Looks up the collection named ``ResonantEco``, then the ``datasets``
    folder whose parent is that collection, then the ``LLNL`` folder
    whose parent is ``datasets``.  No check is made that the collection
    or the ``datasets`` folder lookup found anything.
    """
    root = Collection().findOne({"name": 'ResonantEco'})
    datasetsQuery = {"name": "datasets", "parentId": root['_id']}
    parent = Folder().findOne(datasetsQuery)
    llnlQuery = {"name": "LLNL", "parentId": parent['_id']}
    return Folder().findOne(llnlQuery)
80103
81104
82105if __name__ == '__main__' :
0 commit comments