77
88from exceptions import (
99 MethodNotAllowedError , NoContainerError , OperationFailedError ,
10- ConnectionError ,
10+ ConnectionError , MetadataNotFoundError
1111)
1212from file import DataverseFile
1313from settings import SWORD_BOOTSTRAP
@@ -22,14 +22,16 @@ def __init__(self, entry=SWORD_BOOTSTRAP, dataverse=None, edit_uri=None,
2222 This can be specified in the atom entry or as kwargs
2323 """
2424 self .dataverse = dataverse
25- self ._statement = None
26- self ._state = None
2725
2826 self .edit_uri = edit_uri
2927 self .edit_media_uri = edit_media_uri
3028 self .statement_uri = statement_uri
3129
3230 self ._entry = etree .XML (entry ) if isinstance (entry , str ) else entry
31+ self ._statement = None
32+ self ._state = None
33+ self ._json = None
34+ self ._id = None
3335
3436 # Updates sword entry from keyword arguments
3537 for key , value in kwargs .iteritems ():
@@ -72,8 +74,49 @@ def from_dataverse(cls, entry_element, dataverse):
7274
@property
def doi(self):
    """Return the identifier embedded at the end of ``edit_media_uri``.

    The value is whatever follows the last ``"/study/"`` segment of
    ``self.edit_media_uri``; if that segment is absent the whole URI is
    returned unchanged.

    Raises:
        NoContainerError: if ``self.dataverse`` is not set.
    """
    if not self.dataverse:
        raise NoContainerError('This dataset has not been added to a Dataverse.')

    # Note: This depends strongly on URL structure, and may break easily
    _, _, identifier = self.edit_media_uri.rpartition("/study/")
    return identifier
82+
@property
def id(self):
    """Return the dataset's ID, looking it up on first use.

    The ID is found by listing the parent dataverse's contents and
    matching each entry's ``protocol:authority/identifier`` string
    against ``self.doi``. The result is cached in ``self._id``.

    Raises:
        NoContainerError: if ``self.dataverse`` is not set.
        MetadataNotFoundError: if no entry in the contents matches.
    """
    if self._id is not None:
        return self._id

    if not self.dataverse:
        raise NoContainerError('This dataset has not been added to a Dataverse.')

    for entry in self.dataverse.get_contents(refresh=True):
        # Entries that are not datasets (e.g. nested dataverses) may not
        # carry the DOI components; skip them instead of failing the
        # whole lookup with a KeyError.
        try:
            entry_doi = '{0}:{1}/{2}'.format(
                entry['protocol'],
                entry['authority'],
                entry['identifier'],
            )
        except KeyError:
            continue

        if entry_doi == self.doi:
            self._id = entry['id']
            return self._id

    raise MetadataNotFoundError('The dataset ID could not be found.')
102+
def get_contents(self, refresh=False):
    """Fetch (or return the cached) contents listing from the native API.

    Args:
        refresh: when true, always re-request the listing; otherwise a
            previously stored response is returned if there is one.

    Returns:
        The decoded JSON body of the ``/api/dataverses/<alias>/contents``
        response, which is also stored in ``self._contents_json``.

    Raises:
        ConnectionError: if the response status is not 200.
    """
    # ``_contents_json`` is not set by the constructor lines visible in
    # this file, so read it defensively: a first call should fetch rather
    # than fail with AttributeError.
    cached = getattr(self, '_contents_json', None)
    if not refresh and cached:
        return cached

    # NOTE(review): relies on ``self.connection`` and ``self.alias`` being
    # available on this object - confirm this method lives on the right class.
    content_uri = 'https://{0}/api/dataverses/{1}/contents'.format(
        self.connection.host, self.alias
    )
    resp = requests.get(
        content_uri,
        params={'key': self.connection.token}
    )

    if resp.status_code != 200:
        # Message corrected: this request is for the contents listing,
        # not an Atom entry.
        raise ConnectionError('Dataverse contents could not be retrieved.')

    # NOTE(review): get_json() unwraps the 'data' key of its response while
    # this returns the whole body - verify which shape callers expect.
    self._contents_json = resp.json()
    return self._contents_json
77120
78121 @property
79122 def citation (self ):
@@ -145,21 +188,48 @@ def get_state(self, refresh=False):
145188 ).text
146189 return self ._state
147190
148- def get_file (self , file_name , published = False ):
149- files = self .get_files (published )
def get_json(self, refresh=False):
    """Return the dataset's JSON metadata from the native API.

    A stored copy in ``self._json`` is returned when present unless
    ``refresh`` is true. Otherwise the ``:latest-published`` version is
    requested and the ``'data'`` member of the response is cached and
    returned.

    Raises:
        NoContainerError: if ``self.dataverse`` is not set.
        ConnectionError: if the response status is not 200.
    """
    cached = self._json
    if cached and not refresh:
        return cached

    if not self.dataverse:
        raise NoContainerError('This dataset has not been added to a Dataverse.')

    # TODO: Allow specification of other versions
    url_template = 'https://{0}/api/datasets/{1}/versions/:latest-published'
    endpoint = url_template.format(self.connection.host, self.id)

    query = {'key': self.connection.token}
    response = requests.get(endpoint, params=query)

    if response.status_code != 200:
        raise ConnectionError('JSON metadata could not be retrieved.')

    payload = response.json()
    self._json = payload['data']
    return self._json
211+
def get_file(self, file_name, published=False, refresh=True):
    """Return the first file whose ``name`` equals ``file_name``, else None.

    ``published`` and ``refresh`` are passed straight to ``get_files``.
    """
    for candidate in self.get_files(published, refresh):
        if candidate.name == file_name:
            return candidate
    return None
151215
def get_file_by_id(self, file_id, published=False, refresh=True):
    """Return the first file whose ``id`` equals ``file_id``, else None.

    ``published`` and ``refresh`` are passed straight to ``get_files``.
    """
    for candidate in self.get_files(published, refresh):
        if candidate.id == file_id:
            return candidate
    return None
155219
def get_files(self, published=False, refresh=True):
    """Return the dataset's files as a list.

    With ``published`` true the work is delegated to
    ``get_published_files``. Otherwise one ``DataverseFile`` is built
    from each ``'entry'`` element of the statement.
    """
    if not published:
        # TODO: Should the native API be preferred?
        statement = self.get_statement(refresh)
        entries = get_elements(statement, 'entry')
        return [DataverseFile.from_statement(self, entry)
                for entry in entries]

    return self.get_published_files(refresh)
162228
def get_published_files(self, refresh=True):
    """Build a ``DataverseFile`` for each entry under ``'files'`` in the JSON.

    ``refresh`` is passed straight to ``get_json``.
    """
    published = []
    for file_json in self.get_json(refresh)['files']:
        published.append(DataverseFile.from_json(self, file_json))
    return published
232+
def add_file(self, filepath):
    """Add a single file by wrapping it in a list for ``add_files``."""
    single_path = [filepath]
    self.add_files(single_path)
165235
@@ -235,6 +305,7 @@ def delete_all_files(self):
235305 self .delete_file (f )
236306
237307 # TODO: DANGEROUS! Will delete all unspecified fields! Deposit receipts only give SOME of the fields
308+ # Can potentially be replaced with native API functionality
238309 # def update_metadata(self):
239310 # depositReceipt = self.hostDataverse.connection.sword.update(
240311 # dr=self.lastDepositReceipt,
0 commit comments