33import requests
44import hashlib
55import json
6+ from tqdm import tqdm
7+ from SPARQLWrapper import SPARQLWrapper , JSON
8+ from hashlib import sha256
69
# Module-level debug switch; it is checked together with `debug` in deploy()
# before the response text is printed.
710__debug = False
811
@@ -386,3 +389,100 @@ def deploy(
386389 if debug or __debug :
387390 print ("---------" )
388391 print (resp .text )
392+
393+
def __download_file__(url, filename):
    """
    Download a file from the internet with a progress bar using tqdm.

    Parameters:
    - url: the URL of the file to download
    - filename: the local file path where the file should be saved

    Raises:
    - requests.HTTPError: if the server answers with a 4xx/5xx status code;
      in that case no local file is created
    """
    print("download " + url)
    block_size = 1024  # 1 Kibibyte

    # Using the response as a context manager releases the streamed
    # connection even when writing to disk fails half-way.
    with requests.get(url, stream=True) as response:
        # Fail before opening the target file so that an HTTP error page is
        # never stored under the name of the requested file.
        response.raise_for_status()
        total_size_in_bytes = int(response.headers.get('content-length', 0))

        with tqdm(total=total_size_in_bytes, unit='iB', unit_scale=True) as progress_bar:
            with open(filename, 'wb') as file:
                for data in response.iter_content(block_size):
                    progress_bar.update(len(data))
                    file.write(data)
            received = progress_bar.n

    # A content-length of 0 means the header was missing, so there is nothing
    # to compare the received byte count against.
    if total_size_in_bytes != 0 and received != total_size_in_bytes:
        print("ERROR, something went wrong")
415+
416+
def __query_sparql__(endpoint_url, query) -> dict:
    """
    Send a SPARQL query to an endpoint via POST and return the JSON result.

    Parameters:
    - endpoint_url: the URL of the SPARQL endpoint
    - query: the SPARQL query string

    Returns:
    - The converted query response (requested in JSON format)
    """
    wrapper = SPARQLWrapper(endpoint_url)
    wrapper.setReturnFormat(JSON)
    wrapper.setQuery(query)
    wrapper.method = 'POST'
    return wrapper.query().convert()
434+
435+
def __handle__databus_file_query__(endpoint_url, query) -> List[str]:
    """
    Run a SPARQL query and yield the value of the single variable of each row.

    This function is a generator: the query is only sent once the first
    item is requested, and values are produced one at a time.

    Parameters:
    - endpoint_url: the URL of the SPARQL endpoint
    - query: the SPARQL query string; each result row must bind one variable

    Yields:
    - the 'value' entry of the only variable bound in each result row

    If a row binds more than one variable, an error message is printed and
    iteration stops at that row.
    """
    rows = __query_sparql__(endpoint_url, query)['results']['bindings']
    for row in rows:
        if len(row) > 1:
            print("Error multiple bindings in query response")
            return
        variable = next(iter(row))
        yield row[variable]['value']
445+
446+
def wsha256(raw: str):
    """Return the hexadecimal SHA-256 digest of *raw* encoded as UTF-8."""
    digest = sha256()
    digest.update(raw.encode('utf-8'))
    return digest.hexdigest()
449+
450+
def __handle_databus_collection__(endpoint, uri: str) -> str:
    """
    Fetch the SPARQL query that a databus collection URI resolves to.

    Parameters:
    - endpoint: not used by this function; kept so the signature stays the
      same for callers
    - uri: the collection URI, requested with an "Accept: text/sparql" header

    Returns:
    - the response body as text

    Raises:
    - requests.HTTPError: if the server answers with a 4xx/5xx status code
    """
    headers = {"Accept": "text/sparql"}
    response = requests.get(uri, headers=headers)
    # Without this check the body of an error response would be returned and
    # later sent to the SPARQL endpoint as if it were a query.
    response.raise_for_status()
    return response.text
454+
455+
def __download_list__(urls: List[str], localDir: str):
    """
    Download every URL into localDir, naming each file after its URL hash.

    Parameters:
    - urls: iterable of file URLs to download
    - localDir: target directory; created if it does not exist yet. An empty
      string means the current working directory.

    Each file is stored as <localDir>/<sha256 hex digest of the URL>.
    """
    # Imported locally so this helper does not rely on a module-level
    # import of os.
    import os

    # open() does not create parent directories, so make sure the target
    # exists before the first download starts.
    if localDir:
        os.makedirs(localDir, exist_ok=True)

    for url in urls:
        # os.path.join avoids doubled separators and does not turn an empty
        # localDir into a path at the filesystem root.
        target = os.path.join(localDir, wsha256(url))
        __download_file__(url=url, filename=target)
459+
460+
def download(
    localDir: str,
    endpoint: str,
    databusURIs: List[str]
) -> None:
    """
    Download datasets to local storage from databus registry
    ------
    localDir: the local directory
    endpoint: the URL of the SPARQL endpoint the file queries are sent to
    databusURIs: identifiers to access databus registered datasets

    Each entry of databusURIs is handled according to its form:
    - http(s) URI containing "/collections/": the collection's SPARQL query
      is fetched and executed, and the resulting files are downloaded
    - any other http(s) URI (dataId): not supported yet, skipped
    - "file://" URI (query stored in a file): not supported yet, skipped
    - anything else: treated as a SPARQL query string and executed directly
    """
    for databusURI in databusURIs:
        # dataID or databus collection
        if databusURI.startswith(("http://", "https://")):
            if "/collections/" not in databusURI:
                print("dataId not supported yet")
                # Nothing was resolved for this entry, so there is nothing to
                # download; skip it instead of using an unset result.
                continue
            # databus collection
            query = __handle_databus_collection__(endpoint, databusURI)
        # query in local file
        elif databusURI.startswith("file://"):
            print("query in file not supported yet")
            continue
        # query as argument
        else:
            query = databusURI
            # Collapse whitespace so the query is logged on a single line.
            print("QUERY {}".format(" ".join(query.split())))

        res = __handle__databus_file_query__(endpoint, query)
        __download_list__(res, localDir)
0 commit comments