1212import socket
1313import random
1414import shutil
15+ import hashlib
1516from urlparse import urlparse
1617
1718try :
4142# Global variable for the location of the caches.json file
4243caches_json_location = None
4344
45+ # Global variable for the name of a pre-configured cache list (set from -n/--cache-list-name); None means use the first list returned by the server
46+ cache_list_name = None
47+
4448# Global variable for the location of the token to use for reading / writing
4549token_location = None
4650
@@ -559,100 +563,184 @@ def get_ips(name):
559563 # always prefer IPv4
560564 return ipv4s + ipv6s
561565
566+ # Return the best (closest) stashcache and set the nearest_cache_list global; returns None if no cache list can be obtained or verified
562567def get_best_stashcache ():
563568 global nearest_cache_list
564569
565- # Check if the user provided a caches json file location
566- if caches_json_location and os .path .exists (caches_json_location ):
567- cache_files = [ caches_json_location ]
568- else :
569- prefix = os .environ .get ("OSG_LOCATION" , "/" )
570- cache_files = [os .path .join (prefix , "etc/stashcache/caches.json" ),
571- os .path .join (prefix , "usr/share/stashcache/caches.json" ),
572- os .path .join (prefix , "usr/local/share/stashcache/caches.json" )]
573- if resource_filename :
574- try :
575- cache_files .append (resource_filename (__name__ , 'caches.json' ))
576- except IOError as ioe :
577- logging .debug ("Unable to retrieve caches.json using resource string, trying other locations" )
578-
579- for cache_file in cache_files :
580- if os .path .isfile (cache_file ):
581- with open (cache_file , 'r' ) as f :
582- caches_list = json .loads (f .read ())
583- logging .debug ("Loaded caches list from %s" , cache_file )
584- break
585- else :
586- logging .error ("Unable to find caches.json in %r" , cache_files )
587- return None
588-
589- # Format the caches for the GeoIP query
590- caches_string = ""
591- usable_caches = []
592- for cache in caches_list :
593- if cache ['status' ] == 0 :
594- continue
595- usable_caches .append (cache )
596- parsed_url = urlparse (cache ['name' ])
597- caches_string = "%s,%s" % (caches_string , parsed_url .hostname )
598- caches_list = usable_caches
599- # Remove the first comma
600- caches_string = caches_string [1 :]
601-
602570 # Use the geo ip service on the WLCG Web Proxy Auto Discovery machines
603571 geo_ip_sites = ["wlcg-wpad.cern.ch" , "wlcg-wpad.fnal.gov" ]
604572
605- # Append text before caches string
606- append_text = "api/v1.0/geo/stashcp"
607-
608573 # Headers for the HTTP request
609574 headers = {'Cache-control' : 'max-age=0' , 'User-Agent' : user_agent }
610575
611576 # Randomize the geo ip sites
612577 random .shuffle (geo_ip_sites )
613- order_str = ''
578+
579+ api_text = ''
580+
581+ caches_list = []
582+
583+ # Check if the user provided a caches json file location
584+ if caches_json_location and os .path .exists (caches_json_location ):
585+ # Use geo ip api on caches in provided json file
586+ try :
587+ with open (caches_json_location , 'r' ) as f :
588+ caches_list = json .loads (f .read ())
589+ logging .debug ("Loaded caches list from %s" , caches_json_location )
590+ except :
591+ logging .error ("Unable to open or parse json in %s: %s" ,
592+ caches_json_location , str (sys .exc_info ()[1 ]))
593+ return None
594+
595+ # Format the caches for the GeoIP query
596+ caches_string = ""
597+ usable_caches = []
598+ for cache in caches_list :
599+ if 'status' in cache and cache ['status' ] == 0 :
600+ continue
601+ if 'name' in cache :
602+ usable_caches .append (cache ['name' ])
603+ parsed_url = urlparse (cache ['name' ])
604+ caches_string = "%s,%s" % (caches_string , parsed_url .hostname )
605+ if len (usable_caches ) == 0 :
606+ logging .error ("No cache names found in %s without zero status" , caches_json_location )
607+ return None
608+
609+ caches_list = usable_caches
610+
611+ # Remove the first comma
612+ caches_string = caches_string [1 :]
613+
614+ api_text = "api/v1.0/geo/stashcp/" + caches_string
615+
616+ else :
617+ # Use stashservers.dat api
618+ api_text = "stashservers.dat"
619+ if cache_list_name != None :
620+ api_text += '?list=' + cache_list_name
621+
622+ responselines = []
614623 i = 0
615- while order_str == '' and i < len (geo_ip_sites ):
624+ while len ( responselines ) == 0 and i < len (geo_ip_sites ):
616625 cur_site = geo_ip_sites [i ]
617626 headers ['Host' ] = cur_site
627+ logging .debug ("Trying server site of %s" , cur_site )
618628 for ip in get_ips (cur_site ):
619- logging .debug ("Trying geoip site of: %s [%s]" , cur_site , ip )
620- final_url = "http://%s/%s/%s" % (ip , append_text , caches_string )
621- logging .debug ("Querying for closest cache: %s" , final_url )
629+ final_url = "http://%s/%s" % (ip , api_text )
630+ logging .debug ("Querying %s" , final_url )
622631 try :
623632 # Make the request
624633 req = urllib2 .Request (final_url , headers = headers )
625634 response = urllib2 .urlopen (req , timeout = 10 )
626635 if response .getcode () == 200 :
627636 logging .debug ("Got OK code 200 from %s" , cur_site )
628- order_str = response .read ()
637+ responselines = response .read (). split ( ' \n ' )
629638 response .close ()
630639 break
631640 response .close ()
632641 except urllib2 .URLError , e :
633642 logging .debug ("URL error: %s" , str (e ))
634643 except Exception , e :
635644 logging .debug ("Error: %s" , str (e ))
636- i += 1
645+ i += 1
646+
647+ order_str = ''
648+ if len (responselines ) > 0 :
649+ order_str = responselines [0 ]
637650
638651 if order_str == '' :
652+ if len (caches_list ) == 0 :
653+ logging .error ("unable to get list of caches" )
654+ return None
639655 # Unable to find a geo_ip server to use, return random choice from caches!
640- minsite = random . choice ( caches_list )[ 'name' ]
641- random .shuffle (caches_list )
642- nearest_cache_list = [ cache [ 'name' ] for cache in caches_list ]
656+ nearest_cache_list = caches_list
657+ random .shuffle (nearest_cache_list )
658+ minsite = nearest_cache_list [ 0 ]
643659 logging .warning ("Unable to use Geoip to find closest cache! Returning random cache %s" , minsite )
644- logging .debug ("Ordered list of nearest caches: %s" , str (nearest_cache_list ))
660+ logging .debug ("Randomized list of nearest caches: %s" , str (nearest_cache_list ))
645661 return minsite
646662 else :
647663 # The order string should be something like:
648664 # 3,1,2
649665 ordered_list = order_str .strip ().split ("," )
650666 logging .debug ("Got order %s" , str (ordered_list ))
651- minsite = caches_list [int (ordered_list [0 ])- 1 ]['name' ]
667+
668+ if len (caches_list ) == 0 :
669+ # Used the stashservers.dat api
670+
671+ if len (responselines ) < 8 :
672+ logging .error ("stashservers response too short, less than 8 lines" )
673+ return None
674+ hashname = responselines [4 ][- 5 :]
675+ if hashname != "-sha1" :
676+ logging .error ("stashservers response does not have sha1 hash: %s" , hashname )
677+ return None
678+ hashedtext = '\n ' .join (responselines [1 :5 ]) + '\n '
679+ hash = hashlib .sha1 (hashedtext ).hexdigest ()
680+ if responselines [6 ] != hash :
681+ logging .debug ("stashservers hash %s does not match expected hash %s" , responselines [6 ], hash )
682+ logging .debug ("hashed text:\n %s" , hashedtext )
683+ logging .error ("stashservers response hash does not match expected hash" )
684+ return None
685+
686+ if not os .path .exists ("/usr/bin/openssl" ):
687+ logging .debug ("openssl not installed, skipping signature check" )
688+ else :
689+ sig = '\n ' .join (responselines [7 :])
690+
691+ # Look for the OSG cvmfs public key to verify signature
692+ prefix = os .environ .get ("OSG_LOCATION" , "/" )
693+ osgpub = 'opensciencegrid.org.pub'
694+ pubkey_files = ['/etc/cvmfs/keys/opensciencegrid.org/' + osgpub ,
695+ os .path .join (prefix , "etc/stashcache" , osgpub ),
696+ os .path .join (prefix , "usr/share/stashcache" , osgpub )]
697+ if resource_filename :
698+ try :
699+ pubkey_files .append (resource_filename (__name__ , osgpub ))
700+ except IOError as ioe :
701+ logging .debug ("Unable to retrieve caches.json using resource string, trying other locations" )
702+
703+ for pubkey_file in pubkey_files :
704+ if os .path .isfile (pubkey_file ):
705+ break
706+ else :
707+ logging .error ("Unable to find osg cvmfs key in %r" , pubkey_files )
708+ return None
709+
710+ cmd = "/usr/bin/openssl rsautl -verify -pubin -inkey " + pubkey_file
711+ logging .debug ("Running %s" , cmd )
712+ p = subprocess .Popen (cmd , shell = True ,
713+ stdin = subprocess .PIPE , stdout = subprocess .PIPE )
714+ p .stdin .write (sig )
715+ p .stdin .close ()
716+ decryptedhash = p .stdout .read ()
717+ p .stdout .close ()
718+ if hash != decryptedhash :
719+ logging .debug ("stashservers hash %s does not match decrypted signature %s" , hash , decryptedhash )
720+ logging .error ("stashservers signature does not verify" )
721+ return None
722+ logging .debug ("Signature matched" )
723+
724+ lists = responselines [4 ].split (';' )
725+ logging .debug ("Cache lists: %s" , lists )
726+
727+ if cache_list_name == None :
728+ caches = lists [0 ].split ('=' )[1 ]
729+ else :
730+ for l in lists :
731+ n = len (cache_list_name )+ 1
732+ if l [0 :n ] == cache_list_name + '=' :
733+ caches = l [n :]
734+ break
735+ caches_list = caches .split (',' )
736+ for i in range (len (caches_list )):
737+ caches_list [i ] = 'root://' + caches_list [i ]
738+
739+ minsite = caches_list [int (ordered_list [0 ])- 1 ]
652740
653741 nearest_cache_list = []
654742 for ordered_index in ordered_list :
655- nearest_cache_list .append (caches_list [int (ordered_index )- 1 ][ 'name' ] )
743+ nearest_cache_list .append (caches_list [int (ordered_index )- 1 ])
656744
657745 logging .debug ("Returning closest cache: %s" , minsite )
658746 logging .debug ("Ordered list of nearest caches: %s" , str (nearest_cache_list ))
@@ -663,6 +751,7 @@ def main():
663751 global nearest_cache
664752 global nearest_cache_list
665753 global caches_json_location
754+ global cache_list_name
666755 global token_location
667756
668757 usage = "usage: %prog [options] source destination"
@@ -671,7 +760,9 @@ def main():
671760 parser .add_option ('-r' , dest = 'recursive' , action = 'store_true' , help = 'recursively copy' )
672761 parser .add_option ('--closest' , action = 'store_true' , help = "Return the closest cache and exit" )
673762 parser .add_option ('-c' , '--cache' , dest = 'cache' , help = "Cache to use" )
674- parser .add_option ('-j' , '--caches-json' , dest = 'caches_json' , help = "The JSON file containing the list of caches" ,
763+ parser .add_option ('-j' , '--caches-json' , dest = 'caches_json' , help = "A JSON file containing the list of caches" ,
764+ default = None )
765+ parser .add_option ('-n' , '--cache-list-name' , dest = 'cache_list_name' , help = "Name of pre-configured cache list to use" ,
675766 default = None )
676767 parser .add_option ('--methods' , dest = 'methods' , help = "Comma separated list of methods to try, in order. Default: cvmfs,xrootd,http" , default = "cvmfs,xrootd,http" )
677768 parser .add_option ('-t' , '--token' , dest = 'token' , help = "Token file to use for reading and/or writing" )
@@ -691,6 +782,7 @@ def main():
691782 caches_json_location = os .environ ['CACHES_JSON' ]
692783 else :
693784 caches_json_location = args .caches_json
785+ cache_list_name = args .cache_list_name
694786 if args .closest :
695787 print get_best_stashcache ()
696788 sys .exit (0 )
0 commit comments