@@ -563,6 +563,138 @@ def get_ips(name):
563563 # always prefer IPv4
564564 return ipv4s + ipv6s
565565
566+
# Return list of cache URLs
def get_json_caches(caches_json_location):
    """Load the names of the usable caches from a caches JSON file.

    The file must contain a JSON list.  An entry of that list is usable
    when it is an object that has a 'name' key and whose 'status', if
    present, is not 0.  Entries that are not JSON objects are ignored.

    Parameters:
        caches_json_location: path of the JSON file to read.

    Returns:
        A list holding the 'name' value of every usable entry, in file
        order.  Returns None, after logging an error, when the file
        cannot be opened or parsed, when its top-level value is not a
        list, or when no usable entry is found.
    """
    try:
        with open(caches_json_location, 'r') as f:
            caches_list = json.load(f)
    except (IOError, OSError, ValueError) as err:
        # ValueError covers malformed JSON (and undecodable text on
        # Python 3); interrupts and SystemExit are left to propagate.
        logging.error("Unable to open or parse json in %s: %s",
                      caches_json_location, str(err))
        return None
    logging.debug("Loaded caches list from %s", caches_json_location)

    if not isinstance(caches_list, list):
        logging.error("Expected a json list of caches in %s", caches_json_location)
        return None

    usable_caches = []
    for cache in caches_list:
        if not isinstance(cache, dict):
            continue
        # A status of zero marks a cache that must not be used.
        if cache.get('status') == 0:
            continue
        if 'name' in cache:
            usable_caches.append(cache['name'])

    if not usable_caches:
        logging.error("No cache names found in %s without zero status", caches_json_location)
        return None

    return usable_caches
589+
590+
def _stashservers_to_bytes(text):
    """Return text as a byte string; byte strings pass through unchanged."""
    if isinstance(text, bytes):
        return text
    # NOTE(review): assumes text lines were decoded so that every code
    # point maps to one byte (latin-1) -- confirm against the caller.
    # Anything outside that range is replaced, which makes the hash or
    # signature comparison fail instead of raising.
    return text.encode("latin-1", "replace")


def _verify_stashservers_signature(sig, expected_hash):
    """Check with openssl that sig decrypts to expected_hash.

    Looks for the OSG cvmfs public key in a fixed list of locations and
    runs "/usr/bin/openssl rsautl -verify" on the signature.

    Returns True when the decrypted signature equals expected_hash.
    Returns False, after logging an error, when no key file is found,
    openssl cannot be run, or the values differ.
    """
    # Look for the OSG cvmfs public key to verify signature
    prefix = os.environ.get("OSG_LOCATION", "/")
    osgpub = 'opensciencegrid.org.pub'
    pubkey_files = ['/etc/cvmfs/keys/opensciencegrid.org/' + osgpub,
                    os.path.join(prefix, "etc/stashcache", osgpub),
                    os.path.join(prefix, "usr/share/stashcache", osgpub)]
    if resource_filename:
        try:
            pubkey_files.append(resource_filename(__name__, osgpub))
        except IOError:
            logging.debug("Unable to retrieve %s using resource string, trying other locations", osgpub)

    for pubkey_file in pubkey_files:
        if os.path.isfile(pubkey_file):
            break
    else:
        logging.error("Unable to find osg cvmfs key in %r", pubkey_files)
        return False

    # An argument list (no shell) keeps a key path that contains spaces
    # or shell metacharacters from being reinterpreted.
    cmd = ["/usr/bin/openssl", "rsautl", "-verify", "-pubin", "-inkey", pubkey_file]
    logging.debug("Running %s", " ".join(cmd))
    try:
        p = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
        # communicate() feeds stdin, drains stdout and waits for the
        # child, so no pipe is left open and no zombie is left behind.
        decryptedhash = p.communicate(_stashservers_to_bytes(sig))[0]
    except (IOError, OSError) as err:
        logging.error("Unable to run openssl: %s", str(err))
        return False

    if _stashservers_to_bytes(expected_hash) != decryptedhash:
        logging.debug("stashservers hash %s does not match decrypted signature %s", expected_hash, decryptedhash)
        logging.error("stashservers signature does not verify")
        return False
    logging.debug("Signature matched")
    return True


# Return list of caches as root:// URLs
def get_stashservers_caches(responselines):
    """Validate a stashservers.dat response and return its cache list.

    Parameters:
        responselines: the response split into lines.  Line zero is the
            geo order; the remaining lines are described below.

    Returns:
        A list of 'root://<server>' strings taken from the server list
        named by the module-level cache_list_name, or from the first
        server list when cache_list_name is None.  Returns None, after
        logging an error, when the response is too short, the hash or
        signature check fails, or the requested list is not present.
    """

    # After the geo order of the selected server list on line zero,
    # the rest of the response is in .cvmfswhitelist format.
    # This is done to avoid using https for every request on the
    # wlcg-wpad servers and takes advantage of conveniently
    # existing infrastructure.
    # The format contains the following lines:
    # 1. Creation date stamp, e.g. 20200414170005.  For debugging
    #    only.
    # 2. Expiration date stamp, e.g. E20200421170005.  cvmfs clients
    #    check this to avoid replay attacks, but for this api that
    #    is not much of a risk so it is ignored.
    # 3. "Repository" name, e.g. Nstash-servers.  cvmfs clients
    #    also check this but it is not important here.
    # 4. With cvmfs the 4th line has a repository fingerprint, but
    #    for this api it instead contains a semi-colon separated list
    #    of named server lists.  Each server list is of the form
    #    name=servers where servers is comma-separated.  Ends with
    #    "hash=-sha1" because cvmfs_server expects the hash name
    #    to be there.  e.g.
    #    xroot=stashcache.t2.ucsd.edu,sg-gftp.pace.gatech.edu;xroots=xrootd-local.unl.edu,stashcache.t2.ucsd.edu;hash=-sha1
    # 5. A two-dash separator, i.e "--"
    # 6. The sha1 hash of lines 1 through 4.
    # 7. The signature, i.e. an RSA encryption of the hash that can
    #    be decrypted by the OSG cvmfs public key.  Contains binary
    #    information so it may contain a variable number of newlines
    #    which would have caused it to have been split into multiple
    #    response "lines".

    if len(responselines) < 8:
        logging.error("stashservers response too short, less than 8 lines")
        return None
    hashname = responselines[4][-5:]
    if hashname != "-sha1":
        logging.error("stashservers response does not have sha1 hash: %s", hashname)
        return None
    hashedtext = '\n'.join(responselines[1:5]) + '\n'
    # hashlib only accepts bytes on Python 3.
    expected_hash = hashlib.sha1(_stashservers_to_bytes(hashedtext)).hexdigest()
    if responselines[6] != expected_hash:
        logging.debug("stashservers hash %s does not match expected hash %s", responselines[6], expected_hash)
        logging.debug("hashed text:\n%s", hashedtext)
        logging.error("stashservers response hash does not match expected hash")
        return None

    # Call out to /usr/bin/openssl if present, in order to avoid
    # python dependency on a crypto package.
    if not os.path.exists("/usr/bin/openssl"):
        # The signature check isn't critical to be done everywhere;
        # any tampering will likely be caught somewhere and
        # investigated.  Usually openssl is present.
        logging.debug("openssl not installed, skipping signature check")
    elif not _verify_stashservers_signature('\n'.join(responselines[7:]), expected_hash):
        return None

    lists = responselines[4].split(';')
    logging.debug("Cache lists: %s", lists)

    if cache_list_name is None:
        # Default to the first named list in the response.
        _, separator, caches = lists[0].partition('=')
        if not separator:
            logging.error("stashservers response has no server list: %s", lists[0])
            return None
    else:
        wanted = cache_list_name + '='
        for entry in lists:
            if entry.startswith(wanted):
                caches = entry[len(wanted):]
                break
        else:
            logging.error("Cache list %s not found in stashservers response", cache_list_name)
            return None

    return ['root://' + server for server in caches.split(',')]
696+
697+
566698# Return best stashcache and set nearest_cache_list global
567699def get_best_stashcache ():
568700 global nearest_cache_list
@@ -576,48 +708,29 @@ def get_best_stashcache():
576708 # Randomize the geo ip sites
577709 random .shuffle (geo_ip_sites )
578710
579- api_text = ''
711+ api_text = ""
580712
581713 caches_list = []
582714
583715 # Check if the user provided a caches json file location
584- if caches_json_location and os .path .exists (caches_json_location ):
585- # Use geo ip api on caches in provided json file
586- try :
587- with open (caches_json_location , 'r' ) as f :
588- caches_list = json .loads (f .read ())
589- logging .debug ("Loaded caches list from %s" , caches_json_location )
590- except :
591- logging .error ("Unable to open or parse json in %s: %s" ,
592- caches_json_location , str (sys .exc_info ()[1 ]))
716+ if caches_json_location :
717+ if not os .path .exists (caches_json_location ):
718+ logging .error (caches_json_location + " does not exist" )
593719 return None
594-
595- # Format the caches for the GeoIP query
720+ # Use geo ip api on caches in provided json file
721+ caches_list = get_json_caches ( caches_json_location )
596722 caches_string = ""
597- usable_caches = []
598723 for cache in caches_list :
599- if 'status' in cache and cache ['status' ] == 0 :
600- continue
601- if 'name' in cache :
602- usable_caches .append (cache ['name' ])
603- parsed_url = urlparse (cache ['name' ])
604- caches_string = "%s,%s" % (caches_string , parsed_url .hostname )
605- if len (usable_caches ) == 0 :
606- logging .error ("No cache names found in %s without zero status" , caches_json_location )
607- return None
608-
609- caches_list = usable_caches
610-
724+ parsed_url = urlparse (cache )
725+ caches_string = "%s,%s" % (caches_string , parsed_url .hostname )
611726 # Remove the first comma
612727 caches_string = caches_string [1 :]
613-
614728 api_text = "api/v1.0/geo/stashcp/" + caches_string
615-
616729 else :
617730 # Use stashservers.dat api
618731 api_text = "stashservers.dat"
619732 if cache_list_name != None :
620- api_text += ' ?list=' + cache_list_name
733+ api_text += " ?list=" + cache_list_name
621734
622735 responselines = []
623736 i = 0
@@ -644,11 +757,11 @@ def get_best_stashcache():
644757 logging .debug ("Error: %s" , str (e ))
645758 i += 1
646759
647- order_str = ''
760+ order_str = ""
648761 if len (responselines ) > 0 :
649762 order_str = responselines [0 ]
650763
651- if order_str == '' :
764+ if order_str == "" :
652765 if len (caches_list ) == 0 :
653766 logging .error ("unable to get list of caches" )
654767 return None
@@ -667,107 +780,9 @@ def get_best_stashcache():
667780
668781 if len (caches_list ) == 0 :
669782 # Used the stashservers.dat api
670-
671- # After the geo order of the selected server list on line zero,
672- # the rest of the response is in .cvmfswhitelist format.
673- # This is done to avoid using https for every request on the
674- # wlcg-wpad servers and takes advantage of conveniently
675- # existing infrastructure.
676- # The format contains the following lines:
677- # 1. Creation date stamp, e.g. 20200414170005. For debugging
678- # only.
679- # 2. Expiration date stamp, e.g. E20200421170005. cvmfs clients
680- # check this to avoid replay attacks, but for this api that
681- # is not much of a risk so it is ignored.
682- # 3. "Repository" name, e.g. Nstash-servers. cvmfs clients
683- # also check this but it is not important here.
684- # 4. With cvmfs the 4th line has a repository fingerprint, but
685- # for this api it instead contains a semi-colon separated list
686- # of named server lists. Each server list is of the form
687- # name=servers where servers is comma-separated. Ends with
688- # "hash=-sha1" because cvmfs_server expects the hash name
689- # to be there. e.g.
690- # xroot=stashcache.t2.ucsd.edu,sg-gftp.pace.gatech.edu;xroots=xrootd-local.unl.edu,stashcache.t2.ucsd.edu;hash=-sha1
691- # 5. A two-dash separator, i.e "--"
692- # 6. The sha1 hash of lines 1 through 4.
693- # 7. The signature, i.e. an RSA encryption of the hash that can
694- # be decrypted by the OSG cvmfs public key. Contains binary
695- # information so it may contain a variable number of newlines
696- # which would have caused it to have been split into multiple
697- # response "lines".
698-
699- if len (responselines ) < 8 :
700- logging .error ("stashservers response too short, less than 8 lines" )
701- return None
702- hashname = responselines [4 ][- 5 :]
703- if hashname != "-sha1" :
704- logging .error ("stashservers response does not have sha1 hash: %s" , hashname )
783+ caches_list = get_stashservers_caches (responselines )
784+ if caches_list is None :
705785 return None
706- hashedtext = '\n ' .join (responselines [1 :5 ]) + '\n '
707- hash = hashlib .sha1 (hashedtext ).hexdigest ()
708- if responselines [6 ] != hash :
709- logging .debug ("stashservers hash %s does not match expected hash %s" , responselines [6 ], hash )
710- logging .debug ("hashed text:\n %s" , hashedtext )
711- logging .error ("stashservers response hash does not match expected hash" )
712- return None
713-
714- # Call out to /usr/bin/openssl if present, in order to avoid
715- # python dependency on a crypto package.
716- if not os .path .exists ("/usr/bin/openssl" ):
717- # The signature check isn't critical to be done everywhere;
718- # any tampering will likely to be caught somewhere and
719- # investigated. Usually openssl is present.
720- logging .debug ("openssl not installed, skipping signature check" )
721- else :
722- sig = '\n ' .join (responselines [7 :])
723-
724- # Look for the OSG cvmfs public key to verify signature
725- prefix = os .environ .get ("OSG_LOCATION" , "/" )
726- osgpub = 'opensciencegrid.org.pub'
727- pubkey_files = ['/etc/cvmfs/keys/opensciencegrid.org/' + osgpub ,
728- os .path .join (prefix , "etc/stashcache" , osgpub ),
729- os .path .join (prefix , "usr/share/stashcache" , osgpub )]
730- if resource_filename :
731- try :
732- pubkey_files .append (resource_filename (__name__ , osgpub ))
733- except IOError as ioe :
734- logging .debug ("Unable to retrieve caches.json using resource string, trying other locations" )
735-
736- for pubkey_file in pubkey_files :
737- if os .path .isfile (pubkey_file ):
738- break
739- else :
740- logging .error ("Unable to find osg cvmfs key in %r" , pubkey_files )
741- return None
742-
743- cmd = "/usr/bin/openssl rsautl -verify -pubin -inkey " + pubkey_file
744- logging .debug ("Running %s" , cmd )
745- p = subprocess .Popen (cmd , shell = True ,
746- stdin = subprocess .PIPE , stdout = subprocess .PIPE )
747- p .stdin .write (sig )
748- p .stdin .close ()
749- decryptedhash = p .stdout .read ()
750- p .stdout .close ()
751- if hash != decryptedhash :
752- logging .debug ("stashservers hash %s does not match decrypted signature %s" , hash , decryptedhash )
753- logging .error ("stashservers signature does not verify" )
754- return None
755- logging .debug ("Signature matched" )
756-
757- lists = responselines [4 ].split (';' )
758- logging .debug ("Cache lists: %s" , lists )
759-
760- if cache_list_name == None :
761- caches = lists [0 ].split ('=' )[1 ]
762- else :
763- for l in lists :
764- n = len (cache_list_name )+ 1
765- if l [0 :n ] == cache_list_name + '=' :
766- caches = l [n :]
767- break
768- caches_list = caches .split (',' )
769- for i in range (len (caches_list )):
770- caches_list [i ] = 'root://' + caches_list [i ]
771786
772787 minsite = caches_list [int (ordered_list [0 ])- 1 ]
773788
0 commit comments