Skip to content

Commit 726e260

Browse files
committed
move json loading and stashserver loading into functions
1 parent d0bd6a9 commit 726e260

1 file changed

Lines changed: 145 additions & 130 deletions

File tree

stashcp/__init__.py

Lines changed: 145 additions & 130 deletions
Original file line numberDiff line numberDiff line change
@@ -563,6 +563,138 @@ def get_ips(name):
563563
# always prefer IPv4
564564
return ipv4s + ipv6s
565565

566+
567+
# Return list of cache URLs
568+
def get_json_caches(caches_json_location):
    """Load the usable cache names from a caches JSON file.

    The file is expected to contain a JSON list of objects.  Entries
    whose 'status' is 0 are skipped; for every other entry that has a
    'name' key, the value of 'name' is collected.

    :param caches_json_location: path of the JSON file to read.
    :returns: list of the collected 'name' values, or None (after
        logging an error) when the file cannot be read or parsed, is
        not a JSON list, or yields no usable cache names.
    """
    try:
        with open(caches_json_location, 'r') as f:
            caches_list = json.load(f)
    except (IOError, OSError, ValueError) as err:
        # ValueError covers malformed JSON (and undecodable text);
        # IOError/OSError cover a missing or unreadable file.
        logging.error("Unable to open or parse json in %s: %s",
                      caches_json_location, str(err))
        return None
    logging.debug("Loaded caches list from %s", caches_json_location)

    if not isinstance(caches_list, list):
        logging.error("Unable to open or parse json in %s: %s",
                      caches_json_location, "top-level value is not a list")
        return None

    # Non-dict entries are ignored rather than indexed, so a stray
    # string or number in the list cannot raise a TypeError here.
    usable_caches = [cache['name'] for cache in caches_list
                     if isinstance(cache, dict)
                     and cache.get('status') != 0
                     and 'name' in cache]
    if not usable_caches:
        logging.error("No cache names found in %s without zero status", caches_json_location)
        return None

    return usable_caches
589+
590+
591+
# Return list of caches as root:// URLs
592+
def get_stashservers_caches(responselines):
    """Validate a stashservers.dat response and extract its cache list.

    The sha1 hash embedded in the response is checked against the
    hashed lines and, when /usr/bin/openssl is present, the signature
    is verified against the OSG cvmfs public key.  The server list is
    then selected using the module-level cache_list_name (the first
    list in the response when it is None).

    :param responselines: the response split into lines of text; line
        zero is the geo order and the rest follows the format below.
    :returns: list of 'root://<server>' strings, or None (after logging
        an error) if the response is too short, fails the hash or
        signature check, or does not contain the requested list.
    """

    # After the geo order of the selected server list on line zero,
    # the rest of the response is in .cvmfswhitelist format.
    # This is done to avoid using https for every request on the
    # wlcg-wpad servers and takes advantage of conveniently
    # existing infrastructure.
    # The format contains the following lines:
    # 1. Creation date stamp, e.g. 20200414170005.  For debugging
    #    only.
    # 2. Expiration date stamp, e.g. E20200421170005.  cvmfs clients
    #    check this to avoid replay attacks, but for this api that
    #    is not much of a risk so it is ignored.
    # 3. "Repository" name, e.g. Nstash-servers.  cvmfs clients
    #    also check this but it is not important here.
    # 4. With cvmfs the 4th line has a repository fingerprint, but
    #    for this api it instead contains a semi-colon separated list
    #    of named server lists.  Each server list is of the form
    #    name=servers where servers is comma-separated.  Ends with
    #    "hash=-sha1" because cvmfs_server expects the hash name
    #    to be there.  e.g.
    #    xroot=stashcache.t2.ucsd.edu,sg-gftp.pace.gatech.edu;xroots=xrootd-local.unl.edu,stashcache.t2.ucsd.edu;hash=-sha1
    # 5. A two-dash separator, i.e "--"
    # 6. The sha1 hash of lines 1 through 4.
    # 7. The signature, i.e. an RSA encryption of the hash that can
    #    be decrypted by the OSG cvmfs public key.  Contains binary
    #    information so it may contain a variable number of newlines
    #    which would have caused it to have been split into multiple
    #    response "lines".

    if len(responselines) < 8:
        logging.error("stashservers response too short, less than 8 lines")
        return None
    hashname = responselines[4][-5:]
    if hashname != "-sha1":
        logging.error("stashservers response does not have sha1 hash: %s", hashname)
        return None
    hashedtext = '\n'.join(responselines[1:5]) + '\n'
    # hashlib only accepts bytes on Python 3, so convert before hashing.
    expected_hash = hashlib.sha1(_stashservers_bytes(hashedtext)).hexdigest()
    if responselines[6] != expected_hash:
        logging.debug("stashservers hash %s does not match expected hash %s", responselines[6], expected_hash)
        logging.debug("hashed text:\n%s", hashedtext)
        logging.error("stashservers response hash does not match expected hash")
        return None

    # Call out to /usr/bin/openssl if present, in order to avoid
    # python dependency on a crypto package.
    if not os.path.exists("/usr/bin/openssl"):
        # The signature check isn't critical to be done everywhere;
        # any tampering will likely be caught somewhere and
        # investigated.  Usually openssl is present.
        logging.debug("openssl not installed, skipping signature check")
    elif not _verify_stashservers_signature('\n'.join(responselines[7:]), expected_hash):
        return None

    lists = responselines[4].split(';')
    logging.debug("Cache lists: %s", lists)

    caches = None
    if cache_list_name is None:
        caches = lists[0].split('=')[1]
    else:
        wanted = cache_list_name + '='
        for entry in lists:
            if entry.startswith(wanted):
                caches = entry[len(wanted):]
                break
    if caches is None:
        # Without this guard an unknown list name would surface as an
        # UnboundLocalError instead of a reported failure.
        logging.error("Cache list %s not found in stashservers response", cache_list_name)
        return None

    return ['root://' + cache for cache in caches.split(',')]


def _stashservers_bytes(text):
    # Return text unchanged if it is already bytes, otherwise encode it.
    # NOTE(review): UTF-8 is an assumption; the hashed lines look like
    # plain ASCII, but a signature that was decoded to text by the caller
    # may not round-trip -- confirm how responselines is produced.
    if isinstance(text, bytes):
        return text
    return text.encode('utf-8')


def _verify_stashservers_signature(sig, expected_hash):
    # Return True if sig, decrypted by openssl with the OSG cvmfs public
    # key, equals expected_hash; log an error and return False otherwise.

    # Look for the OSG cvmfs public key to verify signature
    prefix = os.environ.get("OSG_LOCATION", "/")
    osgpub = 'opensciencegrid.org.pub'
    pubkey_files = ['/etc/cvmfs/keys/opensciencegrid.org/' + osgpub,
                    os.path.join(prefix, "etc/stashcache", osgpub),
                    os.path.join(prefix, "usr/share/stashcache", osgpub)]
    if resource_filename:
        try:
            pubkey_files.append(resource_filename(__name__, osgpub))
        except IOError:
            logging.debug("Unable to retrieve %s using resource string, trying other locations", osgpub)

    for pubkey_file in pubkey_files:
        if os.path.isfile(pubkey_file):
            break
    else:
        logging.error("Unable to find osg cvmfs key in %r", pubkey_files)
        return False

    # Pass an argument list and no shell: pubkey_file is partly derived
    # from the OSG_LOCATION environment variable, so it must not be
    # interpreted by a shell.
    cmd = ["/usr/bin/openssl", "rsautl", "-verify", "-pubin", "-inkey", pubkey_file]
    logging.debug("Running %s", ' '.join(cmd))
    p = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
    # communicate() feeds stdin, drains stdout and waits for the child,
    # so the process is reaped and the pipes are closed.
    decryptedhash, _ = p.communicate(_stashservers_bytes(sig))
    # openssl's output is bytes, so compare against the digest as bytes.
    if decryptedhash != _stashservers_bytes(expected_hash):
        logging.debug("stashservers hash %s does not match decrypted signature %s", expected_hash, decryptedhash)
        logging.error("stashservers signature does not verify")
        return False
    logging.debug("Signature matched")
    return True
696+
697+
566698
# Return best stashcache and set nearest_cache_list global
567699
def get_best_stashcache():
568700
global nearest_cache_list
@@ -576,48 +708,29 @@ def get_best_stashcache():
576708
# Randomize the geo ip sites
577709
random.shuffle(geo_ip_sites)
578710

579-
api_text = ''
711+
api_text = ""
580712

581713
caches_list = []
582714

583715
# Check if the user provided a caches json file location
584-
if caches_json_location and os.path.exists(caches_json_location):
585-
# Use geo ip api on caches in provided json file
586-
try:
587-
with open(caches_json_location, 'r') as f:
588-
caches_list = json.loads(f.read())
589-
logging.debug("Loaded caches list from %s", caches_json_location)
590-
except:
591-
logging.error("Unable to open or parse json in %s: %s",
592-
caches_json_location, str(sys.exc_info()[1]))
716+
if caches_json_location:
717+
if not os.path.exists(caches_json_location):
718+
logging.error(caches_json_location + " does not exist")
593719
return None
594-
595-
# Format the caches for the GeoIP query
720+
# Use geo ip api on caches in provided json file
721+
caches_list = get_json_caches(caches_json_location)
596722
caches_string = ""
597-
usable_caches = []
598723
for cache in caches_list:
599-
if 'status' in cache and cache['status'] == 0:
600-
continue
601-
if 'name' in cache:
602-
usable_caches.append(cache['name'])
603-
parsed_url = urlparse(cache['name'])
604-
caches_string = "%s,%s" % (caches_string, parsed_url.hostname)
605-
if len(usable_caches) == 0:
606-
logging.error("No cache names found in %s without zero status", caches_json_location)
607-
return None
608-
609-
caches_list = usable_caches
610-
724+
parsed_url = urlparse(cache)
725+
caches_string = "%s,%s" % (caches_string, parsed_url.hostname)
611726
# Remove the first comma
612727
caches_string = caches_string[1:]
613-
614728
api_text = "api/v1.0/geo/stashcp/" + caches_string
615-
616729
else:
617730
# Use stashservers.dat api
618731
api_text = "stashservers.dat"
619732
if cache_list_name != None:
620-
api_text += '?list=' + cache_list_name
733+
api_text += "?list=" + cache_list_name
621734

622735
responselines = []
623736
i = 0
@@ -644,11 +757,11 @@ def get_best_stashcache():
644757
logging.debug("Error: %s", str(e))
645758
i+=1
646759

647-
order_str = ''
760+
order_str = ""
648761
if len(responselines) > 0:
649762
order_str = responselines[0]
650763

651-
if order_str == '':
764+
if order_str == "":
652765
if len(caches_list) == 0:
653766
logging.error("unable to get list of caches")
654767
return None
@@ -667,107 +780,9 @@ def get_best_stashcache():
667780

668781
if len(caches_list) == 0:
669782
# Used the stashservers.dat api
670-
671-
# After the geo order of the selected server list on line zero,
672-
# the rest of the response is in .cvmfswhitelist format.
673-
# This is done to avoid using https for every request on the
674-
# wlcg-wpad servers and takes advantage of conveniently
675-
# existing infrastructure.
676-
# The format contains the following lines:
677-
# 1. Creation date stamp, e.g. 20200414170005. For debugging
678-
# only.
679-
# 2. Expiration date stamp, e.g. E20200421170005. cvmfs clients
680-
# check this to avoid replay attacks, but for this api that
681-
# is not much of a risk so it is ignored.
682-
# 3. "Repository" name, e.g. Nstash-servers. cvmfs clients
683-
# also check this but it is not important here.
684-
# 4. With cvmfs the 4th line has a repository fingerprint, but
685-
# for this api it instead contains a semi-colon separated list
686-
# of named server lists. Each server list is of the form
687-
# name=servers where servers is comma-separated. Ends with
688-
# "hash=-sha1" because cvmfs_server expects the hash name
689-
# to be there. e.g.
690-
# xroot=stashcache.t2.ucsd.edu,sg-gftp.pace.gatech.edu;xroots=xrootd-local.unl.edu,stashcache.t2.ucsd.edu;hash=-sha1
691-
# 5. A two-dash separator, i.e "--"
692-
# 6. The sha1 hash of lines 1 through 4.
693-
# 7. The signature, i.e. an RSA encryption of the hash that can
694-
# be decrypted by the OSG cvmfs public key. Contains binary
695-
# information so it may contain a variable number of newlines
696-
# which would have caused it to have been split into multiple
697-
# response "lines".
698-
699-
if len(responselines) < 8:
700-
logging.error("stashservers response too short, less than 8 lines")
701-
return None
702-
hashname = responselines[4][-5:]
703-
if hashname != "-sha1":
704-
logging.error("stashservers response does not have sha1 hash: %s", hashname)
783+
caches_list = get_stashservers_caches(responselines)
784+
if caches_list is None:
705785
return None
706-
hashedtext = '\n'.join(responselines[1:5]) + '\n'
707-
hash = hashlib.sha1(hashedtext).hexdigest()
708-
if responselines[6] != hash:
709-
logging.debug("stashservers hash %s does not match expected hash %s", responselines[6], hash)
710-
logging.debug("hashed text:\n%s", hashedtext)
711-
logging.error("stashservers response hash does not match expected hash")
712-
return None
713-
714-
# Call out to /usr/bin/openssl if present, in order to avoid
715-
# python dependency on a crypto package.
716-
if not os.path.exists("/usr/bin/openssl"):
717-
# The signature check isn't critical to be done everywhere;
718-
# any tampering will likely to be caught somewhere and
719-
# investigated. Usually openssl is present.
720-
logging.debug("openssl not installed, skipping signature check")
721-
else:
722-
sig = '\n'.join(responselines[7:])
723-
724-
# Look for the OSG cvmfs public key to verify signature
725-
prefix = os.environ.get("OSG_LOCATION", "/")
726-
osgpub = 'opensciencegrid.org.pub'
727-
pubkey_files = ['/etc/cvmfs/keys/opensciencegrid.org/' + osgpub,
728-
os.path.join(prefix, "etc/stashcache", osgpub),
729-
os.path.join(prefix, "usr/share/stashcache", osgpub)]
730-
if resource_filename:
731-
try:
732-
pubkey_files.append(resource_filename(__name__, osgpub))
733-
except IOError as ioe:
734-
logging.debug("Unable to retrieve caches.json using resource string, trying other locations")
735-
736-
for pubkey_file in pubkey_files:
737-
if os.path.isfile(pubkey_file):
738-
break
739-
else:
740-
logging.error("Unable to find osg cvmfs key in %r", pubkey_files)
741-
return None
742-
743-
cmd = "/usr/bin/openssl rsautl -verify -pubin -inkey " + pubkey_file
744-
logging.debug("Running %s", cmd)
745-
p = subprocess.Popen(cmd, shell=True,
746-
stdin=subprocess.PIPE, stdout=subprocess.PIPE)
747-
p.stdin.write(sig)
748-
p.stdin.close()
749-
decryptedhash = p.stdout.read()
750-
p.stdout.close()
751-
if hash != decryptedhash:
752-
logging.debug("stashservers hash %s does not match decrypted signature %s", hash, decryptedhash)
753-
logging.error("stashservers signature does not verify")
754-
return None
755-
logging.debug("Signature matched")
756-
757-
lists = responselines[4].split(';')
758-
logging.debug("Cache lists: %s", lists)
759-
760-
if cache_list_name == None:
761-
caches = lists[0].split('=')[1]
762-
else:
763-
for l in lists:
764-
n=len(cache_list_name)+1
765-
if l[0:n] == cache_list_name + '=':
766-
caches = l[n:]
767-
break
768-
caches_list = caches.split(',')
769-
for i in range(len(caches_list)):
770-
caches_list[i] = 'root://' + caches_list[i]
771786

772787
minsite = caches_list[int(ordered_list[0])-1]
773788

0 commit comments

Comments
 (0)