Skip to content

Commit 667853d

Browse files
committed
read caches list from wlcg-wpad servers by default
1 parent cd84034 commit 667853d

5 files changed

Lines changed: 161 additions & 70 deletions

File tree

explanation.md

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
WARNING: This document is out of date. It is kept only for historical reasons.
2+
13
## Usage
24

35
```
@@ -139,4 +141,4 @@ It is recommended that `$timeout` not be set to 1 second, as tests showed that d
139141

140142
* No record of whether the file was new to the cache it was pulled from or not
141143

142-
* Does not currently allow for file renaming.
144+
* Does not currently allow for file renaming.

setup.py

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -152,17 +152,14 @@
152152
#
153153
# If using Python 2.6 or earlier, then these have to be included in
154154
# MANIFEST.in as well.
155-
package_data={'stashcp': ['caches.json'],},
156-
#package_data={ # Optional
157-
# '': ['bin/caches.json'],
158-
#},
155+
package_data={'stashcp': ['opensciencegrid.org.pub'],},
159156

160157
# Although 'package_data' is the preferred approach, in some cases you may
161158
# need to place data files outside of your packages. See:
162159
# http://docs.python.org/3.4/distutils/setupscript.html#installing-additional-files
163160
#
164161
# In this case, 'data_file' will be installed into '<sys.prefix>/my_data'
165-
data_files=[('share/stashcache/', ['stashcp/caches.json'])],
162+
data_files=[('share/stashcache/', ['stashcp/opensciencegrid.org.pub'])],
166163

167164
# To provide executable scripts, use entry points in preference to the
168165
# "scripts" keyword. Entry points provide cross-platform support and allow

stashcp/__init__.py

Lines changed: 146 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
import socket
1313
import random
1414
import shutil
15+
import hashlib
1516
from urlparse import urlparse
1617

1718
try:
@@ -41,6 +42,9 @@
4142
# Global variable for the location of the caches.json file
4243
caches_json_location = None
4344

45+
# Global variable for the name of a pre-configured cache list
46+
cache_list_name = None
47+
4448
# Global variable for the location of the token to use for reading / writing
4549
token_location = None
4650

@@ -559,100 +563,184 @@ def get_ips(name):
559563
# always prefer IPv4
560564
return ipv4s + ipv6s
561565

566+
# Return best stashcache and set nearest_cache_list global
562567
def get_best_stashcache():
563568
global nearest_cache_list
564569

565-
# Check if the user provided a caches json file location
566-
if caches_json_location and os.path.exists(caches_json_location):
567-
cache_files = [ caches_json_location ]
568-
else:
569-
prefix = os.environ.get("OSG_LOCATION", "/")
570-
cache_files = [os.path.join(prefix, "etc/stashcache/caches.json"),
571-
os.path.join(prefix, "usr/share/stashcache/caches.json"),
572-
os.path.join(prefix, "usr/local/share/stashcache/caches.json")]
573-
if resource_filename:
574-
try:
575-
cache_files.append(resource_filename(__name__, 'caches.json'))
576-
except IOError as ioe:
577-
logging.debug("Unable to retrieve caches.json using resource string, trying other locations")
578-
579-
for cache_file in cache_files:
580-
if os.path.isfile(cache_file):
581-
with open(cache_file, 'r') as f:
582-
caches_list = json.loads(f.read())
583-
logging.debug("Loaded caches list from %s", cache_file)
584-
break
585-
else:
586-
logging.error("Unable to find caches.json in %r", cache_files)
587-
return None
588-
589-
# Format the caches for the GeoIP query
590-
caches_string = ""
591-
usable_caches = []
592-
for cache in caches_list:
593-
if cache['status'] == 0:
594-
continue
595-
usable_caches.append(cache)
596-
parsed_url = urlparse(cache['name'])
597-
caches_string = "%s,%s" % (caches_string, parsed_url.hostname)
598-
caches_list = usable_caches
599-
# Remove the first comma
600-
caches_string = caches_string[1:]
601-
602570
# Use the geo ip service on the WLCG Web Proxy Auto Discovery machines
603571
geo_ip_sites = ["wlcg-wpad.cern.ch", "wlcg-wpad.fnal.gov"]
604572

605-
# Append text before caches string
606-
append_text = "api/v1.0/geo/stashcp"
607-
608573
# Headers for the HTTP request
609574
headers = {'Cache-control': 'max-age=0', 'User-Agent': user_agent }
610575

611576
# Randomize the geo ip sites
612577
random.shuffle(geo_ip_sites)
613-
order_str = ''
578+
579+
api_text = ''
580+
581+
caches_list = []
582+
583+
# Check if the user provided a caches json file location
584+
if caches_json_location and os.path.exists(caches_json_location):
585+
# Use geo ip api on caches in provided json file
586+
try:
587+
with open(caches_json_location, 'r') as f:
588+
caches_list = json.loads(f.read())
589+
logging.debug("Loaded caches list from %s", caches_json_location)
590+
except:
591+
logging.error("Unable to open or parse json in %s: %s",
592+
caches_json_location, str(sys.exc_info()[1]))
593+
return None
594+
595+
# Format the caches for the GeoIP query
596+
caches_string = ""
597+
usable_caches = []
598+
for cache in caches_list:
599+
if 'status' in cache and cache['status'] == 0:
600+
continue
601+
if 'name' in cache:
602+
usable_caches.append(cache['name'])
603+
parsed_url = urlparse(cache['name'])
604+
caches_string = "%s,%s" % (caches_string, parsed_url.hostname)
605+
if len(usable_caches) == 0:
606+
logging.error("No cache names found in %s without zero status", caches_json_location)
607+
return None
608+
609+
caches_list = usable_caches
610+
611+
# Remove the first comma
612+
caches_string = caches_string[1:]
613+
614+
api_text = "api/v1.0/geo/stashcp/" + caches_string
615+
616+
else:
617+
# Use stashservers.dat api
618+
api_text = "stashservers.dat"
619+
if cache_list_name != None:
620+
api_text += '?list=' + cache_list_name
621+
622+
responselines = []
614623
i = 0
615-
while order_str == '' and i < len(geo_ip_sites):
624+
while len(responselines) == 0 and i < len(geo_ip_sites):
616625
cur_site = geo_ip_sites[i]
617626
headers['Host'] = cur_site
627+
logging.debug("Trying server site of %s", cur_site)
618628
for ip in get_ips(cur_site):
619-
logging.debug("Trying geoip site of: %s [%s]", cur_site, ip)
620-
final_url = "http://%s/%s/%s" % (ip, append_text, caches_string)
621-
logging.debug("Querying for closest cache: %s", final_url)
629+
final_url = "http://%s/%s" % (ip, api_text)
630+
logging.debug("Querying %s", final_url)
622631
try:
623632
# Make the request
624633
req = urllib2.Request(final_url, headers=headers)
625634
response = urllib2.urlopen(req, timeout=10)
626635
if response.getcode() == 200:
627636
logging.debug("Got OK code 200 from %s", cur_site)
628-
order_str = response.read()
637+
responselines = response.read().split('\n')
629638
response.close()
630639
break
631640
response.close()
632641
except urllib2.URLError, e:
633642
logging.debug("URL error: %s", str(e))
634643
except Exception, e:
635644
logging.debug("Error: %s", str(e))
636-
i+=1
645+
i+=1
646+
647+
order_str = ''
648+
if len(responselines) > 0:
649+
order_str = responselines[0]
637650

638651
if order_str == '':
652+
if len(caches_list) == 0:
653+
logging.error("unable to get list of caches")
654+
return None
639655
# Unable to find a geo_ip server to use, return random choice from caches!
640-
minsite = random.choice(caches_list)['name']
641-
random.shuffle(caches_list)
642-
nearest_cache_list = [cache['name'] for cache in caches_list]
656+
nearest_cache_list = caches_list
657+
random.shuffle(nearest_cache_list)
658+
minsite = nearest_cache_list[0]
643659
logging.warning("Unable to use Geoip to find closest cache! Returning random cache %s", minsite)
644-
logging.debug("Ordered list of nearest caches: %s", str(nearest_cache_list))
660+
logging.debug("Randomized list of nearest caches: %s", str(nearest_cache_list))
645661
return minsite
646662
else:
647663
# The order string should be something like:
648664
# 3,1,2
649665
ordered_list = order_str.strip().split(",")
650666
logging.debug("Got order %s", str(ordered_list))
651-
minsite = caches_list[int(ordered_list[0])-1]['name']
667+
668+
if len(caches_list) == 0:
669+
# Used the stashservers.dat api
670+
671+
if len(responselines) < 8:
672+
logging.error("stashservers response too short, less than 8 lines")
673+
return None
674+
hashname = responselines[4][-5:]
675+
if hashname != "-sha1":
676+
logging.error("stashservers response does not have sha1 hash: %s", hashname)
677+
return None
678+
hashedtext = '\n'.join(responselines[1:5]) + '\n'
679+
hash = hashlib.sha1(hashedtext).hexdigest()
680+
if responselines[6] != hash:
681+
logging.debug("stashservers hash %s does not match expected hash %s", responselines[6], hash)
682+
logging.debug("hashed text:\n%s", hashedtext)
683+
logging.error("stashservers response hash does not match expected hash")
684+
return None
685+
686+
if not os.path.exists("/usr/bin/openssl"):
687+
logging.debug("openssl not installed, skipping signature check")
688+
else:
689+
sig = '\n'.join(responselines[7:])
690+
691+
# Look for the OSG cvmfs public key to verify signature
692+
prefix = os.environ.get("OSG_LOCATION", "/")
693+
osgpub = 'opensciencegrid.org.pub'
694+
pubkey_files = ['/etc/cvmfs/keys/opensciencegrid.org/' + osgpub,
695+
os.path.join(prefix, "etc/stashcache", osgpub),
696+
os.path.join(prefix, "usr/share/stashcache", osgpub)]
697+
if resource_filename:
698+
try:
699+
pubkey_files.append(resource_filename(__name__, osgpub))
700+
except IOError as ioe:
701+
logging.debug("Unable to retrieve caches.json using resource string, trying other locations")
702+
703+
for pubkey_file in pubkey_files:
704+
if os.path.isfile(pubkey_file):
705+
break
706+
else:
707+
logging.error("Unable to find osg cvmfs key in %r", pubkey_files)
708+
return None
709+
710+
cmd = "/usr/bin/openssl rsautl -verify -pubin -inkey " + pubkey_file
711+
logging.debug("Running %s", cmd)
712+
p = subprocess.Popen(cmd, shell=True,
713+
stdin=subprocess.PIPE, stdout=subprocess.PIPE)
714+
p.stdin.write(sig)
715+
p.stdin.close()
716+
decryptedhash = p.stdout.read()
717+
p.stdout.close()
718+
if hash != decryptedhash:
719+
logging.debug("stashservers hash %s does not match decrypted signature %s", hash, decryptedhash)
720+
logging.error("stashservers signature does not verify")
721+
return None
722+
logging.debug("Signature matched")
723+
724+
lists = responselines[4].split(';')
725+
logging.debug("Cache lists: %s", lists)
726+
727+
if cache_list_name == None:
728+
caches = lists[0].split('=')[1]
729+
else:
730+
for l in lists:
731+
n=len(cache_list_name)+1
732+
if l[0:n] == cache_list_name + '=':
733+
caches = l[n:]
734+
break
735+
caches_list = caches.split(',')
736+
for i in range(len(caches_list)):
737+
caches_list[i] = 'root://' + caches_list[i]
738+
739+
minsite = caches_list[int(ordered_list[0])-1]
652740

653741
nearest_cache_list = []
654742
for ordered_index in ordered_list:
655-
nearest_cache_list.append(caches_list[int(ordered_index)-1]['name'])
743+
nearest_cache_list.append(caches_list[int(ordered_index)-1])
656744

657745
logging.debug("Returning closest cache: %s", minsite)
658746
logging.debug("Ordered list of nearest caches: %s", str(nearest_cache_list))
@@ -663,6 +751,7 @@ def main():
663751
global nearest_cache
664752
global nearest_cache_list
665753
global caches_json_location
754+
global cache_list_name
666755
global token_location
667756

668757
usage = "usage: %prog [options] source destination"
@@ -671,7 +760,9 @@ def main():
671760
parser.add_option('-r', dest='recursive', action='store_true', help='recursively copy')
672761
parser.add_option('--closest', action='store_true', help="Return the closest cache and exit")
673762
parser.add_option('-c', '--cache', dest='cache', help="Cache to use")
674-
parser.add_option('-j', '--caches-json', dest='caches_json', help="The JSON file containing the list of caches",
763+
parser.add_option('-j', '--caches-json', dest='caches_json', help="A JSON file containing the list of caches",
764+
default=None)
765+
parser.add_option('-n', '--cache-list-name', dest='cache_list_name', help="Name of pre-configured cache list to use",
675766
default=None)
676767
parser.add_option('--methods', dest='methods', help="Comma separated list of methods to try, in order. Default: cvmfs,xrootd,http", default="cvmfs,xrootd,http")
677768
parser.add_option('-t', '--token', dest='token', help="Token file to use for reading and/or writing")
@@ -691,6 +782,7 @@ def main():
691782
caches_json_location = os.environ['CACHES_JSON']
692783
else:
693784
caches_json_location = args.caches_json
785+
cache_list_name = args.cache_list_name
694786
if args.closest:
695787
print get_best_stashcache()
696788
sys.exit(0)

stashcp/caches.json

Lines changed: 0 additions & 10 deletions
This file was deleted.

stashcp/opensciencegrid.org.pub

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
-----BEGIN PUBLIC KEY-----
2+
MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAqQGYXTp9cRcMbGeDoijB
3+
gKNTCEpIWB7XcqIHVXJjfxEkycQXMyZkB7O0CvV3UmmY2K7CQqTnd9ddcApn7BqQ
4+
/7QGP0H1jfXLfqVdwnhyjIHxmV2x8GIHRHFA0wE+DadQwoi1G0k0SNxOVS5qbdeV
5+
yiyKsoU4JSqy5l2tK3K/RJE4htSruPCrRCK3xcN5nBeZK5gZd+/ufPIG+hd78kjQ
6+
Dy3YQXwmEPm7kAZwIsEbMa0PNkp85IDkdR1GpvRvDMCRmUaRHrQUPBwPIjs0akL+
7+
qoTxJs9k6quV0g3Wd8z65s/k5mEZ+AnHHI0+0CL3y80wnuLSBYmw05YBtKyoa1Fb
8+
FQIDAQAB
9+
-----END PUBLIC KEY-----
10+

0 commit comments

Comments
 (0)