Skip to content

Commit 48fdd8f

Browse files
authored
Merge pull request #79 from DrDaveD/wlcg-wpad-geoip
switch to using wlcg-wpad geo ip service
2 parents 8536c16 + 53f7b04 commit 48fdd8f

1 file changed

Lines changed: 52 additions & 30 deletions

File tree

bin/stashcp

Lines changed: 52 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ import os
99
import json
1010
import multiprocessing
1111
import urllib2
12+
import socket
1213
import random
1314
import shutil
1415

@@ -342,6 +343,28 @@ def timed_transfer(filename, cache, destination, debug=False):
342343
return str(xrd_exit)
343344

344345

346+
def get_ips(name):
    """Resolve a host name to a list of IP address strings.

    All IPv4 addresses are listed before any IPv6 addresses, and the order
    within each family is randomized so repeated callers spread their
    requests over the available addresses.

    Args:
        name: Host name (or literal IP address) to look up.

    Returns:
        A list of unique address strings, IPv4 first and then IPv6. An
        empty list is returned, after logging an error, when the lookup
        fails.
    """
    try:
        # The positional order is (host, port, family, socktype, proto).
        # The protocol filter belongs in the fifth slot; putting it in the
        # fourth asks for socket *type* 6 instead of TCP.  Arguments are
        # passed positionally so the call also works on Python 2.
        info = socket.getaddrinfo(name, 0, 0, 0, socket.IPPROTO_TCP)
    except (socket.error, UnicodeError):
        # socket.gaierror/herror derive from socket.error; UnicodeError is
        # raised for names that cannot be IDNA-encoded.
        logging.error("Unable to look up %s", name)
        return []

    ipv4s = []
    ipv6s = []
    for entry in info:
        family = entry[0]
        address = entry[4][0]
        if family == socket.AF_INET:
            bucket = ipv4s
        elif family == socket.AF_INET6:
            bucket = ipv6s
        else:
            continue
        # Skip repeats so a caller never retries the same address twice.
        if address not in bucket:
            bucket.append(address)

    # randomize the order of each
    random.shuffle(ipv4s)
    random.shuffle(ipv6s)

    # always prefer IPv4
    return ipv4s + ipv6s
367+
345368
def get_best_stashcache():
346369

347370
# First, check for caches.json file in this file's directory:
@@ -356,9 +379,7 @@ def get_best_stashcache():
356379
caches_list = json.loads(f.read())
357380
f.close()
358381

359-
# Get the possible GeoIP sites
360-
361-
# Format the caches for the CVMFS query
382+
# Format the caches for the GeoIP query
362383
caches_string = ""
363384
usable_caches = []
364385
for cache in caches_list:
@@ -371,12 +392,8 @@ def get_best_stashcache():
371392
# Remove the first comma
372393
caches_string = caches_string[1:]
373394

374-
# Here is a list from the output of the command:
375-
# attr -qg host_list /cvmfs/oasis.opensciencegrid.org
376-
geo_ip_sites = "http://cvmfs-s1fnal.opensciencegrid.org:8000/cvmfs/oasis.opensciencegrid.org;http://cvmfs-s1bnl.opensciencegrid.org:8000/cvmfs/oasis.opensciencegrid.org;http://cvmfs-egi.gridpp.rl.ac.uk:8000/cvmfs/oasis.opensciencegrid.org;http://klei.nikhef.nl:8000/cvmfs/oasis.opensciencegrid.org;http://cvmfsrep.grid.sinica.edu.tw:8000/cvmfs/oasis.opensciencegrid.org".split(';')
377-
378-
# Add HCC's, for good measure
379-
geo_ip_sites.insert(0,"http://hcc-cvmfs.unl.edu:8000/cvmfs/config-osg.opensciencegrid.org")
395+
# Use the geo ip service on the WLCG Web Proxy Auto Discovery machines
396+
geo_ip_sites = ["wlcg-wpad.cern.ch", "wlcg-wpad.fnal.gov"]
380397

381398
# Append text before caches string
382399
append_text = "api/v1.0/geo/stashcp"
@@ -386,36 +403,41 @@ def get_best_stashcache():
386403

387404
# Randomize the geo ip sites
388405
random.shuffle(geo_ip_sites)
389-
found = False
406+
order_str = ''
390407
i = 0
391-
while found == False and i < len(geo_ip_sites):
408+
while order_str == '' and i < len(geo_ip_sites):
392409
cur_site = geo_ip_sites[i]
393-
logging.debug("Trying geoip site of: %s", cur_site)
394-
final_url = "%s/%s/%s" % (cur_site, append_text, caches_string)
395-
logging.debug("Querying for closest cache: %s", final_url)
396-
try:
397-
# Make the request
398-
req = urllib2.Request(final_url, headers=headers)
399-
response = urllib2.urlopen(req)
400-
if response.getcode() == 200:
401-
logging.debug("Got error code 200 from %s", cur_site)
402-
found = True
403-
break
404-
except urllib2.URLError, e:
405-
logging.debug("URL error: %s", str(e))
406-
i+=1
410+
headers['Host'] = cur_site
411+
for ip in get_ips(cur_site):
412+
logging.debug("Trying geoip site of: %s [%s]", cur_site, ip)
413+
final_url = "http://%s/%s/%s" % (ip, append_text, caches_string)
414+
logging.debug("Querying for closest cache: %s", final_url)
415+
try:
416+
# Make the request
417+
req = urllib2.Request(final_url, headers=headers)
418+
response = urllib2.urlopen(req, timeout=10)
419+
if response.getcode() == 200:
420+
logging.debug("Got OK code 200 from %s", cur_site)
421+
order_str = response.read()
422+
response.close()
423+
break
424+
response.close()
425+
except urllib2.URLError, e:
426+
logging.debug("URL error: %s", str(e))
427+
except Exception, e:
428+
logging.debug("Error: %s", str(e))
429+
i+=1
407430

408-
if found == False:
431+
if order_str == '':
409432
# Unable to find a geo_ip server to use, return random choice from caches!
410433
minsite = random.choice(caches_list)
411434
logging.error("Unable to use Geoip to find closest cache! Returning random cache %s", minsite)
412435
return minsite
413436
else:
414-
415-
# From the response, should respond with something like:
437+
# The order string should be something like:
416438
# 3,1,2
417-
ordered_list = response.read().strip().split(",")
418-
logging.debug("Got response %s", str(ordered_list))
439+
ordered_list = order_str.strip().split(",")
440+
logging.debug("Got order %s", str(ordered_list))
419441
minsite = caches_list[int(ordered_list[0])-1]['name']
420442

421443
logging.debug("Returning closest cache: %s", minsite)

0 commit comments

Comments
 (0)