github.com/htcondor/osdf-client/v6@v6.13.0-rc1.0.20231009141709-766e7b4d1dc8/get_best_cache.go (about)

     1  package stashcp
     2  
     3  import (
     4  	"bytes"
     5  	"errors"
     6  	"io"
     7  	"math/rand"
     8  	"net"
     9  	"net/http"
    10  	"net/url"
    11  	"os"
    12  	"strconv"
    13  	"strings"
    14  	"time"
    15  
    16  	log "github.com/sirupsen/logrus"
    17  )
    18  
    19  func GetBestCache(cacheListName string) ([]string, error) {
    20  
    21  	if cacheListName == "" {
    22  		cacheListName = "xroot"
    23  	}
    24  
    25  	GeoIpUrl := url.URL{}
    26  	// Use the geo ip service on the WLCG Web Proxy Auto Discovery machines
    27  	geo_ip_sites := [...]string{"wlcg-wpad.cern.ch", "wlcg-wpad.fnal.gov"}
    28  
    29  	// randomize the geo ip sitess
    30  	rand.Seed(time.Now().UnixNano())
    31  	rand.Shuffle(len(geo_ip_sites), func(i, j int) {
    32  		geo_ip_sites[i], geo_ip_sites[j] = geo_ip_sites[j], geo_ip_sites[i]
    33  	})
    34  
    35  	var caches_list []string
    36  
    37  	// Check if the user provided a caches json file location
    38  	if CachesJsonLocation != "" {
    39  		if _, err := os.Stat(CachesJsonLocation); os.IsNotExist(err) {
    40  			// path does not exist
    41  			log.Errorln(CachesJsonLocation, "does not exist")
    42  
    43  			return nil, errors.New("Unable to open caches json file at: " + CachesJsonLocation)
    44  		}
    45  
    46  		//Use geo ip api on caches in provided json file
    47  		//caches_list := get_json_caches(caches_json_location)
    48  		var caches_string string = ""
    49  
    50  		for _, cache := range caches_list {
    51  			parsed_url, err := url.Parse(cache)
    52  			if err != nil {
    53  				log.Errorln("Could not parse URL")
    54  			}
    55  
    56  			caches_string = caches_string + parsed_url.Host
    57  
    58  			// Remove the first comma
    59  			caches_string = string([]rune(caches_string)[1:])
    60  			GeoIpUrl.Path = "api/v1.0/geo/stashcp/" + caches_string
    61  		}
    62  	} else {
    63  		//Use Stashservers.dat api
    64  
    65  		//api_text = "stashservers.dat"
    66  		GeoIpUrl.Path = "stashservers.dat"
    67  
    68  		if cacheListName != "" {
    69  			queryParams := GeoIpUrl.Query()
    70  			queryParams.Set("list", cacheListName)
    71  			GeoIpUrl.RawQuery = queryParams.Encode()
    72  		}
    73  	}
    74  
    75  	var responselines_b [][]byte
    76  
    77  	type header struct {
    78  		Host string
    79  	}
    80  
    81  	i := 0
    82  
    83  	for i = 0; i < len(geo_ip_sites); i++ {
    84  
    85  		cur_site := geo_ip_sites[i]
    86  		var headers header
    87  		headers.Host = cur_site
    88  		log.Debugf("Trying server site of %s", cur_site)
    89  
    90  		for _, ip := range get_ips(cur_site) {
    91  			GeoIpUrl.Host = ip
    92  			GeoIpUrl.Scheme = "http"
    93  
    94  			// Headers for the HTTP request
    95  			// Create an HTTP client
    96  			var resp *http.Response
    97  			disableProxy := false
    98  			skipResponse := false
    99  			for {
   100  				defaultTransport := http.DefaultTransport.(*http.Transport).Clone()
   101  				if disableProxy {
   102  					log.Debugln("Querying (without proxy)", GeoIpUrl.String())
   103  					defaultTransport.Proxy = nil
   104  				} else {
   105  					log.Debugln("Querying", GeoIpUrl.String())
   106  				}
   107  				client := &http.Client{Transport: defaultTransport}
   108  				req, err := http.NewRequest("GET", GeoIpUrl.String(), nil)
   109  				if err != nil {
   110  					log.Errorln("Failed to create HTTP request:", err)
   111  					skipResponse = true
   112  					break
   113  				}
   114  				req.Header.Add("Cache-control", "max-age=0")
   115  				req.Header.Add("User-Agent", "stashcp/"+Options.Version)
   116  				resp, err = client.Do(req)
   117  				if err == nil {
   118  					break
   119  				}
   120  				if urle, ok := err.(*url.Error); ok && urle.Unwrap() != nil {
   121  					if ope, ok := urle.Unwrap().(*net.OpError); ok && ope.Op == "proxyconnect" {
   122  						log.Warnln("Failed to connect to proxy; will retry without. ", ope)
   123  						if !disableProxy {
   124  							disableProxy = true
   125  							continue
   126  						}
   127  					}
   128  				}
   129  				log.Errorln("Could not open URL", err)
   130  				skipResponse = true
   131  				break
   132  			}
   133  			if skipResponse {
   134  				continue
   135  			}
   136  
   137  			if resp.StatusCode == 200 {
   138  				log.Debugf("Got OK code 200 from %s", cur_site)
   139  				responsetext_b, err := io.ReadAll(resp.Body)
   140  				if err != nil {
   141  					log.Errorln("Could not aquire http response text")
   142  				}
   143  				//responsetext_s := string(responsetext_b)
   144  				//log.Debugln("Recieved from GeoIP server:", responsetext_s)
   145  				responselines_b = bytes.Split(responsetext_b, []byte("\n"))
   146  				defer resp.Body.Close()
   147  				break
   148  			}
   149  		}
   150  
   151  		// If we got a response, then stop trying other geoip servers
   152  		if len(responselines_b) > 0 {
   153  			break
   154  		}
   155  
   156  	}
   157  	order_str := ""
   158  
   159  	if len(responselines_b) > 0 {
   160  		order_str = string(responselines_b[0])
   161  	}
   162  
   163  	if order_str == "" {
   164  		if len(caches_list) == 0 {
   165  			log.Errorln("unable to get list of caches")
   166  			return nil, errors.New("Unable to get the list of caches")
   167  		}
   168  		//Unable to find a geo_ip server to user, return random choice from caches
   169  		rand.Shuffle(len(caches_list), func(i, j int) {
   170  			caches_list[i], caches_list[j] = caches_list[j], caches_list[i]
   171  		})
   172  		minsite := caches_list[0]
   173  		NearestCacheList = caches_list
   174  		log.Debugf("Unable to use Geoip to find closest cache!  Returning random cache %s", minsite)
   175  		log.Debugf("Randomized list of nearest caches: %s", strings.Join(caches_list, ","))
   176  		return caches_list, nil
   177  	} else {
   178  		// The order string should be something like: 3,1,2
   179  		ordered_list := strings.Split(strings.TrimSpace(order_str), ",")
   180  		log.Debugln("Ordered list of caches:", ordered_list)
   181  
   182  		//Used the stashservers.dat api
   183  		var err error
   184  		cachesList, err := get_stashservers_caches(responselines_b)
   185  
   186  		if err != nil {
   187  			log.Errorln("Error from getting stashcache caches:", err)
   188  			return nil, err
   189  		}
   190  
   191  		// Ordered list is an array of index values which are used
   192  		// to index into caches_list
   193  		minIndex, err := strconv.Atoi(ordered_list[0])
   194  		if err != nil {
   195  			log.Errorln("Received a non integer min site from the WPAD servers")
   196  			return nil, errors.New("Received a non integer min site from the WPAD servers")
   197  		}
   198  		minsite := cachesList[cacheListName][minIndex-1]
   199  		log.Debugln("Closest cache:", minsite)
   200  
   201  		for _, ordered_index := range ordered_list {
   202  			orderedIndex, _ := strconv.Atoi(ordered_index)
   203  			NearestCacheList = append(NearestCacheList, cachesList[cacheListName][orderedIndex-1])
   204  		}
   205  
   206  		log.Debugf("Returning closest cache: %s", minsite)
   207  		log.Debugf("Ordered list of nearest caches: %s", NearestCacheList)
   208  		return NearestCacheList, nil
   209  	}
   210  }