github.com/pelicanplatform/pelican@v1.0.5/client/get_best_cache.go (about)

     1  /***************************************************************
     2   *
     3   * Copyright (C) 2023, University of Nebraska-Lincoln
     4   *
     5   * Licensed under the Apache License, Version 2.0 (the "License"); you
     6   * may not use this file except in compliance with the License.  You may
     7   * obtain a copy of the License at
     8   *
     9   *    http://www.apache.org/licenses/LICENSE-2.0
    10   *
    11   * Unless required by applicable law or agreed to in writing, software
    12   * distributed under the License is distributed on an "AS IS" BASIS,
    13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    14   * See the License for the specific language governing permissions and
    15   * limitations under the License.
    16   *
    17   ***************************************************************/
    18  
    19  package client
    20  
    21  import (
    22  	"bytes"
    23  	"errors"
    24  	"io"
    25  	"math/rand"
    26  	"net"
    27  	"net/http"
    28  	"net/url"
    29  	"os"
    30  	"strconv"
    31  	"strings"
    32  
    33  	log "github.com/sirupsen/logrus"
    34  )
    35  
    36  func GetBestCache(cacheListName string) ([]string, error) {
    37  
    38  	if cacheListName == "" {
    39  		cacheListName = "xroot"
    40  	}
    41  
    42  	GeoIpUrl := url.URL{}
    43  	// Use the geo ip service on the WLCG Web Proxy Auto Discovery machines
    44  	geo_ip_sites := [...]string{"wlcg-wpad.cern.ch", "wlcg-wpad.fnal.gov"}
    45  
    46  	// randomize the geo ip sitess
    47  	rand.Shuffle(len(geo_ip_sites), func(i, j int) {
    48  		geo_ip_sites[i], geo_ip_sites[j] = geo_ip_sites[j], geo_ip_sites[i]
    49  	})
    50  
    51  	var caches_list []string
    52  
    53  	// Check if the user provided a caches json file location
    54  	if CachesJsonLocation != "" {
    55  		if _, err := os.Stat(CachesJsonLocation); os.IsNotExist(err) {
    56  			// path does not exist
    57  			log.Errorln(CachesJsonLocation, "does not exist")
    58  
    59  			return nil, errors.New("Unable to open caches json file at: " + CachesJsonLocation)
    60  		}
    61  
    62  		//Use geo ip api on caches in provided json file
    63  		//caches_list := get_json_caches(caches_json_location)
    64  		var caches_string string = ""
    65  
    66  		for _, cache := range caches_list {
    67  			parsed_url, err := url.Parse(cache)
    68  			if err != nil {
    69  				log.Errorln("Could not parse URL")
    70  			}
    71  
    72  			caches_string = caches_string + parsed_url.Host
    73  
    74  			// Remove the first comma
    75  			caches_string = string([]rune(caches_string)[1:])
    76  			GeoIpUrl.Path = "api/v1.0/geo/stashcp/" + caches_string
    77  		}
    78  	} else {
    79  		//Use Stashservers.dat api
    80  
    81  		//api_text = "stashservers.dat"
    82  		GeoIpUrl.Path = "stashservers.dat"
    83  
    84  		if cacheListName != "" {
    85  			queryParams := GeoIpUrl.Query()
    86  			queryParams.Set("list", cacheListName)
    87  			GeoIpUrl.RawQuery = queryParams.Encode()
    88  		}
    89  	}
    90  
    91  	var responselines_b [][]byte
    92  
    93  	type header struct {
    94  		Host string
    95  	}
    96  
    97  	i := 0
    98  
    99  	for i = 0; i < len(geo_ip_sites); i++ {
   100  
   101  		cur_site := geo_ip_sites[i]
   102  		var headers header
   103  		headers.Host = cur_site
   104  		log.Debugf("Trying server site of %s", cur_site)
   105  
   106  		for _, ip := range get_ips(cur_site) {
   107  			GeoIpUrl.Host = ip
   108  			GeoIpUrl.Scheme = "http"
   109  
   110  			// Headers for the HTTP request
   111  			// Create an HTTP client
   112  			var resp *http.Response
   113  			disableProxy := false
   114  			skipResponse := false
   115  			for {
   116  				defaultTransport := http.DefaultTransport.(*http.Transport).Clone()
   117  				if disableProxy {
   118  					log.Debugln("Querying (without proxy)", GeoIpUrl.String())
   119  					defaultTransport.Proxy = nil
   120  				} else {
   121  					log.Debugln("Querying", GeoIpUrl.String())
   122  				}
   123  				client := &http.Client{Transport: defaultTransport}
   124  				req, err := http.NewRequest("GET", GeoIpUrl.String(), nil)
   125  				if err != nil {
   126  					log.Errorln("Failed to create HTTP request:", err)
   127  					skipResponse = true
   128  					break
   129  				}
   130  				req.Header.Add("Cache-control", "max-age=0")
   131  				req.Header.Add("User-Agent", "pelican/"+ObjectClientOptions.Version)
   132  				resp, err = client.Do(req)
   133  				if err == nil {
   134  					break
   135  				}
   136  				if urle, ok := err.(*url.Error); ok && urle.Unwrap() != nil {
   137  					if ope, ok := urle.Unwrap().(*net.OpError); ok && ope.Op == "proxyconnect" {
   138  						log.Warnln("Failed to connect to proxy; will retry without. ", ope)
   139  						if !disableProxy {
   140  							disableProxy = true
   141  							continue
   142  						}
   143  					}
   144  				}
   145  				log.Errorln("Could not open URL", err)
   146  				skipResponse = true
   147  				break
   148  			}
   149  			if skipResponse {
   150  				continue
   151  			}
   152  
   153  			if resp.StatusCode == 200 {
   154  				log.Debugf("Got OK code 200 from %s", cur_site)
   155  				responsetext_b, err := io.ReadAll(resp.Body)
   156  				if err != nil {
   157  					log.Errorln("Could not aquire http response text")
   158  				}
   159  				//responsetext_s := string(responsetext_b)
   160  				//log.Debugln("Recieved from GeoIP server:", responsetext_s)
   161  				responselines_b = bytes.Split(responsetext_b, []byte("\n"))
   162  				defer resp.Body.Close()
   163  				break
   164  			}
   165  		}
   166  
   167  		// If we got a response, then stop trying other geoip servers
   168  		if len(responselines_b) > 0 {
   169  			break
   170  		}
   171  
   172  	}
   173  	order_str := ""
   174  
   175  	if len(responselines_b) > 0 {
   176  		order_str = string(responselines_b[0])
   177  	}
   178  
   179  	if order_str == "" {
   180  		if len(caches_list) == 0 {
   181  			log.Errorln("unable to get list of caches")
   182  			return nil, errors.New("Unable to get the list of caches")
   183  		}
   184  		//Unable to find a geo_ip server to user, return random choice from caches
   185  		rand.Shuffle(len(caches_list), func(i, j int) {
   186  			caches_list[i], caches_list[j] = caches_list[j], caches_list[i]
   187  		})
   188  		minsite := caches_list[0]
   189  		NearestCacheList = caches_list
   190  		log.Debugf("Unable to use Geoip to find closest cache!  Returning random cache %s", minsite)
   191  		log.Debugf("Randomized list of nearest caches: %s", strings.Join(caches_list, ","))
   192  		return caches_list, nil
   193  	} else {
   194  		// The order string should be something like: 3,1,2
   195  		ordered_list := strings.Split(strings.TrimSpace(order_str), ",")
   196  		log.Debugln("Ordered list of caches:", ordered_list)
   197  
   198  		//Used the stashservers.dat api
   199  		var err error
   200  		cachesList, err := get_stashservers_caches(responselines_b)
   201  
   202  		if err != nil {
   203  			log.Errorln("Error from getting stashcache caches:", err)
   204  			return nil, err
   205  		}
   206  
   207  		// Ordered list is an array of index values which are used
   208  		// to index into caches_list
   209  		minIndex, err := strconv.Atoi(ordered_list[0])
   210  		if err != nil {
   211  			log.Errorln("Received a non integer min site from the WPAD servers")
   212  			return nil, errors.New("Received a non integer min site from the WPAD servers")
   213  		}
   214  		minsite := cachesList[cacheListName][minIndex-1]
   215  		log.Debugln("Closest cache:", minsite)
   216  
   217  		for _, ordered_index := range ordered_list {
   218  			orderedIndex, _ := strconv.Atoi(ordered_index)
   219  			NearestCacheList = append(NearestCacheList, cachesList[cacheListName][orderedIndex-1])
   220  		}
   221  
   222  		log.Debugf("Returning closest cache: %s", minsite)
   223  		log.Debugf("Ordered list of nearest caches: %s", NearestCacheList)
   224  		return NearestCacheList, nil
   225  	}
   226  }