github.com/htcondor/osdf-client/v6@v6.13.0-rc1.0.20231009141709-766e7b4d1dc8/main.go (about)

     1  package stashcp
     2  
     3  import (
     4  	"encoding/json"
     5  	"errors"
     6  	"fmt"
     7  	"net"
     8  	"net/url"
     9  	"regexp"
    10  	"strconv"
    11  	"strings"
    12  
    13  	//"net/http"
    14  	"math/rand"
    15  	"os"
    16  	"path"
    17  	"path/filepath"
    18  	"time"
    19  
    20  	// "crypto/sha1"
    21  	// "encoding/hex"
    22  	// "strings"
    23  
    24  	log "github.com/sirupsen/logrus"
    25  
    26  	namespaces "github.com/htcondor/osdf-client/v6/namespaces"
    27  )
    28  
    29  type OptionsStruct struct {
    30  	ProgressBars bool
    31  	Recursive    bool
    32  	Token        string
    33  	Version      string
    34  }
    35  
    36  var Options OptionsStruct
    37  
    38  var (
    39  	version string
    40  )
    41  
    42  // Nearest cache
    43  var NearestCache string
    44  
    45  // List of caches, in order from closest to furthest
    46  var NearestCacheList []string
    47  var CachesJsonLocation string
    48  
    49  // Number of caches to attempt to use in any invocation
    50  var CachesToTry int = 3
    51  
    52  // CacheOverride
    53  var CacheOverride bool
    54  
    55  type payloadStruct struct {
    56  	filename     string
    57  	sitename     string
    58  	status       string
    59  	Owner        string
    60  	ProjectName  string
    61  	version      string
    62  	start1       int64
    63  	end1         int64
    64  	timestamp    int64
    65  	downloadTime int64
    66  	fileSize     int64
    67  	downloadSize int64
    68  }
    69  
    70  // Determine the token name if it is embedded in the scheme, Condor-style
    71  func getTokenName(destination *url.URL) (scheme, tokenName string) {
    72  	schemePieces := strings.Split(destination.Scheme, "+")
    73  	tokenName = ""
    74  	// Scheme is always the last piece
    75  	scheme = schemePieces[len(schemePieces)-1]
    76  	// If there are 2 or more pieces, token name is everything but the last item, joined with a +
    77  	if len(schemePieces) > 1 {
    78  		tokenName = strings.Join(schemePieces[:len(schemePieces)-1], "+")
    79  	}
    80  	return
    81  }
    82  
    83  // Do writeback to stash using SciTokens
    84  func doWriteBack(source string, destination *url.URL, namespace namespaces.Namespace) (int64, error) {
    85  
    86  	scitoken_contents, err := getToken(destination, namespace, true, "")
    87  	if err != nil {
    88  		return 0, err
    89  	}
    90  	return UploadFile(source, destination, scitoken_contents, namespace)
    91  
    92  }
    93  
    94  // getToken returns the token to use for the given destination
    95  //
    96  // If token_name is not empty, it will be used as the token name.
    97  // If token_name is empty, the token name will be determined from the destination URL (if possible) using getTokenName
    98  func getToken(destination *url.URL, namespace namespaces.Namespace, isWrite bool, token_name string) (string, error) {
    99  	if token_name == "" {
   100  		_, token_name = getTokenName(destination)
   101  	}
   102  
   103  	type tokenJson struct {
   104  		AccessKey string `json:"access_token"`
   105  		ExpiresIn int    `json:"expires_in"`
   106  	}
   107  	/*
   108  		Search for the location of the authentiction token.  It can be set explicitly on the command line (TODO),
   109  		with the environment variable "TOKEN", or it can be searched in the standard HTCondor directory pointed
   110  		to by the environment variable "_CONDOR_CREDS".
   111  	*/
   112  	var token_location string
   113  	if Options.Token != "" {
   114  		token_location = Options.Token
   115  		log.Debugln("Getting token location from command line:", Options.Token)
   116  	} else {
   117  
   118  		// WLCG Token Discovery
   119  		if bearerToken, isBearerTokenSet := os.LookupEnv("BEARER_TOKEN"); isBearerTokenSet {
   120  			return bearerToken, nil
   121  		} else if bearerTokenFile, isBearerTokenFileSet := os.LookupEnv("BEARER_TOKEN_FILE"); isBearerTokenFileSet {
   122  			if _, err := os.Stat(bearerTokenFile); err != nil {
   123  				log.Warningln("Environment variable BEARER_TOKEN_FILE is set, but file being point to does not exist:", err)
   124  			} else {
   125  				token_location = bearerTokenFile
   126  			}
   127  		}
   128  		if xdgRuntimeDir, xdgRuntimeDirSet := os.LookupEnv("XDG_RUNTIME_DIR"); token_location == "" && xdgRuntimeDirSet {
   129  			// Get the uid
   130  			uid := os.Getuid()
   131  			tmpTokenPath := filepath.Join(xdgRuntimeDir, "bt_u"+strconv.Itoa(uid))
   132  			if _, err := os.Stat(tmpTokenPath); err == nil {
   133  				token_location = tmpTokenPath
   134  			}
   135  		}
   136  
   137  		// Check for /tmp/bt_u<uid>
   138  		if token_location == "" {
   139  			uid := os.Getuid()
   140  			tmpTokenPath := "/tmp/bt_u" + strconv.Itoa(uid)
   141  			if _, err := os.Stat(tmpTokenPath); err == nil {
   142  				token_location = tmpTokenPath
   143  			}
   144  		}
   145  
   146  		// Backwards compatibility for getting scitokens
   147  		// If TOKEN is not set in environment, and _CONDOR_CREDS is set, then...
   148  		if tokenFile, isTokenSet := os.LookupEnv("TOKEN"); isTokenSet && token_location == "" {
   149  			if _, err := os.Stat(tokenFile); err != nil {
   150  				log.Warningln("Environment variable TOKEN is set, but file being point to does not exist:", err)
   151  			} else {
   152  				token_location = tokenFile
   153  			}
   154  		}
   155  
   156  		// Finally, look in the HTCondor runtime
   157  		if token_location == "" {
   158  			token_location = discoverHTCondorToken(token_name)
   159  		}
   160  
   161  		if token_location == "" {
   162  			value, err := AcquireToken(destination, namespace, isWrite)
   163  			if err == nil {
   164  				return value, nil
   165  			}
   166  			log.Errorln("Failed to generate a new authorization token for this transfer: ", err)
   167  			log.Errorln("This transfer requires authorization to complete and no token is available")
   168  			err = errors.New("failed to find or generate a token as required for " + destination.String())
   169  			AddError(err)
   170  			return "", err
   171  		}
   172  	}
   173  
   174  	//Read in the JSON
   175  	log.Debug("Opening token file: " + token_location)
   176  	tokenContents, err := os.ReadFile(token_location)
   177  	if err != nil {
   178  		log.Errorln("Error reading token file:", err)
   179  		return "", err
   180  	}
   181  	tokenParsed := tokenJson{}
   182  	if err := json.Unmarshal(tokenContents, &tokenParsed); err != nil {
   183  		log.Debugln("Error unmarshalling JSON token contents:", err)
   184  		log.Debugln("Assuming the token file is not JSON, and only contains the TOKEN")
   185  		tokenStr := strings.TrimSpace(string(tokenContents))
   186  		return tokenStr, nil
   187  	}
   188  	return tokenParsed.AccessKey, nil
   189  }
   190  
   191  func GetCacheHostnames(testFile string) (urls []string, err error) {
   192  
   193  	ns, err := namespaces.MatchNamespace(testFile)
   194  	if err != nil {
   195  		return
   196  	}
   197  
   198  	caches, err := GetCachesFromNamespace(ns)
   199  	if err != nil {
   200  		return
   201  	}
   202  
   203  	for _, cache := range caches {
   204  		url_string := cache.AuthEndpoint
   205  		host := strings.Split(url_string, ":")[0]
   206  		urls = append(urls, host)
   207  	}
   208  
   209  	return
   210  }
   211  
   212  func GetCachesFromNamespace(namespace namespaces.Namespace) (caches []namespaces.Cache, err error) {
   213  
   214  	cacheListName := "xroot"
   215  	if namespace.ReadHTTPS || namespace.UseTokenOnRead {
   216  		cacheListName = "xroots"
   217  	}
   218  	if len(NearestCacheList) == 0 {
   219  		_, err = GetBestCache(cacheListName)
   220  		if err != nil {
   221  			log.Errorln("Failed to get best caches:", err)
   222  			return
   223  		}
   224  	}
   225  
   226  	log.Debugln("Nearest cache list:", NearestCacheList)
   227  	log.Debugln("Cache list name:", namespace.Caches)
   228  
   229  	// The main routine can set a global cache to use
   230  	if CacheOverride {
   231  		cache := namespaces.Cache{
   232  			Endpoint:     NearestCache,
   233  			AuthEndpoint: NearestCache,
   234  			Resource:     NearestCache,
   235  		}
   236  		caches = []namespaces.Cache{cache}
   237  	} else {
   238  		caches = namespace.MatchCaches(NearestCacheList)
   239  	}
   240  	log.Debugln("Matched caches:", caches)
   241  
   242  	return
   243  }
   244  
   245  func correctURLWithUnderscore(sourceFile string) (string, string) {
   246  	schemeIndex := strings.Index(sourceFile, "://")
   247  	if schemeIndex == -1 {
   248  		return sourceFile, ""
   249  	}
   250  	
   251  	originalScheme := sourceFile[:schemeIndex]
   252  	if strings.Contains(originalScheme, "_") {
   253  		scheme := strings.ReplaceAll(originalScheme, "_", ".")
   254  		sourceFile = scheme + sourceFile[schemeIndex:]
   255  	}
   256  	return sourceFile, originalScheme
   257  }
   258  
   259  func discoverHTCondorToken(tokenName string) (string) {
   260  	tokenLocation := ""
   261  
   262  	// Tokens with dots in their name may need to have dots converted to underscores.
   263  	if strings.Contains(tokenName, ".") {
   264  		underscoreTokenName := strings.ReplaceAll(tokenName, ".", "_")
   265  		// If we find a token after replacing dots, then we're already done.
   266  		tokenLocation = discoverHTCondorToken(underscoreTokenName)
   267  		if tokenLocation != "" {
   268  			return tokenLocation
   269  		}
   270  	}
   271  
   272  	tokenFilename := "scitokens.use"
   273  	if len(tokenName) > 0 {
   274  		tokenFilename = tokenName + ".use"
   275  	}
   276  	log.Debugln("Looking for token file:", tokenFilename)
   277  	if credsDir, isCondorCredsSet := os.LookupEnv("_CONDOR_CREDS"); tokenLocation == "" && isCondorCredsSet {
   278  		// Token wasn't specified on the command line or environment, try the default scitoken
   279  		if _, err := os.Stat(filepath.Join(credsDir, tokenFilename)); err != nil {
   280  			log.Warningln("Environment variable _CONDOR_CREDS is set, but file being point to does not exist:", err)
   281  		} else {
   282  			tokenLocation = filepath.Join(credsDir, tokenFilename)
   283  		}
   284  	}
   285  	if _, err := os.Stat(".condor_creds/" + tokenFilename); err == nil && tokenLocation == "" {
   286  		tokenLocation, _ = filepath.Abs(".condor_creds/" + tokenFilename)
   287  	}
   288  	return tokenLocation
   289  }
   290  
   291  // Start the transfer, whether read or write back
   292  func DoStashCPSingle(sourceFile string, destination string, methods []string, recursive bool) (bytesTransferred int64, err error) {
   293  
   294  	// First, create a handler for any panics that occur
   295  	defer func() {
   296  		if r := recover(); r != nil {
   297  			log.Errorln("Panic captured while attempting to perform transfer (DoStashCPSingle):", r)
   298  			ret := fmt.Sprintf("Unrecoverable error (panic) captured in DoStashCPSingle: %v", r)
   299  			err = errors.New(ret)
   300  			bytesTransferred = 0
   301  
   302  			// Attempt to add the panic to the error accumulator
   303  			AddError(errors.New(ret))
   304  		}
   305  	}()
   306  
   307  	// Parse the source and destination with URL parse
   308  	sourceFile, source_scheme := correctURLWithUnderscore(sourceFile)
   309  	source_url, err := url.Parse(sourceFile)
   310  	if err != nil {
   311  		log.Errorln("Failed to parse source URL:", err)
   312  		return 0, err
   313  	}
   314  	source_url.Scheme = source_scheme
   315  	
   316  	destination, dest_scheme := correctURLWithUnderscore(destination)
   317  	dest_url, err := url.Parse(destination)
   318  	if err != nil {
   319  		log.Errorln("Failed to parse destination URL:", err)
   320  		return 0, err
   321  	}
   322  	dest_url.Scheme = dest_scheme
   323  
   324  	// If there is a host specified, prepend it to the path
   325  	if source_url.Host != "" {
   326  		source_url.Path = "/" + path.Join(source_url.Host, source_url.Path)
   327  	}
   328  
   329  	if dest_url.Host != "" {
   330  		dest_url.Path = path.Join(dest_url.Host, dest_url.Path)
   331  	}
   332  
   333  	sourceScheme, _ := getTokenName(source_url)
   334  	destScheme, _ := getTokenName(dest_url)
   335  
   336  	understoodSchemes := []string{"stash", "file", "osdf", ""}
   337  
   338  	_, foundSource := Find(understoodSchemes, sourceScheme)
   339  	if !foundSource {
   340  		log.Errorln("Do not understand source scheme:", source_url.Scheme)
   341  		return 0, errors.New("Do not understand source scheme")
   342  	}
   343  
   344  	_, foundDest := Find(understoodSchemes, destScheme)
   345  	if !foundDest {
   346  		log.Errorln("Do not understand destination scheme:", source_url.Scheme)
   347  		return 0, errors.New("Do not understand destination scheme")
   348  	}
   349  
   350  	// Get the namespace of the remote filesystem
   351  	// For write back, it will be the destination
   352  	// For read it will be the source.
   353  
   354  	if destScheme == "stash" || destScheme == "osdf" {
   355  		log.Debugln("Detected writeback")
   356  		ns, err := namespaces.MatchNamespace(dest_url.Path)
   357  		if err != nil {
   358  			log.Errorln("Failed to get namespace information:", err)
   359  		}
   360  		return doWriteBack(source_url.Path, dest_url, ns)
   361  	}
   362  
   363  	if dest_url.Scheme == "file" {
   364  		destination = dest_url.Path
   365  	}
   366  
   367  	if sourceScheme == "stash" || sourceScheme == "osdf" {
   368  		sourceFile = source_url.Path
   369  	}
   370  
   371  	if string(sourceFile[0]) != "/" {
   372  		sourceFile = "/" + sourceFile
   373  	}
   374  
   375  	OSDFDirectorUrl, useOSDFDirector := os.LookupEnv("OSDF_DIRECTOR_URL")
   376  
   377  	var ns namespaces.Namespace
   378  	if useOSDFDirector {
   379  		dirResp, err := QueryDirector(sourceFile, OSDFDirectorUrl)
   380  		if err != nil {
   381  			log.Errorln("Error while querying the Director:", err)
   382  			return 0, err
   383  		}
   384  		err = CreateNsFromDirectorResp(dirResp, &ns)
   385  		if err != nil {
   386  			AddError(err)
   387  			return 0, err
   388  		}
   389  	} else {
   390  		ns, err = namespaces.MatchNamespace(source_url.Path)
   391  		if err != nil {
   392  			AddError(err)
   393  			return 0, err
   394  		}
   395  	}
   396  
   397  	// get absolute path
   398  	destPath, _ := filepath.Abs(destination)
   399  
   400  	//Check if path exists or if its in a folder
   401  	if destStat, err := os.Stat(destPath); os.IsNotExist(err) {
   402  		destination = destPath
   403  	} else if destStat.IsDir() {
   404  		// Get the file name of the source
   405  		sourceFilename := path.Base(sourceFile)
   406  		destination = path.Join(destPath, sourceFilename)
   407  	}
   408  
   409  	payload := payloadStruct{}
   410  	payload.version = version
   411  	var found bool
   412  	payload.sitename, found = os.LookupEnv("OSG_SITE_NAME")
   413  	if !found {
   414  		payload.sitename = "siteNotFound"
   415  	}
   416  
   417  	//Fill out the payload as much as possible
   418  	payload.filename = source_url.Path
   419  
   420  	// ??
   421  
   422  	parse_job_ad(payload)
   423  
   424  	payload.start1 = time.Now().Unix()
   425  
   426  	// Go thru the download methods
   427  	success := false
   428  
   429  	// If recursive, only do http method to guarantee freshest directory contents
   430  	if Options.Recursive {
   431  		methods = []string{"http"}
   432  	}
   433  
   434  	_, token_name := getTokenName(source_url)
   435  
   436  	// switch statement?
   437  	var downloaded int64 = 0
   438  Loop:
   439  	for _, method := range methods {
   440  
   441  		switch method {
   442  		case "cvmfs":
   443  			if strings.HasPrefix(sourceFile, "/osgconnect/") {
   444  				log.Info("Trying CVMFS...")
   445  				if downloaded, err = download_cvmfs(sourceFile, destination, &payload); err == nil {
   446  					success = true
   447  					break Loop
   448  					//check if break still works
   449  				}
   450  			} else {
   451  				log.Debug("Skipping CVMFS as file does not start with /osgconnect/")
   452  			}
   453  		case "http":
   454  			log.Info("Trying HTTP...")
   455  			if downloaded, err = download_http(sourceFile, destination, &payload, ns, recursive, token_name, OSDFDirectorUrl); err == nil {
   456  				success = true
   457  				break Loop
   458  			}
   459  
   460  		default:
   461  			log.Errorf("Unknown transfer method: %s", method)
   462  		}
   463  	}
   464  
   465  	payload.end1 = time.Now().Unix()
   466  
   467  	payload.timestamp = payload.end1
   468  	payload.downloadTime = (payload.end1 - payload.start1)
   469  
   470  	if success {
   471  		payload.status = "Success"
   472  
   473  		// Get the final size of the download file
   474  		payload.fileSize = downloaded
   475  		payload.downloadSize = downloaded
   476  	} else {
   477  		log.Error("All methods failed! Unable to download file.")
   478  		payload.status = "Fail"
   479  	}
   480  
   481  	if !success {
   482  		return downloaded, errors.New("failed to download file")
   483  	} else {
   484  		return downloaded, nil
   485  	}
   486  
   487  }
   488  
   489  // Find takes a slice and looks for an element in it. If found it will
   490  // return it's key, otherwise it will return -1 and a bool of false.
   491  // From https://golangcode.com/check-if-element-exists-in-slice/
   492  func Find(slice []string, val string) (int, bool) {
   493  	for i, item := range slice {
   494  		if item == val {
   495  			return i, true
   496  		}
   497  	}
   498  	return -1, false
   499  }
   500  
   501  // get_ips will resolve a hostname and return all corresponding IP addresses
   502  // in DNS.  This can be used to randomly pick an IP when DNS round robin
   503  // is used
   504  func get_ips(name string) []string {
   505  	var ipv4s []string
   506  	var ipv6s []string
   507  
   508  	info, err := net.LookupHost(name)
   509  	if err != nil {
   510  		log.Error("Unable to look up", name)
   511  
   512  		var empty []string
   513  		return empty
   514  	}
   515  
   516  	for _, addr := range info {
   517  		parsedIP := net.ParseIP(addr)
   518  
   519  		if parsedIP.To4() != nil {
   520  			ipv4s = append(ipv4s, addr)
   521  		} else if parsedIP.To16() != nil {
   522  			ipv6s = append(ipv6s, "["+addr+"]")
   523  		}
   524  	}
   525  
   526  	//Randomize the order of each
   527  	rand.Seed(time.Now().UnixNano())
   528  	rand.Shuffle(len(ipv4s), func(i, j int) { ipv4s[i], ipv4s[j] = ipv4s[j], ipv4s[i] })
   529  	rand.Shuffle(len(ipv6s), func(i, j int) { ipv6s[i], ipv6s[j] = ipv6s[j], ipv6s[i] })
   530  
   531  	// Always prefer IPv4
   532  	return append(ipv4s, ipv6s...)
   533  
   534  }
   535  
   536  func parse_job_ad(payload payloadStruct) { // TODO: needs the payload
   537  
   538  	//Parse the .job.ad file for the Owner (username) and ProjectName of the callee.
   539  
   540  	condorJobAd, isPresent := os.LookupEnv("_CONDOR_JOB_AD")
   541  	var filename string
   542  	if isPresent {
   543  		filename = condorJobAd
   544  	} else if _, err := os.Stat(".job.ad"); err == nil {
   545  		filename = ".job.ad"
   546  	} else {
   547  		return
   548  	}
   549  
   550  	// https://stackoverflow.com/questions/28574609/how-to-apply-regexp-to-content-in-file-go
   551  
   552  	b, err := os.ReadFile(filename)
   553  	if err != nil {
   554  		log.Warningln("Can not read .job.ad file", err)
   555  	}
   556  
   557  	// Get all matches from file
   558  	classadRegex, e := regexp.Compile(`^\s*(Owner|ProjectName)\s=\s"(.*)"`)
   559  	if e != nil {
   560  		log.Fatal(e)
   561  	}
   562  
   563  	matches := classadRegex.FindAll(b, -1)
   564  
   565  	for _, match := range matches {
   566  		if string(match[0]) == "Owner" {
   567  			payload.Owner = string(match[1])
   568  		} else if string(match) == "ProjectName" {
   569  			payload.ProjectName = string(match[1])
   570  		}
   571  	}
   572  
   573  }