github.com/htcondor/osdf-client/v6@v6.13.0-rc1.0.20231009141709-766e7b4d1dc8/main.go (about) 1 package stashcp 2 3 import ( 4 "encoding/json" 5 "errors" 6 "fmt" 7 "net" 8 "net/url" 9 "regexp" 10 "strconv" 11 "strings" 12 13 //"net/http" 14 "math/rand" 15 "os" 16 "path" 17 "path/filepath" 18 "time" 19 20 // "crypto/sha1" 21 // "encoding/hex" 22 // "strings" 23 24 log "github.com/sirupsen/logrus" 25 26 namespaces "github.com/htcondor/osdf-client/v6/namespaces" 27 ) 28 29 type OptionsStruct struct { 30 ProgressBars bool 31 Recursive bool 32 Token string 33 Version string 34 } 35 36 var Options OptionsStruct 37 38 var ( 39 version string 40 ) 41 42 // Nearest cache 43 var NearestCache string 44 45 // List of caches, in order from closest to furthest 46 var NearestCacheList []string 47 var CachesJsonLocation string 48 49 // Number of caches to attempt to use in any invocation 50 var CachesToTry int = 3 51 52 // CacheOverride 53 var CacheOverride bool 54 55 type payloadStruct struct { 56 filename string 57 sitename string 58 status string 59 Owner string 60 ProjectName string 61 version string 62 start1 int64 63 end1 int64 64 timestamp int64 65 downloadTime int64 66 fileSize int64 67 downloadSize int64 68 } 69 70 // Determine the token name if it is embedded in the scheme, Condor-style 71 func getTokenName(destination *url.URL) (scheme, tokenName string) { 72 schemePieces := strings.Split(destination.Scheme, "+") 73 tokenName = "" 74 // Scheme is always the last piece 75 scheme = schemePieces[len(schemePieces)-1] 76 // If there are 2 or more pieces, token name is everything but the last item, joined with a + 77 if len(schemePieces) > 1 { 78 tokenName = strings.Join(schemePieces[:len(schemePieces)-1], "+") 79 } 80 return 81 } 82 83 // Do writeback to stash using SciTokens 84 func doWriteBack(source string, destination *url.URL, namespace namespaces.Namespace) (int64, error) { 85 86 scitoken_contents, err := getToken(destination, namespace, true, "") 87 if err != nil { 88 return 0, err 89 } 90 return UploadFile(source, destination, scitoken_contents, namespace) 91 92 } 93 94 // getToken returns the token to use for the given destination 95 // 96 // If token_name is not empty, it will be used as the token name. 97 // If token_name is empty, the token name will be determined from the destination URL (if possible) using getTokenName 98 func getToken(destination *url.URL, namespace namespaces.Namespace, isWrite bool, token_name string) (string, error) { 99 if token_name == "" { 100 _, token_name = getTokenName(destination) 101 } 102 103 type tokenJson struct { 104 AccessKey string `json:"access_token"` 105 ExpiresIn int `json:"expires_in"` 106 } 107 /* 108 Search for the location of the authentiction token. It can be set explicitly on the command line (TODO), 109 with the environment variable "TOKEN", or it can be searched in the standard HTCondor directory pointed 110 to by the environment variable "_CONDOR_CREDS". 111 */ 112 var token_location string 113 if Options.Token != "" { 114 token_location = Options.Token 115 log.Debugln("Getting token location from command line:", Options.Token) 116 } else { 117 118 // WLCG Token Discovery 119 if bearerToken, isBearerTokenSet := os.LookupEnv("BEARER_TOKEN"); isBearerTokenSet { 120 return bearerToken, nil 121 } else if bearerTokenFile, isBearerTokenFileSet := os.LookupEnv("BEARER_TOKEN_FILE"); isBearerTokenFileSet { 122 if _, err := os.Stat(bearerTokenFile); err != nil { 123 log.Warningln("Environment variable BEARER_TOKEN_FILE is set, but file being point to does not exist:", err) 124 } else { 125 token_location = bearerTokenFile 126 } 127 } 128 if xdgRuntimeDir, xdgRuntimeDirSet := os.LookupEnv("XDG_RUNTIME_DIR"); token_location == "" && xdgRuntimeDirSet { 129 // Get the uid 130 uid := os.Getuid() 131 tmpTokenPath := filepath.Join(xdgRuntimeDir, "bt_u"+strconv.Itoa(uid)) 132 if _, err := os.Stat(tmpTokenPath); err == nil { 133 token_location = tmpTokenPath 134 } 135 } 136 137 // Check for /tmp/bt_u<uid> 138 if token_location == "" { 139 uid := os.Getuid() 140 tmpTokenPath := "/tmp/bt_u" + strconv.Itoa(uid) 141 if _, err := os.Stat(tmpTokenPath); err == nil { 142 token_location = tmpTokenPath 143 } 144 } 145 146 // Backwards compatibility for getting scitokens 147 // If TOKEN is not set in environment, and _CONDOR_CREDS is set, then... 148 if tokenFile, isTokenSet := os.LookupEnv("TOKEN"); isTokenSet && token_location == "" { 149 if _, err := os.Stat(tokenFile); err != nil { 150 log.Warningln("Environment variable TOKEN is set, but file being point to does not exist:", err) 151 } else { 152 token_location = tokenFile 153 } 154 } 155 156 // Finally, look in the HTCondor runtime 157 if token_location == "" { 158 token_location = discoverHTCondorToken(token_name) 159 } 160 161 if token_location == "" { 162 value, err := AcquireToken(destination, namespace, isWrite) 163 if err == nil { 164 return value, nil 165 } 166 log.Errorln("Failed to generate a new authorization token for this transfer: ", err) 167 log.Errorln("This transfer requires authorization to complete and no token is available") 168 err = errors.New("failed to find or generate a token as required for " + destination.String()) 169 AddError(err) 170 return "", err 171 } 172 } 173 174 //Read in the JSON 175 log.Debug("Opening token file: " + token_location) 176 tokenContents, err := os.ReadFile(token_location) 177 if err != nil { 178 log.Errorln("Error reading token file:", err) 179 return "", err 180 } 181 tokenParsed := tokenJson{} 182 if err := json.Unmarshal(tokenContents, &tokenParsed); err != nil { 183 log.Debugln("Error unmarshalling JSON token contents:", err) 184 log.Debugln("Assuming the token file is not JSON, and only contains the TOKEN") 185 tokenStr := strings.TrimSpace(string(tokenContents)) 186 return tokenStr, nil 187 } 188 return tokenParsed.AccessKey, nil 189 } 190 191 func GetCacheHostnames(testFile string) (urls []string, err error) { 192 193 ns, err := namespaces.MatchNamespace(testFile) 194 if err != nil { 195 return 196 } 197 198 caches, err := GetCachesFromNamespace(ns) 199 if err != nil { 200 return 201 } 202 203 for _, cache := range caches { 204 url_string := cache.AuthEndpoint 205 host := strings.Split(url_string, ":")[0] 206 urls = append(urls, host) 207 } 208 209 return 210 } 211 212 func GetCachesFromNamespace(namespace namespaces.Namespace) (caches []namespaces.Cache, err error) { 213 214 cacheListName := "xroot" 215 if namespace.ReadHTTPS || namespace.UseTokenOnRead { 216 cacheListName = "xroots" 217 } 218 if len(NearestCacheList) == 0 { 219 _, err = GetBestCache(cacheListName) 220 if err != nil { 221 log.Errorln("Failed to get best caches:", err) 222 return 223 } 224 } 225 226 log.Debugln("Nearest cache list:", NearestCacheList) 227 log.Debugln("Cache list name:", namespace.Caches) 228 229 // The main routine can set a global cache to use 230 if CacheOverride { 231 cache := namespaces.Cache{ 232 Endpoint: NearestCache, 233 AuthEndpoint: NearestCache, 234 Resource: NearestCache, 235 } 236 caches = []namespaces.Cache{cache} 237 } else { 238 caches = namespace.MatchCaches(NearestCacheList) 239 } 240 log.Debugln("Matched caches:", caches) 241 242 return 243 } 244 245 func correctURLWithUnderscore(sourceFile string) (string, string) { 246 schemeIndex := strings.Index(sourceFile, "://") 247 if schemeIndex == -1 { 248 return sourceFile, "" 249 } 250 251 originalScheme := sourceFile[:schemeIndex] 252 if strings.Contains(originalScheme, "_") { 253 scheme := strings.ReplaceAll(originalScheme, "_", ".") 254 sourceFile = scheme + sourceFile[schemeIndex:] 255 } 256 return sourceFile, originalScheme 257 } 258 259 func discoverHTCondorToken(tokenName string) (string) { 260 tokenLocation := "" 261 262 // Tokens with dots in their name may need to have dots converted to underscores. 263 if strings.Contains(tokenName, ".") { 264 underscoreTokenName := strings.ReplaceAll(tokenName, ".", "_") 265 // If we find a token after replacing dots, then we're already done. 266 tokenLocation = discoverHTCondorToken(underscoreTokenName) 267 if tokenLocation != "" { 268 return tokenLocation 269 } 270 } 271 272 tokenFilename := "scitokens.use" 273 if len(tokenName) > 0 { 274 tokenFilename = tokenName + ".use" 275 } 276 log.Debugln("Looking for token file:", tokenFilename) 277 if credsDir, isCondorCredsSet := os.LookupEnv("_CONDOR_CREDS"); tokenLocation == "" && isCondorCredsSet { 278 // Token wasn't specified on the command line or environment, try the default scitoken 279 if _, err := os.Stat(filepath.Join(credsDir, tokenFilename)); err != nil { 280 log.Warningln("Environment variable _CONDOR_CREDS is set, but file being point to does not exist:", err) 281 } else { 282 tokenLocation = filepath.Join(credsDir, tokenFilename) 283 } 284 } 285 if _, err := os.Stat(".condor_creds/" + tokenFilename); err == nil && tokenLocation == "" { 286 tokenLocation, _ = filepath.Abs(".condor_creds/" + tokenFilename) 287 } 288 return tokenLocation 289 } 290 291 // Start the transfer, whether read or write back 292 func DoStashCPSingle(sourceFile string, destination string, methods []string, recursive bool) (bytesTransferred int64, err error) { 293 294 // First, create a handler for any panics that occur 295 defer func() { 296 if r := recover(); r != nil { 297 log.Errorln("Panic captured while attempting to perform transfer (DoStashCPSingle):", r) 298 ret := fmt.Sprintf("Unrecoverable error (panic) captured in DoStashCPSingle: %v", r) 299 err = errors.New(ret) 300 bytesTransferred = 0 301 302 // Attempt to add the panic to the error accumulator 303 AddError(errors.New(ret)) 304 } 305 }() 306 307 // Parse the source and destination with URL parse 308 sourceFile, source_scheme := correctURLWithUnderscore(sourceFile) 309 source_url, err := url.Parse(sourceFile) 310 if err != nil { 311 log.Errorln("Failed to parse source URL:", err) 312 return 0, err 313 } 314 source_url.Scheme = source_scheme 315 316 destination, dest_scheme := correctURLWithUnderscore(destination) 317 dest_url, err := url.Parse(destination) 318 if err != nil { 319 log.Errorln("Failed to parse destination URL:", err) 320 return 0, err 321 } 322 dest_url.Scheme = dest_scheme 323 324 // If there is a host specified, prepend it to the path 325 if source_url.Host != "" { 326 source_url.Path = "/" + path.Join(source_url.Host, source_url.Path) 327 } 328 329 if dest_url.Host != "" { 330 dest_url.Path = path.Join(dest_url.Host, dest_url.Path) 331 } 332 333 sourceScheme, _ := getTokenName(source_url) 334 destScheme, _ := getTokenName(dest_url) 335 336 understoodSchemes := []string{"stash", "file", "osdf", ""} 337 338 _, foundSource := Find(understoodSchemes, sourceScheme) 339 if !foundSource { 340 log.Errorln("Do not understand source scheme:", source_url.Scheme) 341 return 0, errors.New("Do not understand source scheme") 342 } 343 344 _, foundDest := Find(understoodSchemes, destScheme) 345 if !foundDest { 346 log.Errorln("Do not understand destination scheme:", source_url.Scheme) 347 return 0, errors.New("Do not understand destination scheme") 348 } 349 350 // Get the namespace of the remote filesystem 351 // For write back, it will be the destination 352 // For read it will be the source. 353 354 if destScheme == "stash" || destScheme == "osdf" { 355 log.Debugln("Detected writeback") 356 ns, err := namespaces.MatchNamespace(dest_url.Path) 357 if err != nil { 358 log.Errorln("Failed to get namespace information:", err) 359 } 360 return doWriteBack(source_url.Path, dest_url, ns) 361 } 362 363 if dest_url.Scheme == "file" { 364 destination = dest_url.Path 365 } 366 367 if sourceScheme == "stash" || sourceScheme == "osdf" { 368 sourceFile = source_url.Path 369 } 370 371 if string(sourceFile[0]) != "/" { 372 sourceFile = "/" + sourceFile 373 } 374 375 OSDFDirectorUrl, useOSDFDirector := os.LookupEnv("OSDF_DIRECTOR_URL") 376 377 var ns namespaces.Namespace 378 if useOSDFDirector { 379 dirResp, err := QueryDirector(sourceFile, OSDFDirectorUrl) 380 if err != nil { 381 log.Errorln("Error while querying the Director:", err) 382 return 0, err 383 } 384 err = CreateNsFromDirectorResp(dirResp, &ns) 385 if err != nil { 386 AddError(err) 387 return 0, err 388 } 389 } else { 390 ns, err = namespaces.MatchNamespace(source_url.Path) 391 if err != nil { 392 AddError(err) 393 return 0, err 394 } 395 } 396 397 // get absolute path 398 destPath, _ := filepath.Abs(destination) 399 400 //Check if path exists or if its in a folder 401 if destStat, err := os.Stat(destPath); os.IsNotExist(err) { 402 destination = destPath 403 } else if destStat.IsDir() { 404 // Get the file name of the source 405 sourceFilename := path.Base(sourceFile) 406 destination = path.Join(destPath, sourceFilename) 407 } 408 409 payload := payloadStruct{} 410 payload.version = version 411 var found bool 412 payload.sitename, found = os.LookupEnv("OSG_SITE_NAME") 413 if !found { 414 payload.sitename = "siteNotFound" 415 } 416 417 //Fill out the payload as much as possible 418 payload.filename = source_url.Path 419 420 // ?? 421 422 parse_job_ad(payload) 423 424 payload.start1 = time.Now().Unix() 425 426 // Go thru the download methods 427 success := false 428 429 // If recursive, only do http method to guarantee freshest directory contents 430 if Options.Recursive { 431 methods = []string{"http"} 432 } 433 434 _, token_name := getTokenName(source_url) 435 436 // switch statement? 437 var downloaded int64 = 0 438 Loop: 439 for _, method := range methods { 440 441 switch method { 442 case "cvmfs": 443 if strings.HasPrefix(sourceFile, "/osgconnect/") { 444 log.Info("Trying CVMFS...") 445 if downloaded, err = download_cvmfs(sourceFile, destination, &payload); err == nil { 446 success = true 447 break Loop 448 //check if break still works 449 } 450 } else { 451 log.Debug("Skipping CVMFS as file does not start with /osgconnect/") 452 } 453 case "http": 454 log.Info("Trying HTTP...") 455 if downloaded, err = download_http(sourceFile, destination, &payload, ns, recursive, token_name, OSDFDirectorUrl); err == nil { 456 success = true 457 break Loop 458 } 459 460 default: 461 log.Errorf("Unknown transfer method: %s", method) 462 } 463 } 464 465 payload.end1 = time.Now().Unix() 466 467 payload.timestamp = payload.end1 468 payload.downloadTime = (payload.end1 - payload.start1) 469 470 if success { 471 payload.status = "Success" 472 473 // Get the final size of the download file 474 payload.fileSize = downloaded 475 payload.downloadSize = downloaded 476 } else { 477 log.Error("All methods failed! Unable to download file.") 478 payload.status = "Fail" 479 } 480 481 if !success { 482 return downloaded, errors.New("failed to download file") 483 } else { 484 return downloaded, nil 485 } 486 487 } 488 489 // Find takes a slice and looks for an element in it. If found it will 490 // return it's key, otherwise it will return -1 and a bool of false. 491 // From https://golangcode.com/check-if-element-exists-in-slice/ 492 func Find(slice []string, val string) (int, bool) { 493 for i, item := range slice { 494 if item == val { 495 return i, true 496 } 497 } 498 return -1, false 499 } 500 501 // get_ips will resolve a hostname and return all corresponding IP addresses 502 // in DNS. This can be used to randomly pick an IP when DNS round robin 503 // is used 504 func get_ips(name string) []string { 505 var ipv4s []string 506 var ipv6s []string 507 508 info, err := net.LookupHost(name) 509 if err != nil { 510 log.Error("Unable to look up", name) 511 512 var empty []string 513 return empty 514 } 515 516 for _, addr := range info { 517 parsedIP := net.ParseIP(addr) 518 519 if parsedIP.To4() != nil { 520 ipv4s = append(ipv4s, addr) 521 } else if parsedIP.To16() != nil { 522 ipv6s = append(ipv6s, "["+addr+"]") 523 } 524 } 525 526 //Randomize the order of each 527 rand.Seed(time.Now().UnixNano()) 528 rand.Shuffle(len(ipv4s), func(i, j int) { ipv4s[i], ipv4s[j] = ipv4s[j], ipv4s[i] }) 529 rand.Shuffle(len(ipv6s), func(i, j int) { ipv6s[i], ipv6s[j] = ipv6s[j], ipv6s[i] }) 530 531 // Always prefer IPv4 532 return append(ipv4s, ipv6s...) 533 534 } 535 536 func parse_job_ad(payload payloadStruct) { // TODO: needs the payload 537 538 //Parse the .job.ad file for the Owner (username) and ProjectName of the callee. 539 540 condorJobAd, isPresent := os.LookupEnv("_CONDOR_JOB_AD") 541 var filename string 542 if isPresent { 543 filename = condorJobAd 544 } else if _, err := os.Stat(".job.ad"); err == nil { 545 filename = ".job.ad" 546 } else { 547 return 548 } 549 550 // https://stackoverflow.com/questions/28574609/how-to-apply-regexp-to-content-in-file-go 551 552 b, err := os.ReadFile(filename) 553 if err != nil { 554 log.Warningln("Can not read .job.ad file", err) 555 } 556 557 // Get all matches from file 558 classadRegex, e := regexp.Compile(`^\s*(Owner|ProjectName)\s=\s"(.*)"`) 559 if e != nil { 560 log.Fatal(e) 561 } 562 563 matches := classadRegex.FindAll(b, -1) 564 565 for _, match := range matches { 566 if string(match[0]) == "Owner" { 567 payload.Owner = string(match[1]) 568 } else if string(match) == "ProjectName" { 569 payload.ProjectName = string(match[1]) 570 } 571 } 572 573 }