github.com/NVIDIA/aistore@v1.3.23-0.20240517131212-7df6609be51d/ais/test/rproxy_test.go (about)

     1  // Package integration_test.
     2  /*
     3   * Copyright (c) 2018-2023, NVIDIA CORPORATION. All rights reserved.
     4   */
     5  package integration_test
     6  
     7  import (
     8  	"fmt"
     9  	"net/http"
    10  	"os/exec"
    11  	"path/filepath"
    12  	"strings"
    13  	"testing"
    14  	"time"
    15  
    16  	"github.com/NVIDIA/aistore/api"
    17  	"github.com/NVIDIA/aistore/api/apc"
    18  	"github.com/NVIDIA/aistore/cmn"
    19  	"github.com/NVIDIA/aistore/cmn/cos"
    20  	"github.com/NVIDIA/aistore/core/meta"
    21  	"github.com/NVIDIA/aistore/tools"
    22  	"github.com/NVIDIA/aistore/tools/tassert"
    23  	"github.com/NVIDIA/aistore/tools/tlog"
    24  )
    25  
    26  const (
    27  	// Public object name to download from Google Cloud Storage.
    28  	gcsBck      = "gcp-public-data-landsat"
    29  	gcsFilename = "LT08_L1GT_040021_20130506_20170310_01_T2_B10.TIF"
    30  	gcsObjXML   = "LT08/01/040/021/LT08_L1GT_040021_20130506_20170310_01_T2/" + gcsFilename
    31  )
    32  
    33  // generate URL to request object from GCS
    34  func genObjURL(isSecure, isXML bool) (s string) {
    35  	if isSecure || !isXML { // Using JSON requires HTTPS: "SSL is required to perform this operation."
    36  		s = "https://"
    37  	} else {
    38  		s = "http://"
    39  	}
    40  	if isXML {
    41  		s += fmt.Sprintf("storage.googleapis.com/%s/%s", gcsBck, gcsObjXML)
    42  	} else {
    43  		// Reformat object name from XML to JSON API requirements.
    44  		gcsObjJSON := strings.ReplaceAll(gcsObjXML, "/", "%2F")
    45  		s += fmt.Sprintf("www.googleapis.com/storage/v1/b/%s/o/%s?alt=media", gcsBck, gcsObjJSON)
    46  	}
    47  	return s
    48  }
    49  
    50  // build command line for CURL
    51  func genCURLCmdLine(t *testing.T, resURL, proxyURL string, targets meta.NodeMap) []string {
    52  	var noProxy []string
    53  	for _, t := range targets {
    54  		if !cos.StringInSlice(t.PubNet.Hostname, noProxy) {
    55  			noProxy = append(noProxy, t.PubNet.Hostname)
    56  		}
    57  	}
    58  
    59  	// TODO:  "--proxy-insecure" requires `curl` 7.58.0+ and is needed when we USE_HTTPS (see #885)
    60  	return []string{
    61  		"-L", "-X", "GET",
    62  		resURL,
    63  		"-o", filepath.Join(t.TempDir(), "curl.file"),
    64  		"-x", proxyURL,
    65  		"--max-redirs", "3",
    66  		"--noproxy", strings.Join(noProxy, ","),
    67  		"--insecure",
    68  	}
    69  }
    70  
    71  // Extract download speed from CURL output.
    72  func extractSpeed(out []byte) int64 {
    73  	lines := strings.Split(string(out), "\n")
    74  	for i := len(lines) - 1; i >= 0; i-- {
    75  		if lines[i] == "" {
    76  			continue
    77  		}
    78  		words := strings.Split(lines[i], " ")
    79  		if spd, err := cos.ParseSize(words[len(words)-1], cos.UnitsIEC); err == nil {
    80  			return spd
    81  		}
    82  	}
    83  	return 0
    84  }
    85  
    86  func TestRProxyGCS(t *testing.T) {
    87  	var (
    88  		resURL     = genObjURL(false, true)
    89  		proxyURL   = tools.GetPrimaryURL()
    90  		smap       = tools.GetClusterMap(t, proxyURL)
    91  		baseParams = tools.BaseAPIParams(proxyURL)
    92  
    93  		maxRetries = 2
    94  	)
    95  
    96  	if cos.IsHTTPS(proxyURL) {
    97  		t.Skip("test doesn't work for HTTPS")
    98  	}
    99  
   100  	initMountpaths(t, proxyURL)
   101  	bck := cmn.Bck{Provider: apc.HTTP}
   102  	queryBck := cmn.QueryBcks(bck)
   103  	bckList, err := api.ListBuckets(baseParams, queryBck, apc.FltExists)
   104  	tassert.CheckFatal(t, err)
   105  
   106  retry:
   107  	cmdline := genCURLCmdLine(t, resURL, proxyURL, smap.Tmap)
   108  	tlog.Logf("First time download via XML API: %s\n", cmdline)
   109  	out, err := exec.Command("curl", cmdline...).CombinedOutput()
   110  	tlog.Logln("\n" + string(out))
   111  	tassert.CheckFatal(t, err)
   112  
   113  	speedCold := extractSpeed(out)
   114  	tlog.Logf("Cold download speed:   %s\n", cos.ToSizeIEC(speedCold, 1))
   115  	tassert.Fatalf(t, speedCold != 0, "Failed to detect cold download speed")
   116  
   117  	// at less than 100KBps we likely failed to download
   118  	if speedCold < 100*cos.KiB {
   119  		if testing.Short() {
   120  			fmt := "cold download speed %s is way too low indicating potential timeout"
   121  			tools.ShortSkipf(t, fmt, cos.ToSizeIEC(speedCold, 1))
   122  		}
   123  		if maxRetries > 0 {
   124  			tlog.Logf("Warning: will retry (%d)\n", maxRetries)
   125  			time.Sleep(15 * time.Second)
   126  			tlog.Logln("Warning: retrying...")
   127  			maxRetries--
   128  			goto retry
   129  		}
   130  	}
   131  
   132  	bckListNew, err := api.ListBuckets(baseParams, queryBck, apc.FltExists)
   133  	tassert.CheckFatal(t, err)
   134  	bck, err = detectNewBucket(bckList, bckListNew)
   135  	tassert.CheckFatal(t, err)
   136  	t.Cleanup(func() {
   137  		tools.DestroyBucket(t, proxyURL, bck)
   138  	})
   139  
   140  	pathCached := findObjOnDisk(bck, gcsFilename)
   141  	tassert.Fatalf(t, pathCached != "", "object was not downloaded")
   142  	tlog.Logf("Downloaded as %q\n", pathCached)
   143  
   144  	tlog.Logf("HTTP download\n")
   145  	cmdline = genCURLCmdLine(t, resURL, proxyURL, smap.Tmap)
   146  	out, err = exec.Command("curl", cmdline...).CombinedOutput()
   147  	tlog.Logln(string(out))
   148  	tassert.CheckFatal(t, err)
   149  	speedHTTP := extractSpeed(out)
   150  	tassert.Fatalf(t, speedHTTP != 0, "Failed to detect speed for HTTP download")
   151  
   152  	/*
   153  		TODO: uncomment when target supports HTTPS client
   154  
   155  		tlog.Logf("HTTPS download\n")
   156  		cmdline = genCURLCmdLine(true, true, proxyURL, smap.Tmap)
   157  		out, err = exec.Command("curl", cmdline...).CombinedOutput()
   158  		tlog.Logln(string(out))
   159  		tassert.CheckFatal(t, err)
   160  		speedHTTPS := extractSpeed(out)
   161  		tassert.Fatalf(t, speedHTTPS != 0, "Failed to detect speed for HTTPS download")
   162  
   163  		bckListNew, err = api.ListBuckets(baseParams, queryBck)
   164  		tassert.CheckFatal(t, err)
   165  		bckHTTPS, err := detectNewBucket(bckList, bckListNew)
   166  		tassert.CheckFatal(t, err)
   167  		defer tools.DestroyBucket(t, proxyURL, bckHTTPS)
   168  
   169  		tlog.Logf("Check via JSON API\n")
   170  		cmdline = genCURLCmdLine(false, false, proxyURL, smap.Tmap)
   171  		tlog.Logf("JSON: %s\n", cmdline)
   172  		out, err = exec.Command("curl", cmdline...).CombinedOutput()
   173  		t.Log(string(out))
   174  		tassert.CheckFatal(t, err)
   175  		speedJSON := extractSpeed(out)
   176  		tassert.Fatalf(t, speedJSON != 0, "Failed to detect speed for JSON download")
   177  	*/
   178  
   179  	tlog.Logf("Cold download speed:   %s\n", cos.ToSizeIEC(speedCold, 1))
   180  	tlog.Logf("HTTP download speed:   %s\n", cos.ToSizeIEC(speedHTTP, 1))
   181  	/*
   182  		TODO: uncomment when target supports HTTPS client
   183  
   184  		tlog.Logf("HTTPS download speed:  %s\n", cos.ToSizeIEC(speedHTTPS, 1))
   185  		tlog.Logf("JSON download speed:   %s\n", cos.ToSizeIEC(speedJSON, 1))
   186  	*/
   187  	ratio := float64(speedHTTP) / float64(speedCold)
   188  	if ratio < 0.8 {
   189  		tlog.Logf("Cached download is %.1f slower than Cold\n", ratio)
   190  	} else if ratio > 1.2 {
   191  		tlog.Logf("HTTP is %.1f faster than Cold\n", ratio)
   192  	}
   193  }
   194  
   195  func TestRProxyInvalidURL(t *testing.T) {
   196  	var (
   197  		proxyURL   = tools.GetPrimaryURL()
   198  		baseParams = tools.BaseAPIParams(proxyURL)
   199  		client     = tools.NewClientWithProxy(proxyURL)
   200  	)
   201  	tests := []struct {
   202  		url        string
   203  		statusCode int
   204  		doAndCheck bool
   205  	}{
   206  		// case 1
   207  		{url: "http://storage.googleapis.com/kubernetes-release/release", statusCode: http.StatusNotFound, doAndCheck: true},
   208  		{url: "http://invalid.invaliddomain.com/test/webpage.txt", statusCode: http.StatusBadRequest, doAndCheck: true}, // Invalid domain
   209  		// case 2
   210  		{url: "http://archive.ics.uci.edu/ml/datasets/Abalone", doAndCheck: false},
   211  	}
   212  	for _, test := range tests {
   213  		hbo, err := cmn.NewHTTPObjPath(test.url)
   214  		tassert.CheckError(t, err)
   215  		api.DestroyBucket(baseParams, hbo.Bck)
   216  
   217  		req, err := http.NewRequest(http.MethodGet, test.url, http.NoBody)
   218  		tassert.CheckFatal(t, err)
   219  		api.SetAuxHeaders(req, &baseParams)
   220  
   221  		if test.doAndCheck {
   222  			// case 1: bad response on GET followed by a failure to HEAD
   223  			tassert.DoAndCheckResp(t, client, req, test.statusCode, http.StatusForbidden)
   224  			_, err = api.HeadBucket(baseParams, hbo.Bck, false /* don't add */)
   225  			tassert.Errorf(t, err != nil, "shouldn't create bucket (%s) for invalid resource URL %q", hbo.Bck, test.url)
   226  		} else {
   227  			// case 2: cannot GET but can still do a HEAD (even though ETag is not provided)
   228  			resp, err := client.Do(req)
   229  			if resp != nil && resp.Body != nil {
   230  				resp.Body.Close()
   231  			}
   232  			tassert.Errorf(t, err != nil, "expecting error executing GET %q", test.url)
   233  			_, err = api.HeadBucket(baseParams, hbo.Bck, false /* don't add */)
   234  			tassert.CheckError(t, err)
   235  		}
   236  
   237  		api.DestroyBucket(baseParams, hbo.Bck)
   238  	}
   239  }