github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/storage/cloud/external_storage_test.go (about)

     1  // Copyright 2019 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package cloud
    12  
    13  import (
    14  	"bytes"
    15  	"context"
    16  	"crypto/rand"
    17  	"encoding/base64"
    18  	"fmt"
    19  	"io/ioutil"
    20  	"net/url"
    21  	"os"
    22  	"path/filepath"
    23  	"sort"
    24  	"strings"
    25  	"testing"
    26  
    27  	"github.com/cockroachdb/cockroach/pkg/base"
    28  	"github.com/cockroachdb/cockroach/pkg/blobs"
    29  	"github.com/cockroachdb/cockroach/pkg/settings/cluster"
    30  	"github.com/cockroachdb/cockroach/pkg/util/leaktest"
    31  	"github.com/cockroachdb/cockroach/pkg/workload"
    32  	"github.com/cockroachdb/cockroach/pkg/workload/bank"
    33  	"github.com/spf13/pflag"
    34  	"github.com/stretchr/testify/require"
    35  	"golang.org/x/oauth2/google"
    36  )
    37  
    38  func appendPath(t *testing.T, s, add string) string {
    39  	u, err := url.Parse(s)
    40  	if err != nil {
    41  		t.Fatal(err)
    42  	}
    43  	u.Path = filepath.Join(u.Path, add)
    44  	return u.String()
    45  }
    46  
    47  var testSettings *cluster.Settings
    48  
    49  func init() {
    50  	testSettings = cluster.MakeTestingClusterSettings()
    51  	up := testSettings.MakeUpdater()
    52  	if err := up.Set(cloudstorageGSDefaultKey, os.Getenv("GS_JSONKEY"), gcsDefault.Typ()); err != nil {
    53  		panic(err)
    54  	}
    55  }
    56  
    57  func storeFromURI(
    58  	ctx context.Context, t *testing.T, uri string, clientFactory blobs.BlobClientFactory,
    59  ) ExternalStorage {
    60  	conf, err := ExternalStorageConfFromURI(uri)
    61  	if err != nil {
    62  		t.Fatal(err)
    63  	}
    64  	// Setup a sink for the given args.
    65  	s, err := MakeExternalStorage(ctx, conf, base.ExternalIODirConfig{}, testSettings, clientFactory)
    66  	if err != nil {
    67  		t.Fatal(err)
    68  	}
    69  	return s
    70  }
    71  
    72  func testExportStore(t *testing.T, storeURI string, skipSingleFile bool) {
    73  	testExportStoreWithExternalIOConfig(t, base.ExternalIODirConfig{}, storeURI, skipSingleFile)
    74  }
    75  
    76  func testExportStoreWithExternalIOConfig(
    77  	t *testing.T, ioConf base.ExternalIODirConfig, storeURI string, skipSingleFile bool,
    78  ) {
    79  	ctx := context.Background()
    80  
    81  	conf, err := ExternalStorageConfFromURI(storeURI)
    82  	if err != nil {
    83  		t.Fatal(err)
    84  	}
    85  
    86  	// Setup a sink for the given args.
    87  	clientFactory := blobs.TestBlobServiceClient(testSettings.ExternalIODir)
    88  	s, err := MakeExternalStorage(ctx, conf, ioConf, testSettings, clientFactory)
    89  	if err != nil {
    90  		t.Fatal(err)
    91  	}
    92  	defer s.Close()
    93  
    94  	if readConf := s.Conf(); readConf != conf {
    95  		t.Fatalf("conf does not roundtrip: started with %+v, got back %+v", conf, readConf)
    96  	}
    97  
    98  	t.Run("simple round trip", func(t *testing.T) {
    99  		sampleName := "somebytes"
   100  		sampleBytes := "hello world"
   101  
   102  		for i := 0; i < 10; i++ {
   103  			name := fmt.Sprintf("%s-%d", sampleName, i)
   104  			payload := []byte(strings.Repeat(sampleBytes, i))
   105  			if err := s.WriteFile(ctx, name, bytes.NewReader(payload)); err != nil {
   106  				t.Fatal(err)
   107  			}
   108  
   109  			if sz, err := s.Size(ctx, name); err != nil {
   110  				t.Error(err)
   111  			} else if sz != int64(len(payload)) {
   112  				t.Errorf("size mismatch, got %d, expected %d", sz, len(payload))
   113  			}
   114  
   115  			r, err := s.ReadFile(ctx, name)
   116  			if err != nil {
   117  				t.Fatal(err)
   118  			}
   119  			defer r.Close()
   120  
   121  			res, err := ioutil.ReadAll(r)
   122  			if err != nil {
   123  				t.Fatal(err)
   124  			}
   125  			if !bytes.Equal(res, payload) {
   126  				t.Fatalf("got %v expected %v", res, payload)
   127  			}
   128  			if err := s.Delete(ctx, name); err != nil {
   129  				t.Fatal(err)
   130  			}
   131  		}
   132  	})
   133  
   134  	// The azure driver makes us chunk files that are greater than 4mb, so make
   135  	// sure that files larger than that work on all the providers.
   136  	t.Run("8mb-tempfile", func(t *testing.T) {
   137  		const size = 1024 * 1024 * 8 // 8MiB
   138  		testingContent := make([]byte, size)
   139  		if _, err := rand.Read(testingContent); err != nil {
   140  			t.Fatal(err)
   141  		}
   142  		testingFilename := "testing-123"
   143  
   144  		// Write some random data (random so it doesn't compress).
   145  		if err := s.WriteFile(ctx, testingFilename, bytes.NewReader(testingContent)); err != nil {
   146  			t.Fatal(err)
   147  		}
   148  
   149  		// Attempt to read (or fetch) it back.
   150  		res, err := s.ReadFile(ctx, testingFilename)
   151  		if err != nil {
   152  			t.Fatalf("Could not get reader for %s: %+v", testingFilename, err)
   153  		}
   154  		defer res.Close()
   155  		content, err := ioutil.ReadAll(res)
   156  		if err != nil {
   157  			t.Fatal(err)
   158  		}
   159  		// Verify the result contains what we wrote.
   160  		if !bytes.Equal(content, testingContent) {
   161  			t.Fatalf("wrong content")
   162  		}
   163  		if err := s.Delete(ctx, testingFilename); err != nil {
   164  			t.Fatal(err)
   165  		}
   166  	})
   167  	if skipSingleFile {
   168  		return
   169  	}
   170  	t.Run("read-single-file-by-uri", func(t *testing.T) {
   171  		const testingFilename = "A"
   172  		if err := s.WriteFile(ctx, testingFilename, bytes.NewReader([]byte("aaa"))); err != nil {
   173  			t.Fatal(err)
   174  		}
   175  		singleFile := storeFromURI(ctx, t, appendPath(t, storeURI, testingFilename), clientFactory)
   176  		defer singleFile.Close()
   177  
   178  		res, err := singleFile.ReadFile(ctx, "")
   179  		if err != nil {
   180  			t.Fatal(err)
   181  		}
   182  		defer res.Close()
   183  		content, err := ioutil.ReadAll(res)
   184  		if err != nil {
   185  			t.Fatal(err)
   186  		}
   187  		// Verify the result contains what we wrote.
   188  		if !bytes.Equal(content, []byte("aaa")) {
   189  			t.Fatalf("wrong content")
   190  		}
   191  		if err := s.Delete(ctx, testingFilename); err != nil {
   192  			t.Fatal(err)
   193  		}
   194  	})
   195  	t.Run("write-single-file-by-uri", func(t *testing.T) {
   196  		const testingFilename = "B"
   197  		singleFile := storeFromURI(ctx, t, appendPath(t, storeURI, testingFilename), clientFactory)
   198  		defer singleFile.Close()
   199  
   200  		if err := singleFile.WriteFile(ctx, "", bytes.NewReader([]byte("bbb"))); err != nil {
   201  			t.Fatal(err)
   202  		}
   203  
   204  		res, err := s.ReadFile(ctx, testingFilename)
   205  		if err != nil {
   206  			t.Fatal(err)
   207  		}
   208  		defer res.Close()
   209  		content, err := ioutil.ReadAll(res)
   210  		if err != nil {
   211  			t.Fatal(err)
   212  		}
   213  		// Verify the result contains what we wrote.
   214  		if !bytes.Equal(content, []byte("bbb")) {
   215  			t.Fatalf("wrong content")
   216  		}
   217  		if err := s.Delete(ctx, testingFilename); err != nil {
   218  			t.Fatal(err)
   219  		}
   220  	})
   221  }
   222  
   223  func testListFiles(t *testing.T, storeURI string) {
   224  	ctx := context.Background()
   225  	dataLetterFiles := []string{"file/letters/dataA.csv", "file/letters/dataB.csv", "file/letters/dataC.csv"}
   226  	dataNumberFiles := []string{"file/numbers/data1.csv", "file/numbers/data2.csv", "file/numbers/data3.csv"}
   227  	letterFiles := []string{"file/abc/A.csv", "file/abc/B.csv", "file/abc/C.csv"}
   228  	fileNames := append(dataLetterFiles, dataNumberFiles...)
   229  	fileNames = append(fileNames, letterFiles...)
   230  	sort.Strings(fileNames)
   231  
   232  	clientFactory := blobs.TestBlobServiceClient(testSettings.ExternalIODir)
   233  	for _, fileName := range fileNames {
   234  		file := storeFromURI(ctx, t, storeURI, clientFactory)
   235  		if err := file.WriteFile(ctx, fileName, bytes.NewReader([]byte("bbb"))); err != nil {
   236  			t.Fatal(err)
   237  		}
   238  		_ = file.Close()
   239  	}
   240  
   241  	uri, _ := url.Parse(storeURI)
   242  
   243  	abs := func(in []string) []string {
   244  		out := make([]string, len(in))
   245  		for i := range in {
   246  			u := *uri
   247  			u.Path = u.Path + "/" + in[i]
   248  			out[i] = u.String()
   249  		}
   250  		return out
   251  	}
   252  
   253  	t.Run("ListFiles", func(t *testing.T) {
   254  
   255  		for _, tc := range []struct {
   256  			name       string
   257  			URI        string
   258  			suffix     string
   259  			resultList []string
   260  		}{
   261  			{
   262  				"list-all-csv",
   263  				appendPath(t, storeURI, "file/*/*.csv"),
   264  				"",
   265  				abs(fileNames),
   266  			},
   267  			{
   268  				"list-letter-csv",
   269  				appendPath(t, storeURI, "file/abc/?.csv"),
   270  				"",
   271  				abs(letterFiles),
   272  			},
   273  			{
   274  				"list-letter-csv-rel-file-suffix",
   275  				appendPath(t, storeURI, "file"),
   276  				"abc/?.csv",
   277  				[]string{"abc/A.csv", "abc/B.csv", "abc/C.csv"},
   278  			},
   279  			{
   280  				"list-letter-csv-rel-abc-suffix",
   281  				appendPath(t, storeURI, "file/abc"),
   282  				"?.csv",
   283  				[]string{"A.csv", "B.csv", "C.csv"},
   284  			},
   285  			{
   286  				"list-letter-csv-dotdot",
   287  				appendPath(t, storeURI, "file/abc/xzy/../?.csv"),
   288  				"",
   289  				abs(letterFiles),
   290  			},
   291  			{
   292  				"list-abc-csv-suffix",
   293  				appendPath(t, storeURI, "file"),
   294  				"abc/?.csv",
   295  				[]string{"abc/A.csv", "abc/B.csv", "abc/C.csv"},
   296  			},
   297  			{
   298  				"list-letter-csv-dotdot-suffix",
   299  				appendPath(t, storeURI, "file/abc/xzy"),
   300  				"../../?.csv",
   301  				nil,
   302  			},
   303  			{
   304  				"list-data-num-csv",
   305  				appendPath(t, storeURI, "file/numbers/data[0-9].csv"),
   306  				"",
   307  				abs(dataNumberFiles),
   308  			},
   309  			{
   310  				"wildcard-bucket-and-filename",
   311  				appendPath(t, storeURI, "*/numbers/*.csv"),
   312  				"",
   313  				abs(dataNumberFiles),
   314  			},
   315  			{
   316  				"wildcard-bucket-and-filename-suffix",
   317  				appendPath(t, storeURI, ""),
   318  				"*/numbers/*.csv",
   319  				[]string{"file/numbers/data1.csv", "file/numbers/data2.csv", "file/numbers/data3.csv"},
   320  			},
   321  			{
   322  				"list-all-csv-skip-dir",
   323  				// filepath.Glob() assumes that / is the separator, and enforces that it's there.
   324  				// So this pattern would not actually match anything.
   325  				appendPath(t, storeURI, "file/*.csv"),
   326  				"",
   327  				[]string{},
   328  			},
   329  			{
   330  				"list-no-matches",
   331  				appendPath(t, storeURI, "file/letters/dataD.csv"),
   332  				"",
   333  				[]string{},
   334  			},
   335  			{
   336  				"list-escaped-star",
   337  				appendPath(t, storeURI, "file/*/\\*.csv"),
   338  				"",
   339  				[]string{},
   340  			},
   341  			{
   342  				"list-escaped-star-suffix",
   343  				appendPath(t, storeURI, "file"),
   344  				"*/\\*.csv",
   345  				[]string{},
   346  			},
   347  			{
   348  				"list-escaped-range",
   349  				appendPath(t, storeURI, "file/*/data\\[0-9\\].csv"),
   350  				"",
   351  				[]string{},
   352  			},
   353  			{
   354  				"list-escaped-range-suffix",
   355  				appendPath(t, storeURI, "file"),
   356  				"*/data\\[0-9\\].csv",
   357  				[]string{},
   358  			},
   359  		} {
   360  			t.Run(tc.name, func(t *testing.T) {
   361  				s := storeFromURI(ctx, t, tc.URI, clientFactory)
   362  				filesList, err := s.ListFiles(ctx, tc.suffix)
   363  				if err != nil {
   364  					t.Fatal(err)
   365  				}
   366  
   367  				if len(filesList) != len(tc.resultList) {
   368  					t.Fatal(`listed incorrect number of files`, filesList)
   369  				}
   370  				for i, got := range filesList {
   371  					if expected := tc.resultList[i]; got != expected {
   372  						t.Fatal(`resulting list is incorrect. got: `, got, `expected: `, expected, "\n", filesList)
   373  					}
   374  				}
   375  			})
   376  		}
   377  	})
   378  
   379  	for _, fileName := range fileNames {
   380  		file := storeFromURI(ctx, t, storeURI, clientFactory)
   381  		if err := file.Delete(ctx, fileName); err != nil {
   382  			t.Fatal(err)
   383  		}
   384  		_ = file.Close()
   385  	}
   386  }
   387  
   388  func TestPutGoogleCloud(t *testing.T) {
   389  	defer leaktest.AfterTest(t)()
   390  
   391  	bucket := os.Getenv("GS_BUCKET")
   392  	if bucket == "" {
   393  		t.Skip("GS_BUCKET env var must be set")
   394  	}
   395  
   396  	t.Run("empty", func(t *testing.T) {
   397  		testExportStore(t, fmt.Sprintf("gs://%s/%s", bucket, "backup-test-empty"), false)
   398  	})
   399  	t.Run("default", func(t *testing.T) {
   400  		testExportStore(t,
   401  			fmt.Sprintf("gs://%s/%s?%s=%s", bucket, "backup-test-default", AuthParam, authParamDefault),
   402  			false,
   403  		)
   404  	})
   405  	t.Run("specified", func(t *testing.T) {
   406  		credentials := os.Getenv("GS_JSONKEY")
   407  		if credentials == "" {
   408  			t.Skip("GS_JSONKEY env var must be set")
   409  		}
   410  		encoded := base64.StdEncoding.EncodeToString([]byte(credentials))
   411  		testExportStore(t,
   412  			fmt.Sprintf("gs://%s/%s?%s=%s&%s=%s",
   413  				bucket,
   414  				"backup-test-specified",
   415  				AuthParam,
   416  				authParamSpecified,
   417  				CredentialsParam,
   418  				url.QueryEscape(encoded),
   419  			),
   420  			false,
   421  		)
   422  		testListFiles(t,
   423  			fmt.Sprintf("gs://%s/%s/%s?%s=%s&%s=%s",
   424  				bucket,
   425  				"backup-test-specified",
   426  				"listing-test",
   427  				AuthParam,
   428  				authParamSpecified,
   429  				CredentialsParam,
   430  				url.QueryEscape(encoded),
   431  			),
   432  		)
   433  	})
   434  	t.Run("implicit", func(t *testing.T) {
   435  		// Only test these if they exist.
   436  		if _, err := google.FindDefaultCredentials(context.Background()); err != nil {
   437  			t.Skip(err)
   438  		}
   439  		testExportStore(t,
   440  			fmt.Sprintf("gs://%s/%s?%s=%s", bucket, "backup-test-implicit", AuthParam, authParamImplicit),
   441  			false,
   442  		)
   443  	})
   444  }
   445  
   446  func TestWorkloadStorage(t *testing.T) {
   447  	defer leaktest.AfterTest(t)()
   448  
   449  	settings := cluster.MakeTestingClusterSettings()
   450  
   451  	rows, payloadBytes, ranges := 4, 12, 1
   452  	gen := bank.FromConfig(rows, rows, payloadBytes, ranges)
   453  	bankTable := gen.Tables()[0]
   454  	bankURL := func(extraParams ...map[string]string) *url.URL {
   455  		params := url.Values{`version`: []string{gen.Meta().Version}}
   456  		flags := gen.(workload.Flagser).Flags()
   457  		flags.VisitAll(func(f *pflag.Flag) {
   458  			if flags.Meta[f.Name].RuntimeOnly {
   459  				return
   460  			}
   461  			params[f.Name] = append(params[f.Name], f.Value.String())
   462  		})
   463  		for _, p := range extraParams {
   464  			for key, value := range p {
   465  				params.Add(key, value)
   466  			}
   467  		}
   468  		return &url.URL{
   469  			Scheme:   `workload`,
   470  			Path:     `/` + filepath.Join(`csv`, gen.Meta().Name, bankTable.Name),
   471  			RawQuery: params.Encode(),
   472  		}
   473  	}
   474  
   475  	ctx := context.Background()
   476  
   477  	{
   478  		s, err := ExternalStorageFromURI(
   479  			ctx, bankURL().String(), base.ExternalIODirConfig{},
   480  			settings, blobs.TestEmptyBlobClientFactory,
   481  		)
   482  		require.NoError(t, err)
   483  		r, err := s.ReadFile(ctx, ``)
   484  		require.NoError(t, err)
   485  		bytes, err := ioutil.ReadAll(r)
   486  		require.NoError(t, err)
   487  		require.Equal(t, strings.TrimSpace(`
   488  0,0,initial-dTqn
   489  1,0,initial-Pkyk
   490  2,0,initial-eJkM
   491  3,0,initial-TlNb
   492  		`), strings.TrimSpace(string(bytes)))
   493  	}
   494  
   495  	{
   496  		params := map[string]string{
   497  			`row-start`: `1`, `row-end`: `3`, `payload-bytes`: `14`, `batch-size`: `1`}
   498  		s, err := ExternalStorageFromURI(
   499  			ctx, bankURL(params).String(), base.ExternalIODirConfig{},
   500  			settings, blobs.TestEmptyBlobClientFactory,
   501  		)
   502  		require.NoError(t, err)
   503  		r, err := s.ReadFile(ctx, ``)
   504  		require.NoError(t, err)
   505  		bytes, err := ioutil.ReadAll(r)
   506  		require.NoError(t, err)
   507  		require.Equal(t, strings.TrimSpace(`
   508  1,0,initial-vOpikz
   509  2,0,initial-qMvoPe
   510  		`), strings.TrimSpace(string(bytes)))
   511  	}
   512  
   513  	_, err := ExternalStorageFromURI(
   514  		ctx, `workload:///nope`, base.ExternalIODirConfig{},
   515  		settings, blobs.TestEmptyBlobClientFactory,
   516  	)
   517  	require.EqualError(t, err, `path must be of the form /<format>/<generator>/<table>: /nope`)
   518  	_, err = ExternalStorageFromURI(
   519  		ctx, `workload:///fmt/bank/bank?version=`, base.ExternalIODirConfig{},
   520  		settings, blobs.TestEmptyBlobClientFactory,
   521  	)
   522  	require.EqualError(t, err, `unsupported format: fmt`)
   523  	_, err = ExternalStorageFromURI(
   524  		ctx, `workload:///csv/nope/nope?version=`, base.ExternalIODirConfig{},
   525  		settings, blobs.TestEmptyBlobClientFactory,
   526  	)
   527  	require.EqualError(t, err, `unknown generator: nope`)
   528  	_, err = ExternalStorageFromURI(
   529  		ctx, `workload:///csv/bank/bank`, base.ExternalIODirConfig{},
   530  		settings, blobs.TestEmptyBlobClientFactory,
   531  	)
   532  	require.EqualError(t, err, `parameter version is required`)
   533  	_, err = ExternalStorageFromURI(
   534  		ctx, `workload:///csv/bank/bank?version=`, base.ExternalIODirConfig{},
   535  		settings, blobs.TestEmptyBlobClientFactory,
   536  	)
   537  	require.EqualError(t, err, `expected bank version "" but got "1.0.0"`)
   538  	_, err = ExternalStorageFromURI(
   539  		ctx, `workload:///csv/bank/bank?version=nope`, base.ExternalIODirConfig{},
   540  		settings, blobs.TestEmptyBlobClientFactory,
   541  	)
   542  	require.EqualError(t, err, `expected bank version "nope" but got "1.0.0"`)
   543  }