github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/storage/cloud/external_storage_test.go (about) 1 // Copyright 2019 The Cockroach Authors. 2 // 3 // Use of this software is governed by the Business Source License 4 // included in the file licenses/BSL.txt. 5 // 6 // As of the Change Date specified in that file, in accordance with 7 // the Business Source License, use of this software will be governed 8 // by the Apache License, Version 2.0, included in the file 9 // licenses/APL.txt. 10 11 package cloud 12 13 import ( 14 "bytes" 15 "context" 16 "crypto/rand" 17 "encoding/base64" 18 "fmt" 19 "io/ioutil" 20 "net/url" 21 "os" 22 "path/filepath" 23 "sort" 24 "strings" 25 "testing" 26 27 "github.com/cockroachdb/cockroach/pkg/base" 28 "github.com/cockroachdb/cockroach/pkg/blobs" 29 "github.com/cockroachdb/cockroach/pkg/settings/cluster" 30 "github.com/cockroachdb/cockroach/pkg/util/leaktest" 31 "github.com/cockroachdb/cockroach/pkg/workload" 32 "github.com/cockroachdb/cockroach/pkg/workload/bank" 33 "github.com/spf13/pflag" 34 "github.com/stretchr/testify/require" 35 "golang.org/x/oauth2/google" 36 ) 37 38 func appendPath(t *testing.T, s, add string) string { 39 u, err := url.Parse(s) 40 if err != nil { 41 t.Fatal(err) 42 } 43 u.Path = filepath.Join(u.Path, add) 44 return u.String() 45 } 46 47 var testSettings *cluster.Settings 48 49 func init() { 50 testSettings = cluster.MakeTestingClusterSettings() 51 up := testSettings.MakeUpdater() 52 if err := up.Set(cloudstorageGSDefaultKey, os.Getenv("GS_JSONKEY"), gcsDefault.Typ()); err != nil { 53 panic(err) 54 } 55 } 56 57 func storeFromURI( 58 ctx context.Context, t *testing.T, uri string, clientFactory blobs.BlobClientFactory, 59 ) ExternalStorage { 60 conf, err := ExternalStorageConfFromURI(uri) 61 if err != nil { 62 t.Fatal(err) 63 } 64 // Setup a sink for the given args. 65 s, err := MakeExternalStorage(ctx, conf, base.ExternalIODirConfig{}, testSettings, clientFactory) 66 if err != nil { 67 t.Fatal(err) 68 } 69 return s 70 } 71 72 func testExportStore(t *testing.T, storeURI string, skipSingleFile bool) { 73 testExportStoreWithExternalIOConfig(t, base.ExternalIODirConfig{}, storeURI, skipSingleFile) 74 } 75 76 func testExportStoreWithExternalIOConfig( 77 t *testing.T, ioConf base.ExternalIODirConfig, storeURI string, skipSingleFile bool, 78 ) { 79 ctx := context.Background() 80 81 conf, err := ExternalStorageConfFromURI(storeURI) 82 if err != nil { 83 t.Fatal(err) 84 } 85 86 // Setup a sink for the given args. 87 clientFactory := blobs.TestBlobServiceClient(testSettings.ExternalIODir) 88 s, err := MakeExternalStorage(ctx, conf, ioConf, testSettings, clientFactory) 89 if err != nil { 90 t.Fatal(err) 91 } 92 defer s.Close() 93 94 if readConf := s.Conf(); readConf != conf { 95 t.Fatalf("conf does not roundtrip: started with %+v, got back %+v", conf, readConf) 96 } 97 98 t.Run("simple round trip", func(t *testing.T) { 99 sampleName := "somebytes" 100 sampleBytes := "hello world" 101 102 for i := 0; i < 10; i++ { 103 name := fmt.Sprintf("%s-%d", sampleName, i) 104 payload := []byte(strings.Repeat(sampleBytes, i)) 105 if err := s.WriteFile(ctx, name, bytes.NewReader(payload)); err != nil { 106 t.Fatal(err) 107 } 108 109 if sz, err := s.Size(ctx, name); err != nil { 110 t.Error(err) 111 } else if sz != int64(len(payload)) { 112 t.Errorf("size mismatch, got %d, expected %d", sz, len(payload)) 113 } 114 115 r, err := s.ReadFile(ctx, name) 116 if err != nil { 117 t.Fatal(err) 118 } 119 defer r.Close() 120 121 res, err := ioutil.ReadAll(r) 122 if err != nil { 123 t.Fatal(err) 124 } 125 if !bytes.Equal(res, payload) { 126 t.Fatalf("got %v expected %v", res, payload) 127 } 128 if err := s.Delete(ctx, name); err != nil { 129 t.Fatal(err) 130 } 131 } 132 }) 133 134 // The azure driver makes us chunk files that are greater than 4mb, so make 135 // sure that files larger than that work on all the providers. 136 t.Run("8mb-tempfile", func(t *testing.T) { 137 const size = 1024 * 1024 * 8 // 8MiB 138 testingContent := make([]byte, size) 139 if _, err := rand.Read(testingContent); err != nil { 140 t.Fatal(err) 141 } 142 testingFilename := "testing-123" 143 144 // Write some random data (random so it doesn't compress). 145 if err := s.WriteFile(ctx, testingFilename, bytes.NewReader(testingContent)); err != nil { 146 t.Fatal(err) 147 } 148 149 // Attempt to read (or fetch) it back. 150 res, err := s.ReadFile(ctx, testingFilename) 151 if err != nil { 152 t.Fatalf("Could not get reader for %s: %+v", testingFilename, err) 153 } 154 defer res.Close() 155 content, err := ioutil.ReadAll(res) 156 if err != nil { 157 t.Fatal(err) 158 } 159 // Verify the result contains what we wrote. 160 if !bytes.Equal(content, testingContent) { 161 t.Fatalf("wrong content") 162 } 163 if err := s.Delete(ctx, testingFilename); err != nil { 164 t.Fatal(err) 165 } 166 }) 167 if skipSingleFile { 168 return 169 } 170 t.Run("read-single-file-by-uri", func(t *testing.T) { 171 const testingFilename = "A" 172 if err := s.WriteFile(ctx, testingFilename, bytes.NewReader([]byte("aaa"))); err != nil { 173 t.Fatal(err) 174 } 175 singleFile := storeFromURI(ctx, t, appendPath(t, storeURI, testingFilename), clientFactory) 176 defer singleFile.Close() 177 178 res, err := singleFile.ReadFile(ctx, "") 179 if err != nil { 180 t.Fatal(err) 181 } 182 defer res.Close() 183 content, err := ioutil.ReadAll(res) 184 if err != nil { 185 t.Fatal(err) 186 } 187 // Verify the result contains what we wrote. 188 if !bytes.Equal(content, []byte("aaa")) { 189 t.Fatalf("wrong content") 190 } 191 if err := s.Delete(ctx, testingFilename); err != nil { 192 t.Fatal(err) 193 } 194 }) 195 t.Run("write-single-file-by-uri", func(t *testing.T) { 196 const testingFilename = "B" 197 singleFile := storeFromURI(ctx, t, appendPath(t, storeURI, testingFilename), clientFactory) 198 defer singleFile.Close() 199 200 if err := singleFile.WriteFile(ctx, "", bytes.NewReader([]byte("bbb"))); err != nil { 201 t.Fatal(err) 202 } 203 204 res, err := s.ReadFile(ctx, testingFilename) 205 if err != nil { 206 t.Fatal(err) 207 } 208 defer res.Close() 209 content, err := ioutil.ReadAll(res) 210 if err != nil { 211 t.Fatal(err) 212 } 213 // Verify the result contains what we wrote. 214 if !bytes.Equal(content, []byte("bbb")) { 215 t.Fatalf("wrong content") 216 } 217 if err := s.Delete(ctx, testingFilename); err != nil { 218 t.Fatal(err) 219 } 220 }) 221 } 222 223 func testListFiles(t *testing.T, storeURI string) { 224 ctx := context.Background() 225 dataLetterFiles := []string{"file/letters/dataA.csv", "file/letters/dataB.csv", "file/letters/dataC.csv"} 226 dataNumberFiles := []string{"file/numbers/data1.csv", "file/numbers/data2.csv", "file/numbers/data3.csv"} 227 letterFiles := []string{"file/abc/A.csv", "file/abc/B.csv", "file/abc/C.csv"} 228 fileNames := append(dataLetterFiles, dataNumberFiles...) 229 fileNames = append(fileNames, letterFiles...) 230 sort.Strings(fileNames) 231 232 clientFactory := blobs.TestBlobServiceClient(testSettings.ExternalIODir) 233 for _, fileName := range fileNames { 234 file := storeFromURI(ctx, t, storeURI, clientFactory) 235 if err := file.WriteFile(ctx, fileName, bytes.NewReader([]byte("bbb"))); err != nil { 236 t.Fatal(err) 237 } 238 _ = file.Close() 239 } 240 241 uri, _ := url.Parse(storeURI) 242 243 abs := func(in []string) []string { 244 out := make([]string, len(in)) 245 for i := range in { 246 u := *uri 247 u.Path = u.Path + "/" + in[i] 248 out[i] = u.String() 249 } 250 return out 251 } 252 253 t.Run("ListFiles", func(t *testing.T) { 254 255 for _, tc := range []struct { 256 name string 257 URI string 258 suffix string 259 resultList []string 260 }{ 261 { 262 "list-all-csv", 263 appendPath(t, storeURI, "file/*/*.csv"), 264 "", 265 abs(fileNames), 266 }, 267 { 268 "list-letter-csv", 269 appendPath(t, storeURI, "file/abc/?.csv"), 270 "", 271 abs(letterFiles), 272 }, 273 { 274 "list-letter-csv-rel-file-suffix", 275 appendPath(t, storeURI, "file"), 276 "abc/?.csv", 277 []string{"abc/A.csv", "abc/B.csv", "abc/C.csv"}, 278 }, 279 { 280 "list-letter-csv-rel-abc-suffix", 281 appendPath(t, storeURI, "file/abc"), 282 "?.csv", 283 []string{"A.csv", "B.csv", "C.csv"}, 284 }, 285 { 286 "list-letter-csv-dotdot", 287 appendPath(t, storeURI, "file/abc/xzy/../?.csv"), 288 "", 289 abs(letterFiles), 290 }, 291 { 292 "list-abc-csv-suffix", 293 appendPath(t, storeURI, "file"), 294 "abc/?.csv", 295 []string{"abc/A.csv", "abc/B.csv", "abc/C.csv"}, 296 }, 297 { 298 "list-letter-csv-dotdot-suffix", 299 appendPath(t, storeURI, "file/abc/xzy"), 300 "../../?.csv", 301 nil, 302 }, 303 { 304 "list-data-num-csv", 305 appendPath(t, storeURI, "file/numbers/data[0-9].csv"), 306 "", 307 abs(dataNumberFiles), 308 }, 309 { 310 "wildcard-bucket-and-filename", 311 appendPath(t, storeURI, "*/numbers/*.csv"), 312 "", 313 abs(dataNumberFiles), 314 }, 315 { 316 "wildcard-bucket-and-filename-suffix", 317 appendPath(t, storeURI, ""), 318 "*/numbers/*.csv", 319 []string{"file/numbers/data1.csv", "file/numbers/data2.csv", "file/numbers/data3.csv"}, 320 }, 321 { 322 "list-all-csv-skip-dir", 323 // filepath.Glob() assumes that / is the separator, and enforces that it's there. 324 // So this pattern would not actually match anything. 325 appendPath(t, storeURI, "file/*.csv"), 326 "", 327 []string{}, 328 }, 329 { 330 "list-no-matches", 331 appendPath(t, storeURI, "file/letters/dataD.csv"), 332 "", 333 []string{}, 334 }, 335 { 336 "list-escaped-star", 337 appendPath(t, storeURI, "file/*/\\*.csv"), 338 "", 339 []string{}, 340 }, 341 { 342 "list-escaped-star-suffix", 343 appendPath(t, storeURI, "file"), 344 "*/\\*.csv", 345 []string{}, 346 }, 347 { 348 "list-escaped-range", 349 appendPath(t, storeURI, "file/*/data\\[0-9\\].csv"), 350 "", 351 []string{}, 352 }, 353 { 354 "list-escaped-range-suffix", 355 appendPath(t, storeURI, "file"), 356 "*/data\\[0-9\\].csv", 357 []string{}, 358 }, 359 } { 360 t.Run(tc.name, func(t *testing.T) { 361 s := storeFromURI(ctx, t, tc.URI, clientFactory) 362 filesList, err := s.ListFiles(ctx, tc.suffix) 363 if err != nil { 364 t.Fatal(err) 365 } 366 367 if len(filesList) != len(tc.resultList) { 368 t.Fatal(`listed incorrect number of files`, filesList) 369 } 370 for i, got := range filesList { 371 if expected := tc.resultList[i]; got != expected { 372 t.Fatal(`resulting list is incorrect. got: `, got, `expected: `, expected, "\n", filesList) 373 } 374 } 375 }) 376 } 377 }) 378 379 for _, fileName := range fileNames { 380 file := storeFromURI(ctx, t, storeURI, clientFactory) 381 if err := file.Delete(ctx, fileName); err != nil { 382 t.Fatal(err) 383 } 384 _ = file.Close() 385 } 386 } 387 388 func TestPutGoogleCloud(t *testing.T) { 389 defer leaktest.AfterTest(t)() 390 391 bucket := os.Getenv("GS_BUCKET") 392 if bucket == "" { 393 t.Skip("GS_BUCKET env var must be set") 394 } 395 396 t.Run("empty", func(t *testing.T) { 397 testExportStore(t, fmt.Sprintf("gs://%s/%s", bucket, "backup-test-empty"), false) 398 }) 399 t.Run("default", func(t *testing.T) { 400 testExportStore(t, 401 fmt.Sprintf("gs://%s/%s?%s=%s", bucket, "backup-test-default", AuthParam, authParamDefault), 402 false, 403 ) 404 }) 405 t.Run("specified", func(t *testing.T) { 406 credentials := os.Getenv("GS_JSONKEY") 407 if credentials == "" { 408 t.Skip("GS_JSONKEY env var must be set") 409 } 410 encoded := base64.StdEncoding.EncodeToString([]byte(credentials)) 411 testExportStore(t, 412 fmt.Sprintf("gs://%s/%s?%s=%s&%s=%s", 413 bucket, 414 "backup-test-specified", 415 AuthParam, 416 authParamSpecified, 417 CredentialsParam, 418 url.QueryEscape(encoded), 419 ), 420 false, 421 ) 422 testListFiles(t, 423 fmt.Sprintf("gs://%s/%s/%s?%s=%s&%s=%s", 424 bucket, 425 "backup-test-specified", 426 "listing-test", 427 AuthParam, 428 authParamSpecified, 429 CredentialsParam, 430 url.QueryEscape(encoded), 431 ), 432 ) 433 }) 434 t.Run("implicit", func(t *testing.T) { 435 // Only test these if they exist. 436 if _, err := google.FindDefaultCredentials(context.Background()); err != nil { 437 t.Skip(err) 438 } 439 testExportStore(t, 440 fmt.Sprintf("gs://%s/%s?%s=%s", bucket, "backup-test-implicit", AuthParam, authParamImplicit), 441 false, 442 ) 443 }) 444 } 445 446 func TestWorkloadStorage(t *testing.T) { 447 defer leaktest.AfterTest(t)() 448 449 settings := cluster.MakeTestingClusterSettings() 450 451 rows, payloadBytes, ranges := 4, 12, 1 452 gen := bank.FromConfig(rows, rows, payloadBytes, ranges) 453 bankTable := gen.Tables()[0] 454 bankURL := func(extraParams ...map[string]string) *url.URL { 455 params := url.Values{`version`: []string{gen.Meta().Version}} 456 flags := gen.(workload.Flagser).Flags() 457 flags.VisitAll(func(f *pflag.Flag) { 458 if flags.Meta[f.Name].RuntimeOnly { 459 return 460 } 461 params[f.Name] = append(params[f.Name], f.Value.String()) 462 }) 463 for _, p := range extraParams { 464 for key, value := range p { 465 params.Add(key, value) 466 } 467 } 468 return &url.URL{ 469 Scheme: `workload`, 470 Path: `/` + filepath.Join(`csv`, gen.Meta().Name, bankTable.Name), 471 RawQuery: params.Encode(), 472 } 473 } 474 475 ctx := context.Background() 476 477 { 478 s, err := ExternalStorageFromURI( 479 ctx, bankURL().String(), base.ExternalIODirConfig{}, 480 settings, blobs.TestEmptyBlobClientFactory, 481 ) 482 require.NoError(t, err) 483 r, err := s.ReadFile(ctx, ``) 484 require.NoError(t, err) 485 bytes, err := ioutil.ReadAll(r) 486 require.NoError(t, err) 487 require.Equal(t, strings.TrimSpace(` 488 0,0,initial-dTqn 489 1,0,initial-Pkyk 490 2,0,initial-eJkM 491 3,0,initial-TlNb 492 `), strings.TrimSpace(string(bytes))) 493 } 494 495 { 496 params := map[string]string{ 497 `row-start`: `1`, `row-end`: `3`, `payload-bytes`: `14`, `batch-size`: `1`} 498 s, err := ExternalStorageFromURI( 499 ctx, bankURL(params).String(), base.ExternalIODirConfig{}, 500 settings, blobs.TestEmptyBlobClientFactory, 501 ) 502 require.NoError(t, err) 503 r, err := s.ReadFile(ctx, ``) 504 require.NoError(t, err) 505 bytes, err := ioutil.ReadAll(r) 506 require.NoError(t, err) 507 require.Equal(t, strings.TrimSpace(` 508 1,0,initial-vOpikz 509 2,0,initial-qMvoPe 510 `), strings.TrimSpace(string(bytes))) 511 } 512 513 _, err := ExternalStorageFromURI( 514 ctx, `workload:///nope`, base.ExternalIODirConfig{}, 515 settings, blobs.TestEmptyBlobClientFactory, 516 ) 517 require.EqualError(t, err, `path must be of the form /<format>/<generator>/<table>: /nope`) 518 _, err = ExternalStorageFromURI( 519 ctx, `workload:///fmt/bank/bank?version=`, base.ExternalIODirConfig{}, 520 settings, blobs.TestEmptyBlobClientFactory, 521 ) 522 require.EqualError(t, err, `unsupported format: fmt`) 523 _, err = ExternalStorageFromURI( 524 ctx, `workload:///csv/nope/nope?version=`, base.ExternalIODirConfig{}, 525 settings, blobs.TestEmptyBlobClientFactory, 526 ) 527 require.EqualError(t, err, `unknown generator: nope`) 528 _, err = ExternalStorageFromURI( 529 ctx, `workload:///csv/bank/bank`, base.ExternalIODirConfig{}, 530 settings, blobs.TestEmptyBlobClientFactory, 531 ) 532 require.EqualError(t, err, `parameter version is required`) 533 _, err = ExternalStorageFromURI( 534 ctx, `workload:///csv/bank/bank?version=`, base.ExternalIODirConfig{}, 535 settings, blobs.TestEmptyBlobClientFactory, 536 ) 537 require.EqualError(t, err, `expected bank version "" but got "1.0.0"`) 538 _, err = ExternalStorageFromURI( 539 ctx, `workload:///csv/bank/bank?version=nope`, base.ExternalIODirConfig{}, 540 settings, blobs.TestEmptyBlobClientFactory, 541 ) 542 require.EqualError(t, err, `expected bank version "nope" but got "1.0.0"`) 543 }