github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/ccl/importccl/csv_testdata_helpers_test.go (about)

     1  // Copyright 2019 The Cockroach Authors.
     2  //
     3  // Licensed as a CockroachDB Enterprise file under the Cockroach Community
     4  // License (the "License"); you may not use this file except in compliance with
     5  // the License. You may obtain a copy of the License at
     6  //
     7  //     https://github.com/cockroachdb/cockroach/blob/master/licenses/CCL.txt
     8  
     9  package importccl
    10  
    11  import (
    12  	"compress/gzip"
    13  	"fmt"
    14  	"io"
    15  	"os"
    16  	"os/exec"
    17  	"path/filepath"
    18  	"strings"
    19  	"testing"
    20  
    21  	"github.com/cockroachdb/cockroach/pkg/util"
    22  	"github.com/cockroachdb/cockroach/pkg/util/envutil"
    23  	"github.com/cockroachdb/errors"
    24  )
    25  
// rewriteCSVTestData, when enabled via the COCKROACH_REWRITE_CSV_TESTDATA
// environment variable, causes makeCSVData to regenerate the on-disk CSV
// fixtures under testdata/csv instead of reusing the checked-in ones.
var rewriteCSVTestData = envutil.EnvOrDefaultBool("COCKROACH_REWRITE_CSV_TESTDATA", false)
    27  
// csvTestFiles groups the quoted nodelocal URI lists, one list per flavor of
// CSV fixture, that the IMPORT tests feed to IMPORT statements.
type csvTestFiles struct {
	// Plain, gzipped, and bzipped data files, plus variants exercising IMPORT
	// options, duplicate keys, shadowed keys, and duplicate keys whose values
	// are identical.
	files, gzipFiles, bzipFiles, filesWithOpts, filesWithDups, fileWithShadowKeys, fileWithDupKeySameValue []string
	// The same data files addressed via a "data-[0-9]" glob rather than being
	// enumerated one URI per file.
	filesUsingWildcard, gzipFilesUsingWildcard, bzipFilesUsingWildcard []string
}
    32  
    33  // Returns a single CSV file with a previously imported key sandiwched between
    34  // a set of unqiue keys. This is used to ensure that IMPORT does not allow
    35  // ingestion of shadowing keys.
    36  func makeShadowKeyTestFile(t testing.TB, numRowsImportedBefore int, suffix string) {
    37  	if numRowsImportedBefore < 1 {
    38  		t.Fatal(errors.Errorf("table has no existing rows to shadow"))
    39  	}
    40  	padding := 10
    41  	dir := filepath.Join("testdata", "csv")
    42  	fileName := filepath.Join(dir, fmt.Sprintf("shadow-data%s", suffix))
    43  	f, err := os.Create(fileName)
    44  	if err != nil {
    45  		t.Fatal(err)
    46  	}
    47  	// Start the file with some non-colliding rows.
    48  	for i := numRowsImportedBefore; i < numRowsImportedBefore+padding; i++ {
    49  		if _, err := fmt.Fprintf(f, "%d,%c\n", i, 'A'+i%26); err != nil {
    50  			t.Fatal(err)
    51  		}
    52  	}
    53  	numRowsImportedBefore += padding
    54  
    55  	// Insert colliding row.
    56  	if _, err := fmt.Fprintf(f, "%d,%c\n", 0, 'A'); err != nil {
    57  		t.Fatal(err)
    58  	}
    59  
    60  	// Pad file with some more non-colliding rows.
    61  	for i := numRowsImportedBefore; i < numRowsImportedBefore+padding; i++ {
    62  		if _, err := fmt.Fprintf(f, "%d,%c\n", i, 'A'+i%26); err != nil {
    63  			t.Fatal(err)
    64  		}
    65  	}
    66  	if err := f.Close(); err != nil {
    67  		t.Fatal(err)
    68  	}
    69  }
    70  
    71  func makeDupWithSameValueFile(t testing.TB, suffix string) {
    72  	dir := filepath.Join("testdata", "csv")
    73  	fileName := filepath.Join(dir, fmt.Sprintf("dup-key-same-value%s", suffix))
    74  	f, err := os.Create(fileName)
    75  	if err != nil {
    76  		t.Fatal(err)
    77  	}
    78  	// Start the file with some non-colliding rows.
    79  	for i := 0; i < 200; i++ {
    80  		if _, err := fmt.Fprintf(f, "%d,%c\n", i, 'A'+i%26); err != nil {
    81  			t.Fatal(err)
    82  		}
    83  	}
    84  
    85  	// Insert dup keys with same value.
    86  	for i := 0; i < 200; i++ {
    87  		if _, err := fmt.Fprintf(f, "%d,%c\n", i, 'A'+i%26); err != nil {
    88  			t.Fatal(err)
    89  		}
    90  	}
    91  	if err := f.Close(); err != nil {
    92  		t.Fatal(err)
    93  	}
    94  }
    95  
    96  func getTestFiles(numFiles int) csvTestFiles {
    97  	var testFiles csvTestFiles
    98  	suffix := ""
    99  	if util.RaceEnabled {
   100  		suffix = "-race"
   101  	}
   102  	for i := 0; i < numFiles; i++ {
   103  		testFiles.files = append(testFiles.files, fmt.Sprintf(`'nodelocal://0/%s'`, fmt.Sprintf("data-%d%s", i, suffix)+"?nonsecret=nosecrets"))
   104  		testFiles.gzipFiles = append(testFiles.gzipFiles, fmt.Sprintf(`'nodelocal://0/%s'`, fmt.Sprintf("data-%d%s.gz", i, suffix)+"?AWS_SESSION_TOKEN=secrets"))
   105  		testFiles.bzipFiles = append(testFiles.bzipFiles, fmt.Sprintf(`'nodelocal://0/%s'`, fmt.Sprintf("data-%d%s.bz2", i, suffix)))
   106  		testFiles.filesWithOpts = append(testFiles.filesWithOpts, fmt.Sprintf(`'nodelocal://0/%s'`, fmt.Sprintf("data-%d-opts%s", i, suffix)))
   107  		testFiles.filesWithDups = append(testFiles.filesWithDups, fmt.Sprintf(`'nodelocal://0/%s'`, fmt.Sprintf("data-%d-dup%s", i, suffix)))
   108  	}
   109  
   110  	testFiles.fileWithDupKeySameValue = append(testFiles.fileWithDupKeySameValue, fmt.Sprintf(`'nodelocal://0/%s'`, fmt.Sprintf("dup-key-same-value%s", suffix)))
   111  	testFiles.fileWithShadowKeys = append(testFiles.fileWithShadowKeys, fmt.Sprintf(`'nodelocal://0/%s'`, fmt.Sprintf("shadow-data%s", suffix)))
   112  
   113  	wildcardFileName := "data-[0-9]"
   114  	testFiles.filesUsingWildcard = append(testFiles.filesUsingWildcard, fmt.Sprintf(`'nodelocal://0/%s%s'`, wildcardFileName, suffix))
   115  	testFiles.gzipFilesUsingWildcard = append(testFiles.gzipFilesUsingWildcard, fmt.Sprintf(`'nodelocal://0/%s%s.gz'`, wildcardFileName, suffix))
   116  	testFiles.bzipFilesUsingWildcard = append(testFiles.gzipFilesUsingWildcard, fmt.Sprintf(`'nodelocal://0/%s%s.bz2'`, wildcardFileName, suffix))
   117  
   118  	return testFiles
   119  }
   120  
// makeFiles writes the CSV fixtures under dir: for each of numFiles file
// indexes it creates
//   - data-<n>:      rowsPerFile unique "key,value" rows,
//   - data-<n>-opts: the same rows pipe-delimited with a trailing delimiter,
//     preceded by two header lines to skip, with a "#" comment line before
//     each row and a blank value for every fourth row,
//   - data-<n>-dup:  rowsPerFile rows that all reuse key 1 (duplicate keys
//     with differing values),
//
// plus gzip and bzip2 copies of the plain file. It finishes by writing the
// duplicate-key-same-value and shadow-key fixtures. With makeRaceFiles set,
// every file name gets a "-race" suffix (used for smaller race-build
// fixtures).
func makeFiles(t testing.TB, numFiles, rowsPerFile int, dir string, makeRaceFiles bool) {
	suffix := ""
	if makeRaceFiles {
		suffix = "-race"
	}

	for fn := 0; fn < numFiles; fn++ {
		// Create normal CSV file.
		fileName := filepath.Join(dir, fmt.Sprintf("data-%d%s", fn, suffix))
		f, err := os.Create(fileName)
		if err != nil {
			t.Fatal(err)
		}

		// Create CSV file which tests query options.
		fWithOpts, err := os.Create(filepath.Join(dir, fmt.Sprintf("data-%d-opts%s", fn, suffix)))
		if err != nil {
			t.Fatal(err)
		}
		if _, err := fmt.Fprint(fWithOpts, "This is a header line to be skipped\n"); err != nil {
			t.Fatal(err)
		}
		if _, err := fmt.Fprint(fWithOpts, "So is this\n"); err != nil {
			t.Fatal(err)
		}

		// Create CSV file with duplicate entries.
		fDup, err := os.Create(filepath.Join(dir, fmt.Sprintf("data-%d-dup%s", fn, suffix)))
		if err != nil {
			t.Fatal(err)
		}

		// One pass writes all three files in lockstep so their row keys stay
		// aligned; x is globally unique across files.
		for i := 0; i < rowsPerFile; i++ {
			x := fn*rowsPerFile + i
			if _, err := fmt.Fprintf(f, "%d,%c\n", x, 'A'+x%26); err != nil {
				t.Fatal(err)
			}
			// The dup file reuses key 1 on every row so IMPORT sees
			// duplicate keys with different values.
			if _, err := fmt.Fprintf(fDup, "1,%c\n", 'A'+x%26); err != nil {
				t.Fatal(err)
			}

			// Write a comment.
			if _, err := fmt.Fprintf(fWithOpts, "# %d\n", x); err != nil {
				t.Fatal(err)
			}
			// Write a pipe-delim line with trailing delim.
			if x%4 == 0 { // 1/4 of rows have blank val for b
				if _, err := fmt.Fprintf(fWithOpts, "%d||\n", x); err != nil {
					t.Fatal(err)
				}
			} else {
				if _, err := fmt.Fprintf(fWithOpts, "%d|%c|\n", x, 'A'+x%26); err != nil {
					t.Fatal(err)
				}
			}
		}

		if err := f.Close(); err != nil {
			t.Fatal(err)
		}
		if err := fDup.Close(); err != nil {
			t.Fatal(err)
		}
		if err := fWithOpts.Close(); err != nil {
			t.Fatal(err)
		}

		// Check in zipped versions of CSV file fileName. fileName already
		// includes dir, so bzipFile gets an empty dir argument.
		_ = gzipFile(t, fileName)
		_ = bzipFile(t, "", fileName)
	}

	makeDupWithSameValueFile(t, suffix)
	makeShadowKeyTestFile(t, rowsPerFile, suffix)
}
   196  
   197  func makeCSVData(
   198  	t testing.TB, numFiles, rowsPerFile, numRaceFiles, rowsPerRaceFile int,
   199  ) csvTestFiles {
   200  	if rewriteCSVTestData {
   201  		dir := filepath.Join("testdata", "csv")
   202  		if err := os.RemoveAll(dir); err != nil {
   203  			t.Fatal(err)
   204  		}
   205  		if err := os.Mkdir(dir, 0777); err != nil {
   206  			t.Fatal(err)
   207  		}
   208  
   209  		makeFiles(t, numFiles, rowsPerFile, dir, false /* makeRaceFiles */)
   210  		makeFiles(t, numRaceFiles, rowsPerRaceFile, dir, true)
   211  	}
   212  
   213  	if util.RaceEnabled {
   214  		return getTestFiles(numRaceFiles)
   215  	}
   216  	return getTestFiles(numFiles)
   217  }
   218  
   219  func gzipFile(t testing.TB, in string) string {
   220  	r, err := os.Open(in)
   221  	if err != nil {
   222  		t.Fatal(err)
   223  	}
   224  	defer r.Close()
   225  	name := in + ".gz"
   226  	f, err := os.Create(name)
   227  	if err != nil {
   228  		t.Fatal(err)
   229  	}
   230  	defer f.Close()
   231  	w := gzip.NewWriter(f)
   232  	if _, err := io.Copy(w, r); err != nil {
   233  		t.Fatal(err)
   234  	}
   235  	if err := w.Close(); err != nil {
   236  		t.Fatal(err)
   237  	}
   238  	return name
   239  }
   240  
   241  func bzipFile(t testing.TB, dir, in string) string {
   242  	_, err := exec.Command("bzip2", "-k", filepath.Join(dir, in)).CombinedOutput()
   243  	if err != nil {
   244  		if strings.Contains(err.Error(), "executable file not found") {
   245  			return ""
   246  		}
   247  		t.Fatal(err)
   248  	}
   249  	return in + ".bz2"
   250  }