github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/ccl/importccl/csv_testdata_helpers_test.go (about) 1 // Copyright 2019 The Cockroach Authors. 2 // 3 // Licensed as a CockroachDB Enterprise file under the Cockroach Community 4 // License (the "License"); you may not use this file except in compliance with 5 // the License. You may obtain a copy of the License at 6 // 7 // https://github.com/cockroachdb/cockroach/blob/master/licenses/CCL.txt 8 9 package importccl 10 11 import ( 12 "compress/gzip" 13 "fmt" 14 "io" 15 "os" 16 "os/exec" 17 "path/filepath" 18 "strings" 19 "testing" 20 21 "github.com/cockroachdb/cockroach/pkg/util" 22 "github.com/cockroachdb/cockroach/pkg/util/envutil" 23 "github.com/cockroachdb/errors" 24 ) 25 26 var rewriteCSVTestData = envutil.EnvOrDefaultBool("COCKROACH_REWRITE_CSV_TESTDATA", false) 27 28 type csvTestFiles struct { 29 files, gzipFiles, bzipFiles, filesWithOpts, filesWithDups, fileWithShadowKeys, fileWithDupKeySameValue []string 30 filesUsingWildcard, gzipFilesUsingWildcard, bzipFilesUsingWildcard []string 31 } 32 33 // Returns a single CSV file with a previously imported key sandiwched between 34 // a set of unqiue keys. This is used to ensure that IMPORT does not allow 35 // ingestion of shadowing keys. 36 func makeShadowKeyTestFile(t testing.TB, numRowsImportedBefore int, suffix string) { 37 if numRowsImportedBefore < 1 { 38 t.Fatal(errors.Errorf("table has no existing rows to shadow")) 39 } 40 padding := 10 41 dir := filepath.Join("testdata", "csv") 42 fileName := filepath.Join(dir, fmt.Sprintf("shadow-data%s", suffix)) 43 f, err := os.Create(fileName) 44 if err != nil { 45 t.Fatal(err) 46 } 47 // Start the file with some non-colliding rows. 48 for i := numRowsImportedBefore; i < numRowsImportedBefore+padding; i++ { 49 if _, err := fmt.Fprintf(f, "%d,%c\n", i, 'A'+i%26); err != nil { 50 t.Fatal(err) 51 } 52 } 53 numRowsImportedBefore += padding 54 55 // Insert colliding row. 56 if _, err := fmt.Fprintf(f, "%d,%c\n", 0, 'A'); err != nil { 57 t.Fatal(err) 58 } 59 60 // Pad file with some more non-colliding rows. 61 for i := numRowsImportedBefore; i < numRowsImportedBefore+padding; i++ { 62 if _, err := fmt.Fprintf(f, "%d,%c\n", i, 'A'+i%26); err != nil { 63 t.Fatal(err) 64 } 65 } 66 if err := f.Close(); err != nil { 67 t.Fatal(err) 68 } 69 } 70 71 func makeDupWithSameValueFile(t testing.TB, suffix string) { 72 dir := filepath.Join("testdata", "csv") 73 fileName := filepath.Join(dir, fmt.Sprintf("dup-key-same-value%s", suffix)) 74 f, err := os.Create(fileName) 75 if err != nil { 76 t.Fatal(err) 77 } 78 // Start the file with some non-colliding rows. 79 for i := 0; i < 200; i++ { 80 if _, err := fmt.Fprintf(f, "%d,%c\n", i, 'A'+i%26); err != nil { 81 t.Fatal(err) 82 } 83 } 84 85 // Insert dup keys with same value. 86 for i := 0; i < 200; i++ { 87 if _, err := fmt.Fprintf(f, "%d,%c\n", i, 'A'+i%26); err != nil { 88 t.Fatal(err) 89 } 90 } 91 if err := f.Close(); err != nil { 92 t.Fatal(err) 93 } 94 } 95 96 func getTestFiles(numFiles int) csvTestFiles { 97 var testFiles csvTestFiles 98 suffix := "" 99 if util.RaceEnabled { 100 suffix = "-race" 101 } 102 for i := 0; i < numFiles; i++ { 103 testFiles.files = append(testFiles.files, fmt.Sprintf(`'nodelocal://0/%s'`, fmt.Sprintf("data-%d%s", i, suffix)+"?nonsecret=nosecrets")) 104 testFiles.gzipFiles = append(testFiles.gzipFiles, fmt.Sprintf(`'nodelocal://0/%s'`, fmt.Sprintf("data-%d%s.gz", i, suffix)+"?AWS_SESSION_TOKEN=secrets")) 105 testFiles.bzipFiles = append(testFiles.bzipFiles, fmt.Sprintf(`'nodelocal://0/%s'`, fmt.Sprintf("data-%d%s.bz2", i, suffix))) 106 testFiles.filesWithOpts = append(testFiles.filesWithOpts, fmt.Sprintf(`'nodelocal://0/%s'`, fmt.Sprintf("data-%d-opts%s", i, suffix))) 107 testFiles.filesWithDups = append(testFiles.filesWithDups, fmt.Sprintf(`'nodelocal://0/%s'`, fmt.Sprintf("data-%d-dup%s", i, suffix))) 108 } 109 110 testFiles.fileWithDupKeySameValue = append(testFiles.fileWithDupKeySameValue, fmt.Sprintf(`'nodelocal://0/%s'`, fmt.Sprintf("dup-key-same-value%s", suffix))) 111 testFiles.fileWithShadowKeys = append(testFiles.fileWithShadowKeys, fmt.Sprintf(`'nodelocal://0/%s'`, fmt.Sprintf("shadow-data%s", suffix))) 112 113 wildcardFileName := "data-[0-9]" 114 testFiles.filesUsingWildcard = append(testFiles.filesUsingWildcard, fmt.Sprintf(`'nodelocal://0/%s%s'`, wildcardFileName, suffix)) 115 testFiles.gzipFilesUsingWildcard = append(testFiles.gzipFilesUsingWildcard, fmt.Sprintf(`'nodelocal://0/%s%s.gz'`, wildcardFileName, suffix)) 116 testFiles.bzipFilesUsingWildcard = append(testFiles.gzipFilesUsingWildcard, fmt.Sprintf(`'nodelocal://0/%s%s.bz2'`, wildcardFileName, suffix)) 117 118 return testFiles 119 } 120 121 func makeFiles(t testing.TB, numFiles, rowsPerFile int, dir string, makeRaceFiles bool) { 122 suffix := "" 123 if makeRaceFiles { 124 suffix = "-race" 125 } 126 127 for fn := 0; fn < numFiles; fn++ { 128 // Create normal CSV file. 129 fileName := filepath.Join(dir, fmt.Sprintf("data-%d%s", fn, suffix)) 130 f, err := os.Create(fileName) 131 if err != nil { 132 t.Fatal(err) 133 } 134 135 // Create CSV file which tests query options. 136 fWithOpts, err := os.Create(filepath.Join(dir, fmt.Sprintf("data-%d-opts%s", fn, suffix))) 137 if err != nil { 138 t.Fatal(err) 139 } 140 if _, err := fmt.Fprint(fWithOpts, "This is a header line to be skipped\n"); err != nil { 141 t.Fatal(err) 142 } 143 if _, err := fmt.Fprint(fWithOpts, "So is this\n"); err != nil { 144 t.Fatal(err) 145 } 146 147 // Create CSV file with duplicate entries. 148 fDup, err := os.Create(filepath.Join(dir, fmt.Sprintf("data-%d-dup%s", fn, suffix))) 149 if err != nil { 150 t.Fatal(err) 151 } 152 153 for i := 0; i < rowsPerFile; i++ { 154 x := fn*rowsPerFile + i 155 if _, err := fmt.Fprintf(f, "%d,%c\n", x, 'A'+x%26); err != nil { 156 t.Fatal(err) 157 } 158 if _, err := fmt.Fprintf(fDup, "1,%c\n", 'A'+x%26); err != nil { 159 t.Fatal(err) 160 } 161 162 // Write a comment. 163 if _, err := fmt.Fprintf(fWithOpts, "# %d\n", x); err != nil { 164 t.Fatal(err) 165 } 166 // Write a pipe-delim line with trailing delim. 167 if x%4 == 0 { // 1/4 of rows have blank val for b 168 if _, err := fmt.Fprintf(fWithOpts, "%d||\n", x); err != nil { 169 t.Fatal(err) 170 } 171 } else { 172 if _, err := fmt.Fprintf(fWithOpts, "%d|%c|\n", x, 'A'+x%26); err != nil { 173 t.Fatal(err) 174 } 175 } 176 } 177 178 if err := f.Close(); err != nil { 179 t.Fatal(err) 180 } 181 if err := fDup.Close(); err != nil { 182 t.Fatal(err) 183 } 184 if err := fWithOpts.Close(); err != nil { 185 t.Fatal(err) 186 } 187 188 // Check in zipped versions of CSV file fileName. 189 _ = gzipFile(t, fileName) 190 _ = bzipFile(t, "", fileName) 191 } 192 193 makeDupWithSameValueFile(t, suffix) 194 makeShadowKeyTestFile(t, rowsPerFile, suffix) 195 } 196 197 func makeCSVData( 198 t testing.TB, numFiles, rowsPerFile, numRaceFiles, rowsPerRaceFile int, 199 ) csvTestFiles { 200 if rewriteCSVTestData { 201 dir := filepath.Join("testdata", "csv") 202 if err := os.RemoveAll(dir); err != nil { 203 t.Fatal(err) 204 } 205 if err := os.Mkdir(dir, 0777); err != nil { 206 t.Fatal(err) 207 } 208 209 makeFiles(t, numFiles, rowsPerFile, dir, false /* makeRaceFiles */) 210 makeFiles(t, numRaceFiles, rowsPerRaceFile, dir, true) 211 } 212 213 if util.RaceEnabled { 214 return getTestFiles(numRaceFiles) 215 } 216 return getTestFiles(numFiles) 217 } 218 219 func gzipFile(t testing.TB, in string) string { 220 r, err := os.Open(in) 221 if err != nil { 222 t.Fatal(err) 223 } 224 defer r.Close() 225 name := in + ".gz" 226 f, err := os.Create(name) 227 if err != nil { 228 t.Fatal(err) 229 } 230 defer f.Close() 231 w := gzip.NewWriter(f) 232 if _, err := io.Copy(w, r); err != nil { 233 t.Fatal(err) 234 } 235 if err := w.Close(); err != nil { 236 t.Fatal(err) 237 } 238 return name 239 } 240 241 func bzipFile(t testing.TB, dir, in string) string { 242 _, err := exec.Command("bzip2", "-k", filepath.Join(dir, in)).CombinedOutput() 243 if err != nil { 244 if strings.Contains(err.Error(), "executable file not found") { 245 return "" 246 } 247 t.Fatal(err) 248 } 249 return in + ".bz2" 250 }