vitess.io/vitess@v0.16.2/go/vt/mysqlctl/compression_benchmark_test.go

package mysqlctl

import (
    "bufio"
    "context"
    "crypto/md5"
    "errors"
    "fmt"
    "io"
    "net/http"
    "net/url"
    "os"
    "path"
    "strconv"
    "strings"
    "testing"
    "time"

    "github.com/klauspost/compress/zstd"
    "github.com/stretchr/testify/require"

    "vitess.io/vitess/go/vt/logutil"
)

type (
    benchmarkCompressArgs struct {
        b        *testing.B
        builtin  string
        external string
    }

    benchmarkCompressEnv struct {
        benchmarkCompressArgs
    }

    fnReadCloser struct {
        io.Reader
        closer func() error
    }

    meteredReader struct {
        count int64
        r     io.Reader
    }

    meteredWriter struct {
        count int64
        w     io.Writer
    }

    timedWriter struct {
        duration time.Duration
        w        io.Writer
    }
)

const (
    // This is the default file which will be downloaded, decompressed, and
    // used by the compression benchmarks in this suite. It's a ~1.5 GiB
    // compressed tar file containing 3 InnoDB files. The InnoDB files were
    // built from this Wikipedia dataset:
    //
    //  https://dumps.wikimedia.org/archive/enwiki/20080103/enwiki-20080103-pages-articles.xml.bz2
    defaultDataURL = "https://github.com/vitessio/vitess-resources/releases/download/testdata-v1.0/enwiki-20080103-pages-articles.ibd.tar.zst"

    // By default, don't limit how many bytes we input into compression.
    defaultMaxBytes int64 = 0

    // By default the benchmarks will remove any downloaded data after all
    // benchmarks are run, unless the data URL is a local path, in which case
    // it will be left alone.
    //
    // Users may override this behavior. This option is intended purely for
    // debugging purposes.
    //
    //  export VT_MYSQLCTL_COMPRESSION_BENCHMARK_CLEANUP=false
    envVarCleanup = "VT_MYSQLCTL_COMPRESSION_BENCHMARK_CLEANUP"

    // Users may specify an alternate data URL, which is expected to point at
    // a zstd-compressed file. This option is intended purely for development
    // and debugging purposes. For example:
    //
    //  export VT_MYSQLCTL_COMPRESSION_BENCHMARK_DATA_URL=https://example.com/custom.dat.zst
    //
    // A local path can also be specified:
    //
    //  export VT_MYSQLCTL_COMPRESSION_BENCHMARK_DATA_URL=file:///tmp/custom.dat
    envVarDataURL = "VT_MYSQLCTL_COMPRESSION_BENCHMARK_DATA_URL"

    // Users may override how many bytes are downloaded. This option is
    // intended purely for development and debugging purposes. For example:
    //
    //  export VT_MYSQLCTL_COMPRESSION_BENCHMARK_MAX_BYTES=256
    envVarMaxBytes = "VT_MYSQLCTL_COMPRESSION_BENCHMARK_MAX_BYTES"
)

func (frc *fnReadCloser) Close() error {
    return frc.closer()
}

func dataLocalPath(u *url.URL) string {
    if isLocal(u) {
        return u.Path
    }
    // Compute a local path for a file by hashing the URL.
    return path.Join(os.TempDir(), fmt.Sprintf("%x.dat", md5.Sum([]byte(u.String()))))
}

func dataURL() (*url.URL, error) {
    u := defaultDataURL

    // Use the user-defined URL, if specified.
    if udURL := os.Getenv(envVarDataURL); udURL != "" {
        u = udURL
    }

    return url.Parse(u)
}

func downloadData(url, localPath string, maxBytes int64) error {
    var err error
    var rdr io.Reader

    // Download the file from the URL, following any redirects.
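    // The CheckRedirect hook copies each redirected URL's path into Opaque,
    // which makes the client send the path verbatim; presumably this keeps
    // pre-encoded characters in CDN redirect URLs (e.g. for GitHub release
    // assets) from being re-escaped in flight.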
    httpClient := http.Client{
        CheckRedirect: func(r *http.Request, via []*http.Request) error {
            r.URL.Opaque = r.URL.Path
            return nil
        },
    }

    resp, err := httpClient.Get(url)
    if err != nil {
        return fmt.Errorf("failed to get data at URL %q: %v", url, err)
    }
    defer resp.Body.Close()
    rdr = resp.Body

    // Assume the data we're downloading is compressed with zstd.
    zr, err := zstd.NewReader(rdr)
    if err != nil {
        return fmt.Errorf("failed to decompress data at URL %q: %v", url, err)
    }
    defer zr.Close()
    rdr = zr

    if maxBytes > 0 {
        rdr = io.LimitReader(rdr, maxBytes)
    }

    // Create a local file to write the HTTP response to.
    file, err := os.OpenFile(localPath, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0666)
    if err != nil {
        return err
    }
    defer file.Close()

    // Write the decompressed data to the local path.
    if _, err := io.Copy(file, rdr); err != nil {
        return err
    }

    return nil
}

func isHTTP(u *url.URL) bool {
    return u.Scheme == "http" || u.Scheme == "https"
}

func isLocal(u *url.URL) bool {
    return u.Scheme == "file" || (u.Scheme == "" && u.Hostname() == "")
}

func maxBytes() (int64, error) {
    // Limit how many bytes we unpack from the archive.
    mb := defaultMaxBytes

    // Use the user-defined max bytes, if specified and valid.
    if udMaxBytes := os.Getenv(envVarMaxBytes); udMaxBytes != "" {
        udmb, err := strconv.ParseInt(udMaxBytes, 10, 64)
        if err != nil {
            return mb, err
        }
        mb = udmb
    }

    return mb, nil
}

func newBenchmarkCompressEnv(args benchmarkCompressArgs) benchmarkCompressEnv {
    bce := benchmarkCompressEnv{
        benchmarkCompressArgs: args,
    }
    bce.validate()
    bce.prepare()
    return bce
}

func shouldCleanup(u *url.URL) (bool, error) {
    c := true

    // Don't clean up local paths provided by the user.
    if isLocal(u) {
        c = false
    }

    // Use the user-defined cleanup setting, if specified and valid.
    if udCleanup := os.Getenv(envVarCleanup); udCleanup != "" {
        udc, err := strconv.ParseBool(udCleanup)
        if err != nil {
            return c, err
        }
        c = udc
    }

    return c, nil
}

func (bce *benchmarkCompressEnv) compress() {
    var durCompressed time.Duration
    var numUncompressedBytes, numCompressedBytes int64

    // The Benchmark, Reader and Writer interfaces make it difficult to time
    // compression without frequent calls to {Start,Stop}Timer or including
    // disk read/write times in the measurement. Instead we'll use
    // ReportMetric after all loops are completed.
    bce.b.StopTimer()
    bce.b.ResetTimer()

    for i := 0; i < bce.b.N; i++ {
        logger := logutil.NewMemoryLogger()

        // Don't write anywhere. We're just interested in compression time.
        w := io.Discard

        // Keep track of how many compressed bytes come through.
        mw := &meteredWriter{w: w}

        // Create the compressor.
        c := bce.compressor(logger, mw)

        // Time how long we spend on c.Write.
        tc := &timedWriter{w: c}

        r, err := bce.reader()
        require.Nil(bce.b, err, "Failed to get data reader.")

        // Track how many bytes we read.
        mr := &meteredReader{r: r}

        // {Start,Stop}Timer would be the obvious fit here, but we're not
        // interested in how long it takes to read from disk.
        _, err = io.Copy(tc, mr)

        // Don't defer closing things, otherwise we can exhaust the open
        // file limit.
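        // (Each iteration opens a fresh reader, and deferred closes would
        // only run once the whole benchmark function returns.)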
        r.Close()
        c.Close()

        require.Nil(bce.b, err, logger.Events)

        // Record how many bytes were compressed so we can report the
        // totals later.
        durCompressed += tc.duration
        numCompressedBytes += mw.count
        numUncompressedBytes += mr.count
    }

    bce.b.ReportMetric(
        float64(durCompressed.Nanoseconds()/int64(bce.b.N)),
        "ns/op",
    )

    mbOut := numUncompressedBytes / 1024 / 1024
    bce.b.ReportMetric(
        float64(mbOut)/durCompressed.Seconds(),
        "MB/s",
    )

    bce.b.ReportMetric(
        float64(numUncompressedBytes)/float64(numCompressedBytes),
        "compression-ratio",
    )
}

func (bce *benchmarkCompressEnv) compressor(logger logutil.Logger, writer io.Writer) io.WriteCloser {
    var compressor io.WriteCloser
    var err error

    if bce.builtin != "" {
        compressor, err = newBuiltinCompressor(bce.builtin, writer, logger)
    } else if bce.external != "" {
        compressor, err = newExternalCompressor(context.Background(), bce.external, writer, logger)
    }

    require.Nil(bce.b, err, "failed to create compressor.")
    return compressor
}

func (bce *benchmarkCompressEnv) prepare() {
    u, err := dataURL()
    require.NoError(bce.b, err, "failed to get data url")

    localPath := dataLocalPath(u)

    if isLocal(u) {
        if _, err := os.Stat(localPath); errors.Is(err, os.ErrNotExist) {
            require.Failf(bce.b, "local path does not exist", localPath)
        }
    } else if isHTTP(u) {
        if _, err := os.Stat(localPath); errors.Is(err, os.ErrNotExist) {
            mb, _ := maxBytes()
            bce.b.Logf("downloading data from %s", u.String())
            if err := downloadData(u.String(), localPath, mb); err != nil {
                require.Failf(bce.b, "failed to download data", err.Error())
            }
        }
    } else {
        require.Failf(bce.b, "don't know how to get data from url", u.String())
    }
}

func (bce *benchmarkCompressEnv) reader() (io.ReadCloser, error) {
    var r io.Reader

    u, _ := dataURL()

    f, err := os.Open(dataLocalPath(u))
    if err != nil {
        return nil, err
    }
    r = f

    mb, _ := maxBytes()
    if mb > 0 {
        r = io.LimitReader(f, mb)
    }

    buf := bufio.NewReaderSize(r, 2*1024*1024)
    return &fnReadCloser{buf, f.Close}, nil
}

func (bce *benchmarkCompressEnv) validate() {
    if bce.external != "" {
        cmdArgs := strings.Split(bce.external, " ")

        _, err := validateExternalCmd(cmdArgs[0])
        if err != nil {
            bce.b.Skipf("command %q not available on this host: %v; skipping...", cmdArgs[0], err)
        }
    }

    if bce.builtin == "" && bce.external == "" {
        require.Fail(bce.b, "either builtin or external compressor must be specified.")
    }
}

func (mr *meteredReader) Read(p []byte) (nbytes int, err error) {
    nbytes, err = mr.r.Read(p)
    mr.count += int64(nbytes)
    return
}

func (mw *meteredWriter) Write(p []byte) (nbytes int, err error) {
    nbytes, err = mw.w.Write(p)
    mw.count += int64(nbytes)
    return
}

func (tw *timedWriter) Write(p []byte) (nbytes int, err error) {
    start := time.Now()
    nbytes, err = tw.w.Write(p)
    tw.duration += time.Since(start)
    return
}

func TestMain(m *testing.M) {
    code := m.Run()

    u, _ := dataURL()
    localPath := dataLocalPath(u)

    cleanup, err := shouldCleanup(u)
    if cleanup {
        msg := "cleaning up %q"
        args := []any{localPath}

        if err != nil {
            args = append(args, err)
            msg = msg + "; %v"
        }

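        // Report the cleanup on stdout, including any error from parsing the
        // cleanup env var, so a bad override is still surfaced.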
        fmt.Printf(msg+"\n", args...)
        if _, err := os.Stat(localPath); !errors.Is(err, os.ErrNotExist) {
            os.Remove(localPath)
        }
    }

    os.Exit(code)
}

func BenchmarkCompressLz4Builtin(b *testing.B) {
    env := newBenchmarkCompressEnv(benchmarkCompressArgs{
        b:       b,
        builtin: Lz4Compressor,
    })
    env.compress()
}

func BenchmarkCompressPargzipBuiltin(b *testing.B) {
    env := newBenchmarkCompressEnv(benchmarkCompressArgs{
        b:       b,
        builtin: PargzipCompressor,
    })
    env.compress()
}

func BenchmarkCompressPgzipBuiltin(b *testing.B) {
    env := newBenchmarkCompressEnv(benchmarkCompressArgs{
        b:       b,
        builtin: PgzipCompressor,
    })
    env.compress()
}

func BenchmarkCompressZstdBuiltin(b *testing.B) {
    env := newBenchmarkCompressEnv(benchmarkCompressArgs{
        b:       b,
        builtin: ZstdCompressor,
    })
    env.compress()
}

func BenchmarkCompressZstdExternal(b *testing.B) {
    env := newBenchmarkCompressEnv(benchmarkCompressArgs{
        b:        b,
        external: fmt.Sprintf("zstd -%d -c", compressionLevel),
    })
    env.compress()
}

func BenchmarkCompressZstdExternalFast4(b *testing.B) {
    env := newBenchmarkCompressEnv(benchmarkCompressArgs{
        b:        b,
        external: fmt.Sprintf("zstd -%d --fast=4 -c", compressionLevel),
    })
    env.compress()
}

func BenchmarkCompressZstdExternalT0(b *testing.B) {
    env := newBenchmarkCompressEnv(benchmarkCompressArgs{
        b:        b,
        external: fmt.Sprintf("zstd -%d -T0 -c", compressionLevel),
    })
    env.compress()
}

func BenchmarkCompressZstdExternalT4(b *testing.B) {
    env := newBenchmarkCompressEnv(benchmarkCompressArgs{
        b:        b,
        external: fmt.Sprintf("zstd -%d -T4 -c", compressionLevel),
    })
    env.compress()
}
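
// To run just these benchmarks, something like the following should work
// (a sketch: -run='^$' skips the package's unit tests, and the first run
// downloads the ~1.5 GiB data file to the system temp dir):
//
//  go test -run='^$' -bench=BenchmarkCompress ./go/vt/mysqlctl/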