github.com/whtcorpsinc/MilevaDB-Prod@v0.0.0-20211104133533-f57f4be3b597/dbs/cmd/benchdb/benchfilesort/main.go (about) 1 // Copyright 2020 WHTCORPS INC, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // See the License for the specific language governing permissions and 12 // limitations under the License. 13 14 package main 15 16 import ( 17 "encoding/binary" 18 "flag" 19 "fmt" 20 "io/ioutil" 21 "math/rand" 22 "os" 23 "path/filepath" 24 "runtime/pprof" 25 "time" 26 27 "github.com/whtcorpsinc/errors" 28 "github.com/whtcorpsinc/log" 29 "github.com/whtcorpsinc/BerolinaSQL/terror" 30 "github.com/whtcorpsinc/milevadb/stochastikctx/stmtctx" 31 "github.com/whtcorpsinc/milevadb/types" 32 "github.com/whtcorpsinc/milevadb/soliton/codec" 33 "github.com/whtcorpsinc/milevadb/soliton/filesort" 34 "github.com/whtcorpsinc/milevadb/soliton/logutil" 35 ) 36 37 type comparableRow struct { 38 key []types.Causet 39 val []types.Causet 40 handle int64 41 } 42 43 var ( 44 genCmd = flag.NewFlagSet("gen", flag.ExitOnError) 45 runCmd = flag.NewFlagSet("run", flag.ExitOnError) 46 47 logLevel = "warn" 48 cpuprofile string 49 tmFIDelir string 50 keySize int 51 valSize int 52 bufSize int 53 scale int 54 nWorkers int 55 inputRatio int 56 outputRatio int 57 ) 58 59 func nextRow(r *rand.Rand, keySize int, valSize int) *comparableRow { 60 key := make([]types.Causet, keySize) 61 for i := range key { 62 key[i] = types.NewCauset(r.Int()) 63 } 64 65 val := make([]types.Causet, valSize) 66 for j := range val { 67 val[j] = types.NewCauset(r.Int()) 68 } 69 70 handle := r.Int63() 71 return &comparableRow{key: key, val: val, handle: handle} 72 } 73 74 func encodeRow(b []byte, event *comparableRow) ([]byte, error) { 75 var ( 76 err error 77 head = make([]byte, 8) 78 body []byte 79 ) 80 sc := &stmtctx.StatementContext{TimeZone: time.Local} 81 body, err = codec.EncodeKey(sc, body, event.key...) 82 if err != nil { 83 return b, errors.Trace(err) 84 } 85 body, err = codec.EncodeKey(sc, body, event.val...) 86 if err != nil { 87 return b, errors.Trace(err) 88 } 89 body, err = codec.EncodeKey(sc, body, types.NewIntCauset(event.handle)) 90 if err != nil { 91 return b, errors.Trace(err) 92 } 93 94 binary.BigEndian.PutUint64(head, uint64(len(body))) 95 96 b = append(b, head...) 97 b = append(b, body...) 98 99 return b, nil 100 } 101 102 func decodeRow(fd *os.File) (*comparableRow, error) { 103 var ( 104 err error 105 n int 106 head = make([]byte, 8) 107 dcod = make([]types.Causet, 0, keySize+valSize+1) 108 ) 109 110 n, err = fd.Read(head) 111 if n != 8 { 112 return nil, errors.New("incorrect header") 113 } 114 if err != nil { 115 return nil, errors.Trace(err) 116 } 117 118 rowSize := int(binary.BigEndian.Uint64(head)) 119 rowBytes := make([]byte, rowSize) 120 121 n, err = fd.Read(rowBytes) 122 if n != rowSize { 123 return nil, errors.New("incorrect event") 124 } 125 if err != nil { 126 return nil, errors.Trace(err) 127 } 128 129 dcod, err = codec.Decode(rowBytes, keySize+valSize+1) 130 if err != nil { 131 return nil, errors.Trace(err) 132 } 133 134 return &comparableRow{ 135 key: dcod[:keySize], 136 val: dcod[keySize : keySize+valSize], 137 handle: dcod[keySize+valSize:][0].GetInt64(), 138 }, nil 139 } 140 141 func encodeMeta(b []byte, scale int, keySize int, valSize int) []byte { 142 spacetime := make([]byte, 8) 143 144 binary.BigEndian.PutUint64(spacetime, uint64(scale)) 145 b = append(b, spacetime...) 146 binary.BigEndian.PutUint64(spacetime, uint64(keySize)) 147 b = append(b, spacetime...) 148 binary.BigEndian.PutUint64(spacetime, uint64(valSize)) 149 b = append(b, spacetime...) 150 151 return b 152 } 153 154 func decodeMeta(fd *os.File) error { 155 spacetime := make([]byte, 24) 156 if n, err := fd.Read(spacetime); err != nil || n != 24 { 157 if n != 24 { 158 return errors.New("incorrect spacetime data") 159 } 160 return errors.Trace(err) 161 } 162 163 scale = int(binary.BigEndian.Uint64(spacetime[:8])) 164 if scale <= 0 { 165 return errors.New("number of rows must be positive") 166 } 167 168 keySize = int(binary.BigEndian.Uint64(spacetime[8:16])) 169 if keySize <= 0 { 170 return errors.New("key size must be positive") 171 } 172 173 valSize = int(binary.BigEndian.Uint64(spacetime[16:])) 174 if valSize <= 0 { 175 return errors.New("value size must be positive") 176 } 177 178 return nil 179 } 180 181 /* 182 * The synthetic data is exported as a binary format. 183 * The encoding format is: 184 * 1) Meta Data 185 * Three 64-bit integers represent scale size, key size and value size. 186 * 2) Row Data 187 * Each event is encoded as: 188 * One 64-bit integer represent the event size in bytes, followed by the 189 * the actual event bytes. 190 */ 191 func export() error { 192 var outputBytes []byte 193 194 fileName := filepath.Join(tmFIDelir, "data.out") 195 outputFile, err := os.OpenFile(fileName, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0600) 196 if err != nil { 197 return errors.Trace(err) 198 } 199 defer terror.Call(outputFile.Close) 200 201 outputBytes = encodeMeta(outputBytes, scale, keySize, valSize) 202 203 seed := rand.NewSource(time.Now().UnixNano()) 204 r := rand.New(seed) 205 206 for i := 1; i <= scale; i++ { 207 outputBytes, err = encodeRow(outputBytes, nextRow(r, keySize, valSize)) 208 if err != nil { 209 return errors.Trace(err) 210 } 211 _, err = outputFile.Write(outputBytes) 212 if err != nil { 213 return errors.Trace(err) 214 } 215 outputBytes = outputBytes[:0] 216 } 217 218 return nil 219 } 220 221 func load(ratio int) ([]*comparableRow, error) { 222 var ( 223 err error 224 fd *os.File 225 ) 226 227 fileName := filepath.Join(tmFIDelir, "data.out") 228 fd, err = os.Open(fileName) 229 if os.IsNotExist(err) { 230 return nil, errors.New("data file (data.out) does not exist") 231 } 232 if err != nil { 233 return nil, errors.Trace(err) 234 } 235 defer terror.Call(fd.Close) 236 237 err = decodeMeta(fd) 238 if err != nil { 239 return nil, errors.Trace(err) 240 } 241 242 cLogf("\tnumber of rows = %d, key size = %d, value size = %d", scale, keySize, valSize) 243 244 var ( 245 event *comparableRow 246 rows = make([]*comparableRow, 0, scale) 247 ) 248 249 totalRows := int(float64(scale) * (float64(ratio) / 100.0)) 250 cLogf("\tload %d rows", totalRows) 251 for i := 1; i <= totalRows; i++ { 252 event, err = decodeRow(fd) 253 if err != nil { 254 return nil, errors.Trace(err) 255 } 256 rows = append(rows, event) 257 } 258 259 return rows, nil 260 } 261 262 func driveGenCmd() { 263 err := genCmd.Parse(os.Args[2:]) 264 terror.MustNil(err) 265 // Sanity checks 266 if keySize <= 0 { 267 log.Fatal("key size must be positive") 268 } 269 if valSize <= 0 { 270 log.Fatal("value size must be positive") 271 } 272 if scale <= 0 { 273 log.Fatal("scale must be positive") 274 } 275 if _, err = os.Stat(tmFIDelir); err != nil { 276 if os.IsNotExist(err) { 277 log.Fatal("tmFIDelir does not exist") 278 } 279 log.Fatal(err.Error()) 280 } 281 282 cLog("Generating...") 283 start := time.Now() 284 err = export() 285 terror.MustNil(err) 286 cLog("Done!") 287 cLogf("Data placed in: %s", filepath.Join(tmFIDelir, "data.out")) 288 cLog("Time used: ", time.Since(start)) 289 cLog("=================================") 290 } 291 292 func driveRunCmd() { 293 err := runCmd.Parse(os.Args[2:]) 294 terror.MustNil(err) 295 // Sanity checks 296 if bufSize <= 0 { 297 log.Fatal("buffer size must be positive") 298 } 299 if nWorkers <= 0 { 300 log.Fatal("the number of workers must be positive") 301 } 302 if inputRatio < 0 || inputRatio > 100 { 303 log.Fatal("input ratio must between 0 and 100 (inclusive)") 304 } 305 if outputRatio < 0 || outputRatio > 100 { 306 log.Fatal("output ratio must between 0 and 100 (inclusive)") 307 } 308 if _, err = os.Stat(tmFIDelir); err != nil { 309 if os.IsNotExist(err) { 310 log.Fatal("tmFIDelir does not exist") 311 } 312 terror.MustNil(err) 313 } 314 315 var ( 316 dir string 317 profile *os.File 318 fs *filesort.FileSorter 319 ) 320 cLog("Loading...") 321 start := time.Now() 322 data, err := load(inputRatio) 323 terror.MustNil(err) 324 cLog("Done!") 325 cLogf("Loaded %d rows", len(data)) 326 cLog("Time used: ", time.Since(start)) 327 cLog("=================================") 328 329 sc := new(stmtctx.StatementContext) 330 fsBuilder := new(filesort.Builder) 331 byDesc := make([]bool, keySize) 332 for i := 0; i < keySize; i++ { 333 byDesc[i] = false 334 } 335 dir, err = ioutil.TemFIDelir(tmFIDelir, "benchfilesort_test") 336 terror.MustNil(err) 337 fs, err = fsBuilder.SetSC(sc).SetSchema(keySize, valSize).SetBuf(bufSize).SetWorkers(nWorkers).SetDesc(byDesc).SetDir(dir).Build() 338 terror.MustNil(err) 339 340 if cpuprofile != "" { 341 profile, err = os.Create(cpuprofile) 342 terror.MustNil(err) 343 } 344 345 cLog("Inputing...") 346 start = time.Now() 347 for _, r := range data { 348 err = fs.Input(r.key, r.val, r.handle) 349 terror.MustNil(err) 350 } 351 cLog("Done!") 352 cLogf("Input %d rows", len(data)) 353 cLog("Time used: ", time.Since(start)) 354 cLog("=================================") 355 356 cLog("Outputing...") 357 totalRows := int(float64(len(data)) * (float64(outputRatio) / 100.0)) 358 start = time.Now() 359 if cpuprofile != "" { 360 err = pprof.StartCPUProfile(profile) 361 terror.MustNil(err) 362 } 363 for i := 0; i < totalRows; i++ { 364 _, _, _, err = fs.Output() 365 terror.MustNil(err) 366 } 367 if cpuprofile != "" { 368 pprof.StopCPUProfile() 369 } 370 cLog("Done!") 371 cLogf("Output %d rows", totalRows) 372 cLog("Time used: ", time.Since(start)) 373 cLog("=================================") 374 375 cLog("Closing...") 376 start = time.Now() 377 err = fs.Close() 378 terror.MustNil(err) 379 cLog("Done!") 380 cLog("Time used: ", time.Since(start)) 381 cLog("=================================") 382 } 383 384 func init() { 385 err := logutil.InitZapLogger(logutil.NewLogConfig(logLevel, logutil.DefaultLogFormat, "", logutil.EmptyFileLogConfig, false)) 386 terror.MustNil(err) 387 cwd, err1 := os.Getwd() 388 terror.MustNil(err1) 389 390 genCmd.StringVar(&tmFIDelir, "dir", cwd, "where to causetstore the generated rows") 391 genCmd.IntVar(&keySize, "keySize", 8, "the size of key") 392 genCmd.IntVar(&valSize, "valSize", 8, "the size of value") 393 genCmd.IntVar(&scale, "scale", 100, "how many rows to generate") 394 genCmd.StringVar(&cpuprofile, "cpuprofile", "", "write cpu profile to file") 395 396 runCmd.StringVar(&tmFIDelir, "dir", cwd, "where to load the generated rows") 397 runCmd.IntVar(&bufSize, "bufSize", 500000, "how many rows held in memory at a time") 398 runCmd.IntVar(&nWorkers, "nWorkers", 1, "how many workers used in async sorting") 399 runCmd.IntVar(&inputRatio, "inputRatio", 100, "input percentage") 400 runCmd.IntVar(&outputRatio, "outputRatio", 100, "output percentage") 401 runCmd.StringVar(&cpuprofile, "cpuprofile", "", "write cpu profile to file") 402 } 403 404 func main() { 405 flag.Parse() 406 407 if len(os.Args) == 1 { 408 fmt.Printf("Usage:\n\n") 409 fmt.Printf("\tbenchfilesort command [arguments]\n\n") 410 fmt.Printf("The commands are:\n\n") 411 fmt.Println("\tgen\t", "generate rows") 412 fmt.Println("\trun\t", "run tests") 413 fmt.Println("") 414 fmt.Println("Checkout benchfilesort/README for more information.") 415 return 416 } 417 418 switch os.Args[1] { 419 case "gen": 420 driveGenCmd() 421 case "run": 422 driveRunCmd() 423 default: 424 fmt.Printf("%q is not valid command.\n", os.Args[1]) 425 os.Exit(2) 426 } 427 } 428 429 func cLogf(format string, args ...interface{}) { 430 str := fmt.Sprintf(format, args...) 431 fmt.Println("\033[0;32m" + str + "\033[0m") 432 } 433 434 func cLog(args ...interface{}) { 435 str := fmt.Sprint(args...) 436 fmt.Println("\033[0;32m" + str + "\033[0m") 437 }