// github.com/pingcap/badger@v1.5.1-0.20230103063557-828f39b09b6d/cache/sim/sim.go

/*
 * Copyright 2019 Dgraph Labs, Inc. and Contributors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package sim

import (
	"bufio"
	"errors"
	"fmt"
	"io"
	"math/rand"
	"strconv"
	"strings"
	"time"
)

var (
	// ErrDone is returned when the underlying file has run out of lines.
	// ParseLIRS and ParseARC report it when they are handed an empty line.
	ErrDone = errors.New("no more values in the Simulator")
	// ErrBadLine is returned when the trace file line is unrecognizable to
	// the Parser.
	ErrBadLine = errors.New("bad line for trace format")
)

// Simulator is the central type of the `sim` package. It is a function
// returning a key from some source (composed from the other functions in this
// package, either generated or parsed). You can use these Simulators to
// approximate access distributions.
//
// Each call yields the next key together with an error; the generated sources
// in this file always return a nil error, while file-backed sources forward
// whatever error their Parser reported.
type Simulator func() (uint64, error)

// NewZipfian creates a Simulator returning numbers following a Zipfian [1]
// distribution infinitely. Zipfian distributions are useful for simulating real
// workloads.
47 // 48 // [1]: https://en.wikipedia.org/wiki/Zipf%27s_law 49 func NewZipfian(s, v float64, n uint64) Simulator { 50 z := rand.NewZipf(rand.New(rand.NewSource(time.Now().UnixNano())), s, v, n) 51 return func() (uint64, error) { 52 return z.Uint64(), nil 53 } 54 } 55 56 // NewUniform creates a Simulator returning uniformly distributed [1] (random) 57 // numbers [0, max) infinitely. 58 // 59 // [1]: https://en.wikipedia.org/wiki/Uniform_distribution_(continuous) 60 func NewUniform(max uint64) Simulator { 61 m := int64(max) 62 r := rand.New(rand.NewSource(time.Now().UnixNano())) 63 return func() (uint64, error) { 64 return uint64(r.Int63n(m)), nil 65 } 66 } 67 68 // Parser is used as a parameter to NewReader so we can create Simulators from 69 // varying trace file formats easily. 70 type Parser func(string, error) ([]uint64, error) 71 72 // NewReader creates a Simulator from two components: the Parser, which is a 73 // filetype specific function for parsing lines, and the file itself, which will 74 // be read from. 75 // 76 // When every line in the file has been read, ErrDone will be returned. For some 77 // trace formats (LIRS) there is one item per line. For others (ARC) there is a 78 // range of items on each line. Thus, the true number of items in each file 79 // is hard to determine, so it's up to the user to handle ErrDone accordingly. 80 func NewReader(parser Parser, file io.Reader) Simulator { 81 b := bufio.NewReader(file) 82 s := make([]uint64, 0) 83 i := -1 84 var err error 85 return func() (uint64, error) { 86 // only parse a new line when we've run out of items 87 if i++; i == len(s) { 88 // parse sequence from line 89 if s, err = parser(b.ReadString('\n')); err != nil { 90 s = []uint64{0} 91 } 92 i = 0 93 } 94 return s[i], err 95 } 96 } 97 98 // ParseLIRS takes a single line of input from a LIRS trace file as described in 99 // multiple papers [1] and returns a slice containing one number. 
A nice 100 // collection of LIRS trace files can be found in Ben Manes' repo [2]. 101 // 102 // [1]: https://en.wikipedia.org/wiki/LIRS_caching_algorithm 103 // [2]: https://git.io/fj9gU 104 func ParseLIRS(line string, err error) ([]uint64, error) { 105 if line = strings.TrimSpace(line); line != "" { 106 // example: "1\r\n" 107 key, err := strconv.ParseUint(line, 10, 64) 108 return []uint64{key}, err 109 } 110 return nil, ErrDone 111 } 112 113 // ParseARC takes a single line of input from an ARC trace file as described in 114 // "ARC: a self-tuning, low overhead replacement cache" [1] by Nimrod Megiddo 115 // and Dharmendra S. Modha [1] and returns a sequence of numbers generated from 116 // the line and any error. For use with NewReader. 117 // 118 // [1]: https://scinapse.io/papers/1860107648 119 func ParseARC(line string, err error) ([]uint64, error) { 120 if line != "" { 121 // example: "0 5 0 0\n" 122 // 123 // - first block: starting number in sequence 124 // - second block: number of items in sequence 125 // - third block: ignore 126 // - fourth block: global line number (not used) 127 cols := strings.Fields(line) 128 if len(cols) != 4 { 129 return nil, ErrBadLine 130 } 131 start, err := strconv.ParseUint(cols[0], 10, 64) 132 if err != nil { 133 return nil, err 134 } 135 count, err := strconv.ParseUint(cols[1], 10, 64) 136 if err != nil { 137 return nil, err 138 } 139 // populate sequence from start to start + count 140 seq := make([]uint64, count) 141 for i := range seq { 142 seq[i] = start + uint64(i) 143 } 144 return seq, nil 145 } 146 return nil, ErrDone 147 } 148 149 // Collection evaluates the Simulator size times and saves each item to the 150 // returned slice. 
151 func Collection(simulator Simulator, size uint64) []uint64 { 152 collection := make([]uint64, size) 153 for i := range collection { 154 collection[i], _ = simulator() 155 } 156 return collection 157 } 158 159 // StringCollection evaluates the Simulator size times and saves each item to 160 // the returned slice, after converting it to a string. 161 func StringCollection(simulator Simulator, size uint64) []string { 162 collection := make([]string, size) 163 for i := range collection { 164 n, _ := simulator() 165 collection[i] = fmt.Sprintf("%d", n) 166 } 167 return collection 168 }