github.com/square/finch@v0.0.0-20240412205204-6530c03e2b96/data/integer.go (about) 1 // Copyright 2024 Block, Inc. 2 3 package data 4 5 import ( 6 "fmt" 7 "math" 8 "math/rand" 9 "strconv" 10 "strings" 11 "sync" 12 "sync/atomic" 13 14 "github.com/square/finch" 15 ) 16 17 // Int implements the int data generator. 18 type Int struct { 19 min int64 20 max int64 21 dist byte // normal|uniform 22 mean float64 // dist=normal 23 stddev float64 // dist=normal 24 } 25 26 var _ Generator = &Int{} 27 28 const ( 29 dist_uniform byte = iota 30 dist_normal 31 ) 32 33 func NewInt(params map[string]string) (*Int, error) { 34 g := &Int{ 35 min: 1, 36 max: finch.ROWS, 37 dist: dist_uniform, 38 } 39 40 if err := int64From(params, "min", &g.min, false); err != nil { 41 return nil, err 42 } 43 if err := int64From(params, "max", &g.max, false); err != nil { 44 return nil, err 45 } 46 47 switch strings.ToLower(params["dist"]) { 48 case "normal": 49 g.dist = dist_normal 50 var mean int64 51 if err := int64From(params, "mean", &mean, false); err != nil { 52 return nil, err 53 } 54 if mean == 0 { 55 mean = (g.max - g.min + 1) / 2 56 } 57 g.mean = float64(mean) 58 59 s, ok := params["stddev"] 60 if ok { 61 var err error 62 g.stddev, err = strconv.ParseFloat(s, 64) 63 if err != nil { 64 return nil, err 65 } 66 } else { 67 g.stddev = (float64(g.max) - float64(g.min)) / 8.0 68 } 69 case "uniform": 70 g.dist = dist_uniform 71 default: 72 g.dist = dist_uniform 73 } 74 finch.Debug("rand int [%d, %d] dist %d (uni %d, norm %d)", g.min, g.max, g.dist, dist_uniform, dist_normal) 75 return g, nil 76 } 77 78 func (g *Int) Name() string { return "int" } 79 func (g *Int) Format() (uint, string) { return 1, "%d" } 80 func (g *Int) Scan(any interface{}) error { return nil } 81 82 func (g *Int) Copy() Generator { 83 c := *g 84 return &c 85 } 86 87 func (g *Int) Values(_ RunCount) []interface{} { 88 switch g.dist { 89 case dist_normal: 90 v := int64(math.Floor(rand.NormFloat64()*g.stddev + g.mean)) 91 if v < g.min || v > g.max { 92 v = int64(math.Floor(rand.NormFloat64()*g.stddev + g.mean)) 93 if v < g.min || v > g.max { 94 return []interface{}{int64(g.mean)} 95 } 96 } 97 return []interface{}{v} 98 default: // uniform 99 v := rand.Int63n(g.max) 100 if v < g.min { 101 v = g.min 102 } 103 return []interface{}{v} 104 } 105 } 106 107 // -------------------------------------------------------------------------- 108 109 // IntGaps implements the int-gaps data generator. 110 type IntGaps struct { 111 params map[string]string 112 input_max int64 113 output_start float64 114 slope float64 115 } 116 117 var _ Generator = &IntGaps{} 118 119 func NewIntGaps(params map[string]string) (*IntGaps, error) { 120 // https://stackoverflow.com/questions/5731863/mapping-a-numeric-range-onto-another 121 min := int64(1) 122 if err := int64From(params, "min", &min, false); err != nil { 123 return nil, err 124 } 125 max := int64(finch.ROWS) 126 if err := int64From(params, "max", &max, false); err != nil { 127 return nil, err 128 } 129 size := max - min + 1 130 if size <= 0 { 131 return nil, fmt.Errorf("invalid int-gaps: max - min must be > 0") 132 } 133 134 p := int64(20) 135 if err := int64From(params, "p", &p, false); err != nil { 136 return nil, err 137 } 138 if p < 1 || p > 100 { 139 return nil, fmt.Errorf("invalid int-gaps p: %d, must be between 1 to 100 (inclusive)", p) 140 } 141 input_max := int64(float64(size) * (float64(p) / 100.0)) 142 143 g := &IntGaps{ 144 params: params, 145 input_max: input_max, 146 output_start: float64(min), 147 slope: float64(max-min) / float64(input_max-1), 148 } 149 finch.Debug("1..%d -> %d..%d (%d%% of %d) gap: %d records", input_max, min, max, p, size, int(g.slope)) 150 return g, nil 151 } 152 153 func (g *IntGaps) Name() string { return "int-gaps" } 154 func (g *IntGaps) Format() (uint, string) { return 1, "%d" } 155 func (g *IntGaps) Scan(any interface{}) error { return nil } 156 157 func (g *IntGaps) Copy() Generator { 158 c, _ := NewIntGaps(g.params) 159 return c 160 } 161 162 func (g *IntGaps) Values(_ RunCount) []interface{} { 163 return []interface{}{int64(g.output_start + float64(rand.Int63n(g.input_max))*g.slope)} 164 } 165 166 // -------------------------------------------------------------------------- 167 168 // IntRange implements the int-range data generator. 169 type IntRange struct { 170 params map[string]string 171 size int64 172 min int64 173 max int64 174 v []int64 175 } 176 177 var _ Generator = &IntRange{} 178 179 func NewIntRange(params map[string]string) (*IntRange, error) { 180 g := &IntRange{ 181 min: 1, 182 max: finch.ROWS, 183 size: 100, 184 v: []int64{0, 0}, 185 params: params, 186 } 187 if err := int64From(params, "size", &g.size, false); err != nil { 188 return nil, err 189 } 190 if err := int64From(params, "min", &g.min, false); err != nil { 191 return nil, err 192 } 193 if err := int64From(params, "max", &g.max, false); err != nil { 194 return nil, err 195 } 196 if g.min >= g.max { 197 return nil, fmt.Errorf("invalid int range: min %d >= max %d", g.min, g.max) 198 } 199 if g.size > (g.max - g.min) { 200 return nil, fmt.Errorf("invalid int range: size %d > (max %d - min %d)", g.size, g.max, g.min) 201 } 202 return g, nil 203 } 204 205 func (g *IntRange) Name() string { return "int-range" } 206 func (g *IntRange) Format() (uint, string) { return 2, "%d" } 207 func (g *IntRange) Scan(any interface{}) error { return nil } 208 209 func (g *IntRange) Copy() Generator { 210 gCopy, _ := NewIntRange(g.params) 211 return gCopy 212 } 213 214 func (g *IntRange) Values(_ RunCount) []interface{} { 215 // MySQL BETWEEN is closed interval [min, max], so if random min (lower) 216 // is 10 and size is 3, then 10+3=13 but that's 4 values: 10, 11, 12, 13. 217 // So we -1 to make BETWEEEN 10 AND 12, which is 3 values. 218 lower := g.min + rand.Int63n(g.max-g.min) 219 upper := lower + g.size - 1 220 if upper > g.max { 221 upper = g.max 222 } 223 return []interface{}{lower, upper} 224 } 225 226 // -------------------------------------------------------------------------- 227 228 // IntRangeSeq implements the int-range-seq data generator. 229 type IntRangeSeq struct { 230 begin int64 231 end int64 232 size int64 233 n int64 234 params map[string]string 235 *sync.Mutex 236 } 237 238 var _ Generator = &IntRangeSeq{} 239 240 func NewIntRangeSeq(params map[string]string) (*IntRangeSeq, error) { 241 g := &IntRangeSeq{ 242 begin: 1, 243 end: finch.ROWS, 244 size: 100, 245 n: 1, 246 params: params, 247 Mutex: &sync.Mutex{}, 248 } 249 if err := int64From(params, "size", &g.size, false); err != nil { 250 return nil, err 251 } 252 if err := int64From(params, "begin", &g.begin, false); err != nil { 253 return nil, err 254 } 255 g.n = g.begin 256 if err := int64From(params, "end", &g.end, false); err != nil { 257 return nil, err 258 } 259 if g.begin > g.end { 260 return nil, fmt.Errorf("invalid int-range-seq: begin (%d) > end (%d)", g.begin, g.end) 261 } 262 if g.size > (g.end - g.begin) { 263 return nil, fmt.Errorf("invalid int-range-seq: size (%d) > end (%d) - begin (%d)", g.size, g.end, g.begin) 264 } 265 return g, nil 266 } 267 268 func (g *IntRangeSeq) Name() string { return "int-range-seq" } 269 func (g *IntRangeSeq) Format() (uint, string) { return 2, "%d" } 270 func (g *IntRangeSeq) Scan(any interface{}) error { return nil } 271 272 func (g *IntRangeSeq) Copy() Generator { 273 c, _ := NewIntRangeSeq(g.params) 274 return c 275 } 276 277 func (g *IntRangeSeq) Values(_ RunCount) []interface{} { 278 g.Lock() 279 if g.n > g.end { 280 g.n = g.begin // reset [begin, m] 281 } 282 n, m := g.n, g.n+g.size-1 // next chunk [n, m] 283 g.n += g.size 284 if m > g.end { 285 m = g.end // short chunk [n, end] 286 } 287 g.Unlock() 288 return []interface{}{n, m} 289 } 290 291 // -------------------------------------------------------------------------- 292 293 // AutoInc implements the auto-inc data generator. 294 type AutoInc struct { 295 i uint64 296 step uint64 297 } 298 299 var _ Generator = &AutoInc{} 300 301 func NewAutoInc(params map[string]string) (*AutoInc, error) { 302 g := &AutoInc{ 303 i: 0, 304 step: 1, 305 } 306 s, ok := params["start"] 307 if ok { 308 i, err := strconv.ParseUint(s, 10, 64) 309 if err != nil { 310 return nil, fmt.Errorf("invalid start=%s: %s", s, err) 311 } 312 g.i = i 313 } 314 s, ok = params["step"] 315 if ok { 316 i, err := strconv.ParseUint(s, 10, 64) 317 if err != nil { 318 return nil, fmt.Errorf("invalid step=%s: %s", s, err) 319 } 320 g.step = i 321 } 322 return g, nil 323 } 324 325 func (g *AutoInc) Name() string { return "auto-inc" } 326 func (g *AutoInc) Format() (uint, string) { return 1, "%d" } 327 func (g *AutoInc) Scan(any interface{}) error { return nil } 328 329 func (g *AutoInc) Copy() Generator { 330 return &AutoInc{ 331 i: g.i, 332 step: g.step, 333 } 334 } 335 336 func (g *AutoInc) Values(_ RunCount) []interface{} { 337 return []interface{}{atomic.AddUint64(&g.i, g.step)} 338 }