github.com/square/finch@v0.0.0-20240412205204-6530c03e2b96/data/integer.go (about)

     1  // Copyright 2024 Block, Inc.
     2  
     3  package data
     4  
     5  import (
     6  	"fmt"
     7  	"math"
     8  	"math/rand"
     9  	"strconv"
    10  	"strings"
    11  	"sync"
    12  	"sync/atomic"
    13  
    14  	"github.com/square/finch"
    15  )
    16  
    17  // Int implements the int data generator.
    18  type Int struct {
    19  	min    int64
    20  	max    int64
    21  	dist   byte    // normal|uniform
    22  	mean   float64 // dist=normal
    23  	stddev float64 // dist=normal
    24  }
    25  
    26  var _ Generator = &Int{}
    27  
    28  const (
    29  	dist_uniform byte = iota
    30  	dist_normal
    31  )
    32  
    33  func NewInt(params map[string]string) (*Int, error) {
    34  	g := &Int{
    35  		min:  1,
    36  		max:  finch.ROWS,
    37  		dist: dist_uniform,
    38  	}
    39  
    40  	if err := int64From(params, "min", &g.min, false); err != nil {
    41  		return nil, err
    42  	}
    43  	if err := int64From(params, "max", &g.max, false); err != nil {
    44  		return nil, err
    45  	}
    46  
    47  	switch strings.ToLower(params["dist"]) {
    48  	case "normal":
    49  		g.dist = dist_normal
    50  		var mean int64
    51  		if err := int64From(params, "mean", &mean, false); err != nil {
    52  			return nil, err
    53  		}
    54  		if mean == 0 {
    55  			mean = (g.max - g.min + 1) / 2
    56  		}
    57  		g.mean = float64(mean)
    58  
    59  		s, ok := params["stddev"]
    60  		if ok {
    61  			var err error
    62  			g.stddev, err = strconv.ParseFloat(s, 64)
    63  			if err != nil {
    64  				return nil, err
    65  			}
    66  		} else {
    67  			g.stddev = (float64(g.max) - float64(g.min)) / 8.0
    68  		}
    69  	case "uniform":
    70  		g.dist = dist_uniform
    71  	default:
    72  		g.dist = dist_uniform
    73  	}
    74  	finch.Debug("rand int [%d, %d] dist %d (uni %d, norm %d)", g.min, g.max, g.dist, dist_uniform, dist_normal)
    75  	return g, nil
    76  }
    77  
    78  func (g *Int) Name() string               { return "int" }
    79  func (g *Int) Format() (uint, string)     { return 1, "%d" }
    80  func (g *Int) Scan(any interface{}) error { return nil }
    81  
    82  func (g *Int) Copy() Generator {
    83  	c := *g
    84  	return &c
    85  }
    86  
    87  func (g *Int) Values(_ RunCount) []interface{} {
    88  	switch g.dist {
    89  	case dist_normal:
    90  		v := int64(math.Floor(rand.NormFloat64()*g.stddev + g.mean))
    91  		if v < g.min || v > g.max {
    92  			v = int64(math.Floor(rand.NormFloat64()*g.stddev + g.mean))
    93  			if v < g.min || v > g.max {
    94  				return []interface{}{int64(g.mean)}
    95  			}
    96  		}
    97  		return []interface{}{v}
    98  	default: // uniform
    99  		v := rand.Int63n(g.max)
   100  		if v < g.min {
   101  			v = g.min
   102  		}
   103  		return []interface{}{v}
   104  	}
   105  }
   106  
   107  // --------------------------------------------------------------------------
   108  
   109  // IntGaps implements the int-gaps data generator.
   110  type IntGaps struct {
   111  	params       map[string]string
   112  	input_max    int64
   113  	output_start float64
   114  	slope        float64
   115  }
   116  
   117  var _ Generator = &IntGaps{}
   118  
   119  func NewIntGaps(params map[string]string) (*IntGaps, error) {
   120  	// https://stackoverflow.com/questions/5731863/mapping-a-numeric-range-onto-another
   121  	min := int64(1)
   122  	if err := int64From(params, "min", &min, false); err != nil {
   123  		return nil, err
   124  	}
   125  	max := int64(finch.ROWS)
   126  	if err := int64From(params, "max", &max, false); err != nil {
   127  		return nil, err
   128  	}
   129  	size := max - min + 1
   130  	if size <= 0 {
   131  		return nil, fmt.Errorf("invalid int-gaps: max - min must be > 0")
   132  	}
   133  
   134  	p := int64(20)
   135  	if err := int64From(params, "p", &p, false); err != nil {
   136  		return nil, err
   137  	}
   138  	if p < 1 || p > 100 {
   139  		return nil, fmt.Errorf("invalid int-gaps p: %d, must be between 1 to 100 (inclusive)", p)
   140  	}
   141  	input_max := int64(float64(size) * (float64(p) / 100.0))
   142  
   143  	g := &IntGaps{
   144  		params:       params,
   145  		input_max:    input_max,
   146  		output_start: float64(min),
   147  		slope:        float64(max-min) / float64(input_max-1),
   148  	}
   149  	finch.Debug("1..%d -> %d..%d (%d%% of %d) gap: %d records", input_max, min, max, p, size, int(g.slope))
   150  	return g, nil
   151  }
   152  
   153  func (g *IntGaps) Name() string               { return "int-gaps" }
   154  func (g *IntGaps) Format() (uint, string)     { return 1, "%d" }
   155  func (g *IntGaps) Scan(any interface{}) error { return nil }
   156  
   157  func (g *IntGaps) Copy() Generator {
   158  	c, _ := NewIntGaps(g.params)
   159  	return c
   160  }
   161  
   162  func (g *IntGaps) Values(_ RunCount) []interface{} {
   163  	return []interface{}{int64(g.output_start + float64(rand.Int63n(g.input_max))*g.slope)}
   164  }
   165  
   166  // --------------------------------------------------------------------------
   167  
   168  // IntRange implements the int-range data generator.
   169  type IntRange struct {
   170  	params map[string]string
   171  	size   int64
   172  	min    int64
   173  	max    int64
   174  	v      []int64
   175  }
   176  
   177  var _ Generator = &IntRange{}
   178  
   179  func NewIntRange(params map[string]string) (*IntRange, error) {
   180  	g := &IntRange{
   181  		min:    1,
   182  		max:    finch.ROWS,
   183  		size:   100,
   184  		v:      []int64{0, 0},
   185  		params: params,
   186  	}
   187  	if err := int64From(params, "size", &g.size, false); err != nil {
   188  		return nil, err
   189  	}
   190  	if err := int64From(params, "min", &g.min, false); err != nil {
   191  		return nil, err
   192  	}
   193  	if err := int64From(params, "max", &g.max, false); err != nil {
   194  		return nil, err
   195  	}
   196  	if g.min >= g.max {
   197  		return nil, fmt.Errorf("invalid int range: min %d >= max %d", g.min, g.max)
   198  	}
   199  	if g.size > (g.max - g.min) {
   200  		return nil, fmt.Errorf("invalid int range: size %d > (max %d - min %d)", g.size, g.max, g.min)
   201  	}
   202  	return g, nil
   203  }
   204  
   205  func (g *IntRange) Name() string               { return "int-range" }
   206  func (g *IntRange) Format() (uint, string)     { return 2, "%d" }
   207  func (g *IntRange) Scan(any interface{}) error { return nil }
   208  
   209  func (g *IntRange) Copy() Generator {
   210  	gCopy, _ := NewIntRange(g.params)
   211  	return gCopy
   212  }
   213  
   214  func (g *IntRange) Values(_ RunCount) []interface{} {
   215  	// MySQL BETWEEN is closed interval [min, max], so if random min (lower)
   216  	// is 10 and size is 3, then 10+3=13 but that's 4 values: 10, 11, 12, 13.
   217  	// So we -1 to make BETWEEEN 10 AND 12, which is 3 values.
   218  	lower := g.min + rand.Int63n(g.max-g.min)
   219  	upper := lower + g.size - 1
   220  	if upper > g.max {
   221  		upper = g.max
   222  	}
   223  	return []interface{}{lower, upper}
   224  }
   225  
   226  // --------------------------------------------------------------------------
   227  
   228  // IntRangeSeq implements the int-range-seq data generator.
   229  type IntRangeSeq struct {
   230  	begin  int64
   231  	end    int64
   232  	size   int64
   233  	n      int64
   234  	params map[string]string
   235  	*sync.Mutex
   236  }
   237  
   238  var _ Generator = &IntRangeSeq{}
   239  
   240  func NewIntRangeSeq(params map[string]string) (*IntRangeSeq, error) {
   241  	g := &IntRangeSeq{
   242  		begin:  1,
   243  		end:    finch.ROWS,
   244  		size:   100,
   245  		n:      1,
   246  		params: params,
   247  		Mutex:  &sync.Mutex{},
   248  	}
   249  	if err := int64From(params, "size", &g.size, false); err != nil {
   250  		return nil, err
   251  	}
   252  	if err := int64From(params, "begin", &g.begin, false); err != nil {
   253  		return nil, err
   254  	}
   255  	g.n = g.begin
   256  	if err := int64From(params, "end", &g.end, false); err != nil {
   257  		return nil, err
   258  	}
   259  	if g.begin > g.end {
   260  		return nil, fmt.Errorf("invalid int-range-seq: begin (%d) > end (%d)", g.begin, g.end)
   261  	}
   262  	if g.size > (g.end - g.begin) {
   263  		return nil, fmt.Errorf("invalid int-range-seq: size (%d) > end (%d) - begin (%d)", g.size, g.end, g.begin)
   264  	}
   265  	return g, nil
   266  }
   267  
   268  func (g *IntRangeSeq) Name() string               { return "int-range-seq" }
   269  func (g *IntRangeSeq) Format() (uint, string)     { return 2, "%d" }
   270  func (g *IntRangeSeq) Scan(any interface{}) error { return nil }
   271  
   272  func (g *IntRangeSeq) Copy() Generator {
   273  	c, _ := NewIntRangeSeq(g.params)
   274  	return c
   275  }
   276  
   277  func (g *IntRangeSeq) Values(_ RunCount) []interface{} {
   278  	g.Lock()
   279  	if g.n > g.end {
   280  		g.n = g.begin // reset  [begin, m]
   281  	}
   282  	n, m := g.n, g.n+g.size-1 // next chunk [n, m]
   283  	g.n += g.size
   284  	if m > g.end {
   285  		m = g.end // short chunk [n, end]
   286  	}
   287  	g.Unlock()
   288  	return []interface{}{n, m}
   289  }
   290  
   291  // --------------------------------------------------------------------------
   292  
   293  // AutoInc implements the auto-inc data generator.
   294  type AutoInc struct {
   295  	i    uint64
   296  	step uint64
   297  }
   298  
   299  var _ Generator = &AutoInc{}
   300  
   301  func NewAutoInc(params map[string]string) (*AutoInc, error) {
   302  	g := &AutoInc{
   303  		i:    0,
   304  		step: 1,
   305  	}
   306  	s, ok := params["start"]
   307  	if ok {
   308  		i, err := strconv.ParseUint(s, 10, 64)
   309  		if err != nil {
   310  			return nil, fmt.Errorf("invalid start=%s: %s", s, err)
   311  		}
   312  		g.i = i
   313  	}
   314  	s, ok = params["step"]
   315  	if ok {
   316  		i, err := strconv.ParseUint(s, 10, 64)
   317  		if err != nil {
   318  			return nil, fmt.Errorf("invalid step=%s: %s", s, err)
   319  		}
   320  		g.step = i
   321  	}
   322  	return g, nil
   323  }
   324  
   325  func (g *AutoInc) Name() string               { return "auto-inc" }
   326  func (g *AutoInc) Format() (uint, string)     { return 1, "%d" }
   327  func (g *AutoInc) Scan(any interface{}) error { return nil }
   328  
   329  func (g *AutoInc) Copy() Generator {
   330  	return &AutoInc{
   331  		i:    g.i,
   332  		step: g.step,
   333  	}
   334  }
   335  
   336  func (g *AutoInc) Values(_ RunCount) []interface{} {
   337  	return []interface{}{atomic.AddUint64(&g.i, g.step)}
   338  }