github.com/ipld/go-ipld-prime@v0.21.0/testutil/garbage/garbage.go

github.com/ipld/go-ipld-prime@v0.21.0/testutil/garbage/garbage.go (about)

     1  package garbage
     2  
     3  import (
     4  	"math"
     5  	mathrand "math/rand"
     6  	"strings"
     7  
     8  	"github.com/ipfs/go-cid"
     9  	"github.com/ipld/go-ipld-prime/datamodel"
    10  	cidlink "github.com/ipld/go-ipld-prime/linking/cid"
    11  	"github.com/ipld/go-ipld-prime/must"
    12  	basicnode "github.com/ipld/go-ipld-prime/node/basic"
    13  	"github.com/multiformats/go-multihash"
    14  )
    15  
// Options holds the settings used while generating a Node. Its fields are
// unexported; they are populated by applyOptions and adjusted through Option
// functions.
type Options struct {
	// initialWeights, when non-nil, supplies the per-kind weights for the first
	// kind selection only; generate drops it before recursing.
	initialWeights map[datamodel.Kind]int
	// weights supplies the per-kind weights for every other kind selection.
	weights map[datamodel.Kind]int
	// blockSize is the size budget handed to the top-level generate call.
	blockSize uint64
}
    21  
// generator builds a Node of one particular kind. count is the size budget
// offered to the generator; the returned uint64 is the amount the generator
// reports having used (the scalar generators for bool, int, float and null
// report 0), which list and map generators accumulate to decide when to stop.
type generator func(rand *mathrand.Rand, count uint64, opts Options) (uint64, datamodel.Node)
    23  
// hasher describes a hash function used when fabricating links: code is the
// value passed to multihash.Encode, and length is the digest size in bits
// (linkGenerator divides it by 8 to get the byte length).
type hasher struct {
	code   uint64
	length int
}
    28  
// charset is the pool of characters random strings are drawn from: ASCII
// letters, digits, punctuation and whitespace, plus two multi-byte runes so
// that generated strings are not always single-byte-per-character.
const charset = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789`~!@#$%^&*()-_=+[]{}|\\:;'\",.<>?/ \t\n☺💩"
    30  
var (
	// codecs are the codec codes a random link's CID may carry.
	// NOTE(review): presumably multicodec codes (raw, dag-pb, dag-cbor,
	// dag-json) — confirm against the multicodec table.
	codecs = []uint64{0x55, 0x70, 0x71, 0x0129}
	// hashes are the multihash code / digest-bit-length pairs a random link may use.
	hashes = []hasher{{0x12, 256}, {0x16, 256}, {0x1b, 256}, {0xb220, 256}, {0x13, 512}, {0x15, 384}, {0x14, 512}}
	// kinds is the fixed iteration order used for weighted kind selection:
	// scalar kinds first, then recursive kinds.
	// NOTE(review): append onto another package's slice would write into its
	// backing array if that slice had spare capacity — confirm it does not.
	kinds = append(datamodel.KindSet_Scalar, datamodel.KindSet_Recursive...)
	// runes is charset split into individual runes so one can be picked by index.
	runes = []rune(charset)
	// generators maps each kind to its generator function; it is filled in by init.
	generators map[datamodel.Kind]generator
)
    38  
    39  // Generate produces random Nodes which can be useful for testing and benchmarking. By default, the
    40  // Nodes produced are relatively small, averaging near the 1024 byte range when encoded
    41  // (very roughly, with a wide spread).
    42  //
    43  // Options can be used to adjust the average size and weights of occurances of different kinds
    44  // within the complete Node graph.
    45  //
    46  // Care should be taken when using a random source to generate garbage for testing purposes, that
    47  // the randomness is stable across test runs, or a seed is captured in such a way that a failure
    48  // can be reproduced (e.g. by printing it to stdout during the test run so it can be captured in
    49  // CI for a failure).
    50  func Generate(rand *mathrand.Rand, opts ...Option) datamodel.Node {
    51  	options := applyOptions(opts...)
    52  	_, n := generate(rand, options.blockSize, options)
    53  	return n
    54  }
    55  
    56  func generate(rand *mathrand.Rand, count uint64, opts Options) (uint64, datamodel.Node) {
    57  	weights := opts.weights
    58  	if opts.initialWeights != nil {
    59  		weights = opts.initialWeights
    60  		opts = Options{weights: opts.weights}
    61  	}
    62  	totWeight := 0
    63  	for _, kind := range kinds {
    64  		totWeight += weights[kind]
    65  	}
    66  	r := rand.Float64() * float64(totWeight)
    67  	var wacc int
    68  	for _, kind := range kinds {
    69  		wacc += weights[kind]
    70  		if float64(wacc) >= r {
    71  			return generators[kind](rand, count, opts)
    72  		}
    73  	}
    74  	panic("bad options")
    75  }
    76  
    77  func rndSize(rand *mathrand.Rand, bias uint64) uint64 {
    78  	if bias == 0 {
    79  		panic("size shouldn't be zero")
    80  	}
    81  	mean := float64(bias)
    82  	stdev := mean / 10
    83  	for {
    84  		s := math.Abs(rand.NormFloat64())*stdev + mean
    85  		if s >= 1 {
    86  			return uint64(s)
    87  		}
    88  	}
    89  }
    90  
    91  func rndRune(rand *mathrand.Rand) rune {
    92  	return runes[rand.Intn(len(runes))]
    93  }
    94  
    95  func listGenerator(rand *mathrand.Rand, count uint64, opts Options) (uint64, datamodel.Node) {
    96  	len := rndSize(rand, 10)
    97  	lb := basicnode.Prototype.List.NewBuilder()
    98  	la, err := lb.BeginList(int64(len))
    99  	if err != nil {
   100  		panic(err)
   101  	}
   102  	size := uint64(0)
   103  	for i := uint64(0); i < len && size < count; i++ {
   104  		c, n := generate(rand, count-size, opts)
   105  		err := la.AssembleValue().AssignNode(n)
   106  		if err != nil {
   107  			panic(err)
   108  		}
   109  		size += c
   110  	}
   111  	err = la.Finish()
   112  	if err != nil {
   113  		panic(err)
   114  	}
   115  	return size, lb.Build()
   116  }
   117  
   118  func mapGenerator(rand *mathrand.Rand, count uint64, opts Options) (uint64, datamodel.Node) {
   119  	length := rndSize(rand, 10)
   120  	mb := basicnode.Prototype.Map.NewBuilder()
   121  	ma, err := mb.BeginMap(int64(length))
   122  	if err != nil {
   123  		panic(err)
   124  	}
   125  	size := uint64(0)
   126  	keys := make(map[string]struct{})
   127  	for i := uint64(0); i < length && size < count; i++ {
   128  		var key string
   129  		for {
   130  			c, k := stringGenerator(rand, 5, opts)
   131  			key = must.String(k)
   132  			if _, ok := keys[key]; !ok && len(key) > 0 {
   133  				keys[key] = struct{}{}
   134  				size += c
   135  				break
   136  			}
   137  		}
   138  		sz := count - size
   139  		if size >= count { // the case where we've blown our budget already on the key
   140  			sz = 5
   141  		}
   142  		c, value := generate(rand, sz, opts)
   143  		size += c
   144  		err := ma.AssembleKey().AssignString(key)
   145  		if err != nil {
   146  			panic(err)
   147  		}
   148  		err = ma.AssembleValue().AssignNode(value)
   149  		if err != nil {
   150  			panic(err)
   151  		}
   152  	}
   153  	err = ma.Finish()
   154  	if err != nil {
   155  		panic(err)
   156  	}
   157  	return size, mb.Build()
   158  }
   159  
   160  func stringGenerator(rand *mathrand.Rand, count uint64, opts Options) (uint64, datamodel.Node) {
   161  	len := rndSize(rand, count/2+1)
   162  	sb := strings.Builder{}
   163  	for i := uint64(0); i < len; i++ {
   164  		sb.WriteRune(rndRune(rand))
   165  	}
   166  	return len, basicnode.NewString(sb.String())
   167  }
   168  
   169  func bytesGenerator(rand *mathrand.Rand, count uint64, opts Options) (uint64, datamodel.Node) {
   170  	len := rndSize(rand, count/2+1)
   171  	ba := make([]byte, len)
   172  	_, err := rand.Read(ba)
   173  	if err != nil {
   174  		panic(err)
   175  	}
   176  	return len, basicnode.NewBytes(ba)
   177  }
   178  
   179  func boolGenerator(rand *mathrand.Rand, count uint64, opts Options) (uint64, datamodel.Node) {
   180  	return 0, basicnode.NewBool(rand.Float64() > 0.5)
   181  }
   182  
   183  func intGenerator(rand *mathrand.Rand, count uint64, opts Options) (uint64, datamodel.Node) {
   184  	i := rand.Int63()
   185  	if rand.Float64() > 0.5 {
   186  		i = -i
   187  	}
   188  	return 0, basicnode.NewInt(i)
   189  }
   190  
   191  func floatGenerator(rand *mathrand.Rand, count uint64, opts Options) (uint64, datamodel.Node) {
   192  	return 0, basicnode.NewFloat(math.Tan((rand.Float64() - 0.5) * math.Pi))
   193  }
   194  
   195  func nullGenerator(rand *mathrand.Rand, count uint64, opts Options) (uint64, datamodel.Node) {
   196  	return 0, datamodel.Null
   197  }
   198  
   199  func linkGenerator(rand *mathrand.Rand, count uint64, opts Options) (uint64, datamodel.Node) {
   200  	hasher := hashes[rand.Intn(len(hashes))]
   201  	codec := codecs[rand.Intn(len(codecs))]
   202  	ba := make([]byte, hasher.length/8)
   203  	rand.Read(ba)
   204  	mh, err := multihash.Encode(ba, hasher.code)
   205  	if err != nil {
   206  		panic(err)
   207  	}
   208  	return uint64(hasher.length / 8), basicnode.NewLink(cidlink.Link{Cid: cid.NewCidV1(codec, mh)})
   209  }
   210  
// Option is a function that adjusts an Options value in place. Options are
// passed to Generate and applied in order on top of the defaults.
type Option func(*Options)
   212  
   213  func applyOptions(opt ...Option) Options {
   214  	opts := Options{
   215  		blockSize:      1024,
   216  		initialWeights: DefaultInitialWeights(),
   217  		weights:        DefaultWeights(),
   218  	}
   219  	for _, o := range opt {
   220  		o(&opts)
   221  	}
   222  	return opts
   223  }
   224  
   225  // DefaultInitialWeights provides the default map of weights that can be
   226  // overridden by the InitialWeights option. The default is an equal weighting
   227  // of 1 for every scalar kind and 10 for the recursive kinds.
   228  func DefaultInitialWeights() map[datamodel.Kind]int {
   229  	return map[datamodel.Kind]int{
   230  		datamodel.Kind_List:   10,
   231  		datamodel.Kind_Map:    10,
   232  		datamodel.Kind_Bool:   1,
   233  		datamodel.Kind_Bytes:  1,
   234  		datamodel.Kind_Float:  1,
   235  		datamodel.Kind_Int:    1,
   236  		datamodel.Kind_Link:   1,
   237  		datamodel.Kind_Null:   1,
   238  		datamodel.Kind_String: 1,
   239  	}
   240  }
   241  
   242  // DefaultWeights provides the default map of weights that can be overridden by
   243  // the Weights option. The default is an equal weighting of 1 for every kind.
   244  func DefaultWeights() map[datamodel.Kind]int {
   245  	return map[datamodel.Kind]int{
   246  		datamodel.Kind_List:   1,
   247  		datamodel.Kind_Map:    1,
   248  		datamodel.Kind_Bool:   1,
   249  		datamodel.Kind_Bytes:  1,
   250  		datamodel.Kind_Float:  1,
   251  		datamodel.Kind_Int:    1,
   252  		datamodel.Kind_Link:   1,
   253  		datamodel.Kind_Null:   1,
   254  		datamodel.Kind_String: 1,
   255  	}
   256  }
   257  
   258  // InitialWeights sets a per-kind weighting for the root node. That is, the weights
   259  // set here will determine the liklihood of the returned Node's direct .Kind().
   260  // These weights are ignored after the top-level Node (for recursive kinds,
   261  // obviously for scalar kinds there is only a top-level Node).
   262  //
   263  // The default initial weights bias toward Map and List kinds, by a ratio of
   264  // 10:1—i.e. the recursive kinds are more likely to appear at the top-level.
   265  func InitialWeights(initialWeights map[datamodel.Kind]int) Option {
   266  	return func(o *Options) {
   267  		o.initialWeights = initialWeights
   268  	}
   269  }
   270  
   271  // Weights sets a per-kind weighting for nodes appearing throughout the returned
   272  // graph. When assembling a graph, these weights determine the liklihood that
   273  // a given kind will be selected for that node.
   274  //
   275  // A weight of 0 will turn that kind off entirely. So, for example, if you
   276  // wanted output data with no maps or bytes, then set both of those weights to
   277  // zero, leaving the rest >0 and do the same for InitialWeights.
   278  //
   279  // The default weights are set to 1—i.e. there is an equal liklihood that any of
   280  // the valid kinds will be selected for any point in the graph.
   281  //
   282  // This option is overridden by InitialWeights (which also has a default even
   283  // if not set explicitly) for the top-level node.
   284  func Weights(weights map[datamodel.Kind]int) Option {
   285  	return func(o *Options) {
   286  		o.weights = weights
   287  	}
   288  }
   289  
   290  // TargetBlockSize sets a very rough bias in number of bytes that the resulting
   291  // Node may consume when encoded (i.e. the block size). This is a very
   292  // approximate measure, but over enough repeated Generate() calls, the resulting
   293  // Nodes, once encoded, should have a median that is somewhere in this vicinity.
   294  //
   295  // The default target block size is 1024. This should be tuned in accordance with
   296  // the anticipated average block size of the system under test.
   297  func TargetBlockSize(blockSize uint64) Option {
   298  	return func(o *Options) {
   299  		o.blockSize = blockSize
   300  	}
   301  }
   302  
   303  func init() {
   304  	// can't be declared statically because of some cycles through list & map to generate()
   305  	generators = map[datamodel.Kind]generator{
   306  		datamodel.Kind_List:   listGenerator,
   307  		datamodel.Kind_Map:    mapGenerator,
   308  		datamodel.Kind_String: stringGenerator,
   309  		datamodel.Kind_Bytes:  bytesGenerator,
   310  		datamodel.Kind_Bool:   boolGenerator,
   311  		datamodel.Kind_Int:    intGenerator,
   312  		datamodel.Kind_Float:  floatGenerator,
   313  		datamodel.Kind_Null:   nullGenerator,
   314  		datamodel.Kind_Link:   linkGenerator,
   315  	}
   316  }