github.com/ipld/go-ipld-prime@v0.21.0/testutil/garbage/garbage.go (about) 1 package garbage 2 3 import ( 4 "math" 5 mathrand "math/rand" 6 "strings" 7 8 "github.com/ipfs/go-cid" 9 "github.com/ipld/go-ipld-prime/datamodel" 10 cidlink "github.com/ipld/go-ipld-prime/linking/cid" 11 "github.com/ipld/go-ipld-prime/must" 12 basicnode "github.com/ipld/go-ipld-prime/node/basic" 13 "github.com/multiformats/go-multihash" 14 ) 15 16 type Options struct { 17 initialWeights map[datamodel.Kind]int 18 weights map[datamodel.Kind]int 19 blockSize uint64 20 } 21 22 type generator func(rand *mathrand.Rand, count uint64, opts Options) (uint64, datamodel.Node) 23 24 type hasher struct { 25 code uint64 26 length int 27 } 28 29 const charset = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789`~!@#$%^&*()-_=+[]{}|\\:;'\",.<>?/ \t\n☺💩" 30 31 var ( 32 codecs = []uint64{0x55, 0x70, 0x71, 0x0129} 33 hashes = []hasher{{0x12, 256}, {0x16, 256}, {0x1b, 256}, {0xb220, 256}, {0x13, 512}, {0x15, 384}, {0x14, 512}} 34 kinds = append(datamodel.KindSet_Scalar, datamodel.KindSet_Recursive...) 35 runes = []rune(charset) 36 generators map[datamodel.Kind]generator 37 ) 38 39 // Generate produces random Nodes which can be useful for testing and benchmarking. By default, the 40 // Nodes produced are relatively small, averaging near the 1024 byte range when encoded 41 // (very roughly, with a wide spread). 42 // 43 // Options can be used to adjust the average size and weights of occurances of different kinds 44 // within the complete Node graph. 45 // 46 // Care should be taken when using a random source to generate garbage for testing purposes, that 47 // the randomness is stable across test runs, or a seed is captured in such a way that a failure 48 // can be reproduced (e.g. by printing it to stdout during the test run so it can be captured in 49 // CI for a failure). 50 func Generate(rand *mathrand.Rand, opts ...Option) datamodel.Node { 51 options := applyOptions(opts...) 52 _, n := generate(rand, options.blockSize, options) 53 return n 54 } 55 56 func generate(rand *mathrand.Rand, count uint64, opts Options) (uint64, datamodel.Node) { 57 weights := opts.weights 58 if opts.initialWeights != nil { 59 weights = opts.initialWeights 60 opts = Options{weights: opts.weights} 61 } 62 totWeight := 0 63 for _, kind := range kinds { 64 totWeight += weights[kind] 65 } 66 r := rand.Float64() * float64(totWeight) 67 var wacc int 68 for _, kind := range kinds { 69 wacc += weights[kind] 70 if float64(wacc) >= r { 71 return generators[kind](rand, count, opts) 72 } 73 } 74 panic("bad options") 75 } 76 77 func rndSize(rand *mathrand.Rand, bias uint64) uint64 { 78 if bias == 0 { 79 panic("size shouldn't be zero") 80 } 81 mean := float64(bias) 82 stdev := mean / 10 83 for { 84 s := math.Abs(rand.NormFloat64())*stdev + mean 85 if s >= 1 { 86 return uint64(s) 87 } 88 } 89 } 90 91 func rndRune(rand *mathrand.Rand) rune { 92 return runes[rand.Intn(len(runes))] 93 } 94 95 func listGenerator(rand *mathrand.Rand, count uint64, opts Options) (uint64, datamodel.Node) { 96 len := rndSize(rand, 10) 97 lb := basicnode.Prototype.List.NewBuilder() 98 la, err := lb.BeginList(int64(len)) 99 if err != nil { 100 panic(err) 101 } 102 size := uint64(0) 103 for i := uint64(0); i < len && size < count; i++ { 104 c, n := generate(rand, count-size, opts) 105 err := la.AssembleValue().AssignNode(n) 106 if err != nil { 107 panic(err) 108 } 109 size += c 110 } 111 err = la.Finish() 112 if err != nil { 113 panic(err) 114 } 115 return size, lb.Build() 116 } 117 118 func mapGenerator(rand *mathrand.Rand, count uint64, opts Options) (uint64, datamodel.Node) { 119 length := rndSize(rand, 10) 120 mb := basicnode.Prototype.Map.NewBuilder() 121 ma, err := mb.BeginMap(int64(length)) 122 if err != nil { 123 panic(err) 124 } 125 size := uint64(0) 126 keys := make(map[string]struct{}) 127 for i := uint64(0); i < length && size < count; i++ { 128 var key string 129 for { 130 c, k := stringGenerator(rand, 5, opts) 131 key = must.String(k) 132 if _, ok := keys[key]; !ok && len(key) > 0 { 133 keys[key] = struct{}{} 134 size += c 135 break 136 } 137 } 138 sz := count - size 139 if size >= count { // the case where we've blown our budget already on the key 140 sz = 5 141 } 142 c, value := generate(rand, sz, opts) 143 size += c 144 err := ma.AssembleKey().AssignString(key) 145 if err != nil { 146 panic(err) 147 } 148 err = ma.AssembleValue().AssignNode(value) 149 if err != nil { 150 panic(err) 151 } 152 } 153 err = ma.Finish() 154 if err != nil { 155 panic(err) 156 } 157 return size, mb.Build() 158 } 159 160 func stringGenerator(rand *mathrand.Rand, count uint64, opts Options) (uint64, datamodel.Node) { 161 len := rndSize(rand, count/2+1) 162 sb := strings.Builder{} 163 for i := uint64(0); i < len; i++ { 164 sb.WriteRune(rndRune(rand)) 165 } 166 return len, basicnode.NewString(sb.String()) 167 } 168 169 func bytesGenerator(rand *mathrand.Rand, count uint64, opts Options) (uint64, datamodel.Node) { 170 len := rndSize(rand, count/2+1) 171 ba := make([]byte, len) 172 _, err := rand.Read(ba) 173 if err != nil { 174 panic(err) 175 } 176 return len, basicnode.NewBytes(ba) 177 } 178 179 func boolGenerator(rand *mathrand.Rand, count uint64, opts Options) (uint64, datamodel.Node) { 180 return 0, basicnode.NewBool(rand.Float64() > 0.5) 181 } 182 183 func intGenerator(rand *mathrand.Rand, count uint64, opts Options) (uint64, datamodel.Node) { 184 i := rand.Int63() 185 if rand.Float64() > 0.5 { 186 i = -i 187 } 188 return 0, basicnode.NewInt(i) 189 } 190 191 func floatGenerator(rand *mathrand.Rand, count uint64, opts Options) (uint64, datamodel.Node) { 192 return 0, basicnode.NewFloat(math.Tan((rand.Float64() - 0.5) * math.Pi)) 193 } 194 195 func nullGenerator(rand *mathrand.Rand, count uint64, opts Options) (uint64, datamodel.Node) { 196 return 0, datamodel.Null 197 } 198 199 func linkGenerator(rand *mathrand.Rand, count uint64, opts Options) (uint64, datamodel.Node) { 200 hasher := hashes[rand.Intn(len(hashes))] 201 codec := codecs[rand.Intn(len(codecs))] 202 ba := make([]byte, hasher.length/8) 203 rand.Read(ba) 204 mh, err := multihash.Encode(ba, hasher.code) 205 if err != nil { 206 panic(err) 207 } 208 return uint64(hasher.length / 8), basicnode.NewLink(cidlink.Link{Cid: cid.NewCidV1(codec, mh)}) 209 } 210 211 type Option func(*Options) 212 213 func applyOptions(opt ...Option) Options { 214 opts := Options{ 215 blockSize: 1024, 216 initialWeights: DefaultInitialWeights(), 217 weights: DefaultWeights(), 218 } 219 for _, o := range opt { 220 o(&opts) 221 } 222 return opts 223 } 224 225 // DefaultInitialWeights provides the default map of weights that can be 226 // overridden by the InitialWeights option. The default is an equal weighting 227 // of 1 for every scalar kind and 10 for the recursive kinds. 228 func DefaultInitialWeights() map[datamodel.Kind]int { 229 return map[datamodel.Kind]int{ 230 datamodel.Kind_List: 10, 231 datamodel.Kind_Map: 10, 232 datamodel.Kind_Bool: 1, 233 datamodel.Kind_Bytes: 1, 234 datamodel.Kind_Float: 1, 235 datamodel.Kind_Int: 1, 236 datamodel.Kind_Link: 1, 237 datamodel.Kind_Null: 1, 238 datamodel.Kind_String: 1, 239 } 240 } 241 242 // DefaultWeights provides the default map of weights that can be overridden by 243 // the Weights option. The default is an equal weighting of 1 for every kind. 244 func DefaultWeights() map[datamodel.Kind]int { 245 return map[datamodel.Kind]int{ 246 datamodel.Kind_List: 1, 247 datamodel.Kind_Map: 1, 248 datamodel.Kind_Bool: 1, 249 datamodel.Kind_Bytes: 1, 250 datamodel.Kind_Float: 1, 251 datamodel.Kind_Int: 1, 252 datamodel.Kind_Link: 1, 253 datamodel.Kind_Null: 1, 254 datamodel.Kind_String: 1, 255 } 256 } 257 258 // InitialWeights sets a per-kind weighting for the root node. That is, the weights 259 // set here will determine the liklihood of the returned Node's direct .Kind(). 260 // These weights are ignored after the top-level Node (for recursive kinds, 261 // obviously for scalar kinds there is only a top-level Node). 262 // 263 // The default initial weights bias toward Map and List kinds, by a ratio of 264 // 10:1—i.e. the recursive kinds are more likely to appear at the top-level. 265 func InitialWeights(initialWeights map[datamodel.Kind]int) Option { 266 return func(o *Options) { 267 o.initialWeights = initialWeights 268 } 269 } 270 271 // Weights sets a per-kind weighting for nodes appearing throughout the returned 272 // graph. When assembling a graph, these weights determine the liklihood that 273 // a given kind will be selected for that node. 274 // 275 // A weight of 0 will turn that kind off entirely. So, for example, if you 276 // wanted output data with no maps or bytes, then set both of those weights to 277 // zero, leaving the rest >0 and do the same for InitialWeights. 278 // 279 // The default weights are set to 1—i.e. there is an equal liklihood that any of 280 // the valid kinds will be selected for any point in the graph. 281 // 282 // This option is overridden by InitialWeights (which also has a default even 283 // if not set explicitly) for the top-level node. 284 func Weights(weights map[datamodel.Kind]int) Option { 285 return func(o *Options) { 286 o.weights = weights 287 } 288 } 289 290 // TargetBlockSize sets a very rough bias in number of bytes that the resulting 291 // Node may consume when encoded (i.e. the block size). This is a very 292 // approximate measure, but over enough repeated Generate() calls, the resulting 293 // Nodes, once encoded, should have a median that is somewhere in this vicinity. 294 // 295 // The default target block size is 1024. This should be tuned in accordance with 296 // the anticipated average block size of the system under test. 297 func TargetBlockSize(blockSize uint64) Option { 298 return func(o *Options) { 299 o.blockSize = blockSize 300 } 301 } 302 303 func init() { 304 // can't be declared statically because of some cycles through list & map to generate() 305 generators = map[datamodel.Kind]generator{ 306 datamodel.Kind_List: listGenerator, 307 datamodel.Kind_Map: mapGenerator, 308 datamodel.Kind_String: stringGenerator, 309 datamodel.Kind_Bytes: bytesGenerator, 310 datamodel.Kind_Bool: boolGenerator, 311 datamodel.Kind_Int: intGenerator, 312 datamodel.Kind_Float: floatGenerator, 313 datamodel.Kind_Null: nullGenerator, 314 datamodel.Kind_Link: linkGenerator, 315 } 316 }