github.com/muyo/sno@v1.2.1/generator.go

// Package sno provides fast generators of compact, sortable, unique IDs with embedded metadata.
package sno

import (
	"encoding/binary"
	"sync"
	"sync/atomic"
	"time"
)

// GeneratorSnapshot represents the bookkeeping data of a Generator at some point in time.
//
// Snapshots serve both as configuration and a means of restoring generators across restarts,
// to ensure newly generated IDs don't overwrite IDs generated before going offline.
type GeneratorSnapshot struct {
	// The Partition the generator is scoped to. A zero value ({0, 0}) is valid and will be used.
	Partition Partition `json:"partition"`

	// Sequence pool bounds (inclusive). Can be given in either order - the lower value will become
	// the lower bound. When SequenceMax is 0 and SequenceMin != 65535, SequenceMax will be set to 65535.
	SequenceMin uint16 `json:"sequenceMin"`
	SequenceMax uint16 `json:"sequenceMax"`

	// Current sequence number. When 0, it will be set to SequenceMin. May overflow SequenceMax,
	// but not underflow SequenceMin.
	Sequence uint32 `json:"sequence"`

	Now      int64  `json:"now"`      // Wall time the snapshot was taken at, in sno time units and in our epoch.
	WallHi   int64  `json:"wallHi"`   //
	WallSafe int64  `json:"wallSafe"` //
	Drifts   uint32 `json:"drifts"`   // Count of wall clock regressions the generator tick-tocked at.
}

// SequenceOverflowNotification contains information pertaining to the current state of a Generator
// while it is overflowing.
type SequenceOverflowNotification struct {
	Now   time.Time // Time of tick.
	Count uint32    // Number of currently overflowing generation calls.
	Ticks uint32    // Total count of ticks while dealing with the *current* overflow.
}

// Generator is responsible for generating new IDs scoped to a given fixed Partition and
// managing their sequence.
//
// A Generator must be constructed using NewGenerator - the zero value of a Generator is
// unusable.
//
// A Generator must not be copied after first use.
type Generator struct {
	partition uint32 // Immutable.

	drifts     uint32     // Uses the LSB for the tick-tock and serves as a counter.
	wallHi     uint64     // Atomic.
	wallSafe   uint64     // Atomic.
	regression sync.Mutex // Regression branch lock.

	seq       uint32 // Atomic.
	seqMin    uint32 // Immutable.
	seqMax    uint32 // Immutable.
	seqStatic uint32 // Atomic. See NewWithTime. Not included in snapshots (does not get restored).

	seqOverflowCond   *sync.Cond
	seqOverflowTicker *time.Ticker
	seqOverflowCount  uint32 // Behind seqOverflowCond lock.
	seqOverflowChan   chan<- *SequenceOverflowNotification
}

// NewGenerator returns a new generator based on the optional Snapshot.
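//
// Passing a nil snapshot yields a generator with a freshly generated Partition and the
// default sequence pool; the channel may also be nil if overflow notifications are not
// needed. A minimal usage sketch of both paths (error handling elided - and the metabyte
// of 0 is an arbitrary choice for this example):
//
//	g, _ := NewGenerator(nil, nil)
//	id := g.New(0)
//
//	// Restoring bookkeeping persisted before a restart, assuming snap
//	// holds a previously stored GeneratorSnapshot:
//	g, _ = NewGenerator(&snap, nil)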
func NewGenerator(snapshot *GeneratorSnapshot, c chan<- *SequenceOverflowNotification) (*Generator, error) {
	if snapshot != nil {
		return newGeneratorFromSnapshot(*snapshot, c)
	}

	return newGeneratorFromDefaults(c)
}

func newGeneratorFromSnapshot(snapshot GeneratorSnapshot, c chan<- *SequenceOverflowNotification) (*Generator, error) {
	if err := sanitizeSnapshotBounds(&snapshot); err != nil {
		return nil, err
	}

	return &Generator{
		partition:       partitionToInternalRepr(snapshot.Partition),
		seq:             snapshot.Sequence,
		seqMin:          uint32(snapshot.SequenceMin),
		seqMax:          uint32(snapshot.SequenceMax),
		seqStatic:       uint32(snapshot.SequenceMin - 1), // Offset by -1 since NewWithTime starts this with an incr.
		seqOverflowCond: sync.NewCond(&sync.Mutex{}),
		seqOverflowChan: c,
		drifts:          snapshot.Drifts,
		wallHi:          uint64(snapshot.WallHi),
		wallSafe:        uint64(snapshot.WallSafe),
	}, nil
}

func newGeneratorFromDefaults(c chan<- *SequenceOverflowNotification) (*Generator, error) {
	// Realistically safe, but has an edge case resulting in a PartitionPoolExhaustedError.
	partition, err := genPartition()
	if err != nil {
		return nil, err
	}

	return &Generator{
		partition:       partition,
		seqMax:          MaxSequence,
		seqStatic:       ^uint32(0), // Offset by -1 since NewWithTime starts this with an incr.
		seqOverflowCond: sync.NewCond(&sync.Mutex{}),
		seqOverflowChan: c,
	}, nil
}

// New generates a new ID using the current system time for its timestamp.
func (g *Generator) New(meta byte) (id ID) {
retry:
	var (
		// Note: A single load of wallHi for the evaluations is correct (as we only grab wallNow
		// once as well).
		wallHi  = atomic.LoadUint64(&g.wallHi)
		wallNow = snotime()
	)

	// Fastest branch if we're still within the most recent time unit.
	if wallNow == wallHi {
		seq := atomic.AddUint32(&g.seq, 1)

		if g.seqMax >= seq {
			g.applyTimestamp(&id, wallNow, atomic.LoadUint32(&g.drifts)&1)
			g.applyPayload(&id, meta, seq)

			return
		}

		// This is to be considered an edge case if seqMax actually gets exceeded, but since bounds
		// can be set arbitrarily, in a small pool (or in stress tests) this can happen.
		// We don't *really* handle this gracefully - we currently clog up and wait until the sequence
		// gets reset by a time change, *hoping* we'll finally get our turn. If requests to generate
		// don't decrease enough, eventually this will starve out resources.
		//
		// The reason we don't simply plug the broadcast into the time progression branch is precisely
		// because that one is going to be the most common branch for many uses realistically (1 or 0 IDs
		// per 4msec), while this one is for scales on another level. At the same time, if we *ever* hit
		// this case, we need a periodic flush anyway, because even a single-threaded process can easily
		// exhaust the max default sequence pool, let alone a smaller one, meaning it could potentially
		// deadlock if all routines got locked in on a sequence overflow and no new routine came to their
		// rescue at a higher time to reset the sequence and notify them.
		g.seqOverflowCond.L.Lock()
		g.seqOverflowCount++

		if g.seqOverflowTicker == nil {
			// Tick *roughly* each 1ms during overflows.
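			// (A sno time unit - TimeUnit - spans 4msec, so the quarter used below
			// comes out at the intended 1msec.)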
			g.seqOverflowTicker = time.NewTicker(TimeUnit / 4)
			go g.seqOverflowLoop()
		}

		for atomic.LoadUint32(&g.seq) > g.seqMax {
			// We spin pessimistically here instead of a straight lock -> wait -> unlock because that'd
			// put us back on the New(). At extreme contention we could end up back here anyway.
			g.seqOverflowCond.Wait()
		}

		g.seqOverflowCount--
		g.seqOverflowCond.L.Unlock()

		goto retry
	}

	// Time progression branch.
	if wallNow > wallHi && atomic.CompareAndSwapUint64(&g.wallHi, wallHi, wallNow) {
		atomic.StoreUint32(&g.seq, g.seqMin)

		g.applyTimestamp(&id, wallNow, atomic.LoadUint32(&g.drifts)&1)
		g.applyPayload(&id, meta, g.seqMin)

		return
	}

	// Time regression branch.
	g.regression.Lock()

	// Check again. It's possible that another thread applied the drift while we were spinning (if we were).
	if wallHi = atomic.LoadUint64(&g.wallHi); wallNow >= wallHi {
		g.regression.Unlock()

		goto retry
	}

	if wallNow > g.wallSafe {
		// Branch for the one routine that gets to apply the drift.
		// wallHi is bidirectional (it gets updated whenever the wall clock time progresses - or when a drift
		// gets applied, which is when it regresses). In contrast, wallSafe only ever gets updated when
		// a drift gets applied and always gets set to the highest time recorded, meaning it
		// increases monotonically.
		atomic.StoreUint64(&g.wallSafe, wallHi)
		atomic.StoreUint64(&g.wallHi, wallNow)
		atomic.StoreUint32(&g.seq, g.seqMin)

		g.applyTimestamp(&id, wallNow, atomic.AddUint32(&g.drifts, 1)&1)
		g.applyPayload(&id, meta, g.seqMin)

		g.regression.Unlock()

		return
	}

	// Branch for all routines that are in an "unsafe" past (e.g. multiple time regressions happened
	// before we reached wallSafe again). Sleep out the difference, converted from sno time units
	// back to nanoseconds.
	g.regression.Unlock()

	time.Sleep(time.Duration(g.wallSafe-wallNow) * TimeUnit)

	goto retry
}

// NewWithTime generates a new ID using the given time for the timestamp.
//
// IDs generated with user-specified timestamps are exempt from the tick-tock mechanism and
// use a sequence separate from New() - one that is independent from time, as the time provided
// to this method can be arbitrary. The sequence increases strictly monotonically up to the
// generator's SequenceMax, after which it rolls over silently back to SequenceMin.
//
// That means bounds are respected, but unlike New(), NewWithTime() will not block the caller
// when the (separate) sequence rolls over, as the Generator would be unable to determine when
// to resume processing within the constraints of this method.
//
// Managing potential collisions due to the arbitrary time is left to the user.
//
// This utility is primarily meant to enable porting of old IDs to sno and is assumed to be run
// before an ID scheme goes online.
func (g *Generator) NewWithTime(meta byte, t time.Time) (id ID) {
retry:
	var seq = atomic.AddUint32(&g.seqStatic, 1)

	if seq > g.seqMax {
		if !atomic.CompareAndSwapUint32(&g.seqStatic, seq, g.seqMin) {
			goto retry
		}

		seq = g.seqMin
	}

	g.applyTimestamp(&id, uint64(t.UnixNano()-epochNsec)/TimeUnit, 0)
	g.applyPayload(&id, meta, seq)

	return
}

// Partition returns the fixed identifier of the Generator.
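//
// It is the same Partition the Generator was constructed with - either the one provided
// through a GeneratorSnapshot or the one generated during construction - and it survives
// Snapshot() round trips.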
func (g *Generator) Partition() Partition {
	return partitionToPublicRepr(g.partition)
}

// Sequence returns the current sequence the Generator is at.
//
// This does *not* mean that if one were to call New() right now, the generated ID
// would necessarily get this sequence, as other things may happen before.
//
// If the next call to New() would result in a reset of the sequence, SequenceMin
// is returned instead of the current internal sequence.
//
// If the generator is currently overflowing, the sequence returned will be higher than
// the generator's SequenceMax (hence the uint32 return type), meaning it can be used to
// determine the current overflow via:
//
//	overflow := int(generator.Sequence() - uint32(generator.SequenceMax()))
func (g *Generator) Sequence() uint32 {
	if wallNow := snotime(); wallNow == atomic.LoadUint64(&g.wallHi) {
		return atomic.LoadUint32(&g.seq)
	}

	return g.seqMin
}

// SequenceMin returns the lower bound of the sequence pool of this generator.
func (g *Generator) SequenceMin() uint16 {
	return uint16(g.seqMin)
}

// SequenceMax returns the upper bound of the sequence pool of this generator.
func (g *Generator) SequenceMax() uint16 {
	return uint16(g.seqMax)
}

// Len returns the number of IDs generated in the current timeframe.
func (g *Generator) Len() int {
	if wallNow := snotime(); wallNow == atomic.LoadUint64(&g.wallHi) {
		if seq := atomic.LoadUint32(&g.seq); g.seqMax > seq {
			return int(seq-g.seqMin) + 1
		}

		return g.Cap()
	}

	return 0
}

// Cap returns the total capacity of the Generator.
//
// To get its current capacity (e.g. the number of possible additional IDs in the current
// timeframe), simply:
//
//	spare := generator.Cap() - generator.Len()
//
// The result will always be non-negative.
func (g *Generator) Cap() int {
	return int(g.seqMax-g.seqMin) + 1
}

// Snapshot returns a copy of the Generator's current bookkeeping data.
func (g *Generator) Snapshot() GeneratorSnapshot {
	var (
		wallNow = snotime()
		wallHi  = atomic.LoadUint64(&g.wallHi)
		seq     uint32
	)

	// Be consistent with g.Sequence() and return seqMin if the next call to New()
	// would reset the sequence.
	if wallNow == wallHi {
		seq = atomic.LoadUint32(&g.seq)
	} else {
		seq = g.seqMin
	}

	return GeneratorSnapshot{
		Partition:   partitionToPublicRepr(g.partition),
		SequenceMin: uint16(g.seqMin),
		SequenceMax: uint16(g.seqMax),
		Sequence:    seq,
		Now:         int64(wallNow),
		WallHi:      int64(wallHi),
		WallSafe:    int64(atomic.LoadUint64(&g.wallSafe)),
		Drifts:      atomic.LoadUint32(&g.drifts),
	}
}

func (g *Generator) applyTimestamp(id *ID, units uint64, tick uint32) {
	// Equivalent to...
	//
	//	id[0] = byte(units >> 31)
	//	id[1] = byte(units >> 23)
	//	id[2] = byte(units >> 15)
	//	id[3] = byte(units >> 7)
	//	id[4] = byte(units << 1) | byte(tick)
	//
	// ... and slightly wasteful as we're storing 3 bytes that will get overwritten
	// by applyPayload(), but unlike the code above, the calls to binary.BigEndian.PutUintXX()
	// are compiler assisted and boil down to essentially a load + shift + bswap (+ a nop due
	// to midstack inlining), which we prefer over the roughly 16 instructions otherwise.
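	//
	// Laid out, the single store below places units in the upper 39 bits of the first
	// 5 bytes and the tick-tock bit in the lowest bit of id[4]; bytes 5 through 7 of
	// the scratch space get zeroed, to be overwritten by applyPayload().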
	// If applyTimestamp() were implemented straight in assembly, we'd not get it inlined.
	binary.BigEndian.PutUint64(id[:], units<<25|uint64(tick)<<24)
}

func (g *Generator) applyPayload(id *ID, meta byte, seq uint32) {
	id[5] = meta
	binary.BigEndian.PutUint32(id[6:], g.partition|seq)
}
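
// seqOverflowLoop runs for as long as a sequence overflow is being handled. It wakes up
// the routines parked in New() whenever the sequence may have been reset, publishes
// notifications if a channel was provided, and stops its ticker once the overflow is resolved.
//
// A sketch of consuming the notifications on the receiving end - the channel being the one
// handed to NewGenerator, preferably buffered, since the loop drops (and later retries)
// messages it cannot deliver immediately instead of blocking:
//
//	c := make(chan *SequenceOverflowNotification, 4)
//	g, _ := NewGenerator(nil, c)
//	go func() {
//		for n := range c {
//			// React to the backpressure, e.g. shed load based on n.Count and n.Ticks.
//		}
//	}()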
func (g *Generator) seqOverflowLoop() {
	var (
		retryNotify bool
		ticks       uint32
	)

	for t := range g.seqOverflowTicker.C {
		g.seqOverflowCond.L.Lock()

		if g.seqOverflowChan != nil {
			// We only ever count ticks when we've got a notification channel up.
			// Even if we're at a count of 0 but on our first tick, it means the generator
			// already declogged, but we still notify that it happened.
			ticks++
			if retryNotify || g.seqOverflowCount == 0 || ticks%4 == 1 {
				select {
				case g.seqOverflowChan <- &SequenceOverflowNotification{
					Now:   t,
					Ticks: ticks,
					Count: g.seqOverflowCount,
				}:
					retryNotify = false

				default:
					// Simply drop the message for now, but try again on the very next tick
					// instead of waiting for the full interval.
					retryNotify = true
				}
			}
		}

		if g.seqOverflowCount == 0 {
			g.seqOverflowTicker.Stop()
			g.seqOverflowTicker = nil
			g.seqOverflowCond.L.Unlock()

			return
		}

		// At this point we can unlock already because we don't touch any shared data anymore.
		// The broadcasts further down don't require us to hold the lock.
		g.seqOverflowCond.L.Unlock()

		// Under normal behaviour high load would trigger an overflow while load remains roughly
		// steady, so a seq reset will simply get triggered by a time change happening in New().
		// The actual callers are in a pessimistic loop and will check the condition themselves again.
		if g.seqMax >= atomic.LoadUint32(&g.seq) {
			g.seqOverflowCond.Broadcast()

			continue
		}

		// Handles an edge case where we've got calls locked on an overflow and suddenly no more
		// calls to New() come in, meaning there's no one to actually reset the sequence.
		var (
			wallNow = uint64(t.UnixNano()-epochNsec) / TimeUnit
			wallHi  = atomic.LoadUint64(&g.wallHi)
		)

		if wallNow > wallHi {
			atomic.StoreUint32(&g.seq, g.seqMin)
			g.seqOverflowCond.Broadcast()

			continue // Left in for readability of flow.
		}
	}
}

// Arbitrary minimum pool size of 4 per time unit (that is, 1000 per second).
// Separated out as a constant as this value is being tested against.
const minSequencePoolSize = 4

func sanitizeSnapshotBounds(s *GeneratorSnapshot) error {
	// The zero value of SequenceMax will pass as the default max if and only if SequenceMin is not
	// already the default max (as the range can be defined in either order).
	if s.SequenceMax == 0 && s.SequenceMin != MaxSequence {
		s.SequenceMax = MaxSequence
	}

	if s.SequenceMin == s.SequenceMax {
		return invalidSequenceBounds(s, errSequenceBoundsIdenticalMsg)
	}

	// Allow bounds to be given in any order.
	if s.SequenceMax < s.SequenceMin {
		s.SequenceMin, s.SequenceMax = s.SequenceMax, s.SequenceMin
	}

	if s.SequenceMax-s.SequenceMin-1 < minSequencePoolSize {
		return invalidSequenceBounds(s, errSequencePoolTooSmallMsg)
	}

	// Allow the zero value to pass as a default of the lower bound.
	if s.Sequence == 0 {
		s.Sequence = uint32(s.SequenceMin)
	}

	if s.Sequence < uint32(s.SequenceMin) {
		return invalidSequenceBounds(s, errSequenceUnderflowsBound)
	}

	return nil
}

func invalidSequenceBounds(s *GeneratorSnapshot, msg string) *InvalidSequenceBoundsError {
	return &InvalidSequenceBoundsError{
		Cur: s.Sequence,
		Min: s.SequenceMin,
		Max: s.SequenceMax,
		Msg: msg,
	}
}
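
// For illustration of the sanitization rules above (the values are arbitrary):
// a snapshot handed to NewGenerator as...
//
//	GeneratorSnapshot{SequenceMin: 1024, SequenceMax: 512}
//
// ... comes out with its bounds swapped to SequenceMin: 512, SequenceMax: 1024,
// and with its Sequence defaulted to 512.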