github.com/muyo/sno@v1.2.1/generator.go

// Package sno provides fast generators of compact, sortable, unique IDs with embedded metadata.
package sno

import (
	"encoding/binary"
	"sync"
	"sync/atomic"
	"time"
)

// GeneratorSnapshot represents the bookkeeping data of a Generator at some point in time.
//
// Snapshots serve both as configuration and a means of restoring generators across restarts,
// to ensure newly generated IDs don't overwrite IDs generated before going offline.
type GeneratorSnapshot struct {
	// The Partition the generator is scoped to. A zero value ({0, 0}) is valid and will be used.
	Partition Partition `json:"partition"`

	// Sequence pool bounds (inclusive). Can be given in either order - the lower value will become
	// the lower bound. When SequenceMax is 0 and SequenceMin != 65535, SequenceMax will be set to 65535.
	SequenceMin uint16 `json:"sequenceMin"`
	SequenceMax uint16 `json:"sequenceMax"`

	// Current sequence number. When 0, it will be set to SequenceMin. May overflow SequenceMax,
	// but not underflow SequenceMin.
	Sequence uint32 `json:"sequence"`

	Now      int64  `json:"now"`      // Wall time the snapshot was taken at, in sno time units and in our epoch.
	WallHi   int64  `json:"wallHi"`   //
	WallSafe int64  `json:"wallSafe"` //
	Drifts   uint32 `json:"drifts"`   // Count of wall clock regressions the generator tick-tocked at.
}

// SequenceOverflowNotification contains information pertaining to the current state of a Generator
// while it is overflowing.
type SequenceOverflowNotification struct {
	Now   time.Time // Time of tick.
	Count uint32    // Number of currently overflowing generation calls.
	Ticks uint32    // Total count of ticks while dealing with the *current* overflow.
}

// Generator is responsible for generating new IDs scoped to a given fixed Partition and
// managing their sequence.
//
// A Generator must be constructed using NewGenerator - the zero value of a Generator is
// unusable.
//
// A Generator must not be copied after first use.
type Generator struct {
	partition uint32 // Immutable.

	drifts     uint32     // Uses the LSB for the tick-tock and serves as a counter.
	wallHi     uint64     // Atomic.
	wallSafe   uint64     // Atomic.
	regression sync.Mutex // Regression branch lock.

	seq       uint32 // Atomic.
	seqMin    uint32 // Immutable.
	seqMax    uint32 // Immutable.
	seqStatic uint32 // Atomic. See NewWithTime. Not included in snapshots (does not get restored).

	seqOverflowCond   *sync.Cond
	seqOverflowTicker *time.Ticker
	seqOverflowCount  uint32 // Behind seqOverflowCond lock.
	seqOverflowChan   chan<- *SequenceOverflowNotification
}

// NewGenerator returns a new generator based on the optional Snapshot.
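//
// Passing a nil snapshot yields a generator with a freshly generated Partition and the
// default sequence pool; the channel may also be nil if overflow notifications are not
// needed. A minimal usage sketch of both paths (error handling elided - and the metabyte
// of 0 is an arbitrary choice for this example):
//
//	g, _ := NewGenerator(nil, nil)
//	id := g.New(0)
//
//	// Restoring bookkeeping persisted before a restart, assuming snap
//	// holds a previously stored GeneratorSnapshot:
//	g, _ = NewGenerator(&snap, nil)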
func NewGenerator(snapshot *GeneratorSnapshot, c chan<- *SequenceOverflowNotification) (*Generator, error) {
	if snapshot != nil {
		return newGeneratorFromSnapshot(*snapshot, c)
	}

	return newGeneratorFromDefaults(c)
}

func newGeneratorFromSnapshot(snapshot GeneratorSnapshot, c chan<- *SequenceOverflowNotification) (*Generator, error) {
	if err := sanitizeSnapshotBounds(&snapshot); err != nil {
		return nil, err
	}

	return &Generator{
		partition:       partitionToInternalRepr(snapshot.Partition),
		seq:             snapshot.Sequence,
		seqMin:          uint32(snapshot.SequenceMin),
		seqMax:          uint32(snapshot.SequenceMax),
		seqStatic:       uint32(snapshot.SequenceMin - 1), // Offset by -1 since NewWithTime starts this with an incr.
		seqOverflowCond: sync.NewCond(&sync.Mutex{}),
		seqOverflowChan: c,
		drifts:          snapshot.Drifts,
		wallHi:          uint64(snapshot.WallHi),
		wallSafe:        uint64(snapshot.WallSafe),
	}, nil
}

func newGeneratorFromDefaults(c chan<- *SequenceOverflowNotification) (*Generator, error) {
	// Realistically safe, but has an edge case resulting in a PartitionPoolExhaustedError.
	partition, err := genPartition()
	if err != nil {
		return nil, err
	}

	return &Generator{
		partition:       partition,
		seqMax:          MaxSequence,
		seqStatic:       ^uint32(0), // Offset by -1 since NewWithTime starts this with an incr.
		seqOverflowCond: sync.NewCond(&sync.Mutex{}),
		seqOverflowChan: c,
	}, nil
}

// New generates a new ID using the current system time for its timestamp.
func (g *Generator) New(meta byte) (id ID) {
retry:
	var (
		// Note: A single load of wallHi for the evaluations is correct (as we only grab wallNow
		// once as well).
		wallHi  = atomic.LoadUint64(&g.wallHi)
		wallNow = snotime()
	)

	// Fastest branch if we're still within the most recent time unit.
	if wallNow == wallHi {
		seq := atomic.AddUint32(&g.seq, 1)

		if g.seqMax >= seq {
			g.applyTimestamp(&id, wallNow, atomic.LoadUint32(&g.drifts)&1)
			g.applyPayload(&id, meta, seq)

			return
		}

		// This is to be considered an edge case if seqMax actually gets exceeded, but since bounds
		// can be set arbitrarily, in a small pool (or in stress tests) this can happen.
		// We don't *really* handle this gracefully - we currently clog up and wait until the sequence
		// gets reset by a time change, *hoping* we'll finally get our turn. If requests to generate
		// don't decrease enough, eventually this will starve out resources.
		//
		// The reason we don't simply plug the broadcast into the time progression branch is precisely
		// because that one is going to be the most common branch for many uses realistically (1 or 0 IDs
		// per 4msec), while this one is for scales on another level. At the same time, if we *ever* hit
		// this case, we need a periodic flush anyway, because even a single-threaded process can easily
		// exhaust the max default sequence pool, let alone a smaller one, meaning it could potentially
		// deadlock if all routines got locked in on a sequence overflow and no new routine came to their
		// rescue at a higher time to reset the sequence and notify them.
		g.seqOverflowCond.L.Lock()
		g.seqOverflowCount++

		if g.seqOverflowTicker == nil {
			// Tick *roughly* each 1ms during overflows.
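			// (A sno time unit - TimeUnit - spans 4msec, so the quarter used below
			// comes out at the intended 1msec.)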
			g.seqOverflowTicker = time.NewTicker(TimeUnit / 4)
			go g.seqOverflowLoop()
		}

		for atomic.LoadUint32(&g.seq) > g.seqMax {
			// We spin pessimistically here instead of a straight lock -> wait -> unlock because that'd
			// put us back on the New(). At extreme contention we could end up back here anyway.
			g.seqOverflowCond.Wait()
		}

		g.seqOverflowCount--
		g.seqOverflowCond.L.Unlock()

		goto retry
	}

	// Time progression branch.
	if wallNow > wallHi && atomic.CompareAndSwapUint64(&g.wallHi, wallHi, wallNow) {
		atomic.StoreUint32(&g.seq, g.seqMin)

		g.applyTimestamp(&id, wallNow, atomic.LoadUint32(&g.drifts)&1)
		g.applyPayload(&id, meta, g.seqMin)

		return
	}

	// Time regression branch.
	g.regression.Lock()

	// Check again. It's possible that another thread applied the drift while we were spinning (if we were).
	if wallHi = atomic.LoadUint64(&g.wallHi); wallNow >= wallHi {
		g.regression.Unlock()

		goto retry
	}

	if wallNow > g.wallSafe {
		// Branch for the one routine that gets to apply the drift.
		// wallHi is bidirectional (it gets updated whenever the wall clock time progresses - or when a drift
		// gets applied, which is when it regresses). In contrast, wallSafe only ever gets updated when
		// a drift gets applied and always gets set to the highest time recorded, meaning it
		// increases monotonically.
		atomic.StoreUint64(&g.wallSafe, wallHi)
		atomic.StoreUint64(&g.wallHi, wallNow)
		atomic.StoreUint32(&g.seq, g.seqMin)

		g.applyTimestamp(&id, wallNow, atomic.AddUint32(&g.drifts, 1)&1)
		g.applyPayload(&id, meta, g.seqMin)

		g.regression.Unlock()

		return
	}

	// Branch for all routines that are in an "unsafe" past (e.g. multiple time regressions happened
	// before we reached wallSafe again). Sleep out the difference, converted from sno time units
	// back to nanoseconds.
	g.regression.Unlock()

	time.Sleep(time.Duration(g.wallSafe-wallNow) * TimeUnit)

	goto retry
}

// NewWithTime generates a new ID using the given time for the timestamp.
//
// IDs generated with user-specified timestamps are exempt from the tick-tock mechanism and
// use a sequence separate from New() - one that is independent from time, as the time provided
// to this method can be arbitrary. The sequence increases strictly monotonically up to the
// generator's SequenceMax, after which it rolls over silently back to SequenceMin.
//
// That means bounds are respected, but unlike New(), NewWithTime() will not block the caller
// when the (separate) sequence rolls over, as the Generator would be unable to determine when
// to resume processing within the constraints of this method.
//
// Managing potential collisions due to the arbitrary time is left to the user.
//
// This utility is primarily meant to enable porting of old IDs to sno and is assumed to be run
// before an ID scheme goes online.
func (g *Generator) NewWithTime(meta byte, t time.Time) (id ID) {
retry:
	var seq = atomic.AddUint32(&g.seqStatic, 1)

	if seq > g.seqMax {
		if !atomic.CompareAndSwapUint32(&g.seqStatic, seq, g.seqMin) {
			goto retry
		}

		seq = g.seqMin
	}

	g.applyTimestamp(&id, uint64(t.UnixNano()-epochNsec)/TimeUnit, 0)
	g.applyPayload(&id, meta, seq)

	return
}

// Partition returns the fixed identifier of the Generator.
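//
// It is the same Partition the Generator was constructed with - either the one provided
// through a GeneratorSnapshot or the one generated during construction - and it survives
// Snapshot() round trips.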
func (g *Generator) Partition() Partition {
	return partitionToPublicRepr(g.partition)
}

// Sequence returns the current sequence the Generator is at.
//
// This does *not* mean that if one were to call New() right now, the generated ID
// would necessarily get this sequence, as other things may happen before.
//
// If the next call to New() would result in a reset of the sequence, SequenceMin
// is returned instead of the current internal sequence.
//
// If the generator is currently overflowing, the sequence returned will be higher than
// the generator's SequenceMax (hence the uint32 return type), meaning it can be used to
// determine the current overflow via:
//
//	overflow := int(generator.Sequence() - uint32(generator.SequenceMax()))
func (g *Generator) Sequence() uint32 {
	if wallNow := snotime(); wallNow == atomic.LoadUint64(&g.wallHi) {
		return atomic.LoadUint32(&g.seq)
	}

	return g.seqMin
}

// SequenceMin returns the lower bound of the sequence pool of this generator.
func (g *Generator) SequenceMin() uint16 {
	return uint16(g.seqMin)
}

// SequenceMax returns the upper bound of the sequence pool of this generator.
func (g *Generator) SequenceMax() uint16 {
	return uint16(g.seqMax)
}

// Len returns the number of IDs generated in the current timeframe.
func (g *Generator) Len() int {
	if wallNow := snotime(); wallNow == atomic.LoadUint64(&g.wallHi) {
		if seq := atomic.LoadUint32(&g.seq); g.seqMax > seq {
			return int(seq-g.seqMin) + 1
		}

		return g.Cap()
	}

	return 0
}

// Cap returns the total capacity of the Generator.
//
// To get its current capacity (e.g. the number of possible additional IDs in the current
// timeframe), simply:
//
//	spare := generator.Cap() - generator.Len()
//
// The result will always be non-negative.
func (g *Generator) Cap() int {
	return int(g.seqMax-g.seqMin) + 1
}

// Snapshot returns a copy of the Generator's current bookkeeping data.
func (g *Generator) Snapshot() GeneratorSnapshot {
	var (
		wallNow = snotime()
		wallHi  = atomic.LoadUint64(&g.wallHi)
		seq     uint32
	)

	// Be consistent with g.Sequence() and return seqMin if the next call to New()
	// would reset the sequence.
	if wallNow == wallHi {
		seq = atomic.LoadUint32(&g.seq)
	} else {
		seq = g.seqMin
	}

	return GeneratorSnapshot{
		Partition:   partitionToPublicRepr(g.partition),
		SequenceMin: uint16(g.seqMin),
		SequenceMax: uint16(g.seqMax),
		Sequence:    seq,
		Now:         int64(wallNow),
		WallHi:      int64(wallHi),
		WallSafe:    int64(atomic.LoadUint64(&g.wallSafe)),
		Drifts:      atomic.LoadUint32(&g.drifts),
	}
}

func (g *Generator) applyTimestamp(id *ID, units uint64, tick uint32) {
	// Equivalent to...
	//
	//	id[0] = byte(units >> 31)
	//	id[1] = byte(units >> 23)
	//	id[2] = byte(units >> 15)
	//	id[3] = byte(units >> 7)
	//	id[4] = byte(units << 1) | byte(tick)
	//
	// ... and slightly wasteful as we're storing 3 bytes that will get overwritten
	// by applyPayload(), but unlike the code above, the calls to binary.BigEndian.PutUintXX()
	// are compiler assisted and boil down to essentially a load + shift + bswap (+ a nop due
	// to midstack inlining), which we prefer over the roughly 16 instructions otherwise.
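	//
	// Laid out, the single store below places units in the upper 39 bits of the first
	// 5 bytes and the tick-tock bit in the lowest bit of id[4]; bytes 5 through 7 of
	// the scratch space get zeroed, to be overwritten by applyPayload().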
	// If applyTimestamp() were implemented straight in assembly, we'd not get it inlined.
	binary.BigEndian.PutUint64(id[:], units<<25|uint64(tick)<<24)
}

func (g *Generator) applyPayload(id *ID, meta byte, seq uint32) {
	id[5] = meta
	binary.BigEndian.PutUint32(id[6:], g.partition|seq)
}
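
// seqOverflowLoop runs for as long as a sequence overflow is being handled. It wakes up
// the routines parked in New() whenever the sequence may have been reset, publishes
// notifications if a channel was provided, and stops its ticker once the overflow is resolved.
//
// A sketch of consuming the notifications on the receiving end - the channel being the one
// handed to NewGenerator, preferably buffered, since the loop drops (and later retries)
// messages it cannot deliver immediately instead of blocking:
//
//	c := make(chan *SequenceOverflowNotification, 4)
//	g, _ := NewGenerator(nil, c)
//	go func() {
//		for n := range c {
//			// React to the backpressure, e.g. shed load based on n.Count and n.Ticks.
//		}
//	}()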
func (g *Generator) seqOverflowLoop() {
	var (
		retryNotify bool
		ticks       uint32
	)

	for t := range g.seqOverflowTicker.C {
		g.seqOverflowCond.L.Lock()

		if g.seqOverflowChan != nil {
			// We only ever count ticks when we've got a notification channel up.
			// Even if we're at a count of 0 but on our first tick, it means the generator
			// already declogged, but we still notify that it happened.
			ticks++
			if retryNotify || g.seqOverflowCount == 0 || ticks%4 == 1 {
				select {
				case g.seqOverflowChan <- &SequenceOverflowNotification{
					Now:   t,
					Ticks: ticks,
					Count: g.seqOverflowCount,
				}:
					retryNotify = false

				default:
					// Simply drop the message for now, but try again on the very next tick
					// instead of waiting for the full interval.
					retryNotify = true
				}
			}
		}

		if g.seqOverflowCount == 0 {
			g.seqOverflowTicker.Stop()
			g.seqOverflowTicker = nil
			g.seqOverflowCond.L.Unlock()

			return
		}

		// At this point we can unlock already because we don't touch any shared data anymore.
		// The broadcasts further down don't require us to hold the lock.
		g.seqOverflowCond.L.Unlock()

		// Under normal behaviour high load would trigger an overflow while load remains roughly
		// steady, so a seq reset will simply get triggered by a time change happening in New().
		// The actual callers are in a pessimistic loop and will check the condition themselves again.
		if g.seqMax >= atomic.LoadUint32(&g.seq) {
			g.seqOverflowCond.Broadcast()

			continue
		}

		// Handles an edge case where we've got calls locked on an overflow and suddenly no more
		// calls to New() come in, meaning there's no one to actually reset the sequence.
		var (
			wallNow = uint64(t.UnixNano()-epochNsec) / TimeUnit
			wallHi  = atomic.LoadUint64(&g.wallHi)
		)

		if wallNow > wallHi {
			atomic.StoreUint32(&g.seq, g.seqMin)
			g.seqOverflowCond.Broadcast()

			continue // Left in for readability of flow.
		}
	}
}

// Arbitrary minimum pool size of 4 per time unit (that is, 1000 per second).
// Separated out as a constant as this value is being tested against.
const minSequencePoolSize = 4

func sanitizeSnapshotBounds(s *GeneratorSnapshot) error {
	// The zero value of SequenceMax will pass as the default max if and only if SequenceMin is not
	// already the default max (as the range can be defined in either order).
	if s.SequenceMax == 0 && s.SequenceMin != MaxSequence {
		s.SequenceMax = MaxSequence
	}

	if s.SequenceMin == s.SequenceMax {
		return invalidSequenceBounds(s, errSequenceBoundsIdenticalMsg)
	}

	// Allow bounds to be given in any order.
	if s.SequenceMax < s.SequenceMin {
		s.SequenceMin, s.SequenceMax = s.SequenceMax, s.SequenceMin
	}

	if s.SequenceMax-s.SequenceMin-1 < minSequencePoolSize {
		return invalidSequenceBounds(s, errSequencePoolTooSmallMsg)
	}

	// Allow the zero value to pass as a default of the lower bound.
	if s.Sequence == 0 {
		s.Sequence = uint32(s.SequenceMin)
	}

	if s.Sequence < uint32(s.SequenceMin) {
		return invalidSequenceBounds(s, errSequenceUnderflowsBound)
	}

	return nil
}

func invalidSequenceBounds(s *GeneratorSnapshot, msg string) *InvalidSequenceBoundsError {
	return &InvalidSequenceBoundsError{
		Cur: s.Sequence,
		Min: s.SequenceMin,
		Max: s.SequenceMax,
		Msg: msg,
	}
}
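
// For illustration of the sanitization rules above (the values are arbitrary):
// a snapshot handed to NewGenerator as...
//
//	GeneratorSnapshot{SequenceMin: 1024, SequenceMax: 512}
//
// ... comes out with its bounds swapped to SequenceMin: 512, SequenceMax: 1024,
// and with its Sequence defaulted to 512.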