github.com/ledgerwatch/erigon-lib@v1.0.0/kv/mdbx/kv_mdbx.go (about)

     1  /*
     2     Copyright 2021 Erigon contributors
     3  
     4     Licensed under the Apache License, Version 2.0 (the "License");
     5     you may not use this file except in compliance with the License.
     6     You may obtain a copy of the License at
     7  
     8         http://www.apache.org/licenses/LICENSE-2.0
     9  
    10     Unless required by applicable law or agreed to in writing, software
    11     distributed under the License is distributed on an "AS IS" BASIS,
    12     WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13     See the License for the specific language governing permissions and
    14     limitations under the License.
    15  */
    16  
    17  package mdbx
    18  
    19  import (
    20  	"bytes"
    21  	"context"
    22  	"encoding/binary"
    23  	"fmt"
    24  	"os"
    25  	"runtime"
    26  	"sort"
    27  	"strings"
    28  	"sync"
    29  	"sync/atomic"
    30  	"time"
    31  
    32  	"github.com/c2h5oh/datasize"
    33  	"github.com/erigontech/mdbx-go/mdbx"
    34  	stack2 "github.com/go-stack/stack"
    35  	"github.com/ledgerwatch/erigon-lib/common/dbg"
    36  	"github.com/ledgerwatch/erigon-lib/kv"
    37  	"github.com/ledgerwatch/erigon-lib/kv/iter"
    38  	"github.com/ledgerwatch/erigon-lib/kv/order"
    39  	"github.com/ledgerwatch/log/v3"
    40  	"github.com/pbnjay/memory"
    41  	"golang.org/x/exp/maps"
    42  	"golang.org/x/sync/semaphore"
    43  )
    44  
// NonExistingDBI is the sentinel DBI stored in a table's config when the
// table is not present in the database (for example a deprecated table that
// has already been dropped).
const NonExistingDBI kv.DBI = 999_999_999
    46  
// TableCfgFunc receives the default table configuration and returns the
// configuration that the database should actually be opened with.
type TableCfgFunc func(defaultBuckets kv.TableCfg) kv.TableCfg
    48  
    49  func WithChaindataTables(defaultBuckets kv.TableCfg) kv.TableCfg {
    50  	return defaultBuckets
    51  }
    52  
// MdbxOpts collects the settings used by Open to create an MDBX-backed
// database. It is a value type: every builder method returns a modified copy.
//
// mergeThreshold is passed to mdbx.OptMergeThreshold16dot16Percent and must be
// in the range from 12.5% (almost empty) to 50% (half empty), which
// corresponds to the range from 8192 to 32768 in units respectively.
type MdbxOpts struct {
	log             log.Logger
	roTxsLimiter    *semaphore.Weighted
	bucketsCfg      TableCfgFunc
	path            string
	syncPeriod      time.Duration
	mapSize         datasize.ByteSize
	growthStep      datasize.ByteSize
	shrinkThreshold int
	flags           uint
	pageSize        uint64
	dirtySpace      uint64 // if this space is exceeded, modified pages will `spill` to disk
	mergeThreshold  uint64 // valid range is described in the type comment
	verbosity       kv.DBVerbosityLvl
	label           kv.Label // marker to distinguish db instances - one process may open many databases. for example to collect metrics of only 1 database
	inMem           bool
}
    72  
    73  func NewMDBX(log log.Logger) MdbxOpts {
    74  	opts := MdbxOpts{
    75  		bucketsCfg: WithChaindataTables,
    76  		flags:      mdbx.NoReadahead | mdbx.Coalesce | mdbx.Durable,
    77  		log:        log,
    78  		pageSize:   kv.DefaultPageSize(),
    79  
    80  		// default is (TOTAL_RAM+AVAILABLE_RAM)/42/pageSize
    81  		// but for reproducibility of benchmarks - please don't rely on Available RAM
    82  		dirtySpace: 2 * (memory.TotalMemory() / 42),
    83  
    84  		mapSize:         2 * datasize.TB,
    85  		growthStep:      2 * datasize.GB,
    86  		mergeThreshold:  3 * 8192,
    87  		shrinkThreshold: -1, // default
    88  		label:           kv.InMem,
    89  	}
    90  	return opts
    91  }
    92  
    93  func (opts MdbxOpts) GetLabel() kv.Label  { return opts.label }
    94  func (opts MdbxOpts) GetInMem() bool      { return opts.inMem }
    95  func (opts MdbxOpts) GetPageSize() uint64 { return opts.pageSize }
    96  
    97  func (opts MdbxOpts) Label(label kv.Label) MdbxOpts {
    98  	opts.label = label
    99  	return opts
   100  }
   101  
   102  func (opts MdbxOpts) DirtySpace(s uint64) MdbxOpts {
   103  	opts.dirtySpace = s
   104  	return opts
   105  }
   106  
   107  func (opts MdbxOpts) RoTxsLimiter(l *semaphore.Weighted) MdbxOpts {
   108  	opts.roTxsLimiter = l
   109  	return opts
   110  }
   111  
   112  func (opts MdbxOpts) PageSize(v uint64) MdbxOpts {
   113  	opts.pageSize = v
   114  	return opts
   115  }
   116  
   117  func (opts MdbxOpts) GrowthStep(v datasize.ByteSize) MdbxOpts {
   118  	opts.growthStep = v
   119  	return opts
   120  }
   121  
   122  func (opts MdbxOpts) Path(path string) MdbxOpts {
   123  	opts.path = path
   124  	return opts
   125  }
   126  
   127  func (opts MdbxOpts) Set(opt MdbxOpts) MdbxOpts {
   128  	return opt
   129  }
   130  
   131  func (opts MdbxOpts) InMem(tmpDir string) MdbxOpts {
   132  	if tmpDir != "" {
   133  		if err := os.MkdirAll(tmpDir, 0755); err != nil {
   134  			panic(err)
   135  		}
   136  	}
   137  	path, err := os.MkdirTemp(tmpDir, "erigon-memdb-")
   138  	if err != nil {
   139  		panic(err)
   140  	}
   141  	opts.path = path
   142  	opts.inMem = true
   143  	opts.flags = mdbx.UtterlyNoSync | mdbx.NoMetaSync | mdbx.LifoReclaim | mdbx.NoMemInit
   144  	opts.growthStep = 2 * datasize.MB
   145  	opts.mapSize = 512 * datasize.MB
   146  	opts.shrinkThreshold = 0 // disable
   147  	opts.label = kv.InMem
   148  	return opts
   149  }
   150  
   151  func (opts MdbxOpts) Exclusive() MdbxOpts {
   152  	opts.flags = opts.flags | mdbx.Exclusive
   153  	return opts
   154  }
   155  
   156  func (opts MdbxOpts) Flags(f func(uint) uint) MdbxOpts {
   157  	opts.flags = f(opts.flags)
   158  	return opts
   159  }
   160  
   161  func (opts MdbxOpts) HasFlag(flag uint) bool { return opts.flags&flag != 0 }
   162  func (opts MdbxOpts) Readonly() MdbxOpts {
   163  	opts.flags = opts.flags | mdbx.Readonly
   164  	return opts
   165  }
   166  
   167  func (opts MdbxOpts) SyncPeriod(period time.Duration) MdbxOpts {
   168  	opts.syncPeriod = period
   169  	return opts
   170  }
   171  
   172  func (opts MdbxOpts) DBVerbosity(v kv.DBVerbosityLvl) MdbxOpts {
   173  	opts.verbosity = v
   174  	return opts
   175  }
   176  
   177  func (opts MdbxOpts) MapSize(sz datasize.ByteSize) MdbxOpts {
   178  	opts.mapSize = sz
   179  	return opts
   180  }
   181  
   182  func (opts MdbxOpts) WriteMap() MdbxOpts {
   183  	opts.flags |= mdbx.WriteMap
   184  	return opts
   185  }
   186  
   187  func (opts MdbxOpts) WriteMergeThreshold(v uint64) MdbxOpts {
   188  	opts.mergeThreshold = v
   189  	return opts
   190  }
   191  
   192  func (opts MdbxOpts) WithTableCfg(f TableCfgFunc) MdbxOpts {
   193  	opts.bucketsCfg = f
   194  	return opts
   195  }
   196  
// pathDbMap is a process-wide registry of open databases keyed by their path.
// All access must hold pathDbMapLock.
var pathDbMap = map[string]kv.RoDB{}
var pathDbMapLock sync.Mutex
   199  
   200  func addToPathDbMap(path string, db kv.RoDB) {
   201  	pathDbMapLock.Lock()
   202  	defer pathDbMapLock.Unlock()
   203  	pathDbMap[path] = db
   204  }
   205  
   206  func removeFromPathDbMap(path string) {
   207  	pathDbMapLock.Lock()
   208  	defer pathDbMapLock.Unlock()
   209  	delete(pathDbMap, path)
   210  }
   211  
   212  func PathDbMap() map[string]kv.RoDB {
   213  	pathDbMapLock.Lock()
   214  	defer pathDbMapLock.Unlock()
   215  	return maps.Clone(pathDbMap)
   216  }
   217  
   218  func (opts MdbxOpts) Open() (kv.RwDB, error) {
   219  	if dbg.WriteMap() {
   220  		opts = opts.WriteMap() //nolint
   221  	}
   222  	if dbg.DirtySpace() > 0 {
   223  		opts = opts.DirtySpace(dbg.DirtySpace()) //nolint
   224  	}
   225  	if dbg.NoSync() {
   226  		opts = opts.Flags(func(u uint) uint { return u | mdbx.SafeNoSync }) //nolint
   227  	}
   228  	if dbg.MergeTr() > 0 {
   229  		opts = opts.WriteMergeThreshold(uint64(dbg.MergeTr() * 8192)) //nolint
   230  	}
   231  	if dbg.MdbxReadAhead() {
   232  		opts = opts.Flags(func(u uint) uint { return u &^ mdbx.NoReadahead }) //nolint
   233  	}
   234  	env, err := mdbx.NewEnv()
   235  	if err != nil {
   236  		return nil, err
   237  	}
   238  	if opts.verbosity != -1 {
   239  		err = env.SetDebug(mdbx.LogLvl(opts.verbosity), mdbx.DbgDoNotChange, mdbx.LoggerDoNotChange) // temporary disable error, because it works if call it 1 time, but returns error if call it twice in same process (what often happening in tests)
   240  		if err != nil {
   241  			return nil, fmt.Errorf("db verbosity set: %w", err)
   242  		}
   243  	}
   244  	if err = env.SetOption(mdbx.OptMaxDB, 200); err != nil {
   245  		return nil, err
   246  	}
   247  	if err = env.SetOption(mdbx.OptMaxReaders, kv.ReadersLimit); err != nil {
   248  		return nil, err
   249  	}
   250  
   251  	if opts.flags&mdbx.Accede == 0 {
   252  		if err = env.SetGeometry(-1, -1, int(opts.mapSize), int(opts.growthStep), opts.shrinkThreshold, int(opts.pageSize)); err != nil {
   253  			return nil, err
   254  		}
   255  		if err = os.MkdirAll(opts.path, 0744); err != nil {
   256  			return nil, fmt.Errorf("could not create dir: %s, %w", opts.path, err)
   257  		}
   258  	}
   259  
   260  	err = env.Open(opts.path, opts.flags, 0664)
   261  	if err != nil {
   262  		if err != nil {
   263  			return nil, fmt.Errorf("%w, label: %s, trace: %s", err, opts.label.String(), stack2.Trace().String())
   264  		}
   265  	}
   266  
   267  	// mdbx will not change pageSize if db already exists. means need read real value after env.open()
   268  	in, err := env.Info(nil)
   269  	if err != nil {
   270  		if err != nil {
   271  			return nil, fmt.Errorf("%w, label: %s, trace: %s", err, opts.label.String(), stack2.Trace().String())
   272  		}
   273  	}
   274  
   275  	opts.pageSize = uint64(in.PageSize)
   276  
   277  	//nolint
   278  	if opts.flags&mdbx.Accede == 0 && opts.flags&mdbx.Readonly == 0 {
   279  	}
   280  	// erigon using big transactions
   281  	// increase "page measured" options. need do it after env.Open() because default are depend on pageSize known only after env.Open()
   282  	if opts.flags&mdbx.Readonly == 0 {
   283  		// 1/8 is good for transactions with a lot of modifications - to reduce invalidation size.
   284  		// But Erigon app now using Batch and etl.Collectors to avoid writing to DB frequently changing data.
   285  		// It means most of our writes are: APPEND or "single UPSERT per key during transaction"
   286  		//if err = env.SetOption(mdbx.OptSpillMinDenominator, 8); err != nil {
   287  		//	return nil, err
   288  		//}
   289  
   290  		txnDpInitial, err := env.GetOption(mdbx.OptTxnDpInitial)
   291  		if err != nil {
   292  			return nil, err
   293  		}
   294  		if err = env.SetOption(mdbx.OptTxnDpInitial, txnDpInitial*2); err != nil {
   295  			return nil, err
   296  		}
   297  		dpReserveLimit, err := env.GetOption(mdbx.OptDpReverseLimit)
   298  		if err != nil {
   299  			return nil, err
   300  		}
   301  		if err = env.SetOption(mdbx.OptDpReverseLimit, dpReserveLimit*2); err != nil {
   302  			return nil, err
   303  		}
   304  
   305  		if err = env.SetOption(mdbx.OptTxnDpLimit, opts.dirtySpace/opts.pageSize); err != nil {
   306  			return nil, err
   307  		}
   308  		// must be in the range from 12.5% (almost empty) to 50% (half empty)
   309  		// which corresponds to the range from 8192 and to 32768 in units respectively
   310  		if err = env.SetOption(mdbx.OptMergeThreshold16dot16Percent, opts.mergeThreshold); err != nil {
   311  			return nil, err
   312  		}
   313  	}
   314  
   315  	dirtyPagesLimit, err := env.GetOption(mdbx.OptTxnDpLimit)
   316  	if err != nil {
   317  		return nil, err
   318  	}
   319  
   320  	if opts.syncPeriod != 0 {
   321  		if err = env.SetSyncPeriod(opts.syncPeriod); err != nil {
   322  			env.Close()
   323  			return nil, err
   324  		}
   325  	}
   326  	//if err := env.SetOption(mdbx.OptSyncBytes, uint64(math2.MaxUint64)); err != nil {
   327  	//	return nil, err
   328  	//}
   329  
   330  	if opts.roTxsLimiter == nil {
   331  		targetSemCount := int64(runtime.GOMAXPROCS(-1) * 16)
   332  		opts.roTxsLimiter = semaphore.NewWeighted(targetSemCount) // 1 less than max to allow unlocking to happen
   333  	}
   334  	db := &MdbxKV{
   335  		opts:         opts,
   336  		env:          env,
   337  		log:          opts.log,
   338  		wg:           &sync.WaitGroup{},
   339  		buckets:      kv.TableCfg{},
   340  		txSize:       dirtyPagesLimit * opts.pageSize,
   341  		roTxsLimiter: opts.roTxsLimiter,
   342  
   343  		leakDetector: dbg.NewLeakDetector("db."+opts.label.String(), dbg.SlowTx()),
   344  	}
   345  
   346  	customBuckets := opts.bucketsCfg(kv.ChaindataTablesCfg)
   347  	for name, cfg := range customBuckets { // copy map to avoid changing global variable
   348  		db.buckets[name] = cfg
   349  	}
   350  
   351  	buckets := bucketSlice(db.buckets)
   352  	if err := db.openDBIs(buckets); err != nil {
   353  		return nil, err
   354  	}
   355  
   356  	// Configure buckets and open deprecated buckets
   357  	if err := env.View(func(tx *mdbx.Txn) error {
   358  		for _, name := range buckets {
   359  			// Open deprecated buckets if they exist, don't create
   360  			if !db.buckets[name].IsDeprecated {
   361  				continue
   362  			}
   363  			cnfCopy := db.buckets[name]
   364  			dbi, createErr := tx.OpenDBI(name, mdbx.DBAccede, nil, nil)
   365  			if createErr != nil {
   366  				if mdbx.IsNotFound(createErr) {
   367  					cnfCopy.DBI = NonExistingDBI
   368  					db.buckets[name] = cnfCopy
   369  					continue // if deprecated bucket couldn't be open - then it's deleted and it's fine
   370  				} else {
   371  					return fmt.Errorf("bucket: %s, %w", name, createErr)
   372  				}
   373  			}
   374  			cnfCopy.DBI = kv.DBI(dbi)
   375  			db.buckets[name] = cnfCopy
   376  		}
   377  		return nil
   378  	}); err != nil {
   379  		return nil, err
   380  	}
   381  
   382  	if !opts.inMem {
   383  		if staleReaders, err := db.env.ReaderCheck(); err != nil {
   384  			db.log.Error("failed ReaderCheck", "err", err)
   385  		} else if staleReaders > 0 {
   386  			db.log.Info("cleared reader slots from dead processes", "amount", staleReaders)
   387  		}
   388  
   389  	}
   390  	db.path = opts.path
   391  	addToPathDbMap(opts.path, db)
   392  	return db, nil
   393  }
   394  
   395  func (opts MdbxOpts) MustOpen() kv.RwDB {
   396  	db, err := opts.Open()
   397  	if err != nil {
   398  		panic(fmt.Errorf("fail to open mdbx: %w", err))
   399  	}
   400  	return db
   401  }
   402  
// MdbxKV is an open MDBX-backed database, as produced by MdbxOpts.Open.
type MdbxKV struct {
	log          log.Logger
	env          *mdbx.Env
	wg           *sync.WaitGroup // counts open transactions; Close waits on it
	buckets      kv.TableCfg
	roTxsLimiter *semaphore.Weighted // limits the number of concurrent read-only transactions; it may be shared with other components (like Decompressor)
	opts         MdbxOpts
	txSize       uint64 // dirty-pages limit multiplied by the page size, computed in Open
	closed       atomic.Bool
	path         string

	leakDetector *dbg.LeakDetector
}
   416  
   417  func (db *MdbxKV) PageSize() uint64 { return db.opts.pageSize }
   418  func (db *MdbxKV) ReadOnly() bool   { return db.opts.HasFlag(mdbx.Readonly) }
   419  
   420  // openDBIs - first trying to open existing DBI's in RO transaction
   421  // otherwise re-try by RW transaction
   422  // it allow open DB from another process - even if main process holding long RW transaction
   423  func (db *MdbxKV) openDBIs(buckets []string) error {
   424  	if db.ReadOnly() {
   425  		if err := db.View(context.Background(), func(tx kv.Tx) error {
   426  			for _, name := range buckets {
   427  				if db.buckets[name].IsDeprecated {
   428  					continue
   429  				}
   430  				if err := tx.(kv.BucketMigrator).CreateBucket(name); err != nil {
   431  					return err
   432  				}
   433  			}
   434  			return tx.Commit() // when open db as read-only, commit of this RO transaction is required
   435  		}); err != nil {
   436  			return err
   437  		}
   438  	} else {
   439  		if err := db.Update(context.Background(), func(tx kv.RwTx) error {
   440  			for _, name := range buckets {
   441  				if db.buckets[name].IsDeprecated {
   442  					continue
   443  				}
   444  				if err := tx.(kv.BucketMigrator).CreateBucket(name); err != nil {
   445  					return err
   446  				}
   447  			}
   448  			return nil
   449  		}); err != nil {
   450  			return err
   451  		}
   452  	}
   453  	return nil
   454  }
   455  
   456  // Close closes db
   457  // All transactions must be closed before closing the database.
   458  func (db *MdbxKV) Close() {
   459  	if ok := db.closed.CompareAndSwap(false, true); !ok {
   460  		return
   461  	}
   462  	db.wg.Wait()
   463  	db.env.Close()
   464  	db.env = nil
   465  
   466  	if db.opts.inMem {
   467  		if err := os.RemoveAll(db.opts.path); err != nil {
   468  			db.log.Warn("failed to remove in-mem db file", "err", err)
   469  		}
   470  	}
   471  	removeFromPathDbMap(db.path)
   472  }
   473  
   474  func (db *MdbxKV) BeginRo(ctx context.Context) (txn kv.Tx, err error) {
   475  	if db.closed.Load() {
   476  		return nil, fmt.Errorf("db closed")
   477  	}
   478  
   479  	// don't try to acquire if the context is already done
   480  	select {
   481  	case <-ctx.Done():
   482  		return nil, ctx.Err()
   483  	default:
   484  		// otherwise carry on
   485  	}
   486  
   487  	// will return nil err if context is cancelled (may appear to acquire the semaphore)
   488  	if semErr := db.roTxsLimiter.Acquire(ctx, 1); semErr != nil {
   489  		return nil, semErr
   490  	}
   491  
   492  	defer func() {
   493  		if txn == nil {
   494  			// on error, or if there is whatever reason that we don't return a tx,
   495  			// we need to free up the limiter slot, otherwise it could lead to deadlocks
   496  			db.roTxsLimiter.Release(1)
   497  		}
   498  	}()
   499  
   500  	tx, err := db.env.BeginTxn(nil, mdbx.Readonly)
   501  	if err != nil {
   502  		return nil, fmt.Errorf("%w, label: %s, trace: %s", err, db.opts.label.String(), stack2.Trace().String())
   503  	}
   504  	db.wg.Add(1)
   505  	return &MdbxTx{
   506  		ctx:      ctx,
   507  		db:       db,
   508  		tx:       tx,
   509  		readOnly: true,
   510  		id:       db.leakDetector.Add(),
   511  	}, nil
   512  }
   513  
   514  func (db *MdbxKV) BeginRw(ctx context.Context) (kv.RwTx, error) {
   515  	return db.beginRw(ctx, 0)
   516  }
   517  func (db *MdbxKV) BeginRwNosync(ctx context.Context) (kv.RwTx, error) {
   518  	return db.beginRw(ctx, mdbx.TxNoSync)
   519  }
   520  
   521  func (db *MdbxKV) beginRw(ctx context.Context, flags uint) (txn kv.RwTx, err error) {
   522  	select {
   523  	case <-ctx.Done():
   524  		return nil, ctx.Err()
   525  	default:
   526  	}
   527  
   528  	if db.closed.Load() {
   529  		return nil, fmt.Errorf("db closed")
   530  	}
   531  	runtime.LockOSThread()
   532  	tx, err := db.env.BeginTxn(nil, flags)
   533  	if err != nil {
   534  		runtime.UnlockOSThread() // unlock only in case of error. normal flow is "defer .Rollback()"
   535  		return nil, fmt.Errorf("%w, lable: %s, trace: %s", err, db.opts.label.String(), stack2.Trace().String())
   536  	}
   537  	db.wg.Add(1)
   538  	return &MdbxTx{
   539  		db:  db,
   540  		tx:  tx,
   541  		ctx: ctx,
   542  		id:  db.leakDetector.Add(),
   543  	}, nil
   544  }
   545  
// MdbxTx is a transaction on an MdbxKV database. The tx field is set to nil
// once the transaction has been committed or rolled back.
type MdbxTx struct {
	tx               *mdbx.Txn
	db               *MdbxKV
	cursors          map[uint64]*mdbx.Cursor
	streams          []kv.Closer
	statelessCursors map[string]kv.RwCursor // per-table cursors reused by Put/Delete/GetOne/...
	readOnly         bool
	cursorID         uint64
	ctx              context.Context
	id               uint64 // id handed out by the db leak detector; original note: set only if TRACE_TX=true
}
   557  
// MdbxCursor is a cursor over a single table, bound to the transaction that
// created it.
type MdbxCursor struct {
	tx         *MdbxTx
	c          *mdbx.Cursor
	bucketName string
	bucketCfg  kv.TableCfgItem
	dbi        mdbx.DBI
	id         uint64
}
   566  
   567  func (db *MdbxKV) Env() *mdbx.Env {
   568  	return db.env
   569  }
   570  
   571  func (db *MdbxKV) AllDBI() map[string]kv.DBI {
   572  	res := map[string]kv.DBI{}
   573  	for name, cfg := range db.buckets {
   574  		res[name] = cfg.DBI
   575  	}
   576  	return res
   577  }
   578  
   579  func (db *MdbxKV) AllTables() kv.TableCfg {
   580  	return db.buckets
   581  }
   582  
   583  func (tx *MdbxTx) ViewID() uint64 { return tx.tx.ID() }
   584  
   585  func (tx *MdbxTx) CollectMetrics() {
   586  	if tx.db.opts.label != kv.ChainDB {
   587  		return
   588  	}
   589  
   590  	info, err := tx.db.env.Info(tx.tx)
   591  	if err != nil {
   592  		return
   593  	}
   594  	if info.SinceReaderCheck.Hours() > 1 {
   595  		if staleReaders, err := tx.db.env.ReaderCheck(); err != nil {
   596  			tx.db.log.Error("failed ReaderCheck", "err", err)
   597  		} else if staleReaders > 0 {
   598  			tx.db.log.Info("cleared reader slots from dead processes", "amount", staleReaders)
   599  		}
   600  	}
   601  
   602  	kv.DbSize.Set(info.Geo.Current)
   603  	kv.DbPgopsNewly.Set(info.PageOps.Newly)
   604  	kv.DbPgopsCow.Set(info.PageOps.Cow)
   605  	kv.DbPgopsClone.Set(info.PageOps.Clone)
   606  	kv.DbPgopsSplit.Set(info.PageOps.Split)
   607  	kv.DbPgopsMerge.Set(info.PageOps.Merge)
   608  	kv.DbPgopsSpill.Set(info.PageOps.Spill)
   609  	kv.DbPgopsUnspill.Set(info.PageOps.Unspill)
   610  	kv.DbPgopsWops.Set(info.PageOps.Wops)
   611  
   612  	txInfo, err := tx.tx.Info(true)
   613  	if err != nil {
   614  		return
   615  	}
   616  
   617  	kv.TxDirty.Set(txInfo.SpaceDirty)
   618  	kv.TxLimit.Set(tx.db.txSize)
   619  	kv.TxSpill.Set(txInfo.Spill)
   620  	kv.TxUnspill.Set(txInfo.Unspill)
   621  
   622  	gc, err := tx.BucketStat("gc")
   623  	if err != nil {
   624  		return
   625  	}
   626  	kv.GcLeafMetric.Set(gc.LeafPages)
   627  	kv.GcOverflowMetric.Set(gc.OverflowPages)
   628  	kv.GcPagesMetric.Set((gc.LeafPages + gc.OverflowPages) * tx.db.opts.pageSize / 8)
   629  }
   630  
   631  // ListBuckets - all buckets stored as keys of un-named bucket
   632  func (tx *MdbxTx) ListBuckets() ([]string, error) {
   633  	return tx.tx.ListDBI()
   634  }
   635  
   636  func (db *MdbxKV) View(ctx context.Context, f func(tx kv.Tx) error) (err error) {
   637  	// can't use db.env.View method - because it calls commit for read transactions - it conflicts with write transactions.
   638  	tx, err := db.BeginRo(ctx)
   639  	if err != nil {
   640  		return err
   641  	}
   642  	defer tx.Rollback()
   643  
   644  	return f(tx)
   645  }
   646  
   647  func (db *MdbxKV) UpdateNosync(ctx context.Context, f func(tx kv.RwTx) error) (err error) {
   648  	tx, err := db.BeginRwNosync(ctx)
   649  	if err != nil {
   650  		return err
   651  	}
   652  	defer tx.Rollback()
   653  	err = f(tx)
   654  	if err != nil {
   655  		return err
   656  	}
   657  	err = tx.Commit()
   658  	if err != nil {
   659  		return err
   660  	}
   661  	return nil
   662  }
   663  
   664  func (db *MdbxKV) Update(ctx context.Context, f func(tx kv.RwTx) error) (err error) {
   665  	tx, err := db.BeginRw(ctx)
   666  	if err != nil {
   667  		return err
   668  	}
   669  	defer tx.Rollback()
   670  	err = f(tx)
   671  	if err != nil {
   672  		return err
   673  	}
   674  	err = tx.Commit()
   675  	if err != nil {
   676  		return err
   677  	}
   678  	return nil
   679  }
   680  
   681  func (tx *MdbxTx) CreateBucket(name string) error {
   682  	cnfCopy := tx.db.buckets[name]
   683  	dbi, err := tx.tx.OpenDBI(name, mdbx.DBAccede, nil, nil)
   684  	if err != nil && !mdbx.IsNotFound(err) {
   685  		return fmt.Errorf("create table: %s, %w", name, err)
   686  	}
   687  	if err == nil {
   688  		cnfCopy.DBI = kv.DBI(dbi)
   689  		var flags uint
   690  		flags, err = tx.tx.Flags(dbi)
   691  		if err != nil {
   692  			return err
   693  		}
   694  		cnfCopy.Flags = kv.TableFlags(flags)
   695  
   696  		tx.db.buckets[name] = cnfCopy
   697  		return nil
   698  	}
   699  
   700  	// if bucket doesn't exists - create it
   701  
   702  	var flags = tx.db.buckets[name].Flags
   703  	var nativeFlags uint
   704  	if !tx.db.ReadOnly() {
   705  		nativeFlags |= mdbx.Create
   706  	}
   707  
   708  	if flags&kv.DupSort != 0 {
   709  		nativeFlags |= mdbx.DupSort
   710  		flags ^= kv.DupSort
   711  	}
   712  	if flags != 0 {
   713  		return fmt.Errorf("some not supported flag provided for bucket")
   714  	}
   715  
   716  	dbi, err = tx.tx.OpenDBI(name, nativeFlags, nil, nil)
   717  
   718  	if err != nil {
   719  		return fmt.Errorf("create table: %s, %w", name, err)
   720  	}
   721  	cnfCopy.DBI = kv.DBI(dbi)
   722  
   723  	tx.db.buckets[name] = cnfCopy
   724  	return nil
   725  }
   726  
   727  func (tx *MdbxTx) dropEvenIfBucketIsNotDeprecated(name string) error {
   728  	dbi := tx.db.buckets[name].DBI
   729  	// if bucket was not open on db start, then it's may be deprecated
   730  	// try to open it now without `Create` flag, and if fail then nothing to drop
   731  	if dbi == NonExistingDBI {
   732  		nativeDBI, err := tx.tx.OpenDBI(name, 0, nil, nil)
   733  		if err != nil {
   734  			if mdbx.IsNotFound(err) {
   735  				return nil // DBI doesn't exists means no drop needed
   736  			}
   737  			return fmt.Errorf("bucket: %s, %w", name, err)
   738  		}
   739  		dbi = kv.DBI(nativeDBI)
   740  	}
   741  
   742  	if err := tx.tx.Drop(mdbx.DBI(dbi), true); err != nil {
   743  		return err
   744  	}
   745  	cnfCopy := tx.db.buckets[name]
   746  	cnfCopy.DBI = NonExistingDBI
   747  	tx.db.buckets[name] = cnfCopy
   748  	return nil
   749  }
   750  
   751  func (tx *MdbxTx) ClearBucket(bucket string) error {
   752  	dbi := tx.db.buckets[bucket].DBI
   753  	if dbi == NonExistingDBI {
   754  		return nil
   755  	}
   756  	return tx.tx.Drop(mdbx.DBI(dbi), false)
   757  }
   758  
   759  func (tx *MdbxTx) DropBucket(bucket string) error {
   760  	if cfg, ok := tx.db.buckets[bucket]; !(ok && cfg.IsDeprecated) {
   761  		return fmt.Errorf("%w, bucket: %s", kv.ErrAttemptToDeleteNonDeprecatedBucket, bucket)
   762  	}
   763  
   764  	return tx.dropEvenIfBucketIsNotDeprecated(bucket)
   765  }
   766  
   767  func (tx *MdbxTx) ExistsBucket(bucket string) (bool, error) {
   768  	if cfg, ok := tx.db.buckets[bucket]; ok {
   769  		return cfg.DBI != NonExistingDBI, nil
   770  	}
   771  	return false, nil
   772  }
   773  
   774  func (tx *MdbxTx) Commit() error {
   775  	if tx.tx == nil {
   776  		return nil
   777  	}
   778  	defer func() {
   779  		tx.tx = nil
   780  		tx.db.wg.Done()
   781  		if tx.readOnly {
   782  			tx.db.roTxsLimiter.Release(1)
   783  		} else {
   784  			runtime.UnlockOSThread()
   785  		}
   786  		tx.db.leakDetector.Del(tx.id)
   787  	}()
   788  	tx.closeCursors()
   789  
   790  	//slowTx := 10 * time.Second
   791  	//if debug.SlowCommit() > 0 {
   792  	//	slowTx = debug.SlowCommit()
   793  	//}
   794  	//
   795  	//if debug.BigRoTxKb() > 0 || debug.BigRwTxKb() > 0 {
   796  	//	tx.PrintDebugInfo()
   797  	//}
   798  	tx.CollectMetrics()
   799  
   800  	latency, err := tx.tx.Commit()
   801  	if err != nil {
   802  		return err
   803  	}
   804  
   805  	if tx.db.opts.label == kv.ChainDB {
   806  		kv.DbCommitPreparation.Update(latency.Preparation.Seconds())
   807  		//kv.DbCommitAudit.Update(latency.Audit.Seconds())
   808  		kv.DbCommitWrite.Update(latency.Write.Seconds())
   809  		kv.DbCommitSync.Update(latency.Sync.Seconds())
   810  		kv.DbCommitEnding.Update(latency.Ending.Seconds())
   811  		kv.DbCommitTotal.Update(latency.Whole.Seconds())
   812  
   813  		//kv.DbGcWorkPnlMergeTime.Update(latency.GCDetails.WorkPnlMergeTime.Seconds())
   814  		//kv.DbGcWorkPnlMergeVolume.Set(uint64(latency.GCDetails.WorkPnlMergeVolume))
   815  		//kv.DbGcWorkPnlMergeCalls.Set(uint64(latency.GCDetails.WorkPnlMergeCalls))
   816  		//
   817  		//kv.DbGcSelfPnlMergeTime.Update(latency.GCDetails.SelfPnlMergeTime.Seconds())
   818  		//kv.DbGcSelfPnlMergeVolume.Set(uint64(latency.GCDetails.SelfPnlMergeVolume))
   819  		//kv.DbGcSelfPnlMergeCalls.Set(uint64(latency.GCDetails.SelfPnlMergeCalls))
   820  	}
   821  
   822  	return nil
   823  }
   824  
   825  func (tx *MdbxTx) Rollback() {
   826  	if tx.tx == nil {
   827  		return
   828  	}
   829  	defer func() {
   830  		tx.tx = nil
   831  		tx.db.wg.Done()
   832  		if tx.readOnly {
   833  			tx.db.roTxsLimiter.Release(1)
   834  		} else {
   835  			runtime.UnlockOSThread()
   836  		}
   837  		tx.db.leakDetector.Del(tx.id)
   838  	}()
   839  	tx.closeCursors()
   840  	//tx.printDebugInfo()
   841  	tx.tx.Abort()
   842  }
   843  
   844  func (tx *MdbxTx) SpaceDirty() (uint64, uint64, error) {
   845  	txInfo, err := tx.tx.Info(true)
   846  	if err != nil {
   847  		return 0, 0, err
   848  	}
   849  
   850  	return txInfo.SpaceDirty, tx.db.txSize, nil
   851  }
   852  
// PrintDebugInfo currently does nothing: its implementation is commented out
// and kept below for reference only.
func (tx *MdbxTx) PrintDebugInfo() {
	/*
		txInfo, err := tx.tx.Info(true)
		if err != nil {
			panic(err)
		}

		txSize := uint(txInfo.SpaceDirty / 1024)
		doPrint := debug.BigRoTxKb() == 0 && debug.BigRwTxKb() == 0 ||
			tx.readOnly && debug.BigRoTxKb() > 0 && txSize > debug.BigRoTxKb() ||
			(!tx.readOnly && debug.BigRwTxKb() > 0 && txSize > debug.BigRwTxKb())
		if doPrint {
			tx.db.log.Info("Tx info",
				"id", txInfo.Id,
				"read_lag", txInfo.ReadLag,
				"ro", tx.readOnly,
				//"space_retired_mb", txInfo.SpaceRetired/1024/1024,
				"space_dirty_mb", txInfo.SpaceDirty/1024/1024,
				//"callers", debug.Callers(7),
			)
		}
	*/
}
   876  
   877  func (tx *MdbxTx) closeCursors() {
   878  	for _, c := range tx.cursors {
   879  		if c != nil {
   880  			c.Close()
   881  		}
   882  	}
   883  	tx.cursors = nil
   884  	for _, c := range tx.streams {
   885  		if c != nil {
   886  			c.Close()
   887  		}
   888  	}
   889  	tx.statelessCursors = nil
   890  }
   891  
   892  func (tx *MdbxTx) statelessCursor(bucket string) (kv.RwCursor, error) {
   893  	if tx.statelessCursors == nil {
   894  		tx.statelessCursors = make(map[string]kv.RwCursor)
   895  	}
   896  	c, ok := tx.statelessCursors[bucket]
   897  	if !ok {
   898  		var err error
   899  		c, err = tx.RwCursor(bucket)
   900  		if err != nil {
   901  			return nil, err
   902  		}
   903  		tx.statelessCursors[bucket] = c
   904  	}
   905  	return c, nil
   906  }
   907  
   908  func (tx *MdbxTx) Put(table string, k, v []byte) error {
   909  	c, err := tx.statelessCursor(table)
   910  	if err != nil {
   911  		return err
   912  	}
   913  	return c.Put(k, v)
   914  }
   915  
   916  func (tx *MdbxTx) Delete(table string, k []byte) error {
   917  	c, err := tx.statelessCursor(table)
   918  	if err != nil {
   919  		return err
   920  	}
   921  	return c.Delete(k)
   922  }
   923  
   924  func (tx *MdbxTx) GetOne(bucket string, k []byte) ([]byte, error) {
   925  	c, err := tx.statelessCursor(bucket)
   926  	if err != nil {
   927  		return nil, err
   928  	}
   929  	_, v, err := c.SeekExact(k)
   930  	return v, err
   931  }
   932  
   933  func (tx *MdbxTx) Has(bucket string, key []byte) (bool, error) {
   934  	c, err := tx.statelessCursor(bucket)
   935  	if err != nil {
   936  		return false, err
   937  	}
   938  	k, _, err := c.Seek(key)
   939  	if err != nil {
   940  		return false, err
   941  	}
   942  	return bytes.Equal(key, k), nil
   943  }
   944  
   945  func (tx *MdbxTx) Append(bucket string, k, v []byte) error {
   946  	c, err := tx.statelessCursor(bucket)
   947  	if err != nil {
   948  		return err
   949  	}
   950  	return c.Append(k, v)
   951  }
   952  func (tx *MdbxTx) AppendDup(bucket string, k, v []byte) error {
   953  	c, err := tx.statelessCursor(bucket)
   954  	if err != nil {
   955  		return err
   956  	}
   957  	return c.(*MdbxDupSortCursor).AppendDup(k, v)
   958  }
   959  
   960  func (tx *MdbxTx) IncrementSequence(bucket string, amount uint64) (uint64, error) {
   961  	c, err := tx.statelessCursor(kv.Sequence)
   962  	if err != nil {
   963  		return 0, err
   964  	}
   965  	_, v, err := c.SeekExact([]byte(bucket))
   966  	if err != nil {
   967  		return 0, err
   968  	}
   969  
   970  	var currentV uint64 = 0
   971  	if len(v) > 0 {
   972  		currentV = binary.BigEndian.Uint64(v)
   973  	}
   974  
   975  	newVBytes := make([]byte, 8)
   976  	binary.BigEndian.PutUint64(newVBytes, currentV+amount)
   977  	err = c.Put([]byte(bucket), newVBytes)
   978  	if err != nil {
   979  		return 0, err
   980  	}
   981  	return currentV, nil
   982  }
   983  
   984  func (tx *MdbxTx) ReadSequence(bucket string) (uint64, error) {
   985  	c, err := tx.statelessCursor(kv.Sequence)
   986  	if err != nil {
   987  		return 0, err
   988  	}
   989  	_, v, err := c.SeekExact([]byte(bucket))
   990  	if err != nil && !mdbx.IsNotFound(err) {
   991  		return 0, err
   992  	}
   993  
   994  	var currentV uint64
   995  	if len(v) > 0 {
   996  		currentV = binary.BigEndian.Uint64(v)
   997  	}
   998  
   999  	return currentV, nil
  1000  }
  1001  
  1002  func (tx *MdbxTx) BucketSize(name string) (uint64, error) {
  1003  	st, err := tx.BucketStat(name)
  1004  	if err != nil {
  1005  		return 0, err
  1006  	}
  1007  	return (st.LeafPages + st.BranchPages + st.OverflowPages) * tx.db.opts.pageSize, nil
  1008  }
  1009  
  1010  func (tx *MdbxTx) BucketStat(name string) (*mdbx.Stat, error) {
  1011  	if name == "freelist" || name == "gc" || name == "free_list" {
  1012  		return tx.tx.StatDBI(mdbx.DBI(0))
  1013  	}
  1014  	if name == "root" {
  1015  		return tx.tx.StatDBI(mdbx.DBI(1))
  1016  	}
  1017  	st, err := tx.tx.StatDBI(mdbx.DBI(tx.db.buckets[name].DBI))
  1018  	if err != nil {
  1019  		return nil, fmt.Errorf("bucket: %s, %w", name, err)
  1020  	}
  1021  	return st, nil
  1022  }
  1023  
  1024  func (tx *MdbxTx) DBSize() (uint64, error) {
  1025  	info, err := tx.db.env.Info(tx.tx)
  1026  	if err != nil {
  1027  		return 0, err
  1028  	}
  1029  	return info.Geo.Current, err
  1030  }
  1031  
  1032  func (tx *MdbxTx) RwCursor(bucket string) (kv.RwCursor, error) {
  1033  	b := tx.db.buckets[bucket]
  1034  	if b.AutoDupSortKeysConversion {
  1035  		return tx.stdCursor(bucket)
  1036  	}
  1037  
  1038  	if b.Flags&kv.DupSort != 0 {
  1039  		return tx.RwCursorDupSort(bucket)
  1040  	}
  1041  
  1042  	return tx.stdCursor(bucket)
  1043  }
  1044  
// Cursor opens a cursor on bucket. It delegates to RwCursor and exposes the
// result through the narrower kv.Cursor interface.
func (tx *MdbxTx) Cursor(bucket string) (kv.Cursor, error) {
	return tx.RwCursor(bucket)
}
  1048  
  1049  func (tx *MdbxTx) stdCursor(bucket string) (kv.RwCursor, error) {
  1050  	b := tx.db.buckets[bucket]
  1051  	c := &MdbxCursor{bucketName: bucket, tx: tx, bucketCfg: b, dbi: mdbx.DBI(tx.db.buckets[bucket].DBI), id: tx.cursorID}
  1052  	tx.cursorID++
  1053  
  1054  	var err error
  1055  	c.c, err = tx.tx.OpenCursor(c.dbi)
  1056  	if err != nil {
  1057  		return nil, fmt.Errorf("table: %s, %w, stack: %s", c.bucketName, err, dbg.Stack())
  1058  	}
  1059  
  1060  	// add to auto-cleanup on end of transactions
  1061  	if tx.cursors == nil {
  1062  		tx.cursors = map[uint64]*mdbx.Cursor{}
  1063  	}
  1064  	tx.cursors[c.id] = c.c
  1065  	return c, nil
  1066  }
  1067  
  1068  func (tx *MdbxTx) RwCursorDupSort(bucket string) (kv.RwCursorDupSort, error) {
  1069  	basicCursor, err := tx.stdCursor(bucket)
  1070  	if err != nil {
  1071  		return nil, err
  1072  	}
  1073  	return &MdbxDupSortCursor{MdbxCursor: basicCursor.(*MdbxCursor)}, nil
  1074  }
  1075  
// CursorDupSort opens a DupSort cursor on bucket. It delegates to
// RwCursorDupSort and exposes the result through the narrower
// kv.CursorDupSort interface.
func (tx *MdbxTx) CursorDupSort(bucket string) (kv.CursorDupSort, error) {
	return tx.RwCursorDupSort(bucket)
}
  1079  
  1080  // methods here help to see better pprof picture
  1081  func (c *MdbxCursor) set(k []byte) ([]byte, []byte, error) { return c.c.Get(k, nil, mdbx.Set) }
  1082  func (c *MdbxCursor) getCurrent() ([]byte, []byte, error)  { return c.c.Get(nil, nil, mdbx.GetCurrent) }
  1083  func (c *MdbxCursor) first() ([]byte, []byte, error)       { return c.c.Get(nil, nil, mdbx.First) }
  1084  func (c *MdbxCursor) next() ([]byte, []byte, error)        { return c.c.Get(nil, nil, mdbx.Next) }
  1085  func (c *MdbxCursor) nextDup() ([]byte, []byte, error)     { return c.c.Get(nil, nil, mdbx.NextDup) }
  1086  func (c *MdbxCursor) nextNoDup() ([]byte, []byte, error)   { return c.c.Get(nil, nil, mdbx.NextNoDup) }
  1087  func (c *MdbxCursor) prev() ([]byte, []byte, error)        { return c.c.Get(nil, nil, mdbx.Prev) }
  1088  func (c *MdbxCursor) prevDup() ([]byte, []byte, error)     { return c.c.Get(nil, nil, mdbx.PrevDup) }
  1089  func (c *MdbxCursor) prevNoDup() ([]byte, []byte, error)   { return c.c.Get(nil, nil, mdbx.PrevNoDup) }
  1090  func (c *MdbxCursor) last() ([]byte, []byte, error)        { return c.c.Get(nil, nil, mdbx.Last) }
  1091  func (c *MdbxCursor) delCurrent() error                    { return c.c.Del(mdbx.Current) }
  1092  func (c *MdbxCursor) delAllDupData() error                 { return c.c.Del(mdbx.AllDups) }
  1093  func (c *MdbxCursor) put(k, v []byte) error                { return c.c.Put(k, v, 0) }
  1094  func (c *MdbxCursor) putCurrent(k, v []byte) error         { return c.c.Put(k, v, mdbx.Current) }
  1095  func (c *MdbxCursor) putNoOverwrite(k, v []byte) error     { return c.c.Put(k, v, mdbx.NoOverwrite) }
  1096  func (c *MdbxCursor) getBoth(k, v []byte) ([]byte, error) {
  1097  	_, v, err := c.c.Get(k, v, mdbx.GetBoth)
  1098  	return v, err
  1099  }
  1100  func (c *MdbxCursor) setRange(k []byte) ([]byte, []byte, error) {
  1101  	return c.c.Get(k, nil, mdbx.SetRange)
  1102  }
  1103  func (c *MdbxCursor) getBothRange(k, v []byte) ([]byte, error) {
  1104  	_, v, err := c.c.Get(k, v, mdbx.GetBothRange)
  1105  	return v, err
  1106  }
  1107  func (c *MdbxCursor) firstDup() ([]byte, error) {
  1108  	_, v, err := c.c.Get(nil, nil, mdbx.FirstDup)
  1109  	return v, err
  1110  }
  1111  func (c *MdbxCursor) lastDup() ([]byte, error) {
  1112  	_, v, err := c.c.Get(nil, nil, mdbx.LastDup)
  1113  	return v, err
  1114  }
  1115  
  1116  func (c *MdbxCursor) Count() (uint64, error) {
  1117  	st, err := c.tx.tx.StatDBI(c.dbi)
  1118  	if err != nil {
  1119  		return 0, err
  1120  	}
  1121  	return st.Entries, nil
  1122  }
  1123  
// First positions the cursor on the first record of the table. It is
// implemented as Seek with a nil key, so it shares Seek's return conventions.
func (c *MdbxCursor) First() ([]byte, []byte, error) {
	return c.Seek(nil)
}
  1127  
  1128  func (c *MdbxCursor) Last() ([]byte, []byte, error) {
  1129  	k, v, err := c.last()
  1130  	if err != nil {
  1131  		if mdbx.IsNotFound(err) {
  1132  			return nil, nil, nil
  1133  		}
  1134  		err = fmt.Errorf("failed MdbxKV cursor.Last(): %w, bucket: %s", err, c.bucketName)
  1135  		return []byte{}, nil, err
  1136  	}
  1137  
  1138  	b := c.bucketCfg
  1139  	if b.AutoDupSortKeysConversion && len(k) == b.DupToLen {
  1140  		keyPart := b.DupFromLen - b.DupToLen
  1141  		k = append(k, v[:keyPart]...)
  1142  		v = v[keyPart:]
  1143  	}
  1144  
  1145  	return k, v, nil
  1146  }
  1147  
  1148  func (c *MdbxCursor) Seek(seek []byte) (k, v []byte, err error) {
  1149  	if c.bucketCfg.AutoDupSortKeysConversion {
  1150  		return c.seekDupSort(seek)
  1151  	}
  1152  
  1153  	if len(seek) == 0 {
  1154  		k, v, err = c.first()
  1155  	} else {
  1156  		k, v, err = c.setRange(seek)
  1157  	}
  1158  	if err != nil {
  1159  		if mdbx.IsNotFound(err) {
  1160  			return nil, nil, nil
  1161  		}
  1162  		err = fmt.Errorf("failed MdbxKV cursor.Seek(): %w, bucket: %s,  key: %x", err, c.bucketName, seek)
  1163  		return []byte{}, nil, err
  1164  	}
  1165  
  1166  	return k, v, nil
  1167  }
  1168  
  1169  func (c *MdbxCursor) seekDupSort(seek []byte) (k, v []byte, err error) {
  1170  	b := c.bucketCfg
  1171  	from, to := b.DupFromLen, b.DupToLen
  1172  	if len(seek) == 0 {
  1173  		k, v, err = c.first()
  1174  		if err != nil {
  1175  			if mdbx.IsNotFound(err) {
  1176  				return nil, nil, nil
  1177  			}
  1178  			return []byte{}, nil, err
  1179  		}
  1180  
  1181  		if len(k) == to {
  1182  			k2 := make([]byte, 0, len(k)+from-to)
  1183  			k2 = append(append(k2, k...), v[:from-to]...)
  1184  			v = v[from-to:]
  1185  			k = k2
  1186  		}
  1187  		return k, v, nil
  1188  	}
  1189  
  1190  	var seek1, seek2 []byte
  1191  	if len(seek) > to {
  1192  		seek1, seek2 = seek[:to], seek[to:]
  1193  	} else {
  1194  		seek1 = seek
  1195  	}
  1196  	k, v, err = c.setRange(seek1)
  1197  	if err != nil {
  1198  		if mdbx.IsNotFound(err) {
  1199  			return nil, nil, nil
  1200  		}
  1201  
  1202  		return []byte{}, nil, err
  1203  	}
  1204  
  1205  	if seek2 != nil && bytes.Equal(seek1, k) {
  1206  		v, err = c.getBothRange(seek1, seek2)
  1207  		if err != nil && mdbx.IsNotFound(err) {
  1208  			k, v, err = c.next()
  1209  			if err != nil {
  1210  				if mdbx.IsNotFound(err) {
  1211  					return nil, nil, nil
  1212  				}
  1213  				return []byte{}, nil, err
  1214  			}
  1215  		} else if err != nil {
  1216  			return []byte{}, nil, err
  1217  		}
  1218  	}
  1219  	if len(k) == to {
  1220  		k2 := make([]byte, 0, len(k)+from-to)
  1221  		k2 = append(append(k2, k...), v[:from-to]...)
  1222  		v = v[from-to:]
  1223  		k = k2
  1224  	}
  1225  
  1226  	return k, v, nil
  1227  }
  1228  
  1229  func (c *MdbxCursor) Next() (k, v []byte, err error) {
  1230  	k, v, err = c.next()
  1231  	if err != nil {
  1232  		if mdbx.IsNotFound(err) {
  1233  			return nil, nil, nil
  1234  		}
  1235  		return []byte{}, nil, fmt.Errorf("failed MdbxKV cursor.Next(): %w", err)
  1236  	}
  1237  
  1238  	b := c.bucketCfg
  1239  	if b.AutoDupSortKeysConversion && len(k) == b.DupToLen {
  1240  		keyPart := b.DupFromLen - b.DupToLen
  1241  		k = append(k, v[:keyPart]...)
  1242  		v = v[keyPart:]
  1243  	}
  1244  
  1245  	return k, v, nil
  1246  }
  1247  
  1248  func (c *MdbxCursor) Prev() (k, v []byte, err error) {
  1249  	k, v, err = c.prev()
  1250  	if err != nil {
  1251  		if mdbx.IsNotFound(err) {
  1252  			return nil, nil, nil
  1253  		}
  1254  		return []byte{}, nil, fmt.Errorf("failed MdbxKV cursor.Prev(): %w", err)
  1255  	}
  1256  
  1257  	b := c.bucketCfg
  1258  	if b.AutoDupSortKeysConversion && len(k) == b.DupToLen {
  1259  		keyPart := b.DupFromLen - b.DupToLen
  1260  		k = append(k, v[:keyPart]...)
  1261  		v = v[keyPart:]
  1262  	}
  1263  
  1264  	return k, v, nil
  1265  }
  1266  
  1267  // Current - return key/data at current cursor position
  1268  func (c *MdbxCursor) Current() ([]byte, []byte, error) {
  1269  	k, v, err := c.getCurrent()
  1270  	if err != nil {
  1271  		if mdbx.IsNotFound(err) {
  1272  			return nil, nil, nil
  1273  		}
  1274  		return []byte{}, nil, err
  1275  	}
  1276  
  1277  	b := c.bucketCfg
  1278  	if b.AutoDupSortKeysConversion && len(k) == b.DupToLen {
  1279  		keyPart := b.DupFromLen - b.DupToLen
  1280  		k = append(k, v[:keyPart]...)
  1281  		v = v[keyPart:]
  1282  	}
  1283  
  1284  	return k, v, nil
  1285  }
  1286  
  1287  func (c *MdbxCursor) Delete(k []byte) error {
  1288  	if c.bucketCfg.AutoDupSortKeysConversion {
  1289  		return c.deleteDupSort(k)
  1290  	}
  1291  
  1292  	_, _, err := c.set(k)
  1293  	if err != nil {
  1294  		if mdbx.IsNotFound(err) {
  1295  			return nil
  1296  		}
  1297  		return err
  1298  	}
  1299  
  1300  	if c.bucketCfg.Flags&mdbx.DupSort != 0 {
  1301  		return c.delAllDupData()
  1302  	}
  1303  	return c.delCurrent()
  1304  }
  1305  
// DeleteCurrent deletes the key/data pair to which the cursor refers.
// This does not invalidate the cursor, so operations such as MDB_NEXT
// can still be used on it.
// Both MDB_NEXT and MDB_GET_CURRENT will return the same record after
// this operation.
func (c *MdbxCursor) DeleteCurrent() error {
	return c.delCurrent()
}
  1314  
  1315  func (c *MdbxCursor) deleteDupSort(key []byte) error {
  1316  	b := c.bucketCfg
  1317  	from, to := b.DupFromLen, b.DupToLen
  1318  	if len(key) != from && len(key) >= to {
  1319  		return fmt.Errorf("delete from dupsort bucket: %s, can have keys of len==%d and len<%d. key: %x,%d", c.bucketName, from, to, key, len(key))
  1320  	}
  1321  
  1322  	if len(key) == from {
  1323  		v, err := c.getBothRange(key[:to], key[to:])
  1324  		if err != nil { // if key not found, or found another one - then nothing to delete
  1325  			if mdbx.IsNotFound(err) {
  1326  				return nil
  1327  			}
  1328  			return err
  1329  		}
  1330  		if !bytes.Equal(v[:from-to], key[to:]) {
  1331  			return nil
  1332  		}
  1333  		return c.delCurrent()
  1334  	}
  1335  
  1336  	_, _, err := c.set(key)
  1337  	if err != nil {
  1338  		if mdbx.IsNotFound(err) {
  1339  			return nil
  1340  		}
  1341  		return err
  1342  	}
  1343  
  1344  	return c.delCurrent()
  1345  }
  1346  
  1347  func (c *MdbxCursor) PutNoOverwrite(key []byte, value []byte) error {
  1348  	if c.bucketCfg.AutoDupSortKeysConversion {
  1349  		panic("not implemented")
  1350  	}
  1351  
  1352  	return c.putNoOverwrite(key, value)
  1353  }
  1354  
  1355  func (c *MdbxCursor) Put(key []byte, value []byte) error {
  1356  	b := c.bucketCfg
  1357  	if b.AutoDupSortKeysConversion {
  1358  		if err := c.putDupSort(key, value); err != nil {
  1359  			return err
  1360  		}
  1361  		return nil
  1362  	}
  1363  	if err := c.put(key, value); err != nil {
  1364  		return fmt.Errorf("table: %s, err: %w", c.bucketName, err)
  1365  	}
  1366  	return nil
  1367  }
  1368  
  1369  func (c *MdbxCursor) putDupSort(key []byte, value []byte) error {
  1370  	b := c.bucketCfg
  1371  	from, to := b.DupFromLen, b.DupToLen
  1372  	if len(key) != from && len(key) >= to {
  1373  		return fmt.Errorf("put dupsort bucket: %s, can have keys of len==%d and len<%d. key: %x,%d", c.bucketName, from, to, key, len(key))
  1374  	}
  1375  
  1376  	if len(key) != from {
  1377  		err := c.putNoOverwrite(key, value)
  1378  		if err != nil {
  1379  			if mdbx.IsKeyExists(err) {
  1380  				return c.putCurrent(key, value)
  1381  			}
  1382  			return fmt.Errorf("putNoOverwrite, bucket: %s, key: %x, val: %x, err: %w", c.bucketName, key, value, err)
  1383  		}
  1384  		return nil
  1385  	}
  1386  
  1387  	value = append(key[to:], value...)
  1388  	key = key[:to]
  1389  	v, err := c.getBothRange(key, value[:from-to])
  1390  	if err != nil { // if key not found, or found another one - then just insert
  1391  		if mdbx.IsNotFound(err) {
  1392  			return c.put(key, value)
  1393  		}
  1394  		return err
  1395  	}
  1396  
  1397  	if bytes.Equal(v[:from-to], value[:from-to]) {
  1398  		if len(v) == len(value) { // in DupSort case mdbx.Current works only with values of same length
  1399  			return c.putCurrent(key, value)
  1400  		}
  1401  		err = c.delCurrent()
  1402  		if err != nil {
  1403  			return err
  1404  		}
  1405  	}
  1406  
  1407  	return c.put(key, value)
  1408  }
  1409  
  1410  func (c *MdbxCursor) SeekExact(key []byte) ([]byte, []byte, error) {
  1411  	b := c.bucketCfg
  1412  	if b.AutoDupSortKeysConversion && len(key) == b.DupFromLen {
  1413  		from, to := b.DupFromLen, b.DupToLen
  1414  		v, err := c.getBothRange(key[:to], key[to:])
  1415  		if err != nil {
  1416  			if mdbx.IsNotFound(err) {
  1417  				return nil, nil, nil
  1418  			}
  1419  			return []byte{}, nil, err
  1420  		}
  1421  		if !bytes.Equal(key[to:], v[:from-to]) {
  1422  			return nil, nil, nil
  1423  		}
  1424  		return key[:to], v[from-to:], nil
  1425  	}
  1426  
  1427  	k, v, err := c.set(key)
  1428  	if err != nil {
  1429  		if mdbx.IsNotFound(err) {
  1430  			return nil, nil, nil
  1431  		}
  1432  		return []byte{}, nil, err
  1433  	}
  1434  	return k, v, nil
  1435  }
  1436  
  1437  // Append - speedy feature of mdbx which is not part of KV interface.
  1438  // Cast your cursor to *MdbxCursor to use this method.
  1439  // Return error - if provided data will not sorted (or bucket have old records which mess with new in sorting manner).
  1440  func (c *MdbxCursor) Append(k []byte, v []byte) error {
  1441  	if c.bucketCfg.AutoDupSortKeysConversion {
  1442  		b := c.bucketCfg
  1443  		from, to := b.DupFromLen, b.DupToLen
  1444  		if len(k) != from && len(k) >= to {
  1445  			return fmt.Errorf("append dupsort bucket: %s, can have keys of len==%d and len<%d. key: %x,%d", c.bucketName, from, to, k, len(k))
  1446  		}
  1447  
  1448  		if len(k) == from {
  1449  			v = append(k[to:], v...)
  1450  			k = k[:to]
  1451  		}
  1452  	}
  1453  
  1454  	if c.bucketCfg.Flags&mdbx.DupSort != 0 {
  1455  		if err := c.c.Put(k, v, mdbx.AppendDup); err != nil {
  1456  			return fmt.Errorf("bucket: %s, %w", c.bucketName, err)
  1457  		}
  1458  		return nil
  1459  	}
  1460  
  1461  	if err := c.c.Put(k, v, mdbx.Append); err != nil {
  1462  		return fmt.Errorf("bucket: %s, %w", c.bucketName, err)
  1463  	}
  1464  	return nil
  1465  }
  1466  
  1467  func (c *MdbxCursor) Close() {
  1468  	if c.c != nil {
  1469  		c.c.Close()
  1470  		delete(c.tx.cursors, c.id)
  1471  		c.c = nil
  1472  	}
  1473  }
  1474  
// MdbxDupSortCursor extends MdbxCursor (by embedding) with operations that
// address the individual values stored under one key of a DupSort table.
type MdbxDupSortCursor struct {
	*MdbxCursor
}
  1478  
// Internal exposes the underlying *mdbx.Cursor held by this cursor.
func (c *MdbxDupSortCursor) Internal() *mdbx.Cursor {
	return c.c
}
  1482  
  1483  // DeleteExact - does delete
  1484  func (c *MdbxDupSortCursor) DeleteExact(k1, k2 []byte) error {
  1485  	_, err := c.getBoth(k1, k2)
  1486  	if err != nil { // if key not found, or found another one - then nothing to delete
  1487  		if mdbx.IsNotFound(err) {
  1488  			return nil
  1489  		}
  1490  		return err
  1491  	}
  1492  	return c.delCurrent()
  1493  }
  1494  
  1495  func (c *MdbxDupSortCursor) SeekBothExact(key, value []byte) ([]byte, []byte, error) {
  1496  	v, err := c.getBoth(key, value)
  1497  	if err != nil {
  1498  		if mdbx.IsNotFound(err) {
  1499  			return nil, nil, nil
  1500  		}
  1501  		return []byte{}, nil, fmt.Errorf("in SeekBothExact: %w", err)
  1502  	}
  1503  	return key, v, nil
  1504  }
  1505  
  1506  func (c *MdbxDupSortCursor) SeekBothRange(key, value []byte) ([]byte, error) {
  1507  	v, err := c.getBothRange(key, value)
  1508  	if err != nil {
  1509  		if mdbx.IsNotFound(err) {
  1510  			return nil, nil
  1511  		}
  1512  		return nil, fmt.Errorf("in SeekBothRange, table=%s: %w", c.bucketName, err)
  1513  	}
  1514  	return v, nil
  1515  }
  1516  
  1517  func (c *MdbxDupSortCursor) FirstDup() ([]byte, error) {
  1518  	v, err := c.firstDup()
  1519  	if err != nil {
  1520  		if mdbx.IsNotFound(err) {
  1521  			return nil, nil
  1522  		}
  1523  		return nil, fmt.Errorf("in FirstDup: %w", err)
  1524  	}
  1525  	return v, nil
  1526  }
  1527  
  1528  // NextDup - iterate only over duplicates of current key
  1529  func (c *MdbxDupSortCursor) NextDup() ([]byte, []byte, error) {
  1530  	k, v, err := c.nextDup()
  1531  	if err != nil {
  1532  		if mdbx.IsNotFound(err) {
  1533  			return nil, nil, nil
  1534  		}
  1535  		return []byte{}, nil, fmt.Errorf("in NextDup: %w", err)
  1536  	}
  1537  	return k, v, nil
  1538  }
  1539  
  1540  // NextNoDup - iterate with skipping all duplicates
  1541  func (c *MdbxDupSortCursor) NextNoDup() ([]byte, []byte, error) {
  1542  	k, v, err := c.nextNoDup()
  1543  	if err != nil {
  1544  		if mdbx.IsNotFound(err) {
  1545  			return nil, nil, nil
  1546  		}
  1547  		return []byte{}, nil, fmt.Errorf("in NextNoDup: %w", err)
  1548  	}
  1549  	return k, v, nil
  1550  }
  1551  
  1552  func (c *MdbxDupSortCursor) PrevDup() ([]byte, []byte, error) {
  1553  	k, v, err := c.prevDup()
  1554  	if err != nil {
  1555  		if mdbx.IsNotFound(err) {
  1556  			return nil, nil, nil
  1557  		}
  1558  		return []byte{}, nil, fmt.Errorf("in PrevDup: %w", err)
  1559  	}
  1560  	return k, v, nil
  1561  }
  1562  
  1563  func (c *MdbxDupSortCursor) PrevNoDup() ([]byte, []byte, error) {
  1564  	k, v, err := c.prevNoDup()
  1565  	if err != nil {
  1566  		if mdbx.IsNotFound(err) {
  1567  			return nil, nil, nil
  1568  		}
  1569  		return []byte{}, nil, fmt.Errorf("in PrevNoDup: %w", err)
  1570  	}
  1571  	return k, v, nil
  1572  }
  1573  
  1574  func (c *MdbxDupSortCursor) LastDup() ([]byte, error) {
  1575  	v, err := c.lastDup()
  1576  	if err != nil {
  1577  		if mdbx.IsNotFound(err) {
  1578  			return nil, nil
  1579  		}
  1580  		return nil, fmt.Errorf("in LastDup: %w", err)
  1581  	}
  1582  	return v, nil
  1583  }
  1584  
  1585  func (c *MdbxDupSortCursor) Append(k []byte, v []byte) error {
  1586  	if err := c.c.Put(k, v, mdbx.Append|mdbx.AppendDup); err != nil {
  1587  		return fmt.Errorf("in Append: bucket=%s, %w", c.bucketName, err)
  1588  	}
  1589  	return nil
  1590  }
  1591  
  1592  func (c *MdbxDupSortCursor) AppendDup(k []byte, v []byte) error {
  1593  	if err := c.c.Put(k, v, mdbx.AppendDup); err != nil {
  1594  		return fmt.Errorf("in AppendDup: bucket=%s, %w", c.bucketName, err)
  1595  	}
  1596  	return nil
  1597  }
  1598  
  1599  func (c *MdbxDupSortCursor) PutNoDupData(k, v []byte) error {
  1600  	if err := c.c.Put(k, v, mdbx.NoDupData); err != nil {
  1601  		return fmt.Errorf("in PutNoDupData: %w", err)
  1602  	}
  1603  
  1604  	return nil
  1605  }
  1606  
  1607  // DeleteCurrentDuplicates - delete all of the data items for the current key.
  1608  func (c *MdbxDupSortCursor) DeleteCurrentDuplicates() error {
  1609  	if err := c.delAllDupData(); err != nil {
  1610  		return fmt.Errorf("in DeleteCurrentDuplicates: %w", err)
  1611  	}
  1612  	return nil
  1613  }
  1614  
  1615  // CountDuplicates returns the number of duplicates for the current key. See mdb_cursor_count
  1616  func (c *MdbxDupSortCursor) CountDuplicates() (uint64, error) {
  1617  	res, err := c.c.Count()
  1618  	if err != nil {
  1619  		return 0, fmt.Errorf("in CountDuplicates: %w", err)
  1620  	}
  1621  	return res, nil
  1622  }
  1623  
  1624  func bucketSlice(b kv.TableCfg) []string {
  1625  	buckets := make([]string, 0, len(b))
  1626  	for name := range b {
  1627  		buckets = append(buckets, name)
  1628  	}
  1629  	sort.Slice(buckets, func(i, j int) bool {
  1630  		return strings.Compare(buckets[i], buckets[j]) < 0
  1631  	})
  1632  	return buckets
  1633  }
  1634  
  1635  func (tx *MdbxTx) ForEach(bucket string, fromPrefix []byte, walker func(k, v []byte) error) error {
  1636  	c, err := tx.Cursor(bucket)
  1637  	if err != nil {
  1638  		return err
  1639  	}
  1640  	defer c.Close()
  1641  
  1642  	for k, v, err := c.Seek(fromPrefix); k != nil; k, v, err = c.Next() {
  1643  		if err != nil {
  1644  			return err
  1645  		}
  1646  		if err := walker(k, v); err != nil {
  1647  			return err
  1648  		}
  1649  	}
  1650  	return nil
  1651  }
  1652  
  1653  func (tx *MdbxTx) ForPrefix(bucket string, prefix []byte, walker func(k, v []byte) error) error {
  1654  	c, err := tx.Cursor(bucket)
  1655  	if err != nil {
  1656  		return err
  1657  	}
  1658  	defer c.Close()
  1659  
  1660  	for k, v, err := c.Seek(prefix); k != nil; k, v, err = c.Next() {
  1661  		if err != nil {
  1662  			return err
  1663  		}
  1664  		if !bytes.HasPrefix(k, prefix) {
  1665  			break
  1666  		}
  1667  		if err := walker(k, v); err != nil {
  1668  			return err
  1669  		}
  1670  	}
  1671  	return nil
  1672  }
  1673  
  1674  func (tx *MdbxTx) Prefix(table string, prefix []byte) (iter.KV, error) {
  1675  	nextPrefix, ok := kv.NextSubtree(prefix)
  1676  	if !ok {
  1677  		return tx.Range(table, prefix, nil)
  1678  	}
  1679  	return tx.Range(table, prefix, nextPrefix)
  1680  }
  1681  
// Range returns an ascending iterator over [fromPrefix, toPrefix) of table
// with no limit on the number of records (limit -1).
func (tx *MdbxTx) Range(table string, fromPrefix, toPrefix []byte) (iter.KV, error) {
	return tx.RangeAscend(table, fromPrefix, toPrefix, -1)
}
// RangeAscend returns an iterator over table in ascending key order (order.Asc),
// bounded by fromPrefix/toPrefix and by limit.
func (tx *MdbxTx) RangeAscend(table string, fromPrefix, toPrefix []byte, limit int) (iter.KV, error) {
	return tx.rangeOrderLimit(table, fromPrefix, toPrefix, order.Asc, limit)
}
// RangeDescend returns an iterator over table in descending key order (order.Desc),
// bounded by fromPrefix/toPrefix and by limit.
func (tx *MdbxTx) RangeDescend(table string, fromPrefix, toPrefix []byte, limit int) (iter.KV, error) {
	return tx.rangeOrderLimit(table, fromPrefix, toPrefix, order.Desc, limit)
}
  1691  
  1692  type cursor2iter struct {
  1693  	c                                  kv.Cursor
  1694  	fromPrefix, toPrefix, nextK, nextV []byte
  1695  	err                                error
  1696  	orderAscend                        order.By
  1697  	limit                              int64
  1698  	ctx                                context.Context
  1699  }
  1700  
  1701  func (tx *MdbxTx) rangeOrderLimit(table string, fromPrefix, toPrefix []byte, orderAscend order.By, limit int) (*cursor2iter, error) {
  1702  	s := &cursor2iter{ctx: tx.ctx, fromPrefix: fromPrefix, toPrefix: toPrefix, orderAscend: orderAscend, limit: int64(limit)}
  1703  	tx.streams = append(tx.streams, s)
  1704  	return s.init(table, tx)
  1705  }
  1706  func (s *cursor2iter) init(table string, tx kv.Tx) (*cursor2iter, error) {
  1707  	if s.orderAscend && s.fromPrefix != nil && s.toPrefix != nil && bytes.Compare(s.fromPrefix, s.toPrefix) >= 0 {
  1708  		return s, fmt.Errorf("tx.Dual: %x must be lexicographicaly before %x", s.fromPrefix, s.toPrefix)
  1709  	}
  1710  	if !s.orderAscend && s.fromPrefix != nil && s.toPrefix != nil && bytes.Compare(s.fromPrefix, s.toPrefix) <= 0 {
  1711  		return s, fmt.Errorf("tx.Dual: %x must be lexicographicaly before %x", s.toPrefix, s.fromPrefix)
  1712  	}
  1713  	c, err := tx.Cursor(table)
  1714  	if err != nil {
  1715  		return s, err
  1716  	}
  1717  	s.c = c
  1718  
  1719  	if s.fromPrefix == nil { // no initial position
  1720  		if s.orderAscend {
  1721  			s.nextK, s.nextV, s.err = s.c.First()
  1722  		} else {
  1723  			s.nextK, s.nextV, s.err = s.c.Last()
  1724  		}
  1725  		return s, s.err
  1726  	}
  1727  
  1728  	if s.orderAscend {
  1729  		s.nextK, s.nextV, s.err = s.c.Seek(s.fromPrefix)
  1730  		return s, s.err
  1731  	} else {
  1732  		// seek exactly to given key or previous one
  1733  		s.nextK, s.nextV, s.err = s.c.SeekExact(s.fromPrefix)
  1734  		if s.err != nil {
  1735  			return s, s.err
  1736  		}
  1737  		if s.nextK != nil { // go to last value of this key
  1738  			if casted, ok := s.c.(kv.CursorDupSort); ok {
  1739  				s.nextV, s.err = casted.LastDup()
  1740  			}
  1741  		} else { // key not found, go to prev one
  1742  			s.nextK, s.nextV, s.err = s.c.Prev()
  1743  		}
  1744  		return s, s.err
  1745  	}
  1746  }
  1747  
  1748  func (s *cursor2iter) Close() {
  1749  	if s.c != nil {
  1750  		s.c.Close()
  1751  	}
  1752  }
  1753  func (s *cursor2iter) HasNext() bool {
  1754  	if s.err != nil { // always true, then .Next() call will return this error
  1755  		return true
  1756  	}
  1757  	if s.limit == 0 { // limit reached
  1758  		return false
  1759  	}
  1760  	if s.nextK == nil { // EndOfTable
  1761  		return false
  1762  	}
  1763  	if s.toPrefix == nil { // s.nextK == nil check is above
  1764  		return true
  1765  	}
  1766  
  1767  	//Asc:  [from, to) AND from > to
  1768  	//Desc: [from, to) AND from < to
  1769  	cmp := bytes.Compare(s.nextK, s.toPrefix)
  1770  	return (bool(s.orderAscend) && cmp < 0) || (!bool(s.orderAscend) && cmp > 0)
  1771  }
  1772  func (s *cursor2iter) Next() (k, v []byte, err error) {
  1773  	select {
  1774  	case <-s.ctx.Done():
  1775  		return nil, nil, s.ctx.Err()
  1776  	default:
  1777  	}
  1778  	s.limit--
  1779  	k, v, err = s.nextK, s.nextV, s.err
  1780  	if s.orderAscend {
  1781  		s.nextK, s.nextV, s.err = s.c.Next()
  1782  	} else {
  1783  		s.nextK, s.nextV, s.err = s.c.Prev()
  1784  	}
  1785  	return k, v, err
  1786  }
  1787  
  1788  func (tx *MdbxTx) RangeDupSort(table string, key []byte, fromPrefix, toPrefix []byte, asc order.By, limit int) (iter.KV, error) {
  1789  	s := &cursorDup2iter{ctx: tx.ctx, key: key, fromPrefix: fromPrefix, toPrefix: toPrefix, orderAscend: bool(asc), limit: int64(limit)}
  1790  	tx.streams = append(tx.streams, s)
  1791  	return s.init(table, tx)
  1792  }
  1793  
  1794  type cursorDup2iter struct {
  1795  	c                           kv.CursorDupSort
  1796  	key                         []byte
  1797  	fromPrefix, toPrefix, nextV []byte
  1798  	err                         error
  1799  	orderAscend                 bool
  1800  	limit                       int64
  1801  	ctx                         context.Context
  1802  }
  1803  
  1804  func (s *cursorDup2iter) init(table string, tx kv.Tx) (*cursorDup2iter, error) {
  1805  	if s.orderAscend && s.fromPrefix != nil && s.toPrefix != nil && bytes.Compare(s.fromPrefix, s.toPrefix) >= 0 {
  1806  		return s, fmt.Errorf("tx.Dual: %x must be lexicographicaly before %x", s.fromPrefix, s.toPrefix)
  1807  	}
  1808  	if !s.orderAscend && s.fromPrefix != nil && s.toPrefix != nil && bytes.Compare(s.fromPrefix, s.toPrefix) <= 0 {
  1809  		return s, fmt.Errorf("tx.Dual: %x must be lexicographicaly before %x", s.toPrefix, s.fromPrefix)
  1810  	}
  1811  	c, err := tx.CursorDupSort(table)
  1812  	if err != nil {
  1813  		return s, err
  1814  	}
  1815  	s.c = c
  1816  	k, _, err := c.SeekExact(s.key)
  1817  	if err != nil {
  1818  		return s, err
  1819  	}
  1820  	if k == nil {
  1821  		return s, nil
  1822  	}
  1823  
  1824  	if s.fromPrefix == nil { // no initial position
  1825  		if s.orderAscend {
  1826  			s.nextV, s.err = s.c.FirstDup()
  1827  		} else {
  1828  			s.nextV, s.err = s.c.LastDup()
  1829  		}
  1830  		return s, s.err
  1831  	}
  1832  
  1833  	if s.orderAscend {
  1834  		s.nextV, s.err = s.c.SeekBothRange(s.key, s.fromPrefix)
  1835  		return s, s.err
  1836  	} else {
  1837  		// seek exactly to given key or previous one
  1838  		_, s.nextV, s.err = s.c.SeekBothExact(s.key, s.fromPrefix)
  1839  		if s.nextV == nil { // no such key
  1840  			_, s.nextV, s.err = s.c.PrevDup()
  1841  		}
  1842  		return s, s.err
  1843  	}
  1844  }
  1845  
  1846  func (s *cursorDup2iter) Close() {
  1847  	if s.c != nil {
  1848  		s.c.Close()
  1849  	}
  1850  }
  1851  func (s *cursorDup2iter) HasNext() bool {
  1852  	if s.err != nil { // always true, then .Next() call will return this error
  1853  		return true
  1854  	}
  1855  	if s.limit == 0 { // limit reached
  1856  		return false
  1857  	}
  1858  	if s.nextV == nil { // EndOfTable
  1859  		return false
  1860  	}
  1861  	if s.toPrefix == nil { // s.nextK == nil check is above
  1862  		return true
  1863  	}
  1864  
  1865  	//Asc:  [from, to) AND from > to
  1866  	//Desc: [from, to) AND from < to
  1867  	cmp := bytes.Compare(s.nextV, s.toPrefix)
  1868  	return (s.orderAscend && cmp < 0) || (!s.orderAscend && cmp > 0)
  1869  }
  1870  func (s *cursorDup2iter) Next() (k, v []byte, err error) {
  1871  	select {
  1872  	case <-s.ctx.Done():
  1873  		return nil, nil, s.ctx.Err()
  1874  	default:
  1875  	}
  1876  	s.limit--
  1877  	v, err = s.nextV, s.err
  1878  	if s.orderAscend {
  1879  		_, s.nextV, s.err = s.c.NextDup()
  1880  	} else {
  1881  		_, s.nextV, s.err = s.c.PrevDup()
  1882  	}
  1883  	return s.key, v, err
  1884  }
  1885  
  1886  func (tx *MdbxTx) ForAmount(bucket string, fromPrefix []byte, amount uint32, walker func(k, v []byte) error) error {
  1887  	if amount == 0 {
  1888  		return nil
  1889  	}
  1890  	c, err := tx.Cursor(bucket)
  1891  	if err != nil {
  1892  		return err
  1893  	}
  1894  	defer c.Close()
  1895  
  1896  	for k, v, err := c.Seek(fromPrefix); k != nil && amount > 0; k, v, err = c.Next() {
  1897  		if err != nil {
  1898  			return err
  1899  		}
  1900  		if err := walker(k, v); err != nil {
  1901  			return err
  1902  		}
  1903  		amount--
  1904  	}
  1905  	return nil
  1906  }