github.com/ledgerwatch/erigon-lib@v1.0.0/kv/bitmapdb/fixed_size.go (about)

     1  /*
     2  Copyright 2022 Erigon contributors
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8  	http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package bitmapdb
    18  
    19  import (
    20  	"bufio"
    21  	"encoding/binary"
    22  	"fmt"
    23  	"os"
    24  	"path/filepath"
    25  	"reflect"
    26  	"time"
    27  	"unsafe"
    28  
    29  	"github.com/c2h5oh/datasize"
    30  	mmap2 "github.com/edsrzf/mmap-go"
    31  	"github.com/ledgerwatch/log/v3"
    32  )
    33  
    34  type FixedSizeBitmaps struct {
    35  	f                  *os.File
    36  	filePath, fileName string
    37  
    38  	data     []uint64
    39  	metaData []byte
    40  	amount   uint64
    41  	version  uint8
    42  
    43  	m             mmap2.MMap
    44  	bitsPerBitmap int
    45  	size          int
    46  	modTime       time.Time
    47  }
    48  
    49  func OpenFixedSizeBitmaps(filePath string, bitsPerBitmap int) (*FixedSizeBitmaps, error) {
    50  	_, fName := filepath.Split(filePath)
    51  	idx := &FixedSizeBitmaps{
    52  		filePath:      filePath,
    53  		fileName:      fName,
    54  		bitsPerBitmap: bitsPerBitmap,
    55  	}
    56  
    57  	var err error
    58  	idx.f, err = os.Open(filePath)
    59  	if err != nil {
    60  		return nil, fmt.Errorf("OpenFile: %w", err)
    61  	}
    62  	var stat os.FileInfo
    63  	if stat, err = idx.f.Stat(); err != nil {
    64  		return nil, err
    65  	}
    66  	idx.size = int(stat.Size())
    67  	idx.modTime = stat.ModTime()
    68  	idx.m, err = mmap2.MapRegion(idx.f, idx.size, mmap2.RDONLY, 0, 0)
    69  	if err != nil {
    70  		return nil, err
    71  	}
    72  	idx.metaData = idx.m[:MetaHeaderSize]
    73  	idx.data = castToArrU64(idx.m[MetaHeaderSize:])
    74  
    75  	idx.version = idx.metaData[0]
    76  	idx.amount = binary.BigEndian.Uint64(idx.metaData[1 : 8+1])
    77  
    78  	return idx, nil
    79  }
    80  
    81  func (bm *FixedSizeBitmaps) FileName() string { return bm.fileName }
    82  func (bm *FixedSizeBitmaps) FilePath() string { return bm.filePath }
    83  func (bm *FixedSizeBitmaps) Close() {
    84  	if bm.m != nil {
    85  		if err := bm.m.Unmap(); err != nil {
    86  			log.Trace("unmap", "err", err, "file", bm.FileName())
    87  		}
    88  		bm.m = nil
    89  	}
    90  	if bm.f != nil {
    91  		if err := bm.f.Close(); err != nil {
    92  			log.Trace("close", "err", err, "file", bm.FileName())
    93  		}
    94  		bm.f = nil
    95  	}
    96  }
    97  
    98  func (bm *FixedSizeBitmaps) At(item uint64) (res []uint64, err error) {
    99  	if item > bm.amount {
   100  		return nil, fmt.Errorf("too big item number: %d > %d", item, bm.amount)
   101  	}
   102  
   103  	n := bm.bitsPerBitmap * int(item)
   104  	blkFrom, bitFrom := n/64, n%64
   105  	blkTo := (n+bm.bitsPerBitmap)/64 + 1
   106  	bitTo := 64
   107  
   108  	var j uint64
   109  	for i := blkFrom; i < blkTo; i++ {
   110  		if i == blkTo-1 {
   111  			bitTo = (n + bm.bitsPerBitmap) % 64
   112  		}
   113  		for bit := bitFrom; bit < bitTo; bit++ {
   114  			if bm.data[i]&(1<<bit) != 0 {
   115  				res = append(res, j)
   116  			}
   117  			j++
   118  		}
   119  		bitFrom = 0
   120  	}
   121  
   122  	return res, nil
   123  }
   124  
   125  func (bm *FixedSizeBitmaps) First2At(item, after uint64) (fst uint64, snd uint64, ok, ok2 bool, err error) {
   126  	if item > bm.amount {
   127  		return 0, 0, false, false, fmt.Errorf("too big item number: %d > %d", item, bm.amount)
   128  	}
   129  	n := bm.bitsPerBitmap * int(item)
   130  	blkFrom, bitFrom := n/64, n%64
   131  	blkTo := (n+bm.bitsPerBitmap)/64 + 1
   132  	bitTo := 64
   133  
   134  	var j uint64
   135  	for i := blkFrom; i < blkTo; i++ {
   136  		if i == blkTo-1 {
   137  			bitTo = (n + bm.bitsPerBitmap) % 64
   138  		}
   139  		for bit := bitFrom; bit < bitTo; bit++ {
   140  			if bm.data[i]&(1<<bit) != 0 {
   141  				if j >= after {
   142  					if !ok {
   143  						ok = true
   144  						fst = j
   145  					} else {
   146  						ok2 = true
   147  						snd = j
   148  						return
   149  					}
   150  				}
   151  			}
   152  			j++
   153  		}
   154  		bitFrom = 0
   155  	}
   156  
   157  	return
   158  }
   159  
   160  type FixedSizeBitmapsWriter struct {
   161  	f *os.File
   162  
   163  	indexFile, tmpIdxFilePath string
   164  	data                      []uint64 // slice of correct size for the index to work with
   165  	metaData                  []byte
   166  	m                         mmap2.MMap
   167  
   168  	version       uint8
   169  	amount        uint64
   170  	size          int
   171  	bitsPerBitmap uint64
   172  
   173  	logger  log.Logger
   174  	noFsync bool // fsync is enabled by default, but tests can manually disable
   175  }
   176  
// MetaHeaderSize is the number of bytes reserved at the start of the file for
// metadata. The code in this file stores the format version in byte 0 and the
// big-endian bitmap count in bytes 1..8; the bitmap words start right after
// the header.
const MetaHeaderSize = 64
   178  
   179  func NewFixedSizeBitmapsWriter(indexFile string, bitsPerBitmap int, amount uint64, logger log.Logger) (*FixedSizeBitmapsWriter, error) {
   180  	pageSize := os.Getpagesize()
   181  	//TODO: use math.SafeMul()
   182  	bytesAmount := MetaHeaderSize + (bitsPerBitmap*int(amount))/8
   183  	size := (bytesAmount/pageSize + 1) * pageSize // must be page-size-aligned
   184  	idx := &FixedSizeBitmapsWriter{
   185  		indexFile:      indexFile,
   186  		tmpIdxFilePath: indexFile + ".tmp",
   187  		bitsPerBitmap:  uint64(bitsPerBitmap),
   188  		size:           size,
   189  		amount:         amount,
   190  		version:        1,
   191  		logger:         logger,
   192  	}
   193  
   194  	_ = os.Remove(idx.tmpIdxFilePath)
   195  
   196  	var err error
   197  	idx.f, err = os.Create(idx.tmpIdxFilePath)
   198  	if err != nil {
   199  		return nil, err
   200  	}
   201  
   202  	if err := growFileToSize(idx.f, idx.size); err != nil {
   203  		return nil, err
   204  	}
   205  
   206  	idx.m, err = mmap2.MapRegion(idx.f, idx.size, mmap2.RDWR, 0, 0)
   207  	if err != nil {
   208  		return nil, err
   209  	}
   210  
   211  	idx.metaData = idx.m[:MetaHeaderSize]
   212  	idx.data = castToArrU64(idx.m[MetaHeaderSize:])
   213  	//if err := mmap.MadviseNormal(idx.m); err != nil {
   214  	//	return nil, err
   215  	//}
   216  	idx.metaData[0] = idx.version
   217  	binary.BigEndian.PutUint64(idx.metaData[1:], idx.amount)
   218  	idx.amount = binary.BigEndian.Uint64(idx.metaData[1 : 8+1])
   219  
   220  	return idx, nil
   221  }
   222  func (w *FixedSizeBitmapsWriter) Close() {
   223  	if w.m != nil {
   224  		if err := w.m.Unmap(); err != nil {
   225  			log.Trace("unmap", "err", err, "file", w.f.Name())
   226  		}
   227  		w.m = nil
   228  	}
   229  	if w.f != nil {
   230  		if err := w.f.Close(); err != nil {
   231  			log.Trace("close", "err", err, "file", w.f.Name())
   232  		}
   233  		w.f = nil
   234  	}
   235  }
   236  func growFileToSize(f *os.File, size int) error {
   237  	pageSize := os.Getpagesize()
   238  	pages := size / pageSize
   239  	wr := bufio.NewWriterSize(f, int(4*datasize.MB))
   240  	page := make([]byte, pageSize)
   241  	for i := 0; i < pages; i++ {
   242  		if _, err := wr.Write(page); err != nil {
   243  			return err
   244  		}
   245  	}
   246  	if err := wr.Flush(); err != nil {
   247  		return err
   248  	}
   249  	return nil
   250  }
   251  
   252  // Create a []uint64 view of the file
   253  func castToArrU64(in []byte) []uint64 {
   254  	var view []uint64
   255  	header := (*reflect.SliceHeader)(unsafe.Pointer(&view))
   256  	header.Data = (*reflect.SliceHeader)(unsafe.Pointer(&in)).Data
   257  	header.Len = len(in) / 8
   258  	header.Cap = header.Len
   259  	return view
   260  }
   261  
   262  func (w *FixedSizeBitmapsWriter) AddArray(item uint64, listOfValues []uint64) error {
   263  	if item > w.amount {
   264  		return fmt.Errorf("too big item number: %d > %d", item, w.amount)
   265  	}
   266  	offset := item * w.bitsPerBitmap
   267  	for _, v := range listOfValues {
   268  		if v > w.bitsPerBitmap {
   269  			return fmt.Errorf("too big value: %d > %d", v, w.bitsPerBitmap)
   270  		}
   271  		n := offset + v
   272  		blkAt, bitAt := int(n/64), int(n%64)
   273  		if blkAt > len(w.data) {
   274  			return fmt.Errorf("too big value: %d, %d, max: %d", item, listOfValues, len(w.data))
   275  		}
   276  		w.data[blkAt] |= (1 << bitAt)
   277  	}
   278  	return nil
   279  }
   280  
   281  func (w *FixedSizeBitmapsWriter) Build() error {
   282  	if err := w.m.Flush(); err != nil {
   283  		return err
   284  	}
   285  	if err := w.fsync(); err != nil {
   286  		return err
   287  	}
   288  
   289  	if err := w.m.Unmap(); err != nil {
   290  		return err
   291  	}
   292  	w.m = nil
   293  
   294  	if err := w.f.Close(); err != nil {
   295  		return err
   296  	}
   297  	w.f = nil
   298  
   299  	_ = os.Remove(w.indexFile)
   300  	if err := os.Rename(w.tmpIdxFilePath, w.indexFile); err != nil {
   301  		return err
   302  	}
   303  	return nil
   304  }
   305  
   306  func (w *FixedSizeBitmapsWriter) DisableFsync() { w.noFsync = true }
   307  
   308  // fsync - other processes/goroutines must see only "fully-complete" (valid) files. No partial-writes.
   309  // To achieve it: write to .tmp file then `rename` when file is ready.
   310  // Machine may power-off right after `rename` - it means `fsync` must be before `rename`
   311  func (w *FixedSizeBitmapsWriter) fsync() error {
   312  	if w.noFsync {
   313  		return nil
   314  	}
   315  	if err := w.f.Sync(); err != nil {
   316  		w.logger.Warn("couldn't fsync", "err", err, "file", w.tmpIdxFilePath)
   317  		return err
   318  	}
   319  	return nil
   320  }