github.com/weaviate/weaviate@v1.24.6/adapters/repos/db/vector/hnsw/condensor_mmap.go (about)

     1  //                           _       _
     2  // __      _____  __ ___   ___  __ _| |_ ___
     3  // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
     4  //  \ V  V /  __/ (_| |\ V /| | (_| | ||  __/
     5  //   \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
     6  //
     7  //  Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
     8  //
     9  //  CONTACT: hello@weaviate.io
    10  //
    11  
    12  package hnsw
    13  
    14  import (
    15  	"io"
    16  	"os"
    17  
    18  	"github.com/davecgh/go-spew/spew"
    19  	"github.com/pkg/errors"
    20  )
    21  
// MmapCondensor condenses a commit log file. Its Do method first analyzes the
// log to build an index and then reads the log a second time, handing it
// together with that index to a reader that targets a scratch file.
type MmapCondensor struct {
	// connectionsPerLevel is passed to the analyzer that builds the index.
	connectionsPerLevel int
}
    25  
    26  func NewMmapCondensor(connectionsPerLevel int) *MmapCondensor {
    27  	return &MmapCondensor{connectionsPerLevel: connectionsPerLevel}
    28  }
    29  
    30  func (c *MmapCondensor) Do(fileName string) error {
    31  	fd, err := os.Open(fileName)
    32  	if err != nil {
    33  		return errors.Wrap(err, "open commit log to be condensed")
    34  	}
    35  	defer fd.Close()
    36  
    37  	index, err := c.analyze(fd)
    38  	if err != nil {
    39  		return errors.Wrap(err, "analyze commit log and build index")
    40  	}
    41  
    42  	index.calculateOffsets()
    43  
    44  	// "rewind" file so we can read it again, this time into the mmap file
    45  	if _, err := fd.Seek(0, io.SeekStart); err != nil {
    46  		return errors.Wrap(err, "rewind uncondensed")
    47  	}
    48  
    49  	if err := c.read(fd, index, fileName+".scratch.tmp"); err != nil {
    50  		return errors.Wrap(err, "read uncondensed into mmap file")
    51  	}
    52  
    53  	spew.Dump(index)
    54  	spew.Dump(index.Size())
    55  	return nil
    56  }
    57  
    58  func (c *MmapCondensor) analyze(file *os.File) (mmapIndex, error) {
    59  	return newMmapCondensorAnalyzer(c.connectionsPerLevel).Do(file)
    60  }
    61  
    62  func (c *MmapCondensor) read(source *os.File, index mmapIndex,
    63  	targetName string,
    64  ) error {
    65  	return newMmapCondensorReader().Do(source, index, targetName)
    66  }
    67  
    68  func (mi *mmapIndex) calculateOffsets() {
    69  	for i := range mi.nodes {
    70  		if i == 0 {
    71  			// offset for the first element is 0, nothing to do
    72  			continue
    73  		}
    74  
    75  		// we now have the guarantee that elem i-1 exists
    76  		mi.nodes[i].offset = mi.nodes[i-1].offset + uint64(mi.nodes[i-1].Size(mi.connectionsPerLevel))
    77  	}
    78  }
    79  
    80  // Size can only return a useful result if offsets have been calculated prior
    81  // to calling Size()
    82  func (mi *mmapIndex) Size() int {
    83  	if len(mi.nodes) == 0 {
    84  		return -1
    85  	}
    86  
    87  	return int(mi.nodes[len(mi.nodes)-1].offset) +
    88  		mi.nodes[len(mi.nodes)-1].Size(mi.connectionsPerLevel)
    89  }