github.com/weaviate/weaviate@v1.24.6/adapters/repos/db/vector/hnsw/condensor.go (about)

     1  //                           _       _
     2  // __      _____  __ ___   ___  __ _| |_ ___
     3  // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
     4  //  \ V  V /  __/ (_| |\ V /| | (_| | ||  __/
     5  //   \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
     6  //
     7  //  Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
     8  //
     9  //  CONTACT: hello@weaviate.io
    10  //
    11  
    12  package hnsw
    13  
    14  import (
    15  	"bufio"
    16  	"encoding/binary"
    17  	"fmt"
    18  	"math"
    19  	"os"
    20  
    21  	"github.com/pkg/errors"
    22  	"github.com/sirupsen/logrus"
    23  	"github.com/weaviate/weaviate/adapters/repos/db/vector/compressionhelpers"
    24  	"github.com/weaviate/weaviate/entities/errorcompounder"
    25  )
    26  
// MemoryCondensor rewrites an HNSW commit log into a condensed log file. Do
// deserializes the original log and the remaining methods append individual
// records to the condensed output.
type MemoryCondensor struct {
	// newLogFile is the "<fileName>.condensed" file opened by Do.
	newLogFile *os.File
	// newLog is the buffered writer wrapping newLogFile; all records are
	// written through it and it is flushed at the end of Do.
	newLog     *bufWriter
	// logger is handed to the deserializer and used for warnings emitted while
	// writing records.
	logger     logrus.FieldLogger
}
    32  
    33  func (c *MemoryCondensor) Do(fileName string) error {
    34  	fd, err := os.Open(fileName)
    35  	if err != nil {
    36  		return errors.Wrap(err, "open commit log to be condensed")
    37  	}
    38  	defer fd.Close()
    39  	fdBuf := bufio.NewReaderSize(fd, 256*1024)
    40  
    41  	res, _, err := NewDeserializer(c.logger).Do(fdBuf, nil, true)
    42  	if err != nil {
    43  		return errors.Wrap(err, "read commit log to be condensed")
    44  	}
    45  
    46  	newLogFile, err := os.OpenFile(fmt.Sprintf("%s.condensed", fileName),
    47  		os.O_WRONLY|os.O_APPEND|os.O_CREATE, 0o666)
    48  	if err != nil {
    49  		return errors.Wrap(err, "open new commit log file for writing")
    50  	}
    51  
    52  	c.newLogFile = newLogFile
    53  
    54  	c.newLog = NewWriterSize(c.newLogFile, 1*1024*1024)
    55  
    56  	if res.Compressed {
    57  		if err := c.AddPQ(res.PQData); err != nil {
    58  			return fmt.Errorf("write pq data: %w", err)
    59  		}
    60  	}
    61  
    62  	for _, node := range res.Nodes {
    63  		if node == nil {
    64  			// nil nodes occur when we've grown, but not inserted anything yet
    65  			continue
    66  		}
    67  
    68  		if node.level > 0 {
    69  			// nodes are implicitly added when they are first linked, if the level is
    70  			// not zero we know this node was new. If the level is zero it doesn't
    71  			// matter if it gets added explicitly or implicitly
    72  			if err := c.AddNode(node); err != nil {
    73  				return errors.Wrapf(err, "write node %d to commit log", node.id)
    74  			}
    75  		}
    76  
    77  		for level, links := range node.connections {
    78  			if res.ReplaceLinks(node.id, uint16(level)) {
    79  				if err := c.SetLinksAtLevel(node.id, level, links); err != nil {
    80  					return errors.Wrapf(err,
    81  						"write links for node %d at level %d to commit log", node.id, level)
    82  				}
    83  			} else {
    84  				if err := c.AddLinksAtLevel(node.id, uint16(level), links); err != nil {
    85  					return errors.Wrapf(err,
    86  						"write links for node %d at level %d to commit log", node.id, level)
    87  				}
    88  			}
    89  		}
    90  	}
    91  
    92  	if res.EntrypointChanged {
    93  		if err := c.SetEntryPointWithMaxLayer(res.Entrypoint,
    94  			int(res.Level)); err != nil {
    95  			return errors.Wrap(err, "write entrypoint to commit log")
    96  		}
    97  	}
    98  
    99  	for ts := range res.Tombstones {
   100  		if err := c.AddTombstone(ts); err != nil {
   101  			return errors.Wrapf(err,
   102  				"write tombstone for node %d to commit log", ts)
   103  		}
   104  	}
   105  
   106  	if err := c.newLog.Flush(); err != nil {
   107  		return errors.Wrap(err, "close new commit log")
   108  	}
   109  
   110  	if err := c.newLogFile.Close(); err != nil {
   111  		return errors.Wrap(err, "close new commit log")
   112  	}
   113  
   114  	if err := os.Remove(fileName); err != nil {
   115  		return errors.Wrap(err, "cleanup old (uncondensed) commit log")
   116  	}
   117  
   118  	return nil
   119  }
   120  
   121  func (c *MemoryCondensor) writeUint64(w *bufWriter, in uint64) error {
   122  	toWrite := make([]byte, 8)
   123  	binary.LittleEndian.PutUint64(toWrite[0:8], in)
   124  	_, err := w.Write(toWrite)
   125  	if err != nil {
   126  		return err
   127  	}
   128  
   129  	return nil
   130  }
   131  
   132  func (c *MemoryCondensor) writeUint16(w *bufWriter, in uint16) error {
   133  	toWrite := make([]byte, 2)
   134  	binary.LittleEndian.PutUint16(toWrite[0:2], in)
   135  	_, err := w.Write(toWrite)
   136  	if err != nil {
   137  		return err
   138  	}
   139  
   140  	return nil
   141  }
   142  
   143  func (c *MemoryCondensor) writeCommitType(w *bufWriter, in HnswCommitType) error {
   144  	toWrite := make([]byte, 1)
   145  	toWrite[0] = byte(in)
   146  	_, err := w.Write(toWrite)
   147  	if err != nil {
   148  		return err
   149  	}
   150  
   151  	return nil
   152  }
   153  
   154  func (c *MemoryCondensor) writeUint64Slice(w *bufWriter, in []uint64) error {
   155  	for _, v := range in {
   156  		err := c.writeUint64(w, v)
   157  		if err != nil {
   158  			return err
   159  		}
   160  	}
   161  
   162  	return nil
   163  }
   164  
   165  // AddNode adds an empty node
   166  func (c *MemoryCondensor) AddNode(node *vertex) error {
   167  	ec := &errorcompounder.ErrorCompounder{}
   168  	ec.Add(c.writeCommitType(c.newLog, AddNode))
   169  	ec.Add(c.writeUint64(c.newLog, node.id))
   170  	ec.Add(c.writeUint16(c.newLog, uint16(node.level)))
   171  
   172  	return ec.ToError()
   173  }
   174  
   175  func (c *MemoryCondensor) SetLinksAtLevel(nodeid uint64, level int, targets []uint64) error {
   176  	ec := &errorcompounder.ErrorCompounder{}
   177  	ec.Add(c.writeCommitType(c.newLog, ReplaceLinksAtLevel))
   178  	ec.Add(c.writeUint64(c.newLog, nodeid))
   179  	ec.Add(c.writeUint16(c.newLog, uint16(level)))
   180  
   181  	targetLength := len(targets)
   182  	if targetLength > math.MaxUint16 {
   183  		// TODO: investigate why we get such massive connections
   184  		targetLength = math.MaxUint16
   185  		c.logger.WithField("action", "condense_commit_log").
   186  			WithField("original_length", len(targets)).
   187  			WithField("maximum_length", targetLength).
   188  			Warning("condensor length of connections would overflow uint16, cutting off")
   189  	}
   190  	ec.Add(c.writeUint16(c.newLog, uint16(targetLength)))
   191  	ec.Add(c.writeUint64Slice(c.newLog, targets[:targetLength]))
   192  
   193  	return ec.ToError()
   194  }
   195  
   196  func (c *MemoryCondensor) AddLinksAtLevel(nodeid uint64, level uint16, targets []uint64) error {
   197  	toWrite := make([]byte, 13+len(targets)*8)
   198  	toWrite[0] = byte(AddLinksAtLevel)
   199  	binary.LittleEndian.PutUint64(toWrite[1:9], nodeid)
   200  	binary.LittleEndian.PutUint16(toWrite[9:11], uint16(level))
   201  	binary.LittleEndian.PutUint16(toWrite[11:13], uint16(len(targets)))
   202  	for i, target := range targets {
   203  		offsetStart := 13 + i*8
   204  		offsetEnd := offsetStart + 8
   205  		binary.LittleEndian.PutUint64(toWrite[offsetStart:offsetEnd], target)
   206  	}
   207  	_, err := c.newLog.Write(toWrite)
   208  	return err
   209  }
   210  
   211  func (c *MemoryCondensor) AddLinkAtLevel(nodeid uint64, level uint16, target uint64) error {
   212  	ec := &errorcompounder.ErrorCompounder{}
   213  	ec.Add(c.writeCommitType(c.newLog, AddLinkAtLevel))
   214  	ec.Add(c.writeUint64(c.newLog, nodeid))
   215  	ec.Add(c.writeUint16(c.newLog, uint16(level)))
   216  	ec.Add(c.writeUint64(c.newLog, target))
   217  
   218  	return ec.ToError()
   219  }
   220  
   221  func (c *MemoryCondensor) SetEntryPointWithMaxLayer(id uint64, level int) error {
   222  	ec := &errorcompounder.ErrorCompounder{}
   223  	ec.Add(c.writeCommitType(c.newLog, SetEntryPointMaxLevel))
   224  	ec.Add(c.writeUint64(c.newLog, id))
   225  	ec.Add(c.writeUint16(c.newLog, uint16(level)))
   226  
   227  	return ec.ToError()
   228  }
   229  
   230  func (c *MemoryCondensor) AddTombstone(nodeid uint64) error {
   231  	ec := &errorcompounder.ErrorCompounder{}
   232  	ec.Add(c.writeCommitType(c.newLog, AddTombstone))
   233  	ec.Add(c.writeUint64(c.newLog, nodeid))
   234  
   235  	return ec.ToError()
   236  }
   237  
   238  func (c *MemoryCondensor) AddPQ(data compressionhelpers.PQData) error {
   239  	toWrite := make([]byte, 10)
   240  	toWrite[0] = byte(AddPQ)
   241  	binary.LittleEndian.PutUint16(toWrite[1:3], data.Dimensions)
   242  	toWrite[3] = byte(data.EncoderType)
   243  	binary.LittleEndian.PutUint16(toWrite[4:6], data.Ks)
   244  	binary.LittleEndian.PutUint16(toWrite[6:8], data.M)
   245  	toWrite[8] = data.EncoderDistribution
   246  	if data.UseBitsEncoding {
   247  		toWrite[9] = 1
   248  	} else {
   249  		toWrite[9] = 0
   250  	}
   251  
   252  	for _, encoder := range data.Encoders {
   253  		toWrite = append(toWrite, encoder.ExposeDataForRestore()...)
   254  	}
   255  	_, err := c.newLog.Write(toWrite)
   256  	return err
   257  }
   258  
   259  func NewMemoryCondensor(logger logrus.FieldLogger) *MemoryCondensor {
   260  	return &MemoryCondensor{logger: logger}
   261  }