github.com/weaviate/weaviate@v1.24.6/adapters/repos/db/vector/hnsw/condensor_mmap_analyzer.go (about)

     1  //                           _       _
     2  // __      _____  __ ___   ___  __ _| |_ ___
     3  // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
     4  //  \ V  V /  __/ (_| |\ V /| | (_| | ||  __/
     5  //   \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
     6  //
     7  //  Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
     8  //
     9  //  CONTACT: hello@weaviate.io
    10  //
    11  
    12  package hnsw
    13  
    14  import (
    15  	"bufio"
    16  	"encoding/binary"
    17  	"io"
    18  	"os"
    19  	"sort"
    20  
    21  	"github.com/pkg/errors"
    22  )
    23  
    24  type mmapIndex struct {
    25  	nodes               []mmapIndexNode
    26  	connectionsPerLevel int
    27  }
    28  
    29  func (mi *mmapIndex) UpsertNodeMaxLevel(node uint64, level uint16) {
    30  	n := sort.Search(len(mi.nodes), func(a int) bool {
    31  		return mi.nodes[a].id >= node
    32  	})
    33  
    34  	if n < len(mi.nodes) && mi.nodes[n].id == node {
    35  		// update
    36  		if mi.nodes[n].maxLevel < level {
    37  			mi.nodes[n].maxLevel = level
    38  		}
    39  	} else {
    40  		// insert
    41  
    42  		// See https://github.com/golang/go/wiki/SliceTricks#insert
    43  		mi.nodes = append(mi.nodes, mmapIndexNode{})
    44  		copy(mi.nodes[n+1:], mi.nodes[n:])
    45  		mi.nodes[n].id = node
    46  		mi.nodes[n].maxLevel = level
    47  	}
    48  }
    49  
    50  func (mi *mmapIndex) DeleteNode(node uint64) {
    51  }
    52  
    53  type mmapIndexNode struct {
    54  	id       uint64
    55  	offset   uint64
    56  	maxLevel uint16
    57  }
    58  
    59  func (n mmapIndexNode) Size(connectionsPerLevel int) int {
    60  	return int(n.maxLevel)*2 + // overhead for uint16 length indicators
    61  		connectionsPerLevel*int(n.maxLevel+1) // level 0 has 2x connections
    62  }
    63  
    64  type MmapCondensorAnalyzer struct {
    65  	reader              *bufio.Reader
    66  	connectionsPerLevel int
    67  	index               mmapIndex
    68  }
    69  
    70  func newMmapCondensorAnalyzer(connectionsPerLevel int) *MmapCondensorAnalyzer {
    71  	return &MmapCondensorAnalyzer{connectionsPerLevel: connectionsPerLevel}
    72  }
    73  
    74  func (a *MmapCondensorAnalyzer) Do(file *os.File) (mmapIndex, error) {
    75  	a.reader = bufio.NewReaderSize(file, 1024*1024)
    76  
    77  	a.index = mmapIndex{
    78  		connectionsPerLevel: a.connectionsPerLevel,
    79  		nodes:               make([]mmapIndexNode, 0, 10000),
    80  	}
    81  
    82  	if err := a.loop(); err != nil {
    83  		return a.index, err
    84  	}
    85  
    86  	return a.index, nil
    87  }
    88  
    89  func (a *MmapCondensorAnalyzer) loop() error {
    90  	for {
    91  		ct, err := a.ReadCommitType(a.reader)
    92  		if err != nil {
    93  			if errors.Is(err, io.EOF) {
    94  				break
    95  			}
    96  
    97  			return err
    98  		}
    99  
   100  		switch ct {
   101  		case AddNode:
   102  			err = a.ReadNode(a.reader)
   103  		case SetEntryPointMaxLevel:
   104  			err = a.ReadEP(a.reader)
   105  		case AddLinkAtLevel:
   106  			err = a.ReadLink(a.reader)
   107  		case ReplaceLinksAtLevel:
   108  			err = a.ReadLinks(a.reader)
   109  		case AddTombstone:
   110  			err = a.ReadAddTombstone(a.reader)
   111  		case RemoveTombstone:
   112  			err = a.ReadRemoveTombstone(a.reader)
   113  		case ClearLinks:
   114  			err = a.ReadClearLinks(a.reader)
   115  		case DeleteNode:
   116  			err = a.ReadDeleteNode(a.reader)
   117  		case ResetIndex:
   118  			a.index.nodes = make([]mmapIndexNode, 0, 10000)
   119  		default:
   120  			err = errors.Errorf("unrecognized commit type %d", ct)
   121  		}
   122  		if err != nil {
   123  			// do not return nil, err, because the err could be a recoverable one
   124  			return err
   125  		}
   126  	}
   127  
   128  	return nil
   129  }
   130  
   131  func (a *MmapCondensorAnalyzer) ReadNode(r io.Reader) error {
   132  	id, err := a.readUint64(r)
   133  	if err != nil {
   134  		return err
   135  	}
   136  
   137  	level, err := a.readUint16(r)
   138  	if err != nil {
   139  		return err
   140  	}
   141  
   142  	a.index.UpsertNodeMaxLevel(id, level)
   143  	return nil
   144  }
   145  
   146  func (a *MmapCondensorAnalyzer) ReadEP(r io.Reader) error {
   147  	// TODO: is this an issue because of bufio Read vs ReadFull?
   148  	_, err := io.CopyN(io.Discard, r, 10)
   149  	return err
   150  }
   151  
   152  func (a *MmapCondensorAnalyzer) ReadLink(r io.Reader) error {
   153  	source, err := a.readUint64(r)
   154  	if err != nil {
   155  		return err
   156  	}
   157  
   158  	level, err := a.readUint16(r)
   159  	if err != nil {
   160  		return err
   161  	}
   162  
   163  	// TODO: is this an issue because of bufio Read vs ReadFull?
   164  	_, err = io.CopyN(io.Discard, r, 8)
   165  	if err != nil {
   166  		return err
   167  	}
   168  	a.index.UpsertNodeMaxLevel(source, level)
   169  
   170  	return nil
   171  }
   172  
   173  func (a *MmapCondensorAnalyzer) ReadLinks(r io.Reader) error {
   174  	source, err := a.readUint64(r)
   175  	if err != nil {
   176  		return err
   177  	}
   178  
   179  	level, err := a.readUint16(r)
   180  	if err != nil {
   181  		return err
   182  	}
   183  
   184  	length, err := a.readUint16(r)
   185  	if err != nil {
   186  		return err
   187  	}
   188  
   189  	a.index.UpsertNodeMaxLevel(source, level)
   190  
   191  	// TODO: is this an issue because of bufio Read vs ReadFull?
   192  	_, err = io.CopyN(io.Discard, r, 8*int64(length))
   193  	if err != nil {
   194  		return err
   195  	}
   196  
   197  	return nil
   198  }
   199  
   200  func (a *MmapCondensorAnalyzer) ReadAddTombstone(r io.Reader) error {
   201  	// TODO: is this an issue because of bufio Read vs ReadFull?
   202  	_, err := io.CopyN(io.Discard, r, 8)
   203  	return err
   204  }
   205  
   206  func (a *MmapCondensorAnalyzer) ReadRemoveTombstone(r io.Reader) error {
   207  	// TODO: is this an issue because of bufio Read vs ReadFull?
   208  	_, err := io.CopyN(io.Discard, r, 8)
   209  	return err
   210  }
   211  
   212  func (a *MmapCondensorAnalyzer) ReadClearLinks(r io.Reader) error {
   213  	// TODO: is this an issue because of bufio Read vs ReadFull?
   214  	_, err := io.CopyN(io.Discard, r, 8)
   215  	return err
   216  }
   217  
   218  func (a *MmapCondensorAnalyzer) ReadDeleteNode(r io.Reader) error {
   219  	id, err := a.readUint64(r)
   220  	if err != nil {
   221  		return err
   222  	}
   223  
   224  	a.index.DeleteNode(id)
   225  	return nil
   226  }
   227  
   228  func (a *MmapCondensorAnalyzer) readUint64(r io.Reader) (uint64, error) {
   229  	var value uint64
   230  	tmpBuf := make([]byte, 8)
   231  	_, err := io.ReadFull(r, tmpBuf)
   232  	if err != nil {
   233  		return 0, errors.Wrap(err, "failed to read uint64")
   234  	}
   235  
   236  	value = binary.LittleEndian.Uint64(tmpBuf)
   237  
   238  	return value, nil
   239  }
   240  
   241  func (a *MmapCondensorAnalyzer) readUint16(r io.Reader) (uint16, error) {
   242  	var value uint16
   243  	tmpBuf := make([]byte, 2)
   244  	_, err := io.ReadFull(r, tmpBuf)
   245  	if err != nil {
   246  		return 0, errors.Wrap(err, "failed to read uint16")
   247  	}
   248  
   249  	value = binary.LittleEndian.Uint16(tmpBuf)
   250  
   251  	return value, nil
   252  }
   253  
   254  func (a *MmapCondensorAnalyzer) ReadCommitType(r io.Reader) (HnswCommitType, error) {
   255  	tmpBuf := make([]byte, 1)
   256  	if _, err := io.ReadFull(r, tmpBuf); err != nil {
   257  		return 0, errors.Wrap(err, "failed to read commit type")
   258  	}
   259  
   260  	return HnswCommitType(tmpBuf[0]), nil
   261  }