github.com/weaviate/weaviate@v1.24.6/adapters/repos/db/lsmkv/segmentindex/indexes.go (about)

     1  //                           _       _
     2  // __      _____  __ ___   ___  __ _| |_ ___
     3  // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
     4  //  \ V  V /  __/ (_| |\ V /| | (_| | ||  __/
     5  //   \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
     6  //
     7  //  Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
     8  //
     9  //  CONTACT: hello@weaviate.io
    10  //
    11  
    12  package segmentindex
    13  
    14  import (
    15  	"bufio"
    16  	"bytes"
    17  	"encoding/binary"
    18  	"io"
    19  	"os"
    20  	"path/filepath"
    21  	"sort"
    22  
    23  	"github.com/pkg/errors"
    24  )
    25  
    26  type Indexes struct {
    27  	Keys                []Key
    28  	SecondaryIndexCount uint16
    29  	ScratchSpacePath    string
    30  }
    31  
    32  func (s Indexes) WriteTo(w io.Writer) (int64, error) {
    33  	var currentOffset uint64 = HeaderSize
    34  	if len(s.Keys) > 0 {
    35  		currentOffset = uint64(s.Keys[len(s.Keys)-1].ValueEnd)
    36  	}
    37  	var written int64
    38  
    39  	if _, err := os.Stat(s.ScratchSpacePath); err == nil {
    40  		// exists, we need to delete
    41  		// This could be the case if Weaviate shut down unexpectedly (i.e. crashed)
    42  		// while a compaction was running. We can safely discard the contents of
    43  		// the scratch space.
    44  
    45  		if err := os.RemoveAll(s.ScratchSpacePath); err != nil {
    46  			return written, errors.Wrap(err, "clean up previous scratch space")
    47  		}
    48  	} else if os.IsNotExist(err) {
    49  		// does not exist yet, nothing to - will be created in the next step
    50  	} else {
    51  		return written, errors.Wrap(err, "check for scratch space directory")
    52  	}
    53  
    54  	if err := os.Mkdir(s.ScratchSpacePath, 0o777); err != nil {
    55  		return written, errors.Wrap(err, "create scratch space")
    56  	}
    57  
    58  	primaryFileName := filepath.Join(s.ScratchSpacePath, "primary")
    59  	primaryFD, err := os.Create(primaryFileName)
    60  	if err != nil {
    61  		return written, err
    62  	}
    63  
    64  	primaryFDBuffered := bufio.NewWriter(primaryFD)
    65  
    66  	n, err := s.buildAndMarshalPrimary(primaryFDBuffered, s.Keys)
    67  	if err != nil {
    68  		return written, err
    69  	}
    70  
    71  	if err := primaryFDBuffered.Flush(); err != nil {
    72  		return written, err
    73  	}
    74  
    75  	primaryFD.Seek(0, io.SeekStart)
    76  
    77  	// pretend that primary index was already written, then also account for the
    78  	// additional offset pointers (one for each secondary index)
    79  	currentOffset = currentOffset + uint64(n) +
    80  		uint64(s.SecondaryIndexCount)*8
    81  
    82  	// secondaryIndicesBytes := bytes.NewBuffer(nil)
    83  	secondaryFileName := filepath.Join(s.ScratchSpacePath, "secondary")
    84  	secondaryFD, err := os.Create(secondaryFileName)
    85  	if err != nil {
    86  		return written, err
    87  	}
    88  
    89  	secondaryFDBuffered := bufio.NewWriter(secondaryFD)
    90  
    91  	if s.SecondaryIndexCount > 0 {
    92  		offsets := make([]uint64, s.SecondaryIndexCount)
    93  		for pos := range offsets {
    94  			n, err := s.buildAndMarshalSecondary(secondaryFDBuffered, pos, s.Keys)
    95  			if err != nil {
    96  				return written, err
    97  			} else {
    98  				written += int64(n)
    99  			}
   100  
   101  			offsets[pos] = currentOffset
   102  			currentOffset = offsets[pos] + uint64(n)
   103  		}
   104  
   105  		if err := binary.Write(w, binary.LittleEndian, &offsets); err != nil {
   106  			return written, err
   107  		}
   108  
   109  		written += int64(len(offsets)) * 8
   110  	}
   111  
   112  	if err := secondaryFDBuffered.Flush(); err != nil {
   113  		return written, err
   114  	}
   115  
   116  	secondaryFD.Seek(0, io.SeekStart)
   117  
   118  	if n, err := io.Copy(w, primaryFD); err != nil {
   119  		return written, err
   120  	} else {
   121  		written += int64(n)
   122  	}
   123  
   124  	if n, err := io.Copy(w, secondaryFD); err != nil {
   125  		return written, err
   126  	} else {
   127  		written += int64(n)
   128  	}
   129  
   130  	if err := primaryFD.Close(); err != nil {
   131  		return written, err
   132  	}
   133  
   134  	if err := secondaryFD.Close(); err != nil {
   135  		return written, err
   136  	}
   137  
   138  	if err := os.RemoveAll(s.ScratchSpacePath); err != nil {
   139  		return written, err
   140  	}
   141  
   142  	return written, nil
   143  }
   144  
   145  // pos indicates the position of a secondary index, assumes unsorted keys and
   146  // sorts them
   147  func (s *Indexes) buildAndMarshalSecondary(w io.Writer, pos int,
   148  	keys []Key,
   149  ) (int64, error) {
   150  	keyNodes := make([]Node, len(keys))
   151  	i := 0
   152  	for _, key := range keys {
   153  		if pos >= len(key.SecondaryKeys) {
   154  			// a secondary key is not guaranteed to be present. For example, a delete
   155  			// operation could pe performed using only the primary key
   156  			continue
   157  		}
   158  
   159  		keyNodes[i] = Node{
   160  			Key:   key.SecondaryKeys[pos],
   161  			Start: uint64(key.ValueStart),
   162  			End:   uint64(key.ValueEnd),
   163  		}
   164  		i++
   165  	}
   166  
   167  	keyNodes = keyNodes[:i]
   168  
   169  	sort.Slice(keyNodes, func(a, b int) bool {
   170  		return bytes.Compare(keyNodes[a].Key, keyNodes[b].Key) < 0
   171  	})
   172  
   173  	index := NewBalanced(keyNodes)
   174  	n, err := index.MarshalBinaryInto(w)
   175  	if err != nil {
   176  		return 0, err
   177  	}
   178  
   179  	return n, nil
   180  }
   181  
   182  // assumes sorted keys and does NOT sort them again
   183  func (s *Indexes) buildAndMarshalPrimary(w io.Writer, keys []Key) (int64, error) {
   184  	keyNodes := make([]Node, len(keys))
   185  	for i, key := range keys {
   186  		keyNodes[i] = Node{
   187  			Key:   key.Key,
   188  			Start: uint64(key.ValueStart),
   189  			End:   uint64(key.ValueEnd),
   190  		}
   191  	}
   192  	index := NewBalanced(keyNodes)
   193  
   194  	n, err := index.MarshalBinaryInto(w)
   195  	if err != nil {
   196  		return -1, err
   197  	}
   198  
   199  	return n, nil
   200  }