github.com/weaviate/weaviate@v1.24.6/adapters/repos/db/lsmkv/segmentindex/indexes.go (about) 1 // _ _ 2 // __ _____ __ ___ ___ __ _| |_ ___ 3 // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \ 4 // \ V V / __/ (_| |\ V /| | (_| | || __/ 5 // \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___| 6 // 7 // Copyright © 2016 - 2024 Weaviate B.V. All rights reserved. 8 // 9 // CONTACT: hello@weaviate.io 10 // 11 12 package segmentindex 13 14 import ( 15 "bufio" 16 "bytes" 17 "encoding/binary" 18 "io" 19 "os" 20 "path/filepath" 21 "sort" 22 23 "github.com/pkg/errors" 24 ) 25 26 type Indexes struct { 27 Keys []Key 28 SecondaryIndexCount uint16 29 ScratchSpacePath string 30 } 31 32 func (s Indexes) WriteTo(w io.Writer) (int64, error) { 33 var currentOffset uint64 = HeaderSize 34 if len(s.Keys) > 0 { 35 currentOffset = uint64(s.Keys[len(s.Keys)-1].ValueEnd) 36 } 37 var written int64 38 39 if _, err := os.Stat(s.ScratchSpacePath); err == nil { 40 // exists, we need to delete 41 // This could be the case if Weaviate shut down unexpectedly (i.e. crashed) 42 // while a compaction was running. We can safely discard the contents of 43 // the scratch space. 44 45 if err := os.RemoveAll(s.ScratchSpacePath); err != nil { 46 return written, errors.Wrap(err, "clean up previous scratch space") 47 } 48 } else if os.IsNotExist(err) { 49 // does not exist yet, nothing to - will be created in the next step 50 } else { 51 return written, errors.Wrap(err, "check for scratch space directory") 52 } 53 54 if err := os.Mkdir(s.ScratchSpacePath, 0o777); err != nil { 55 return written, errors.Wrap(err, "create scratch space") 56 } 57 58 primaryFileName := filepath.Join(s.ScratchSpacePath, "primary") 59 primaryFD, err := os.Create(primaryFileName) 60 if err != nil { 61 return written, err 62 } 63 64 primaryFDBuffered := bufio.NewWriter(primaryFD) 65 66 n, err := s.buildAndMarshalPrimary(primaryFDBuffered, s.Keys) 67 if err != nil { 68 return written, err 69 } 70 71 if err := primaryFDBuffered.Flush(); err != nil { 72 return written, err 73 } 74 75 primaryFD.Seek(0, io.SeekStart) 76 77 // pretend that primary index was already written, then also account for the 78 // additional offset pointers (one for each secondary index) 79 currentOffset = currentOffset + uint64(n) + 80 uint64(s.SecondaryIndexCount)*8 81 82 // secondaryIndicesBytes := bytes.NewBuffer(nil) 83 secondaryFileName := filepath.Join(s.ScratchSpacePath, "secondary") 84 secondaryFD, err := os.Create(secondaryFileName) 85 if err != nil { 86 return written, err 87 } 88 89 secondaryFDBuffered := bufio.NewWriter(secondaryFD) 90 91 if s.SecondaryIndexCount > 0 { 92 offsets := make([]uint64, s.SecondaryIndexCount) 93 for pos := range offsets { 94 n, err := s.buildAndMarshalSecondary(secondaryFDBuffered, pos, s.Keys) 95 if err != nil { 96 return written, err 97 } else { 98 written += int64(n) 99 } 100 101 offsets[pos] = currentOffset 102 currentOffset = offsets[pos] + uint64(n) 103 } 104 105 if err := binary.Write(w, binary.LittleEndian, &offsets); err != nil { 106 return written, err 107 } 108 109 written += int64(len(offsets)) * 8 110 } 111 112 if err := secondaryFDBuffered.Flush(); err != nil { 113 return written, err 114 } 115 116 secondaryFD.Seek(0, io.SeekStart) 117 118 if n, err := io.Copy(w, primaryFD); err != nil { 119 return written, err 120 } else { 121 written += int64(n) 122 } 123 124 if n, err := io.Copy(w, secondaryFD); err != nil { 125 return written, err 126 } else { 127 written += int64(n) 128 } 129 130 if err := primaryFD.Close(); err != nil { 131 return written, err 132 } 133 134 if err := secondaryFD.Close(); err != nil { 135 return written, err 136 } 137 138 if err := os.RemoveAll(s.ScratchSpacePath); err != nil { 139 return written, err 140 } 141 142 return written, nil 143 } 144 145 // pos indicates the position of a secondary index, assumes unsorted keys and 146 // sorts them 147 func (s *Indexes) buildAndMarshalSecondary(w io.Writer, pos int, 148 keys []Key, 149 ) (int64, error) { 150 keyNodes := make([]Node, len(keys)) 151 i := 0 152 for _, key := range keys { 153 if pos >= len(key.SecondaryKeys) { 154 // a secondary key is not guaranteed to be present. For example, a delete 155 // operation could pe performed using only the primary key 156 continue 157 } 158 159 keyNodes[i] = Node{ 160 Key: key.SecondaryKeys[pos], 161 Start: uint64(key.ValueStart), 162 End: uint64(key.ValueEnd), 163 } 164 i++ 165 } 166 167 keyNodes = keyNodes[:i] 168 169 sort.Slice(keyNodes, func(a, b int) bool { 170 return bytes.Compare(keyNodes[a].Key, keyNodes[b].Key) < 0 171 }) 172 173 index := NewBalanced(keyNodes) 174 n, err := index.MarshalBinaryInto(w) 175 if err != nil { 176 return 0, err 177 } 178 179 return n, nil 180 } 181 182 // assumes sorted keys and does NOT sort them again 183 func (s *Indexes) buildAndMarshalPrimary(w io.Writer, keys []Key) (int64, error) { 184 keyNodes := make([]Node, len(keys)) 185 for i, key := range keys { 186 keyNodes[i] = Node{ 187 Key: key.Key, 188 Start: uint64(key.ValueStart), 189 End: uint64(key.ValueEnd), 190 } 191 } 192 index := NewBalanced(keyNodes) 193 194 n, err := index.MarshalBinaryInto(w) 195 if err != nil { 196 return -1, err 197 } 198 199 return n, nil 200 }