github.com/m3db/m3@v1.5.0/src/dbnode/persist/fs/index_lookup.go (about) 1 // Copyright (c) 2017 Uber Technologies, Inc. 2 // 3 // Permission is hereby granted, free of charge, to any person obtaining a copy 4 // of this software and associated documentation files (the "Software"), to deal 5 // in the Software without restriction, including without limitation the rights 6 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 // copies of the Software, and to permit persons to whom the Software is 8 // furnished to do so, subject to the following conditions: 9 // 10 // The above copyright notice and this permission notice shall be included in 11 // all copies or substantial portions of the Software. 12 // 13 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 // THE SOFTWARE. 20 21 package fs 22 23 import ( 24 "bytes" 25 "errors" 26 "fmt" 27 28 "github.com/m3db/m3/src/dbnode/digest" 29 xmsgpack "github.com/m3db/m3/src/dbnode/persist/fs/msgpack" 30 "github.com/m3db/m3/src/x/ident" 31 "github.com/m3db/m3/src/x/mmap" 32 ) 33 34 const mmapPersistFsSummariesFileName = "mmap.persist.fs.summariesfile" 35 36 var errCloneShouldNotBeCloned = errors.New("clones should not be cloned") 37 38 // nearestIndexOffsetLookup provides a way of quickly determining the nearest offset of an 39 // ID in the index file. It is not safe for concurrent use 40 type nearestIndexOffsetLookup struct { 41 summaryIDsOffsets []xmsgpack.IndexSummaryToken 42 // bytes from file mmap'd into anonymous region 43 summariesMmap mmap.Descriptor 44 isClone bool 45 } 46 47 func newNearestIndexOffsetLookup( 48 summaryIDsOffsets []xmsgpack.IndexSummaryToken, 49 summariesMmap mmap.Descriptor, 50 ) *nearestIndexOffsetLookup { 51 return &nearestIndexOffsetLookup{ 52 summaryIDsOffsets: summaryIDsOffsets, 53 summariesMmap: summariesMmap, 54 isClone: false, 55 } 56 } 57 58 func (il *nearestIndexOffsetLookup) concurrentClone() (*nearestIndexOffsetLookup, error) { 59 if il.isClone { 60 return nil, errCloneShouldNotBeCloned 61 } 62 63 return &nearestIndexOffsetLookup{ 64 summaryIDsOffsets: il.summaryIDsOffsets, 65 summariesMmap: il.summariesMmap, 66 isClone: true, 67 }, nil 68 } 69 70 // getNearestIndexFileOffset returns either: 71 // 1. The offset in the index file for the specified series 72 // 2. The offset in the index file for the the series in the summaries file 73 // that satisfies the following two constraints: 74 // 1. Is closest to the desired series in the index file 75 // 2. Is BEFORE the desired series in the index file (because we 76 // we scan the index file sequentially in a forward-moving manner) 77 // In other words, the returned offset can always be used as a starting point to 78 // begin scanning the index file for the desired series. 79 func (il *nearestIndexOffsetLookup) getNearestIndexFileOffset( 80 id ident.ID, 81 resources ReusableSeekerResources, 82 ) (int64, error) { 83 idBytes := id.Bytes() 84 85 min := 0 86 max := len(il.summaryIDsOffsets) - 1 87 88 // The summaries file only contains a fraction of the series that are in 89 // the index file itself. Because of that, the binary search that we're 90 // performing is "optimistic". We're trying to find either an exact match, 91 // OR the nearest match that is to the left of the series we're searching 92 // for (so we keep track of it everytime we move right). We start with an 93 // assumption that the best match so far is at index 0, because in the worst 94 // case scenario if we don't find a single "match", then the caller should 95 // start at index 0 and scan until they encounter an entry that tells them 96 // that the ID they're looking for does not exist (because the IDs in the 97 // index are sorted). 98 bestMatchSoFar := int64(0) 99 100 for { 101 if min > max { 102 return bestMatchSoFar, nil 103 } 104 105 idx := (max + min) / 2 106 summaryBytesMetadata := il.summaryIDsOffsets[idx] 107 compBytes := summaryBytesMetadata.ID(il.summariesMmap.Bytes) 108 comparison := bytes.Compare(idBytes, compBytes) 109 110 // Found it 111 if comparison == 0 { 112 indexOffset, err := summaryBytesMetadata.IndexOffset( 113 il.summariesMmap.Bytes, resources.byteDecoderStream, resources.msgpackDecoder) 114 // Should never happen, either something is really wrong with the code or 115 // the file on disk was corrupted 116 if err != nil { 117 return -1, err 118 } 119 return indexOffset, nil 120 } 121 122 // idBytes is smaller than compBytes, go left 123 if comparison == -1 { 124 max = idx - 1 125 continue 126 } 127 128 // idBytes is larger than compBytes, go right 129 if comparison == 1 { 130 min = idx + 1 131 indexOffset, err := summaryBytesMetadata.IndexOffset( 132 il.summariesMmap.Bytes, resources.byteDecoderStream, resources.msgpackDecoder) 133 if err != nil { 134 return -1, err 135 } 136 // update the bestMatchSoFar everytime we move right 137 bestMatchSoFar = indexOffset 138 continue 139 } 140 } 141 } 142 143 func (il *nearestIndexOffsetLookup) close() error { 144 // Parent should clean up shared resources 145 if il.isClone { 146 return nil 147 } 148 return mmap.Munmap(il.summariesMmap) 149 } 150 151 // newNearestIndexOffsetLookupFromSummariesFile creates an nearestIndexOffsetLookup 152 // from an index summaries file by reading the summaries file into an anonymous 153 // mmap'd region, and also creating the slice of summaries offsets which is 154 // required to binary search the data structure. It will also make sure that 155 // the summaries file is sorted (which it always should be). 156 func newNearestIndexOffsetLookupFromSummariesFile( 157 summariesFdWithDigest digest.FdWithDigestReader, 158 expectedDigest uint32, 159 decoder *xmsgpack.Decoder, 160 decoderStream xmsgpack.ByteDecoderStream, 161 numEntries int, 162 forceMmapMemory bool, 163 reporterOptions mmap.ReporterOptions, 164 ) (*nearestIndexOffsetLookup, error) { 165 reporterOptions.Context.Name = mmapPersistFsSummariesFileName 166 summariesMmap, err := validateAndMmap(summariesFdWithDigest, expectedDigest, forceMmapMemory, reporterOptions) 167 if err != nil { 168 return nil, err 169 } 170 171 // Msgpack decode the entire summaries file (we need to store the offsets 172 // for the entries so we can binary-search it) 173 var ( 174 summaryTokens = make([]xmsgpack.IndexSummaryToken, 0, numEntries) 175 lastReadID []byte 176 ) 177 decoderStream.Reset(summariesMmap.Bytes) 178 decoder.Reset(decoderStream) 179 180 for read := 0; read < numEntries; read++ { 181 // We ignore the entry itself because we don't need any information from it 182 entry, summaryToken, err := decoder.DecodeIndexSummary() 183 if err != nil { 184 mmap.Munmap(summariesMmap) 185 return nil, err 186 } 187 188 // Make sure that all the IDs are sorted as we iterate, and return an error 189 // if they're not. This should never happen as files should be sorted on disk. 190 if lastReadID != nil && bytes.Compare(lastReadID, entry.ID) != -1 { 191 mmap.Munmap(summariesMmap) 192 return nil, fmt.Errorf("summaries file is not sorted: %s", summariesFdWithDigest.Fd().Name()) 193 } 194 summaryTokens = append(summaryTokens, summaryToken) 195 lastReadID = entry.ID 196 } 197 198 return newNearestIndexOffsetLookup(summaryTokens, summariesMmap), nil 199 }