github.com/rohankumardubey/aresdb@v0.0.2-0.20190517170215-e54e3ca06b9c/memstore/common/vector_party.go (about) 1 // Copyright (c) 2017-2018 Uber Technologies, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package common 16 17 import ( 18 "github.com/uber/aresdb/diskstore" 19 "io" 20 "unsafe" 21 ) 22 23 // ColumnMode represents how many vectors a vector party may have. 24 // For live batch, it should always be 0,1 or 2. 25 // For sorted column of archive batch, it will be mode 0 or 3. 26 // For other columns of archive batch, it can be any of these four modes. 27 type ColumnMode int 28 29 const ( 30 // AllValuesDefault (mode 0) 31 AllValuesDefault ColumnMode = iota 32 // AllValuesPresent (mode 1) 33 AllValuesPresent 34 // HasNullVector (mode 2) 35 HasNullVector 36 // HasCountVector (mode 3) 37 HasCountVector 38 // MaxColumnMode represents the upper limit of column modes 39 MaxColumnMode 40 ) 41 42 // HostVectorPartySlice stores pointers to data for a column in host memory. 43 // And its start index and Bytes 44 type HostVectorPartySlice struct { 45 Values unsafe.Pointer 46 Nulls unsafe.Pointer 47 // The length of the count vector is Length+1 48 Counts unsafe.Pointer 49 Length int 50 ValueType DataType 51 DefaultValue DataValue 52 53 ValueStartIndex int 54 NullStartIndex int 55 CountStartIndex int 56 57 ValueBytes int 58 NullBytes int 59 CountBytes int 60 } 61 62 // ValueCountsUpdateMode represents the way we update value counts when we are writing values to 63 // vector parties. 64 type ValueCountsUpdateMode int 65 66 // SlicedVector is vector party data represented into human-readable slice format 67 // consists of a value slice and count slice, 68 // count slice consists of accumulative counts. 69 // swagger:model slicedVector 70 type SlicedVector struct { 71 Values []interface{} `json:"values"` 72 Counts []int `json:"counts"` 73 } 74 75 // VectorPartySerializer is the interface to read/write a vector party from/to disk. Refer to 76 // https://github.com/uber/aresdb/wiki/VectorStore for more details about 77 // vector party's on disk format. 78 type VectorPartySerializer interface { 79 // ReadVectorParty reads vector party from disk and set fields in passed-in vp. 80 ReadVectorParty(vp VectorParty) error 81 // WriteSnapshotVectorParty writes vector party to disk 82 WriteVectorParty(vp VectorParty) error 83 // CheckVectorPartySerializable check if the VectorParty is serializable 84 CheckVectorPartySerializable(vp VectorParty) error 85 // ReportVectorPartyMemoryUsage report memory usage according to underneath VectorParty property 86 ReportVectorPartyMemoryUsage(bytes int64) 87 } 88 89 // VectorParty interface 90 type VectorParty interface { 91 // Allocate allocate underlying storage for vector party 92 Allocate(hasCount bool) 93 94 // GetValidity get validity of given offset. 95 GetValidity(offset int) bool 96 // GetDataValue returns the DataValue for the specified index. 97 // It first check validity of the value, then it check whether it's a 98 // boolean column to decide whether to load bool value or other value 99 // type. Index bound is not checked! 100 GetDataValue(offset int) DataValue 101 // SetDataValue writes a data value at given offset. Third parameter count should 102 // only be passed for compressed columns. checkValueCount is a flag to tell whether 103 // need to check value count (NonDefaultValueCount and ValidValueCount) while setting 104 // the value. It should be true for archive store and false for live store. **This does 105 // not set the count vector as this is not accumulated count.** 106 SetDataValue(offset int, value DataValue, countsUpdateMode ValueCountsUpdateMode, counts ...uint32) 107 // GetDataValueByRow returns the DataValue for the specified row. It will do binary 108 // search on the count vector to find the correct offset if this is a mode 3 vector 109 // party. Otherwise it will behave same as GetDataValue. 110 // Caller needs to ensure row is within valid range. 111 GetDataValueByRow(row int) DataValue 112 113 GetDataType() DataType 114 GetLength() int 115 GetBytes() int64 116 117 // Slice vector party into human readable SlicedVector format 118 Slice(startRow, numRows int) SlicedVector 119 120 // SafeDestruct destructs vector party memory 121 SafeDestruct() 122 123 // Write serialize vector party 124 Write(writer io.Writer) error 125 // Read deserialize vector party 126 Read(reader io.Reader, serializer VectorPartySerializer) error 127 // Check whether two vector parties are equal (used only in unit tests) 128 Equals(other VectorParty) bool 129 // GetNonDefaultValueCount get Number of non-default values stored 130 GetNonDefaultValueCount() int 131 } 132 133 // CVectorParty is vector party that is backed by c 134 type CVectorParty interface { 135 //Judge column mode 136 JudgeMode() ColumnMode 137 // Get column mode 138 GetMode() ColumnMode 139 } 140 141 // LiveVectorParty represents vector party in live store 142 type LiveVectorParty interface { 143 VectorParty 144 145 // Note for all following functions, data type are not checked. So callee need to perform the data type check 146 // and call the correct SetXXX function. 147 148 // If we already know this is a bool vp, we can set bool directly without constructing a data value struct. 149 SetBool(offset int, val bool, valid bool) 150 // Set value via a unsafe.Pointer directly. 151 SetValue(offset int, val unsafe.Pointer, valid bool) 152 // Set go value directly. 153 SetGoValue(offset int, val GoDataValue, valid bool) 154 // Get value directly 155 GetValue(offset int) (unsafe.Pointer, bool) 156 // GetMinMaxValue get min and max value, 157 // returns uint32 value since only valid for time column 158 GetMinMaxValue() (min, max uint32) 159 } 160 161 // ArchiveVectorParty represents vector party in archive store 162 type ArchiveVectorParty interface { 163 VectorParty 164 165 // Get cumulative count on specified offset 166 GetCount(offset int) uint32 167 // set cumulative count on specified offset 168 SetCount(offset int, count uint32) 169 170 // Pin archive vector party for use 171 Pin() 172 // Release pin 173 Release() 174 // WaitForUsers Wait/Check whether all users finished 175 // batch lock needs to be held before calling if blocking wait 176 // eg. 177 // batch.Lock() 178 // vp.WaitForUsers(true) 179 // batch.Unlock() 180 WaitForUsers(blocking bool) (usersDone bool) 181 182 // CopyOnWrite copies vector party on write/update 183 CopyOnWrite(batchSize int) ArchiveVectorParty 184 // LoadFromDisk start loading vector party from disk, 185 // this is a non-blocking operation 186 LoadFromDisk(hostMemManager HostMemoryManager, diskStore diskstore.DiskStore, table string, shardID int, columnID, batchID int, batchVersion uint32, seqNum uint32) 187 // WaitForDiskLoad waits for vector party disk load to finish 188 WaitForDiskLoad() 189 // Prune prunes vector party based on column mode to clean memory if possible 190 Prune() 191 192 // Slice vector party using specified value within [lowerBoundRow, upperBoundRow) 193 SliceByValue(lowerBoundRow, upperBoundRow int, value unsafe.Pointer) (startRow int, endRow int, startIndex int, endIndex int) 194 // Slice vector party to get [startIndex, endIndex) based on [lowerBoundRow, upperBoundRow) 195 SliceIndex(lowerBoundRow, upperBoundRow int) (startIndex, endIndex int) 196 }