github.com/rohankumardubey/aresdb@v0.0.2-0.20190517170215-e54e3ca06b9c/memstore/common/vector_party.go (about)

     1  //  Copyright (c) 2017-2018 Uber Technologies, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package common
    16  
    17  import (
    18  	"github.com/uber/aresdb/diskstore"
    19  	"io"
    20  	"unsafe"
    21  )
    22  
    23  // ColumnMode represents how many vectors a vector party may have.
    24  // For a live batch, it should always be mode 0, 1 or 2.
    25  // For a sorted column of an archive batch, it will be mode 0 or 3.
    26  // For other columns of an archive batch, it can be any of these four modes.
    27  type ColumnMode int
    28  
    29  const (
    30  	// AllValuesDefault is mode 0; going by the name, every value is the default value.
    31  	AllValuesDefault ColumnMode = iota
    32  	// AllValuesPresent is mode 1; going by the name, every value is present.
    33  	AllValuesPresent
    34  	// HasNullVector is mode 2: the vector party has a null vector.
    35  	HasNullVector
    36  	// HasCountVector is mode 3: the vector party has a count vector.
    37  	HasCountVector
    38  	// MaxColumnMode is the exclusive upper limit of column modes, i.e. one past HasCountVector.
    39  	MaxColumnMode
    40  )
    41  
    42  // HostVectorPartySlice stores pointers to the data of a column in host memory,
    43  // together with the start index and the size in bytes of each referenced vector.
    44  type HostVectorPartySlice struct {
    45  	Values unsafe.Pointer
    46  	Nulls  unsafe.Pointer
    47  	// Counts points to the count vector, whose length is Length+1.
    48  	Counts       unsafe.Pointer
    49  	Length       int
    50  	ValueType    DataType
    51  	DefaultValue DataValue
    52  	// Start index of the value, null and count data respectively.
    53  	ValueStartIndex int
    54  	NullStartIndex  int
    55  	CountStartIndex int
    56  	// Size in bytes of the value, null and count data respectively.
    57  	ValueBytes int
    58  	NullBytes  int
    59  	CountBytes int
    60  }
    61  
    62  // ValueCountsUpdateMode represents the way value counts are updated when values are
    63  // written to vector parties.
    64  type ValueCountsUpdateMode int
    65  
    66  // SlicedVector is vector party data represented in a human-readable slice format.
    67  // It consists of a value slice and a count slice;
    68  // the count slice holds cumulative counts.
    69  // swagger:model slicedVector
    70  type SlicedVector struct {
    71  	Values []interface{} `json:"values"`
    72  	Counts []int         `json:"counts"`
    73  }
    74  
    75  // VectorPartySerializer is the interface to read/write a vector party from/to disk. Refer to
    76  // https://github.com/uber/aresdb/wiki/VectorStore for more details about
    77  // the vector party's on-disk format.
    78  type VectorPartySerializer interface {
    79  	// ReadVectorParty reads a vector party from disk and sets the fields of the passed-in vp.
    80  	ReadVectorParty(vp VectorParty) error
    81  	// WriteVectorParty writes the vector party to disk.
    82  	WriteVectorParty(vp VectorParty) error
    83  	// CheckVectorPartySerializable checks whether the VectorParty is serializable.
    84  	CheckVectorPartySerializable(vp VectorParty) error
    85  	// ReportVectorPartyMemoryUsage reports memory usage according to the underlying VectorParty property.
    86  	ReportVectorPartyMemoryUsage(bytes int64)
    87  }
    88  
    89  // VectorParty is the common vector party interface; LiveVectorParty and ArchiveVectorParty embed it.
    90  type VectorParty interface {
    91  	// Allocate allocates the underlying storage for the vector party.
    92  	Allocate(hasCount bool)
    93  
    94  	// GetValidity gets the validity of the given offset.
    95  	GetValidity(offset int) bool
    96  	// GetDataValue returns the DataValue for the specified index.
    97  	// It first checks the validity of the value, then it checks whether it's a
    98  	// boolean column to decide whether to load a bool value or another value
    99  	// type. Index bound is not checked!
   100  	GetDataValue(offset int) DataValue
   101  	// SetDataValue writes a data value at the given offset. The variadic counts argument should
   102  	// only be passed for compressed columns. countsUpdateMode tells how the value counts
   103  	// (NonDefaultValueCount and ValidValueCount) are updated while setting the value.
   104  	// NOTE(review): earlier wording described a checkValueCount flag (true for archive store, false for
   105  	// live store); confirm the mapping. **This does not set the count vector as this is not an accumulated count.**
   106  	SetDataValue(offset int, value DataValue, countsUpdateMode ValueCountsUpdateMode, counts ...uint32)
   107  	// GetDataValueByRow returns the DataValue for the specified row. It will do a binary
   108  	// search on the count vector to find the correct offset if this is a mode 3 vector
   109  	// party. Otherwise it will behave the same as GetDataValue.
   110  	// The caller needs to ensure row is within the valid range.
   111  	GetDataValueByRow(row int) DataValue
   112  
   113  	GetDataType() DataType
   114  	GetLength() int
   115  	GetBytes() int64
   116  
   117  	// Slice slices the vector party into the human-readable SlicedVector format.
   118  	Slice(startRow, numRows int) SlicedVector
   119  
   120  	// SafeDestruct destructs the vector party's memory.
   121  	SafeDestruct()
   122  
   123  	// Write serializes the vector party to writer.
   124  	Write(writer io.Writer) error
   125  	// Read deserializes the vector party from reader.
   126  	Read(reader io.Reader, serializer VectorPartySerializer) error
   127  	// Equals checks whether two vector parties are equal (used only in unit tests).
   128  	Equals(other VectorParty) bool
   129  	// GetNonDefaultValueCount gets the number of non-default values stored.
   130  	GetNonDefaultValueCount() int
   131  }
   132  
   133  // CVectorParty is a vector party that is backed by C.
   134  type CVectorParty interface {
   135  	// JudgeMode judges the column mode.
   136  	JudgeMode() ColumnMode
   137  	// GetMode gets the column mode.
   138  	GetMode() ColumnMode
   139  }
   140  
   141  // LiveVectorParty represents a vector party in the live store.
   142  type LiveVectorParty interface {
   143  	VectorParty
   144  
   145  	// Note that for all of the following functions the data type is not checked, so the caller needs to
   146  	// perform the data type check and call the correct SetXXX function.
   147  
   148  	// SetBool sets a bool directly, without constructing a data value struct, when this is known to be a bool vp.
   149  	SetBool(offset int, val bool, valid bool)
   150  	// SetValue sets a value directly via an unsafe.Pointer.
   151  	SetValue(offset int, val unsafe.Pointer, valid bool)
   152  	// SetGoValue sets a go value directly.
   153  	SetGoValue(offset int, val GoDataValue, valid bool)
   154  	// GetValue gets a value directly.
   155  	GetValue(offset int) (unsafe.Pointer, bool)
   156  	// GetMinMaxValue gets the min and max value;
   157  	// it returns uint32 values since it is only valid for the time column.
   158  	GetMinMaxValue() (min, max uint32)
   159  }
   160  
   161  // ArchiveVectorParty represents a vector party in the archive store.
   162  type ArchiveVectorParty interface {
   163  	VectorParty
   164  
   165  	// GetCount gets the cumulative count at the specified offset.
   166  	GetCount(offset int) uint32
   167  	// SetCount sets the cumulative count at the specified offset.
   168  	SetCount(offset int, count uint32)
   169  
   170  	// Pin pins the archive vector party for use.
   171  	Pin()
   172  	// Release releases the pin.
   173  	Release()
   174  	// WaitForUsers waits for/checks whether all users have finished.
   175  	// The batch lock needs to be held before calling if doing a blocking wait,
   176  	// eg.
   177  	// 	batch.Lock()
   178  	// 	vp.WaitForUsers(true)
   179  	// 	batch.Unlock()
   180  	WaitForUsers(blocking bool) (usersDone bool)
   181  
   182  	// CopyOnWrite copies the vector party on write/update.
   183  	CopyOnWrite(batchSize int) ArchiveVectorParty
   184  	// LoadFromDisk starts loading the vector party from disk;
   185  	// this is a non-blocking operation.
   186  	LoadFromDisk(hostMemManager HostMemoryManager, diskStore diskstore.DiskStore, table string, shardID int, columnID, batchID int, batchVersion uint32, seqNum uint32)
   187  	// WaitForDiskLoad waits for the vector party disk load to finish.
   188  	WaitForDiskLoad()
   189  	// Prune prunes the vector party based on column mode to clean memory if possible.
   190  	Prune()
   191  
   192  	// SliceByValue slices the vector party using the specified value within [lowerBoundRow, upperBoundRow).
   193  	SliceByValue(lowerBoundRow, upperBoundRow int, value unsafe.Pointer) (startRow int, endRow int, startIndex int, endIndex int)
   194  	// SliceIndex slices the vector party to get [startIndex, endIndex) based on [lowerBoundRow, upperBoundRow).
   195  	SliceIndex(lowerBoundRow, upperBoundRow int) (startIndex, endIndex int)
   196  }