github.com/siglens/siglens@v0.0.0-20240328180423-f7ce9ae441ed/pkg/segment/writer/agiletreewriter.go (about)

     1  /*
     2  Copyright 2023.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package writer
    18  
    19  import (
    20  	"fmt"
    21  	"os"
    22  
    23  	. "github.com/siglens/siglens/pkg/segment/utils"
    24  	"github.com/siglens/siglens/pkg/utils"
    25  	log "github.com/sirupsen/logrus"
    26  )
    27  
    28  func (stb *StarTreeBuilder) encodeDictEnc(colName string, colNum uint16,
    29  	buf []byte) uint32 {
    30  
    31  	idx := uint32(0)
    32  
    33  	// copy colname strlen
    34  	l1 := uint16(len(colName))
    35  	copy(buf[idx:], utils.Uint16ToBytesLittleEndian(l1))
    36  	idx += 2
    37  
    38  	// copy the colname str
    39  	copy(buf[idx:], colName)
    40  	idx += uint32(l1)
    41  
    42  	numKeysForCol := stb.segDictLastNum[colNum]
    43  	copy(buf[idx:], utils.Uint32ToBytesLittleEndian(numKeysForCol))
    44  	idx += 4
    45  
    46  	for i := uint32(0); i < numKeysForCol; i++ {
    47  
    48  		curString := stb.segDictEncRev[colNum][i]
    49  
    50  		// copy enc col val strlen
    51  		l1 := uint16(len(curString))
    52  		copy(buf[idx:], utils.Uint16ToBytesLittleEndian(l1))
    53  		idx += 2
    54  
    55  		// copy the enc col val str
    56  		copy(buf[idx:], curString)
    57  		idx += uint32(l1)
    58  	}
    59  	return idx
    60  }
    61  
    62  func (stb *StarTreeBuilder) encodeMetadata(strMFd *os.File) (uint32, error) {
    63  
    64  	sizeNeeded := stb.estimateMetaSize()
    65  	sizeToAdd := sizeNeeded - len(stb.buf)
    66  	if sizeToAdd > 0 {
    67  		newArr := make([]byte, sizeToAdd)
    68  		stb.buf = append(stb.buf, newArr...)
    69  	}
    70  
    71  	idx := uint32(0)
    72  	idx += 4 // reserve for metabyteslen
    73  
    74  	// Len of groupByKeys
    75  	copy(stb.buf[idx:], utils.Uint16ToBytesLittleEndian(stb.numGroupByCols))
    76  	idx += 2
    77  
    78  	// each groupbyKey
    79  	for i := uint16(0); i < stb.numGroupByCols; i++ {
    80  		// copy strlen
    81  		l1 := uint16(len(stb.groupByKeys[i]))
    82  		copy(stb.buf[idx:], utils.Uint16ToBytesLittleEndian(l1))
    83  		idx += 2
    84  
    85  		// copy the str
    86  		copy(stb.buf[idx:], stb.groupByKeys[i])
    87  		idx += uint32(l1)
    88  	}
    89  
    90  	// Len of MeasureColNames
    91  	copy(stb.buf[idx:], utils.Uint16ToBytesLittleEndian(uint16(len(stb.mColNames))))
    92  	idx += 2
    93  
    94  	// each aggFunc
    95  	for _, mCname := range stb.mColNames {
    96  
    97  		// Mcol len
    98  		l1 := uint16(len(mCname))
    99  		copy(stb.buf[idx:], utils.Uint16ToBytesLittleEndian(l1))
   100  		idx += 2
   101  
   102  		// copy the Mcol strname
   103  		copy(stb.buf[idx:], mCname)
   104  		idx += uint32(l1)
   105  	}
   106  
   107  	for colNum, cName := range stb.groupByKeys {
   108  		size := stb.encodeDictEnc(cName, uint16(colNum), stb.buf[idx:])
   109  		idx += size
   110  	}
   111  
   112  	// metaDataLen
   113  	copy(stb.buf[0:], utils.Uint32ToBytesLittleEndian(idx-4))
   114  
   115  	_, err := strMFd.Write(stb.buf[:idx])
   116  	if err != nil {
   117  		log.Errorf("encodeMetadata: meta write failed fname=%v, err=%v", strMFd.Name(), err)
   118  		return idx, err
   119  	}
   120  
   121  	return idx, nil
   122  }
   123  
   124  func (stb *StarTreeBuilder) encodeNddWrapper(segKey string, levsOffsets []int64,
   125  	levsSizes []uint32) (uint32, error) {
   126  
   127  	strLevFname := fmt.Sprintf("%s.strl", segKey)
   128  	strLevFd, err := os.OpenFile(strLevFname, os.O_TRUNC|os.O_WRONLY|os.O_CREATE, 0644)
   129  	if err != nil {
   130  		log.Errorf("encodeNddWrapper: open failed fname=%v, err=%v", strLevFname, err)
   131  		return 0, err
   132  	}
   133  	defer strLevFd.Close()
   134  
   135  	size, err := stb.encodeNodeDetails(strLevFd, []*Node{stb.tree.Root}, 0, 0, levsOffsets,
   136  		levsSizes)
   137  	if err != nil {
   138  		return 0, err
   139  	}
   140  
   141  	return size, nil
   142  }
   143  
   144  func (stb *StarTreeBuilder) encodeNodeDetails(strLevFd *os.File, curLevNodes []*Node,
   145  	level int, strLevFileOff int64, levsOffsets []int64, levsSizes []uint32) (uint32, error) {
   146  
   147  	// save current level offset
   148  	levsOffsets[level] = strLevFileOff
   149  
   150  	sizeNeeded := stb.estimateNodeSize(len(curLevNodes))
   151  	sizeToAdd := sizeNeeded - len(stb.buf)
   152  	if sizeToAdd > 0 {
   153  		newArr := make([]byte, sizeToAdd)
   154  		stb.buf = append(stb.buf, newArr...)
   155  	}
   156  
   157  	idx := uint32(0)
   158  	// encode levelNum
   159  	copy(stb.buf[idx:], utils.Uint16ToBytesLittleEndian(uint16(level)))
   160  	idx += 2
   161  
   162  	// numOfNodes at this level
   163  	copy(stb.buf[idx:], utils.Uint32ToBytesLittleEndian(uint32(len(curLevNodes))))
   164  	idx += 4
   165  
   166  	nextLevelNodes := []*Node{}
   167  	for _, n := range curLevNodes {
   168  
   169  		// save nextlevel children
   170  		for _, child := range n.children {
   171  			nextLevelNodes = append(nextLevelNodes, child)
   172  		}
   173  		// encode curr nodes details
   174  
   175  		// mapKey
   176  		copy(stb.buf[idx:], utils.Uint32ToBytesLittleEndian(n.myKey))
   177  		idx += 4
   178  
   179  		// add Parent keys, don't add parents for root (level-0) and level-1 (since their parent is root)
   180  		ancestor := n.parent
   181  		for i := 1; i < level; i++ {
   182  			if ancestor == nil {
   183  				log.Errorf("encodeNodeDetails: ancestor is nil, level: %v, nodeKey: %+v", level, n.myKey)
   184  				break
   185  			}
   186  
   187  			copy(stb.buf[idx:], utils.Uint32ToBytesLittleEndian(ancestor.myKey))
   188  			idx += 4
   189  			ancestor = ancestor.parent
   190  		}
   191  
   192  		// We should have reached the root.
   193  		if level > 0 && ancestor != stb.tree.Root {
   194  			log.Errorf("encodeNodeDetails: ancestor is not the root, level: %v, nodeKey: %+v", level, n.myKey)
   195  		}
   196  
   197  		for agIdx, e := range n.aggValues {
   198  			copy(stb.buf[idx:], []byte{uint8(e.Dtype)})
   199  			idx += 1
   200  
   201  			switch e.Dtype {
   202  			case SS_DT_UNSIGNED_NUM:
   203  				copy(stb.buf[idx:], utils.Uint64ToBytesLittleEndian(e.CVal.(uint64)))
   204  			case SS_DT_SIGNED_NUM:
   205  				copy(stb.buf[idx:], utils.Int64ToBytesLittleEndian(e.CVal.(int64)))
   206  			case SS_DT_FLOAT:
   207  				copy(stb.buf[idx:], utils.Float64ToBytesLittleEndian(e.CVal.(float64)))
   208  			case SS_DT_BACKFILL: // even for backfill we will have empty bytes in to keep things uniform
   209  			default:
   210  				return 0, fmt.Errorf("encodeNodeDetails: unsupported Dtype: %v, agIdx: %v, nodeKey: %+v, e: %+v",
   211  					e.Dtype, agIdx, n.myKey, e)
   212  			}
   213  			idx += 8
   214  		}
   215  	}
   216  	_, err := strLevFd.WriteAt(stb.buf[:idx], strLevFileOff)
   217  	if err != nil {
   218  		log.Errorf("encodeNodeDetails: nnd write failed, level: %v fname=%v, err=%v", level, strLevFd.Name(), err)
   219  		return idx, err
   220  	}
   221  	strLevFileOff += int64(idx)
   222  	levsSizes[level] = idx
   223  
   224  	if len(nextLevelNodes) > 0 {
   225  		nSize, err := stb.encodeNodeDetails(strLevFd, nextLevelNodes, level+1, strLevFileOff, levsOffsets, levsSizes)
   226  		if err != nil {
   227  			return 0, err
   228  		}
   229  		idx += nSize
   230  	}
   231  
   232  	return idx, nil
   233  }
   234  
   235  /*
   236  	   *************** StarTree Encoding Format *****************************
   237  
   238  	   [FileType 1B] [LenMetaData 4B] [MetaData] [NodeDataDetails]
   239  
   240  	   [MetaData] :
   241  		  [GroupbyKeys] [MeasureColNames] [DictEncCol-1] [DictEncCol-2] ...[DictEncCol-N]
   242  			[GroupbyKeys] : [LenGrpKeys 2B] [GPK-1] [GPK-2]...
   243  			   [GPK] : [StrLen 2B] [ActualStr xB]
   244  
   245  		  [MeasureColNames] : [LenMeasureColNames 2B] [MeasureColName-1] [MeasureColNames-2] ...
   246  			   [MeasureColNames-1] : [StrLen 2B] [McolName xB]
   247  
   248  		  [DictEncCol-1] : [ColStrLen 2B] [ColName xB] [NumKeys 4B] [Enc-1] {Enc-2] ...
   249  			   [Enc-1] : [EncStrLen 2B] [EncStr xB]
   250  
   251  	   [NodeDataDetails]: [NddLen 4B] [LevOffMeta xB] [LevelDetails-1 xB] [LevelDetails-2 xB].... in BFS
   252  		  [LevOffMetas] : [levOff-0 8B] [levSize-0 4B] [levOff-1 8B] [levSize-1 4B] ....
   253  		  [LevelDetails-1] : [LevelNum 2B] [numNodesAtLevel 4B] [NodeAgInfo...]
   254  			  [NodeAgInfo-1] : [nodeKey 4B] [parentKeys xB] [aggValue-1] [aggValue-2] ...
   255  				[parentKeys] : [parKey-0 4B] [parKey-1 4B].... // numOfParents depends on level
   256  				[aggValue]: [dType 1B] [val 8B]
   257  */
   258  func (stb *StarTreeBuilder) EncodeStarTree(segKey string) (uint32, error) {
   259  
   260  	strMetaFname := fmt.Sprintf("%s.strm", segKey)
   261  
   262  	err := stb.Aggregate(stb.tree.Root)
   263  	if err != nil {
   264  		return 0, err
   265  	}
   266  
   267  	strMFd, err := os.OpenFile(strMetaFname, os.O_TRUNC|os.O_WRONLY|os.O_CREATE, 0644)
   268  	if err != nil {
   269  		log.Errorf("EncodeStarTree: open failed fname=%v, err=%v", strMetaFname, err)
   270  		return 0, err
   271  	}
   272  
   273  	_, err = strMFd.Write(STAR_TREE_BLOCK)
   274  	if err != nil {
   275  		log.Errorf("EncodeStarTree: compression Type write failed fname=%v, err=%v", strMetaFname, err)
   276  		strMFd.Close()
   277  		_ = os.Remove(strMetaFname) //we don't want half encoded agileTree file
   278  		return 0, err
   279  	}
   280  
   281  	metaSize, err := stb.encodeMetadata(strMFd)
   282  	if err != nil {
   283  		strMFd.Close()
   284  		_ = os.Remove(strMetaFname)
   285  		return 0, err
   286  	}
   287  
   288  	levsOffsets := make([]int64, stb.numGroupByCols+1)
   289  	levsSizes := make([]uint32, stb.numGroupByCols+1)
   290  
   291  	nddSize, err := stb.encodeNddWrapper(segKey, levsOffsets, levsSizes)
   292  	if err != nil {
   293  		log.Errorf("EncodeStarTree: failed to encode nodeDetails Err: %+v", err)
   294  		strMFd.Close()
   295  		_ = os.Remove(strMetaFname)
   296  		return 0, err
   297  	}
   298  
   299  	err = stb.writeLevsInfo(strMFd, levsOffsets, levsSizes)
   300  	if err != nil {
   301  		log.Errorf("EncodeStarTree: failed to write levvsoff Err: %+v", err)
   302  		strMFd.Close()
   303  		_ = os.Remove(strMetaFname)
   304  		return 0, err
   305  	}
   306  
   307  	strMFd.Close()
   308  	return nddSize + metaSize, nil
   309  }
   310  
   311  func (stb *StarTreeBuilder) estimateNodeSize(numNodes int) int {
   312  
   313  	// 9 for CvalEnc
   314  	lenAggVals := len(stb.mColNames) * TotalMeasFns * 9
   315  	// 4 (for curNode mapkey) + 4 per parent path to root + 1000 for buffer
   316  	return numNodes*(lenAggVals+4+4*int(stb.numGroupByCols)) + 1000
   317  
   318  }
   319  
   320  func (stb *StarTreeBuilder) writeLevsInfo(strMFd *os.File, levsOffsets []int64,
   321  	levsSizes []uint32) error {
   322  
   323  	idx := uint32(0)
   324  
   325  	// encode level offsets and sizes
   326  	for i := range levsOffsets {
   327  		copy(stb.buf[idx:], utils.Int64ToBytesLittleEndian(levsOffsets[i]))
   328  		idx += 8
   329  		copy(stb.buf[idx:], utils.Uint32ToBytesLittleEndian(levsSizes[i]))
   330  		idx += 4
   331  	}
   332  
   333  	_, err := strMFd.Write(stb.buf[:idx])
   334  	if err != nil {
   335  		log.Errorf("writeLevsInfo: failed levOff writing, err: %v", err)
   336  		return err
   337  	}
   338  	return nil
   339  }
   340  
   341  func (stb *StarTreeBuilder) estimateMetaSize() int {
   342  
   343  	// 55: estimate for width of colNames
   344  	colsMeta := (int(stb.numGroupByCols) + len(stb.mColNames)) * 55
   345  
   346  	deSize := int(0)
   347  	for colNum := range stb.groupByKeys {
   348  		// 60 : estimate for colnamelen, columnname, 55: for enc len
   349  		deSize += 60 + int(stb.segDictLastNum[colNum])*55
   350  	}
   351  
   352  	return colsMeta + deSize + 1000
   353  }