github.com/siglens/siglens@v0.0.0-20240328180423-f7ce9ae441ed/pkg/segment/writer/agiletreewriter.go (about) 1 /* 2 Copyright 2023. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package writer 18 19 import ( 20 "fmt" 21 "os" 22 23 . "github.com/siglens/siglens/pkg/segment/utils" 24 "github.com/siglens/siglens/pkg/utils" 25 log "github.com/sirupsen/logrus" 26 ) 27 28 func (stb *StarTreeBuilder) encodeDictEnc(colName string, colNum uint16, 29 buf []byte) uint32 { 30 31 idx := uint32(0) 32 33 // copy colname strlen 34 l1 := uint16(len(colName)) 35 copy(buf[idx:], utils.Uint16ToBytesLittleEndian(l1)) 36 idx += 2 37 38 // copy the colname str 39 copy(buf[idx:], colName) 40 idx += uint32(l1) 41 42 numKeysForCol := stb.segDictLastNum[colNum] 43 copy(buf[idx:], utils.Uint32ToBytesLittleEndian(numKeysForCol)) 44 idx += 4 45 46 for i := uint32(0); i < numKeysForCol; i++ { 47 48 curString := stb.segDictEncRev[colNum][i] 49 50 // copy enc col val strlen 51 l1 := uint16(len(curString)) 52 copy(buf[idx:], utils.Uint16ToBytesLittleEndian(l1)) 53 idx += 2 54 55 // copy the enc col val str 56 copy(buf[idx:], curString) 57 idx += uint32(l1) 58 } 59 return idx 60 } 61 62 func (stb *StarTreeBuilder) encodeMetadata(strMFd *os.File) (uint32, error) { 63 64 sizeNeeded := stb.estimateMetaSize() 65 sizeToAdd := sizeNeeded - len(stb.buf) 66 if sizeToAdd > 0 { 67 newArr := make([]byte, sizeToAdd) 68 stb.buf = append(stb.buf, newArr...) 69 } 70 71 idx := uint32(0) 72 idx += 4 // reserve for metabyteslen 73 74 // Len of groupByKeys 75 copy(stb.buf[idx:], utils.Uint16ToBytesLittleEndian(stb.numGroupByCols)) 76 idx += 2 77 78 // each groupbyKey 79 for i := uint16(0); i < stb.numGroupByCols; i++ { 80 // copy strlen 81 l1 := uint16(len(stb.groupByKeys[i])) 82 copy(stb.buf[idx:], utils.Uint16ToBytesLittleEndian(l1)) 83 idx += 2 84 85 // copy the str 86 copy(stb.buf[idx:], stb.groupByKeys[i]) 87 idx += uint32(l1) 88 } 89 90 // Len of MeasureColNames 91 copy(stb.buf[idx:], utils.Uint16ToBytesLittleEndian(uint16(len(stb.mColNames)))) 92 idx += 2 93 94 // each aggFunc 95 for _, mCname := range stb.mColNames { 96 97 // Mcol len 98 l1 := uint16(len(mCname)) 99 copy(stb.buf[idx:], utils.Uint16ToBytesLittleEndian(l1)) 100 idx += 2 101 102 // copy the Mcol strname 103 copy(stb.buf[idx:], mCname) 104 idx += uint32(l1) 105 } 106 107 for colNum, cName := range stb.groupByKeys { 108 size := stb.encodeDictEnc(cName, uint16(colNum), stb.buf[idx:]) 109 idx += size 110 } 111 112 // metaDataLen 113 copy(stb.buf[0:], utils.Uint32ToBytesLittleEndian(idx-4)) 114 115 _, err := strMFd.Write(stb.buf[:idx]) 116 if err != nil { 117 log.Errorf("encodeMetadata: meta write failed fname=%v, err=%v", strMFd.Name(), err) 118 return idx, err 119 } 120 121 return idx, nil 122 } 123 124 func (stb *StarTreeBuilder) encodeNddWrapper(segKey string, levsOffsets []int64, 125 levsSizes []uint32) (uint32, error) { 126 127 strLevFname := fmt.Sprintf("%s.strl", segKey) 128 strLevFd, err := os.OpenFile(strLevFname, os.O_TRUNC|os.O_WRONLY|os.O_CREATE, 0644) 129 if err != nil { 130 log.Errorf("encodeNddWrapper: open failed fname=%v, err=%v", strLevFname, err) 131 return 0, err 132 } 133 defer strLevFd.Close() 134 135 size, err := stb.encodeNodeDetails(strLevFd, []*Node{stb.tree.Root}, 0, 0, levsOffsets, 136 levsSizes) 137 if err != nil { 138 return 0, err 139 } 140 141 return size, nil 142 } 143 144 func (stb *StarTreeBuilder) encodeNodeDetails(strLevFd *os.File, curLevNodes []*Node, 145 level int, strLevFileOff int64, levsOffsets []int64, levsSizes []uint32) (uint32, error) { 146 147 // save current level offset 148 levsOffsets[level] = strLevFileOff 149 150 sizeNeeded := stb.estimateNodeSize(len(curLevNodes)) 151 sizeToAdd := sizeNeeded - len(stb.buf) 152 if sizeToAdd > 0 { 153 newArr := make([]byte, sizeToAdd) 154 stb.buf = append(stb.buf, newArr...) 155 } 156 157 idx := uint32(0) 158 // encode levelNum 159 copy(stb.buf[idx:], utils.Uint16ToBytesLittleEndian(uint16(level))) 160 idx += 2 161 162 // numOfNodes at this level 163 copy(stb.buf[idx:], utils.Uint32ToBytesLittleEndian(uint32(len(curLevNodes)))) 164 idx += 4 165 166 nextLevelNodes := []*Node{} 167 for _, n := range curLevNodes { 168 169 // save nextlevel children 170 for _, child := range n.children { 171 nextLevelNodes = append(nextLevelNodes, child) 172 } 173 // encode curr nodes details 174 175 // mapKey 176 copy(stb.buf[idx:], utils.Uint32ToBytesLittleEndian(n.myKey)) 177 idx += 4 178 179 // add Parent keys, don't add parents for root (level-0) and level-1 (since their parent is root) 180 ancestor := n.parent 181 for i := 1; i < level; i++ { 182 if ancestor == nil { 183 log.Errorf("encodeNodeDetails: ancestor is nil, level: %v, nodeKey: %+v", level, n.myKey) 184 break 185 } 186 187 copy(stb.buf[idx:], utils.Uint32ToBytesLittleEndian(ancestor.myKey)) 188 idx += 4 189 ancestor = ancestor.parent 190 } 191 192 // We should have reached the root. 193 if level > 0 && ancestor != stb.tree.Root { 194 log.Errorf("encodeNodeDetails: ancestor is not the root, level: %v, nodeKey: %+v", level, n.myKey) 195 } 196 197 for agIdx, e := range n.aggValues { 198 copy(stb.buf[idx:], []byte{uint8(e.Dtype)}) 199 idx += 1 200 201 switch e.Dtype { 202 case SS_DT_UNSIGNED_NUM: 203 copy(stb.buf[idx:], utils.Uint64ToBytesLittleEndian(e.CVal.(uint64))) 204 case SS_DT_SIGNED_NUM: 205 copy(stb.buf[idx:], utils.Int64ToBytesLittleEndian(e.CVal.(int64))) 206 case SS_DT_FLOAT: 207 copy(stb.buf[idx:], utils.Float64ToBytesLittleEndian(e.CVal.(float64))) 208 case SS_DT_BACKFILL: // even for backfill we will have empty bytes in to keep things uniform 209 default: 210 return 0, fmt.Errorf("encodeNodeDetails: unsupported Dtype: %v, agIdx: %v, nodeKey: %+v, e: %+v", 211 e.Dtype, agIdx, n.myKey, e) 212 } 213 idx += 8 214 } 215 } 216 _, err := strLevFd.WriteAt(stb.buf[:idx], strLevFileOff) 217 if err != nil { 218 log.Errorf("encodeNodeDetails: nnd write failed, level: %v fname=%v, err=%v", level, strLevFd.Name(), err) 219 return idx, err 220 } 221 strLevFileOff += int64(idx) 222 levsSizes[level] = idx 223 224 if len(nextLevelNodes) > 0 { 225 nSize, err := stb.encodeNodeDetails(strLevFd, nextLevelNodes, level+1, strLevFileOff, levsOffsets, levsSizes) 226 if err != nil { 227 return 0, err 228 } 229 idx += nSize 230 } 231 232 return idx, nil 233 } 234 235 /* 236 *************** StarTree Encoding Format ***************************** 237 238 [FileType 1B] [LenMetaData 4B] [MetaData] [NodeDataDetails] 239 240 [MetaData] : 241 [GroupbyKeys] [MeasureColNames] [DictEncCol-1] [DictEncCol-2] ...[DictEncCol-N] 242 [GroupbyKeys] : [LenGrpKeys 2B] [GPK-1] [GPK-2]... 243 [GPK] : [StrLen 2B] [ActualStr xB] 244 245 [MeasureColNames] : [LenMeasureColNames 2B] [MeasureColName-1] [MeasureColNames-2] ... 246 [MeasureColNames-1] : [StrLen 2B] [McolName xB] 247 248 [DictEncCol-1] : [ColStrLen 2B] [ColName xB] [NumKeys 4B] [Enc-1] {Enc-2] ... 249 [Enc-1] : [EncStrLen 2B] [EncStr xB] 250 251 [NodeDataDetails]: [NddLen 4B] [LevOffMeta xB] [LevelDetails-1 xB] [LevelDetails-2 xB].... in BFS 252 [LevOffMetas] : [levOff-0 8B] [levSize-0 4B] [levOff-1 8B] [levSize-1 4B] .... 253 [LevelDetails-1] : [LevelNum 2B] [numNodesAtLevel 4B] [NodeAgInfo...] 254 [NodeAgInfo-1] : [nodeKey 4B] [parentKeys xB] [aggValue-1] [aggValue-2] ... 255 [parentKeys] : [parKey-0 4B] [parKey-1 4B].... // numOfParents depends on level 256 [aggValue]: [dType 1B] [val 8B] 257 */ 258 func (stb *StarTreeBuilder) EncodeStarTree(segKey string) (uint32, error) { 259 260 strMetaFname := fmt.Sprintf("%s.strm", segKey) 261 262 err := stb.Aggregate(stb.tree.Root) 263 if err != nil { 264 return 0, err 265 } 266 267 strMFd, err := os.OpenFile(strMetaFname, os.O_TRUNC|os.O_WRONLY|os.O_CREATE, 0644) 268 if err != nil { 269 log.Errorf("EncodeStarTree: open failed fname=%v, err=%v", strMetaFname, err) 270 return 0, err 271 } 272 273 _, err = strMFd.Write(STAR_TREE_BLOCK) 274 if err != nil { 275 log.Errorf("EncodeStarTree: compression Type write failed fname=%v, err=%v", strMetaFname, err) 276 strMFd.Close() 277 _ = os.Remove(strMetaFname) //we don't want half encoded agileTree file 278 return 0, err 279 } 280 281 metaSize, err := stb.encodeMetadata(strMFd) 282 if err != nil { 283 strMFd.Close() 284 _ = os.Remove(strMetaFname) 285 return 0, err 286 } 287 288 levsOffsets := make([]int64, stb.numGroupByCols+1) 289 levsSizes := make([]uint32, stb.numGroupByCols+1) 290 291 nddSize, err := stb.encodeNddWrapper(segKey, levsOffsets, levsSizes) 292 if err != nil { 293 log.Errorf("EncodeStarTree: failed to encode nodeDetails Err: %+v", err) 294 strMFd.Close() 295 _ = os.Remove(strMetaFname) 296 return 0, err 297 } 298 299 err = stb.writeLevsInfo(strMFd, levsOffsets, levsSizes) 300 if err != nil { 301 log.Errorf("EncodeStarTree: failed to write levvsoff Err: %+v", err) 302 strMFd.Close() 303 _ = os.Remove(strMetaFname) 304 return 0, err 305 } 306 307 strMFd.Close() 308 return nddSize + metaSize, nil 309 } 310 311 func (stb *StarTreeBuilder) estimateNodeSize(numNodes int) int { 312 313 // 9 for CvalEnc 314 lenAggVals := len(stb.mColNames) * TotalMeasFns * 9 315 // 4 (for curNode mapkey) + 4 per parent path to root + 1000 for buffer 316 return numNodes*(lenAggVals+4+4*int(stb.numGroupByCols)) + 1000 317 318 } 319 320 func (stb *StarTreeBuilder) writeLevsInfo(strMFd *os.File, levsOffsets []int64, 321 levsSizes []uint32) error { 322 323 idx := uint32(0) 324 325 // encode level offsets and sizes 326 for i := range levsOffsets { 327 copy(stb.buf[idx:], utils.Int64ToBytesLittleEndian(levsOffsets[i])) 328 idx += 8 329 copy(stb.buf[idx:], utils.Uint32ToBytesLittleEndian(levsSizes[i])) 330 idx += 4 331 } 332 333 _, err := strMFd.Write(stb.buf[:idx]) 334 if err != nil { 335 log.Errorf("writeLevsInfo: failed levOff writing, err: %v", err) 336 return err 337 } 338 return nil 339 } 340 341 func (stb *StarTreeBuilder) estimateMetaSize() int { 342 343 // 55: estimate for width of colNames 344 colsMeta := (int(stb.numGroupByCols) + len(stb.mColNames)) * 55 345 346 deSize := int(0) 347 for colNum := range stb.groupByKeys { 348 // 60 : estimate for colnamelen, columnname, 55: for enc len 349 deSize += 60 + int(stb.segDictLastNum[colNum])*55 350 } 351 352 return colsMeta + deSize + 1000 353 }