github.com/pyroscope-io/pyroscope@v0.37.3-0.20230725203016-5f6947968bd0/pkg/storage/tree/serialize.go (about) 1 package tree 2 3 import ( 4 "bufio" 5 "bytes" 6 "encoding/base64" 7 "io" 8 9 "github.com/pyroscope-io/pyroscope/pkg/storage/dict" 10 "github.com/pyroscope-io/pyroscope/pkg/util/varint" 11 ) 12 13 // serialization format version. it's not very useful right now, but it will be in the future 14 const currentVersion = 1 15 16 var lostDuringSerializationName = []byte("other") 17 18 // warning: this function modifies the tree 19 func (t *Tree) SerializeTruncate(d *dict.Dict, maxNodes int, w io.Writer) error { 20 t.Lock() 21 defer t.Unlock() 22 vw := varint.NewWriter() 23 var err error 24 if _, err = vw.Write(w, currentVersion); err != nil { 25 return err 26 } 27 28 var b bytes.Buffer // Temporary buffer for dictionary keys. 29 minVal := t.minValue(maxNodes) 30 nodes := make([]*treeNode, 1, 128) 31 nodes[0] = t.root 32 for len(nodes) > 0 { 33 tn := nodes[0] 34 nodes = nodes[1:] 35 36 b.Reset() 37 d.PutValue([]byte(tn.Name), &b) 38 if _, err = vw.Write(w, uint64(b.Len())); err != nil { 39 return err 40 } 41 if _, err = w.Write(b.Bytes()); err != nil { 42 return err 43 } 44 if _, err = vw.Write(w, tn.Self); err != nil { 45 return err 46 } 47 48 cNodes := tn.ChildrenNodes 49 tn.ChildrenNodes = tn.ChildrenNodes[:0] 50 51 other := uint64(0) 52 for _, cn := range cNodes { 53 isOtherNode := bytes.Equal(cn.Name, lostDuringSerializationName) 54 if cn.Total >= minVal || isOtherNode { 55 tn.ChildrenNodes = append(tn.ChildrenNodes, cn) 56 } else { 57 // Truncated children accounted as parent self. 58 other += cn.Total 59 } 60 } 61 62 if other > 0 { 63 otherNode := tn.insert(lostDuringSerializationName) 64 otherNode.Self += other 65 otherNode.Total += other 66 } 67 68 if len(tn.ChildrenNodes) > 0 { 69 nodes = append(tn.ChildrenNodes, nodes...) 70 } else { 71 tn.ChildrenNodes = nil // Just to make it eligible for GC. 72 } 73 if _, err = vw.Write(w, uint64(len(tn.ChildrenNodes))); err != nil { 74 return err 75 } 76 } 77 return nil 78 } 79 80 type parentNode struct { 81 node *treeNode 82 parent *parentNode 83 } 84 85 func Deserialize(d *dict.Dict, r io.Reader) (*Tree, error) { 86 t := New() 87 88 type reader interface { 89 io.ByteReader 90 io.Reader 91 } 92 var br reader 93 switch x := r.(type) { 94 case *bytes.Buffer: 95 br = x 96 case *bytes.Reader: 97 br = x 98 case *bufio.Reader: 99 br = x 100 default: 101 br = bufio.NewReader(r) 102 } 103 104 // reads serialization format version, see comment at the top 105 _, err := varint.Read(br) 106 if err != nil { 107 return nil, err 108 } 109 110 parents := []*parentNode{{t.root, nil}} 111 j := 0 112 113 var nameBuf bytes.Buffer 114 for len(parents) > 0 { 115 j++ 116 parent := parents[0] 117 parents = parents[1:] 118 119 labelLen, err := varint.Read(br) 120 labelLinkBuf := make([]byte, labelLen) // TODO: there are better ways to do this? 121 _, err = io.ReadAtLeast(br, labelLinkBuf, int(labelLen)) 122 if err != nil { 123 return nil, err 124 } 125 126 nameBuf.Reset() 127 if !d.GetValue(labelLinkBuf, &nameBuf) { 128 // these strings has to be at least slightly different, hence base64 Addon 129 nameBuf.Reset() 130 nameBuf.WriteString("label not found " + base64.URLEncoding.EncodeToString(labelLinkBuf)) 131 } 132 tn := parent.node.insert(nameBuf.Bytes()) 133 tn.Self, err = varint.Read(br) 134 tn.Total = tn.Self 135 if err != nil { 136 return nil, err 137 } 138 139 pn := parent 140 for pn != nil { 141 pn.node.Total += tn.Self 142 pn = pn.parent 143 } 144 145 childrenLen, err := varint.Read(br) 146 if err != nil { 147 return nil, err 148 } 149 150 for i := uint64(0); i < childrenLen; i++ { 151 parents = append([]*parentNode{{tn, parent}}, parents...) 152 } 153 } 154 155 t.root = t.root.ChildrenNodes[0] 156 157 return t, nil 158 } 159 160 // used in the cloud 161 func DeserializeNoDict(r io.Reader) (*Tree, error) { 162 t := New() 163 br := bufio.NewReader(r) // TODO if it's already a bytereader skip 164 165 parents := []*parentNode{{t.root, nil}} 166 j := 0 167 168 for len(parents) > 0 { 169 j++ 170 parent := parents[0] 171 parents = parents[1:] 172 173 nameLen, err := varint.Read(br) 174 // if err == io.EOF { 175 // return t, nil 176 // } 177 nameBuf := make([]byte, nameLen) // TODO: there are better ways to do this? 178 _, err = io.ReadAtLeast(br, nameBuf, int(nameLen)) 179 if err != nil { 180 return nil, err 181 } 182 tn := parent.node.insert(nameBuf) 183 184 tn.Self, err = varint.Read(br) 185 tn.Total = tn.Self 186 if err != nil { 187 return nil, err 188 } 189 190 pn := parent 191 for pn != nil { 192 pn.node.Total += tn.Self 193 pn = pn.parent 194 } 195 196 childrenLen, err := varint.Read(br) 197 if err != nil { 198 return nil, err 199 } 200 201 for i := uint64(0); i < childrenLen; i++ { 202 parents = append([]*parentNode{{tn, parent}}, parents...) 203 } 204 } 205 206 t.root = t.root.ChildrenNodes[0] 207 208 return t, nil 209 } 210 211 // used in the cloud 212 // warning: this function modifies the tree 213 func (t *Tree) SerializeTruncateNoDict(maxNodes int, w io.Writer) error { 214 t.Lock() 215 defer t.Unlock() 216 vw := varint.NewWriter() 217 var err error 218 minVal := t.minValue(maxNodes) 219 nodes := make([]*treeNode, 1, 1024) 220 nodes[0] = t.root 221 for len(nodes) > 0 { 222 tn := nodes[0] 223 nodes = nodes[1:] 224 if _, err = vw.Write(w, uint64(len(tn.Name))); err != nil { 225 return err 226 } 227 if _, err = w.Write(tn.Name); err != nil { 228 return err 229 } 230 231 if _, err = vw.Write(w, tn.Self); err != nil { 232 return err 233 } 234 cNodes := tn.ChildrenNodes 235 tn.ChildrenNodes = tn.ChildrenNodes[:0] 236 237 other := uint64(0) 238 for _, cn := range cNodes { 239 isOtherNode := bytes.Equal(cn.Name, lostDuringSerializationName) 240 if cn.Total >= minVal || isOtherNode { 241 tn.ChildrenNodes = append(tn.ChildrenNodes, cn) 242 } else { 243 // Truncated children accounted as parent self. 244 other += cn.Total 245 } 246 } 247 248 if other > 0 { 249 otherNode := tn.insert(lostDuringSerializationName) 250 otherNode.Self += other 251 otherNode.Total += other 252 } 253 254 if len(tn.ChildrenNodes) > 0 { 255 nodes = append(tn.ChildrenNodes, nodes...) 256 } else { 257 tn.ChildrenNodes = nil // Just to make it eligible for GC. 258 } 259 if _, err = vw.Write(w, uint64(len(tn.ChildrenNodes))); err != nil { 260 return err 261 } 262 } 263 return nil 264 }