github.com/m3db/m3@v1.5.0/src/x/serialize/encoder.go (about) 1 // Copyright (c) 2018 Uber Technologies, Inc. 2 // 3 // Permission is hereby granted, free of charge, to any person obtaining a copy 4 // of this software and associated documentation files (the "Software"), to deal 5 // in the Software without restriction, including without limitation the rights 6 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 // copies of the Software, and to permit persons to whom the Software is 8 // furnished to do so, subject to the following conditions: 9 // 10 // The above copyright notice and this permission notice shall be included in 11 // all copies or substantial portions of the Software. 12 // 13 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 // THE SOFTWARE. 20 21 package serialize 22 23 import ( 24 "bytes" 25 "encoding/binary" 26 "errors" 27 "fmt" 28 29 "github.com/m3db/m3/src/x/checked" 30 xerrors "github.com/m3db/m3/src/x/errors" 31 "github.com/m3db/m3/src/x/ident" 32 ) 33 34 /* 35 * Serialization scheme to combat Thrift's allocation hell. 36 * 37 * Given Tags (i.e. key-values) this allows the bijective serialization to, 38 * and from Tags <--> []byte. 39 * 40 * Consider example, Tags: {"abc": "defg", "x": "foo"} 41 * this translates to: 42 * []byte( 43 * MAGIC_MARKER + NUMBER_TAGS 44 * + LENGTH([]byte("abc")) + []byte("abc") 45 * + LENGTH([]byte("defg")) + []byte("abc") 46 * + LENGTH([]byte("x")) + []byte("x") 47 * + LENGTH([]byte("foo")) + []byte("foo") 48 * ) 49 * 50 * Where MAGIC_MARKER/NUMBER_TAGS/LENGTH are maximum 2 bytes. 51 */ 52 53 var ( 54 // ByteOrder is the byte order used for encoding tags into a byte sequence. 55 ByteOrder binary.ByteOrder = binary.LittleEndian 56 headerMagicBytes = make([]byte, 2) 57 ) 58 59 func init() { 60 encodeUInt16(HeaderMagicNumber, headerMagicBytes) 61 } 62 63 var ( 64 errTagEncoderInUse = errors.New("encoder already in use") 65 errTagLiteralTooLong = xerrors.NewInvalidParamsError(errors.New("literal is too long")) 66 // ErrEmptyTagNameLiteral is an error when encoded tag name is empty. 67 ErrEmptyTagNameLiteral = xerrors.NewInvalidParamsError(errors.New("tag name cannot be empty")) 68 ) 69 70 type newCheckedBytesFn func([]byte, checked.BytesOptions) checked.Bytes 71 72 var defaultNewCheckedBytesFn = checked.NewBytes 73 74 type encoder struct { 75 buf *bytes.Buffer 76 checkedBytes checked.Bytes 77 staticBuffer [2]byte 78 staticBufferSlice []byte 79 80 opts TagEncoderOptions 81 pool TagEncoderPool 82 } 83 84 func newTagEncoder( 85 newFn newCheckedBytesFn, 86 opts TagEncoderOptions, 87 pool TagEncoderPool, 88 ) TagEncoder { 89 b := make([]byte, 0, opts.InitialCapacity()) 90 cb := newFn(nil, nil) 91 e := &encoder{ 92 buf: bytes.NewBuffer(b), 93 checkedBytes: cb, 94 opts: opts, 95 pool: pool, 96 } 97 e.staticBufferSlice = e.staticBuffer[:] 98 return e 99 } 100 101 func (e *encoder) Encode(tags ident.TagIterator) error { 102 if e.checkedBytes.NumRef() > 0 { 103 return errTagEncoderInUse 104 } 105 106 tags.Rewind() 107 defer tags.Rewind() 108 109 numTags := tags.Remaining() 110 max := int(e.opts.TagSerializationLimits().MaxNumberTags()) 111 if numTags > max { 112 return fmt.Errorf("too many tags to encode (%d), limit is: %d", numTags, max) 113 } 114 115 if _, err := e.buf.Write(headerMagicBytes); err != nil { 116 e.buf.Reset() 117 return err 118 } 119 120 if _, err := e.buf.Write(e.encodeUInt16(uint16(numTags))); err != nil { 121 e.buf.Reset() 122 return err 123 } 124 125 for tags.Next() { 126 tag := tags.Current() 127 if err := e.encodeTag(tag); err != nil { 128 e.buf.Reset() 129 return err 130 } 131 } 132 133 if err := tags.Err(); err != nil { 134 e.buf.Reset() 135 return err 136 } 137 138 e.checkedBytes.IncRef() 139 e.checkedBytes.Reset(e.buf.Bytes()) 140 141 return nil 142 } 143 144 func (e *encoder) Data() (checked.Bytes, bool) { 145 if e.checkedBytes.NumRef() == 0 { 146 return nil, false 147 } 148 return e.checkedBytes, true 149 } 150 151 func (e *encoder) Reset() { 152 if e.checkedBytes.NumRef() == 0 { 153 return 154 } 155 e.buf.Reset() 156 e.checkedBytes.Reset(nil) 157 e.checkedBytes.DecRef() 158 } 159 160 func (e *encoder) Finalize() { 161 e.Reset() 162 p := e.pool 163 if p == nil { 164 return 165 } 166 p.Put(e) 167 } 168 169 func (e *encoder) encodeTag(t ident.Tag) error { 170 if len(t.Name.Bytes()) == 0 { 171 return ErrEmptyTagNameLiteral 172 } 173 174 if err := e.encodeID(t.Name); err != nil { 175 return err 176 } 177 178 return e.encodeID(t.Value) 179 } 180 181 func (e *encoder) encodeID(i ident.ID) error { 182 d := i.Bytes() 183 184 max := int(e.opts.TagSerializationLimits().MaxTagLiteralLength()) 185 if len(d) > max { 186 return errTagLiteralTooLong 187 } 188 189 ld := uint16(len(d)) 190 if _, err := e.buf.Write(e.encodeUInt16(ld)); err != nil { 191 return err 192 } 193 194 if _, err := e.buf.Write(d); err != nil { 195 return err 196 } 197 198 return nil 199 } 200 201 func (e *encoder) encodeUInt16(v uint16) []byte { 202 // NB(r): Use static buffer on the struct for encoding, otherwise if it's 203 // statically defined inline in the function it will escape to heap. 204 dest := e.staticBufferSlice[:2] 205 return encodeUInt16(v, dest) 206 } 207 208 func encodeUInt16(v uint16, dest []byte) []byte { 209 ByteOrder.PutUint16(dest, v) 210 return dest 211 } 212 213 func decodeUInt16(b []byte) uint16 { 214 return ByteOrder.Uint16(b) 215 }