github.com/insolar/vanilla@v0.0.0-20201023172447-248fdf805322/protokit/notation.go (about) 1 // Copyright 2020 Insolar Network Ltd. 2 // All rights reserved. 3 // This material is licensed under the Insolar License version 1.0, 4 // available at https://github.com/insolar/assured-ledger/blob/master/LICENSE.md. 5 6 package protokit 7 8 import ( 9 "errors" 10 "io" 11 12 "github.com/insolar/vanilla/throw" 13 ) 14 15 /* 16 This is a notation that allows predictable parsing of any protobuf without a scheme: 17 1. Textual strings are valid UTF-8 and do not start with codes less than LF (10) 18 2. Any encoded protobuf message always starts with a field of id=16 (aka PolymorphFieldID) 19 3. Any binary / non-parsable payload is prepended with 0 or BinaryMarker byte 20 */ 21 // TODO notation-aware pbuf parser/printer for protobuf without a scheme 22 23 const ( 24 illegalUtf8FirstByte byte = 0x80 25 legalUtf8 byte = 0xC0 26 27 ObjectMarker = illegalUtf8FirstByte | byte(WireVarint) // 16:varint 28 // _Marker = illegalUtf8FirstByte | byte(WireFixed64) // 16:fixed64 29 // _Marker = illegalUtf8FirstByte | byte(WireBytes) // 16:bytes 30 // DO_NOT_USE = illegalUtf8FirstByte | byte(WireStartGroup) // 16:groupStart 31 BinaryMarker = illegalUtf8FirstByte | byte(WireEndGroup) // 16:groupEnd 32 // _Marker = illegalUtf8FirstByte | byte(WireFixed32) // 16:fixed32 33 ) 34 35 const PolymorphFieldID = illegalUtf8FirstByte >> WireTypeBits // = 16 36 const MaxSafeForPolymorphFieldID = (legalUtf8 >> WireTypeBits) - 1 37 38 // As a valid pbuf payload cant start with groupEnd tag, so we can use it as an indicator of a non-parsable payload. 39 // Number of BinaryMarkers is limited by valid UTF-8 codes, starting at 0xC0 40 const ( 41 GeneralBinaryMarker = BinaryMarker | iota<<WireTypeBits 42 // _BinaryMarker 43 // _BinaryMarker 44 // _BinaryMarker 45 // _BinaryMarker 46 // _BinaryMarker 47 // _BinaryMarker 48 // _BinaryMarker 49 ) 50 51 const MaxPolymorphFieldSize = 2 + MaxVarintSize 52 const MinPolymorphFieldSize = 2 + MinVarintSize 53 54 func GetPolymorphFieldSize(id uint64) int { 55 return int(WireVarint.Tag(int(PolymorphFieldID)).FieldSize(id)) 56 } 57 58 // Content type detection of a notation-friendly payload. 59 type ContentType uint8 60 61 const ( 62 /* Content is unclear */ 63 ContentUndefined ContentType = iota 64 /* Content is text */ 65 ContentText 66 /* Content is binary */ 67 ContentBinary 68 /* Content is protobuf message */ 69 ContentMessage 70 /* Content is protobuf that follows the notation and has polymorph marker */ 71 ContentPolymorph 72 ) 73 74 type ContentTypeOptions uint8 75 76 const ( 77 // ContentOptionText indicates that the content can be text 78 ContentOptionText ContentTypeOptions = 1 << iota 79 80 // ContentOptionMessage indicates that the content can be a protobuf message 81 ContentOptionMessage 82 83 // ContentOptionNotation indicates the content has either Polymorph or Binary markers 84 ContentOptionNotation 85 ) 86 87 // Provides content type detection of a notation-friendly payload. 88 func PossibleContentTypes(firstByte byte) (ct ContentTypeOptions) { 89 if firstByte == 0 { 90 return 0 91 } 92 93 switch { 94 case firstByte < '\t' /* 9 */ : 95 // not a text 96 if firstByte&^maskWireType == 0 { 97 return 0 98 } 99 case firstByte >= legalUtf8: 100 ct |= ContentOptionText 101 case firstByte < illegalUtf8FirstByte: 102 ct |= ContentOptionText 103 } 104 105 switch WireType(firstByte & maskWireType) { 106 case WireVarint: 107 if ct&ContentOptionText == 0 && firstByte >= illegalUtf8FirstByte { 108 ct |= ContentOptionNotation 109 } 110 ct |= ContentOptionMessage 111 case WireFixed64, WireBytes, WireFixed32, WireStartGroup: 112 ct |= ContentOptionMessage 113 case WireEndGroup: 114 if ct&ContentOptionText == 0 { 115 ct |= ContentOptionNotation 116 } 117 } 118 return ct 119 } 120 121 func PeekPossibleContentTypes(r io.ByteScanner) (ContentTypeOptions, error) { 122 b, err := r.ReadByte() 123 if err != nil { 124 return 0, err 125 } 126 err = r.UnreadByte() 127 return PossibleContentTypes(b), err 128 } 129 130 var ErrUnexpectedHeader = errors.New("unexpected header") 131 132 func PeekContentTypeAndPolymorphID(r io.ByteScanner) (ContentType, uint64, error) { 133 b, err := r.ReadByte() 134 if err != nil { 135 return ContentUndefined, 0, err 136 } 137 138 ct := ContentUndefined 139 switch pct := PossibleContentTypes(b); pct { 140 case 0: 141 case ContentOptionMessage: 142 ct = ContentMessage 143 case ContentOptionNotation: 144 ct = ContentBinary 145 case ContentOptionNotation | ContentOptionMessage: 146 switch b, err = r.ReadByte(); { 147 case err != nil: 148 return ContentUndefined, 0, err 149 case b != 0x01: 150 return ContentMessage, 0, ErrUnexpectedHeader // we can't recover here 151 } 152 153 var id uint64 154 if id, err = DecodeVarint(r); err != nil { 155 return ContentUndefined, 0, err 156 } 157 return ContentPolymorph, id, nil 158 159 default: 160 if pct&ContentOptionText != 0 { 161 ct = ContentText 162 break 163 } 164 _ = r.UnreadByte() 165 panic(throw.Impossible()) 166 } 167 return ct, 0, r.UnreadByte() 168 } 169 170 func PeekContentTypeAndPolymorphIDFromBytes(b []byte) (ContentType, uint64, error) { 171 n := len(b) 172 if n == 0 { 173 return ContentUndefined, 0, nil 174 } 175 176 switch pct := PossibleContentTypes(b[0]); pct { 177 case 0: 178 return ContentUndefined, 0, nil 179 180 case ContentOptionMessage: 181 if n == 1 || n == 2 && b[0] >= 0x80 { 182 return ContentMessage, 0, throw.FailHere("bad message") 183 } 184 return ContentMessage, 0, nil 185 186 case ContentOptionNotation: 187 return ContentBinary, 0, nil 188 189 case ContentOptionNotation | ContentOptionMessage: 190 if n < 3 || b[1] != 0x01 { 191 return ContentPolymorph, 0, throw.FailHere("bad message") 192 } 193 194 id, n := DecodeVarintFromBytes(b[2:]) 195 if n == 0 { 196 return ContentPolymorph, 0, throw.FailHere("bad message") 197 } 198 return ContentPolymorph, id, nil 199 200 default: 201 switch { 202 case pct&ContentOptionText == 0: 203 panic(throw.Impossible()) 204 case b[0] < illegalUtf8FirstByte: 205 return ContentText, 0, nil 206 case n == 1: 207 return ContentText, 0, throw.FailHere("bad utf") 208 default: 209 return ContentText, 0, nil 210 } 211 } 212 } 213 214 func DecodePolymorphFromBytes(b []byte, onlyVarint bool) (id uint64, size int, err error) { 215 u, n := DecodeVarintFromBytes(b) 216 if n == 0 { 217 return 0, 0, throw.E("invalid wire tag, overflow") 218 } 219 wt, err := SafeWireTag(u) 220 if err != nil { 221 return 0, 0, err 222 } 223 switch fid := wt.FieldID(); { 224 case fid == int(PolymorphFieldID): 225 case fid < int(PolymorphFieldID) || fid > int(MaxSafeForPolymorphFieldID): 226 return 0, 0, throw.E("invalid polymorph content") 227 default: 228 return 0, 0, nil 229 } 230 231 switch wt.Type() { 232 case WireVarint: 233 case WireFixed64, WireFixed32: 234 if !onlyVarint { 235 break 236 } 237 fallthrough 238 default: 239 return 0, 0, throw.E("unknown polymorph tag") 240 } 241 242 size = n 243 id, n, err = wt.ReadValueFromBytes(b[n:]) 244 if err != nil { 245 return 0, 0, err 246 } 247 return id, size + n, nil 248 }