github.com/insolar/vanilla@v0.0.0-20201023172447-248fdf805322/protokit/notation.go (about)

     1  // Copyright 2020 Insolar Network Ltd.
     2  // All rights reserved.
     3  // This material is licensed under the Insolar License version 1.0,
     4  // available at https://github.com/insolar/assured-ledger/blob/master/LICENSE.md.
     5  
     6  package protokit
     7  
     8  import (
     9  	"errors"
    10  	"io"
    11  
    12  	"github.com/insolar/vanilla/throw"
    13  )
    14  
    15  /*
    16  	This is a notation that allows predictable parsing of any protobuf without a scheme:
    17  		1. Textual strings are valid UTF-8 and do not start with codes less than LF (10)
    18  		2. Any encoded protobuf message always starts with a field of id=16 (aka PolymorphFieldID)
    19  		3. Any binary / non-parsable payload is prepended with 0 or BinaryMarker byte
    20  */
    21  // TODO notation-aware pbuf parser/printer for protobuf without a scheme
    22  
const (
	// illegalUtf8FirstByte is the lowest byte value that PossibleContentTypes
	// refuses to treat as the first byte of a text.
	illegalUtf8FirstByte byte = 0x80
	// legalUtf8 is the lowest byte value above illegalUtf8FirstByte that
	// PossibleContentTypes accepts again as a possible first byte of a text.
	legalUtf8            byte = 0xC0

	// ObjectMarker is the first byte of a payload that follows the notation
	// and starts with a varint field of id=16.
	ObjectMarker = illegalUtf8FirstByte | byte(WireVarint) // 16:varint
	//  _Marker		 = illegalUtf8FirstByte | byte(WireFixed64)	// 16:fixed64
	//  _Marker		 = illegalUtf8FirstByte | byte(WireBytes)	// 16:bytes
	//	DO_NOT_USE	 = illegalUtf8FirstByte | byte(WireStartGroup)	// 16:groupStart

	// BinaryMarker is the first byte used to mark a binary / non-parsable payload.
	BinaryMarker = illegalUtf8FirstByte | byte(WireEndGroup) // 16:groupEnd
//  _Marker		 = illegalUtf8FirstByte | byte(WireFixed32)		// 16:fixed32
)
    34  
// PolymorphFieldID is the field id that, per the notation, starts every encoded
// protobuf message. It is derived from illegalUtf8FirstByte by dropping the
// wire type bits.
const PolymorphFieldID = illegalUtf8FirstByte >> WireTypeBits // = 16

// MaxSafeForPolymorphFieldID is the highest field id whose tag, by the same
// derivation, still has a first byte below legalUtf8.
// NOTE(review): this evaluates to 23 if WireTypeBits is 3, as the "= 16" above implies - confirm.
const MaxSafeForPolymorphFieldID = (legalUtf8 >> WireTypeBits) - 1
    37  
// A valid pbuf payload can't start with a groupEnd tag, so such a tag can be used as an indicator of a non-parsable payload.
// The number of distinct BinaryMarkers is limited by valid UTF-8 codes, which start at 0xC0.
const (
	// GeneralBinaryMarker is the first (iota == 0) binary marker, hence equal to BinaryMarker.
	// The commented-out entries below are placeholders for further markers.
	GeneralBinaryMarker = BinaryMarker | iota<<WireTypeBits
	// _BinaryMarker
	// _BinaryMarker
	// _BinaryMarker
	// _BinaryMarker
	// _BinaryMarker
	// _BinaryMarker
	// _BinaryMarker
)
    50  
// MaxPolymorphFieldSize is the upper bound for the size of a polymorph field.
// NOTE(review): the leading 2 presumably accounts for the two-byte tag of field 16 - confirm.
const MaxPolymorphFieldSize = 2 + MaxVarintSize

// MinPolymorphFieldSize is the lower bound for the size of a polymorph field.
// NOTE(review): the leading 2 presumably accounts for the two-byte tag of field 16 - confirm.
const MinPolymorphFieldSize = 2 + MinVarintSize
    53  
    54  func GetPolymorphFieldSize(id uint64) int {
    55  	return int(WireVarint.Tag(int(PolymorphFieldID)).FieldSize(id))
    56  }
    57  
// ContentType is the outcome of content type detection for a notation-friendly payload.
type ContentType uint8

const (
	// ContentUndefined means the content is unclear.
	ContentUndefined ContentType = iota
	// ContentText means the content is text.
	ContentText
	// ContentBinary means the content is binary.
	ContentBinary
	// ContentMessage means the content is a protobuf message.
	ContentMessage
	// ContentPolymorph means the content is a protobuf message that follows
	// the notation and has a polymorph marker.
	ContentPolymorph
)
    73  
// ContentTypeOptions is a bit set of the content kinds a payload may have,
// as reported by PossibleContentTypes. A zero value means none of the kinds apply.
type ContentTypeOptions uint8

const (
	// ContentOptionText indicates that the content can be text
	ContentOptionText ContentTypeOptions = 1 << iota

	// ContentOptionMessage indicates that the content can be a protobuf message
	ContentOptionMessage

	// ContentOptionNotation indicates the content has either Polymorph or Binary markers
	ContentOptionNotation
)
    86  
    87  // Provides content type detection of a notation-friendly payload.
    88  func PossibleContentTypes(firstByte byte) (ct ContentTypeOptions) {
    89  	if firstByte == 0 {
    90  		return 0
    91  	}
    92  
    93  	switch {
    94  	case firstByte < '\t' /* 9 */ :
    95  		// not a text
    96  		if firstByte&^maskWireType == 0 {
    97  			return 0
    98  		}
    99  	case firstByte >= legalUtf8:
   100  		ct |= ContentOptionText
   101  	case firstByte < illegalUtf8FirstByte:
   102  		ct |= ContentOptionText
   103  	}
   104  
   105  	switch WireType(firstByte & maskWireType) {
   106  	case WireVarint:
   107  		if ct&ContentOptionText == 0 && firstByte >= illegalUtf8FirstByte {
   108  			ct |= ContentOptionNotation
   109  		}
   110  		ct |= ContentOptionMessage
   111  	case WireFixed64, WireBytes, WireFixed32, WireStartGroup:
   112  		ct |= ContentOptionMessage
   113  	case WireEndGroup:
   114  		if ct&ContentOptionText == 0 {
   115  			ct |= ContentOptionNotation
   116  		}
   117  	}
   118  	return ct
   119  }
   120  
   121  func PeekPossibleContentTypes(r io.ByteScanner) (ContentTypeOptions, error) {
   122  	b, err := r.ReadByte()
   123  	if err != nil {
   124  		return 0, err
   125  	}
   126  	err = r.UnreadByte()
   127  	return PossibleContentTypes(b), err
   128  }
   129  
   130  var ErrUnexpectedHeader = errors.New("unexpected header")
   131  
   132  func PeekContentTypeAndPolymorphID(r io.ByteScanner) (ContentType, uint64, error) {
   133  	b, err := r.ReadByte()
   134  	if err != nil {
   135  		return ContentUndefined, 0, err
   136  	}
   137  
   138  	ct := ContentUndefined
   139  	switch pct := PossibleContentTypes(b); pct {
   140  	case 0:
   141  	case ContentOptionMessage:
   142  		ct = ContentMessage
   143  	case ContentOptionNotation:
   144  		ct = ContentBinary
   145  	case ContentOptionNotation | ContentOptionMessage:
   146  		switch b, err = r.ReadByte(); {
   147  		case err != nil:
   148  			return ContentUndefined, 0, err
   149  		case b != 0x01:
   150  			return ContentMessage, 0, ErrUnexpectedHeader // we can't recover here
   151  		}
   152  
   153  		var id uint64
   154  		if id, err = DecodeVarint(r); err != nil {
   155  			return ContentUndefined, 0, err
   156  		}
   157  		return ContentPolymorph, id, nil
   158  
   159  	default:
   160  		if pct&ContentOptionText != 0 {
   161  			ct = ContentText
   162  			break
   163  		}
   164  		_ = r.UnreadByte()
   165  		panic(throw.Impossible())
   166  	}
   167  	return ct, 0, r.UnreadByte()
   168  }
   169  
   170  func PeekContentTypeAndPolymorphIDFromBytes(b []byte) (ContentType, uint64, error) {
   171  	n := len(b)
   172  	if n == 0 {
   173  		return ContentUndefined, 0, nil
   174  	}
   175  
   176  	switch pct := PossibleContentTypes(b[0]); pct {
   177  	case 0:
   178  		return ContentUndefined, 0, nil
   179  
   180  	case ContentOptionMessage:
   181  		if n == 1 || n == 2 && b[0] >= 0x80 {
   182  			return ContentMessage, 0, throw.FailHere("bad message")
   183  		}
   184  		return ContentMessage, 0, nil
   185  
   186  	case ContentOptionNotation:
   187  		return ContentBinary, 0, nil
   188  
   189  	case ContentOptionNotation | ContentOptionMessage:
   190  		if n < 3 || b[1] != 0x01 {
   191  			return ContentPolymorph, 0, throw.FailHere("bad message")
   192  		}
   193  
   194  		id, n := DecodeVarintFromBytes(b[2:])
   195  		if n == 0 {
   196  			return ContentPolymorph, 0, throw.FailHere("bad message")
   197  		}
   198  		return ContentPolymorph, id, nil
   199  
   200  	default:
   201  		switch {
   202  		case pct&ContentOptionText == 0:
   203  			panic(throw.Impossible())
   204  		case b[0] < illegalUtf8FirstByte:
   205  			return ContentText, 0, nil
   206  		case n == 1:
   207  			return ContentText, 0, throw.FailHere("bad utf")
   208  		default:
   209  			return ContentText, 0, nil
   210  		}
   211  	}
   212  }
   213  
   214  func DecodePolymorphFromBytes(b []byte, onlyVarint bool) (id uint64, size int, err error) {
   215  	u, n := DecodeVarintFromBytes(b)
   216  	if n == 0 {
   217  		return 0, 0, throw.E("invalid wire tag, overflow")
   218  	}
   219  	wt, err := SafeWireTag(u)
   220  	if err != nil {
   221  		return 0, 0, err
   222  	}
   223  	switch fid := wt.FieldID(); {
   224  	case fid == int(PolymorphFieldID):
   225  	case fid < int(PolymorphFieldID) || fid > int(MaxSafeForPolymorphFieldID):
   226  		return 0, 0, throw.E("invalid polymorph content")
   227  	default:
   228  		return 0, 0, nil
   229  	}
   230  
   231  	switch wt.Type() {
   232  	case WireVarint:
   233  	case WireFixed64, WireFixed32:
   234  		if !onlyVarint {
   235  			break
   236  		}
   237  		fallthrough
   238  	default:
   239  		return 0, 0, throw.E("unknown polymorph tag")
   240  	}
   241  
   242  	size = n
   243  	id, n, err = wt.ReadValueFromBytes(b[n:])
   244  	if err != nil {
   245  		return 0, 0, err
   246  	}
   247  	return id, size + n, nil
   248  }