github.com/aretext/aretext@v1.3.0/syntax/languages/protobuf.go (about)

     1  package languages
     2  
     3  import (
     4  	"github.com/aretext/aretext/syntax/parser"
     5  )
     6  
     7  // Protocol Buffers Version 3
     8  // See "Protocol Buffers Version 3 Language Specification"
     9  // https://developers.google.com/protocol-buffers/docs/reference/proto3-spec
    10  func ProtobufParseFunc() parser.Func {
    11  	return initialState(protobufParseState{depth: 0},
    12  		protobufCommentParseFunc().
    13  			Or(protobufStringLiteralParseFunc()).
    14  			Or(protobufFloatLiteralParseFunc()).
    15  			Or(protobufIntegerLiteralParseFunc()).
    16  			Or(protobufOperatorParseFunc()).
    17  			Or(protobufBraceParseFunc()).
    18  			Or(protobufKeywordParseFunc()),
    19  	)
    20  }
    21  
    22  func protobufCommentParseFunc() parser.Func {
    23  	consumeLineComment := consumeString("//").
    24  		ThenMaybe(consumeToNextLineFeed)
    25  
    26  	consumeBlockComment := consumeString("/*").
    27  		Then(consumeToString("*/"))
    28  
    29  	return consumeLineComment.
    30  		Or(consumeBlockComment).
    31  		Map(recognizeToken(parser.TokenRoleComment))
    32  }
    33  
    34  func protobufStringLiteralParseFunc() parser.Func {
    35  	return parseCStyleString('\'', false).Or(parseCStyleString('"', false))
    36  }
    37  
    38  func protobufIntegerLiteralParseFunc() parser.Func {
    39  	consumeHexLiteral := consumeString("0").
    40  		Then(consumeSingleRuneLike(func(r rune) bool {
    41  			return r == 'x' || r == 'X'
    42  		})).
    43  		Then(consumeRunesLike(func(r rune) bool {
    44  			return (r >= '0' && r <= '9') || (r >= 'A' && r <= 'F') || (r >= 'a' && r <= 'f')
    45  		}))
    46  
    47  	// Slightly more permissive than the spec, since it allows digits "8" and "9" after a leading "0".
    48  	consumeDecimalOrOctalLiteral := consumeRunesLike(func(r rune) bool {
    49  		return r >= '0' && r <= '9'
    50  	})
    51  
    52  	consumePlusOrMinus := consumeSingleRuneLike(func(r rune) bool {
    53  		return r == '+' || r == '-'
    54  	})
    55  	consumeInt := consumeHexLiteral.Or(consumeDecimalOrOctalLiteral)
    56  
    57  	return (consumePlusOrMinus.Then(consumeInt)).
    58  		Or(consumeInt).
    59  		Map(recognizeToken(parser.TokenRoleNumber))
    60  }
    61  
    62  func protobufFloatLiteralParseFunc() parser.Func {
    63  	consumeDecimals := consumeRunesLike(func(r rune) bool {
    64  		return r >= '0' && r <= '9'
    65  	})
    66  
    67  	consumeExponent := consumeSingleRuneLike(func(r rune) bool {
    68  		return r == 'e' || r == 'E'
    69  	}).
    70  		ThenMaybe(consumeSingleRuneLike(func(r rune) bool {
    71  			return r == '+' || r == '-'
    72  		})).
    73  		Then(consumeDecimals)
    74  
    75  	consumeFloatFormA := consumeDecimals.
    76  		Then(consumeString(".")).
    77  		ThenMaybe(consumeDecimals).
    78  		ThenMaybe(consumeExponent)
    79  
    80  	consumeFloatFormB := consumeDecimals.Then(consumeExponent)
    81  
    82  	consumeFloatFormC := consumeString(".").
    83  		Then(consumeDecimals).
    84  		ThenMaybe(consumeExponent)
    85  
    86  	consumeFloatFormD := consumeString("inf").Or(consumeString("nan"))
    87  
    88  	consumePlusOrMinus := consumeSingleRuneLike(func(r rune) bool {
    89  		return r == '+' || r == '-'
    90  	})
    91  	consumeFloat := consumeFloatFormA.Or(consumeFloatFormB).Or(consumeFloatFormC).Or(consumeFloatFormD)
    92  
    93  	return (consumePlusOrMinus.Then(consumeFloat)).
    94  		Or(consumeFloat).
    95  		Map(recognizeToken(parser.TokenRoleNumber))
    96  }
    97  
    98  func protobufOperatorParseFunc() parser.Func {
    99  	return consumeString("=").
   100  		Map(recognizeToken(parser.TokenRoleOperator))
   101  }
   102  
   103  func protobufBraceParseFunc() parser.Func {
   104  	// Open brace increases the depth by one.
   105  	openBraceParseFunc := consumeString("{").
   106  		Map(func(result parser.Result) parser.Result {
   107  			depth := result.NextState.(protobufParseState).depth
   108  			result.NextState = protobufParseState{depth: depth + 1}
   109  			return result
   110  		})
   111  
   112  	// Close brace decreases the depth by one, with a minimum of zero.
   113  	closeBraceParseFunc := consumeString("}").
   114  		Map(func(result parser.Result) parser.Result {
   115  			depth := result.NextState.(protobufParseState).depth
   116  			if depth > 0 {
   117  				result.NextState = protobufParseState{depth: depth - 1}
   118  			}
   119  			return result
   120  		})
   121  
   122  	return openBraceParseFunc.Or(closeBraceParseFunc)
   123  }
   124  
   125  func protobufKeywordParseFunc() parser.Func {
   126  	isLetter := func(r rune) bool {
   127  		return (r >= 'A' && r <= 'Z') || (r >= 'a' && r <= 'z')
   128  	}
   129  
   130  	isLetterDigitPeriodOrUnderscore := func(r rune) bool {
   131  		return isLetter(r) || (r >= '0' && r <= '9') || r == '.' || r == '_'
   132  	}
   133  
   134  	allLevelKeywords := []string{"true", "false", "message", "enum", "option"}
   135  
   136  	topLevelKeywords := append(
   137  		[]string{"syntax", "import", "weak", "public", "package", "service"},
   138  		allLevelKeywords...,
   139  	)
   140  
   141  	nestedLevelKeywords := append(
   142  		[]string{
   143  			"double", "float", "int32", "int64",
   144  			"uint32", "uint64", "sint32", "sint64", "fixed32",
   145  			"fixed64", "sfixed32", "sfixed64",
   146  			"bool", "string", "bytes", "repeated", "oneof",
   147  			"map", "reserved", "rpc", "returns", "to",
   148  			"required", "optional",
   149  		},
   150  		allLevelKeywords...,
   151  	)
   152  
   153  	recognizeTopLevelKeywordOrConsume := recognizeKeywordOrConsume(topLevelKeywords)
   154  	recognizeNestedLevelKeywordOrConsume := recognizeKeywordOrConsume(nestedLevelKeywords)
   155  
   156  	// Consume an identifier, then check whether it's a keyword.
   157  	// The parser recognizes different keywords at the top-level than within a block (nested in open/close parens).
   158  	return consumeSingleRuneLike(isLetter).
   159  		ThenMaybe(consumeRunesLike(isLetterDigitPeriodOrUnderscore)).
   160  		MapWithInput(func(result parser.Result, iter parser.TrackingRuneIter, state parser.State) parser.Result {
   161  			depth := result.NextState.(protobufParseState).depth
   162  			if depth == 0 {
   163  				return recognizeTopLevelKeywordOrConsume(result, iter, state)
   164  			} else {
   165  				return recognizeNestedLevelKeywordOrConsume(result, iter, state)
   166  			}
   167  		})
   168  }
   169  
   170  // protobufParseState tracks the nesting depth (open/closed braces).
   171  type protobufParseState struct {
   172  	depth int
   173  }
   174  
   175  func (s protobufParseState) Equals(other parser.State) bool {
   176  	otherState, ok := other.(protobufParseState)
   177  	return ok && s == otherState
   178  }