github.com/aretext/aretext@v1.3.0/syntax/languages/protobuf.go (about) 1 package languages 2 3 import ( 4 "github.com/aretext/aretext/syntax/parser" 5 ) 6 7 // Protocol Buffers Version 3 8 // See "Protocol Buffers Version 3 Language Specification" 9 // https://developers.google.com/protocol-buffers/docs/reference/proto3-spec 10 func ProtobufParseFunc() parser.Func { 11 return initialState(protobufParseState{depth: 0}, 12 protobufCommentParseFunc(). 13 Or(protobufStringLiteralParseFunc()). 14 Or(protobufFloatLiteralParseFunc()). 15 Or(protobufIntegerLiteralParseFunc()). 16 Or(protobufOperatorParseFunc()). 17 Or(protobufBraceParseFunc()). 18 Or(protobufKeywordParseFunc()), 19 ) 20 } 21 22 func protobufCommentParseFunc() parser.Func { 23 consumeLineComment := consumeString("//"). 24 ThenMaybe(consumeToNextLineFeed) 25 26 consumeBlockComment := consumeString("/*"). 27 Then(consumeToString("*/")) 28 29 return consumeLineComment. 30 Or(consumeBlockComment). 31 Map(recognizeToken(parser.TokenRoleComment)) 32 } 33 34 func protobufStringLiteralParseFunc() parser.Func { 35 return parseCStyleString('\'', false).Or(parseCStyleString('"', false)) 36 } 37 38 func protobufIntegerLiteralParseFunc() parser.Func { 39 consumeHexLiteral := consumeString("0"). 40 Then(consumeSingleRuneLike(func(r rune) bool { 41 return r == 'x' || r == 'X' 42 })). 43 Then(consumeRunesLike(func(r rune) bool { 44 return (r >= '0' && r <= '9') || (r >= 'A' && r <= 'F') || (r >= 'a' && r <= 'f') 45 })) 46 47 // Slightly more permissive than the spec, since it allows digits "8" and "9" after a leading "0". 48 consumeDecimalOrOctalLiteral := consumeRunesLike(func(r rune) bool { 49 return r >= '0' && r <= '9' 50 }) 51 52 consumePlusOrMinus := consumeSingleRuneLike(func(r rune) bool { 53 return r == '+' || r == '-' 54 }) 55 consumeInt := consumeHexLiteral.Or(consumeDecimalOrOctalLiteral) 56 57 return (consumePlusOrMinus.Then(consumeInt)). 58 Or(consumeInt). 59 Map(recognizeToken(parser.TokenRoleNumber)) 60 } 61 62 func protobufFloatLiteralParseFunc() parser.Func { 63 consumeDecimals := consumeRunesLike(func(r rune) bool { 64 return r >= '0' && r <= '9' 65 }) 66 67 consumeExponent := consumeSingleRuneLike(func(r rune) bool { 68 return r == 'e' || r == 'E' 69 }). 70 ThenMaybe(consumeSingleRuneLike(func(r rune) bool { 71 return r == '+' || r == '-' 72 })). 73 Then(consumeDecimals) 74 75 consumeFloatFormA := consumeDecimals. 76 Then(consumeString(".")). 77 ThenMaybe(consumeDecimals). 78 ThenMaybe(consumeExponent) 79 80 consumeFloatFormB := consumeDecimals.Then(consumeExponent) 81 82 consumeFloatFormC := consumeString("."). 83 Then(consumeDecimals). 84 ThenMaybe(consumeExponent) 85 86 consumeFloatFormD := consumeString("inf").Or(consumeString("nan")) 87 88 consumePlusOrMinus := consumeSingleRuneLike(func(r rune) bool { 89 return r == '+' || r == '-' 90 }) 91 consumeFloat := consumeFloatFormA.Or(consumeFloatFormB).Or(consumeFloatFormC).Or(consumeFloatFormD) 92 93 return (consumePlusOrMinus.Then(consumeFloat)). 94 Or(consumeFloat). 95 Map(recognizeToken(parser.TokenRoleNumber)) 96 } 97 98 func protobufOperatorParseFunc() parser.Func { 99 return consumeString("="). 100 Map(recognizeToken(parser.TokenRoleOperator)) 101 } 102 103 func protobufBraceParseFunc() parser.Func { 104 // Open brace increases the depth by one. 105 openBraceParseFunc := consumeString("{"). 106 Map(func(result parser.Result) parser.Result { 107 depth := result.NextState.(protobufParseState).depth 108 result.NextState = protobufParseState{depth: depth + 1} 109 return result 110 }) 111 112 // Close brace decreases the depth by one, with a minimum of zero. 113 closeBraceParseFunc := consumeString("}"). 114 Map(func(result parser.Result) parser.Result { 115 depth := result.NextState.(protobufParseState).depth 116 if depth > 0 { 117 result.NextState = protobufParseState{depth: depth - 1} 118 } 119 return result 120 }) 121 122 return openBraceParseFunc.Or(closeBraceParseFunc) 123 } 124 125 func protobufKeywordParseFunc() parser.Func { 126 isLetter := func(r rune) bool { 127 return (r >= 'A' && r <= 'Z') || (r >= 'a' && r <= 'z') 128 } 129 130 isLetterDigitPeriodOrUnderscore := func(r rune) bool { 131 return isLetter(r) || (r >= '0' && r <= '9') || r == '.' || r == '_' 132 } 133 134 allLevelKeywords := []string{"true", "false", "message", "enum", "option"} 135 136 topLevelKeywords := append( 137 []string{"syntax", "import", "weak", "public", "package", "service"}, 138 allLevelKeywords..., 139 ) 140 141 nestedLevelKeywords := append( 142 []string{ 143 "double", "float", "int32", "int64", 144 "uint32", "uint64", "sint32", "sint64", "fixed32", 145 "fixed64", "sfixed32", "sfixed64", 146 "bool", "string", "bytes", "repeated", "oneof", 147 "map", "reserved", "rpc", "returns", "to", 148 "required", "optional", 149 }, 150 allLevelKeywords..., 151 ) 152 153 recognizeTopLevelKeywordOrConsume := recognizeKeywordOrConsume(topLevelKeywords) 154 recognizeNestedLevelKeywordOrConsume := recognizeKeywordOrConsume(nestedLevelKeywords) 155 156 // Consume an identifier, then check whether it's a keyword. 157 // The parser recognizes different keywords at the top-level than within a block (nested in open/close parens). 158 return consumeSingleRuneLike(isLetter). 159 ThenMaybe(consumeRunesLike(isLetterDigitPeriodOrUnderscore)). 160 MapWithInput(func(result parser.Result, iter parser.TrackingRuneIter, state parser.State) parser.Result { 161 depth := result.NextState.(protobufParseState).depth 162 if depth == 0 { 163 return recognizeTopLevelKeywordOrConsume(result, iter, state) 164 } else { 165 return recognizeNestedLevelKeywordOrConsume(result, iter, state) 166 } 167 }) 168 } 169 170 // protobufParseState tracks the nesting depth (open/closed braces). 171 type protobufParseState struct { 172 depth int 173 } 174 175 func (s protobufParseState) Equals(other parser.State) bool { 176 otherState, ok := other.(protobufParseState) 177 return ok && s == otherState 178 }