// Source: github.com/liquid-dev/text@v0.3.3-liquid/internal/catmsg/catmsg.go

// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Package catmsg contains support types for package x/text/message/catalog.
//
// This package contains the low-level implementations of Message used by the
// catalog package and provides primitives for other packages to implement their
// own. For instance, the plural package provides functionality for selecting
// translation strings based on the plural category of substitution arguments.
//
//
// Encoding and Decoding
//
// Catalogs store Messages encoded as a single string. Compiling a message into
// a string both results in a more compact representation and speeds up
// evaluation.
//
// A Message must implement a Compile method to convert its arbitrary
// representation to a string. The Compile method takes an Encoder which
// facilitates serializing the message. Encoders also provide more context of
// the message's creation (such as for which language the message is intended),
// which may not be known at the time of the creation of the message.
//
// Each message type must also have an accompanying decoder registered to decode
// the message. This decoder takes a Decoder argument which provides the
// counterparts for the decoding.
//
//
// Renderers
//
// A Decoder must be initialized with a Renderer implementation. These
// implementations must be provided by packages that use Catalogs, typically
// formatting packages such as x/text/message. A typical user will not need to
// worry about this type; it is only relevant to packages that do string
// formatting and want to use the catalog package to handle localized strings.
//
// A package that uses catalogs for selecting strings receives selection results
// as a sequence of substrings passed to the Renderer. The following snippet
// shows how to express the above example using the message package.
//
//	message.Set(language.English, "You are %d minute(s) late.",
//		catalog.Var("minutes", plural.Select(1, "one", "minute")),
//		catalog.String("You are %[1]d ${minutes} late."))
//
//	p := message.NewPrinter(language.English)
//	p.Printf("You are %d minute(s) late.", 5) // always 5 minutes late.
//
// To evaluate the Printf, package message wraps the arguments in a Renderer
// that is passed to the catalog for message decoding. The call sequence that
// results from evaluating the above message, assuming the person is rather
// tardy, is:
//
//	Render("You are %[1]d ")
//	Arg(1)
//	Render("minutes")
//	Render(" late.")
//
// The call to Arg is caused by the plural.Select execution, which evaluates
// the argument to determine whether the singular or plural message form should
// be selected. The calls to Render report the partial results to the message
// package for further evaluation.
package catmsg

import (
	"errors"
	"fmt"
	"strconv"
	"strings"
	"sync"

	"github.com/liquid-dev/text/language"
)

// A Handle refers to a registered message type.
type Handle int

// A Handler decodes and evaluates data compiled by a Message and sends the
// result to the Decoder. The output may depend on the value of the substitution
// arguments, accessible by the Decoder's Arg method. The Handler returns false
// if there is no translation for the given substitution arguments.
type Handler func(d *Decoder) bool

// Register records the existence of a message type and returns a Handle that
// can be used in the Encoder's EncodeMessageType method to create such
// messages.
The prefix of the name should be the package path followed by 86 // an optional disambiguating string. 87 // Register will panic if a handle for the same name was already registered. 88 func Register(name string, handler Handler) Handle { 89 mutex.Lock() 90 defer mutex.Unlock() 91 92 if _, ok := names[name]; ok { 93 panic(fmt.Errorf("catmsg: handler for %q already exists", name)) 94 } 95 h := Handle(len(handlers)) 96 names[name] = h 97 handlers = append(handlers, handler) 98 return h 99 } 100 101 // These handlers require fixed positions in the handlers slice. 102 const ( 103 msgVars Handle = iota 104 msgFirst 105 msgRaw 106 msgString 107 msgAffix 108 // Leave some arbitrary room for future expansion: 20 should suffice. 109 numInternal = 20 110 ) 111 112 const prefix = "github.com/liquid-dev/text/internal/catmsg." 113 114 var ( 115 // TODO: find a more stable way to link handles to message types. 116 mutex sync.Mutex 117 names = map[string]Handle{ 118 prefix + "Vars": msgVars, 119 prefix + "First": msgFirst, 120 prefix + "Raw": msgRaw, 121 prefix + "String": msgString, 122 prefix + "Affix": msgAffix, 123 } 124 handlers = make([]Handler, numInternal) 125 ) 126 127 func init() { 128 // This handler is a message type wrapper that initializes a decoder 129 // with a variable block. This message type, if present, is always at the 130 // start of an encoded message. 131 handlers[msgVars] = func(d *Decoder) bool { 132 blockSize := int(d.DecodeUint()) 133 d.vars = d.data[:blockSize] 134 d.data = d.data[blockSize:] 135 return d.executeMessage() 136 } 137 138 // First takes the first message in a sequence that results in a match for 139 // the given substitution arguments. 
140 handlers[msgFirst] = func(d *Decoder) bool { 141 for !d.Done() { 142 if d.ExecuteMessage() { 143 return true 144 } 145 } 146 return false 147 } 148 149 handlers[msgRaw] = func(d *Decoder) bool { 150 d.Render(d.data) 151 return true 152 } 153 154 // A String message alternates between a string constant and a variable 155 // substitution. 156 handlers[msgString] = func(d *Decoder) bool { 157 for !d.Done() { 158 if str := d.DecodeString(); str != "" { 159 d.Render(str) 160 } 161 if d.Done() { 162 break 163 } 164 d.ExecuteSubstitution() 165 } 166 return true 167 } 168 169 handlers[msgAffix] = func(d *Decoder) bool { 170 // TODO: use an alternative method for common cases. 171 prefix := d.DecodeString() 172 suffix := d.DecodeString() 173 if prefix != "" { 174 d.Render(prefix) 175 } 176 ret := d.ExecuteMessage() 177 if suffix != "" { 178 d.Render(suffix) 179 } 180 return ret 181 } 182 } 183 184 var ( 185 // ErrIncomplete indicates a compiled message does not define translations 186 // for all possible argument values. If this message is returned, evaluating 187 // a message may result in the ErrNoMatch error. 188 ErrIncomplete = errors.New("catmsg: incomplete message; may not give result for all inputs") 189 190 // ErrNoMatch indicates no translation message matched the given input 191 // parameters when evaluating a message. 192 ErrNoMatch = errors.New("catmsg: no translation for inputs") 193 ) 194 195 // A Message holds a collection of translations for the same phrase that may 196 // vary based on the values of substitution arguments. 197 type Message interface { 198 // Compile encodes the format string(s) of the message as a string for later 199 // evaluation. 200 // 201 // The first call Compile makes on the encoder must be EncodeMessageType. 202 // The handle passed to this call may either be a handle returned by 203 // Register to encode a single custom message, or HandleFirst followed by 204 // a sequence of calls to EncodeMessage. 
205 // 206 // Compile must return ErrIncomplete if it is possible for evaluation to 207 // not match any translation for a given set of formatting parameters. 208 // For example, selecting a translation based on plural form may not yield 209 // a match if the form "Other" is not one of the selectors. 210 // 211 // Compile may return any other application-specific error. For backwards 212 // compatibility with package like fmt, which often do not do sanity 213 // checking of format strings ahead of time, Compile should still make an 214 // effort to have some sensible fallback in case of an error. 215 Compile(e *Encoder) error 216 } 217 218 // Compile converts a Message to a data string that can be stored in a Catalog. 219 // The resulting string can subsequently be decoded by passing to the Execute 220 // method of a Decoder. 221 func Compile(tag language.Tag, macros Dictionary, m Message) (data string, err error) { 222 // TODO: pass macros so they can be used for validation. 223 v := &Encoder{inBody: true} // encoder for variables 224 v.root = v 225 e := &Encoder{root: v, parent: v, tag: tag} // encoder for messages 226 err = m.Compile(e) 227 // This package serves te message package, which in turn is meant to be a 228 // drop-in replacement for fmt. With the fmt package, format strings are 229 // evaluated lazily and errors are handled by substituting strings in the 230 // result, rather then returning an error. Dealing with multiple languages 231 // makes it more important to check errors ahead of time. We chose to be 232 // consistent and compatible and allow graceful degradation in case of 233 // errors. 234 buf := e.buf[stripPrefix(e.buf):] 235 if len(v.buf) > 0 { 236 // Prepend variable block. 237 b := make([]byte, 1+maxVarintBytes+len(v.buf)+len(buf)) 238 b[0] = byte(msgVars) 239 b = b[:1+encodeUint(b[1:], uint64(len(v.buf)))] 240 b = append(b, v.buf...) 241 b = append(b, buf...) 
242 buf = b 243 } 244 if err == nil { 245 err = v.err 246 } 247 return string(buf), err 248 } 249 250 // FirstOf is a message type that prints the first message in the sequence that 251 // resolves to a match for the given substitution arguments. 252 type FirstOf []Message 253 254 // Compile implements Message. 255 func (s FirstOf) Compile(e *Encoder) error { 256 e.EncodeMessageType(msgFirst) 257 err := ErrIncomplete 258 for i, m := range s { 259 if err == nil { 260 return fmt.Errorf("catalog: message argument %d is complete and blocks subsequent messages", i-1) 261 } 262 err = e.EncodeMessage(m) 263 } 264 return err 265 } 266 267 // Var defines a message that can be substituted for a placeholder of the same 268 // name. If an expression does not result in a string after evaluation, Name is 269 // used as the substitution. For example: 270 // Var{ 271 // Name: "minutes", 272 // Message: plural.Select(1, "one", "minute"), 273 // } 274 // will resolve to minute for singular and minutes for plural forms. 275 type Var struct { 276 Name string 277 Message Message 278 } 279 280 var errIsVar = errors.New("catmsg: variable used as message") 281 282 // Compile implements Message. 283 // 284 // Note that this method merely registers a variable; it does not create an 285 // encoded message. 286 func (v *Var) Compile(e *Encoder) error { 287 if err := e.addVar(v.Name, v.Message); err != nil { 288 return err 289 } 290 // Using a Var by itself is an error. If it is in a sequence followed by 291 // other messages referring to it, this error will be ignored. 292 return errIsVar 293 } 294 295 // Raw is a message consisting of a single format string that is passed as is 296 // to the Renderer. 297 // 298 // Note that a Renderer may still do its own variable substitution. 299 type Raw string 300 301 // Compile implements Message. 
302 func (r Raw) Compile(e *Encoder) (err error) { 303 e.EncodeMessageType(msgRaw) 304 // Special case: raw strings don't have a size encoding and so don't use 305 // EncodeString. 306 e.buf = append(e.buf, r...) 307 return nil 308 } 309 310 // String is a message consisting of a single format string which contains 311 // placeholders that may be substituted with variables. 312 // 313 // Variable substitutions are marked with placeholders and a variable name of 314 // the form ${name}. Any other substitutions such as Go templates or 315 // printf-style substitutions are left to be done by the Renderer. 316 // 317 // When evaluation a string interpolation, a Renderer will receive separate 318 // calls for each placeholder and interstitial string. For example, for the 319 // message: "%[1]v ${invites} %[2]v to ${their} party." The sequence of calls 320 // is: 321 // d.Render("%[1]v ") 322 // d.Arg(1) 323 // d.Render(resultOfInvites) 324 // d.Render(" %[2]v to ") 325 // d.Arg(2) 326 // d.Render(resultOfTheir) 327 // d.Render(" party.") 328 // where the messages for "invites" and "their" both use a plural.Select 329 // referring to the first argument. 330 // 331 // Strings may also invoke macros. Macros are essentially variables that can be 332 // reused. Macros may, for instance, be used to make selections between 333 // different conjugations of a verb. See the catalog package description for an 334 // overview of macros. 335 type String string 336 337 // Compile implements Message. It parses the placeholder formats and returns 338 // any error. 339 func (s String) Compile(e *Encoder) (err error) { 340 msg := string(s) 341 const subStart = "${" 342 hasHeader := false 343 p := 0 344 b := []byte{} 345 for { 346 i := strings.Index(msg[p:], subStart) 347 if i == -1 { 348 break 349 } 350 b = append(b, msg[p:p+i]...) 351 p += i + len(subStart) 352 if i = strings.IndexByte(msg[p:], '}'); i == -1 { 353 b = append(b, "$!(MISSINGBRACE)"...) 
354 err = fmt.Errorf("catmsg: missing '}'") 355 p = len(msg) 356 break 357 } 358 name := strings.TrimSpace(msg[p : p+i]) 359 if q := strings.IndexByte(name, '('); q == -1 { 360 if !hasHeader { 361 hasHeader = true 362 e.EncodeMessageType(msgString) 363 } 364 e.EncodeString(string(b)) 365 e.EncodeSubstitution(name) 366 b = b[:0] 367 } else if j := strings.IndexByte(name[q:], ')'); j == -1 { 368 // TODO: what should the error be? 369 b = append(b, "$!(MISSINGPAREN)"...) 370 err = fmt.Errorf("catmsg: missing ')'") 371 } else if x, sErr := strconv.ParseUint(strings.TrimSpace(name[q+1:q+j]), 10, 32); sErr != nil { 372 // TODO: handle more than one argument 373 b = append(b, "$!(BADNUM)"...) 374 err = fmt.Errorf("catmsg: invalid number %q", strings.TrimSpace(name[q+1:q+j])) 375 } else { 376 if !hasHeader { 377 hasHeader = true 378 e.EncodeMessageType(msgString) 379 } 380 e.EncodeString(string(b)) 381 e.EncodeSubstitution(name[:q], int(x)) 382 b = b[:0] 383 } 384 p += i + 1 385 } 386 b = append(b, msg[p:]...) 387 if !hasHeader { 388 // Simplify string to a raw string. 389 Raw(string(b)).Compile(e) 390 } else if len(b) > 0 { 391 e.EncodeString(string(b)) 392 } 393 return err 394 } 395 396 // Affix is a message that adds a prefix and suffix to another message. 397 // This is mostly used add back whitespace to a translation that was stripped 398 // before sending it out. 399 type Affix struct { 400 Message Message 401 Prefix string 402 Suffix string 403 } 404 405 // Compile implements Message. 406 func (a Affix) Compile(e *Encoder) (err error) { 407 // TODO: consider adding a special message type that just adds a single 408 // return. This is probably common enough to handle the majority of cases. 409 // Get some stats first, though. 410 e.EncodeMessageType(msgAffix) 411 e.EncodeString(a.Prefix) 412 e.EncodeString(a.Suffix) 413 e.EncodeMessage(a.Message) 414 return nil 415 }