github.com/liquid-dev/text@v0.3.3-liquid/internal/catmsg/catmsg.go (about)

     1  // Copyright 2017 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // Package catmsg contains support types for package x/text/message/catalog.
     6  //
     7  // This package contains the low-level implementations of Message used by the
     8  // catalog package and provides primitives for other packages to implement their
     9  // own. For instance, the plural package provides functionality for selecting
    10  // translation strings based on the plural category of substitution arguments.
    11  //
    12  //
    13  // Encoding and Decoding
    14  //
    15  // Catalogs store Messages encoded as a single string. Compiling a message into
    16  // a string both results in a more compact representation and speeds up evaluation.
    17  //
    18  // A Message must implement a Compile method to convert its arbitrary
    19  // representation to a string. The Compile method takes an Encoder which
    20  // facilitates serializing the message. Encoders also provide more context of
    21  // the message's creation (such as for which language the message is intended),
    22  // which may not be known at the time of the creation of the message.
    23  //
    24  // Each message type must also have an accompanying decoder registered to decode
    25  // the message. This decoder takes a Decoder argument which provides the
    26  // counterparts for the decoding.
    27  //
    28  //
    29  // Renderers
    30  //
    31  // A Decoder must be initialized with a Renderer implementation. These
    32  // implementations must be provided by packages that use Catalogs, typically
    33  // formatting packages such as x/text/message. A typical user will not need to
    34  // worry about this type; it is only relevant to packages that do string
    35  // formatting and want to use the catalog package to handle localized strings.
    36  //
    37  // A package that uses catalogs for selecting strings receives selection results
    38  // as a sequence of substrings passed to the Renderer. The following snippet shows
    39  // how to express the above example using the message package.
    40  //
    41  //   message.Set(language.English, "You are %d minute(s) late.",
    42  //       catalog.Var("minutes", plural.Select(1, "one", "minute")),
    43  //       catalog.String("You are %[1]d ${minutes} late."))
    44  //
    45  //   p := message.NewPrinter(language.English)
    46  //   p.Printf("You are %d minute(s) late.", 5) // always 5 minutes late.
    47  //
    48  // To evaluate the Printf, package message wraps the arguments in a Renderer
    49  // that is passed to the catalog for message decoding. The call sequence that
    50  // results from evaluating the above message, assuming the person is rather
    51  // tardy, is:
    52  //
    53  //   Render("You are %[1]d ")
    54  //   Arg(1)
    55  //   Render("minutes")
    56  //   Render(" late.")
    57  //
    58  // The call to Arg is caused by the plural.Select execution, which evaluates
    59  // the argument to determine whether the singular or plural message form should
    60  // be selected. The calls to Render report the partial results to the message
    61  // package for further evaluation.
    62  package catmsg
    63  
    64  import (
    65  	"errors"
    66  	"fmt"
    67  	"strconv"
    68  	"strings"
    69  	"sync"
    70  
    71  	"github.com/liquid-dev/text/language"
    72  )
    73  
    74  // A Handle identifies a registered message type by its index in the handlers slice.
    75  type Handle int
    76  
    77  // A Handler decodes and evaluates data compiled by a Message and sends the
    78  // result to the Decoder. The output may depend on the values of the substitution
    79  // arguments, accessible through the Decoder's Arg method. A Handler returns
    80  // false if there is no translation for the given substitution arguments.
    81  type Handler func(d *Decoder) bool
    82  
    83  // Register records the existence of a message type and returns a Handle that
    84  // can be used in the Encoder's EncodeMessageType method to create such
    85  // messages. The prefix of the name should be the package path followed by
    86  // an optional disambiguating string.
    87  // Register will panic if a handle for the same name was already registered.
    88  func Register(name string, handler Handler) Handle {
    89  	mutex.Lock()
    90  	defer mutex.Unlock()
    91  
    92  	if _, ok := names[name]; ok {
    93  		panic(fmt.Errorf("catmsg: handler for %q already exists", name))
    94  	}
    95  	h := Handle(len(handlers))
    96  	names[name] = h
    97  	handlers = append(handlers, handler)
    98  	return h
    99  }
   100  
   101  // These handlers require fixed positions in the handlers slice.
   102  const (
   103  	msgVars Handle = iota // wrapper that sets up a variable block before the message
   104  	msgFirst // first message in a sequence that results in a match
   105  	msgRaw // raw data passed to Render as is
   106  	msgString // string constants alternating with variable substitutions
   107  	msgAffix // prefix and suffix rendered around a nested message
   108  	// Leave some arbitrary room for future expansion: 20 should suffice.
   109  	numInternal = 20 // initial length of the handlers slice
   110  )
   111  
   112  const prefix = "github.com/liquid-dev/text/internal/catmsg." // name prefix of the built-in message types
   113  
   114  var (
   115  	// TODO: find a more stable way to link handles to message types.
   116  	mutex sync.Mutex // held by Register while it updates names and handlers
   117  	names = map[string]Handle{ // registered message type name -> handle
   118  		prefix + "Vars":   msgVars,
   119  		prefix + "First":  msgFirst,
   120  		prefix + "Raw":    msgRaw,
   121  		prefix + "String": msgString,
   122  		prefix + "Affix":  msgAffix,
   123  	}
   124  	handlers = make([]Handler, numInternal) // indexed by Handle; built-in entries are set in init
   125  )
   126  
   127  func init() {
   128  	// This handler is a message type wrapper that initializes a decoder
   129  	// with a variable block. This message type, if present, is always at the
   130  	// start of an encoded message.
   131  	handlers[msgVars] = func(d *Decoder) bool {
   132  		blockSize := int(d.DecodeUint())
   133  		d.vars = d.data[:blockSize]
   134  		d.data = d.data[blockSize:]
   135  		return d.executeMessage()
   136  	}
   137  
   138  	// First takes the first message in a sequence that results in a match for
   139  	// the given substitution arguments.
   140  	handlers[msgFirst] = func(d *Decoder) bool {
   141  		for !d.Done() {
   142  			if d.ExecuteMessage() {
   143  				return true
   144  			}
   145  		}
   146  		return false
   147  	}
   148  
   149  	handlers[msgRaw] = func(d *Decoder) bool {
   150  		d.Render(d.data)
   151  		return true
   152  	}
   153  
   154  	// A String message alternates between a string constant and a variable
   155  	// substitution.
   156  	handlers[msgString] = func(d *Decoder) bool {
   157  		for !d.Done() {
   158  			if str := d.DecodeString(); str != "" {
   159  				d.Render(str)
   160  			}
   161  			if d.Done() {
   162  				break
   163  			}
   164  			d.ExecuteSubstitution()
   165  		}
   166  		return true
   167  	}
   168  
   169  	handlers[msgAffix] = func(d *Decoder) bool {
   170  		// TODO: use an alternative method for common cases.
   171  		prefix := d.DecodeString()
   172  		suffix := d.DecodeString()
   173  		if prefix != "" {
   174  			d.Render(prefix)
   175  		}
   176  		ret := d.ExecuteMessage()
   177  		if suffix != "" {
   178  			d.Render(suffix)
   179  		}
   180  		return ret
   181  	}
   182  }
   183  
   184  var (
   185  	// ErrIncomplete indicates a compiled message does not define translations
   186  	// for all possible argument values. If this error is returned, evaluating
   187  	// the message may result in the ErrNoMatch error.
   188  	ErrIncomplete = errors.New("catmsg: incomplete message; may not give result for all inputs")
   189  
   190  	// ErrNoMatch indicates no translation message matched the given input
   191  	// parameters when evaluating a message.
   192  	ErrNoMatch = errors.New("catmsg: no translation for inputs")
   193  )
   194  
   195  // A Message holds a collection of translations for the same phrase that may
   196  // vary based on the values of substitution arguments.
   197  type Message interface {
   198  	// Compile encodes the format string(s) of the message as a string for later
   199  	// evaluation.
   200  	//
   201  	// The first call Compile makes on the encoder must be EncodeMessageType.
   202  	// The handle passed to this call may either be a handle returned by
   203  	// Register to encode a single custom message, or HandleFirst followed by
   204  	// a sequence of calls to EncodeMessage.
   205  	//
   206  	// Compile must return ErrIncomplete if it is possible for evaluation to
   207  	// not match any translation for a given set of formatting parameters.
   208  	// For example, selecting a translation based on plural form may not yield
   209  	// a match if the form "Other" is not one of the selectors.
   210  	//
   211  	// Compile may return any other application-specific error. For backwards
   212  	// compatibility with packages like fmt, which often do not do sanity
   213  	// checking of format strings ahead of time, Compile should still make an
   214  	// effort to have some sensible fallback in case of an error.
   215  	Compile(e *Encoder) error
   216  }
   217  
   218  // Compile converts a Message to a data string that can be stored in a Catalog.
   219  // The resulting string can subsequently be decoded by passing to the Execute
   220  // method of a Decoder.
   221  func Compile(tag language.Tag, macros Dictionary, m Message) (data string, err error) {
   222  	// TODO: pass macros so they can be used for validation.
   223  	v := &Encoder{inBody: true} // encoder for variables
   224  	v.root = v
   225  	e := &Encoder{root: v, parent: v, tag: tag} // encoder for messages
   226  	err = m.Compile(e)
   227  	// This package serves te message package, which in turn is meant to be a
   228  	// drop-in replacement for fmt.  With the fmt package, format strings are
   229  	// evaluated lazily and errors are handled by substituting strings in the
   230  	// result, rather then returning an error. Dealing with multiple languages
   231  	// makes it more important to check errors ahead of time. We chose to be
   232  	// consistent and compatible and allow graceful degradation in case of
   233  	// errors.
   234  	buf := e.buf[stripPrefix(e.buf):]
   235  	if len(v.buf) > 0 {
   236  		// Prepend variable block.
   237  		b := make([]byte, 1+maxVarintBytes+len(v.buf)+len(buf))
   238  		b[0] = byte(msgVars)
   239  		b = b[:1+encodeUint(b[1:], uint64(len(v.buf)))]
   240  		b = append(b, v.buf...)
   241  		b = append(b, buf...)
   242  		buf = b
   243  	}
   244  	if err == nil {
   245  		err = v.err
   246  	}
   247  	return string(buf), err
   248  }
   249  
   250  // FirstOf is a message type that prints the first message in the sequence that
   251  // resolves to a match for the given substitution arguments.
   252  type FirstOf []Message
   253  
   254  // Compile implements Message.
   255  func (s FirstOf) Compile(e *Encoder) error {
   256  	e.EncodeMessageType(msgFirst)
   257  	err := ErrIncomplete
   258  	for i, m := range s {
   259  		if err == nil {
   260  			return fmt.Errorf("catalog: message argument %d is complete and blocks subsequent messages", i-1)
   261  		}
   262  		err = e.EncodeMessage(m)
   263  	}
   264  	return err
   265  }
   266  
   267  // Var defines a message that can be substituted for a placeholder of the same
   268  // name. If an expression does not result in a string after evaluation, Name is
   269  // used as the substitution. For example:
   270  //    Var{
   271  //      Name:    "minutes",
   272  //      Message: plural.Select(1, "one", "minute"),
   273  //    }
   274  // will resolve to minute for singular and minutes for plural forms.
   275  type Var struct {
   276  	Name    string
   277  	Message Message
   278  }
   279  
   280  var errIsVar = errors.New("catmsg: variable used as message")
   281  
   282  // Compile implements Message.
   283  //
   284  // Note that this method merely registers a variable; it does not create an
   285  // encoded message.
   286  func (v *Var) Compile(e *Encoder) error {
   287  	if err := e.addVar(v.Name, v.Message); err != nil {
   288  		return err
   289  	}
   290  	// Using a Var by itself is an error. If it is in a sequence followed by
   291  	// other messages referring to it, this error will be ignored.
   292  	return errIsVar
   293  }
   294  
   295  // Raw is a message consisting of a single format string that is passed as is
   296  // to the Renderer.
   297  //
   298  // Note that a Renderer may still do its own variable substitution.
   299  type Raw string
   300  
   301  // Compile implements Message.
   302  func (r Raw) Compile(e *Encoder) (err error) {
   303  	e.EncodeMessageType(msgRaw)
   304  	// Special case: raw strings don't have a size encoding and so don't use
   305  	// EncodeString.
   306  	e.buf = append(e.buf, r...)
   307  	return nil
   308  }
   309  
   310  // String is a message consisting of a single format string which contains
   311  // placeholders that may be substituted with variables.
   312  //
   313  // Variable substitutions are marked with placeholders and a variable name of
   314  // the form ${name}. Any other substitutions such as Go templates or
   315  // printf-style substitutions are left to be done by the Renderer.
   316  //
   317  // When evaluation a string interpolation, a Renderer will receive separate
   318  // calls for each placeholder and interstitial string. For example, for the
   319  // message: "%[1]v ${invites} %[2]v to ${their} party." The sequence of calls
   320  // is:
   321  //   d.Render("%[1]v ")
   322  //   d.Arg(1)
   323  //   d.Render(resultOfInvites)
   324  //   d.Render(" %[2]v to ")
   325  //   d.Arg(2)
   326  //   d.Render(resultOfTheir)
   327  //   d.Render(" party.")
   328  // where the messages for "invites" and "their" both use a plural.Select
   329  // referring to the first argument.
   330  //
   331  // Strings may also invoke macros. Macros are essentially variables that can be
   332  // reused. Macros may, for instance, be used to make selections between
   333  // different conjugations of a verb. See the catalog package description for an
   334  // overview of macros.
   335  type String string
   336  
   337  // Compile implements Message. It parses the placeholder formats and returns
   338  // any error.
   339  func (s String) Compile(e *Encoder) (err error) {
   340  	msg := string(s)
   341  	const subStart = "${"
   342  	hasHeader := false
   343  	p := 0
   344  	b := []byte{}
   345  	for {
   346  		i := strings.Index(msg[p:], subStart)
   347  		if i == -1 {
   348  			break
   349  		}
   350  		b = append(b, msg[p:p+i]...)
   351  		p += i + len(subStart)
   352  		if i = strings.IndexByte(msg[p:], '}'); i == -1 {
   353  			b = append(b, "$!(MISSINGBRACE)"...)
   354  			err = fmt.Errorf("catmsg: missing '}'")
   355  			p = len(msg)
   356  			break
   357  		}
   358  		name := strings.TrimSpace(msg[p : p+i])
   359  		if q := strings.IndexByte(name, '('); q == -1 {
   360  			if !hasHeader {
   361  				hasHeader = true
   362  				e.EncodeMessageType(msgString)
   363  			}
   364  			e.EncodeString(string(b))
   365  			e.EncodeSubstitution(name)
   366  			b = b[:0]
   367  		} else if j := strings.IndexByte(name[q:], ')'); j == -1 {
   368  			// TODO: what should the error be?
   369  			b = append(b, "$!(MISSINGPAREN)"...)
   370  			err = fmt.Errorf("catmsg: missing ')'")
   371  		} else if x, sErr := strconv.ParseUint(strings.TrimSpace(name[q+1:q+j]), 10, 32); sErr != nil {
   372  			// TODO: handle more than one argument
   373  			b = append(b, "$!(BADNUM)"...)
   374  			err = fmt.Errorf("catmsg: invalid number %q", strings.TrimSpace(name[q+1:q+j]))
   375  		} else {
   376  			if !hasHeader {
   377  				hasHeader = true
   378  				e.EncodeMessageType(msgString)
   379  			}
   380  			e.EncodeString(string(b))
   381  			e.EncodeSubstitution(name[:q], int(x))
   382  			b = b[:0]
   383  		}
   384  		p += i + 1
   385  	}
   386  	b = append(b, msg[p:]...)
   387  	if !hasHeader {
   388  		// Simplify string to a raw string.
   389  		Raw(string(b)).Compile(e)
   390  	} else if len(b) > 0 {
   391  		e.EncodeString(string(b))
   392  	}
   393  	return err
   394  }
   395  
   396  // Affix is a message that adds a prefix and suffix to another message.
   397  // This is mostly used add back whitespace to a translation that was stripped
   398  // before sending it out.
   399  type Affix struct {
   400  	Message Message
   401  	Prefix  string
   402  	Suffix  string
   403  }
   404  
   405  // Compile implements Message.
   406  func (a Affix) Compile(e *Encoder) (err error) {
   407  	// TODO: consider adding a special message type that just adds a single
   408  	// return. This is probably common enough to handle the majority of cases.
   409  	// Get some stats first, though.
   410  	e.EncodeMessageType(msgAffix)
   411  	e.EncodeString(a.Prefix)
   412  	e.EncodeString(a.Suffix)
   413  	e.EncodeMessage(a.Message)
   414  	return nil
   415  }