github.com/liquid-dev/text@v0.3.3-liquid/collate/build/builder.go (about)

     1  // Copyright 2012 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package build // import "github.com/liquid-dev/text/collate/build"
     6  
     7  import (
     8  	"fmt"
     9  	"io"
    10  	"log"
    11  	"sort"
    12  	"strings"
    13  	"unicode/utf8"
    14  
    15  	"github.com/liquid-dev/text/internal/colltab"
    16  	"github.com/liquid-dev/text/language"
    17  	"github.com/liquid-dev/text/unicode/norm"
    18  )
    19  
    20  // TODO: optimizations:
    21  // - expandElem is currently 20K. By putting unique colElems in a separate
    22  //   table and having a byte array of indexes into this table, we can reduce
    23  //   the total size to about 7K. By also factoring out the length bytes, we
    24  //   can reduce this to about 6K.
    25  // - trie valueBlocks are currently 100K. There are a lot of sparse blocks
    26  //   and many consecutive values with the same stride. This can be further
    27  //   compacted.
    28  // - Compress secondary weights into 8 bits.
    29  // - Some LDML specs specify a context element. Currently we simply concatenate
    30  //   those.  Context can be implemented using the contraction trie. If Builder
    31  //   could analyze and detect when using a context makes sense, there is no
    32  //   need to expose this construct in the API.
    33  
    34  // A Builder builds a root collation table.  The user must specify the
    35  // collation elements for each entry.  A common use will be to base the weights
    36  // on those specified in the allkeys* file as provided by the UCA or CLDR.
    37  type Builder struct {
    38  	index  *trieBuilder
    39  	root   ordering
    40  	locale []*Tailoring
    41  	t      *table
    42  	err    error
    43  	built  bool
    44  
    45  	minNonVar int // lowest primary recorded for a variable
    46  	varTop    int // highest primary recorded for a non-variable
    47  
    48  	// indexes used for reusing expansions and contractions
    49  	expIndex map[string]int      // positions of expansions keyed by their string representation
    50  	ctHandle map[string]ctHandle // contraction handles keyed by a concatenation of the suffixes
    51  	ctElem   map[string]int      // contraction elements keyed by their string representation
    52  }
    53  
    54  // A Tailoring builds a collation table based on another collation table.
    55  // The table is defined by specifying tailorings to the underlying table.
    56  // See https://unicode.org/reports/tr35/ for an overview of tailoring
    57  // collation tables.  The CLDR contains pre-defined tailorings for a variety
    58  // of languages (See https://www.unicode.org/Public/cldr/<version>/core.zip.)
    59  type Tailoring struct {
    60  	id      string
    61  	builder *Builder
    62  	index   *ordering
    63  
    64  	anchor *entry
    65  	before bool
    66  }
    67  
    68  // NewBuilder returns a new Builder.
    69  func NewBuilder() *Builder {
    70  	return &Builder{
    71  		index:    newTrieBuilder(),
    72  		root:     makeRootOrdering(),
    73  		expIndex: make(map[string]int),
    74  		ctHandle: make(map[string]ctHandle),
    75  		ctElem:   make(map[string]int),
    76  	}
    77  }
    78  
    79  // Tailoring returns a Tailoring for the given locale.  One should
    80  // have completed all calls to Add before calling Tailoring.
    81  func (b *Builder) Tailoring(loc language.Tag) *Tailoring {
    82  	t := &Tailoring{
    83  		id:      loc.String(),
    84  		builder: b,
    85  		index:   b.root.clone(),
    86  	}
    87  	t.index.id = t.id
    88  	b.locale = append(b.locale, t)
    89  	return t
    90  }
    91  
    92  // Add adds an entry to the collation element table, mapping
    93  // a slice of runes to a sequence of collation elements.
    94  // A collation element is specified as list of weights: []int{primary, secondary, ...}.
    95  // The entries are typically obtained from a collation element table
    96  // as defined in https://www.unicode.org/reports/tr10/#Data_Table_Format.
    97  // Note that the collation elements specified by colelems are only used
    98  // as a guide.  The actual weights generated by Builder may differ.
    99  // The argument variables is a list of indices into colelems that should contain
   100  // a value for each colelem that is a variable. (See the reference above.)
   101  func (b *Builder) Add(runes []rune, colelems [][]int, variables []int) error {
   102  	str := string(runes)
   103  	elems := make([]rawCE, len(colelems))
   104  	for i, ce := range colelems {
   105  		if len(ce) == 0 {
   106  			break
   107  		}
   108  		elems[i] = makeRawCE(ce, 0)
   109  		if len(ce) == 1 {
   110  			elems[i].w[1] = defaultSecondary
   111  		}
   112  		if len(ce) <= 2 {
   113  			elems[i].w[2] = defaultTertiary
   114  		}
   115  		if len(ce) <= 3 {
   116  			elems[i].w[3] = ce[0]
   117  		}
   118  	}
   119  	for i, ce := range elems {
   120  		p := ce.w[0]
   121  		isvar := false
   122  		for _, j := range variables {
   123  			if i == j {
   124  				isvar = true
   125  			}
   126  		}
   127  		if isvar {
   128  			if p >= b.minNonVar && b.minNonVar > 0 {
   129  				return fmt.Errorf("primary value %X of variable is larger than the smallest non-variable %X", p, b.minNonVar)
   130  			}
   131  			if p > b.varTop {
   132  				b.varTop = p
   133  			}
   134  		} else if p > 1 { // 1 is a special primary value reserved for FFFE
   135  			if p <= b.varTop {
   136  				return fmt.Errorf("primary value %X of non-variable is smaller than the highest variable %X", p, b.varTop)
   137  			}
   138  			if b.minNonVar == 0 || p < b.minNonVar {
   139  				b.minNonVar = p
   140  			}
   141  		}
   142  	}
   143  	elems, err := convertLargeWeights(elems)
   144  	if err != nil {
   145  		return err
   146  	}
   147  	cccs := []uint8{}
   148  	nfd := norm.NFD.String(str)
   149  	for i := range nfd {
   150  		cccs = append(cccs, norm.NFD.PropertiesString(nfd[i:]).CCC())
   151  	}
   152  	if len(cccs) < len(elems) {
   153  		if len(cccs) > 2 {
   154  			return fmt.Errorf("number of decomposed characters should be greater or equal to the number of collation elements for len(colelems) > 3 (%d < %d)", len(cccs), len(elems))
   155  		}
   156  		p := len(elems) - 1
   157  		for ; p > 0 && elems[p].w[0] == 0; p-- {
   158  			elems[p].ccc = cccs[len(cccs)-1]
   159  		}
   160  		for ; p >= 0; p-- {
   161  			elems[p].ccc = cccs[0]
   162  		}
   163  	} else {
   164  		for i := range elems {
   165  			elems[i].ccc = cccs[i]
   166  		}
   167  	}
   168  	// doNorm in collate.go assumes that the following conditions hold.
   169  	if len(elems) > 1 && len(cccs) > 1 && cccs[0] != 0 && cccs[0] != cccs[len(cccs)-1] {
   170  		return fmt.Errorf("incompatible CCC values for expansion %X (%d)", runes, cccs)
   171  	}
   172  	b.root.newEntry(str, elems)
   173  	return nil
   174  }
   175  
   176  func (t *Tailoring) setAnchor(anchor string) error {
   177  	anchor = norm.NFC.String(anchor)
   178  	a := t.index.find(anchor)
   179  	if a == nil {
   180  		a = t.index.newEntry(anchor, nil)
   181  		a.implicit = true
   182  		a.modified = true
   183  		for _, r := range []rune(anchor) {
   184  			e := t.index.find(string(r))
   185  			e.lock = true
   186  		}
   187  	}
   188  	t.anchor = a
   189  	return nil
   190  }
   191  
   192  // SetAnchor sets the point after which elements passed in subsequent calls to
   193  // Insert will be inserted.  It is equivalent to the reset directive in an LDML
   194  // specification.  See Insert for an example.
   195  // SetAnchor supports the following logical reset positions:
   196  // <first_tertiary_ignorable/>, <last_teriary_ignorable/>, <first_primary_ignorable/>,
   197  // and <last_non_ignorable/>.
   198  func (t *Tailoring) SetAnchor(anchor string) error {
   199  	if err := t.setAnchor(anchor); err != nil {
   200  		return err
   201  	}
   202  	t.before = false
   203  	return nil
   204  }
   205  
   206  // SetAnchorBefore is similar to SetAnchor, except that subsequent calls to
   207  // Insert will insert entries before the anchor.
   208  func (t *Tailoring) SetAnchorBefore(anchor string) error {
   209  	if err := t.setAnchor(anchor); err != nil {
   210  		return err
   211  	}
   212  	t.before = true
   213  	return nil
   214  }
   215  
   216  // Insert sets the ordering of str relative to the entry set by the previous
   217  // call to SetAnchor or Insert.  The argument extend corresponds
   218  // to the extend elements as defined in LDML.  A non-empty value for extend
   219  // will cause the collation elements corresponding to extend to be appended
   220  // to the collation elements generated for the entry added by Insert.
   221  // This has the same net effect as sorting str after the string anchor+extend.
   222  // See https://www.unicode.org/reports/tr10/#Tailoring_Example for details
   223  // on parametric tailoring and https://unicode.org/reports/tr35/#Collation_Elements
   224  // for full details on LDML.
   225  //
   226  // Examples: create a tailoring for Swedish, where "ä" is ordered after "z"
   227  // at the primary sorting level:
   228  //      t := b.Tailoring("se")
   229  // 		t.SetAnchor("z")
   230  // 		t.Insert(colltab.Primary, "ä", "")
   231  // Order "ü" after "ue" at the secondary sorting level:
   232  //		t.SetAnchor("ue")
   233  //		t.Insert(colltab.Secondary, "ü","")
   234  // or
   235  //		t.SetAnchor("u")
   236  //		t.Insert(colltab.Secondary, "ü", "e")
   237  // Order "q" afer "ab" at the secondary level and "Q" after "q"
   238  // at the tertiary level:
   239  // 		t.SetAnchor("ab")
   240  // 		t.Insert(colltab.Secondary, "q", "")
   241  // 		t.Insert(colltab.Tertiary, "Q", "")
   242  // Order "b" before "a":
   243  //      t.SetAnchorBefore("a")
   244  //      t.Insert(colltab.Primary, "b", "")
   245  // Order "0" after the last primary ignorable:
   246  //      t.SetAnchor("<last_primary_ignorable/>")
   247  //      t.Insert(colltab.Primary, "0", "")
   248  func (t *Tailoring) Insert(level colltab.Level, str, extend string) error {
   249  	if t.anchor == nil {
   250  		return fmt.Errorf("%s:Insert: no anchor point set for tailoring of %s", t.id, str)
   251  	}
   252  	str = norm.NFC.String(str)
   253  	e := t.index.find(str)
   254  	if e == nil {
   255  		e = t.index.newEntry(str, nil)
   256  	} else if e.logical != noAnchor {
   257  		return fmt.Errorf("%s:Insert: cannot reinsert logical reset position %q", t.id, e.str)
   258  	}
   259  	if e.lock {
   260  		return fmt.Errorf("%s:Insert: cannot reinsert element %q", t.id, e.str)
   261  	}
   262  	a := t.anchor
   263  	// Find the first element after the anchor which differs at a level smaller or
   264  	// equal to the given level.  Then insert at this position.
   265  	// See https://unicode.org/reports/tr35/#Collation_Elements, Section 5.14.5 for details.
   266  	e.before = t.before
   267  	if t.before {
   268  		t.before = false
   269  		if a.prev == nil {
   270  			a.insertBefore(e)
   271  		} else {
   272  			for a = a.prev; a.level > level; a = a.prev {
   273  			}
   274  			a.insertAfter(e)
   275  		}
   276  		e.level = level
   277  	} else {
   278  		for ; a.level > level; a = a.next {
   279  		}
   280  		e.level = a.level
   281  		if a != e {
   282  			a.insertAfter(e)
   283  			a.level = level
   284  		} else {
   285  			// We don't set a to prev itself. This has the effect of the entry
   286  			// getting new collation elements that are an increment of itself.
   287  			// This is intentional.
   288  			a.prev.level = level
   289  		}
   290  	}
   291  	e.extend = norm.NFD.String(extend)
   292  	e.exclude = false
   293  	e.modified = true
   294  	e.elems = nil
   295  	t.anchor = e
   296  	return nil
   297  }
   298  
   299  func (o *ordering) getWeight(e *entry) []rawCE {
   300  	if len(e.elems) == 0 && e.logical == noAnchor {
   301  		if e.implicit {
   302  			for _, r := range e.runes {
   303  				e.elems = append(e.elems, o.getWeight(o.find(string(r)))...)
   304  			}
   305  		} else if e.before {
   306  			count := [colltab.Identity + 1]int{}
   307  			a := e
   308  			for ; a.elems == nil && !a.implicit; a = a.next {
   309  				count[a.level]++
   310  			}
   311  			e.elems = []rawCE{makeRawCE(a.elems[0].w, a.elems[0].ccc)}
   312  			for i := colltab.Primary; i < colltab.Quaternary; i++ {
   313  				if count[i] != 0 {
   314  					e.elems[0].w[i] -= count[i]
   315  					break
   316  				}
   317  			}
   318  			if e.prev != nil {
   319  				o.verifyWeights(e.prev, e, e.prev.level)
   320  			}
   321  		} else {
   322  			prev := e.prev
   323  			e.elems = nextWeight(prev.level, o.getWeight(prev))
   324  			o.verifyWeights(e, e.next, e.level)
   325  		}
   326  	}
   327  	return e.elems
   328  }
   329  
   330  func (o *ordering) addExtension(e *entry) {
   331  	if ex := o.find(e.extend); ex != nil {
   332  		e.elems = append(e.elems, ex.elems...)
   333  	} else {
   334  		for _, r := range []rune(e.extend) {
   335  			e.elems = append(e.elems, o.find(string(r)).elems...)
   336  		}
   337  	}
   338  	e.extend = ""
   339  }
   340  
   341  func (o *ordering) verifyWeights(a, b *entry, level colltab.Level) error {
   342  	if level == colltab.Identity || b == nil || b.elems == nil || a.elems == nil {
   343  		return nil
   344  	}
   345  	for i := colltab.Primary; i < level; i++ {
   346  		if a.elems[0].w[i] < b.elems[0].w[i] {
   347  			return nil
   348  		}
   349  	}
   350  	if a.elems[0].w[level] >= b.elems[0].w[level] {
   351  		err := fmt.Errorf("%s:overflow: collation elements of %q (%X) overflows those of %q (%X) at level %d (%X >= %X)", o.id, a.str, a.runes, b.str, b.runes, level, a.elems, b.elems)
   352  		log.Println(err)
   353  		// TODO: return the error instead, or better, fix the conflicting entry by making room.
   354  	}
   355  	return nil
   356  }
   357  
   358  func (b *Builder) error(e error) {
   359  	if e != nil {
   360  		b.err = e
   361  	}
   362  }
   363  
   364  func (b *Builder) errorID(locale string, e error) {
   365  	if e != nil {
   366  		b.err = fmt.Errorf("%s:%v", locale, e)
   367  	}
   368  }
   369  
   370  // patchNorm ensures that NFC and NFD counterparts are consistent.
   371  func (o *ordering) patchNorm() {
   372  	// Insert the NFD counterparts, if necessary.
   373  	for _, e := range o.ordered {
   374  		nfd := norm.NFD.String(e.str)
   375  		if nfd != e.str {
   376  			if e0 := o.find(nfd); e0 != nil && !e0.modified {
   377  				e0.elems = e.elems
   378  			} else if e.modified && !equalCEArrays(o.genColElems(nfd), e.elems) {
   379  				e := o.newEntry(nfd, e.elems)
   380  				e.modified = true
   381  			}
   382  		}
   383  	}
   384  	// Update unchanged composed forms if one of their parts changed.
   385  	for _, e := range o.ordered {
   386  		nfd := norm.NFD.String(e.str)
   387  		if e.modified || nfd == e.str {
   388  			continue
   389  		}
   390  		if e0 := o.find(nfd); e0 != nil {
   391  			e.elems = e0.elems
   392  		} else {
   393  			e.elems = o.genColElems(nfd)
   394  			if norm.NFD.LastBoundary([]byte(nfd)) == 0 {
   395  				r := []rune(nfd)
   396  				head := string(r[0])
   397  				tail := ""
   398  				for i := 1; i < len(r); i++ {
   399  					s := norm.NFC.String(head + string(r[i]))
   400  					if e0 := o.find(s); e0 != nil && e0.modified {
   401  						head = s
   402  					} else {
   403  						tail += string(r[i])
   404  					}
   405  				}
   406  				e.elems = append(o.genColElems(head), o.genColElems(tail)...)
   407  			}
   408  		}
   409  	}
   410  	// Exclude entries for which the individual runes generate the same collation elements.
   411  	for _, e := range o.ordered {
   412  		if len(e.runes) > 1 && equalCEArrays(o.genColElems(e.str), e.elems) {
   413  			e.exclude = true
   414  		}
   415  	}
   416  }
   417  
   418  func (b *Builder) buildOrdering(o *ordering) {
   419  	for _, e := range o.ordered {
   420  		o.getWeight(e)
   421  	}
   422  	for _, e := range o.ordered {
   423  		o.addExtension(e)
   424  	}
   425  	o.patchNorm()
   426  	o.sort()
   427  	simplify(o)
   428  	b.processExpansions(o)   // requires simplify
   429  	b.processContractions(o) // requires simplify
   430  
   431  	t := newNode()
   432  	for e := o.front(); e != nil; e, _ = e.nextIndexed() {
   433  		if !e.skip() {
   434  			ce, err := e.encode()
   435  			b.errorID(o.id, err)
   436  			t.insert(e.runes[0], ce)
   437  		}
   438  	}
   439  	o.handle = b.index.addTrie(t)
   440  }
   441  
   442  func (b *Builder) build() (*table, error) {
   443  	if b.built {
   444  		return b.t, b.err
   445  	}
   446  	b.built = true
   447  	b.t = &table{
   448  		Table: colltab.Table{
   449  			MaxContractLen: utf8.UTFMax,
   450  			VariableTop:    uint32(b.varTop),
   451  		},
   452  	}
   453  
   454  	b.buildOrdering(&b.root)
   455  	b.t.root = b.root.handle
   456  	for _, t := range b.locale {
   457  		b.buildOrdering(t.index)
   458  		if b.err != nil {
   459  			break
   460  		}
   461  	}
   462  	i, err := b.index.generate()
   463  	b.t.trie = *i
   464  	b.t.Index = colltab.Trie{
   465  		Index:   i.index,
   466  		Values:  i.values,
   467  		Index0:  i.index[blockSize*b.t.root.lookupStart:],
   468  		Values0: i.values[blockSize*b.t.root.valueStart:],
   469  	}
   470  	b.error(err)
   471  	return b.t, b.err
   472  }
   473  
   474  // Build builds the root Collator.
   475  func (b *Builder) Build() (colltab.Weighter, error) {
   476  	table, err := b.build()
   477  	if err != nil {
   478  		return nil, err
   479  	}
   480  	return table, nil
   481  }
   482  
   483  // Build builds a Collator for Tailoring t.
   484  func (t *Tailoring) Build() (colltab.Weighter, error) {
   485  	// TODO: implement.
   486  	return nil, nil
   487  }
   488  
   489  // Print prints the tables for b and all its Tailorings as a Go file
   490  // that can be included in the Collate package.
   491  func (b *Builder) Print(w io.Writer) (n int, err error) {
   492  	p := func(nn int, e error) {
   493  		n += nn
   494  		if err == nil {
   495  			err = e
   496  		}
   497  	}
   498  	t, err := b.build()
   499  	if err != nil {
   500  		return 0, err
   501  	}
   502  	p(fmt.Fprintf(w, `var availableLocales = "und`))
   503  	for _, loc := range b.locale {
   504  		if loc.id != "und" {
   505  			p(fmt.Fprintf(w, ",%s", loc.id))
   506  		}
   507  	}
   508  	p(fmt.Fprint(w, "\"\n\n"))
   509  	p(fmt.Fprintf(w, "const varTop = 0x%x\n\n", b.varTop))
   510  	p(fmt.Fprintln(w, "var locales = [...]tableIndex{"))
   511  	for _, loc := range b.locale {
   512  		if loc.id == "und" {
   513  			p(t.fprintIndex(w, loc.index.handle, loc.id))
   514  		}
   515  	}
   516  	for _, loc := range b.locale {
   517  		if loc.id != "und" {
   518  			p(t.fprintIndex(w, loc.index.handle, loc.id))
   519  		}
   520  	}
   521  	p(fmt.Fprint(w, "}\n\n"))
   522  	n, _, err = t.fprint(w, "main")
   523  	return
   524  }
   525  
   526  // reproducibleFromNFKD checks whether the given expansion could be generated
   527  // from an NFKD expansion.
   528  func reproducibleFromNFKD(e *entry, exp, nfkd []rawCE) bool {
   529  	// Length must be equal.
   530  	if len(exp) != len(nfkd) {
   531  		return false
   532  	}
   533  	for i, ce := range exp {
   534  		// Primary and secondary values should be equal.
   535  		if ce.w[0] != nfkd[i].w[0] || ce.w[1] != nfkd[i].w[1] {
   536  			return false
   537  		}
   538  		// Tertiary values should be equal to maxTertiary for third element onwards.
   539  		// TODO: there seem to be a lot of cases in CLDR (e.g. ㏭ in zh.xml) that can
   540  		// simply be dropped.  Try this out by dropping the following code.
   541  		if i >= 2 && ce.w[2] != maxTertiary {
   542  			return false
   543  		}
   544  		if _, err := makeCE(ce); err != nil {
   545  			// Simply return false. The error will be caught elsewhere.
   546  			return false
   547  		}
   548  	}
   549  	return true
   550  }
   551  
   552  func simplify(o *ordering) {
   553  	// Runes that are a starter of a contraction should not be removed.
   554  	// (To date, there is only Kannada character 0CCA.)
   555  	keep := make(map[rune]bool)
   556  	for e := o.front(); e != nil; e, _ = e.nextIndexed() {
   557  		if len(e.runes) > 1 {
   558  			keep[e.runes[0]] = true
   559  		}
   560  	}
   561  	// Tag entries for which the runes NFKD decompose to identical values.
   562  	for e := o.front(); e != nil; e, _ = e.nextIndexed() {
   563  		s := e.str
   564  		nfkd := norm.NFKD.String(s)
   565  		nfd := norm.NFD.String(s)
   566  		if e.decompose || len(e.runes) > 1 || len(e.elems) == 1 || keep[e.runes[0]] || nfkd == nfd {
   567  			continue
   568  		}
   569  		if reproducibleFromNFKD(e, e.elems, o.genColElems(nfkd)) {
   570  			e.decompose = true
   571  		}
   572  	}
   573  }
   574  
   575  // appendExpansion converts the given collation sequence to
   576  // collation elements and adds them to the expansion table.
   577  // It returns an index to the expansion table.
   578  func (b *Builder) appendExpansion(e *entry) int {
   579  	t := b.t
   580  	i := len(t.ExpandElem)
   581  	ce := uint32(len(e.elems))
   582  	t.ExpandElem = append(t.ExpandElem, ce)
   583  	for _, w := range e.elems {
   584  		ce, err := makeCE(w)
   585  		if err != nil {
   586  			b.error(err)
   587  			return -1
   588  		}
   589  		t.ExpandElem = append(t.ExpandElem, ce)
   590  	}
   591  	return i
   592  }
   593  
   594  // processExpansions extracts data necessary to generate
   595  // the extraction tables.
   596  func (b *Builder) processExpansions(o *ordering) {
   597  	for e := o.front(); e != nil; e, _ = e.nextIndexed() {
   598  		if !e.expansion() {
   599  			continue
   600  		}
   601  		key := fmt.Sprintf("%v", e.elems)
   602  		i, ok := b.expIndex[key]
   603  		if !ok {
   604  			i = b.appendExpansion(e)
   605  			b.expIndex[key] = i
   606  		}
   607  		e.expansionIndex = i
   608  	}
   609  }
   610  
   611  func (b *Builder) processContractions(o *ordering) {
   612  	// Collate contractions per starter rune.
   613  	starters := []rune{}
   614  	cm := make(map[rune][]*entry)
   615  	for e := o.front(); e != nil; e, _ = e.nextIndexed() {
   616  		if e.contraction() {
   617  			if len(e.str) > b.t.MaxContractLen {
   618  				b.t.MaxContractLen = len(e.str)
   619  			}
   620  			r := e.runes[0]
   621  			if _, ok := cm[r]; !ok {
   622  				starters = append(starters, r)
   623  			}
   624  			cm[r] = append(cm[r], e)
   625  		}
   626  	}
   627  	// Add entries of single runes that are at a start of a contraction.
   628  	for e := o.front(); e != nil; e, _ = e.nextIndexed() {
   629  		if !e.contraction() {
   630  			r := e.runes[0]
   631  			if _, ok := cm[r]; ok {
   632  				cm[r] = append(cm[r], e)
   633  			}
   634  		}
   635  	}
   636  	// Build the tries for the contractions.
   637  	t := b.t
   638  	for _, r := range starters {
   639  		l := cm[r]
   640  		// Compute suffix strings. There are 31 different contraction suffix
   641  		// sets for 715 contractions and 82 contraction starter runes as of
   642  		// version 6.0.0.
   643  		sufx := []string{}
   644  		hasSingle := false
   645  		for _, e := range l {
   646  			if len(e.runes) > 1 {
   647  				sufx = append(sufx, string(e.runes[1:]))
   648  			} else {
   649  				hasSingle = true
   650  			}
   651  		}
   652  		if !hasSingle {
   653  			b.error(fmt.Errorf("no single entry for starter rune %U found", r))
   654  			continue
   655  		}
   656  		// Unique the suffix set.
   657  		sort.Strings(sufx)
   658  		key := strings.Join(sufx, "\n")
   659  		handle, ok := b.ctHandle[key]
   660  		if !ok {
   661  			var err error
   662  			handle, err = appendTrie(&t.ContractTries, sufx)
   663  			if err != nil {
   664  				b.error(err)
   665  			}
   666  			b.ctHandle[key] = handle
   667  		}
   668  		// Bucket sort entries in index order.
   669  		es := make([]*entry, len(l))
   670  		for _, e := range l {
   671  			var p, sn int
   672  			if len(e.runes) > 1 {
   673  				str := []byte(string(e.runes[1:]))
   674  				p, sn = lookup(&t.ContractTries, handle, str)
   675  				if sn != len(str) {
   676  					log.Fatalf("%s: processContractions: unexpected length for '%X'; len=%d; want %d", o.id, e.runes, sn, len(str))
   677  				}
   678  			}
   679  			if es[p] != nil {
   680  				log.Fatalf("%s: multiple contractions for position %d for rune %U", o.id, p, e.runes[0])
   681  			}
   682  			es[p] = e
   683  		}
   684  		// Create collation elements for contractions.
   685  		elems := []uint32{}
   686  		for _, e := range es {
   687  			ce, err := e.encodeBase()
   688  			b.errorID(o.id, err)
   689  			elems = append(elems, ce)
   690  		}
   691  		key = fmt.Sprintf("%v", elems)
   692  		i, ok := b.ctElem[key]
   693  		if !ok {
   694  			i = len(t.ContractElem)
   695  			b.ctElem[key] = i
   696  			t.ContractElem = append(t.ContractElem, elems...)
   697  		}
   698  		// Store info in entry for starter rune.
   699  		es[0].contractionIndex = i
   700  		es[0].contractionHandle = handle
   701  	}
   702  }