github.com/liquid-dev/text@v0.3.3-liquid/cases/context_test.go (about)

     1  // Copyright 2014 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package cases
     6  
     7  import (
     8  	"strings"
     9  	"testing"
    10  	"unicode"
    11  
    12  	"github.com/liquid-dev/text/internal/testtext"
    13  	"github.com/liquid-dev/text/language"
    14  	"github.com/liquid-dev/text/transform"
    15  	"github.com/liquid-dev/text/unicode/norm"
    16  	"github.com/liquid-dev/text/unicode/rangetable"
    17  )
    18  
    19  // The following definitions are taken directly from Chapter 3 of The Unicode
    20  // Standard.
    21  
    22  func propCased(r rune) bool {
    23  	return propLower(r) || propUpper(r) || unicode.IsTitle(r)
    24  }
    25  
    26  func propLower(r rune) bool {
    27  	return unicode.IsLower(r) || unicode.Is(unicode.Other_Lowercase, r)
    28  }
    29  
    30  func propUpper(r rune) bool {
    31  	return unicode.IsUpper(r) || unicode.Is(unicode.Other_Uppercase, r)
    32  }
    33  
    34  func propIgnore(r rune) bool {
    35  	if unicode.In(r, unicode.Mn, unicode.Me, unicode.Cf, unicode.Lm, unicode.Sk) {
    36  		return true
    37  	}
    38  	return caseIgnorable[r]
    39  }
    40  
    41  func hasBreakProp(r rune) bool {
    42  	// binary search over ranges
    43  	lo := 0
    44  	hi := len(breakProp)
    45  	for lo < hi {
    46  		m := lo + (hi-lo)/2
    47  		bp := &breakProp[m]
    48  		if bp.lo <= r && r <= bp.hi {
    49  			return true
    50  		}
    51  		if r < bp.lo {
    52  			hi = m
    53  		} else {
    54  			lo = m + 1
    55  		}
    56  	}
    57  	return false
    58  }
    59  
    60  func contextFromRune(r rune) *context {
    61  	c := context{dst: make([]byte, 128), src: []byte(string(r)), atEOF: true}
    62  	c.next()
    63  	return &c
    64  }
    65  
    66  func TestCaseProperties(t *testing.T) {
    67  	if unicode.Version != UnicodeVersion {
    68  		// Properties of existing code points may change by Unicode version, so
    69  		// we need to skip.
    70  		t.Skipf("Skipping as core Unicode version %s different than %s", unicode.Version, UnicodeVersion)
    71  	}
    72  	assigned := rangetable.Assigned(UnicodeVersion)
    73  	coreVersion := rangetable.Assigned(unicode.Version)
    74  	for r := rune(0); r <= lastRuneForTesting; r++ {
    75  		if !unicode.In(r, assigned) || !unicode.In(r, coreVersion) {
    76  			continue
    77  		}
    78  		c := contextFromRune(r)
    79  		if got, want := c.info.isCaseIgnorable(), propIgnore(r); got != want {
    80  			t.Errorf("caseIgnorable(%U): got %v; want %v (%x)", r, got, want, c.info)
    81  		}
    82  		// New letters may change case types, but existing case pairings should
    83  		// not change. See Case Pair Stability in
    84  		// https://unicode.org/policies/stability_policy.html.
    85  		if rf := unicode.SimpleFold(r); rf != r && unicode.In(rf, assigned) {
    86  			if got, want := c.info.isCased(), propCased(r); got != want {
    87  				t.Errorf("cased(%U): got %v; want %v (%x)", r, got, want, c.info)
    88  			}
    89  			if got, want := c.caseType() == cUpper, propUpper(r); got != want {
    90  				t.Errorf("upper(%U): got %v; want %v (%x)", r, got, want, c.info)
    91  			}
    92  			if got, want := c.caseType() == cLower, propLower(r); got != want {
    93  				t.Errorf("lower(%U): got %v; want %v (%x)", r, got, want, c.info)
    94  			}
    95  		}
    96  		if got, want := c.info.isBreak(), hasBreakProp(r); got != want {
    97  			t.Errorf("isBreak(%U): got %v; want %v (%x)", r, got, want, c.info)
    98  		}
    99  	}
   100  	// TODO: get title case from unicode file.
   101  }
   102  
   103  func TestMapping(t *testing.T) {
   104  	assigned := rangetable.Assigned(UnicodeVersion)
   105  	coreVersion := rangetable.Assigned(unicode.Version)
   106  	if coreVersion == nil {
   107  		coreVersion = assigned
   108  	}
   109  	apply := func(r rune, f func(c *context) bool) string {
   110  		c := contextFromRune(r)
   111  		f(c)
   112  		return string(c.dst[:c.pDst])
   113  	}
   114  
   115  	for r, tt := range special {
   116  		if got, want := apply(r, lower), tt.toLower; got != want {
   117  			t.Errorf("lowerSpecial:(%U): got %+q; want %+q", r, got, want)
   118  		}
   119  		if got, want := apply(r, title), tt.toTitle; got != want {
   120  			t.Errorf("titleSpecial:(%U): got %+q; want %+q", r, got, want)
   121  		}
   122  		if got, want := apply(r, upper), tt.toUpper; got != want {
   123  			t.Errorf("upperSpecial:(%U): got %+q; want %+q", r, got, want)
   124  		}
   125  	}
   126  
   127  	for r := rune(0); r <= lastRuneForTesting; r++ {
   128  		if !unicode.In(r, assigned) || !unicode.In(r, coreVersion) {
   129  			continue
   130  		}
   131  		if rf := unicode.SimpleFold(r); rf == r || !unicode.In(rf, assigned) {
   132  			continue
   133  		}
   134  		if _, ok := special[r]; ok {
   135  			continue
   136  		}
   137  		want := string(unicode.ToLower(r))
   138  		if got := apply(r, lower); got != want {
   139  			t.Errorf("lower:%q (%U): got %q %U; want %q %U", r, r, got, []rune(got), want, []rune(want))
   140  		}
   141  
   142  		want = string(unicode.ToUpper(r))
   143  		if got := apply(r, upper); got != want {
   144  			t.Errorf("upper:%q (%U): got %q %U; want %q %U", r, r, got, []rune(got), want, []rune(want))
   145  		}
   146  
   147  		want = string(unicode.ToTitle(r))
   148  		if got := apply(r, title); got != want {
   149  			t.Errorf("title:%q (%U): got %q %U; want %q %U", r, r, got, []rune(got), want, []rune(want))
   150  		}
   151  	}
   152  }
   153  
   154  func runeFoldData(r rune) (x struct{ simple, full, special string }) {
   155  	x = foldMap[r]
   156  	if x.simple == "" {
   157  		x.simple = string(unicode.ToLower(r))
   158  	}
   159  	if x.full == "" {
   160  		x.full = string(unicode.ToLower(r))
   161  	}
   162  	if x.special == "" {
   163  		x.special = x.full
   164  	}
   165  	return
   166  }
   167  
   168  func TestFoldData(t *testing.T) {
   169  	assigned := rangetable.Assigned(UnicodeVersion)
   170  	coreVersion := rangetable.Assigned(unicode.Version)
   171  	if coreVersion == nil {
   172  		coreVersion = assigned
   173  	}
   174  	apply := func(r rune, f func(c *context) bool) (string, info) {
   175  		c := contextFromRune(r)
   176  		f(c)
   177  		return string(c.dst[:c.pDst]), c.info.cccType()
   178  	}
   179  	for r := rune(0); r <= lastRuneForTesting; r++ {
   180  		if !unicode.In(r, assigned) || !unicode.In(r, coreVersion) {
   181  			continue
   182  		}
   183  		x := runeFoldData(r)
   184  		if got, info := apply(r, foldFull); got != x.full {
   185  			t.Errorf("full:%q (%U): got %q %U; want %q %U (ccc=%x)", r, r, got, []rune(got), x.full, []rune(x.full), info)
   186  		}
   187  		// TODO: special and simple.
   188  	}
   189  }
   190  
   191  func TestCCC(t *testing.T) {
   192  	assigned := rangetable.Assigned(UnicodeVersion)
   193  	normVersion := rangetable.Assigned(norm.Version)
   194  	for r := rune(0); r <= lastRuneForTesting; r++ {
   195  		if !unicode.In(r, assigned) || !unicode.In(r, normVersion) {
   196  			continue
   197  		}
   198  		c := contextFromRune(r)
   199  
   200  		p := norm.NFC.PropertiesString(string(r))
   201  		want := cccOther
   202  		switch p.CCC() {
   203  		case 0:
   204  			want = cccZero
   205  		case above:
   206  			want = cccAbove
   207  		}
   208  		if got := c.info.cccType(); got != want {
   209  			t.Errorf("%U: got %x; want %x", r, got, want)
   210  		}
   211  	}
   212  }
   213  
   214  func TestWordBreaks(t *testing.T) {
   215  	for _, tt := range breakTest {
   216  		testtext.Run(t, tt, func(t *testing.T) {
   217  			parts := strings.Split(tt, "|")
   218  			want := ""
   219  			for _, s := range parts {
   220  				found := false
   221  				// This algorithm implements title casing given word breaks
   222  				// as defined in the Unicode standard 3.13 R3.
   223  				for _, r := range s {
   224  					title := unicode.ToTitle(r)
   225  					lower := unicode.ToLower(r)
   226  					if !found && title != lower {
   227  						found = true
   228  						want += string(title)
   229  					} else {
   230  						want += string(lower)
   231  					}
   232  				}
   233  			}
   234  			src := strings.Join(parts, "")
   235  			got := Title(language.Und).String(src)
   236  			if got != want {
   237  				t.Errorf("got %q; want %q", got, want)
   238  			}
   239  		})
   240  	}
   241  }
   242  
   243  func TestContext(t *testing.T) {
   244  	tests := []struct {
   245  		desc       string
   246  		dstSize    int
   247  		atEOF      bool
   248  		src        string
   249  		out        string
   250  		nSrc       int
   251  		err        error
   252  		ops        string
   253  		prefixArg  string
   254  		prefixWant bool
   255  	}{{
   256  		desc:    "next: past end, atEOF, no checkpoint",
   257  		dstSize: 10,
   258  		atEOF:   true,
   259  		src:     "12",
   260  		out:     "",
   261  		nSrc:    2,
   262  		ops:     "next;next;next",
   263  		// Test that calling prefix with a non-empty argument when the buffer
   264  		// is depleted returns false.
   265  		prefixArg:  "x",
   266  		prefixWant: false,
   267  	}, {
   268  		desc:       "next: not at end, atEOF, no checkpoint",
   269  		dstSize:    10,
   270  		atEOF:      false,
   271  		src:        "12",
   272  		out:        "",
   273  		nSrc:       0,
   274  		err:        transform.ErrShortSrc,
   275  		ops:        "next;next",
   276  		prefixArg:  "",
   277  		prefixWant: true,
   278  	}, {
   279  		desc:       "next: past end, !atEOF, no checkpoint",
   280  		dstSize:    10,
   281  		atEOF:      false,
   282  		src:        "12",
   283  		out:        "",
   284  		nSrc:       0,
   285  		err:        transform.ErrShortSrc,
   286  		ops:        "next;next;next",
   287  		prefixArg:  "",
   288  		prefixWant: true,
   289  	}, {
   290  		desc:       "next: past end, !atEOF, checkpoint",
   291  		dstSize:    10,
   292  		atEOF:      false,
   293  		src:        "12",
   294  		out:        "",
   295  		nSrc:       2,
   296  		ops:        "next;next;checkpoint;next",
   297  		prefixArg:  "",
   298  		prefixWant: true,
   299  	}, {
   300  		desc:       "copy: exact count, atEOF, no checkpoint",
   301  		dstSize:    2,
   302  		atEOF:      true,
   303  		src:        "12",
   304  		out:        "12",
   305  		nSrc:       2,
   306  		ops:        "next;copy;next;copy;next",
   307  		prefixArg:  "",
   308  		prefixWant: true,
   309  	}, {
   310  		desc:       "copy: past end, !atEOF, no checkpoint",
   311  		dstSize:    2,
   312  		atEOF:      false,
   313  		src:        "12",
   314  		out:        "",
   315  		nSrc:       0,
   316  		err:        transform.ErrShortSrc,
   317  		ops:        "next;copy;next;copy;next",
   318  		prefixArg:  "",
   319  		prefixWant: true,
   320  	}, {
   321  		desc:       "copy: past end, !atEOF, checkpoint",
   322  		dstSize:    2,
   323  		atEOF:      false,
   324  		src:        "12",
   325  		out:        "12",
   326  		nSrc:       2,
   327  		ops:        "next;copy;next;copy;checkpoint;next",
   328  		prefixArg:  "",
   329  		prefixWant: true,
   330  	}, {
   331  		desc:       "copy: short dst",
   332  		dstSize:    1,
   333  		atEOF:      false,
   334  		src:        "12",
   335  		out:        "",
   336  		nSrc:       0,
   337  		err:        transform.ErrShortDst,
   338  		ops:        "next;copy;next;copy;checkpoint;next",
   339  		prefixArg:  "12",
   340  		prefixWant: false,
   341  	}, {
   342  		desc:       "copy: short dst, checkpointed",
   343  		dstSize:    1,
   344  		atEOF:      false,
   345  		src:        "12",
   346  		out:        "1",
   347  		nSrc:       1,
   348  		err:        transform.ErrShortDst,
   349  		ops:        "next;copy;checkpoint;next;copy;next",
   350  		prefixArg:  "",
   351  		prefixWant: true,
   352  	}, {
   353  		desc:       "writeString: simple",
   354  		dstSize:    3,
   355  		atEOF:      true,
   356  		src:        "1",
   357  		out:        "1ab",
   358  		nSrc:       1,
   359  		ops:        "next;copy;writeab;next",
   360  		prefixArg:  "",
   361  		prefixWant: true,
   362  	}, {
   363  		desc:       "writeString: short dst",
   364  		dstSize:    2,
   365  		atEOF:      true,
   366  		src:        "12",
   367  		out:        "",
   368  		nSrc:       0,
   369  		err:        transform.ErrShortDst,
   370  		ops:        "next;copy;writeab;next",
   371  		prefixArg:  "2",
   372  		prefixWant: true,
   373  	}, {
   374  		desc:       "writeString: simple",
   375  		dstSize:    3,
   376  		atEOF:      true,
   377  		src:        "12",
   378  		out:        "1ab",
   379  		nSrc:       2,
   380  		ops:        "next;copy;next;writeab;next",
   381  		prefixArg:  "",
   382  		prefixWant: true,
   383  	}, {
   384  		desc:       "writeString: short dst",
   385  		dstSize:    2,
   386  		atEOF:      true,
   387  		src:        "12",
   388  		out:        "",
   389  		nSrc:       0,
   390  		err:        transform.ErrShortDst,
   391  		ops:        "next;copy;next;writeab;next",
   392  		prefixArg:  "1",
   393  		prefixWant: false,
   394  	}, {
   395  		desc:    "prefix",
   396  		dstSize: 2,
   397  		atEOF:   true,
   398  		src:     "12",
   399  		out:     "",
   400  		nSrc:    0,
   401  		// Context will assign an ErrShortSrc if the input wasn't exhausted.
   402  		err:        transform.ErrShortSrc,
   403  		prefixArg:  "12",
   404  		prefixWant: true,
   405  	}}
   406  	for _, tt := range tests {
   407  		c := context{dst: make([]byte, tt.dstSize), src: []byte(tt.src), atEOF: tt.atEOF}
   408  
   409  		for _, op := range strings.Split(tt.ops, ";") {
   410  			switch op {
   411  			case "next":
   412  				c.next()
   413  			case "checkpoint":
   414  				c.checkpoint()
   415  			case "writeab":
   416  				c.writeString("ab")
   417  			case "copy":
   418  				c.copy()
   419  			case "":
   420  			default:
   421  				t.Fatalf("unknown op %q", op)
   422  			}
   423  		}
   424  		if got := c.hasPrefix(tt.prefixArg); got != tt.prefixWant {
   425  			t.Errorf("%s:\nprefix was %v; want %v", tt.desc, got, tt.prefixWant)
   426  		}
   427  		nDst, nSrc, err := c.ret()
   428  		if err != tt.err {
   429  			t.Errorf("%s:\nerror was %v; want %v", tt.desc, err, tt.err)
   430  		}
   431  		if out := string(c.dst[:nDst]); out != tt.out {
   432  			t.Errorf("%s:\nout was %q; want %q", tt.desc, out, tt.out)
   433  		}
   434  		if nSrc != tt.nSrc {
   435  			t.Errorf("%s:\nnSrc was %d; want %d", tt.desc, nSrc, tt.nSrc)
   436  		}
   437  	}
   438  }