github.com/graybobo/golang.org-package-offline-cache@v0.0.0-20200626051047-6608995c132f/x/text/unicode/norm/normregtest.go (about)

     1  // Copyright 2011 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // +build ignore
     6  
     7  package main
     8  
     9  import (
    10  	"bufio"
    11  	"bytes"
    12  	"flag"
    13  	"fmt"
    14  	"log"
    15  	"net/http"
    16  	"os"
    17  	"path"
    18  	"regexp"
    19  	"runtime"
    20  	"strconv"
    21  	"strings"
    22  	"time"
    23  	"unicode"
    24  	"unicode/utf8"
    25  
    26  	"golang.org/x/text/unicode/norm"
    27  )
    28  
    29  func main() {
    30  	flag.Parse()
    31  	loadTestData()
    32  	CharacterByCharacterTests()
    33  	StandardTests()
    34  	PerformanceTest()
    35  	if errorCount == 0 {
    36  		fmt.Println("PASS")
    37  	}
    38  }
    39  
    40  const file = "NormalizationTest.txt"
    41  
    42  var url = flag.String("url",
    43  	"http://www.unicode.org/Public/"+unicode.Version+"/ucd/"+file,
    44  	"URL of Unicode database directory")
    45  var localFiles = flag.Bool("local",
    46  	false,
    47  	"data files have been copied to the current directory; for debugging only")
    48  
    49  var logger = log.New(os.Stderr, "", log.Lshortfile)
    50  
    51  // This regression test runs the test set in NormalizationTest.txt
    52  // (taken from http://www.unicode.org/Public/<unicode.Version>/ucd/).
    53  //
    54  // NormalizationTest.txt has form:
    55  // @Part0 # Specific cases
    56  // #
    57  // 1E0A;1E0A;0044 0307;1E0A;0044 0307; # (Ḋ; Ḋ; D◌̇; Ḋ; D◌̇; ) LATIN CAPITAL LETTER D WITH DOT ABOVE
    58  // 1E0C;1E0C;0044 0323;1E0C;0044 0323; # (Ḍ; Ḍ; D◌̣; Ḍ; D◌̣; ) LATIN CAPITAL LETTER D WITH DOT BELOW
    59  //
    60  // Each test has 5 columns (c1, c2, c3, c4, c5), where
    61  // (c1, c2, c3, c4, c5) == (c1, NFC(c1), NFD(c1), NFKC(c1), NFKD(c1))
    62  //
    63  // CONFORMANCE:
    64  // 1. The following invariants must be true for all conformant implementations
    65  //
    66  //    NFC
    67  //      c2 ==  NFC(c1) ==  NFC(c2) ==  NFC(c3)
    68  //      c4 ==  NFC(c4) ==  NFC(c5)
    69  //
    70  //    NFD
    71  //      c3 ==  NFD(c1) ==  NFD(c2) ==  NFD(c3)
    72  //      c5 ==  NFD(c4) ==  NFD(c5)
    73  //
    74  //    NFKC
    75  //      c4 == NFKC(c1) == NFKC(c2) == NFKC(c3) == NFKC(c4) == NFKC(c5)
    76  //
    77  //    NFKD
    78  //      c5 == NFKD(c1) == NFKD(c2) == NFKD(c3) == NFKD(c4) == NFKD(c5)
    79  //
    80  // 2. For every code point X assigned in this version of Unicode that is not
    81  //    specifically listed in Part 1, the following invariants must be true
    82  //    for all conformant implementations:
    83  //
    84  //      X == NFC(X) == NFD(X) == NFKC(X) == NFKD(X)
    85  //
    86  
    87  // Column types.
    88  const (
    89  	cRaw = iota
    90  	cNFC
    91  	cNFD
    92  	cNFKC
    93  	cNFKD
    94  	cMaxColumns
    95  )
    96  
    97  // Holds data from NormalizationTest.txt
    98  var part []Part
    99  
   100  type Part struct {
   101  	name   string
   102  	number int
   103  	tests  []Test
   104  }
   105  
   106  type Test struct {
   107  	name   string
   108  	partnr int
   109  	number int
   110  	r      rune                // used for character by character test
   111  	cols   [cMaxColumns]string // Each has 5 entries, see below.
   112  }
   113  
   114  func (t Test) Name() string {
   115  	if t.number < 0 {
   116  		return part[t.partnr].name
   117  	}
   118  	return fmt.Sprintf("%s:%d", part[t.partnr].name, t.number)
   119  }
   120  
   121  var partRe = regexp.MustCompile(`@Part(\d) # (.*)$`)
   122  var testRe = regexp.MustCompile(`^` + strings.Repeat(`([\dA-F ]+);`, 5) + ` # (.*)$`)
   123  
   124  var counter int
   125  
   126  // Load the data form NormalizationTest.txt
   127  func loadTestData() {
   128  	if *localFiles {
   129  		pwd, _ := os.Getwd()
   130  		*url = "file://" + path.Join(pwd, file)
   131  	}
   132  	t := &http.Transport{}
   133  	t.RegisterProtocol("file", http.NewFileTransport(http.Dir("/")))
   134  	c := &http.Client{Transport: t}
   135  	resp, err := c.Get(*url)
   136  	if err != nil {
   137  		logger.Fatal(err)
   138  	}
   139  	if resp.StatusCode != 200 {
   140  		logger.Fatal("bad GET status for "+file, resp.Status)
   141  	}
   142  	f := resp.Body
   143  	defer f.Close()
   144  	scanner := bufio.NewScanner(f)
   145  	for scanner.Scan() {
   146  		line := scanner.Text()
   147  		if len(line) == 0 || line[0] == '#' {
   148  			continue
   149  		}
   150  		m := partRe.FindStringSubmatch(line)
   151  		if m != nil {
   152  			if len(m) < 3 {
   153  				logger.Fatal("Failed to parse Part: ", line)
   154  			}
   155  			i, err := strconv.Atoi(m[1])
   156  			if err != nil {
   157  				logger.Fatal(err)
   158  			}
   159  			name := m[2]
   160  			part = append(part, Part{name: name[:len(name)-1], number: i})
   161  			continue
   162  		}
   163  		m = testRe.FindStringSubmatch(line)
   164  		if m == nil || len(m) < 7 {
   165  			logger.Fatalf(`Failed to parse: "%s" result: %#v`, line, m)
   166  		}
   167  		test := Test{name: m[6], partnr: len(part) - 1, number: counter}
   168  		counter++
   169  		for j := 1; j < len(m)-1; j++ {
   170  			for _, split := range strings.Split(m[j], " ") {
   171  				r, err := strconv.ParseUint(split, 16, 64)
   172  				if err != nil {
   173  					logger.Fatal(err)
   174  				}
   175  				if test.r == 0 {
   176  					// save for CharacterByCharacterTests
   177  					test.r = rune(r)
   178  				}
   179  				var buf [utf8.UTFMax]byte
   180  				sz := utf8.EncodeRune(buf[:], rune(r))
   181  				test.cols[j-1] += string(buf[:sz])
   182  			}
   183  		}
   184  		part := &part[len(part)-1]
   185  		part.tests = append(part.tests, test)
   186  	}
   187  	if scanner.Err() != nil {
   188  		logger.Fatal(scanner.Err())
   189  	}
   190  }
   191  
   192  var fstr = []string{"NFC", "NFD", "NFKC", "NFKD"}
   193  
   194  var errorCount int
   195  
   196  func cmpResult(t *Test, name string, f norm.Form, gold, test, result string) {
   197  	if gold != result {
   198  		errorCount++
   199  		if errorCount > 20 {
   200  			return
   201  		}
   202  		logger.Printf("%s:%s: %s(%+q)=%+q; want %+q: %s",
   203  			t.Name(), name, fstr[f], test, result, gold, t.name)
   204  	}
   205  }
   206  
   207  func cmpIsNormal(t *Test, name string, f norm.Form, test string, result, want bool) {
   208  	if result != want {
   209  		errorCount++
   210  		if errorCount > 20 {
   211  			return
   212  		}
   213  		logger.Printf("%s:%s: %s(%+q)=%v; want %v", t.Name(), name, fstr[f], test, result, want)
   214  	}
   215  }
   216  
   217  func doTest(t *Test, f norm.Form, gold, test string) {
   218  	testb := []byte(test)
   219  	result := f.Bytes(testb)
   220  	cmpResult(t, "Bytes", f, gold, test, string(result))
   221  
   222  	sresult := f.String(test)
   223  	cmpResult(t, "String", f, gold, test, sresult)
   224  
   225  	acc := []byte{}
   226  	i := norm.Iter{}
   227  	i.InitString(f, test)
   228  	for !i.Done() {
   229  		acc = append(acc, i.Next()...)
   230  	}
   231  	cmpResult(t, "Iter.Next", f, gold, test, string(acc))
   232  
   233  	buf := make([]byte, 128)
   234  	acc = nil
   235  	for p := 0; p < len(testb); {
   236  		nDst, nSrc, _ := f.Transform(buf, testb[p:], true)
   237  		acc = append(acc, buf[:nDst]...)
   238  		p += nSrc
   239  	}
   240  	cmpResult(t, "Transform", f, gold, test, string(acc))
   241  
   242  	for i := range test {
   243  		out := f.Append(f.Bytes([]byte(test[:i])), []byte(test[i:])...)
   244  		cmpResult(t, fmt.Sprintf(":Append:%d", i), f, gold, test, string(out))
   245  	}
   246  	cmpIsNormal(t, "IsNormal", f, test, f.IsNormal([]byte(test)), test == gold)
   247  	cmpIsNormal(t, "IsNormalString", f, test, f.IsNormalString(test), test == gold)
   248  }
   249  
   250  func doConformanceTests(t *Test, partn int) {
   251  	for i := 0; i <= 2; i++ {
   252  		doTest(t, norm.NFC, t.cols[1], t.cols[i])
   253  		doTest(t, norm.NFD, t.cols[2], t.cols[i])
   254  		doTest(t, norm.NFKC, t.cols[3], t.cols[i])
   255  		doTest(t, norm.NFKD, t.cols[4], t.cols[i])
   256  	}
   257  	for i := 3; i <= 4; i++ {
   258  		doTest(t, norm.NFC, t.cols[3], t.cols[i])
   259  		doTest(t, norm.NFD, t.cols[4], t.cols[i])
   260  		doTest(t, norm.NFKC, t.cols[3], t.cols[i])
   261  		doTest(t, norm.NFKD, t.cols[4], t.cols[i])
   262  	}
   263  }
   264  
   265  func CharacterByCharacterTests() {
   266  	tests := part[1].tests
   267  	var last rune = 0
   268  	for i := 0; i <= len(tests); i++ { // last one is special case
   269  		var r rune
   270  		if i == len(tests) {
   271  			r = 0x2FA1E // Don't have to go to 0x10FFFF
   272  		} else {
   273  			r = tests[i].r
   274  		}
   275  		for last++; last < r; last++ {
   276  			// Check all characters that were not explicitly listed in the test.
   277  			t := &Test{partnr: 1, number: -1}
   278  			char := string(last)
   279  			doTest(t, norm.NFC, char, char)
   280  			doTest(t, norm.NFD, char, char)
   281  			doTest(t, norm.NFKC, char, char)
   282  			doTest(t, norm.NFKD, char, char)
   283  		}
   284  		if i < len(tests) {
   285  			doConformanceTests(&tests[i], 1)
   286  		}
   287  	}
   288  }
   289  
   290  func StandardTests() {
   291  	for _, j := range []int{0, 2, 3} {
   292  		for _, test := range part[j].tests {
   293  			doConformanceTests(&test, j)
   294  		}
   295  	}
   296  }
   297  
   298  // PerformanceTest verifies that normalization is O(n). If any of the
   299  // code does not properly check for maxCombiningChars, normalization
   300  // may exhibit O(n**2) behavior.
   301  func PerformanceTest() {
   302  	runtime.GOMAXPROCS(2)
   303  	success := make(chan bool, 1)
   304  	go func() {
   305  		buf := bytes.Repeat([]byte("\u035D"), 1024*1024)
   306  		buf = append(buf, "\u035B"...)
   307  		norm.NFC.Append(nil, buf...)
   308  		success <- true
   309  	}()
   310  	timeout := time.After(1 * time.Second)
   311  	select {
   312  	case <-success:
   313  		// test completed before the timeout
   314  	case <-timeout:
   315  		errorCount++
   316  		logger.Printf(`unexpectedly long time to complete PerformanceTest`)
   317  	}
   318  }