github.com/graybobo/golang.org-package-offline-cache@v0.0.0-20200626051047-6608995c132f/x/text/width/width.go (about)

     1  // Copyright 2015 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  //go:generate stringer -type=Kind
     6  //go:generate go run gen.go gen_common.go gen_trieval.go
     7  
     8  // Package width provides functionality for handling different widths in text.
     9  //
    10  // Wide characters behave like ideographs; they tend to allow line breaks after
    11  // each character and remain upright in vertical text layout. Narrow characters
    12  // are kept together in words or runs that are rotated sideways in vertical text
    13  // layout.
    14  //
    15  // For more information, see http://unicode.org/reports/tr11/.
    16  package width // import "golang.org/x/text/width"
    17  
    18  import (
    19  	"unicode/utf8"
    20  
    21  	"golang.org/x/text/transform"
    22  )
    23  
    24  // TODO
    25  // 1) Reduce table size by compressing blocks.
    26  // 2) API proposition for computing display length
    27  //    (approximation, fixed pitch only).
    28  // 3) Implement display length.
    29  
// Kind indicates the type of width property as defined in
// http://unicode.org/reports/tr11/.
type Kind int

// The Kind values are numbered consecutively from zero via iota, so the order
// of the declarations below fixes their numeric values.
// NOTE(review): elem.kind derives a Kind by shifting a table value, so these
// numbers are presumably baked into the generated tables; confirm before
// reordering.
const (
	// Neutral characters do not occur in legacy East Asian character sets.
	Neutral Kind = iota

	// EastAsianAmbiguous characters can be sometimes wide and sometimes
	// narrow and require additional information not contained in the
	// character code to further resolve their width.
	EastAsianAmbiguous

	// EastAsianWide characters are wide in their usual form. They occur only
	// in the context of East Asian typography. These runes may have explicit
	// halfwidth counterparts.
	EastAsianWide

	// EastAsianNarrow characters are narrow in their usual form. They often
	// have fullwidth counterparts.
	EastAsianNarrow

	// Note: there exist Narrow runes that do not have fullwidth or wide
	// counterparts, despite what the definition says (e.g. U+27E6).

	// EastAsianFullwidth characters have a compatibility decomposition of
	// type wide that maps to a narrow counterpart.
	EastAsianFullwidth

	// EastAsianHalfwidth characters have a compatibility decomposition of
	// type narrow that maps to a wide or ambiguous counterpart, plus U+20A9 ₩
	// WON SIGN.
	EastAsianHalfwidth

	// Note: there exist runes that have halfwidth counterparts but that are
	// classified as Ambiguous, rather than wide (e.g. U+2190).
)
    66  
    67  // TODO: the generated tries need to return size 1 for invalid runes for the
    68  // width to be computed correctly (each byte should render width 1)
    69  
// trie is the package-level width trie consulted by Lookup, LookupString and
// LookupRune. It is built by newWidthTrie, which is defined outside this
// section of the file; the meaning of the 0 argument is not visible here.
var trie = newWidthTrie(0)
    71  
    72  // Lookup reports the Properties of the first rune in b and the number of bytes
    73  // of its UTF-8 encoding.
    74  func Lookup(b []byte) (p Properties, size int) {
    75  	v, sz := trie.lookup(b)
    76  	return Properties{elem(v), b[sz-1]}, sz
    77  }
    78  
    79  // LookupString reports the Properties of the first rune in s and the number of
    80  // bytes of its UTF-8 encoding.
    81  func LookupString(s string) (p Properties, size int) {
    82  	v, sz := trie.lookupString(s)
    83  	return Properties{elem(v), s[sz-1]}, sz
    84  }
    85  
    86  // LookupRune reports the Properties of rune r.
    87  func LookupRune(r rune) Properties {
    88  	var buf [4]byte
    89  	n := utf8.EncodeRune(buf[:], r)
    90  	v, _ := trie.lookup(buf[:n])
    91  	last := byte(r)
    92  	if r >= utf8.RuneSelf {
    93  		last = 0x80 + byte(r&0x3f)
    94  	}
    95  	return Properties{elem(v), last}
    96  }
    97  
// Properties provides access to width properties of a rune.
type Properties struct {
	// elem is the value the trie lookup returned for the rune. Its bits at and
	// above typeShift hold the Kind, its tagNeedsFold bit gates Folded, and
	// its low byte is used as an index into inverseData.
	elem elem
	// last is the final byte of the rune's UTF-8 encoding. Folded, Narrow and
	// Wide XOR it into the inverseData entry to build the mapped rune.
	last byte
}
   103  
   104  func (e elem) kind() Kind {
   105  	return Kind(e >> typeShift)
   106  }
   107  
   108  // Kind returns the Kind of a rune as defined in Unicode TR #11.
   109  // See http://unicode.org/reports/tr11/ for more details.
   110  func (p Properties) Kind() Kind {
   111  	return p.elem.kind()
   112  }
   113  
   114  // Folded returns the folded variant of a rune or 0 if the rune is canonical.
   115  func (p Properties) Folded() rune {
   116  	if p.elem&tagNeedsFold != 0 {
   117  		buf := inverseData[byte(p.elem)]
   118  		buf[buf[0]] ^= p.last
   119  		r, _ := utf8.DecodeRune(buf[1 : 1+buf[0]])
   120  		return r
   121  	}
   122  	return 0
   123  }
   124  
   125  // Narrow returns the narrow variant of a rune or 0 if the rune is already
   126  // narrow or doesn't have a narrow variant.
   127  func (p Properties) Narrow() rune {
   128  	if k := p.elem.kind(); byte(p.elem) != 0 && (k == EastAsianFullwidth || k == EastAsianWide || k == EastAsianAmbiguous) {
   129  		buf := inverseData[byte(p.elem)]
   130  		buf[buf[0]] ^= p.last
   131  		r, _ := utf8.DecodeRune(buf[1 : 1+buf[0]])
   132  		return r
   133  	}
   134  	return 0
   135  }
   136  
   137  // Wide returns the wide variant of a rune or 0 if the rune is already
   138  // wide or doesn't have a wide variant.
   139  func (p Properties) Wide() rune {
   140  	if k := p.elem.kind(); byte(p.elem) != 0 && (k == EastAsianHalfwidth || k == EastAsianNarrow) {
   141  		buf := inverseData[byte(p.elem)]
   142  		buf[buf[0]] ^= p.last
   143  		r, _ := utf8.DecodeRune(buf[1 : 1+buf[0]])
   144  		return r
   145  	}
   146  	return 0
   147  }
   148  
   149  // TODO for Properties:
   150  // - Add Fullwidth/Halfwidth or Inverted methods for computing variants
   151  // mapping.
   152  // - Add width information (including information on non-spacing runes).
   153  
// Transformer implements the transform.Transformer interface.
type Transformer struct {
	// t is the wrapped transform that Reset and Transform delegate to.
	t transform.Transformer
}
   158  
   159  // Reset implements the transform.Transformer interface.
   160  func (t Transformer) Reset() { t.t.Reset() }
   161  
   162  // Transform implements the Transformer interface.
   163  func (t Transformer) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
   164  	return t.t.Transform(dst, src, atEOF)
   165  }
   166  
   167  // Bytes returns a new byte slice with the result of applying t to b.
   168  func (t Transformer) Bytes(b []byte) []byte {
   169  	b, _, _ = transform.Bytes(t, b)
   170  	return b
   171  }
   172  
   173  // String returns a string with the result of applying t to s.
   174  func (t Transformer) String(s string) string {
   175  	s, _, _ = transform.String(t, s)
   176  	return s
   177  }
   178  
   179  var (
   180  	// Fold is a transform that maps all runes to their canonical width.
   181  	//
   182  	// Note that the NFKC and NFKD transforms in golang.org/x/text/unicode/norm
   183  	// provide a more generic folding mechanism.
   184  	Fold Transformer = Transformer{foldTransform{}}
   185  
   186  	// Widen is a transform that maps runes to their wide variant, if
   187  	// available.
   188  	Widen Transformer = Transformer{wideTransform{}}
   189  
   190  	// Narrow is a transform that maps runes to their narrow variant, if
   191  	// available.
   192  	Narrow Transformer = Transformer{narrowTransform{}}
   193  )
   194  
   195  // TODO: Consider the following options:
   196  // - Treat Ambiguous runes that have a halfwidth counterpart as wide, or some
   197  //   generalized variant of this.
   198  // - Consider a wide Won character to be the default width (or some generalized
   199  //   variant of this).
   200  // - Filter the set of characters that gets converted (the preferred approach is
   201  //   to allow applying filters to transforms).