github.com/insionng/yougam@v0.0.0-20170714101924-2bc18d833463/libraries/mahonia/kuten.go (about)

     1  package mahonia
     2  
     3  import (
     4  	"sync"
     5  	"unicode/utf8"
     6  )
     7  
// A kutenTable holds the data for a double-byte character set, arranged by ku
// (区, zone) and ten (点, position). These can be converted to various actual
// encoding schemes.
//
// Both indices are zero-based in this table: the decoders subtract the
// encoding's base byte (0x21 or 0xa1) from each input byte before indexing.
type kutenTable struct {
	// Data[ku][ten] is the unicode value for the character at that zone and
	// position. A value of zero marks a slot with no character assigned; the
	// decoders report INVALID_CHAR for such slots.
	Data [94][94]uint16

	// FromUnicode holds the ku and ten for each Unicode code point.
	// It is not available until Reverse() has been called.
	//
	// Reverse allocates 65536 entries, so the slice is indexed by code points
	// in the range U+0000..U+FFFF only. Each entry is {ku, ten}, zero-based.
	// An entry of {0, 0} is ambiguous on its own: it is either "no mapping" or
	// the character stored at Data[0][0], so EncodeHigh compares the code
	// point against Data[0][0] to tell the two apart.
	FromUnicode [][2]byte

	// once is used to synchronize the generation of FromUnicode.
	once sync.Once
}
    23  
    24  // Reverse generates FromUnicode.
    25  func (t *kutenTable) Reverse() {
    26  	t.once.Do(func() {
    27  		t.FromUnicode = make([][2]byte, 65536)
    28  		for ku := range t.Data {
    29  			for ten, unicode := range t.Data[ku] {
    30  				t.FromUnicode[unicode] = [2]byte{byte(ku), byte(ten)}
    31  			}
    32  		}
    33  	})
    34  }
    35  
    36  // DecodeLow decodes a character from an encoding that does not have the high
    37  // bit set.
    38  func (t *kutenTable) DecodeLow(p []byte) (c rune, size int, status Status) {
    39  	if len(p) < 2 {
    40  		return 0, 0, NO_ROOM
    41  	}
    42  	ku := p[0] - 0x21
    43  	ten := p[1] - 0x21
    44  	if ku > 93 || ten > 93 {
    45  		return utf8.RuneError, 1, INVALID_CHAR
    46  	}
    47  	u := t.Data[ku][ten]
    48  	if u == 0 {
    49  		return utf8.RuneError, 1, INVALID_CHAR
    50  	}
    51  	return rune(u), 2, SUCCESS
    52  }
    53  
    54  // DecodeHigh decodes a character from an encoding that has the high bit set.
    55  func (t *kutenTable) DecodeHigh(p []byte) (c rune, size int, status Status) {
    56  	if len(p) < 2 {
    57  		return 0, 0, NO_ROOM
    58  	}
    59  	ku := p[0] - 0xa1
    60  	ten := p[1] - 0xa1
    61  	if ku > 93 || ten > 93 {
    62  		return utf8.RuneError, 1, INVALID_CHAR
    63  	}
    64  	u := t.Data[ku][ten]
    65  	if u == 0 {
    66  		return utf8.RuneError, 1, INVALID_CHAR
    67  	}
    68  	return rune(u), 2, SUCCESS
    69  }
    70  
    71  // EncodeHigh encodes a character in an encoding that has the high bit set.
    72  func (t *kutenTable) EncodeHigh(p []byte, c rune) (size int, status Status) {
    73  	if len(p) < 2 {
    74  		return 0, NO_ROOM
    75  	}
    76  	if c > 0xffff {
    77  		p[0] = '?'
    78  		return 1, INVALID_CHAR
    79  	}
    80  	kuten := t.FromUnicode[c]
    81  	if kuten == [2]byte{0, 0} && c != rune(t.Data[0][0]) {
    82  		p[0] = '?'
    83  		return 1, INVALID_CHAR
    84  	}
    85  	p[0] = kuten[0] + 0xa1
    86  	p[1] = kuten[1] + 0xa1
    87  	return 2, SUCCESS
    88  }