github.com/status-im/status-go@v1.1.0/abi-spec/utf8.go (about)

     1  package abispec
     2  
     3  import (
     4  	"fmt"
     5  	"unicode/utf8"
     6  
     7  	"github.com/ethereum/go-ethereum/common/hexutil"
     8  )
     9  
    10  func stringToRunes(str string) []rune {
    11  	var runes []rune
    12  	bytes := []byte(str)
    13  	for len(bytes) > 0 {
    14  		r, size := utf8.DecodeRune(bytes)
    15  		if r == utf8.RuneError {
    16  			for i := 0; i < size; i++ {
    17  				runes = append(runes, rune(bytes[i]))
    18  			}
    19  		} else {
    20  			runes = append(runes, r)
    21  		}
    22  		bytes = bytes[size:]
    23  	}
    24  	return runes
    25  }
    26  
    27  // Taken from https://mths.be/punycode
    28  func ucs2decode(str string) []rune {
    29  	var runes = stringToRunes(str)
    30  	var output []rune
    31  	var counter = 0
    32  	var length = len(runes)
    33  	var value rune
    34  	var extra rune
    35  	for counter < length {
    36  		value = runes[counter]
    37  		counter++
    38  		if value >= 0xD800 && value <= 0xDBFF && counter < length {
    39  			// high surrogate, and there is a next character
    40  			extra = runes[counter]
    41  			counter++
    42  			if (extra & 0xFC00) == 0xDC00 { // low surrogate
    43  				output = append(output, ((value&0x3FF)<<10)+(extra&0x3FF)+0x10000)
    44  			} else {
    45  				// unmatched surrogate; only append this code unit, in case the next
    46  				// code unit is the high surrogate of a surrogate pair
    47  				output = append(output, value)
    48  				counter--
    49  			}
    50  		} else {
    51  			output = append(output, value)
    52  		}
    53  	}
    54  	return output
    55  }
    56  
    57  // Taken from https://mths.be/punycode
    58  func ucs2encode(array []rune) []byte {
    59  	var length = len(array)
    60  	var index = 0
    61  	var value rune
    62  	var output []byte
    63  	for index < length {
    64  		value = array[index]
    65  		if value > 0xFFFF {
    66  			value -= 0x10000
    67  			codePoint := rune(uint32(value)>>10&0x3FF | 0xD800)
    68  			output = appendBytes(output, stringFromCharCode(codePoint))
    69  			value = 0xDC00 | value&0x3FF
    70  		}
    71  		output = appendBytes(output, stringFromCharCode(value))
    72  		index++
    73  	}
    74  	return output
    75  }
    76  
    77  func appendBytes(dest []byte, bytes []byte) []byte {
    78  	for i := 0; i < len(bytes); i++ {
    79  		dest = append(dest, bytes[i])
    80  	}
    81  	return dest
    82  }
    83  
    84  func checkScalarValue(codePoint rune) error {
    85  	if codePoint >= 0xD800 && codePoint <= 0xDFFF {
    86  		return fmt.Errorf("lone surrogate U+%s is not a scalar value", hexutil.EncodeUint64(uint64(codePoint)))
    87  	}
    88  	return nil
    89  }
    90  
    91  func stringFromCharCode(codePoint rune) []byte {
    92  	var buf = make([]byte, 4)
    93  	n := utf8.EncodeRune(buf, codePoint)
    94  	return buf[0:n]
    95  }
    96  
    97  func createByte(codePoint rune, shift uint32) []byte {
    98  	return stringFromCharCode(((codePoint >> shift) & 0x3F) | 0x80)
    99  }
   100  
   101  func encodeCodePoint(codePoint rune) ([]byte, error) {
   102  	if (uint32(codePoint) & uint32(0xFFFFFF80)) == 0 { // 1-byte sequence
   103  		return stringFromCharCode(codePoint), nil
   104  	}
   105  	var symbol []byte
   106  	if uint32(codePoint)&0xFFFFF800 == 0 { // 2-byte sequence
   107  		symbol = stringFromCharCode(((codePoint >> 6) & 0x1F) | 0xC0)
   108  	} else if (uint32(codePoint) & 0xFFFF0000) == 0 { // 3-byte sequence
   109  		err := checkScalarValue(codePoint)
   110  		if err != nil {
   111  			return nil, err
   112  		}
   113  		symbol = stringFromCharCode(((codePoint >> 12) & 0x0F) | 0xE0)
   114  		symbol = appendBytes(symbol, createByte(codePoint, 6))
   115  	} else if (uint32(codePoint) & 0xFFE00000) == 0 { // 4-byte sequence
   116  		symbol = stringFromCharCode(((codePoint >> 18) & 0x07) | 0xF0)
   117  		symbol = appendBytes(symbol, createByte(codePoint, 12))
   118  		symbol = appendBytes(symbol, createByte(codePoint, 6))
   119  	}
   120  	symbol = appendBytes(symbol, stringFromCharCode((codePoint&0x3F)|0x80))
   121  	return symbol, nil
   122  }
   123  
   124  // implementation referenced from https://github.com/mathiasbynens/utf8.js/blob/2ce09544b62f2a274dbcd249473c0986e3660849/utf8.js
   125  func Utf8encode(str string) (string, error) {
   126  	var codePoints = ucs2decode(str)
   127  	var length = len(codePoints)
   128  	var index = 0
   129  	var codePoint rune
   130  	var bytes []byte
   131  	for index < length {
   132  		codePoint = codePoints[index]
   133  		cps, err := encodeCodePoint(codePoint)
   134  		if err != nil {
   135  			return "", err
   136  		}
   137  		bytes = appendBytes(bytes, cps)
   138  		index++
   139  	}
   140  	return string(bytes), nil
   141  }
   142  
   143  func readContinuationByte(byteArray []rune, byteCount int, pByteIndex *int) (rune, error) {
   144  	if *pByteIndex >= byteCount {
   145  		return utf8.RuneError, fmt.Errorf("invalid byte index")
   146  	}
   147  
   148  	var continuationByte = byteArray[*pByteIndex] & 0xFF
   149  	*pByteIndex = *pByteIndex + 1
   150  
   151  	if (continuationByte & 0xC0) == 0x80 {
   152  		return continuationByte & 0x3F, nil
   153  	}
   154  
   155  	// If we end up here, it’s not a continuation byte
   156  	return utf8.RuneError, fmt.Errorf("invalid continuation byte")
   157  }
   158  
   159  func decodeSymbol(byteArray []rune, byteCount int, pByteIndex *int) (rune, bool, error) {
   160  	var byte1 rune
   161  	var codePoint rune
   162  
   163  	if *pByteIndex > byteCount {
   164  		return utf8.RuneError, false, fmt.Errorf("invalid byte index")
   165  	}
   166  
   167  	if *pByteIndex == byteCount {
   168  		return utf8.RuneError, false, nil
   169  	}
   170  
   171  	// Read first byte
   172  	byte1 = byteArray[*pByteIndex] & 0xFF
   173  	*pByteIndex = *pByteIndex + 1
   174  
   175  	// 1-byte sequence (no continuation bytes)
   176  	if (byte1 & 0x80) == 0 {
   177  		return byte1, true, nil
   178  	}
   179  
   180  	// 2-byte sequence
   181  	if (byte1 & 0xE0) == 0xC0 {
   182  		byte2, err := readContinuationByte(byteArray, byteCount, pByteIndex)
   183  		if err != nil {
   184  			return utf8.RuneError, false, err
   185  		}
   186  		codePoint = ((byte1 & 0x1F) << 6) | byte2
   187  		if codePoint >= 0x80 {
   188  			return codePoint, true, nil
   189  		}
   190  		return utf8.RuneError, false, fmt.Errorf("invalid continuation byte")
   191  	}
   192  
   193  	// 3-byte sequence (may include unpaired surrogates)
   194  	if (byte1 & 0xF0) == 0xE0 {
   195  		byte2, err := readContinuationByte(byteArray, byteCount, pByteIndex)
   196  		if err != nil {
   197  			return utf8.RuneError, false, err
   198  		}
   199  		byte3, err := readContinuationByte(byteArray, byteCount, pByteIndex)
   200  		if err != nil {
   201  			return utf8.RuneError, false, err
   202  		}
   203  		codePoint = ((byte1 & 0x0F) << 12) | (byte2 << 6) | byte3
   204  		if codePoint >= 0x0800 {
   205  			err := checkScalarValue(codePoint)
   206  			if err != nil {
   207  				return utf8.RuneError, false, err
   208  			}
   209  			return codePoint, true, nil
   210  		}
   211  		return utf8.RuneError, false, fmt.Errorf("invalid continuation byte")
   212  	}
   213  
   214  	// 4-byte sequence
   215  	if (byte1 & 0xF8) == 0xF0 {
   216  		byte2, err := readContinuationByte(byteArray, byteCount, pByteIndex)
   217  		if err != nil {
   218  			return utf8.RuneError, false, err
   219  		}
   220  		byte3, err := readContinuationByte(byteArray, byteCount, pByteIndex)
   221  		if err != nil {
   222  			return utf8.RuneError, false, err
   223  		}
   224  		byte4, err := readContinuationByte(byteArray, byteCount, pByteIndex)
   225  		if err != nil {
   226  			return utf8.RuneError, false, err
   227  		}
   228  		codePoint = ((byte1 & 0x07) << 0x12) | (byte2 << 0x0C) |
   229  			(byte3 << 0x06) | byte4
   230  		if codePoint >= 0x010000 && codePoint <= 0x10FFFF {
   231  			return codePoint, true, nil
   232  		}
   233  	}
   234  
   235  	return utf8.RuneError, false, fmt.Errorf("invalid UTF-8 detected")
   236  }
   237  
   238  // implementation referenced from https://github.com/mathiasbynens/utf8.js/blob/2ce09544b62f2a274dbcd249473c0986e3660849/utf8.js
   239  func Utf8decode(str string) ([]byte, error) {
   240  	byteArray := ucs2decode(str)
   241  	byteCount := len(byteArray)
   242  	byteIndex := 0
   243  	var codePoints []rune
   244  	for {
   245  		codePoint, goOn, err := decodeSymbol(byteArray, byteCount, &byteIndex)
   246  		if err != nil {
   247  			return nil, err
   248  		}
   249  		if !goOn {
   250  			break
   251  		}
   252  		codePoints = append(codePoints, codePoint)
   253  	}
   254  	return ucs2encode(codePoints), nil
   255  }