decred.org/dcrdex@v1.0.5/dex/encode/passbytes.go (about) 1 // This code is available on the terms of the project LICENSE.md file, 2 // also available online at https://blueoakcouncil.org/license/1.0.0. 3 4 package encode 5 6 import ( 7 "encoding/json" 8 "fmt" 9 "strconv" 10 "unicode" 11 "unicode/utf16" 12 "unicode/utf8" 13 ) 14 15 const ( 16 escapeSequence = '\\' 17 unicodePrefix = 'u' 18 unicodeSequenceLength = 6 19 ) 20 21 // PassBytes represents a UTF8-encoded byte slice. 22 type PassBytes []byte 23 24 // MarshalJSON satisfies the json.Unmarshaler interface, returns a quoted copy 25 // of this byte slice. Returns an error if this byte slice is not a valid 26 // UTF8-encoded byte slice. 27 func (pb PassBytes) MarshalJSON() ([]byte, error) { 28 // pb may have been created by calling PassBytes("some invalid string"). 29 // Returning the quoted copy of pb may lead to errors later when trying to 30 // unmarshall. Sanity check that pb is a valid UTF8-encoded byte slice. 31 if !isUTF8Encoded(pb) { 32 return nil, fmt.Errorf("invalid PassBytes data") 33 } 34 data := make([]byte, len(pb)+2) 35 data[0], data[len(data)-1] = '"', '"' 36 copy(data[1:], pb) 37 return data, nil 38 } 39 40 // UnmarshalJSON satisfies the json.Unmarshaler interface, parses JSON-encoded 41 // data into UTF8-encoded bytes and stores the result in the `PassBytes` pointer. 42 func (pb *PassBytes) UnmarshalJSON(rawBytes []byte) error { 43 utf8EncodedBytes, err := parseJSONEncodedDataAsUTF8Bytes(rawBytes) 44 ClearBytes(rawBytes) 45 if err != nil { 46 return fmt.Errorf("cannot unmarshal password: %w", err) 47 } 48 *pb = utf8EncodedBytes 49 return nil 50 } 51 52 // Clear zeroes the slice. 53 func (pb PassBytes) Clear() { 54 ClearBytes(pb) 55 } 56 57 func isUTF8Encoded(data []byte) bool { 58 if len(data) == 0 { 59 return true // represents an empty string 60 } 61 if data[0] == '"' { 62 // should not be quoted, quotes must be escaped! 63 return false 64 } 65 66 readIndex := 0 67 for readIndex < len(data) { 68 byteAtPos := data[readIndex] 69 switch { 70 case byteAtPos == escapeSequence: 71 // Escape sequence hit, expect a valid escape char from next byte 72 // (or byte sequence). 73 if readIndex+1 >= len(data) { 74 return false 75 } 76 nextByte := data[readIndex+1] 77 if nextByte == unicodePrefix { 78 // expect a unicode char in the form \uXXXX 79 // or a surrogate pair in the form \uXXXX\uYYYY 80 _, bytesRead := unicodeSequenceToCharacter(data[readIndex:]) 81 if bytesRead <= 0 { 82 return false 83 } 84 readIndex += bytesRead 85 } else { 86 // some other escaped character? 87 _, ok := parseEscapedCharacter(nextByte) 88 if !ok { 89 return false 90 } 91 readIndex += 2 92 } 93 94 case byteAtPos == '"', byteAtPos < ' ': 95 // invalid char 96 return false 97 98 default: 99 // Attempt to decode char as UTF8, may get utf8.RuneError. 100 _, bytesRead := utf8.DecodeRune(data[readIndex:]) 101 if bytesRead <= 0 { 102 return false 103 } 104 readIndex += bytesRead 105 } 106 } 107 108 // all bytes check out 109 return true 110 } 111 112 // parseJSONEncodedDataAsUTF8Bytes parses the provided JSON-encoded data into a 113 // UTF8-encoded byte slice. 114 // Returns an error if any of the following conditions is hit: 115 // - `data` is not a valid JSON encoding 116 // - `data` is not quoted 117 // - `data` contains a byte or byte sequence that cannot be parsed into a 118 // UTF8-encoded byte or byte sequence. 119 // 120 // Inspired by encoding/json.(*decodeState).unquoteBytes. 121 func parseJSONEncodedDataAsUTF8Bytes(data []byte) ([]byte, error) { 122 if len(data) < 2 || data[0] != '"' || data[len(data)-1] != '"' { 123 return nil, fmt.Errorf("json-encoded data is not quoted") 124 } 125 if !json.Valid(data) { 126 return nil, fmt.Errorf("data is not json-encoded") 127 } 128 129 // unquote data before parsing 130 data = data[1 : len(data)-1] 131 132 outputBuffer := make([]byte, len(data)) 133 // Separate because a sequence of bytes could be parsed into fewer bytes 134 // than was read, causing readIndex to be > than writeIndex. 135 // This guarantees that readIndex will always be >= writeIndex, with the 136 // implication that `outputBuffer` can never grow beyond len(data). 137 readIndex, writeIndex := 0, 0 138 139 for readIndex < len(data) { 140 byteAtPos := data[readIndex] 141 switch { 142 case byteAtPos == escapeSequence: 143 // Escape sequence hit, next byte (or byte sequence) should tell us 144 // what char was escaped. Error if there is no next byte. 145 if readIndex+1 >= len(data) { 146 return nil, fmt.Errorf("unexpected end of data: escape sequence") 147 } 148 nextByte := data[readIndex+1] 149 if nextByte == unicodePrefix { 150 // must be a unicode char in the form \uXXXX 151 // or a surrogate pair in the form \uXXXX\uYYYY 152 unicodeChar, bytesRead := unicodeSequenceToCharacter(data[readIndex:]) 153 if unicodeChar < 0 { 154 return nil, fmt.Errorf("malformed unicode sequence in data") 155 } 156 readIndex += bytesRead 157 writeIndex += utf8.EncodeRune(outputBuffer[writeIndex:], unicodeChar) 158 } else if unescapedChar, ok := parseEscapedCharacter(nextByte); ok { 159 outputBuffer[writeIndex] = unescapedChar 160 readIndex += 2 // escape sequence + escaped char 161 writeIndex++ 162 } else { 163 return nil, fmt.Errorf("malformed unicode sequence in data") 164 } 165 166 case byteAtPos == '"', byteAtPos < ' ': 167 // Invalid char, error out. 168 return nil, fmt.Errorf("non-utf8 character %v", string(byteAtPos)) 169 170 case byteAtPos < utf8.RuneSelf: 171 // ASCII char, use without parsing. 172 outputBuffer[writeIndex] = byteAtPos 173 readIndex++ 174 writeIndex++ 175 176 default: 177 // Attempt to decode char as UTF8, may get utf8.RuneError. 178 char, bytesRead := utf8.DecodeRune(data[readIndex:]) 179 if char == utf8.RuneError { 180 return nil, fmt.Errorf("invalid character %v", string(byteAtPos)) 181 } 182 readIndex += bytesRead 183 writeIndex += utf8.EncodeRune(outputBuffer[writeIndex:], char) 184 } 185 } 186 187 return outputBuffer[0:writeIndex], nil 188 } 189 190 // unicodeSequenceToCharacter returns the unicode character represented by the 191 // first 6-12 bytes of a byte slice and the number of bytes read from the slice 192 // to produce the unicode character. 193 // Expects the first 6 bytes of the slice to represent a valid unicode character 194 // (e.g. \u5b57) otherwise -1, 0 is returned indicating that the provided slice 195 // cannot be converted to a unicode character. 196 func unicodeSequenceToCharacter(seq []byte) (rune, int) { 197 hexNumber, ok := unicodeSequenceToHexNumber(seq) 198 if !ok { 199 return -1, 0 200 } 201 202 unicodeChar := rune(hexNumber) 203 if unicodeChar == unicode.ReplacementChar { 204 // unknown unicode char 205 return -1, 0 206 } 207 208 // check if `unicodeChar` can appear in a surrogate pair, if so, attempt to 209 // parse another unicode char from the next sequence, and check if the second 210 // character pairs with the first character. 211 if utf16.IsSurrogate(unicodeChar) { 212 nextSequence := seq[unicodeSequenceLength:] 213 hexNumber, ok := unicodeSequenceToHexNumber(nextSequence) 214 if ok { 215 unicodeChar2 := rune(hexNumber) 216 pairedChar := utf16.DecodeRune(unicodeChar, unicodeChar2) 217 if pairedChar != unicode.ReplacementChar { 218 // valid pair, return the pair 219 return pairedChar, unicodeSequenceLength * 2 220 } 221 } 222 } 223 224 return unicodeChar, unicodeSequenceLength 225 } 226 227 // unicodeSequenceToHexNumber converts the last 4 bytes of a valid unicode 228 // []byte sequence to a number in base10. Expects the provided sequence to have 229 // at least 6 bytes, with first 2 bytes being `\u`. 230 func unicodeSequenceToHexNumber(unicodeSequence []byte) (int64, bool) { 231 if len(unicodeSequence) < unicodeSequenceLength || 232 unicodeSequence[0] != escapeSequence || 233 unicodeSequence[1] != unicodePrefix { 234 return -1, false 235 } 236 237 hexSequence := unicodeSequence[2:unicodeSequenceLength] 238 hexN, err := strconv.ParseInt(string(hexSequence), 16, 32) 239 if err != nil { 240 return -1, false 241 } 242 243 return hexN, true 244 } 245 246 // parseEscapedCharacter returns the character represented by the byte 247 // following an escape sequence character if it is recognized. 248 // parseEscapedCharacter does not handle parsing of escaped unicode 249 // characters. Use unicodeSequenceToCharacter for that instead. 250 func parseEscapedCharacter(char byte) (byte, bool) { 251 switch char { 252 default: 253 return 0, false 254 case '"', '\\', '/', '\'': 255 return char, true 256 case 'b': 257 return '\b', true 258 case 'f': 259 return '\f', true 260 case 'n': 261 return '\n', true 262 case 'r': 263 return '\r', true 264 case 't': 265 return '\t', true 266 } 267 }