git.lukeshu.com/go/lowmemjson@v0.3.9-0.20230723050957-72f6d13f6fb2/internal/jsonstring/encode_string.go (about) 1 // Copyright (C) 2022-2023 Luke Shumaker <lukeshu@lukeshu.com> 2 // 3 // SPDX-License-Identifier: GPL-2.0-or-later 4 5 package jsonstring 6 7 import ( 8 "encoding/json" 9 "fmt" 10 "io" 11 "reflect" 12 "unicode/utf8" 13 14 "git.lukeshu.com/go/lowmemjson/internal/fastio" 15 "git.lukeshu.com/go/lowmemjson/internal/fastio/noescape" 16 ) 17 18 // InvalidUTF8Mode is describe in the main lowmemjson package docs. 19 type InvalidUTF8Mode uint8 20 21 const ( 22 InvalidUTF8Replace InvalidUTF8Mode = iota 23 InvalidUTF8Preserve 24 InvalidUTF8Error 25 ) 26 27 // BackslashEscapeMode is describe in the main lowmemjson package 28 // docs. 29 type BackslashEscapeMode uint8 30 31 const ( 32 BackslashEscapeNone BackslashEscapeMode = iota 33 BackslashEscapeShort 34 BackslashEscapeRawByte 35 36 // It is significant to the implementation that if X=binary-0 37 // and x=binary-1, then these "BackslashEscapeUnicode" 38 // constants are counting in-order from 0 to 15. 39 40 BackslashEscapeUnicodeXXXX 41 BackslashEscapeUnicodeXXXx 42 BackslashEscapeUnicodeXXxX 43 BackslashEscapeUnicodeXXxx 44 BackslashEscapeUnicodeXxXX 45 BackslashEscapeUnicodeXxXx 46 BackslashEscapeUnicodeXxxX 47 BackslashEscapeUnicodeXxxx 48 BackslashEscapeUnicodexXXX 49 BackslashEscapeUnicodexXXx 50 BackslashEscapeUnicodexXxX 51 BackslashEscapeUnicodexXxx 52 BackslashEscapeUnicodexxXX 53 BackslashEscapeUnicodexxXx 54 BackslashEscapeUnicodexxxX 55 BackslashEscapeUnicodexxxx 56 57 BackslashEscapeUnicodeMin = BackslashEscapeUnicodeXXXX 58 BackslashEscapeUnicodeMax = BackslashEscapeUnicodexxxx 59 60 BackslashEscapeUnicode = BackslashEscapeUnicodexxxx // back-compat 61 ) 62 63 // BackslashEscaper is describe in the main lowmemjson package docs. 64 type BackslashEscaper = func(rune, BackslashEscapeMode) BackslashEscapeMode 65 66 func WriteStringUnicodeEscape(w io.Writer, c rune, mode BackslashEscapeMode) error { 67 const alphabet = "0123456789ABCDEF" 68 _mode := byte(mode - BackslashEscapeUnicodeMin) 69 buf := [6]byte{ 70 '\\', 71 'u', 72 // The 0b0010_0000 bit is the ASCII "lowercase bit". 73 alphabet[(c>>12)&0xf] | ((_mode << 2) & 0b0010_0000), 74 alphabet[(c>>8)&0xf] | ((_mode << 3) & 0b0010_0000), 75 alphabet[(c>>4)&0xf] | ((_mode << 4) & 0b0010_0000), 76 alphabet[(c>>0)&0xf] | ((_mode << 5) & 0b0010_0000), 77 } 78 _, err := noescape.Write(w, buf[:]) 79 return err 80 } 81 82 func writeStringShortEscape(w io.Writer, c rune) error { 83 var b byte 84 switch c { 85 case '"', '\\', '/': 86 b = byte(c) 87 case '\b': 88 b = 'b' 89 case '\f': 90 b = 'f' 91 case '\n': 92 b = 'n' 93 case '\r': 94 b = 'r' 95 case '\t': 96 b = 't' 97 default: 98 panic(fmt.Errorf("should not happen: writeStringShortEscape called with invalid rune: %q", c)) 99 } 100 buf := [2]byte{'\\', b} 101 _, err := noescape.Write(w, buf[:]) 102 return err 103 } 104 105 func WriteStringChar(w fastio.AllWriter, c rune, escape BackslashEscapeMode) error { 106 switch escape { 107 case BackslashEscapeNone: 108 switch { 109 case c < 0x0020: // override, gotta escape these 110 switch c { 111 case '\b', '\f', '\n', '\r', '\t': // short-escape if possible 112 return writeStringShortEscape(w, c) 113 default: 114 return WriteStringUnicodeEscape(w, c, BackslashEscapeUnicode) 115 } 116 case c == '"' || c == '\\': // override, gotta escape these 117 return writeStringShortEscape(w, c) 118 default: // obey 119 _, err := w.WriteRune(c) 120 return err 121 } 122 case BackslashEscapeShort: 123 switch c { 124 case '"', '\\', '/', '\b', '\f', '\n', '\r', '\t': // obey 125 return writeStringShortEscape(w, c) 126 default: // override, can't short-escape these 127 _, err := w.WriteRune(c) 128 return err 129 } 130 case BackslashEscapeRawByte: 131 switch { 132 case c < utf8.RuneSelf: 133 panic(fmt.Errorf("escaper returned BackslashEscapeRawByte for a character=%q < utf8.RuneSelf", c)) 134 case c > 0xFF: 135 panic(fmt.Errorf("escaper returned BackslashEscapeRawByte for a character=%q > 0xFF", c)) 136 default: 137 return w.WriteByte(byte(c)) 138 } 139 default: 140 if BackslashEscapeUnicodeMin <= escape && escape <= BackslashEscapeUnicodeMax { 141 switch { 142 case c > 0xFFFF: // override, can't escape these (TODO: unless we use UTF-16 surrogates?) 143 _, err := w.WriteRune(c) 144 return err 145 default: // obey 146 return WriteStringUnicodeEscape(w, c, escape) 147 } 148 } 149 panic(fmt.Errorf("escaper returned an invalid escape mode=%d", escape)) 150 } 151 } 152 153 func EncodeStringFromString(w fastio.AllWriter, escaper BackslashEscaper, utf InvalidUTF8Mode, val reflect.Value, str string) error { 154 if err := w.WriteByte('"'); err != nil { 155 return err 156 } 157 for i := 0; i < len(str); { 158 escaped := BackslashEscapeNone 159 c, size := utf8.DecodeRuneInString(str[i:]) 160 if c == utf8.RuneError && size == 1 { 161 switch utf { 162 case InvalidUTF8Replace: 163 escaped = BackslashEscapeUnicode 164 case InvalidUTF8Preserve: 165 escaped = BackslashEscapeRawByte 166 c = rune(str[i]) 167 case InvalidUTF8Error: 168 return &json.UnsupportedValueError{ 169 Value: val, 170 Str: fmt.Sprintf("invalid UTF-8 at byte offset %d: %#02x", i, str[i]), 171 } 172 } 173 } 174 if err := WriteStringChar(w, c, escaper(c, escaped)); err != nil { 175 return err 176 } 177 i += size 178 } 179 if err := w.WriteByte('"'); err != nil { 180 return err 181 } 182 return nil 183 } 184 185 func EncodeStringFromBytes(w fastio.AllWriter, escaper BackslashEscaper, utf InvalidUTF8Mode, val reflect.Value, str []byte) error { 186 if err := w.WriteByte('"'); err != nil { 187 return err 188 } 189 for i := 0; i < len(str); { 190 escaped := BackslashEscapeNone 191 c, size := utf8.DecodeRune(str[i:]) 192 if c == utf8.RuneError && size == 1 { 193 switch utf { 194 case InvalidUTF8Replace: 195 escaped = BackslashEscapeUnicode 196 case InvalidUTF8Preserve: 197 escaped = BackslashEscapeRawByte 198 c = rune(str[i]) 199 case InvalidUTF8Error: 200 return &json.UnsupportedValueError{ 201 Value: val, 202 Str: fmt.Sprintf("invalid UTF-8 at byte offset %d: %#02x", i, str[i]), 203 } 204 } 205 } 206 if err := WriteStringChar(w, c, escaper(c, escaped)); err != nil { 207 return err 208 } 209 i += size 210 } 211 if err := w.WriteByte('"'); err != nil { 212 return err 213 } 214 return nil 215 }