lab.nexedi.com/kirr/go123@v0.0.0-20240207185015-8299741fa871/xfmt/python.go (about) 1 // Copyright (C) 2017-2019 Nexedi SA and Contributors. 2 // Kirill Smelkov <kirr@nexedi.com> 3 // 4 // This program is free software: you can Use, Study, Modify and Redistribute 5 // it under the terms of the GNU General Public License version 3, or (at your 6 // option) any later version, as published by the Free Software Foundation. 7 // 8 // You can also Link and Combine this program with other software covered by 9 // the terms of any of the Free Software licenses or any of the Open Source 10 // Initiative approved licenses and Convey the resulting work. Corresponding 11 // source of such a combination shall include the source code for all other 12 // software used. 13 // 14 // This program is distributed WITHOUT ANY WARRANTY; without even the implied 15 // warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 16 // 17 // See COPYING file for full licensing terms. 18 // See https://www.nexedi.com/licensing for rationale and options. 19 20 // quoting by Python rules 21 22 package xfmt 23 24 import ( 25 "strconv" 26 "unicode/utf8" 27 28 "lab.nexedi.com/kirr/go123/mem" 29 "lab.nexedi.com/kirr/go123/xbytes" 30 ) 31 32 // AppendQuotePy appends to buf Python quoting of s. 33 func AppendQuotePy(buf []byte, s string) []byte { 34 return AppendQuotePyBytes(buf, mem.Bytes(s)) 35 } 36 37 // AppendQuotePyBytes appends to buf Python quoting of b. 38 func AppendQuotePyBytes(buf, b []byte) []byte { 39 // smartquotes: choose ' or " as quoting character 40 // https://github.com/python/cpython/blob/v2.7.13-116-g1aa1803b3d/Objects/stringobject.c#L947 41 quote := byte('\'') 42 if xbytes.ContainsByte(b, '\'') && !xbytes.ContainsByte(b, '"') { 43 quote = '"' 44 } 45 46 buf = append(buf, quote) 47 48 for i := 0; i < len(b); { 49 c := b[i] 50 switch { 51 // fast path - ASCII only - trying to avoid UTF-8 decoding 52 case c < utf8.RuneSelf: 53 switch { 54 case c == '\\' || c == quote: 55 buf = append(buf, '\\', c) 56 57 case ' ' <= c && c <= '\x7e': 58 // printable ASCII 59 buf = append(buf, c) 60 61 62 // below: non-printable ASCII 63 64 // NOTE python converts to \<letter> only \t \n \r (not e.g. \v) 65 // https://github.com/python/cpython/blob/v2.7.13-116-g1aa1803b3d/Objects/stringobject.c#L963 66 case c == '\t': 67 buf = append(buf, `\t`...) 68 case c == '\n': 69 buf = append(buf, `\n`...) 70 case c == '\r': 71 buf = append(buf, `\r`...) 72 73 default: 74 // NOTE c < ' ' or c == '\x7f' (the only non-printable ASCII character > space) here 75 // we already converted to \<letter> what python represents as such above 76 // everything else goes in numeric byte escapes 77 buf = append(buf, '\\', 'x', hexdigits[c>>4], hexdigits[c&0xf]) 78 } 79 80 i++ 81 82 // slow path - full UTF-8 decoding 83 default: 84 r, size := utf8.DecodeRune(b[i:]) 85 isize := i + size 86 87 switch { 88 case r == utf8.RuneError: 89 // decode error - just emit raw byte as escaped 90 buf = append(buf, '\\', 'x', hexdigits[c>>4], hexdigits[c&0xf]) 91 92 case strconv.IsPrint(r): 93 // printable utf-8 characters go as is 94 buf = append(buf, b[i:isize]...) 95 96 default: 97 // everything else goes in numeric byte escapes 98 for j := i; j < isize; j++ { 99 buf = append(buf, '\\', 'x', hexdigits[b[j]>>4], hexdigits[b[j]&0xf]) 100 } 101 } 102 103 i = isize 104 } 105 } 106 107 buf = append(buf, quote) 108 return buf 109 } 110 111 112 // Qpy appends string quoted as Python would do. 113 func (b *Buffer) Qpy(s string) *Buffer { 114 *b = AppendQuotePy(*b, s) 115 return b 116 } 117 118 // Qpyb appends []byte quoted as Python would do. 119 func (b *Buffer) Qpyb(x []byte) *Buffer { 120 *b = AppendQuotePyBytes(*b, x) 121 return b 122 } 123 124 // TODO Qpyc? 125 126 // Qpycb appends byte quoted as Python would do for a single-character string. 127 func (b *Buffer) Qpycb(c byte) *Buffer { 128 *b = AppendQuotePyBytes(*b, []byte{c}) // does not escape 129 return b 130 }