lab.nexedi.com/kirr/go123@v0.0.0-20240207185015-8299741fa871/xfmt/python.go (about)

     1  // Copyright (C) 2017-2019  Nexedi SA and Contributors.
     2  //                          Kirill Smelkov <kirr@nexedi.com>
     3  //
     4  // This program is free software: you can Use, Study, Modify and Redistribute
     5  // it under the terms of the GNU General Public License version 3, or (at your
     6  // option) any later version, as published by the Free Software Foundation.
     7  //
     8  // You can also Link and Combine this program with other software covered by
     9  // the terms of any of the Free Software licenses or any of the Open Source
    10  // Initiative approved licenses and Convey the resulting work. Corresponding
    11  // source of such a combination shall include the source code for all other
    12  // software used.
    13  //
    14  // This program is distributed WITHOUT ANY WARRANTY; without even the implied
    15  // warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
    16  //
    17  // See COPYING file for full licensing terms.
    18  // See https://www.nexedi.com/licensing for rationale and options.
    19  
    20  // quoting by Python rules
    21  
    22  package xfmt
    23  
    24  import (
    25  	"strconv"
    26  	"unicode/utf8"
    27  
    28  	"lab.nexedi.com/kirr/go123/mem"
    29  	"lab.nexedi.com/kirr/go123/xbytes"
    30  )
    31  
    32  // AppendQuotePy appends to buf Python quoting of s.
    33  func AppendQuotePy(buf []byte, s string) []byte {
    34  	return AppendQuotePyBytes(buf, mem.Bytes(s))
    35  }
    36  
    37  // AppendQuotePyBytes appends to buf Python quoting of b.
    38  func AppendQuotePyBytes(buf, b []byte) []byte {
    39  	// smartquotes: choose ' or " as quoting character
    40  	// https://github.com/python/cpython/blob/v2.7.13-116-g1aa1803b3d/Objects/stringobject.c#L947
    41  	quote := byte('\'')
    42  	if xbytes.ContainsByte(b, '\'') && !xbytes.ContainsByte(b, '"') {
    43  		quote = '"'
    44  	}
    45  
    46  	buf = append(buf, quote)
    47  
    48  	for i := 0; i < len(b); {
    49  		c := b[i]
    50  		switch {
    51  		// fast path - ASCII only - trying to avoid UTF-8 decoding
    52  		case c < utf8.RuneSelf:
    53  			switch {
    54  				case c == '\\' || c == quote:
    55  					buf = append(buf, '\\', c)
    56  
    57  				case ' ' <= c && c <= '\x7e':
    58  					// printable ASCII
    59  					buf = append(buf, c)
    60  
    61  
    62  				// below: non-printable ASCII
    63  
    64  				// NOTE python converts to \<letter> only \t \n \r  (not e.g. \v)
    65  				// https://github.com/python/cpython/blob/v2.7.13-116-g1aa1803b3d/Objects/stringobject.c#L963
    66  				case c == '\t':
    67  					buf = append(buf, `\t`...)
    68  				case c == '\n':
    69  					buf = append(buf, `\n`...)
    70  				case c == '\r':
    71  					buf = append(buf, `\r`...)
    72  
    73  				default:
    74  					// NOTE c < ' ' or c == '\x7f' (the only non-printable ASCII character > space) here
    75  					// we already converted to \<letter> what python represents as such above
    76  					// everything else goes in numeric byte escapes
    77  					buf = append(buf, '\\', 'x', hexdigits[c>>4], hexdigits[c&0xf])
    78  			}
    79  
    80  			i++
    81  
    82  		// slow path - full UTF-8 decoding
    83  		default:
    84  			r, size := utf8.DecodeRune(b[i:])
    85  			isize := i + size
    86  
    87  			switch {
    88  			case r == utf8.RuneError:
    89  				// decode error - just emit raw byte as escaped
    90  				buf = append(buf, '\\', 'x', hexdigits[c>>4], hexdigits[c&0xf])
    91  
    92  			case strconv.IsPrint(r):
    93  				// printable utf-8 characters go as is
    94  				buf = append(buf, b[i:isize]...)
    95  
    96  			default:
    97  				// everything else goes in numeric byte escapes
    98  				for j := i; j < isize; j++ {
    99  					buf = append(buf, '\\', 'x', hexdigits[b[j]>>4], hexdigits[b[j]&0xf])
   100  				}
   101  			}
   102  
   103  			i = isize
   104  		}
   105  	}
   106  
   107  	buf = append(buf, quote)
   108  	return buf
   109  }
   110  
   111  
   112  // Qpy appends string quoted as Python would do.
   113  func (b *Buffer) Qpy(s string) *Buffer {
   114  	*b = AppendQuotePy(*b, s)
   115  	return b
   116  }
   117  
   118  // Qpyb appends []byte quoted as Python would do.
   119  func (b *Buffer) Qpyb(x []byte) *Buffer {
   120  	*b = AppendQuotePyBytes(*b, x)
   121  	return b
   122  }
   123  
   124  // TODO Qpyc?
   125  
   126  // Qpycb appends byte quoted as Python would do for a single-character string.
   127  func (b *Buffer) Qpycb(c byte) *Buffer {
   128  	*b = AppendQuotePyBytes(*b, []byte{c})	// does not escape
   129  	return b
   130  }