modernc.org/libc@v1.24.1/printf.go

modernc.org/libc@v1.24.1/printf.go (about)

     1  // Copyright 2020 The Libc Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package libc // import "modernc.org/libc"
     6  
     7  import (
     8  	"bytes"
     9  	"fmt"
    10  	"runtime"
    11  	"strconv"
    12  	"strings"
    13  	"unsafe"
    14  )
    15  
// Length modifier codes. parseLengthModifier returns one of these values and
// printfConversion uses it to decide how wide the next variadic argument is.
const (
	modNone = iota // no length modifier; the zero value returned when none is present
	modHH          // "hh": argument is narrowed to 8 bits
	modH           // "h": argument is narrowed to 16 bits
	modL           // "l": 64-bit argument, except on Windows where it is handled like modNone
	modLL          // "ll": 64-bit argument
	modLD          // "L": long double modifier for floating point conversions
	modQ           // declared but never assigned in the code visible here; presumably "q" - TODO confirm
	modCapitalL    // declared but never assigned in the code visible here
	modJ           // declared but never assigned in the code visible here; presumably "j" - TODO confirm
	modZ           // declared but never assigned in the code visible here; presumably "z" - TODO confirm
	modCapitalZ    // declared but never assigned in the code visible here; presumably "Z" - TODO confirm
	modT           // declared but never assigned in the code visible here; presumably "t" - TODO confirm
	mod32          // Windows "I32" prefix (and "Ix"/"IX" on 32-bit platforms): 32-bit argument
	mod64          // Windows "I64" prefix: 64-bit argument
)
    32  
    33  // Format of the format string
    34  //
    35  // The format string is a character string, beginning and ending in its initial
    36  // shift state, if any.  The format string is composed of zero or more
    37  // directives: ordinary  characters  (not  %), which  are  copied unchanged to
    38  // the output stream; and conversion specifications, each of which results in
    39  // fetching zero or more subsequent arguments.
    40  func printf(format, args uintptr) []byte {
    41  	format0 := format
    42  	args0 := args
    43  	buf := bytes.NewBuffer(nil)
    44  	for {
    45  		switch c := *(*byte)(unsafe.Pointer(format)); c {
    46  		case '%':
    47  			format = printfConversion(buf, format, &args)
    48  		case 0:
    49  			if dmesgs {
    50  				dmesg("%v: %q, %#x -> %q", origin(1), GoString(format0), args0, buf.Bytes())
    51  			}
    52  			return buf.Bytes()
    53  		default:
    54  			format++
    55  			buf.WriteByte(c)
    56  		}
    57  	}
    58  }
    59  
    60  // Each conversion specification is introduced by the character %, and ends
    61  // with a conversion specifier.  In between there may be (in this order) zero
    62  // or more flags, an optional minimum field width, an optional  precision  and
    63  // an optional length modifier.
    64  func printfConversion(buf *bytes.Buffer, format uintptr, args *uintptr) uintptr {
    65  	format++ // '%'
    66  	spec := "%"
    67  
    68  	// Flags characters
    69  	//
    70  	// The character % is followed by zero or more of the following flags:
    71  flags:
    72  	for {
    73  		switch c := *(*byte)(unsafe.Pointer(format)); c {
    74  		case '#':
    75  			// The value should be converted to an "alternate form".  For o conversions,
    76  			// the first character of the output string is made zero (by prefixing a 0 if
    77  			// it was not zero already).  For x and  X  conversions,  a nonzero result has
    78  			// the string "0x" (or "0X" for X conversions) prepended to it.  For a, A, e,
    79  			// E, f, F, g, and G conversions, the result will always contain a decimal
    80  			// point, even if no digits follow it (normally, a decimal point appears in the
    81  			// results of those conversions only if a digit follows).  For g and G
    82  			// conversions, trailing  zeros are not removed from the result as they would
    83  			// otherwise be.  For other conversions, the result is undefined.
    84  			format++
    85  			spec += "#"
    86  		case '0':
    87  			// The  value  should  be zero padded.  For d, i, o, u, x, X, a, A, e, E, f, F,
    88  			// g, and G conversions, the converted value is padded on the left with zeros
    89  			// rather than blanks.  If the 0 and - flags both appear, the 0 flag is
    90  			// ignored.  If a precision is given with a numeric conversion (d, i, o, u, x,
    91  			// and X), the 0 flag is ignored.  For other conversions, the  behav‐ ior is
    92  			// undefined.
    93  			format++
    94  			spec += "0"
    95  		case '-':
    96  			// The  converted value is to be left adjusted on the field boundary.  (The
    97  			// default is right justification.)  The converted value is padded on the right
    98  			// with blanks, rather than on the left with blanks or zeros.  A - overrides a
    99  			// 0 if both are given.
   100  			format++
   101  			spec += "-"
   102  		case ' ':
   103  			// A blank should be left before a positive number (or empty string) produced
   104  			// by a signed conversion.
   105  			format++
   106  			spec += " "
   107  		case '+':
   108  			// A sign (+ or -) should always be placed before a number produced by a signed
   109  			// conversion.  By default, a sign is used only for negative numbers.  A +
   110  			// overrides a space  if  both  are used.
   111  			format++
   112  			spec += "+"
   113  		default:
   114  			break flags
   115  		}
   116  	}
   117  	format, width, hasWidth := parseFieldWidth(format)
   118  	if hasWidth {
   119  		spec += strconv.Itoa(width)
   120  	}
   121  	format, prec, hasPrecision := parsePrecision(format, args)
   122  	format, mod := parseLengthModifier(format)
   123  
   124  	var str string
   125  
   126  more:
   127  	// Conversion specifiers
   128  	//
   129  	// A character that specifies the type of conversion to be applied.  The
   130  	// conversion specifiers and their meanings are:
   131  	switch c := *(*byte)(unsafe.Pointer(format)); c {
   132  	case 'd', 'i':
   133  		// The  int argument is converted to signed decimal notation.  The precision,
   134  		// if any, gives the minimum number of digits that must appear; if the
   135  		// converted value requires fewer digits, it is padded on the left with zeros.
   136  		// The default precision is 1.  When 0 is printed with an explicit precision 0,
   137  		// the output is empty.
   138  		format++
   139  		var arg int64
   140  		if isWindows && mod == modL {
   141  			mod = modNone
   142  		}
   143  		switch mod {
   144  		case modL, modLL, mod64:
   145  			arg = VaInt64(args)
   146  		case modH:
   147  			arg = int64(int16(VaInt32(args)))
   148  		case modHH:
   149  			arg = int64(int8(VaInt32(args)))
   150  		case mod32, modNone:
   151  			arg = int64(VaInt32(args))
   152  		default:
   153  			panic(todo("", mod))
   154  		}
   155  
   156  		if arg == 0 && hasPrecision && prec == 0 {
   157  			break
   158  		}
   159  
   160  		if hasPrecision {
   161  			panic(todo("", prec))
   162  		}
   163  
   164  		f := spec + "d"
   165  		str = fmt.Sprintf(f, arg)
   166  	case 'u':
   167  		// The unsigned int argument is converted to unsigned decimal notation. The
   168  		// precision, if any, gives the minimum number of digits that must appear; if
   169  		// the converted value requires fewer digits, it is padded on the left with
   170  		// zeros.  The default precision is 1.  When 0 is printed with an explicit
   171  		// precision 0, the output is empty.
   172  		format++
   173  		var arg uint64
   174  		if isWindows && mod == modL {
   175  			mod = modNone
   176  		}
   177  		switch mod {
   178  		case modNone:
   179  			arg = uint64(VaUint32(args))
   180  		case modL, modLL, mod64:
   181  			arg = VaUint64(args)
   182  		case modH:
   183  			arg = uint64(uint16(VaInt32(args)))
   184  		case modHH:
   185  			arg = uint64(uint8(VaInt32(args)))
   186  		case mod32:
   187  			arg = uint64(VaInt32(args))
   188  		default:
   189  			panic(todo("", mod))
   190  		}
   191  
   192  		if arg == 0 && hasPrecision && prec == 0 {
   193  			break
   194  		}
   195  
   196  		if hasPrecision {
   197  			panic(todo("", prec))
   198  		}
   199  
   200  		f := spec + "d"
   201  		str = fmt.Sprintf(f, arg)
   202  	case 'o':
   203  		// The unsigned int argument is converted to unsigned octal notation. The
   204  		// precision, if any, gives the minimum number of digits that must appear; if
   205  		// the converted value requires fewer digits, it is padded on the left with
   206  		// zeros.  The default precision is 1.  When 0 is printed with an explicit
   207  		// precision 0, the output is empty.
   208  		format++
   209  		var arg uint64
   210  		if isWindows && mod == modL {
   211  			mod = modNone
   212  		}
   213  		switch mod {
   214  		case modNone:
   215  			arg = uint64(VaUint32(args))
   216  		case modL, modLL, mod64:
   217  			arg = VaUint64(args)
   218  		case modH:
   219  			arg = uint64(uint16(VaInt32(args)))
   220  		case modHH:
   221  			arg = uint64(uint8(VaInt32(args)))
   222  		case mod32:
   223  			arg = uint64(VaInt32(args))
   224  		default:
   225  			panic(todo("", mod))
   226  		}
   227  
   228  		if arg == 0 && hasPrecision && prec == 0 {
   229  			break
   230  		}
   231  
   232  		if hasPrecision {
   233  			panic(todo("", prec))
   234  		}
   235  
   236  		f := spec + "o"
   237  		str = fmt.Sprintf(f, arg)
   238  	case 'I':
   239  		if !isWindows {
   240  			panic(todo("%#U", c))
   241  		}
   242  
   243  		format++
   244  		switch c = *(*byte)(unsafe.Pointer(format)); c {
   245  		case 'x', 'X':
   246  			// https://docs.microsoft.com/en-us/windows/win32/api/winuser/nf-winuser-wsprintfa
   247  			//
   248  			// Ix, IX
   249  			//
   250  			// 64-bit unsigned hexadecimal integer in lowercase or uppercase on 64-bit
   251  			// platforms, 32-bit unsigned hexadecimal integer in lowercase or uppercase on
   252  			// 32-bit platforms.
   253  			if unsafe.Sizeof(int(0)) == 4 {
   254  				mod = mod32
   255  			}
   256  		case '3':
   257  			// https://en.wikipedia.org/wiki/Printf_format_string#Length_field
   258  			//
   259  			// I32	For integer types, causes printf to expect a 32-bit (double word) integer argument.
   260  			format++
   261  			switch c = *(*byte)(unsafe.Pointer(format)); c {
   262  			case '2':
   263  				format++
   264  				mod = mod32
   265  				goto more
   266  			default:
   267  				panic(todo("%#U", c))
   268  			}
   269  		case '6':
   270  			// https://en.wikipedia.org/wiki/Printf_format_string#Length_field
   271  			//
   272  			// I64	For integer types, causes printf to expect a 64-bit (quad word) integer argument.
   273  			format++
   274  			switch c = *(*byte)(unsafe.Pointer(format)); c {
   275  			case '4':
   276  				format++
   277  				mod = mod64
   278  				goto more
   279  			default:
   280  				panic(todo("%#U", c))
   281  			}
   282  		default:
   283  			panic(todo("%#U", c))
   284  		}
   285  		fallthrough
   286  	case 'X':
   287  		fallthrough
   288  	case 'x':
   289  		// The unsigned int argument is converted to unsigned hexadecimal notation.
   290  		// The letters abcdef are used for x  conversions;  the letters ABCDEF are used
   291  		// for X conversions.  The precision, if any, gives the minimum number of
   292  		// digits that must appear; if the converted value requires fewer digits, it is
   293  		// padded on the left with zeros.  The default precision is 1.  When 0 is
   294  		// printed with an explicit precision 0, the output is empty.
   295  		format++
   296  		var arg uint64
   297  		if isWindows && mod == modL {
   298  			mod = modNone
   299  		}
   300  		switch mod {
   301  		case modNone:
   302  			arg = uint64(VaUint32(args))
   303  		case modL, modLL, mod64:
   304  			arg = VaUint64(args)
   305  		case modH:
   306  			arg = uint64(uint16(VaInt32(args)))
   307  		case modHH:
   308  			arg = uint64(uint8(VaInt32(args)))
   309  		case mod32:
   310  			arg = uint64(VaInt32(args))
   311  		default:
   312  			panic(todo("", mod))
   313  		}
   314  
   315  		if arg == 0 && hasPrecision && prec == 0 {
   316  			break
   317  		}
   318  
   319  		if strings.Contains(spec, "#") && arg == 0 {
   320  			spec = strings.ReplaceAll(spec, "#", "")
   321  		}
   322  		var f string
   323  		switch {
   324  		case hasPrecision:
   325  			f = fmt.Sprintf("%s.%d%c", spec, prec, c)
   326  		default:
   327  			f = spec + string(c)
   328  		}
   329  		str = fmt.Sprintf(f, arg)
   330  	case 'e', 'E':
   331  		// The double argument is rounded and converted in the style [-]d.ddde±dd where
   332  		// there is one digit before the decimal-point character and the number of
   333  		// digits after it is equal to  the precision;  if the precision is missing, it
   334  		// is taken as 6; if the precision is zero, no decimal-point character appears.
   335  		// An E conversion uses the letter E (rather than e) to intro‐ duce the
   336  		// exponent.  The exponent always contains at least two digits; if the value is
   337  		// zero, the exponent is 00.
   338  		format++
   339  		arg := VaFloat64(args)
   340  		if !hasPrecision {
   341  			prec = 6
   342  		}
   343  		f := fmt.Sprintf("%s.%d%c", spec, prec, c)
   344  		str = fmt.Sprintf(f, arg)
   345  	case 'f', 'F':
   346  		// The double argument is rounded and converted to decimal notation in the
   347  		// style [-]ddd.ddd, where the number of digits after the decimal-point
   348  		// character  is  equal  to  the  precision specification.   If  the  precision
   349  		// is missing, it is taken as 6; if the precision is explicitly zero, no
   350  		// decimal-point character appears.  If a decimal point appears, at least one
   351  		// digit appears before it.
   352  		format++
   353  		arg := VaFloat64(args)
   354  		if !hasPrecision {
   355  			prec = 6
   356  		}
   357  		f := fmt.Sprintf("%s.%d%c", spec, prec, c)
   358  		str = fixNanInf(fmt.Sprintf(f, arg))
   359  	case 'G':
   360  		fallthrough
   361  	case 'g':
   362  		// The double argument is converted in style f or e (or F or E for G
   363  		// conversions).  The precision specifies the number of significant digits.  If
   364  		// the precision is missing, 6 digits are given;  if the precision is zero, it
   365  		// is treated as 1.  Style e is used if the exponent from its conversion is
   366  		// less than -4 or greater than or equal to the precision.  Trailing zeros are
   367  		// removed from the fractional part of the result; a decimal point appears only
   368  		// if it is followed by at least one digit.
   369  		format++
   370  		arg := VaFloat64(args)
   371  		if !hasPrecision {
   372  			prec = 6
   373  		}
   374  		if prec == 0 {
   375  			prec = 1
   376  		}
   377  
   378  		f := fmt.Sprintf("%s.%d%c", spec, prec, c)
   379  		str = fixNanInf(fmt.Sprintf(f, arg))
   380  	case 's':
   381  		// If  no l modifier is present: the const char * argument is expected to be a
   382  		// pointer to an array of character type (pointer to a string).  Characters
   383  		// from the array are written up to (but not including) a terminating null byte
   384  		// ('\0'); if a precision is specified, no more than the number specified are
   385  		// written.  If a precision  is  given,  no  null  byte  need  be present; if
   386  		// the precision is not specified, or is greater than the size of the array,
   387  		// the array must contain a terminating null byte.
   388  		//
   389  		// If  an  l  modifier  is  present: the const wchar_t * argument is expected
   390  		// to be a pointer to an array of wide characters.  Wide characters from the
   391  		// array are converted to multibyte characters (each by a call to the
   392  		// wcrtomb(3) function, with a conversion state starting in the initial state
   393  		// before the first wide character), up to and including a terminating null
   394  		// wide  character.   The  resulting  multibyte  characters are written up to
   395  		// (but not including) the terminating null byte.  If a precision is specified,
   396  		// no more bytes than the number specified are written, but no partial
   397  		// multibyte characters are written.  Note that the precision determines the
   398  		// number of bytes written, not the number of wide characters or  screen
   399  		// positions.   The  array  must contain a terminating null wide character,
   400  		// unless a precision is given and it is so small that the number of bytes
   401  		// written exceeds it before the end of the array is reached.
   402  		format++
   403  		arg := VaUintptr(args)
   404  		switch mod {
   405  		case modNone:
   406  			var f string
   407  			switch {
   408  			case hasPrecision:
   409  				f = fmt.Sprintf("%s.%ds", spec, prec)
   410  				str = fmt.Sprintf(f, GoString(arg))
   411  			default:
   412  				f = spec + "s"
   413  				str = fmt.Sprintf(f, GoString(arg))
   414  			}
   415  		default:
   416  			panic(todo(""))
   417  		}
   418  	case 'p':
   419  		// The void * pointer argument is printed in hexadecimal (as if by %#x or
   420  		// %#lx).
   421  		format++
   422  		switch runtime.GOOS {
   423  		case "windows":
   424  			switch runtime.GOARCH {
   425  			case "386", "arm":
   426  				fmt.Fprintf(buf, "%08X", VaUintptr(args))
   427  			default:
   428  				fmt.Fprintf(buf, "%016X", VaUintptr(args))
   429  			}
   430  		default:
   431  			fmt.Fprintf(buf, "%#0x", VaUintptr(args))
   432  		}
   433  	case 'c':
   434  		// If no l modifier is present, the int argument is converted to an unsigned
   435  		// char, and the resulting character is written.  If an l modifier is present,
   436  		// the wint_t (wide character) ar‐ gument is converted to a multibyte sequence
   437  		// by a call to the wcrtomb(3) function, with a conversion state starting in
   438  		// the initial state, and the resulting multibyte string is  writ‐ ten.
   439  		format++
   440  		switch mod {
   441  		case modNone:
   442  			arg := VaInt32(args)
   443  			buf.WriteByte(byte(arg))
   444  		default:
   445  			panic(todo(""))
   446  		}
   447  	case '%':
   448  		// A '%' is written.  No argument is converted.  The complete conversion
   449  		// specification is '%%'.
   450  		format++
   451  		buf.WriteByte('%')
   452  	default:
   453  		panic(todo("%#U", c))
   454  	}
   455  
   456  	buf.WriteString(str)
   457  	return format
   458  }
   459  
   460  // Field width
   461  //
   462  // An optional decimal digit string (with nonzero first digit) specifying a
   463  // minimum field width.  If the converted value has fewer characters than the
   464  // field width, it will be padded with spa‐ ces on the left (or right, if the
   465  // left-adjustment flag has been given).  Instead of a decimal digit string one
   466  // may write "*" or "*m$" (for some decimal integer m) to specify that the
   467  // field width  is  given  in the next argument, or in the m-th argument,
   468  // respectively, which must be of type int.  A negative field width is taken as
   469  // a '-' flag followed by a positive field width.  In no case does a
   470  // nonexistent or small field width cause truncation of a field; if the result
   471  // of a conversion is wider than the field width, the field is expanded to
   472  // contain the conversion result.
   473  func parseFieldWidth(format uintptr) (_ uintptr, n int, ok bool) {
   474  	first := true
   475  	for {
   476  		var digit int
   477  		switch c := *(*byte)(unsafe.Pointer(format)); {
   478  		case first && c == '0':
   479  			return format, n, ok
   480  		case first && c == '*':
   481  			panic(todo(""))
   482  		case c >= '0' && c <= '9':
   483  			format++
   484  			ok = true
   485  			first = false
   486  			digit = int(c) - '0'
   487  		default:
   488  			return format, n, ok
   489  		}
   490  
   491  		n0 := n
   492  		n = 10*n + digit
   493  		if n < n0 {
   494  			panic(todo(""))
   495  		}
   496  	}
   497  }
   498  
   499  // Precision
   500  //
   501  // An  optional precision, in the form of a period ('.')  followed by an
   502  // optional decimal digit string.  Instead of a decimal digit string one may
   503  // write "*" or "*m$" (for some decimal integer m) to specify that the
   504  // precision is given in the next argument, or in the m-th argument,
   505  // respectively, which must be of type int.  If the precision is given as just
   506  // '.', the  precision  is taken  to  be  zero.  A negative precision is taken
   507  // as if the precision were omitted.  This gives the minimum number of digits
   508  // to appear for d, i, o, u, x, and X conversions, the number of digits to
   509  // appear after the radix character for a, A, e, E, f, and F conversions, the
   510  // maximum number of significant digits for g and G conversions, or the maximum
   511  // number of characters to be printed from a string for s and S conversions.
   512  func parsePrecision(format uintptr, args *uintptr) (_ uintptr, n int, ok bool) {
   513  	for {
   514  		switch c := *(*byte)(unsafe.Pointer(format)); c {
   515  		case '.':
   516  			format++
   517  			first := true
   518  			for {
   519  				switch c := *(*byte)(unsafe.Pointer(format)); {
   520  				case first && c == '*':
   521  					format++
   522  					n = int(VaInt32(args))
   523  					return format, n, true
   524  				case c >= '0' && c <= '9':
   525  					format++
   526  					first = false
   527  					n0 := n
   528  					n = 10*n + (int(c) - '0')
   529  					if n < n0 {
   530  						panic(todo(""))
   531  					}
   532  				default:
   533  					return format, n, true
   534  				}
   535  			}
   536  		default:
   537  			return format, 0, false
   538  		}
   539  	}
   540  }
   541  
   542  // Length modifier
   543  //
   544  // Here, "integer conversion" stands for d, i, o, u, x, or X conversion.
   545  //
   546  // hh     A following integer conversion corresponds to a signed char or
   547  // unsigned char argument, or a following n conversion corresponds to a pointer
   548  // to a signed char argument.
   549  //
   550  // h      A following integer conversion corresponds to a short int or unsigned
   551  // short int argument, or a following n conversion corresponds to a pointer to
   552  // a short int argument.
   553  //
   554  // l      (ell)  A following integer conversion corresponds to a long int or
   555  // unsigned long int argument, or a following n conversion corresponds to a
   556  // pointer to a long int argument, or a fol‐ lowing c conversion corresponds to
   557  // a wint_t argument, or a following s conversion corresponds to a pointer to
   558  // wchar_t argument.
   559  //
   560  // ll     (ell-ell).  A following integer conversion corresponds to a long long
   561  // int or unsigned long long int argument, or a following n conversion
   562  // corresponds to a pointer to a long long int argument.
   563  //
   564  // q      A synonym for ll.  This is a nonstandard extension, derived from BSD;
   565  // avoid its use in new code.
   566  //
   567  // L      A following a, A, e, E, f, F, g, or G conversion corresponds to a
   568  // long double argument.  (C99 allows %LF, but SUSv2 does not.)
   569  //
   570  // j      A following integer conversion corresponds to an intmax_t or
   571  // uintmax_t argument, or a following n conversion corresponds to a pointer to
   572  // an intmax_t argument.
   573  //
   574  // z      A following integer conversion corresponds to a size_t or ssize_t
   575  // argument, or a following n conversion corresponds to a pointer to a size_t
   576  // argument.
   577  //
   578  // Z      A nonstandard synonym for z that predates the appearance of z.  Do
   579  // not use in new code.
   580  //
   581  // t      A following integer conversion corresponds to a ptrdiff_t argument,
   582  // or a following n conversion corresponds to a pointer to a ptrdiff_t
   583  // argument.
   584  
   585  func parseLengthModifier(format uintptr) (_ uintptr, n int) {
   586  	switch c := *(*byte)(unsafe.Pointer(format)); c {
   587  	case 'h':
   588  		format++
   589  		n = modH
   590  		switch c := *(*byte)(unsafe.Pointer(format)); c {
   591  		case 'h':
   592  			format++
   593  			n = modHH
   594  		}
   595  		return format, n
   596  	case 'l':
   597  		format++
   598  		n = modL
   599  		switch c := *(*byte)(unsafe.Pointer(format)); c {
   600  		case 'l':
   601  			format++
   602  			n = modLL
   603  		}
   604  		return format, n
   605  	case 'q':
   606  		panic(todo(""))
   607  	case 'L':
   608  		format++
   609  		n = modLD
   610  		return format, n
   611  	case 'j':
   612  		panic(todo(""))
   613  	case 'z':
   614  		panic(todo(""))
   615  	case 'Z':
   616  		panic(todo(""))
   617  	case 't':
   618  		panic(todo(""))
   619  	default:
   620  		return format, 0
   621  	}
   622  }
   623  
   624  func fixNanInf(s string) string {
   625  	switch s {
   626  	case "NaN":
   627  		return "nan"
   628  	case "+Inf", "-Inf":
   629  		return "inf"
   630  	default:
   631  		return s
   632  	}
   633  }