github.com/afumu/libc@v0.0.6/scanf.go

github.com/afumu/libc@v0.0.6/scanf.go (about)

     1  // Copyright 2020 The Libc Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package libc // import "github.com/afumu/libc"
     6  
     7  import (
     8  	"strings"
     9  	"unsafe"
    10  )
    11  
    12  // The format string consists of a sequence of directives which describe how to
    13  // process the sequence of input characters.  If processing of a directive
    14  // fails, no further input  is  read,  and scanf()  returns.   A "failure" can
    15  // be either of the following: input failure, meaning that input characters
    16  // were unavailable, or matching failure, meaning that the input was
    17  // inappropriate.
    18  func scanf(r *strings.Reader, format, args uintptr) (nvalues int32) {
    19  	// var src []byte //TODO-
    20  	var ok bool
    21  out:
    22  	for {
    23  		c := *(*byte)(unsafe.Pointer(format))
    24  		// src = append(src, c) //TODO-
    25  		switch c {
    26  		case '%':
    27  			var n int
    28  			var match bool
    29  			format, n, match = scanfConversion(r, format, &args)
    30  			if !match {
    31  				break out
    32  			}
    33  
    34  			nvalues += int32(n)
    35  			ok = true
    36  		case 0:
    37  			break out
    38  		case ' ', '\t', '\n', '\r', '\v', '\f':
    39  			format = skipWhiteSpace(format)
    40  			ok = true
    41  		next:
    42  			for {
    43  				c, err := r.ReadByte()
    44  				if err != nil {
    45  					break out
    46  				}
    47  
    48  				switch c {
    49  				case ' ', '\t', '\n', '\r', '\v', '\f':
    50  					// nop
    51  				default:
    52  					r.UnreadByte()
    53  					break next
    54  				}
    55  			}
    56  		default:
    57  			c2, err := r.ReadByte()
    58  			if err != nil {
    59  				break out
    60  			}
    61  
    62  			if c2 != c {
    63  				r.UnreadByte()
    64  				break out
    65  			}
    66  
    67  			format++
    68  			ok = true
    69  		}
    70  	}
    71  	if ok {
    72  		return nvalues
    73  	}
    74  
    75  	return -1 // stdio.EOF but not defined for windows
    76  }
    77  
    78  func scanfConversion(r *strings.Reader, format uintptr, args *uintptr) (_ uintptr, nvalues int, match bool) {
    79  	format++ // '%'
    80  
    81  	// Each conversion specification in format begins with either the character '%'
    82  	// or the character sequence "%n$" (see below for the distinction) followed by:
    83  
    84  	mod := 0
    85  	width := -1
    86  flags:
    87  	for {
    88  		switch c := *(*byte)(unsafe.Pointer(format)); c {
    89  		case '*':
    90  			// An  optional '*' assignment-suppression character: scanf() reads input as
    91  			// directed by the conversion specification, but discards the input.  No
    92  			// corresponding pointer argument is re‐ quired, and this specification is not
    93  			// included in the count of successful assignments returned by scanf().
    94  			format++
    95  			panic(todo(""))
    96  		case '\'':
    97  			// For decimal conversions, an optional quote character (').  This specifies
    98  			// that the input number may include thousands' separators as defined by the
    99  			// LC_NUMERIC category of  the  current locale.  (See setlocale(3).)  The quote
   100  			// character may precede or follow the '*' assignment-suppression character.
   101  			format++
   102  			panic(todo(""))
   103  		case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
   104  			// An  optional  decimal  integer  which  specifies  the maximum field width.
   105  			// Reading of characters stops either when this maximum is reached or when a
   106  			// nonmatching character is found, whichever happens first.  Most conversions
   107  			// discard initial white space characters (the exceptions are noted below), and
   108  			// these discarded characters don't  count  toward  the  maximum field width.
   109  			// String input conversions store a terminating null byte ('\0') to mark the
   110  			// end of the input; the maximum field width does not include this terminator.
   111  			width = 0
   112  		num:
   113  			for {
   114  				var digit int
   115  				switch c := *(*byte)(unsafe.Pointer(format)); {
   116  				default:
   117  					break num
   118  				case c >= '0' && c <= '9':
   119  					format++
   120  					digit = int(c) - '0'
   121  				}
   122  				width0 := width
   123  				width = 10*width + digit
   124  				if width < width0 {
   125  					panic(todo(""))
   126  				}
   127  			}
   128  		case 'h', 'j', 'l', 'L', 'q', 't', 'z':
   129  			format, mod = parseLengthModifier(format)
   130  		default:
   131  			break flags
   132  		}
   133  	}
   134  
   135  	// A conversion specifier that specifies the type of input conversion to be
   136  	// performed.
   137  	switch c := *(*byte)(unsafe.Pointer(format)); c {
   138  	case '%':
   139  		// Matches a literal '%'.  That is, %% in the format string matches a single
   140  		// input '%' character.  No conversion is done (but initial white space
   141  		// characters are discarded), and assign‐ ment does not occur.
   142  		format++
   143  		panic(todo(""))
   144  	case 'd':
   145  		// Matches an optionally signed decimal integer; the next pointer must be a
   146  		// pointer to int.
   147  		format++
   148  		skipReaderWhiteSpace(r)
   149  		var digit, n uint64
   150  		allowSign := true
   151  		neg := false
   152  	dec:
   153  		for ; width != 0; width-- {
   154  			c, err := r.ReadByte()
   155  			if err != nil {
   156  				if match {
   157  					break dec
   158  				}
   159  
   160  				panic(todo("", err))
   161  			}
   162  
   163  			if allowSign {
   164  				switch c {
   165  				case '-':
   166  					allowSign = false
   167  					neg = true
   168  					continue
   169  				case '+':
   170  					allowSign = false
   171  					continue
   172  				}
   173  			}
   174  
   175  			switch {
   176  			case c >= '0' && c <= '9':
   177  				digit = uint64(c) - '0'
   178  			default:
   179  				r.UnreadByte()
   180  				break dec
   181  			}
   182  			match = true
   183  			n0 := n
   184  			n = n*10 + digit
   185  			if n < n0 {
   186  				panic(todo(""))
   187  			}
   188  		}
   189  		if !match {
   190  			break
   191  		}
   192  
   193  		arg := VaUintptr(args)
   194  		v := int64(n)
   195  		if neg {
   196  			v = -v
   197  		}
   198  		switch mod {
   199  		case modNone:
   200  			*(*int32)(unsafe.Pointer(arg)) = int32(v)
   201  		case modH:
   202  			*(*int16)(unsafe.Pointer(arg)) = int16(v)
   203  		case modHH:
   204  			*(*int8)(unsafe.Pointer(arg)) = int8(v)
   205  		case modL:
   206  			*(*long)(unsafe.Pointer(arg)) = long(n)
   207  		default:
   208  			panic(todo(""))
   209  		}
   210  		nvalues = 1
   211  	case 'D':
   212  		// Equivalent  to  ld;  this  exists  only for backward compatibility.  (Note:
   213  		// thus only in libc4.  In libc5 and glibc the %D is silently ignored, causing
   214  		// old programs to fail mysteriously.)
   215  		format++
   216  		panic(todo(""))
   217  	case 'i':
   218  		// Matches an optionally signed integer; the next pointer must be a pointer to
   219  		// int.  The integer is read in base 16 if it begins with 0x or 0X, in base 8
   220  		// if it begins with  0,  and  in base 10 otherwise.  Only characters that
   221  		// correspond to the base are used.
   222  		format++
   223  		panic(todo(""))
   224  	case 'o':
   225  		// Matches an unsigned octal integer; the next pointer must be a pointer to
   226  		// unsigned int.
   227  		format++
   228  		panic(todo(""))
   229  	case 'u':
   230  		// Matches an unsigned decimal integer; the next pointer must be a pointer to
   231  		// unsigned int.
   232  		format++
   233  		panic(todo(""))
   234  	case 'x', 'X':
   235  		// Matches an unsigned hexadecimal integer; the next pointer must be a pointer
   236  		// to unsigned int.
   237  		format++
   238  		skipReaderWhiteSpace(r)
   239  		var digit, n uint64
   240  		allowPrefix := true
   241  		var b []byte
   242  	hex:
   243  		for ; width != 0; width-- {
   244  			c, err := r.ReadByte()
   245  			if err != nil {
   246  				if match {
   247  					break hex
   248  				}
   249  
   250  				panic(todo("", err))
   251  			}
   252  
   253  			if allowPrefix {
   254  				if len(b) == 1 && b[0] == '0' && (c == 'x' || c == 'X') {
   255  					allowPrefix = false
   256  					match = false
   257  					b = nil
   258  					continue
   259  				}
   260  
   261  				b = append(b, c)
   262  			}
   263  
   264  			switch {
   265  			case c >= '0' && c <= '9':
   266  				digit = uint64(c) - '0'
   267  			case c >= 'a' && c <= 'f':
   268  				digit = uint64(c) - 'a' + 10
   269  			case c >= 'A' && c <= 'F':
   270  				digit = uint64(c) - 'A' + 10
   271  			default:
   272  				r.UnreadByte()
   273  				break hex
   274  			}
   275  			match = true
   276  			n0 := n
   277  			n = n<<4 + digit
   278  			if n < n0 {
   279  				panic(todo(""))
   280  			}
   281  		}
   282  		if !match {
   283  			break
   284  		}
   285  
   286  		arg := VaUintptr(args)
   287  		switch mod {
   288  		case modNone:
   289  			*(*uint32)(unsafe.Pointer(arg)) = uint32(n)
   290  		case modH:
   291  			*(*uint16)(unsafe.Pointer(arg)) = uint16(n)
   292  		case modHH:
   293  			*(*byte)(unsafe.Pointer(arg)) = byte(n)
   294  		case modL:
   295  			*(*ulong)(unsafe.Pointer(arg)) = ulong(n)
   296  		default:
   297  			panic(todo(""))
   298  		}
   299  		nvalues = 1
   300  	case 'f', 'e', 'g', 'E', 'a':
   301  		// Matches an optionally signed floating-point number; the next pointer must be
   302  		// a pointer to float.
   303  		format++
   304  		panic(todo(""))
   305  	case 's':
   306  		// Matches  a  sequence of non-white-space characters; the next pointer must be
   307  		// a pointer to the initial element of a character array that is long enough to
   308  		// hold the input sequence and the terminating null byte ('\0'), which is added
   309  		// automatically.  The input string stops at white space or at the maximum
   310  		// field width, whichever occurs first.
   311  		format++
   312  		panic(todo(""))
   313  	case 'c':
   314  		// Matches a sequence of characters whose length is specified by the maximum
   315  		// field width (default 1); the next pointer must be a pointer to char, and
   316  		// there must be enough room for  all the characters (no terminating null byte
   317  		// is added).  The usual skip of leading white space is suppressed.  To skip
   318  		// white space first, use an explicit space in the format.
   319  		format++
   320  		panic(todo(""))
   321  	case '[':
   322  		// Matches  a nonempty sequence of characters from the specified set of
   323  		// accepted characters; the next pointer must be a pointer to char, and there
   324  		// must be enough room for all the char‐ acters in the string, plus a
   325  		// terminating null byte.  The usual skip of leading white space is suppressed.
   326  		// The string is to be made up of characters in (or not in) a particular set;
   327  		// the  set  is defined by the characters between the open bracket [ character
   328  		// and a close bracket ] character.  The set excludes those characters if the
   329  		// first character after the open bracket is a circumflex (^).  To include a
   330  		// close bracket in the set, make it the first character after the open bracket
   331  		// or the circumflex; any other position will end the set.   The hyphen
   332  		// character - is also special; when placed between two other characters, it
   333  		// adds all intervening characters to the set.  To include a hyphen, make it
   334  		// the last character before the final close bracket.  For instance, [^]0-9-]
   335  		// means the set "everything except close bracket, zero through nine, and
   336  		// hyphen".  The string ends with the appearance of a  character not in the
   337  		// (or, with a circumflex, in) set or when the field width runs out.
   338  		format++
   339  		panic(todo(""))
   340  	case 'p':
   341  		// Matches a pointer value (as printed by %p in printf(3); the next pointer
   342  		// must be a pointer to a pointer to void.
   343  		format++
   344  		skipReaderWhiteSpace(r)
   345  		c, err := r.ReadByte()
   346  		if err != nil {
   347  			panic(todo(""))
   348  		}
   349  
   350  		if c != '0' {
   351  			r.UnreadByte()
   352  			panic(todo(""))
   353  		}
   354  
   355  		if c, err = r.ReadByte(); err != nil {
   356  			panic(todo(""))
   357  		}
   358  
   359  		if c != 'x' && c != 'X' {
   360  			r.UnreadByte()
   361  			panic(todo(""))
   362  		}
   363  
   364  		var digit, n uint64
   365  	ptr:
   366  		for ; width != 0; width-- {
   367  			c, err := r.ReadByte()
   368  			if err != nil {
   369  				if match {
   370  					break ptr
   371  				}
   372  
   373  				panic(todo(""))
   374  			}
   375  
   376  			switch {
   377  			case c >= '0' && c <= '9':
   378  				digit = uint64(c) - '0'
   379  			case c >= 'a' && c <= 'f':
   380  				digit = uint64(c) - 'a' + 10
   381  			case c >= 'A' && c <= 'F':
   382  				digit = uint64(c) - 'A' + 10
   383  			default:
   384  				r.UnreadByte()
   385  				break ptr
   386  			}
   387  			match = true
   388  			n0 := n
   389  			n = n<<4 + digit
   390  			if n < n0 {
   391  				panic(todo(""))
   392  			}
   393  		}
   394  		if !match {
   395  			break
   396  		}
   397  
   398  		arg := VaUintptr(args)
   399  		*(*uintptr)(unsafe.Pointer(arg)) = uintptr(n)
   400  		nvalues = 1
   401  	case 'n':
   402  		// Nothing is expected; instead, the number of characters consumed thus far
   403  		// from the input is stored through the next pointer, which must be a pointer
   404  		// to int.  This is not a conversion and does not increase the count returned
   405  		// by the function.  The assignment can be suppressed with the *
   406  		// assignment-suppression character, but the effect on the return value is
   407  		// undefined.  Therefore %*n conversions should not be used.
   408  		format++
   409  		panic(todo(""))
   410  	default:
   411  		panic(todo("%#U", c))
   412  	}
   413  
   414  	return format, nvalues, match
   415  }
   416  
   417  func skipReaderWhiteSpace(r *strings.Reader) error {
   418  	for {
   419  		c, err := r.ReadByte()
   420  		if err != nil {
   421  			return err
   422  		}
   423  
   424  		switch c {
   425  		case ' ', '\t', '\n', '\r', '\v', '\f':
   426  			// ok
   427  		default:
   428  			r.UnreadByte()
   429  			return nil
   430  		}
   431  	}
   432  }
   433  
   434  func skipWhiteSpace(s uintptr) uintptr {
   435  	for {
   436  		switch c := *(*byte)(unsafe.Pointer(s)); c {
   437  		case ' ', '\t', '\n', '\r', '\v', '\f':
   438  			s++
   439  		default:
   440  			return s
   441  		}
   442  	}
   443  }