github.com/segmentio/encoding@v0.4.0/iso8601/parse.go (about)

     1  package iso8601
     2  
     3  import (
     4  	"encoding/binary"
     5  	"errors"
     6  	"time"
     7  	"unsafe"
     8  )
     9  
    10  var (
    11  	errInvalidTimestamp = errors.New("invalid ISO8601 timestamp")
    12  	errMonthOutOfRange  = errors.New("month out of range")
    13  	errDayOutOfRange    = errors.New("day out of range")
    14  	errHourOutOfRange   = errors.New("hour out of range")
    15  	errMinuteOutOfRange = errors.New("minute out of range")
    16  	errSecondOutOfRange = errors.New("second out of range")
    17  )
    18  
    19  // Parse parses an ISO8601 timestamp, e.g. "2021-03-25T21:36:12Z".
    20  func Parse(input string) (time.Time, error) {
    21  	b := unsafeStringToBytes(input)
    22  	if len(b) >= 20 && len(b) <= 30 && b[len(b)-1] == 'Z' {
    23  		if len(b) == 21 || (len(b) > 21 && b[19] != '.') {
    24  			return time.Time{}, errInvalidTimestamp
    25  		}
    26  
    27  		t1 := binary.LittleEndian.Uint64(b)
    28  		t2 := binary.LittleEndian.Uint64(b[8:16])
    29  		t3 := uint64(b[16]) | uint64(b[17])<<8 | uint64(b[18])<<16 | uint64('Z')<<24
    30  
    31  		// Check for valid separators by masking input with "    -  -  T  :  :  Z".
    32  		// If separators are all valid, replace them with a '0' (0x30) byte and
    33  		// check all bytes are now numeric.
    34  		if !match(t1, mask1) || !match(t2, mask2) || !match(t3, mask3) {
    35  			return time.Time{}, errInvalidTimestamp
    36  		}
    37  		t1 ^= replace1
    38  		t2 ^= replace2
    39  		t3 ^= replace3
    40  		if (nonNumeric(t1) | nonNumeric(t2) | nonNumeric(t3)) != 0 {
    41  			return time.Time{}, errInvalidTimestamp
    42  		}
    43  
    44  		t1 -= zero
    45  		t2 -= zero
    46  		t3 -= zero
    47  		year := (t1&0xF)*1000 + (t1>>8&0xF)*100 + (t1>>16&0xF)*10 + (t1 >> 24 & 0xF)
    48  		month := (t1>>40&0xF)*10 + (t1 >> 48 & 0xF)
    49  		day := (t2&0xF)*10 + (t2 >> 8 & 0xF)
    50  		hour := (t2>>24&0xF)*10 + (t2 >> 32 & 0xF)
    51  		minute := (t2>>48&0xF)*10 + (t2 >> 56)
    52  		second := (t3>>8&0xF)*10 + (t3 >> 16)
    53  
    54  		nanos := int64(0)
    55  		if len(b) > 20 {
    56  			for _, c := range b[20 : len(b)-1] {
    57  				if c < '0' || c > '9' {
    58  					return time.Time{}, errInvalidTimestamp
    59  				}
    60  				nanos = (nanos * 10) + int64(c-'0')
    61  			}
    62  			nanos *= pow10[30-len(b)]
    63  		}
    64  
    65  		if err := validate(year, month, day, hour, minute, second); err != nil {
    66  			return time.Time{}, err
    67  		}
    68  
    69  		unixSeconds := int64(daysSinceEpoch(year, month, day))*86400 + int64(hour*3600+minute*60+second)
    70  		return time.Unix(unixSeconds, nanos).UTC(), nil
    71  	}
    72  
    73  	// Fallback to using time.Parse().
    74  	t, err := time.Parse(time.RFC3339Nano, input)
    75  	if err != nil {
    76  		// Override (and don't wrap) the error here. The error returned by
    77  		// time.Parse() is dynamic, and includes a reference to the input
    78  		// string. By overriding the error, we guarantee that the input string
    79  		// doesn't escape.
    80  		return time.Time{}, errInvalidTimestamp
    81  	}
    82  	return t, nil
    83  }
    84  
    85  var pow10 = []int64{1, 10, 100, 1000, 1e4, 1e5, 1e6, 1e7, 1e8}
    86  
    87  const (
    88  	mask1 = 0x2d00002d00000000 // YYYY-MM-
    89  	mask2 = 0x00003a0000540000 // DDTHH:MM
    90  	mask3 = 0x000000005a00003a // :SSZ____
    91  
    92  	// Generate masks that replace the separators with a numeric byte.
    93  	// The input must have valid separators. XOR with the separator bytes
    94  	// to zero them out and then XOR with 0x30 to replace them with '0'.
    95  	replace1 = mask1 ^ 0x3000003000000000
    96  	replace2 = mask2 ^ 0x0000300000300000
    97  	replace3 = mask3 ^ 0x3030303030000030
    98  
    99  	lsb = ^uint64(0) / 255
   100  	msb = lsb * 0x80
   101  
   102  	zero = lsb * '0'
   103  	nine = lsb * '9'
   104  )
   105  
   106  func validate(year, month, day, hour, minute, second uint64) error {
   107  	if day == 0 || day > 31 {
   108  		return errDayOutOfRange
   109  	}
   110  	if month == 0 || month > 12 {
   111  		return errMonthOutOfRange
   112  	}
   113  	if hour >= 24 {
   114  		return errHourOutOfRange
   115  	}
   116  	if minute >= 60 {
   117  		return errMinuteOutOfRange
   118  	}
   119  	if second >= 60 {
   120  		return errSecondOutOfRange
   121  	}
   122  	if month == 2 && (day > 29 || (day == 29 && !isLeapYear(year))) {
   123  		return errDayOutOfRange
   124  	}
   125  	if day == 31 {
   126  		switch month {
   127  		case 4, 6, 9, 11:
   128  			return errDayOutOfRange
   129  		}
   130  	}
   131  	return nil
   132  }
   133  
   134  func match(u, mask uint64) bool {
   135  	return (u & mask) == mask
   136  }
   137  
   138  func nonNumeric(u uint64) uint64 {
   139  	// Derived from https://graphics.stanford.edu/~seander/bithacks.html#HasLessInWord.
   140  	// Subtract '0' (0x30) from each byte so that the MSB is set in each byte
   141  	// if there's a byte less than '0' (0x30). Add 0x46 (0x7F-'9') so that the
   142  	// MSB is set if there's a byte greater than '9' (0x39). To handle overflow
   143  	// when adding 0x46, include the MSB from the input bytes in the final mask.
   144  	// Remove all but the MSBs and then you're left with a mask where each
   145  	// non-numeric byte from the input has its MSB set in the output.
   146  	return ((u - zero) | (u + (^msb - nine)) | u) & msb
   147  }
   148  
   149  func daysSinceEpoch(year, month, day uint64) uint64 {
   150  	// Derived from https://blog.reverberate.org/2020/05/12/optimizing-date-algorithms.html.
   151  	monthAdjusted := month - 3
   152  	var carry uint64
   153  	if monthAdjusted > month {
   154  		carry = 1
   155  	}
   156  	var adjust uint64
   157  	if carry == 1 {
   158  		adjust = 12
   159  	}
   160  	yearAdjusted := year + 4800 - carry
   161  	monthDays := ((monthAdjusted+adjust)*62719 + 769) / 2048
   162  	leapDays := yearAdjusted/4 - yearAdjusted/100 + yearAdjusted/400
   163  	return yearAdjusted*365 + leapDays + monthDays + (day - 1) - 2472632
   164  }
   165  
   166  func isLeapYear(y uint64) bool {
   167  	return (y%4) == 0 && ((y%100) != 0 || (y%400) == 0)
   168  }
   169  
   170  func unsafeStringToBytes(s string) []byte {
   171  	return *(*[]byte)(unsafe.Pointer(&sliceHeader{
   172  		Data: *(*unsafe.Pointer)(unsafe.Pointer(&s)),
   173  		Len:  len(s),
   174  		Cap:  len(s),
   175  	}))
   176  }
   177  
   178  // sliceHeader is like reflect.SliceHeader but the Data field is a
   179  // unsafe.Pointer instead of being a uintptr to avoid invalid
   180  // conversions from uintptr to unsafe.Pointer.
   181  type sliceHeader struct {
   182  	Data unsafe.Pointer
   183  	Len  int
   184  	Cap  int
   185  }