github.com/kamalshkeir/kencoding@v0.0.2-0.20230409043843-44b609a0475a/iso8601/parse.go (about) 1 package iso8601 2 3 import ( 4 "encoding/binary" 5 "errors" 6 "time" 7 "unsafe" 8 ) 9 10 var ( 11 errInvalidTimestamp = errors.New("invalid ISO8601 timestamp") 12 errMonthOutOfRange = errors.New("month out of range") 13 errDayOutOfRange = errors.New("day out of range") 14 errHourOutOfRange = errors.New("hour out of range") 15 errMinuteOutOfRange = errors.New("minute out of range") 16 errSecondOutOfRange = errors.New("second out of range") 17 ) 18 19 // Parse parses an ISO8601 timestamp, e.g. "2021-03-25T21:36:12Z". 20 func Parse(input string) (time.Time, error) { 21 b := unsafeStringToBytes(input) 22 if len(b) >= 20 && len(b) <= 30 && b[len(b)-1] == 'Z' { 23 if len(b) == 21 || (len(b) > 21 && b[19] != '.') { 24 return time.Time{}, errInvalidTimestamp 25 } 26 27 t1 := binary.LittleEndian.Uint64(b) 28 t2 := binary.LittleEndian.Uint64(b[8:16]) 29 t3 := uint64(b[16]) | uint64(b[17])<<8 | uint64(b[18])<<16 | uint64('Z')<<24 30 31 // Check for valid separators by masking input with " - - T : : Z". 32 // If separators are all valid, replace them with a '0' (0x30) byte and 33 // check all bytes are now numeric. 34 if !match(t1, mask1) || !match(t2, mask2) || !match(t3, mask3) { 35 return time.Time{}, errInvalidTimestamp 36 } 37 t1 ^= replace1 38 t2 ^= replace2 39 t3 ^= replace3 40 if (nonNumeric(t1) | nonNumeric(t2) | nonNumeric(t3)) != 0 { 41 return time.Time{}, errInvalidTimestamp 42 } 43 44 t1 -= zero 45 t2 -= zero 46 t3 -= zero 47 year := (t1&0xF)*1000 + (t1>>8&0xF)*100 + (t1>>16&0xF)*10 + (t1 >> 24 & 0xF) 48 month := (t1>>40&0xF)*10 + (t1 >> 48 & 0xF) 49 day := (t2&0xF)*10 + (t2 >> 8 & 0xF) 50 hour := (t2>>24&0xF)*10 + (t2 >> 32 & 0xF) 51 minute := (t2>>48&0xF)*10 + (t2 >> 56) 52 second := (t3>>8&0xF)*10 + (t3 >> 16) 53 54 nanos := int64(0) 55 if len(b) > 20 { 56 for _, c := range b[20 : len(b)-1] { 57 if c < '0' || c > '9' { 58 return time.Time{}, errInvalidTimestamp 59 } 60 nanos = (nanos * 10) + int64(c-'0') 61 } 62 nanos *= pow10[30-len(b)] 63 } 64 65 if err := validate(year, month, day, hour, minute, second); err != nil { 66 return time.Time{}, err 67 } 68 69 unixSeconds := int64(daysSinceEpoch(year, month, day))*86400 + int64(hour*3600+minute*60+second) 70 return time.Unix(unixSeconds, nanos).UTC(), nil 71 } 72 73 // Fallback to using time.Parse(). 74 t, err := time.Parse(time.RFC3339Nano, input) 75 if err != nil { 76 // Override (and don't wrap) the error here. The error returned by 77 // time.Parse() is dynamic, and includes a reference to the input 78 // string. By overriding the error, we guarantee that the input string 79 // doesn't escape. 80 return time.Time{}, errInvalidTimestamp 81 } 82 return t, nil 83 } 84 85 var pow10 = []int64{1, 10, 100, 1000, 1e4, 1e5, 1e6, 1e7, 1e8} 86 87 const ( 88 mask1 = 0x2d00002d00000000 // YYYY-MM- 89 mask2 = 0x00003a0000540000 // DDTHH:MM 90 mask3 = 0x000000005a00003a // :SSZ____ 91 92 // Generate masks that replace the separators with a numeric byte. 93 // The input must have valid separators. XOR with the separator bytes 94 // to zero them out and then XOR with 0x30 to replace them with '0'. 95 replace1 = mask1 ^ 0x3000003000000000 96 replace2 = mask2 ^ 0x0000300000300000 97 replace3 = mask3 ^ 0x3030303030000030 98 99 lsb = ^uint64(0) / 255 100 msb = lsb * 0x80 101 102 zero = lsb * '0' 103 nine = lsb * '9' 104 ) 105 106 func validate(year, month, day, hour, minute, second uint64) error { 107 if day == 0 || day > 31 { 108 return errDayOutOfRange 109 } 110 if month == 0 || month > 12 { 111 return errMonthOutOfRange 112 } 113 if hour >= 24 { 114 return errHourOutOfRange 115 } 116 if minute >= 60 { 117 return errMinuteOutOfRange 118 } 119 if second >= 60 { 120 return errSecondOutOfRange 121 } 122 if month == 2 && (day > 29 || (day == 29 && !isLeapYear(year))) { 123 return errDayOutOfRange 124 } 125 if day == 31 { 126 switch month { 127 case 4, 6, 9, 11: 128 return errDayOutOfRange 129 } 130 } 131 return nil 132 } 133 134 func match(u, mask uint64) bool { 135 return (u & mask) == mask 136 } 137 138 func nonNumeric(u uint64) uint64 { 139 // Derived from https://graphics.stanford.edu/~seander/bithacks.html#HasLessInWord. 140 // Subtract '0' (0x30) from each byte so that the MSB is set in each byte 141 // if there's a byte less than '0' (0x30). Add 0x46 (0x7F-'9') so that the 142 // MSB is set if there's a byte greater than '9' (0x39). To handle overflow 143 // when adding 0x46, include the MSB from the input bytes in the final mask. 144 // Remove all but the MSBs and then you're left with a mask where each 145 // non-numeric byte from the input has its MSB set in the output. 146 return ((u - zero) | (u + (^msb - nine)) | u) & msb 147 } 148 149 func daysSinceEpoch(year, month, day uint64) uint64 { 150 // Derived from https://blog.reverberate.org/2020/05/12/optimizing-date-algorithms.html. 151 monthAdjusted := month - 3 152 var carry uint64 153 if monthAdjusted > month { 154 carry = 1 155 } 156 var adjust uint64 157 if carry == 1 { 158 adjust = 12 159 } 160 yearAdjusted := year + 4800 - carry 161 monthDays := ((monthAdjusted+adjust)*62719 + 769) / 2048 162 leapDays := yearAdjusted/4 - yearAdjusted/100 + yearAdjusted/400 163 return yearAdjusted*365 + leapDays + monthDays + (day - 1) - 2472632 164 } 165 166 func isLeapYear(y uint64) bool { 167 return (y%4) == 0 && ((y%100) != 0 || (y%400) == 0) 168 } 169 170 func unsafeStringToBytes(s string) []byte { 171 return *(*[]byte)(unsafe.Pointer(&sliceHeader{ 172 Data: *(*unsafe.Pointer)(unsafe.Pointer(&s)), 173 Len: len(s), 174 Cap: len(s), 175 })) 176 } 177 178 // sliceHeader is like reflect.SliceHeader but the Data field is a 179 // unsafe.Pointer instead of being a uintptr to avoid invalid 180 // conversions from uintptr to unsafe.Pointer. 181 type sliceHeader struct { 182 Data unsafe.Pointer 183 Len int 184 Cap int 185 }