# Source: github.com/apache/beam/sdks/v2@v2.48.2/python/apache_beam/utils/timestamp.py
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

"""Timestamp utilities.

For internal use only; no backwards-compatibility guarantees.
"""

# pytype: skip-file
# mypy: disallow-untyped-defs

import datetime
import time
from typing import Union
from typing import overload

import dateutil.parser
import pytz
from google.protobuf import duration_pb2
from google.protobuf import timestamp_pb2

from apache_beam.portability import common_urns

# types compatible with Timestamp.of()
TimestampTypes = Union[int, float, 'Timestamp']
# types compatible with Duration.of()
DurationTypes = Union[int, float, 'Duration']
TimestampDurationTypes = Union[int, float, 'Duration', 'Timestamp']


class Timestamp(object):
  """Represents a Unix second timestamp with microsecond granularity.

  Can be treated in common timestamp arithmetic operations as a numeric type.

  Internally stores a time interval as an int of microseconds. This strategy
  is necessary since floating point values lose precision when storing values,
  especially after arithmetic operations (for example, 10000000 % 0.1 evaluates
  to 0.0999999994448885).
  """
  def __init__(self, seconds=0, micros=0):
    # type: (Union[int, float], Union[int, float]) -> None

    """Initializes the timestamp from seconds and/or microseconds.

    The two arguments are additive. Each is converted with ``int()``, so any
    fractional microsecond left after scaling is truncated toward zero.

    Args:
      seconds: Seconds since the Unix epoch, as int or float.
      micros: Additional microseconds, as int or float.

    Raises:
      TypeError: If either argument is not an int or float.
    """
    if not isinstance(seconds, (int, float)):
      raise TypeError(
          'Cannot interpret %s %s as seconds.' % (seconds, type(seconds)))
    if not isinstance(micros, (int, float)):
      raise TypeError(
          'Cannot interpret %s %s as micros.' % (micros, type(micros)))
    self.micros = int(seconds * 1000000) + int(micros)

  @staticmethod
  def of(seconds):
    # type: (TimestampTypes) -> Timestamp

    """Return the Timestamp for the given number of seconds.

    If the input is already a Timestamp, the input itself will be returned.

    Args:
      seconds: Number of seconds as int, float, or Timestamp.

    Returns:
      Corresponding Timestamp object.

    Raises:
      TypeError: If ``seconds`` is not an int, float, or Timestamp.
    """

    if not isinstance(seconds, (int, float, Timestamp)):
      raise TypeError(
          'Cannot interpret %s %s as Timestamp.' % (seconds, type(seconds)))
    if isinstance(seconds, Timestamp):
      return seconds
    return Timestamp(seconds)

  @staticmethod
  def now():
    # type: () -> Timestamp

    """Returns a Timestamp built from the current ``time.time()`` value."""
    return Timestamp(seconds=time.time())

  @staticmethod
  def _epoch_datetime_utc():
    # type: () -> datetime.datetime

    """Returns the Unix epoch as an offset-aware (``pytz.utc``) datetime."""
    return datetime.datetime.fromtimestamp(0, pytz.utc)

  @classmethod
  def from_utc_datetime(cls, dt):
    # type: (datetime.datetime) -> Timestamp

    """Create a ``Timestamp`` instance from a ``datetime.datetime`` object.

    Args:
      dt: A ``datetime.datetime`` object in UTC (offset-aware).

    Raises:
      ValueError: If ``dt`` is naive, or its tzinfo is neither ``pytz.utc``
        nor ``datetime.timezone.utc``.
    """
    if dt.tzinfo is None:
      raise ValueError(
          "dt has no timezone info " +
          "(https://docs.python.org/3/library/datetime.html" +
          "#aware-and-naive-objects): %s" % dt)
    if dt.tzinfo != pytz.utc and dt.tzinfo != datetime.timezone.utc:
      raise ValueError('dt not in UTC: %s' % dt)
    duration = dt - cls._epoch_datetime_utc()
    # Convert with exact integer arithmetic. Going through the float returned
    # by total_seconds() and then truncating can drop a microsecond (e.g.
    # 1.000001 * 1000000 == 1000000.9999999999, which int() turns into
    # 1000000 rather than 1000001).
    return Timestamp(micros=duration // datetime.timedelta(microseconds=1))

  @classmethod
  def from_rfc3339(cls, rfc3339):
    # type: (str) -> Timestamp

    """Create a ``Timestamp`` instance from an RFC 3339 compliant string.

    .. note::
      All timezones are implicitly converted to UTC.

    Args:
      rfc3339: String in RFC 3339 form.

    Raises:
      ValueError: If the string cannot be parsed.
    """
    try:
      dt = dateutil.parser.isoparse(rfc3339).astimezone(pytz.UTC)
    except ValueError as e:
      # Chain the original error so the parser's traceback is not lost.
      raise ValueError(
          "Could not parse RFC 3339 string '{}' due to error: '{}'.".format(
              rfc3339, e)) from e
    return cls.from_utc_datetime(dt)

  def seconds(self) -> int:
    """Returns the timestamp in seconds."""
    return self.micros // 1000000

  def predecessor(self):
    # type: () -> Timestamp

    """Returns the largest timestamp smaller than self."""
    return Timestamp(micros=self.micros - 1)

  def __repr__(self):
    # type: () -> str

    """Returns ``Timestamp(<seconds>)``, with a 6-digit fraction if non-zero."""
    micros = self.micros
    sign = ''
    if micros < 0:
      # Format the magnitude and prepend the sign, so that the integer and
      # fractional parts are both computed on a non-negative value.
      sign = '-'
      micros = -micros
    int_part = micros // 1000000
    frac_part = micros % 1000000
    if frac_part:
      return 'Timestamp(%s%d.%06d)' % (sign, int_part, frac_part)
    return 'Timestamp(%s%d)' % (sign, int_part)

  def to_utc_datetime(self):
    # type: () -> datetime.datetime

    """Returns this timestamp as a naive ``datetime.datetime`` (tzinfo=None)."""
    # We can't easily construct a datetime object from microseconds, so we
    # create one at the epoch and add an appropriate timedelta interval.
    return self._epoch_datetime_utc().replace(tzinfo=None) + datetime.timedelta(
        microseconds=self.micros)

  def to_rfc3339(self):
    # type: () -> str

    """Returns the ISO-format string of ``to_utc_datetime()`` suffixed by 'Z'."""
    # Append 'Z' for UTC timezone.
    return self.to_utc_datetime().isoformat() + 'Z'

  def to_proto(self):
    # type: () -> timestamp_pb2.Timestamp

    """Returns the `google.protobuf.timestamp_pb2` representation."""
    # Floor division / modulo keep nanos non-negative for negative timestamps.
    secs = self.micros // 1000000
    nanos = (self.micros % 1000000) * 1000
    return timestamp_pb2.Timestamp(seconds=secs, nanos=nanos)

  @staticmethod
  def from_proto(timestamp_proto):
    # type: (timestamp_pb2.Timestamp) -> Timestamp

    """Creates a Timestamp from a `google.protobuf.timestamp_pb2`.

    Note that the protobuf message has a sub-second resolution of nanoseconds
    whereas this class has a resolution of microseconds. No implicit
    truncation is performed: the caller must truncate the nanoseconds to a
    whole number of microseconds beforehand.

    Raises:
      ValueError: If ``timestamp_proto.nanos`` is not a multiple of 1000.
    """

    if timestamp_proto.nanos % 1000 != 0:
      # TODO(https://github.com/apache/beam/issues/19922): Better define
      # timestamps.
      raise ValueError(
          "Cannot convert from nanoseconds to microseconds " +
          "because this loses precision. Please make sure that " +
          "this is the correct behavior you want and manually " +
          "truncate the precision to the nearest microseconds. " +
          "See [https://github.com/apache/beam/issues/19922] for " +
          "more information.")

    return Timestamp(
        seconds=timestamp_proto.seconds, micros=timestamp_proto.nanos // 1000)

  def __float__(self):
    # type: () -> float

    """Returns the timestamp in seconds as a float."""
    # Note that the returned value may have lost precision.
    return self.micros / 1000000

  def __int__(self):
    # type: () -> int

    """Returns the timestamp in whole seconds (floor of the exact value)."""
    # Note that the returned value may have lost precision.
    return self.micros // 1000000

  def __eq__(self, other):
    # type: (object) -> bool

    """Compares by microsecond value; plain numbers are read as seconds."""
    # Allow comparisons between Duration and Timestamp values.
    if isinstance(other, (Duration, Timestamp)):
      return self.micros == other.micros
    elif isinstance(other, (int, float)):
      return self.micros == Timestamp.of(other).micros
    else:
      # Support equality with other types
      return NotImplemented

  def __lt__(self, other):
    # type: (TimestampDurationTypes) -> bool

    """Orders by microsecond value; plain numbers are read as seconds."""
    # Allow comparisons between Duration and Timestamp values.
    if not isinstance(other, Duration):
      other = Timestamp.of(other)
    return self.micros < other.micros

  def __gt__(self, other):
    # type: (TimestampDurationTypes) -> bool
    return not (self < other or self == other)

  def __le__(self, other):
    # type: (TimestampDurationTypes) -> bool
    return self < other or self == other

  def __ge__(self, other):
    # type: (TimestampDurationTypes) -> bool
    return not self < other

  def __hash__(self):
    # type: () -> int
    return hash(self.micros)

  def __add__(self, other):
    # type: (DurationTypes) -> Timestamp

    """Returns this timestamp shifted forward by the given duration."""
    other = Duration.of(other)
    return Timestamp(micros=self.micros + other.micros)

  def __radd__(self, other):
    # type: (DurationTypes) -> Timestamp
    return self + other

  @overload
  def __sub__(self, other):
    # type: (DurationTypes) -> Timestamp
    pass

  @overload
  def __sub__(self, other):
    # type: (Timestamp) -> Duration
    pass

  def __sub__(self, other):
    # type: (Union[DurationTypes, Timestamp]) -> Union[Timestamp, Duration]

    """Subtracts a Timestamp (yielding a Duration) or a duration.

    Subtracting anything accepted by ``Duration.of()`` yields a Timestamp.
    """
    if isinstance(other, Timestamp):
      return Duration(micros=self.micros - other.micros)
    other = Duration.of(other)
    return Timestamp(micros=self.micros - other.micros)

  def __mod__(self, other):
    # type: (DurationTypes) -> Duration

    """Returns the remainder of this timestamp modulo the given duration."""
    other = Duration.of(other)
    return Duration(micros=self.micros % other.micros)


# The bounds are published as millisecond constants; scale them to the
# microsecond resolution used by Timestamp.
MIN_TIMESTAMP = Timestamp(
    micros=int(common_urns.constants.MIN_TIMESTAMP_MILLIS.constant) * 1000)
MAX_TIMESTAMP = Timestamp(
    micros=int(common_urns.constants.MAX_TIMESTAMP_MILLIS.constant) * 1000)


class Duration(object):
  """Represents a second duration with microsecond granularity.

  Can be treated in common arithmetic operations as a numeric type.

  Internally stores a time interval as an int of microseconds. This strategy
  is necessary since floating point values lose precision when storing values,
  especially after arithmetic operations (for example, 10000000 % 0.1 evaluates
  to 0.0999999994448885).
  """
  def __init__(self, seconds=0, micros=0):
    # type: (Union[int, float], Union[int, float]) -> None

    """Initializes the duration from seconds and/or microseconds.

    The two arguments are additive. Each is converted with ``int()``, so any
    fractional microsecond left after scaling is truncated toward zero.

    Args:
      seconds: Length in seconds, as int or float.
      micros: Additional microseconds, as int or float.
    """
    self.micros = int(seconds * 1000000) + int(micros)

  @staticmethod
  def of(seconds):
    # type: (DurationTypes) -> Duration

    """Return the Duration for the given number of seconds.

    If the input is already a Duration, the input itself will be returned.

    Args:
      seconds: Number of seconds as int, float or Duration.

    Returns:
      Corresponding Duration object.

    Raises:
      TypeError: If ``seconds`` is a Timestamp.
    """

    if isinstance(seconds, Timestamp):
      raise TypeError('Cannot interpret %s as Duration.' % seconds)
    if isinstance(seconds, Duration):
      return seconds
    return Duration(seconds)

  def to_proto(self):
    # type: () -> duration_pb2.Duration

    """Returns the `google.protobuf.duration_pb2` representation."""
    secs = self.micros // 1000000
    nanos = (self.micros % 1000000) * 1000
    return duration_pb2.Duration(seconds=secs, nanos=nanos)

  @staticmethod
  def from_proto(duration_proto):
    # type: (duration_pb2.Duration) -> Duration

    """Creates a Duration from a `google.protobuf.duration_pb2`.

    Note that the protobuf message has a sub-second resolution of nanoseconds
    whereas this class has a resolution of microseconds. No implicit
    truncation is performed: the caller must truncate the nanoseconds to a
    whole number of microseconds beforehand.

    Raises:
      ValueError: If ``duration_proto.nanos`` is not a multiple of 1000.
    """

    if duration_proto.nanos % 1000 != 0:
      # TODO(https://github.com/apache/beam/issues/19922): Better define
      # durations.
      raise ValueError(
          "Cannot convert from nanoseconds to microseconds " +
          "because this loses precision. Please make sure that " +
          "this is the correct behavior you want and manually " +
          "truncate the precision to the nearest microseconds. " +
          "See [https://github.com/apache/beam/issues/19922] for " +
          "more information.")

    return Duration(
        seconds=duration_proto.seconds, micros=duration_proto.nanos // 1000)

  def __repr__(self):
    # type: () -> str

    """Returns ``Duration(<seconds>)``, with a 6-digit fraction if non-zero."""
    micros = self.micros
    sign = ''
    if micros < 0:
      # Format the magnitude and prepend the sign, so that the integer and
      # fractional parts are both computed on a non-negative value.
      sign = '-'
      micros = -micros
    int_part = micros // 1000000
    frac_part = micros % 1000000
    if frac_part:
      return 'Duration(%s%d.%06d)' % (sign, int_part, frac_part)
    return 'Duration(%s%d)' % (sign, int_part)

  def __float__(self):
    # type: () -> float

    """Returns the duration in seconds as a float."""
    # Note that the returned value may have lost precision.
    return self.micros / 1000000

  def __eq__(self, other):
    # type: (object) -> bool

    """Compares by microsecond value; plain numbers are read as seconds."""
    # Allow comparisons between Duration and Timestamp values.
    if isinstance(other, (Duration, Timestamp)):
      return self.micros == other.micros
    elif isinstance(other, (int, float)):
      return self.micros == Duration.of(other).micros
    else:
      # Support equality with other types
      return NotImplemented

  def __lt__(self, other):
    # type: (TimestampDurationTypes) -> bool

    """Orders by microsecond value; plain numbers are read as seconds."""
    # Allow comparisons between Duration and Timestamp values.
    if not isinstance(other, Timestamp):
      other = Duration.of(other)
    return self.micros < other.micros

  def __gt__(self, other):
    # type: (TimestampDurationTypes) -> bool
    return not (self < other or self == other)

  def __le__(self, other):
    # type: (TimestampDurationTypes) -> bool
    return self < other or self == other

  def __ge__(self, other):
    # type: (TimestampDurationTypes) -> bool
    return not self < other

  def __hash__(self):
    # type: () -> int
    return hash(self.micros)

  def __neg__(self):
    # type: () -> Duration
    return Duration(micros=-self.micros)

  def __add__(self, other):
    # type: (DurationTypes) -> Duration

    """Returns the sum of this duration and another duration or number."""
    if isinstance(other, Timestamp):
      # defer to Timestamp.__add__
      return NotImplemented
    other = Duration.of(other)
    return Duration(micros=self.micros + other.micros)

  def __radd__(self, other):
    # type: (DurationTypes) -> Duration
    return self + other

  def __sub__(self, other):
    # type: (DurationTypes) -> Duration

    """Returns this duration minus another duration or number of seconds."""
    other = Duration.of(other)
    return Duration(micros=self.micros - other.micros)

  def __rsub__(self, other):
    # type: (DurationTypes) -> Duration
    # other - self == -(self - other)
    return -(self - other)

  def __mul__(self, other):
    # type: (DurationTypes) -> Duration

    """Returns this duration scaled by ``other`` (interpreted as seconds)."""
    other = Duration.of(other)
    # Both operands are in microseconds, so divide the product by 1e6 to get
    # back to microseconds.
    return Duration(micros=self.micros * other.micros // 1000000)

  def __rmul__(self, other):
    # type: (DurationTypes) -> Duration
    return self * other

  def __mod__(self, other):
    # type: (DurationTypes) -> Duration

    """Returns the remainder of this duration modulo another duration."""
    other = Duration.of(other)
    return Duration(micros=self.micros % other.micros)


# The minimum granularity / interval expressible in a Timestamp / Duration
# object.
TIME_GRANULARITY = Duration(micros=1)