github.com/apache/beam/sdks/v2@v2.48.2/python/apache_beam/utils/timestamp.py (about)

     1  #
     2  # Licensed to the Apache Software Foundation (ASF) under one or more
     3  # contributor license agreements.  See the NOTICE file distributed with
     4  # this work for additional information regarding copyright ownership.
     5  # The ASF licenses this file to You under the Apache License, Version 2.0
     6  # (the "License"); you may not use this file except in compliance with
     7  # the License.  You may obtain a copy of the License at
     8  #
     9  #    http://www.apache.org/licenses/LICENSE-2.0
    10  #
    11  # Unless required by applicable law or agreed to in writing, software
    12  # distributed under the License is distributed on an "AS IS" BASIS,
    13  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    14  # See the License for the specific language governing permissions and
    15  # limitations under the License.
    16  #
    17  
    18  """Timestamp utilities.
    19  
    20  For internal use only; no backwards-compatibility guarantees.
    21  """
    22  
    23  # pytype: skip-file
    24  # mypy: disallow-untyped-defs
    25  
    26  import datetime
    27  import time
    28  from typing import Union
    29  from typing import overload
    30  
    31  import dateutil.parser
    32  import pytz
    33  from google.protobuf import duration_pb2
    34  from google.protobuf import timestamp_pb2
    35  
    36  from apache_beam.portability import common_urns
    37  
# types compatible with Timestamp.of()
TimestampTypes = Union[int, float, 'Timestamp']
# types compatible with Duration.of()
DurationTypes = Union[int, float, 'Duration']
# types accepted by the ordering comparisons of both Timestamp and Duration,
# which allow the two classes to be compared with each other.
TimestampDurationTypes = Union[int, float, 'Duration', 'Timestamp']
    43  
    44  
    45  class Timestamp(object):
    46    """Represents a Unix second timestamp with microsecond granularity.
    47  
    48    Can be treated in common timestamp arithmetic operations as a numeric type.
    49  
    50    Internally stores a time interval as an int of microseconds. This strategy
    51    is necessary since floating point values lose precision when storing values,
    52    especially after arithmetic operations (for example, 10000000 % 0.1 evaluates
    53    to 0.0999999994448885).
    54    """
    55    def __init__(self, seconds=0, micros=0):
    56      # type: (Union[int, float], Union[int, float]) -> None
    57      if not isinstance(seconds, (int, float)):
    58        raise TypeError(
    59            'Cannot interpret %s %s as seconds.' % (seconds, type(seconds)))
    60      if not isinstance(micros, (int, float)):
    61        raise TypeError(
    62            'Cannot interpret %s %s as micros.' % (micros, type(micros)))
    63      self.micros = int(seconds * 1000000) + int(micros)
    64  
    65    @staticmethod
    66    def of(seconds):
    67      # type: (TimestampTypes) -> Timestamp
    68  
    69      """Return the Timestamp for the given number of seconds.
    70  
    71      If the input is already a Timestamp, the input itself will be returned.
    72  
    73      Args:
    74        seconds: Number of seconds as int, float, long, or Timestamp.
    75  
    76      Returns:
    77        Corresponding Timestamp object.
    78      """
    79  
    80      if not isinstance(seconds, (int, float, Timestamp)):
    81        raise TypeError(
    82            'Cannot interpret %s %s as Timestamp.' % (seconds, type(seconds)))
    83      if isinstance(seconds, Timestamp):
    84        return seconds
    85      return Timestamp(seconds)
    86  
    87    @staticmethod
    88    def now():
    89      # type: () -> Timestamp
    90      return Timestamp(seconds=time.time())
    91  
    92    @staticmethod
    93    def _epoch_datetime_utc():
    94      # type: () -> datetime.datetime
    95      return datetime.datetime.fromtimestamp(0, pytz.utc)
    96  
    97    @classmethod
    98    def from_utc_datetime(cls, dt):
    99      # type: (datetime.datetime) -> Timestamp
   100  
   101      """Create a ``Timestamp`` instance from a ``datetime.datetime`` object.
   102  
   103      Args:
   104        dt: A ``datetime.datetime`` object in UTC (offset-aware).
   105      """
   106      if dt.tzinfo is None:
   107        raise ValueError(
   108            "dt has no timezone info " +
   109            "(https://docs.python.org/3/library/datetime.html" +
   110            "#aware-and-naive-objects): %s" % dt)
   111      if dt.tzinfo != pytz.utc and dt.tzinfo != datetime.timezone.utc:
   112        raise ValueError('dt not in UTC: %s' % dt)
   113      duration = dt - cls._epoch_datetime_utc()
   114      return Timestamp(duration.total_seconds())
   115  
   116    @classmethod
   117    def from_rfc3339(cls, rfc3339):
   118      # type: (str) -> Timestamp
   119  
   120      """Create a ``Timestamp`` instance from an RFC 3339 compliant string.
   121  
   122      .. note::
   123        All timezones are implicitly converted to UTC.
   124  
   125      Args:
   126        rfc3339: String in RFC 3339 form.
   127      """
   128      try:
   129        dt = dateutil.parser.isoparse(rfc3339).astimezone(pytz.UTC)
   130      except ValueError as e:
   131        raise ValueError(
   132            "Could not parse RFC 3339 string '{}' due to error: '{}'.".format(
   133                rfc3339, e))
   134      return cls.from_utc_datetime(dt)
   135  
   136    def seconds(self) -> int:
   137      """Returns the timestamp in seconds."""
   138      return self.micros // 1000000
   139  
   140    def predecessor(self):
   141      # type: () -> Timestamp
   142  
   143      """Returns the largest timestamp smaller than self."""
   144      return Timestamp(micros=self.micros - 1)
   145  
   146    def __repr__(self):
   147      # type: () -> str
   148      micros = self.micros
   149      sign = ''
   150      if micros < 0:
   151        sign = '-'
   152        micros = -micros
   153      int_part = micros // 1000000
   154      frac_part = micros % 1000000
   155      if frac_part:
   156        return 'Timestamp(%s%d.%06d)' % (sign, int_part, frac_part)
   157      return 'Timestamp(%s%d)' % (sign, int_part)
   158  
   159    def to_utc_datetime(self):
   160      # type: () -> datetime.datetime
   161      # We can't easily construct a datetime object from microseconds, so we
   162      # create one at the epoch and add an appropriate timedelta interval.
   163      return self._epoch_datetime_utc().replace(tzinfo=None) + datetime.timedelta(
   164          microseconds=self.micros)
   165  
   166    def to_rfc3339(self):
   167      # type: () -> str
   168      # Append 'Z' for UTC timezone.
   169      return self.to_utc_datetime().isoformat() + 'Z'
   170  
   171    def to_proto(self):
   172      # type: () -> timestamp_pb2.Timestamp
   173  
   174      """Returns the `google.protobuf.timestamp_pb2` representation."""
   175      secs = self.micros // 1000000
   176      nanos = (self.micros % 1000000) * 1000
   177      return timestamp_pb2.Timestamp(seconds=secs, nanos=nanos)
   178  
   179    @staticmethod
   180    def from_proto(timestamp_proto):
   181      # type: (timestamp_pb2.Timestamp) -> Timestamp
   182  
   183      """Creates a Timestamp from a `google.protobuf.timestamp_pb2`.
   184  
   185      Note that the google has a sub-second resolution of nanoseconds whereas this
   186      class has a resolution of microsends. This class will truncate the
   187      nanosecond resolution down to the microsecond.
   188      """
   189  
   190      if timestamp_proto.nanos % 1000 != 0:
   191        # TODO(https://github.com/apache/beam/issues/19922): Better define
   192        # timestamps.
   193        raise ValueError(
   194            "Cannot convert from nanoseconds to microseconds " +
   195            "because this loses precision. Please make sure that " +
   196            "this is the correct behavior you want and manually " +
   197            "truncate the precision to the nearest microseconds. " +
   198            "See [https://github.com/apache/beam/issues/19922] for " +
   199            "more information.")
   200  
   201      return Timestamp(
   202          seconds=timestamp_proto.seconds, micros=timestamp_proto.nanos // 1000)
   203  
   204    def __float__(self):
   205      # type: () -> float
   206      # Note that the returned value may have lost precision.
   207      return self.micros / 1000000
   208  
   209    def __int__(self):
   210      # type: () -> int
   211      # Note that the returned value may have lost precision.
   212      return self.micros // 1000000
   213  
   214    def __eq__(self, other):
   215      # type: (object) -> bool
   216      # Allow comparisons between Duration and Timestamp values.
   217      if isinstance(other, (Duration, Timestamp)):
   218        return self.micros == other.micros
   219      elif isinstance(other, (int, float)):
   220        return self.micros == Timestamp.of(other).micros
   221      else:
   222        # Support equality with other types
   223        return NotImplemented
   224  
   225    def __lt__(self, other):
   226      # type: (TimestampDurationTypes) -> bool
   227      # Allow comparisons between Duration and Timestamp values.
   228      if not isinstance(other, Duration):
   229        other = Timestamp.of(other)
   230      return self.micros < other.micros
   231  
   232    def __gt__(self, other):
   233      # type: (TimestampDurationTypes) -> bool
   234      return not (self < other or self == other)
   235  
   236    def __le__(self, other):
   237      # type: (TimestampDurationTypes) -> bool
   238      return self < other or self == other
   239  
   240    def __ge__(self, other):
   241      # type: (TimestampDurationTypes) -> bool
   242      return not self < other
   243  
   244    def __hash__(self):
   245      # type: () -> int
   246      return hash(self.micros)
   247  
   248    def __add__(self, other):
   249      # type: (DurationTypes) -> Timestamp
   250      other = Duration.of(other)
   251      return Timestamp(micros=self.micros + other.micros)
   252  
   253    def __radd__(self, other):
   254      # type: (DurationTypes) -> Timestamp
   255      return self + other
   256  
   257    @overload
   258    def __sub__(self, other):
   259      # type: (DurationTypes) -> Timestamp
   260      pass
   261  
   262    @overload
   263    def __sub__(self, other):
   264      # type: (Timestamp) -> Duration
   265      pass
   266  
   267    def __sub__(self, other):
   268      # type: (Union[DurationTypes, Timestamp]) -> Union[Timestamp, Duration]
   269      if isinstance(other, Timestamp):
   270        return Duration(micros=self.micros - other.micros)
   271      other = Duration.of(other)
   272      return Timestamp(micros=self.micros - other.micros)
   273  
   274    def __mod__(self, other):
   275      # type: (DurationTypes) -> Duration
   276      other = Duration.of(other)
   277      return Duration(micros=self.micros % other.micros)
   278  
   279  
# Timestamp bounds taken from the MIN/MAX_TIMESTAMP_MILLIS constants in
# common_urns. Those values are in milliseconds, hence the * 1000 to obtain
# the microseconds that Timestamp stores.
MIN_TIMESTAMP = Timestamp(
    micros=int(common_urns.constants.MIN_TIMESTAMP_MILLIS.constant) * 1000)
MAX_TIMESTAMP = Timestamp(
    micros=int(common_urns.constants.MAX_TIMESTAMP_MILLIS.constant) * 1000)
   284  
   285  
   286  class Duration(object):
   287    """Represents a second duration with microsecond granularity.
   288  
   289    Can be treated in common arithmetic operations as a numeric type.
   290  
   291    Internally stores a time interval as an int of microseconds. This strategy
   292    is necessary since floating point values lose precision when storing values,
   293    especially after arithmetic operations (for example, 10000000 % 0.1 evaluates
   294    to 0.0999999994448885).
   295    """
   296    def __init__(self, seconds=0, micros=0):
   297      # type: (Union[int, float], Union[int, float]) -> None
   298      self.micros = int(seconds * 1000000) + int(micros)
   299  
   300    @staticmethod
   301    def of(seconds):
   302      # type: (DurationTypes) -> Duration
   303  
   304      """Return the Duration for the given number of seconds since Unix epoch.
   305  
   306      If the input is already a Duration, the input itself will be returned.
   307  
   308      Args:
   309        seconds: Number of seconds as int, float or Duration.
   310  
   311      Returns:
   312        Corresponding Duration object.
   313      """
   314  
   315      if isinstance(seconds, Timestamp):
   316        raise TypeError('Cannot interpret %s as Duration.' % seconds)
   317      if isinstance(seconds, Duration):
   318        return seconds
   319      return Duration(seconds)
   320  
   321    def to_proto(self):
   322      # type: () -> duration_pb2.Duration
   323  
   324      """Returns the `google.protobuf.duration_pb2` representation."""
   325      secs = self.micros // 1000000
   326      nanos = (self.micros % 1000000) * 1000
   327      return duration_pb2.Duration(seconds=secs, nanos=nanos)
   328  
   329    @staticmethod
   330    def from_proto(duration_proto):
   331      # type: (duration_pb2.Duration) -> Duration
   332  
   333      """Creates a Duration from a `google.protobuf.duration_pb2`.
   334  
   335      Note that the google has a sub-second resolution of nanoseconds whereas this
   336      class has a resolution of microsends. This class will truncate the
   337      nanosecond resolution down to the microsecond.
   338      """
   339  
   340      if duration_proto.nanos % 1000 != 0:
   341        # TODO(https://github.com/apache/beam/issues/19922): Better define
   342        # durations.
   343        raise ValueError(
   344            "Cannot convert from nanoseconds to microseconds " +
   345            "because this loses precision. Please make sure that " +
   346            "this is the correct behavior you want and manually " +
   347            "truncate the precision to the nearest microseconds. " +
   348            "See [https://github.com/apache/beam/issues/19922] for " +
   349            "more information.")
   350  
   351      return Duration(
   352          seconds=duration_proto.seconds, micros=duration_proto.nanos // 1000)
   353  
   354    def __repr__(self):
   355      # type: () -> str
   356      micros = self.micros
   357      sign = ''
   358      if micros < 0:
   359        sign = '-'
   360        micros = -micros
   361      int_part = micros // 1000000
   362      frac_part = micros % 1000000
   363      if frac_part:
   364        return 'Duration(%s%d.%06d)' % (sign, int_part, frac_part)
   365      return 'Duration(%s%d)' % (sign, int_part)
   366  
   367    def __float__(self):
   368      # type: () -> float
   369      # Note that the returned value may have lost precision.
   370      return self.micros / 1000000
   371  
   372    def __eq__(self, other):
   373      # type: (object) -> bool
   374      # Allow comparisons between Duration and Timestamp values.
   375      if isinstance(other, (Duration, Timestamp)):
   376        return self.micros == other.micros
   377      elif isinstance(other, (int, float)):
   378        return self.micros == Duration.of(other).micros
   379      else:
   380        # Support equality with other types
   381        return NotImplemented
   382  
   383    def __lt__(self, other):
   384      # type: (TimestampDurationTypes) -> bool
   385      # Allow comparisons between Duration and Timestamp values.
   386      if not isinstance(other, Timestamp):
   387        other = Duration.of(other)
   388      return self.micros < other.micros
   389  
   390    def __gt__(self, other):
   391      # type: (TimestampDurationTypes) -> bool
   392      return not (self < other or self == other)
   393  
   394    def __le__(self, other):
   395      # type: (TimestampDurationTypes) -> bool
   396      return self < other or self == other
   397  
   398    def __ge__(self, other):
   399      # type: (TimestampDurationTypes) -> bool
   400      return not self < other
   401  
   402    def __hash__(self):
   403      # type: () -> int
   404      return hash(self.micros)
   405  
   406    def __neg__(self):
   407      # type: () -> Duration
   408      return Duration(micros=-self.micros)
   409  
   410    def __add__(self, other):
   411      # type: (DurationTypes) -> Duration
   412      if isinstance(other, Timestamp):
   413        # defer to Timestamp.__add__
   414        return NotImplemented
   415      other = Duration.of(other)
   416      return Duration(micros=self.micros + other.micros)
   417  
   418    def __radd__(self, other):
   419      # type: (DurationTypes) -> Duration
   420      return self + other
   421  
   422    def __sub__(self, other):
   423      # type: (DurationTypes) -> Duration
   424      other = Duration.of(other)
   425      return Duration(micros=self.micros - other.micros)
   426  
   427    def __rsub__(self, other):
   428      # type: (DurationTypes) -> Duration
   429      return -(self - other)
   430  
   431    def __mul__(self, other):
   432      # type: (DurationTypes) -> Duration
   433      other = Duration.of(other)
   434      return Duration(micros=self.micros * other.micros // 1000000)
   435  
   436    def __rmul__(self, other):
   437      # type: (DurationTypes) -> Duration
   438      return self * other
   439  
   440    def __mod__(self, other):
   441      # type: (DurationTypes) -> Duration
   442      other = Duration.of(other)
   443      return Duration(micros=self.micros % other.micros)
   444  
   445  
# The minimum granularity / interval expressible in a Timestamp / Duration
# object, i.e. a Duration of exactly one microsecond.
TIME_GRANULARITY = Duration(micros=1)