github.com/apache/beam/sdks/v2@v2.48.2/python/apache_beam/coders/slow_stream.py

github.com/apache/beam/sdks/v2@v2.48.2/python/apache_beam/coders/slow_stream.py (about)

     1  #
     2  # Licensed to the Apache Software Foundation (ASF) under one or more
     3  # contributor license agreements.  See the NOTICE file distributed with
     4  # this work for additional information regarding copyright ownership.
     5  # The ASF licenses this file to You under the Apache License, Version 2.0
     6  # (the "License"); you may not use this file except in compliance with
     7  # the License.  You may obtain a copy of the License at
     8  #
     9  #    http://www.apache.org/licenses/LICENSE-2.0
    10  #
    11  # Unless required by applicable law or agreed to in writing, software
    12  # distributed under the License is distributed on an "AS IS" BASIS,
    13  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    14  # See the License for the specific language governing permissions and
    15  # limitations under the License.
    16  #
    17  
    18  """A pure Python implementation of stream.pyx.
    19  
    20  For internal use only; no backwards-compatibility guarantees.
    21  """
    22  # pytype: skip-file
    23  
    24  import struct
    25  from typing import List
    26  
    27  
    28  class OutputStream(object):
    29    """For internal use only; no backwards-compatibility guarantees.
    30  
    31    A pure Python implementation of stream.OutputStream."""
    32    def __init__(self):
    33      self.data = []  # type: List[bytes]
    34      self.byte_count = 0
    35  
    36    def write(self, b, nested=False):
    37      # type: (bytes, bool) -> None
    38      assert isinstance(b, bytes)
    39      if nested:
    40        self.write_var_int64(len(b))
    41      self.data.append(b)
    42      self.byte_count += len(b)
    43  
    44    def write_byte(self, val):
    45      self.data.append(chr(val).encode('latin-1'))
    46      self.byte_count += 1
    47  
    48    def write_var_int64(self, v):
    49      # type: (int) -> None
    50      if v < 0:
    51        v += 1 << 64
    52        if v <= 0:
    53          raise ValueError('Value too large (negative).')
    54      while True:
    55        bits = v & 0x7F
    56        v >>= 7
    57        if v:
    58          bits |= 0x80
    59        self.write_byte(bits)
    60        if not v:
    61          break
    62  
    63    def write_bigendian_int64(self, v):
    64      self.write(struct.pack('>q', v))
    65  
    66    def write_bigendian_uint64(self, v):
    67      self.write(struct.pack('>Q', v))
    68  
    69    def write_bigendian_int32(self, v):
    70      self.write(struct.pack('>i', v))
    71  
    72    def write_bigendian_int16(self, v):
    73      self.write(struct.pack('>h', v))
    74  
    75    def write_bigendian_double(self, v):
    76      self.write(struct.pack('>d', v))
    77  
    78    def write_bigendian_float(self, v):
    79      self.write(struct.pack('>f', v))
    80  
    81    def get(self):
    82      # type: () -> bytes
    83      return b''.join(self.data)
    84  
    85    def size(self):
    86      # type: () -> int
    87      return self.byte_count
    88  
    89    def _clear(self):
    90      # type: () -> None
    91      self.data = []
    92      self.byte_count = 0
    93  
    94  
    95  class ByteCountingOutputStream(OutputStream):
    96    """For internal use only; no backwards-compatibility guarantees.
    97  
    98    A pure Python implementation of stream.ByteCountingOutputStream."""
    99    def __init__(self):
   100      # Note that we don't actually use any of the data initialized by our super.
   101      super().__init__()
   102      self.count = 0
   103  
   104    def write(self, byte_array, nested=False):
   105      # type: (bytes, bool) -> None
   106      blen = len(byte_array)
   107      if nested:
   108        self.write_var_int64(blen)
   109      self.count += blen
   110  
   111    def write_byte(self, _):
   112      self.count += 1
   113  
   114    def get_count(self):
   115      return self.count
   116  
   117    def get(self):
   118      raise NotImplementedError
   119  
   120    def __str__(self):
   121      return '<%s %s>' % (self.__class__.__name__, self.count)
   122  
   123  
   124  class InputStream(object):
   125    """For internal use only; no backwards-compatibility guarantees.
   126  
   127    A pure Python implementation of stream.InputStream."""
   128    def __init__(self, data):
   129      # type: (bytes) -> None
   130      self.data = data
   131      self.pos = 0
   132  
   133    def size(self):
   134      return len(self.data) - self.pos
   135  
   136    def read(self, size):
   137      # type: (int) -> bytes
   138      self.pos += size
   139      return self.data[self.pos - size:self.pos]
   140  
   141    def read_all(self, nested):
   142      # type: (bool) -> bytes
   143      return self.read(self.read_var_int64() if nested else self.size())
   144  
   145    def read_byte(self):
   146      # type: () -> int
   147      self.pos += 1
   148      return self.data[self.pos - 1]
   149  
   150    def read_var_int64(self):
   151      shift = 0
   152      result = 0
   153      while True:
   154        byte = self.read_byte()
   155        if byte < 0:
   156          raise RuntimeError('VarLong not terminated.')
   157  
   158        bits = byte & 0x7F
   159        if shift >= 64 or (shift >= 63 and bits > 1):
   160          raise RuntimeError('VarLong too long.')
   161        result |= bits << shift
   162        shift += 7
   163        if not byte & 0x80:
   164          break
   165      if result >= 1 << 63:
   166        result -= 1 << 64
   167      return result
   168  
   169    def read_bigendian_int64(self):
   170      return struct.unpack('>q', self.read(8))[0]
   171  
   172    def read_bigendian_uint64(self):
   173      return struct.unpack('>Q', self.read(8))[0]
   174  
   175    def read_bigendian_int32(self):
   176      return struct.unpack('>i', self.read(4))[0]
   177  
   178    def read_bigendian_int16(self):
   179      return struct.unpack('>h', self.read(2))[0]
   180  
   181    def read_bigendian_double(self):
   182      return struct.unpack('>d', self.read(8))[0]
   183  
   184    def read_bigendian_float(self):
   185      return struct.unpack('>f', self.read(4))[0]
   186  
   187  
   188  def get_varint_size(v):
   189    """For internal use only; no backwards-compatibility guarantees.
   190  
   191    Returns the size of the given integer value when encode as a VarInt."""
   192    if v < 0:
   193      v += 1 << 64
   194      if v <= 0:
   195        raise ValueError('Value too large (negative).')
   196    varint_size = 0
   197    while True:
   198      varint_size += 1
   199      v >>= 7
   200      if not v:
   201        break
   202    return varint_size