github.com/apache/beam/sdks/v2@v2.48.2/python/apache_beam/coders/coder_impl_row_encoders.pyx (about)

     1  #
     2  # Licensed to the Apache Software Foundation (ASF) under one or more
     3  # contributor license agreements.  See the NOTICE file distributed with
     4  # this work for additional information regarding copyright ownership.
     5  # The ASF licenses this file to You under the Apache License, Version 2.0
     6  # (the "License"); you may not use this file except in compliance with
     7  # the License.  You may obtain a copy of the License at
     8  #
     9  #    http://www.apache.org/licenses/LICENSE-2.0
    10  #
    11  # Unless required by applicable law or agreed to in writing, software
    12  # distributed under the License is distributed on an "AS IS" BASIS,
    13  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    14  # See the License for the specific language governing permissions and
    15  # limitations under the License.
    16  #
    17  
    18  """Optimized implementations of various schema columnar types."""
    19  
    20  # pytype: skip-file
    21  
    22  import numpy as np
    23  cimport numpy as np
    24  
    25  from apache_beam.coders import coder_impl
    26  from apache_beam.coders.coder_impl cimport RowColumnEncoder, OutputStream, InputStream
    27  from apache_beam.portability.api import schema_pb2
    28  
    29  
    30  cdef class AtomicTypeRowColumnEncoder(RowColumnEncoder):
    31    cdef original
    32    cdef contiguous
    33  
    34    def __init__(self, column):
    35      self.original = column
    36      self.contiguous = np.ascontiguousarray(column)
    37  
    38    def null_flags(self):
    39      return None
    40  
    41    def finalize_write(self):
    42      if self.original is not self.contiguous:
    43        self.original[:] = self.contiguous
    44  
    45  
    46  cdef class FloatFloat32RowColumnEncoder(AtomicTypeRowColumnEncoder):
    47    cdef np.float32_t* data
    48  
    49    def __init__(self, unused_coder, column):
    50      super(FloatFloat32RowColumnEncoder, self).__init__(column)
    51      cdef np.float32_t[::1] view = self.contiguous
    52      self.data = &view[0]
    53  
    54    cdef bint encode_to_stream(self, size_t index, OutputStream stream) except -1:
    55      stream.write_bigendian_float(self.data[index])
    56  
    57    cdef bint decode_from_stream(self, size_t index, InputStream stream) except -1:
    58      self.data[index] = stream.read_bigendian_float()
    59  
    60  FloatFloat32RowColumnEncoder.register(schema_pb2.FLOAT, np.float32().dtype)
    61  
    62  
    63  cdef class FloatFloat64RowColumnEncoder(AtomicTypeRowColumnEncoder):
    64    cdef np.float64_t* data
    65  
    66    def __init__(self, unused_coder, column):
    67      super(FloatFloat64RowColumnEncoder, self).__init__(column)
    68      cdef np.float64_t[::1] view = self.contiguous
    69      self.data = &view[0]
    70  
    71    cdef bint encode_to_stream(self, size_t index, OutputStream stream) except -1:
    72      stream.write_bigendian_float(self.data[index])
    73  
    74    cdef bint decode_from_stream(self, size_t index, InputStream stream) except -1:
    75      self.data[index] = stream.read_bigendian_float()
    76  
    77  FloatFloat64RowColumnEncoder.register(schema_pb2.FLOAT, np.float64().dtype)
    78  
    79  
    80  cdef class DoubleFloat32RowColumnEncoder(AtomicTypeRowColumnEncoder):
    81    cdef np.float32_t* data
    82  
    83    def __init__(self, unused_coder, column):
    84      super(DoubleFloat32RowColumnEncoder, self).__init__(column)
    85      cdef np.float32_t[::1] view = self.contiguous
    86      self.data = &view[0]
    87  
    88    cdef bint encode_to_stream(self, size_t index, OutputStream stream) except -1:
    89      stream.write_bigendian_double(self.data[index])
    90  
    91    cdef bint decode_from_stream(self, size_t index, InputStream stream) except -1:
    92      self.data[index] = stream.read_bigendian_double()
    93  
    94  DoubleFloat32RowColumnEncoder.register(schema_pb2.DOUBLE, np.float32().dtype)
    95  
    96  
    97  cdef class DoubleFloat64RowColumnEncoder(AtomicTypeRowColumnEncoder):
    98    cdef np.float64_t* data
    99  
   100    def __init__(self, unused_coder, column):
   101      super(DoubleFloat64RowColumnEncoder, self).__init__(column)
   102      cdef np.float64_t[::1] view = self.contiguous
   103      self.data = &view[0]
   104  
   105    cdef bint encode_to_stream(self, size_t index, OutputStream stream) except -1:
   106      stream.write_bigendian_double(self.data[index])
   107  
   108    cdef bint decode_from_stream(self, size_t index, InputStream stream) except -1:
   109      self.data[index] = stream.read_bigendian_double()
   110  
   111  DoubleFloat64RowColumnEncoder.register(schema_pb2.DOUBLE, np.float64().dtype)
   112  
   113  
   114  cdef class Int32Int32RowColumnEncoder(AtomicTypeRowColumnEncoder):
   115    cdef np.int32_t* data
   116  
   117    def __init__(self, unused_coder, column):
   118      super(Int32Int32RowColumnEncoder, self).__init__(column)
   119      cdef np.int32_t[::1] view = self.contiguous
   120      self.data = &view[0]
   121  
   122    cdef bint encode_to_stream(self, size_t index, OutputStream stream) except -1:
   123      stream.write_var_int64(self.data[index])
   124  
   125    cdef bint decode_from_stream(self, size_t index, InputStream stream) except -1:
   126      self.data[index] = stream.read_var_int64()
   127  
   128  Int32Int32RowColumnEncoder.register(schema_pb2.INT32, np.int32().dtype)
   129  Int32Int32RowColumnEncoder.register(schema_pb2.INT32, np.int64().dtype)
   130  
   131  
   132  cdef class Int64Int64RowColumnEncoder(AtomicTypeRowColumnEncoder):
   133    cdef np.int64_t* data
   134  
   135    def __init__(self, unused_coder, column):
   136      super(Int64Int64RowColumnEncoder, self).__init__(column)
   137      cdef np.int64_t[::1] view = self.contiguous
   138      self.data = &view[0]
   139  
   140    cdef bint encode_to_stream(self, size_t index, OutputStream stream) except -1:
   141      stream.write_var_int64(self.data[index])
   142  
   143    cdef bint decode_from_stream(self, size_t index, InputStream stream) except -1:
   144      self.data[index] = stream.read_var_int64()
   145  
   146  Int64Int64RowColumnEncoder.register(schema_pb2.INT64, np.int64().dtype)
   147  
   148  
   149  cdef class BoolRowColumnEncoder(AtomicTypeRowColumnEncoder):
   150    cdef np.uint8_t* data
   151  
   152    def __init__(self, unused_coder, column):
   153      super(BoolRowColumnEncoder, self).__init__(column)
   154      self.contiguous = self.contiguous.astype(np.uint8)
   155      cdef np.uint8_t[::1] view = self.contiguous
   156      self.data = &view[0]
   157  
   158    cdef bint encode_to_stream(self, size_t index, OutputStream stream) except -1:
   159      stream.write_byte(self.data[index])
   160  
   161    cdef bint decode_from_stream(self, size_t index, InputStream stream) except -1:
   162      self.data[index] = stream.read_byte()
   163  
   164  BoolRowColumnEncoder.register(schema_pb2.BOOLEAN, np.int8().dtype)
   165  BoolRowColumnEncoder.register(schema_pb2.BOOLEAN, np.uint8().dtype)
   166  BoolRowColumnEncoder.register(schema_pb2.BOOLEAN, np.bool_().dtype)
   167  
   168