#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

"""Optimized implementations of various schema columnar types."""

# pytype: skip-file

import numpy as np
cimport numpy as np

from apache_beam.coders import coder_impl
from apache_beam.coders.coder_impl cimport RowColumnEncoder, OutputStream, InputStream
from apache_beam.portability.api import schema_pb2


cdef class AtomicTypeRowColumnEncoder(RowColumnEncoder):
  """Shared base for encoders that operate on a column of fixed-width values.

  Subclasses take a raw C pointer into ``contiguous`` and read/write single
  elements through it in ``encode_to_stream`` / ``decode_from_stream``.
  """
  # The column object exactly as handed in by the caller.
  cdef original
  # C-contiguous array the subclasses point into; this is ``original`` itself
  # when numpy did not need to make a copy.
  cdef contiguous

  def __init__(self, column):
    self.original = column
    self.contiguous = np.ascontiguousarray(column)

  def null_flags(self):
    """Always returns None; this encoder reports no null flags."""
    return None

  def finalize_write(self):
    """Copies values back into the caller's column if a copy was used."""
    # Writes made through the subclasses' data pointers land in
    # ``contiguous``; when that is a separate array they must be copied
    # back so that the caller's column reflects them.
    if self.original is not self.contiguous:
      self.original[:] = self.contiguous


cdef class FloatFloat32RowColumnEncoder(AtomicTypeRowColumnEncoder):
  """FLOAT schema field stored in a float32 column."""
  cdef np.float32_t* data

  def __init__(self, unused_coder, column):
    super(FloatFloat32RowColumnEncoder, self).__init__(column)
    # Acquiring the typed view checks the dtype and contiguity of the
    # buffer.  self.contiguous stays referenced by this object, which keeps
    # the memory that self.data points into alive.
    cdef np.float32_t[::1] view = self.contiguous
    self.data = &view[0]

  cdef bint encode_to_stream(self, size_t index, OutputStream stream) except -1:
    # Writes element ``index`` of the column as a big-endian float.
    stream.write_bigendian_float(self.data[index])

  cdef bint decode_from_stream(self, size_t index, InputStream stream) except -1:
    # Reads a big-endian float into element ``index`` of the column.
    self.data[index] = stream.read_bigendian_float()

FloatFloat32RowColumnEncoder.register(schema_pb2.FLOAT, np.float32().dtype)


cdef class FloatFloat64RowColumnEncoder(AtomicTypeRowColumnEncoder):
  """FLOAT schema field stored in a float64 column."""
  cdef np.float64_t* data

  def __init__(self, unused_coder, column):
    super(FloatFloat64RowColumnEncoder, self).__init__(column)
    cdef np.float64_t[::1] view = self.contiguous
    self.data = &view[0]

  cdef bint encode_to_stream(self, size_t index, OutputStream stream) except -1:
    # The float64 element is passed to the float writer.
    stream.write_bigendian_float(self.data[index])

  cdef bint decode_from_stream(self, size_t index, InputStream stream) except -1:
    self.data[index] = stream.read_bigendian_float()

FloatFloat64RowColumnEncoder.register(schema_pb2.FLOAT, np.float64().dtype)


cdef class DoubleFloat32RowColumnEncoder(AtomicTypeRowColumnEncoder):
  """DOUBLE schema field stored in a float32 column."""
  cdef np.float32_t* data

  def __init__(self, unused_coder, column):
    super(DoubleFloat32RowColumnEncoder, self).__init__(column)
    cdef np.float32_t[::1] view = self.contiguous
    self.data = &view[0]

  cdef bint encode_to_stream(self, size_t index, OutputStream stream) except -1:
    stream.write_bigendian_double(self.data[index])

  cdef bint decode_from_stream(self, size_t index, InputStream stream) except -1:
    # The value read by the double reader is stored into float32 storage.
    self.data[index] = stream.read_bigendian_double()

DoubleFloat32RowColumnEncoder.register(schema_pb2.DOUBLE, np.float32().dtype)


cdef class DoubleFloat64RowColumnEncoder(AtomicTypeRowColumnEncoder):
  """DOUBLE schema field stored in a float64 column."""
  cdef np.float64_t* data

  def __init__(self, unused_coder, column):
    super(DoubleFloat64RowColumnEncoder, self).__init__(column)
    cdef np.float64_t[::1] view = self.contiguous
    self.data = &view[0]

  cdef bint encode_to_stream(self, size_t index, OutputStream stream) except -1:
    stream.write_bigendian_double(self.data[index])

  cdef bint decode_from_stream(self, size_t index, InputStream stream) except -1:
    self.data[index] = stream.read_bigendian_double()

DoubleFloat64RowColumnEncoder.register(schema_pb2.DOUBLE, np.float64().dtype)


cdef class Int32Int32RowColumnEncoder(AtomicTypeRowColumnEncoder):
  """INT32 schema field stored in an int32 column."""
  cdef np.int32_t* data

  def __init__(self, unused_coder, column):
    super(Int32Int32RowColumnEncoder, self).__init__(column)
    cdef np.int32_t[::1] view = self.contiguous
    self.data = &view[0]

  cdef bint encode_to_stream(self, size_t index, OutputStream stream) except -1:
    stream.write_var_int64(self.data[index])

  cdef bint decode_from_stream(self, size_t index, InputStream stream) except -1:
    # The value read as a var-int64 is stored into int32 storage.
    self.data[index] = stream.read_var_int64()

# Only the int32 dtype is registered for this class: an int64 column cannot
# be bound to the np.int32_t[::1] view in __init__ (buffer dtype mismatch).
Int32Int32RowColumnEncoder.register(schema_pb2.INT32, np.int32().dtype)


cdef class Int32Int64RowColumnEncoder(AtomicTypeRowColumnEncoder):
  """INT32 schema field stored in an int64 column."""
  cdef np.int64_t* data

  def __init__(self, unused_coder, column):
    super(Int32Int64RowColumnEncoder, self).__init__(column)
    cdef np.int64_t[::1] view = self.contiguous
    self.data = &view[0]

  cdef bint encode_to_stream(self, size_t index, OutputStream stream) except -1:
    stream.write_var_int64(self.data[index])

  cdef bint decode_from_stream(self, size_t index, InputStream stream) except -1:
    self.data[index] = stream.read_var_int64()

Int32Int64RowColumnEncoder.register(schema_pb2.INT32, np.int64().dtype)


cdef class Int64Int64RowColumnEncoder(AtomicTypeRowColumnEncoder):
  """INT64 schema field stored in an int64 column."""
  cdef np.int64_t* data

  def __init__(self, unused_coder, column):
    super(Int64Int64RowColumnEncoder, self).__init__(column)
    cdef np.int64_t[::1] view = self.contiguous
    self.data = &view[0]

  cdef bint encode_to_stream(self, size_t index, OutputStream stream) except -1:
    stream.write_var_int64(self.data[index])

  cdef bint decode_from_stream(self, size_t index, InputStream stream) except -1:
    self.data[index] = stream.read_var_int64()

Int64Int64RowColumnEncoder.register(schema_pb2.INT64, np.int64().dtype)


cdef class BoolRowColumnEncoder(AtomicTypeRowColumnEncoder):
  """BOOLEAN schema field stored in an int8, uint8 or bool column."""
  cdef np.uint8_t* data

  def __init__(self, unused_coder, column):
    super(BoolRowColumnEncoder, self).__init__(column)
    # Normalize all registered dtypes to uint8 so one pointer type serves
    # them all.  astype() returns a new array by default, so the working
    # buffer differs from the original column and finalize_write() copies
    # decoded values back.
    self.contiguous = self.contiguous.astype(np.uint8)
    cdef np.uint8_t[::1] view = self.contiguous
    self.data = &view[0]

  cdef bint encode_to_stream(self, size_t index, OutputStream stream) except -1:
    stream.write_byte(self.data[index])

  cdef bint decode_from_stream(self, size_t index, InputStream stream) except -1:
    self.data[index] = stream.read_byte()

BoolRowColumnEncoder.register(schema_pb2.BOOLEAN, np.int8().dtype)
BoolRowColumnEncoder.register(schema_pb2.BOOLEAN, np.uint8().dtype)
BoolRowColumnEncoder.register(schema_pb2.BOOLEAN, np.bool_().dtype)