// Metadata is an ordered collection of string key-value pairs. Keys and
// values are stored in two parallel slices, so the pair at position i is
// (keys[i], values[i]). Duplicate keys are permitted.
type Metadata struct {
	keys   []string
	values []string
}

// NewMetadata returns a Metadata holding copies of keys and values, paired
// by position.
//
// NewMetadata panics if keys and values have different lengths.
func NewMetadata(keys, values []string) Metadata {
	if len(keys) != len(values) {
		panic("arrow: len mismatch")
	}

	n := len(keys)
	if n == 0 {
		return Metadata{}
	}

	md := Metadata{
		keys:   make([]string, n),
		values: make([]string, n),
	}
	copy(md.keys, keys)
	copy(md.values, values)
	return md
}

// MetadataFrom builds a Metadata from the entries of kv. Because map
// iteration order is random, the keys are sorted so the result is
// deterministic.
func MetadataFrom(kv map[string]string) Metadata {
	md := Metadata{
		keys:   make([]string, 0, len(kv)),
		values: make([]string, 0, len(kv)),
	}
	for k := range kv {
		md.keys = append(md.keys, k)
	}
	sort.Strings(md.keys)
	for _, k := range md.keys {
		md.values = append(md.values, kv[k])
	}
	return md
}

// Len returns the number of key-value pairs.
func (md Metadata) Len() int { return len(md.keys) }

// Keys returns the keys in storage order. The returned slice is the
// Metadata's own backing slice, not a copy; callers should not modify it.
func (md Metadata) Keys() []string { return md.keys }

// Values returns the values in storage order. The returned slice is the
// Metadata's own backing slice, not a copy; callers should not modify it.
func (md Metadata) Values() []string { return md.values }

// ToMap returns the key-value pairs as a newly allocated map. If a key
// occurs more than once, the value of its last occurrence wins.
func (md Metadata) ToMap() map[string]string {
	m := make(map[string]string, len(md.keys))
	for i := range md.keys {
		m[md.keys[i]] = md.values[i]
	}
	return m
}

// String renders the pairs in storage order as a bracketed, comma-separated
// list of quoted "key": "value" entries.
func (md Metadata) String() string {
	var o strings.Builder
	o.WriteByte('[')
	for i := range md.keys {
		if i > 0 {
			o.WriteString(", ")
		}
		fmt.Fprintf(&o, "%q: %q", md.keys[i], md.values[i])
	}
	o.WriteByte(']')
	return o.String()
}

// FindKey returns the index of the first key-value pair with the provided
// key name, or -1 if such a key does not exist.
func (md Metadata) FindKey(k string) int {
	for i, v := range md.keys {
		if v == k {
			return i
		}
	}
	return -1
}

// GetValue returns the value associated with the provided key name.
// If the key does not exist, the second return value is false.
func (md Metadata) GetValue(k string) (string, bool) {
	i := md.FindKey(k)
	if i < 0 {
		return "", false
	}
	return md.values[i], true
}

// clone returns a copy of md that shares no slice storage with it.
func (md Metadata) clone() Metadata {
	if len(md.keys) == 0 {
		return Metadata{}
	}

	o := Metadata{
		keys:   make([]string, len(md.keys)),
		values: make([]string, len(md.values)),
	}
	copy(o.keys, md.keys)
	copy(o.values, md.values)

	return o
}

// sortedIndices returns the positions of the pairs ordered by key, with ties
// between equal keys broken by value. md itself is left untouched.
func (md Metadata) sortedIndices() []int {
	idxes := make([]int, len(md.keys))
	for i := range idxes {
		idxes[i] = i
	}

	sort.Slice(idxes, func(i, j int) bool {
		a, b := idxes[i], idxes[j]
		if md.keys[a] != md.keys[b] {
			return md.keys[a] < md.keys[b]
		}
		// sort.Slice is not stable; ordering equal keys by value keeps the
		// result deterministic when a key occurs more than once.
		return md.values[a] < md.values[b]
	})
	return idxes
}

// Equal reports whether md and rhs hold the same key-value pairs, regardless
// of the order in which the pairs are stored.
func (md Metadata) Equal(rhs Metadata) bool {
	if md.Len() != rhs.Len() {
		return false
	}

	idxes := md.sortedIndices()
	rhsIdxes := rhs.sortedIndices()
	for i := range idxes {
		j := idxes[i]
		k := rhsIdxes[i]
		if md.keys[j] != rhs.keys[k] || md.values[j] != rhs.values[k] {
			return false
		}
	}
	return true
}
157 type Schema struct { 158 fields []Field 159 index map[string][]int 160 meta Metadata 161 endianness endian.Endianness 162 } 163 164 // NewSchema returns a new Schema value from the slice of fields and metadata. 165 // 166 // NewSchema panics if there is a field with an invalid DataType. 167 func NewSchema(fields []Field, metadata *Metadata) *Schema { 168 return NewSchemaWithEndian(fields, metadata, endian.NativeEndian) 169 } 170 171 func NewSchemaWithEndian(fields []Field, metadata *Metadata, e endian.Endianness) *Schema { 172 sc := &Schema{ 173 fields: make([]Field, 0, len(fields)), 174 index: make(map[string][]int, len(fields)), 175 endianness: e, 176 } 177 if metadata != nil { 178 sc.meta = metadata.clone() 179 } 180 for i, field := range fields { 181 if field.Type == nil { 182 panic("arrow: field with nil DataType") 183 } 184 sc.fields = append(sc.fields, field) 185 sc.index[field.Name] = append(sc.index[field.Name], i) 186 } 187 return sc 188 } 189 190 func (sc *Schema) WithEndianness(e endian.Endianness) *Schema { 191 return NewSchemaWithEndian(sc.fields, &sc.meta, e) 192 } 193 194 func (sc *Schema) Endianness() endian.Endianness { return sc.endianness } 195 func (sc *Schema) IsNativeEndian() bool { return sc.endianness == endian.NativeEndian } 196 func (sc *Schema) Metadata() Metadata { return sc.meta } 197 func (sc *Schema) Fields() []Field { 198 fields := make([]Field, len(sc.fields)) 199 copy(fields, sc.fields) 200 return fields 201 } 202 func (sc *Schema) Field(i int) Field { return sc.fields[i] } 203 func (sc *Schema) NumFields() int { return len(sc.fields) } 204 205 func (sc *Schema) FieldsByName(n string) ([]Field, bool) { 206 indices, ok := sc.index[n] 207 if !ok { 208 return nil, ok 209 } 210 fields := make([]Field, 0, len(indices)) 211 for _, v := range indices { 212 fields = append(fields, sc.fields[v]) 213 } 214 return fields, ok 215 } 216 217 // FieldIndices returns the indices of the named field or nil. 
218 func (sc *Schema) FieldIndices(n string) []int { 219 return sc.index[n] 220 } 221 222 func (sc *Schema) HasField(n string) bool { return len(sc.FieldIndices(n)) > 0 } 223 func (sc *Schema) HasMetadata() bool { return len(sc.meta.keys) > 0 } 224 225 // Equal returns whether two schema are equal. 226 // Equal does not compare the metadata. 227 func (sc *Schema) Equal(o *Schema) bool { 228 switch { 229 case sc == o: 230 return true 231 case sc == nil || o == nil: 232 return false 233 case len(sc.fields) != len(o.fields): 234 return false 235 case sc.endianness != o.endianness: 236 return false 237 } 238 239 for i := range sc.fields { 240 if !sc.fields[i].Equal(o.fields[i]) { 241 return false 242 } 243 } 244 return true 245 } 246 247 // AddField adds a field at the given index and return a new schema. 248 func (s *Schema) AddField(i int, field Field) (*Schema, error) { 249 if i < 0 || i > len(s.fields) { 250 return nil, fmt.Errorf("arrow: invalid field index %d", i) 251 } 252 253 fields := make([]Field, len(s.fields)+1) 254 copy(fields[:i], s.fields[:i]) 255 fields[i] = field 256 copy(fields[i+1:], s.fields[i:]) 257 return NewSchema(fields, &s.meta), nil 258 } 259 260 func (s *Schema) String() string { 261 o := new(strings.Builder) 262 fmt.Fprintf(o, "schema:\n fields: %d\n", len(s.Fields())) 263 for i, f := range s.Fields() { 264 if i > 0 { 265 o.WriteString("\n") 266 } 267 fmt.Fprintf(o, " - %v", f) 268 } 269 if s.endianness != endian.NativeEndian { 270 fmt.Fprintf(o, "\n endianness: %v", s.endianness) 271 } 272 if meta := s.Metadata(); meta.Len() > 0 { 273 fmt.Fprintf(o, "\n metadata: %v", meta) 274 } 275 return o.String() 276 } 277 278 func (s *Schema) Fingerprint() string { 279 if s == nil { 280 return "" 281 } 282 283 var b strings.Builder 284 b.WriteString("S{") 285 for _, f := range s.Fields() { 286 fieldFingerprint := f.Fingerprint() 287 if fieldFingerprint == "" { 288 return "" 289 } 290 291 b.WriteString(fieldFingerprint) 292 b.WriteByte(';') 293 } 294 
if s.endianness == endian.LittleEndian { 295 b.WriteByte('L') 296 } else { 297 b.WriteByte('B') 298 } 299 b.WriteByte('}') 300 return b.String() 301 }