github.com/apache/arrow/go/v14@v14.0.1/arrow/schema.go (about)

     1  // Licensed to the Apache Software Foundation (ASF) under one
     2  // or more contributor license agreements.  See the NOTICE file
     3  // distributed with this work for additional information
     4  // regarding copyright ownership.  The ASF licenses this file
     5  // to you under the Apache License, Version 2.0 (the
     6  // "License"); you may not use this file except in compliance
     7  // with the License.  You may obtain a copy of the License at
     8  //
     9  // http://www.apache.org/licenses/LICENSE-2.0
    10  //
    11  // Unless required by applicable law or agreed to in writing, software
    12  // distributed under the License is distributed on an "AS IS" BASIS,
    13  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    14  // See the License for the specific language governing permissions and
    15  // limitations under the License.
    16  
    17  package arrow
    18  
    19  import (
    20  	"fmt"
    21  	"sort"
    22  	"strings"
    23  
    24  	"github.com/apache/arrow/go/v14/arrow/endian"
    25  )
    26  
    27  type Metadata struct {
    28  	keys   []string
    29  	values []string
    30  }
    31  
    32  func NewMetadata(keys, values []string) Metadata {
    33  	if len(keys) != len(values) {
    34  		panic("arrow: len mismatch")
    35  	}
    36  
    37  	n := len(keys)
    38  	if n == 0 {
    39  		return Metadata{}
    40  	}
    41  
    42  	md := Metadata{
    43  		keys:   make([]string, n),
    44  		values: make([]string, n),
    45  	}
    46  	copy(md.keys, keys)
    47  	copy(md.values, values)
    48  	return md
    49  }
    50  
    51  func MetadataFrom(kv map[string]string) Metadata {
    52  	md := Metadata{
    53  		keys:   make([]string, 0, len(kv)),
    54  		values: make([]string, 0, len(kv)),
    55  	}
    56  	for k := range kv {
    57  		md.keys = append(md.keys, k)
    58  	}
    59  	sort.Strings(md.keys)
    60  	for _, k := range md.keys {
    61  		md.values = append(md.values, kv[k])
    62  	}
    63  	return md
    64  }
    65  
    66  func (md Metadata) Len() int         { return len(md.keys) }
    67  func (md Metadata) Keys() []string   { return md.keys }
    68  func (md Metadata) Values() []string { return md.values }
    69  func (md Metadata) ToMap() map[string]string {
    70  	m := make(map[string]string, len(md.keys))
    71  	for i := range md.keys {
    72  		m[md.keys[i]] = md.values[i]
    73  	}
    74  	return m
    75  }
    76  
    77  func (md Metadata) String() string {
    78  	o := new(strings.Builder)
    79  	fmt.Fprintf(o, "[")
    80  	for i := range md.keys {
    81  		if i > 0 {
    82  			fmt.Fprintf(o, ", ")
    83  		}
    84  		fmt.Fprintf(o, "%q: %q", md.keys[i], md.values[i])
    85  	}
    86  	fmt.Fprintf(o, "]")
    87  	return o.String()
    88  }
    89  
    90  // FindKey returns the index of the key-value pair with the provided key name,
    91  // or -1 if such a key does not exist.
    92  func (md Metadata) FindKey(k string) int {
    93  	for i, v := range md.keys {
    94  		if v == k {
    95  			return i
    96  		}
    97  	}
    98  	return -1
    99  }
   100  
   101  // GetValue returns the value associated with the provided key name.
   102  // If the key does not exist, the second return value is false.
   103  func (md Metadata) GetValue(k string) (string, bool) {
   104  	i := md.FindKey(k)
   105  	if i < 0 {
   106  		return "", false
   107  	}
   108  	return md.values[i], true
   109  }
   110  
   111  func (md Metadata) clone() Metadata {
   112  	if len(md.keys) == 0 {
   113  		return Metadata{}
   114  	}
   115  
   116  	o := Metadata{
   117  		keys:   make([]string, len(md.keys)),
   118  		values: make([]string, len(md.values)),
   119  	}
   120  	copy(o.keys, md.keys)
   121  	copy(o.values, md.values)
   122  
   123  	return o
   124  }
   125  
   126  func (md Metadata) sortedIndices() []int {
   127  	idxes := make([]int, len(md.keys))
   128  	for i := range idxes {
   129  		idxes[i] = i
   130  	}
   131  
   132  	sort.Slice(idxes, func(i, j int) bool {
   133  		return md.keys[idxes[i]] < md.keys[idxes[j]]
   134  	})
   135  	return idxes
   136  }
   137  
   138  func (md Metadata) Equal(rhs Metadata) bool {
   139  	if md.Len() != rhs.Len() {
   140  		return false
   141  	}
   142  
   143  	idxes := md.sortedIndices()
   144  	rhsIdxes := rhs.sortedIndices()
   145  	for i := range idxes {
   146  		j := idxes[i]
   147  		k := rhsIdxes[i]
   148  		if md.keys[j] != rhs.keys[k] || md.values[j] != rhs.values[k] {
   149  			return false
   150  		}
   151  	}
   152  	return true
   153  }
   154  
   155  // Schema is a sequence of Field values, describing the columns of a table or
   156  // a record batch.
   157  type Schema struct {
   158  	fields     []Field
   159  	index      map[string][]int
   160  	meta       Metadata
   161  	endianness endian.Endianness
   162  }
   163  
   164  // NewSchema returns a new Schema value from the slice of fields and metadata.
   165  //
   166  // NewSchema panics if there is a field with an invalid DataType.
   167  func NewSchema(fields []Field, metadata *Metadata) *Schema {
   168  	return NewSchemaWithEndian(fields, metadata, endian.NativeEndian)
   169  }
   170  
   171  func NewSchemaWithEndian(fields []Field, metadata *Metadata, e endian.Endianness) *Schema {
   172  	sc := &Schema{
   173  		fields:     make([]Field, 0, len(fields)),
   174  		index:      make(map[string][]int, len(fields)),
   175  		endianness: e,
   176  	}
   177  	if metadata != nil {
   178  		sc.meta = metadata.clone()
   179  	}
   180  	for i, field := range fields {
   181  		if field.Type == nil {
   182  			panic("arrow: field with nil DataType")
   183  		}
   184  		sc.fields = append(sc.fields, field)
   185  		sc.index[field.Name] = append(sc.index[field.Name], i)
   186  	}
   187  	return sc
   188  }
   189  
   190  func (sc *Schema) WithEndianness(e endian.Endianness) *Schema {
   191  	return NewSchemaWithEndian(sc.fields, &sc.meta, e)
   192  }
   193  
   194  func (sc *Schema) Endianness() endian.Endianness { return sc.endianness }
   195  func (sc *Schema) IsNativeEndian() bool          { return sc.endianness == endian.NativeEndian }
   196  func (sc *Schema) Metadata() Metadata            { return sc.meta }
   197  func (sc *Schema) Fields() []Field {
   198  	fields := make([]Field, len(sc.fields))
   199  	copy(fields, sc.fields)
   200  	return fields
   201  }
   202  func (sc *Schema) Field(i int) Field { return sc.fields[i] }
   203  func (sc *Schema) NumFields() int    { return len(sc.fields) }
   204  
   205  func (sc *Schema) FieldsByName(n string) ([]Field, bool) {
   206  	indices, ok := sc.index[n]
   207  	if !ok {
   208  		return nil, ok
   209  	}
   210  	fields := make([]Field, 0, len(indices))
   211  	for _, v := range indices {
   212  		fields = append(fields, sc.fields[v])
   213  	}
   214  	return fields, ok
   215  }
   216  
   217  // FieldIndices returns the indices of the named field or nil.
   218  func (sc *Schema) FieldIndices(n string) []int {
   219  	return sc.index[n]
   220  }
   221  
   222  func (sc *Schema) HasField(n string) bool { return len(sc.FieldIndices(n)) > 0 }
   223  func (sc *Schema) HasMetadata() bool      { return len(sc.meta.keys) > 0 }
   224  
   225  // Equal returns whether two schema are equal.
   226  // Equal does not compare the metadata.
   227  func (sc *Schema) Equal(o *Schema) bool {
   228  	switch {
   229  	case sc == o:
   230  		return true
   231  	case sc == nil || o == nil:
   232  		return false
   233  	case len(sc.fields) != len(o.fields):
   234  		return false
   235  	case sc.endianness != o.endianness:
   236  		return false
   237  	}
   238  
   239  	for i := range sc.fields {
   240  		if !sc.fields[i].Equal(o.fields[i]) {
   241  			return false
   242  		}
   243  	}
   244  	return true
   245  }
   246  
   247  // AddField adds a field at the given index and return a new schema.
   248  func (s *Schema) AddField(i int, field Field) (*Schema, error) {
   249  	if i < 0 || i > len(s.fields) {
   250  		return nil, fmt.Errorf("arrow: invalid field index %d", i)
   251  	}
   252  
   253  	fields := make([]Field, len(s.fields)+1)
   254  	copy(fields[:i], s.fields[:i])
   255  	fields[i] = field
   256  	copy(fields[i+1:], s.fields[i:])
   257  	return NewSchema(fields, &s.meta), nil
   258  }
   259  
   260  func (s *Schema) String() string {
   261  	o := new(strings.Builder)
   262  	fmt.Fprintf(o, "schema:\n  fields: %d\n", len(s.Fields()))
   263  	for i, f := range s.Fields() {
   264  		if i > 0 {
   265  			o.WriteString("\n")
   266  		}
   267  		fmt.Fprintf(o, "    - %v", f)
   268  	}
   269  	if s.endianness != endian.NativeEndian {
   270  		fmt.Fprintf(o, "\n  endianness: %v", s.endianness)
   271  	}
   272  	if meta := s.Metadata(); meta.Len() > 0 {
   273  		fmt.Fprintf(o, "\n  metadata: %v", meta)
   274  	}
   275  	return o.String()
   276  }
   277  
   278  func (s *Schema) Fingerprint() string {
   279  	if s == nil {
   280  		return ""
   281  	}
   282  
   283  	var b strings.Builder
   284  	b.WriteString("S{")
   285  	for _, f := range s.Fields() {
   286  		fieldFingerprint := f.Fingerprint()
   287  		if fieldFingerprint == "" {
   288  			return ""
   289  		}
   290  
   291  		b.WriteString(fieldFingerprint)
   292  		b.WriteByte(';')
   293  	}
   294  	if s.endianness == endian.LittleEndian {
   295  		b.WriteByte('L')
   296  	} else {
   297  		b.WriteByte('B')
   298  	}
   299  	b.WriteByte('}')
   300  	return b.String()
   301  }