storj.io/minio@v0.0.0-20230509071714-0cbc90f649b1/pkg/s3select/internal/parquet-go/schema/element.go (about)

     1  /*
     2   * Minio Cloud Storage, (C) 2019 Minio, Inc.
     3   *
     4   * Licensed under the Apache License, Version 2.0 (the "License");
     5   * you may not use this file except in compliance with the License.
     6   * You may obtain a copy of the License at
     7   *
     8   *     http://www.apache.org/licenses/LICENSE-2.0
     9   *
    10   * Unless required by applicable law or agreed to in writing, software
    11   * distributed under the License is distributed on an "AS IS" BASIS,
    12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13   * See the License for the specific language governing permissions and
    14   * limitations under the License.
    15   */
    16  
    17  package schema
    18  
    19  import (
    20  	"fmt"
    21  	"regexp"
    22  	"strings"
    23  
    24  	"storj.io/minio/pkg/s3select/internal/parquet-go/gen-go/parquet"
    25  )
    26  
    27  var nameRegexp = regexp.MustCompile("^[a-zA-Z0-9_]+$")
    28  
    29  func validataPathSegments(pathSegments []string) error {
    30  	for _, pathSegment := range pathSegments {
    31  		if !nameRegexp.MatchString(pathSegment) {
    32  			return fmt.Errorf("unsupported name %v", strings.Join(pathSegments, "."))
    33  		}
    34  	}
    35  
    36  	return nil
    37  }
    38  
// Element - represents a schema element and its children. Any element must have the Name and
// RepetitionType fields set; both are promoted from the embedded parquet.SchemaElement, as are
// Type, ConvertedType and NumChildren.
type Element struct {
	parquet.SchemaElement
	numChildren        int32                     // Backing storage that NewElement points NumChildren at.
	Encoding           *parquet.Encoding         // Optional; default is computed.
	CompressionType    *parquet.CompressionCodec // Optional; defaults to SNAPPY.
	Children           *Tree                     // Child elements; may be nil.
	MaxDefinitionLevel int64                     // Not set by NewElement.
	MaxRepetitionLevel int64                     // Not set by NewElement.
	PathInTree         string                    // Not set by NewElement.
	PathInSchema       string                    // Not set by NewElement.
}
    51  
    52  // String - stringify this element.
    53  func (element *Element) String() string {
    54  	var s []string
    55  	s = append(s, "Name:"+element.Name)
    56  	s = append(s, "RepetitionType:"+element.RepetitionType.String())
    57  	if element.Type != nil {
    58  		s = append(s, "Type:"+element.Type.String())
    59  	}
    60  	if element.ConvertedType != nil {
    61  		s = append(s, "ConvertedType:"+element.ConvertedType.String())
    62  	}
    63  	if element.Encoding != nil {
    64  		s = append(s, "Encoding:"+element.Encoding.String())
    65  	}
    66  	if element.CompressionType != nil {
    67  		s = append(s, "CompressionType:"+element.CompressionType.String())
    68  	}
    69  	if element.Children != nil && element.Children.Length() > 0 {
    70  		s = append(s, "Children:"+element.Children.String())
    71  	}
    72  	s = append(s, fmt.Sprintf("MaxDefinitionLevel:%v", element.MaxDefinitionLevel))
    73  	s = append(s, fmt.Sprintf("MaxRepetitionLevel:%v", element.MaxRepetitionLevel))
    74  	if element.PathInTree != "" {
    75  		s = append(s, "PathInTree:"+element.PathInTree)
    76  	}
    77  	if element.PathInSchema != "" {
    78  		s = append(s, "PathInSchema:"+element.PathInSchema)
    79  	}
    80  
    81  	return "{" + strings.Join(s, ", ") + "}"
    82  }
    83  
    84  // NewElement - creates new element.
    85  func NewElement(name string, repetitionType parquet.FieldRepetitionType,
    86  	elementType *parquet.Type, convertedType *parquet.ConvertedType,
    87  	encoding *parquet.Encoding, compressionType *parquet.CompressionCodec,
    88  	children *Tree) (*Element, error) {
    89  
    90  	if !nameRegexp.MatchString(name) {
    91  		return nil, fmt.Errorf("unsupported name %v", name)
    92  	}
    93  
    94  	switch repetitionType {
    95  	case parquet.FieldRepetitionType_REQUIRED, parquet.FieldRepetitionType_OPTIONAL, parquet.FieldRepetitionType_REPEATED:
    96  	default:
    97  		return nil, fmt.Errorf("unknown repetition type %v", repetitionType)
    98  	}
    99  
   100  	if repetitionType == parquet.FieldRepetitionType_REPEATED && (elementType != nil || convertedType != nil) {
   101  		return nil, fmt.Errorf("repetition type REPEATED should be used in group element")
   102  	}
   103  
   104  	if children != nil && children.Length() != 0 {
   105  		if elementType != nil {
   106  			return nil, fmt.Errorf("type should be nil for group element")
   107  		}
   108  	}
   109  
   110  	element := Element{
   111  		Encoding:        encoding,
   112  		CompressionType: compressionType,
   113  		Children:        children,
   114  	}
   115  
   116  	element.Name = name
   117  	element.RepetitionType = &repetitionType
   118  	element.Type = elementType
   119  	element.ConvertedType = convertedType
   120  	element.NumChildren = &element.numChildren
   121  	if element.Children != nil {
   122  		element.numChildren = int32(element.Children.Length())
   123  	}
   124  
   125  	return &element, nil
   126  }