github.com/treeverse/lakefs@v1.24.1-0.20240520134607-95648127bfb0/pkg/metastore/glue/converter.go (about)

     1  package glue
     2  
     3  import (
     4  	"time"
     5  
     6  	"github.com/aws/aws-sdk-go-v2/aws"
     7  	"github.com/aws/aws-sdk-go-v2/service/glue/types"
     8  	"github.com/treeverse/lakefs/pkg/metastore"
     9  )
    10  
    11  func serDeGlueToLocal(g *types.SerDeInfo) *metastore.SerDeInfo {
    12  	if g == nil {
    13  		return nil
    14  	}
    15  	return &metastore.SerDeInfo{
    16  		Name:             aws.ToString(g.Name),
    17  		SerializationLib: aws.ToString(g.SerializationLibrary),
    18  		Parameters:       g.Parameters,
    19  	}
    20  }
    21  
    22  func skewedGlueToLocal(g *types.SkewedInfo) *metastore.SkewedInfo {
    23  	if g == nil {
    24  		return &metastore.SkewedInfo{}
    25  	}
    26  	return &metastore.SkewedInfo{
    27  		SkewedColNames:             g.SkewedColumnNames,
    28  		SkewedColValues:            [][]string{g.SkewedColumnValues}, // TODO(Guys): validate this hive uses [][]string glue uses []string (????)
    29  		SkewedColValueLocationMaps: g.SkewedColumnValueLocationMaps,
    30  	}
    31  }
    32  
    33  func sortColumnsGlueToLocal(columns []types.Order) []*metastore.Order {
    34  	res := make([]*metastore.Order, len(columns))
    35  	for i, column := range columns {
    36  		res[i] = &metastore.Order{
    37  			Col:   aws.ToString(column.Column),
    38  			Order: int(column.SortOrder),
    39  		}
    40  	}
    41  	return res
    42  }
    43  
    44  func columnsGlueToLocal(columns []types.Column) []*metastore.FieldSchema {
    45  	res := make([]*metastore.FieldSchema, len(columns))
    46  	for i, column := range columns {
    47  		res[i] = &metastore.FieldSchema{
    48  			Name:    aws.ToString(column.Name),
    49  			Type:    aws.ToString(column.Type),
    50  			Comment: aws.ToString(column.Comment),
    51  		}
    52  	}
    53  	return res
    54  }
    55  
    56  func TableGlueToLocal(glueTable *types.Table) *metastore.Table {
    57  	sd := SDGlueToLocal(glueTable.StorageDescriptor)
    58  	ht := &metastore.Table{
    59  		TableName:        aws.ToString(glueTable.Name),
    60  		DBName:           aws.ToString(glueTable.DatabaseName),
    61  		Owner:            aws.ToString(glueTable.Owner),
    62  		CreateTime:       aws.ToTime(glueTable.CreateTime).Unix(),     // TODO(Guys): check if this OK
    63  		LastAccessTime:   aws.ToTime(glueTable.LastAccessTime).Unix(), // TODO(Guys): check if this OK
    64  		Retention:        int(glueTable.Retention),                    // TODO(Guys): check if this OK
    65  		Sd:               sd,
    66  		PartitionKeys:    columnsGlueToLocal(glueTable.PartitionKeys),
    67  		Parameters:       glueTable.Parameters,
    68  		ViewOriginalText: aws.ToString(glueTable.ViewOriginalText),
    69  		ViewExpandedText: aws.ToString(glueTable.ViewExpandedText),
    70  		TableType:        aws.ToString(glueTable.TableType),
    71  		RewriteEnabled:   nil,
    72  		Temporary:        false,
    73  	}
    74  	return ht
    75  }
    76  
    77  func TablesGlueToLocal(glueTables []types.Table) []*metastore.Table {
    78  	tables := make([]*metastore.Table, len(glueTables))
    79  	for i := range glueTables {
    80  		tables[i] = TableGlueToLocal(&glueTables[i])
    81  	}
    82  	return tables
    83  }
    84  
    85  func PartitionGlueToLocal(gluePartition *types.Partition) *metastore.Partition {
    86  	sd := SDGlueToLocal(gluePartition.StorageDescriptor)
    87  	partition := &metastore.Partition{
    88  		Values:              gluePartition.Values,
    89  		DBName:              aws.ToString(gluePartition.DatabaseName),
    90  		TableName:           aws.ToString(gluePartition.TableName),
    91  		CreateTime:          int(aws.ToTime(gluePartition.CreationTime).Unix()),
    92  		LastAccessTime:      int(aws.ToTime(gluePartition.LastAccessTime).Unix()),
    93  		Sd:                  sd,
    94  		Parameters:          gluePartition.Parameters,
    95  		AWSLastAnalyzedTime: gluePartition.LastAnalyzedTime,
    96  	}
    97  	return partition
    98  }
    99  
   100  func PartitionsGlueToLocal(gluePartitions []types.Partition) []*metastore.Partition {
   101  	partitions := make([]*metastore.Partition, len(gluePartitions))
   102  	for i := range gluePartitions {
   103  		partitions[i] = PartitionGlueToLocal(&gluePartitions[i])
   104  	}
   105  	return partitions
   106  }
   107  
   108  func serDeLocalToGlue(info *metastore.SerDeInfo) *types.SerDeInfo {
   109  	if info == nil {
   110  		return nil
   111  	}
   112  
   113  	// glue cannot have an empty name
   114  	name := "default"
   115  	if info.Name != "" {
   116  		name = info.Name
   117  	}
   118  
   119  	return &types.SerDeInfo{
   120  		Name:                 aws.String(name),
   121  		Parameters:           info.Parameters,
   122  		SerializationLibrary: aws.String(info.SerializationLib),
   123  	}
   124  }
   125  
   126  func skewedLocalToGlue(info *metastore.SkewedInfo) *types.SkewedInfo {
   127  	if info == nil {
   128  		return &types.SkewedInfo{}
   129  	}
   130  	return &types.SkewedInfo{
   131  		SkewedColumnNames:             info.SkewedColNames,
   132  		SkewedColumnValueLocationMaps: info.SkewedColValueLocationMaps,
   133  		SkewedColumnValues:            info.AWSSkewedColValues, // TODO(Guys): validate this hive uses [][]string glue uses []string (????)
   134  	}
   135  }
   136  
   137  func sortColumnsLocalToGlue(columns []*metastore.Order) []types.Order {
   138  	res := make([]types.Order, 0, len(columns))
   139  	for _, column := range columns {
   140  		res = append(res, types.Order{
   141  			Column:    aws.String(column.Col),
   142  			SortOrder: int32(column.Order),
   143  		})
   144  	}
   145  	return res
   146  }
   147  
   148  func columnsLocalToGlue(columns []*metastore.FieldSchema) []types.Column {
   149  	res := make([]types.Column, 0, len(columns))
   150  	for _, column := range columns {
   151  		res = append(res, types.Column{
   152  			Comment: aws.String(column.Comment),
   153  			Name:    aws.String(column.Name),
   154  			// Parameters: nil,
   155  			Type: aws.String(column.Type),
   156  		})
   157  	}
   158  	return res
   159  }
   160  
   161  func DatabaseLocalToGlue(db *metastore.Database) *types.DatabaseInput {
   162  	return &types.DatabaseInput{
   163  		// CreateTableDefaultPermissions: db.,
   164  		Description:    aws.String(db.Description),
   165  		LocationUri:    aws.String(db.LocationURI),
   166  		Name:           aws.String(db.Name),
   167  		Parameters:     db.Parameters,
   168  		TargetDatabase: db.AWSTargetDatabase,
   169  	}
   170  }
   171  
   172  func DatabasesGlueToLocal(glueDatabases []types.Database) []*metastore.Database {
   173  	databases := make([]*metastore.Database, len(glueDatabases))
   174  	for i := range glueDatabases {
   175  		databases[i] = DatabaseGlueToLocal(&glueDatabases[i])
   176  	}
   177  	return databases
   178  }
   179  
   180  func DatabaseGlueToLocal(db *types.Database) *metastore.Database {
   181  	return &metastore.Database{
   182  		Name:        aws.ToString(db.Name),
   183  		Description: aws.ToString(db.Description),
   184  		LocationURI: aws.ToString(db.LocationUri),
   185  		Parameters:  db.Parameters,
   186  	}
   187  }
   188  
   189  func TableLocalToGlue(table *metastore.Table) *types.TableInput {
   190  	sd := SDLocalToGlue(table.Sd)
   191  	targetTable, _ := table.AWSTargetTable.(*types.TableIdentifier)
   192  	ht := &types.TableInput{
   193  		Description:       table.AWSDescription,
   194  		LastAccessTime:    localToAWSTime(table.LastAccessTime), // TODO(Guys): check if this OK
   195  		LastAnalyzedTime:  table.AWSLastAnalyzedTime,
   196  		Name:              aws.String(table.TableName),
   197  		Owner:             aws.String(table.Owner),
   198  		Parameters:        table.Parameters,
   199  		PartitionKeys:     columnsLocalToGlue(table.PartitionKeys),
   200  		Retention:         int32(table.Retention), // TODO(Guys): check if this OK
   201  		StorageDescriptor: sd,
   202  		TableType:         aws.String(table.TableType),
   203  		TargetTable:       targetTable,
   204  		ViewExpandedText:  aws.String(table.ViewExpandedText),
   205  		ViewOriginalText:  aws.String(table.ViewOriginalText),
   206  	}
   207  	return ht
   208  }
   209  
   210  func PartitionLocalToGlue(partition *metastore.Partition) *types.PartitionInput {
   211  	sd := SDLocalToGlue(partition.Sd)
   212  	ht := &types.PartitionInput{
   213  		// IsRegisteredWithLakeFormation: partition.AWSIsRegisteredWithLakeFormation,
   214  		LastAccessTime:    localToAWSTime(int64(partition.LastAccessTime)), // TODO(Guys): check if this OK
   215  		LastAnalyzedTime:  partition.AWSLastAnalyzedTime,
   216  		Parameters:        partition.Parameters,
   217  		StorageDescriptor: sd,
   218  		Values:            partition.Values,
   219  	}
   220  	return ht
   221  }
   222  
   223  func PartitionsLocalToGlue(partitions []*metastore.Partition) []*types.PartitionInput {
   224  	gluePartitions := make([]*types.PartitionInput, len(partitions))
   225  	for i, partition := range partitions {
   226  		gluePartitions[i] = PartitionLocalToGlue(partition)
   227  	}
   228  	return gluePartitions
   229  }
   230  
   231  func SDLocalToGlue(sd *metastore.StorageDescriptor) *types.StorageDescriptor {
   232  	if sd == nil {
   233  		return nil
   234  	}
   235  	schemaRef, _ := sd.AWSSchemaReference.(*types.SchemaReference)
   236  	return &types.StorageDescriptor{
   237  		BucketColumns:          sd.BucketCols,
   238  		Columns:                columnsLocalToGlue(sd.Cols),
   239  		Compressed:             sd.Compressed,
   240  		InputFormat:            aws.String(sd.InputFormat),
   241  		Location:               aws.String(sd.Location),
   242  		NumberOfBuckets:        int32(sd.NumBuckets),
   243  		OutputFormat:           aws.String(sd.OutputFormat),
   244  		Parameters:             sd.Parameters,
   245  		SchemaReference:        schemaRef,
   246  		SerdeInfo:              serDeLocalToGlue(sd.SerdeInfo),
   247  		SkewedInfo:             skewedLocalToGlue(sd.SkewedInfo),
   248  		SortColumns:            sortColumnsLocalToGlue(sd.SortCols),
   249  		StoredAsSubDirectories: aws.ToBool(sd.StoredAsSubDirectories),
   250  	}
   251  }
   252  
   253  func SDGlueToLocal(sd *types.StorageDescriptor) *metastore.StorageDescriptor {
   254  	if sd == nil {
   255  		return nil
   256  	}
   257  	return &metastore.StorageDescriptor{
   258  		Cols:                   columnsGlueToLocal(sd.Columns),
   259  		Location:               aws.ToString(sd.Location),
   260  		InputFormat:            aws.ToString(sd.InputFormat),
   261  		OutputFormat:           aws.ToString(sd.OutputFormat),
   262  		Compressed:             sd.Compressed,
   263  		NumBuckets:             int(sd.NumberOfBuckets),
   264  		SerdeInfo:              serDeGlueToLocal(sd.SerdeInfo),
   265  		BucketCols:             sd.BucketColumns,
   266  		SortCols:               sortColumnsGlueToLocal(sd.SortColumns),
   267  		Parameters:             sd.Parameters,
   268  		SkewedInfo:             skewedGlueToLocal(sd.SkewedInfo),
   269  		StoredAsSubDirectories: aws.Bool(sd.StoredAsSubDirectories),
   270  		AWSSchemaReference:     sd.SchemaReference,
   271  	}
   272  }
   273  
   274  func localToAWSTime(t int64) *time.Time {
   275  	tm := time.UnixMilli(t).UTC()
   276  	return &tm
   277  }