github.com/treeverse/lakefs@v1.24.1-0.20240520134607-95648127bfb0/pkg/metastore/glue/converter.go (about) 1 package glue 2 3 import ( 4 "time" 5 6 "github.com/aws/aws-sdk-go-v2/aws" 7 "github.com/aws/aws-sdk-go-v2/service/glue/types" 8 "github.com/treeverse/lakefs/pkg/metastore" 9 ) 10 11 func serDeGlueToLocal(g *types.SerDeInfo) *metastore.SerDeInfo { 12 if g == nil { 13 return nil 14 } 15 return &metastore.SerDeInfo{ 16 Name: aws.ToString(g.Name), 17 SerializationLib: aws.ToString(g.SerializationLibrary), 18 Parameters: g.Parameters, 19 } 20 } 21 22 func skewedGlueToLocal(g *types.SkewedInfo) *metastore.SkewedInfo { 23 if g == nil { 24 return &metastore.SkewedInfo{} 25 } 26 return &metastore.SkewedInfo{ 27 SkewedColNames: g.SkewedColumnNames, 28 SkewedColValues: [][]string{g.SkewedColumnValues}, // TODO(Guys): validate this hive uses [][]string glue uses []string (????) 29 SkewedColValueLocationMaps: g.SkewedColumnValueLocationMaps, 30 } 31 } 32 33 func sortColumnsGlueToLocal(columns []types.Order) []*metastore.Order { 34 res := make([]*metastore.Order, len(columns)) 35 for i, column := range columns { 36 res[i] = &metastore.Order{ 37 Col: aws.ToString(column.Column), 38 Order: int(column.SortOrder), 39 } 40 } 41 return res 42 } 43 44 func columnsGlueToLocal(columns []types.Column) []*metastore.FieldSchema { 45 res := make([]*metastore.FieldSchema, len(columns)) 46 for i, column := range columns { 47 res[i] = &metastore.FieldSchema{ 48 Name: aws.ToString(column.Name), 49 Type: aws.ToString(column.Type), 50 Comment: aws.ToString(column.Comment), 51 } 52 } 53 return res 54 } 55 56 func TableGlueToLocal(glueTable *types.Table) *metastore.Table { 57 sd := SDGlueToLocal(glueTable.StorageDescriptor) 58 ht := &metastore.Table{ 59 TableName: aws.ToString(glueTable.Name), 60 DBName: aws.ToString(glueTable.DatabaseName), 61 Owner: aws.ToString(glueTable.Owner), 62 CreateTime: aws.ToTime(glueTable.CreateTime).Unix(), // TODO(Guys): check if this OK 63 LastAccessTime: aws.ToTime(glueTable.LastAccessTime).Unix(), // TODO(Guys): check if this OK 64 Retention: int(glueTable.Retention), // TODO(Guys): check if this OK 65 Sd: sd, 66 PartitionKeys: columnsGlueToLocal(glueTable.PartitionKeys), 67 Parameters: glueTable.Parameters, 68 ViewOriginalText: aws.ToString(glueTable.ViewOriginalText), 69 ViewExpandedText: aws.ToString(glueTable.ViewExpandedText), 70 TableType: aws.ToString(glueTable.TableType), 71 RewriteEnabled: nil, 72 Temporary: false, 73 } 74 return ht 75 } 76 77 func TablesGlueToLocal(glueTables []types.Table) []*metastore.Table { 78 tables := make([]*metastore.Table, len(glueTables)) 79 for i := range glueTables { 80 tables[i] = TableGlueToLocal(&glueTables[i]) 81 } 82 return tables 83 } 84 85 func PartitionGlueToLocal(gluePartition *types.Partition) *metastore.Partition { 86 sd := SDGlueToLocal(gluePartition.StorageDescriptor) 87 partition := &metastore.Partition{ 88 Values: gluePartition.Values, 89 DBName: aws.ToString(gluePartition.DatabaseName), 90 TableName: aws.ToString(gluePartition.TableName), 91 CreateTime: int(aws.ToTime(gluePartition.CreationTime).Unix()), 92 LastAccessTime: int(aws.ToTime(gluePartition.LastAccessTime).Unix()), 93 Sd: sd, 94 Parameters: gluePartition.Parameters, 95 AWSLastAnalyzedTime: gluePartition.LastAnalyzedTime, 96 } 97 return partition 98 } 99 100 func PartitionsGlueToLocal(gluePartitions []types.Partition) []*metastore.Partition { 101 partitions := make([]*metastore.Partition, len(gluePartitions)) 102 for i := range gluePartitions { 103 partitions[i] = PartitionGlueToLocal(&gluePartitions[i]) 104 } 105 return partitions 106 } 107 108 func serDeLocalToGlue(info *metastore.SerDeInfo) *types.SerDeInfo { 109 if info == nil { 110 return nil 111 } 112 113 // glue cannot have an empty name 114 name := "default" 115 if info.Name != "" { 116 name = info.Name 117 } 118 119 return &types.SerDeInfo{ 120 Name: aws.String(name), 121 Parameters: info.Parameters, 122 SerializationLibrary: aws.String(info.SerializationLib), 123 } 124 } 125 126 func skewedLocalToGlue(info *metastore.SkewedInfo) *types.SkewedInfo { 127 if info == nil { 128 return &types.SkewedInfo{} 129 } 130 return &types.SkewedInfo{ 131 SkewedColumnNames: info.SkewedColNames, 132 SkewedColumnValueLocationMaps: info.SkewedColValueLocationMaps, 133 SkewedColumnValues: info.AWSSkewedColValues, // TODO(Guys): validate this hive uses [][]string glue uses []string (????) 134 } 135 } 136 137 func sortColumnsLocalToGlue(columns []*metastore.Order) []types.Order { 138 res := make([]types.Order, 0, len(columns)) 139 for _, column := range columns { 140 res = append(res, types.Order{ 141 Column: aws.String(column.Col), 142 SortOrder: int32(column.Order), 143 }) 144 } 145 return res 146 } 147 148 func columnsLocalToGlue(columns []*metastore.FieldSchema) []types.Column { 149 res := make([]types.Column, 0, len(columns)) 150 for _, column := range columns { 151 res = append(res, types.Column{ 152 Comment: aws.String(column.Comment), 153 Name: aws.String(column.Name), 154 // Parameters: nil, 155 Type: aws.String(column.Type), 156 }) 157 } 158 return res 159 } 160 161 func DatabaseLocalToGlue(db *metastore.Database) *types.DatabaseInput { 162 return &types.DatabaseInput{ 163 // CreateTableDefaultPermissions: db., 164 Description: aws.String(db.Description), 165 LocationUri: aws.String(db.LocationURI), 166 Name: aws.String(db.Name), 167 Parameters: db.Parameters, 168 TargetDatabase: db.AWSTargetDatabase, 169 } 170 } 171 172 func DatabasesGlueToLocal(glueDatabases []types.Database) []*metastore.Database { 173 databases := make([]*metastore.Database, len(glueDatabases)) 174 for i := range glueDatabases { 175 databases[i] = DatabaseGlueToLocal(&glueDatabases[i]) 176 } 177 return databases 178 } 179 180 func DatabaseGlueToLocal(db *types.Database) *metastore.Database { 181 return &metastore.Database{ 182 Name: aws.ToString(db.Name), 183 Description: aws.ToString(db.Description), 184 LocationURI: aws.ToString(db.LocationUri), 185 Parameters: db.Parameters, 186 } 187 } 188 189 func TableLocalToGlue(table *metastore.Table) *types.TableInput { 190 sd := SDLocalToGlue(table.Sd) 191 targetTable, _ := table.AWSTargetTable.(*types.TableIdentifier) 192 ht := &types.TableInput{ 193 Description: table.AWSDescription, 194 LastAccessTime: localToAWSTime(table.LastAccessTime), // TODO(Guys): check if this OK 195 LastAnalyzedTime: table.AWSLastAnalyzedTime, 196 Name: aws.String(table.TableName), 197 Owner: aws.String(table.Owner), 198 Parameters: table.Parameters, 199 PartitionKeys: columnsLocalToGlue(table.PartitionKeys), 200 Retention: int32(table.Retention), // TODO(Guys): check if this OK 201 StorageDescriptor: sd, 202 TableType: aws.String(table.TableType), 203 TargetTable: targetTable, 204 ViewExpandedText: aws.String(table.ViewExpandedText), 205 ViewOriginalText: aws.String(table.ViewOriginalText), 206 } 207 return ht 208 } 209 210 func PartitionLocalToGlue(partition *metastore.Partition) *types.PartitionInput { 211 sd := SDLocalToGlue(partition.Sd) 212 ht := &types.PartitionInput{ 213 // IsRegisteredWithLakeFormation: partition.AWSIsRegisteredWithLakeFormation, 214 LastAccessTime: localToAWSTime(int64(partition.LastAccessTime)), // TODO(Guys): check if this OK 215 LastAnalyzedTime: partition.AWSLastAnalyzedTime, 216 Parameters: partition.Parameters, 217 StorageDescriptor: sd, 218 Values: partition.Values, 219 } 220 return ht 221 } 222 223 func PartitionsLocalToGlue(partitions []*metastore.Partition) []*types.PartitionInput { 224 gluePartitions := make([]*types.PartitionInput, len(partitions)) 225 for i, partition := range partitions { 226 gluePartitions[i] = PartitionLocalToGlue(partition) 227 } 228 return gluePartitions 229 } 230 231 func SDLocalToGlue(sd *metastore.StorageDescriptor) *types.StorageDescriptor { 232 if sd == nil { 233 return nil 234 } 235 schemaRef, _ := sd.AWSSchemaReference.(*types.SchemaReference) 236 return &types.StorageDescriptor{ 237 BucketColumns: sd.BucketCols, 238 Columns: columnsLocalToGlue(sd.Cols), 239 Compressed: sd.Compressed, 240 InputFormat: aws.String(sd.InputFormat), 241 Location: aws.String(sd.Location), 242 NumberOfBuckets: int32(sd.NumBuckets), 243 OutputFormat: aws.String(sd.OutputFormat), 244 Parameters: sd.Parameters, 245 SchemaReference: schemaRef, 246 SerdeInfo: serDeLocalToGlue(sd.SerdeInfo), 247 SkewedInfo: skewedLocalToGlue(sd.SkewedInfo), 248 SortColumns: sortColumnsLocalToGlue(sd.SortCols), 249 StoredAsSubDirectories: aws.ToBool(sd.StoredAsSubDirectories), 250 } 251 } 252 253 func SDGlueToLocal(sd *types.StorageDescriptor) *metastore.StorageDescriptor { 254 if sd == nil { 255 return nil 256 } 257 return &metastore.StorageDescriptor{ 258 Cols: columnsGlueToLocal(sd.Columns), 259 Location: aws.ToString(sd.Location), 260 InputFormat: aws.ToString(sd.InputFormat), 261 OutputFormat: aws.ToString(sd.OutputFormat), 262 Compressed: sd.Compressed, 263 NumBuckets: int(sd.NumberOfBuckets), 264 SerdeInfo: serDeGlueToLocal(sd.SerdeInfo), 265 BucketCols: sd.BucketColumns, 266 SortCols: sortColumnsGlueToLocal(sd.SortColumns), 267 Parameters: sd.Parameters, 268 SkewedInfo: skewedGlueToLocal(sd.SkewedInfo), 269 StoredAsSubDirectories: aws.Bool(sd.StoredAsSubDirectories), 270 AWSSchemaReference: sd.SchemaReference, 271 } 272 } 273 274 func localToAWSTime(t int64) *time.Time { 275 tm := time.UnixMilli(t).UTC() 276 return &tm 277 }