github.com/treeverse/lakefs@v1.24.1-0.20240520134607-95648127bfb0/pkg/metastore/glue/metastore_client.go (about) 1 package glue 2 3 import ( 4 "context" 5 "errors" 6 "fmt" 7 "regexp" 8 "strings" 9 10 "github.com/aws/aws-sdk-go-v2/aws" 11 "github.com/aws/aws-sdk-go-v2/service/glue" 12 "github.com/aws/aws-sdk-go-v2/service/glue/types" 13 "github.com/treeverse/lakefs/pkg/logging" 14 "github.com/treeverse/lakefs/pkg/metastore" 15 mserrors "github.com/treeverse/lakefs/pkg/metastore/errors" 16 ) 17 18 const MaxParts = 1000 // max possible 1000 19 20 type MSClient struct { 21 client *glue.Client 22 catalogID string 23 baseLocationURI string 24 } 25 26 func (g *MSClient) GetDBLocation(dbName string) string { 27 return fmt.Sprintf("%s/%s", g.baseLocationURI, dbName) 28 } 29 30 func (g *MSClient) NormalizeDBName(db string) string { 31 return db 32 } 33 34 func NewMSClient(client *glue.Client, catalogID, baselLocationURI string) (metastore.Client, error) { 35 if catalogID == "" { 36 logging.ContextUnavailable().Warn("Glue catalog id is empty") 37 } 38 return &MSClient{ 39 client: client, 40 catalogID: catalogID, 41 baseLocationURI: strings.TrimRight(baselLocationURI, "/"), 42 }, nil 43 } 44 45 func (g *MSClient) HasTable(ctx context.Context, dbName string, tableName string) (bool, error) { 46 table, err := g.GetTable(ctx, dbName, tableName) 47 var errEnityNotFound *types.EntityNotFoundException // TODO(Guys): validate this one 48 if err != nil && !errors.As(err, &errEnityNotFound) { 49 return false, err 50 } 51 return table != nil, nil 52 } 53 54 func (g *MSClient) GetDatabase(ctx context.Context, name string) (*metastore.Database, error) { 55 db, err := g.client.GetDatabase(ctx, &glue.GetDatabaseInput{ 56 CatalogId: aws.String(g.catalogID), 57 Name: aws.String(name), 58 }) 59 if err != nil { 60 return nil, err 61 } 62 return DatabaseGlueToLocal(db.Database), nil 63 } 64 65 func (g *MSClient) getDatabaseFromGlue(ctx context.Context, token *string, parts int) (*glue.GetDatabasesOutput, error) { 66 return g.client.GetDatabases(ctx, &glue.GetDatabasesInput{ 67 CatalogId: aws.String(g.catalogID), 68 MaxResults: aws.Int32(int32(parts)), 69 NextToken: token, 70 }) 71 } 72 73 func (g *MSClient) GetDatabases(ctx context.Context, pattern string) ([]*metastore.Database, error) { 74 var nextToken *string 75 var allDatabases []*metastore.Database 76 77 for { 78 getDatabasesOutput, err := g.getDatabaseFromGlue(ctx, nextToken, MaxParts) 79 if err != nil { 80 return nil, err 81 } 82 nextToken = getDatabasesOutput.NextToken 83 filteredDatabases, err := filterDatabases(getDatabasesOutput.DatabaseList, pattern) 84 if err != nil { 85 return nil, err 86 } 87 databases := DatabasesGlueToLocal(filteredDatabases) 88 allDatabases = append(allDatabases, databases...) 89 if nextToken == nil { 90 break 91 } 92 } 93 return allDatabases, nil 94 } 95 96 func filterDatabases(databases []types.Database, pattern string) ([]types.Database, error) { 97 if pattern == "" { 98 return databases, nil 99 } 100 r, err := regexp.Compile(pattern) 101 if err != nil { 102 return nil, err 103 } 104 res := make([]types.Database, 0) 105 for _, database := range databases { 106 if r.MatchString(aws.ToString(database.Name)) { 107 res = append(res, database) 108 } 109 } 110 return res, nil 111 } 112 113 func (g *MSClient) GetTables(ctx context.Context, dbName string, pattern string) ([]*metastore.Table, error) { 114 var nextToken *string 115 allTables := make([]*metastore.Table, 0) 116 for { 117 getTablesOutput, err := g.client.GetTables(ctx, &glue.GetTablesInput{ 118 CatalogId: aws.String(g.catalogID), 119 DatabaseName: aws.String(dbName), 120 Expression: aws.String(pattern), 121 MaxResults: aws.Int32(MaxParts), 122 NextToken: nextToken, 123 }) 124 if err != nil { 125 return nil, err 126 } 127 nextToken = getTablesOutput.NextToken 128 129 tables := TablesGlueToLocal(getTablesOutput.TableList) 130 allTables = append(allTables, tables...) 131 if nextToken == nil { 132 break 133 } 134 } 135 return allTables, nil 136 } 137 138 func (g *MSClient) AlterTable(ctx context.Context, dbName string, _ string, newTable *metastore.Table) error { 139 table := TableLocalToGlue(newTable) 140 _, err := g.client.UpdateTable(ctx, &glue.UpdateTableInput{ 141 CatalogId: aws.String(g.catalogID), 142 DatabaseName: aws.String(dbName), 143 SkipArchive: aws.Bool(false), // UpdateTable always creates an archived version of the table before updating it. However, if skipArchive is set to true, UpdateTable does not create the archived version. 144 TableInput: table, 145 }) 146 return err 147 } 148 149 func (g *MSClient) DropPartition(ctx context.Context, dbName string, tableName string, values []string) error { 150 _, err := g.client.DeletePartition(ctx, &glue.DeletePartitionInput{ 151 CatalogId: aws.String(g.catalogID), 152 DatabaseName: aws.String(dbName), 153 PartitionValues: values, 154 TableName: aws.String(tableName), 155 }) 156 return err 157 } 158 159 func (g *MSClient) CreateDatabase(ctx context.Context, database *metastore.Database) error { 160 databaseInput := DatabaseLocalToGlue(database) 161 _, err := g.client.CreateDatabase(ctx, &glue.CreateDatabaseInput{ 162 CatalogId: aws.String(g.catalogID), 163 DatabaseInput: databaseInput, 164 }) 165 var errExists *types.AlreadyExistsException 166 if errors.As(err, &errExists) { 167 return mserrors.ErrSchemaExists 168 } 169 return err 170 } 171 172 func (g *MSClient) getTableData(ctx context.Context, dbName string, tblName string) (*types.Table, error) { 173 table, err := g.client.GetTable(ctx, 174 &glue.GetTableInput{ 175 CatalogId: aws.String(g.catalogID), 176 DatabaseName: aws.String(dbName), 177 Name: aws.String(tblName), 178 }) 179 if err != nil { 180 return nil, err 181 } 182 return table.Table, nil 183 } 184 185 func (g *MSClient) GetTable(ctx context.Context, dbName string, tableName string) (*metastore.Table, error) { 186 table, err := g.getTableData(ctx, dbName, tableName) 187 if err != nil { 188 return nil, err 189 } 190 return TableGlueToLocal(table), nil 191 } 192 193 func (g *MSClient) CreateTable(ctx context.Context, tbl *metastore.Table) error { 194 table := TableLocalToGlue(tbl) 195 dbName := tbl.DBName 196 _, err := g.client.CreateTable(ctx, 197 &glue.CreateTableInput{ 198 CatalogId: aws.String(g.catalogID), 199 DatabaseName: aws.String(dbName), 200 TableInput: table, 201 }) 202 return err 203 } 204 205 func (g *MSClient) GetPartition(ctx context.Context, dbName string, tableName string, values []string) (*metastore.Partition, error) { 206 output, err := g.client.GetPartition(ctx, 207 &glue.GetPartitionInput{ 208 CatalogId: aws.String(g.catalogID), 209 DatabaseName: aws.String(dbName), 210 PartitionValues: values, 211 TableName: aws.String(tableName), 212 }) 213 if err != nil { 214 return nil, err 215 } 216 return PartitionGlueToLocal(output.Partition), nil 217 } 218 219 func (g *MSClient) GetPartitions(ctx context.Context, dbName string, tableName string) ([]*metastore.Partition, error) { 220 partitions, err := g.GetAllPartitions(ctx, dbName, tableName) 221 if err != nil { 222 return nil, err 223 } 224 225 return PartitionsGlueToLocal(partitions), nil 226 } 227 228 func (g *MSClient) getPartitionsFromGlue(ctx context.Context, dbName, tableName string, nextToken *string, maxParts int16) (*glue.GetPartitionsOutput, error) { 229 return g.client.GetPartitions(ctx, 230 &glue.GetPartitionsInput{ 231 CatalogId: aws.String(g.catalogID), 232 DatabaseName: aws.String(dbName), 233 MaxResults: aws.Int32(int32(maxParts)), 234 NextToken: nextToken, 235 TableName: aws.String(tableName), 236 }) 237 } 238 239 func (g *MSClient) GetAllPartitions(ctx context.Context, dbName, tableName string) ([]types.Partition, error) { 240 var nextToken *string 241 var allPartitions []types.Partition 242 for { 243 getPartitionsOutput, err := g.getPartitionsFromGlue(ctx, dbName, tableName, nextToken, MaxParts) 244 if err != nil { 245 return nil, err 246 } 247 nextToken = getPartitionsOutput.NextToken 248 partitions := getPartitionsOutput.Partitions 249 allPartitions = append(allPartitions, partitions...) 250 if nextToken == nil { 251 break 252 } 253 } 254 return allPartitions, nil 255 } 256 257 func (g *MSClient) AddPartition(ctx context.Context, tableName string, dbName string, newPartition *metastore.Partition) error { 258 gluePartition := PartitionLocalToGlue(newPartition) 259 _, err := g.client.CreatePartition(ctx, 260 &glue.CreatePartitionInput{ 261 CatalogId: aws.String(g.catalogID), 262 DatabaseName: aws.String(dbName), 263 PartitionInput: gluePartition, 264 TableName: aws.String(tableName), 265 }) 266 return err 267 } 268 269 func (g *MSClient) AddPartitions(ctx context.Context, tableName string, dbName string, newParts []*metastore.Partition) error { 270 gluePartitions := PartitionsLocalToGlue(newParts) 271 272 partitionList := make([]types.PartitionInput, 0, len(gluePartitions)) 273 for _, partition := range gluePartitions { 274 partitionList = append(partitionList, types.PartitionInput{ 275 LastAccessTime: partition.LastAccessTime, 276 LastAnalyzedTime: partition.LastAnalyzedTime, 277 Parameters: partition.Parameters, 278 StorageDescriptor: partition.StorageDescriptor, 279 Values: partition.Values, 280 }) 281 } 282 _, err := g.client.BatchCreatePartition(ctx, 283 &glue.BatchCreatePartitionInput{ 284 CatalogId: aws.String(g.catalogID), 285 DatabaseName: aws.String(dbName), 286 PartitionInputList: partitionList, 287 TableName: aws.String(tableName), 288 }) 289 290 return err 291 } 292 293 func (g *MSClient) AlterPartition(ctx context.Context, dbName string, tableName string, partition *metastore.Partition) error { 294 // No batch alter partitions we will need to do it one by one 295 gluePartition := PartitionLocalToGlue(partition) 296 297 _, err := g.client.UpdatePartition(ctx, 298 &glue.UpdatePartitionInput{ 299 CatalogId: aws.String(g.catalogID), 300 DatabaseName: aws.String(dbName), 301 PartitionInput: gluePartition, 302 PartitionValueList: gluePartition.Values, 303 TableName: aws.String(tableName), 304 }) 305 return err 306 } 307 308 func (g *MSClient) AlterPartitions(ctx context.Context, dbName string, tableName string, newPartitions []*metastore.Partition) error { 309 // No batch alter partitions we will need to do it one by one 310 for _, partition := range newPartitions { 311 err := g.AlterPartition(ctx, dbName, tableName, partition) 312 if err != nil { 313 return err 314 } 315 } 316 return nil 317 }