github.com/treeverse/lakefs@v1.24.1-0.20240520134607-95648127bfb0/pkg/metastore/metastore.go

package metastore

import (
	"context"
	"strings"
	"time"

	"github.com/aws/aws-sdk-go-v2/service/glue/types"
	"github.com/davecgh/go-spew/spew"
	"github.com/treeverse/lakefs/pkg/logging"
)

const (
	// sparkSQLWorkaroundSuffix is a suffix added as a workaround in Spark SQL.
	// Locations with this suffix are never read and must not be changed;
	// see https://issues.apache.org/jira/browse/SPARK-15269 for details.
	sparkSQLWorkaroundSuffix = "-__PLACEHOLDER__"
	// sparkSQLTableProviderKey marks the table as a Spark SQL data source table.
	sparkSQLTableProviderKey    = "spark.sql.sources.provider"
	sparkSQLProviderLocationKey = "path"
)

// Update applies the given database, table, and serde names to the table
// metadata and rewrites its storage location using transformLocation.
func (m *Table) Update(ctx context.Context, db, table, serde string, setSymlink bool, transformLocation func(location string) (string, error), isSparkSQLTable, fixSparkPlaceHolder bool) error {
	log := logging.FromContext(ctx).WithFields(logging.Fields{
		"db":         db,
		"table":      table,
		"serde":      serde,
		"setSymlink": setSymlink,
	})
	if m.Sd == nil {
		m.Sd = &StorageDescriptor{}
	}
	m.DBName = db
	m.TableName = table
	err := m.Sd.Update(db, table, serde, setSymlink, transformLocation, isSparkSQLTable, fixSparkPlaceHolder)
	if err != nil {
		log.WithError(err).WithField("table", spew.Sdump(*m)).Error("Update table")
		return err
	}
	log.WithField("table", spew.Sdump(*m)).Debug("Update table")
	return nil
}

// isSparkSQLTable reports whether the table was created by Spark SQL as a
// data source table.
func (m *Table) isSparkSQLTable() (res bool) {
	_, res = m.Parameters[sparkSQLTableProviderKey]
	return
}

// Update applies the given database, table, and serde names to the partition
// metadata and rewrites its storage location using transformLocation.
func (m *Partition) Update(ctx context.Context, db, table, serde string, setSymlink bool, transformLocation func(location string) (string, error), isSparkSQLTable, fixSparkPlaceHolder bool) error {
	log := logging.FromContext(ctx).WithFields(logging.Fields{
		"db":         db,
		"table":      table,
		"serde":      serde,
		"setSymlink": setSymlink,
	})
	if m.Sd == nil {
		m.Sd = &StorageDescriptor{}
	}
	if m.Sd.SerdeInfo == nil {
		m.Sd.SerdeInfo = &SerDeInfo{}
	}
	m.DBName = db
	m.TableName = table
	m.Sd.SerdeInfo.Name = serde
	if setSymlink {
		m.Sd.InputFormat = symlinkInputFormat
	}

	err := m.Sd.Update(db, table, serde, setSymlink, transformLocation, isSparkSQLTable, fixSparkPlaceHolder)
	if err != nil {
		log.WithError(err).WithField("partition", spew.Sdump(*m)).Error("Update partition")
		return err
	}
	log.WithField("partition", spew.Sdump(*m)).Debug("Update partition")
	return nil
}

// Update rewrites the storage descriptor's location (and, for Spark SQL data
// source tables, the provider location stored in the serde parameters) using
// transformLocation, and optionally switches the input format to the symlink
// format.
func (m *StorageDescriptor) Update(db, table, serde string, setSymlink bool, transformLocation func(location string) (string, error), isSparkSQLTable, fixSparkPlaceHolder bool) error {
	if m.SerdeInfo == nil {
		m.SerdeInfo = &SerDeInfo{}
	}

	m.SerdeInfo.Name = serde

	if setSymlink {
		m.InputFormat = symlinkInputFormat
	}
	// Skip the transformation for Spark SQL placeholder locations, which are
	// never read and must be left untouched (see SPARK-15269).
	if m.Location != "" && !(isSparkSQLTable && strings.HasSuffix(m.Location, sparkSQLWorkaroundSuffix)) {
		var err error
		m.Location, err = transformLocation(m.Location)
		if err != nil {
			return err
		}
	}

	if isSparkSQLTable {
		// The table was created by Spark SQL, so also rewrite the location
		// Spark SQL stores internally in the serde parameters.
		if l, ok := m.SerdeInfo.Parameters[sparkSQLProviderLocationKey]; ok {
			updatedLocation, err := transformLocation(l)
			if err != nil {
				return err
			}
			m.SerdeInfo.Parameters[sparkSQLProviderLocationKey] = updatedLocation
			if fixSparkPlaceHolder && strings.HasSuffix(m.Location, sparkSQLWorkaroundSuffix) {
				m.Location = updatedLocation
			}
		}
	}
	return nil
}

type Database struct {
	Name              string
	Description       string
	LocationURI       string
	Parameters        map[string]string
	HivePrivileges    interface{}
	OwnerName         *string
	HiveOwnerType     interface{}
	AWSTargetDatabase *types.DatabaseIdentifier
}

type Table struct {
	TableName                        string
	DBName                           string
	Owner                            string
	CreateTime                       int64
	LastAccessTime                   int64
	Retention                        int
	Sd                               *StorageDescriptor
	PartitionKeys                    []*FieldSchema
	Parameters                       map[string]string
	ViewOriginalText                 string
	ViewExpandedText                 string
	TableType                        string
	Temporary                        bool
	RewriteEnabled                   *bool
	AWSCreatedBy                     *string
	AWSDescription                   *string
	AWSIsRegisteredWithLakeFormation *bool
	AWSLastAnalyzedTime              *time.Time
	AWSTargetTable                   interface{}
	AWSUpdateTime                    *time.Time
	Privileges                       interface{}
}

type Partition struct {
	Values              []string
	DBName              string
	TableName           string
	CreateTime          int
	LastAccessTime      int
	Sd                  *StorageDescriptor
	Parameters          map[string]string
	AWSLastAnalyzedTime *time.Time
	Privileges          interface{}
}

type StorageDescriptor struct {
	Cols                   []*FieldSchema
	Location               string
	InputFormat            string
	OutputFormat           string
	Compressed             bool
	NumBuckets             int
	SerdeInfo              *SerDeInfo
	BucketCols             []string
	SortCols               []*Order
	Parameters             map[string]string
	SkewedInfo             *SkewedInfo
	StoredAsSubDirectories *bool
	AWSSchemaReference     interface{}
}

type SerDeInfo struct {
	Name             string
	SerializationLib string
	Parameters       map[string]string
}

type FieldSchema struct {
	Name    string
	Type    string
	Comment string
}

type Order struct {
	Col   string
	Order int
}

type SkewedInfo struct {
	SkewedColNames             []string
	SkewedColValues            [][]string
	AWSSkewedColValues         []string
	SkewedColValueLocationMaps map[string]string
}

// Name returns the partition name derived from its values.
func (m Partition) Name() string {
	return strings.Join(m.Values, "-")
}
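
// The sketch below is illustrative and not part of the original file: it shows
// one way a caller might use (*Table).Update to repoint table metadata from
// one branch to another by rewriting location prefixes. The bucket, branch
// names, database/table/serde values, and the helper's name are hypothetical.
func exampleCopyTableToBranch(ctx context.Context, tbl *Table) error {
	// Rewrite s3://example-repo/main/... to s3://example-repo/feature/... so
	// the copied metadata points at the destination branch. Placeholder
	// locations are skipped by StorageDescriptor.Update, so the transform
	// never sees the -__PLACEHOLDER__ suffix for Spark SQL tables.
	transform := func(location string) (string, error) {
		return strings.Replace(location, "s3://example-repo/main/", "s3://example-repo/feature/", 1), nil
	}
	// setSymlink=false keeps the existing input format;
	// fixSparkPlaceHolder=true replaces a Spark SQL placeholder location with
	// the transformed provider location.
	return tbl.Update(ctx, "analytics", "events", "events", false, transform, tbl.isSparkSQLTable(), true)
}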