github.com/treeverse/lakefs@v1.24.1-0.20240520134607-95648127bfb0/pkg/metastore/metastore.go

package metastore

import (
	"context"
	"strings"
	"time"

	"github.com/aws/aws-sdk-go-v2/service/glue/types"
	"github.com/davecgh/go-spew/spew"
	"github.com/treeverse/lakefs/pkg/logging"
)

const (
	// sparkSQLWorkaroundSuffix is a placeholder suffix that Spark SQL appends to
	// table locations as a workaround; locations with this suffix are not real
	// paths and should not be changed.
	// See https://issues.apache.org/jira/browse/SPARK-15269 for details.
	sparkSQLWorkaroundSuffix = "-__PLACEHOLDER__"
	// sparkSQLTableProviderKey marks a table as a Spark SQL data source table.
	sparkSQLTableProviderKey = "spark.sql.sources.provider"
	// sparkSQLProviderLocationKey is the SerDe parameter under which Spark SQL
	// stores the data source location.
	sparkSQLProviderLocationKey = "path"
)

// Update sets the table's database name, table name and SerDe name, and
// rewrites its storage descriptor's location using transformLocation.
func (m *Table) Update(ctx context.Context, db, table, serde string, setSymlink bool, transformLocation func(location string) (string, error), isSparkSQLTable, fixSparkPlaceHolder bool) error {
	log := logging.FromContext(ctx).WithFields(logging.Fields{
		"db":         db,
		"table":      table,
		"serde":      serde,
		"setSymlink": setSymlink,
	})
	if m.Sd == nil {
		m.Sd = &StorageDescriptor{}
	}
	m.DBName = db
	m.TableName = table
	err := m.Sd.Update(db, table, serde, setSymlink, transformLocation, isSparkSQLTable, fixSparkPlaceHolder)
	if err != nil {
		log.WithError(err).WithField("table", spew.Sdump(*m)).Error("Update table")
		return err
	}
	log.WithField("table", spew.Sdump(*m)).Debug("Update table")
	return nil
}
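
// exampleUpdateTable is a minimal usage sketch of Update: the database, table
// and branch names and the transform itself are illustrative only, not part
// of this package's API.
func exampleUpdateTable(ctx context.Context, tbl *Table) error {
	// Rewrite locations from the "main" branch path to the "feature" branch path.
	transform := func(location string) (string, error) {
		return strings.Replace(location, "/main/", "/feature/", 1), nil
	}
	return tbl.Update(ctx, "analytics", "events", "events", false, transform, tbl.isSparkSQLTable(), true)
}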

// isSparkSQLTable reports whether the table was created by Spark SQL as a
// data source table.
func (m *Table) isSparkSQLTable() (res bool) {
	_, res = m.Parameters[sparkSQLTableProviderKey]
	return
}
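
// exampleIsSparkSQLTable is a minimal sketch with assumed parameter values:
// Spark SQL data source tables carry the provider name (e.g. "parquet")
// under sparkSQLTableProviderKey.
func exampleIsSparkSQLTable() bool {
	t := &Table{Parameters: map[string]string{sparkSQLTableProviderKey: "parquet"}}
	return t.isSparkSQLTable() // true
}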

// Update sets the partition's database name, table name and SerDe name, and
// rewrites its storage descriptor's location using transformLocation.
func (m *Partition) Update(ctx context.Context, db, table, serde string, setSymlink bool, transformLocation func(location string) (string, error), isSparkSQLTable, fixSparkPlaceHolder bool) error {
	log := logging.FromContext(ctx).WithFields(logging.Fields{
		"db":         db,
		"table":      table,
		"serde":      serde,
		"setSymlink": setSymlink,
	})
	if m.Sd == nil {
		m.Sd = &StorageDescriptor{}
	}
	if m.Sd.SerdeInfo == nil {
		m.Sd.SerdeInfo = &SerDeInfo{}
	}
	m.DBName = db
	m.TableName = table
	m.Sd.SerdeInfo.Name = serde
	if setSymlink {
		m.Sd.InputFormat = symlinkInputFormat
	}

	err := m.Sd.Update(db, table, serde, setSymlink, transformLocation, isSparkSQLTable, fixSparkPlaceHolder)
	if err != nil {
		log.WithError(err).WithField("partition", spew.Sdump(*m)).Error("Update partition")
		return err
	}
	log.WithField("partition", spew.Sdump(*m)).Debug("Update partition")
	return nil
}

// Update sets the SerDe name, optionally switches the input format to the
// symlink format, and rewrites the descriptor's location(s) using
// transformLocation. Spark SQL placeholder locations are left as-is; for
// Spark SQL tables the location stored in the SerDe parameters is rewritten
// instead, and optionally copied over the placeholder.
func (m *StorageDescriptor) Update(db, table, serde string, setSymlink bool, transformLocation func(location string) (string, error), isSparkSQLTable, fixSparkPlaceHolder bool) error {
	if m.SerdeInfo == nil {
		m.SerdeInfo = &SerDeInfo{}
	}

	m.SerdeInfo.Name = serde

	if setSymlink {
		m.InputFormat = symlinkInputFormat
	}
	// Transform the location, except for Spark SQL placeholder locations,
	// which are not real paths (see sparkSQLWorkaroundSuffix).
	if m.Location != "" && !(isSparkSQLTable && strings.HasSuffix(m.Location, sparkSQLWorkaroundSuffix)) {
		location, err := transformLocation(m.Location)
		if err != nil {
			return err
		}
		m.Location = location
	}

	if isSparkSQLTable {
		// The table was created by Spark SQL - update the location that
		// Spark SQL stores internally in the SerDe parameters.
		if l, ok := m.SerdeInfo.Parameters[sparkSQLProviderLocationKey]; ok {
			updatedLocation, err := transformLocation(l)
			if err != nil {
				return err
			}
			m.SerdeInfo.Parameters[sparkSQLProviderLocationKey] = updatedLocation
			if fixSparkPlaceHolder && strings.HasSuffix(m.Location, sparkSQLWorkaroundSuffix) {
				m.Location = updatedLocation
			}
		}
	}
	return nil
}
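
// exampleFixPlaceholder is a minimal sketch, with assumed bucket and branch
// names, of how a Spark SQL placeholder location is fixed: the transform
// skips the "-__PLACEHOLDER__" location, but with fixSparkPlaceHolder set the
// rewritten "path" parameter replaces it.
func exampleFixPlaceholder() (*StorageDescriptor, error) {
	sd := &StorageDescriptor{
		Location: "s3://example/events-__PLACEHOLDER__",
		SerdeInfo: &SerDeInfo{
			Parameters: map[string]string{sparkSQLProviderLocationKey: "s3://example/main/events"},
		},
	}
	transform := func(location string) (string, error) {
		return strings.Replace(location, "/main/", "/feature/", 1), nil
	}
	err := sd.Update("analytics", "events", "events", false, transform, true, true)
	// sd.Location is now "s3://example/feature/events".
	return sd, err
}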

// Database is a metastore-agnostic representation of a database, carrying
// both Hive and AWS Glue specific fields.
type Database struct {
	Name              string
	Description       string
	LocationURI       string
	Parameters        map[string]string
	HivePrivileges    interface{}
	OwnerName         *string
	HiveOwnerType     interface{}
	AWSTargetDatabase *types.DatabaseIdentifier
}

// Table is a metastore-agnostic representation of a table.
type Table struct {
	TableName                        string
	DBName                           string
	Owner                            string
	CreateTime                       int64
	LastAccessTime                   int64
	Retention                        int
	Sd                               *StorageDescriptor
	PartitionKeys                    []*FieldSchema
	Parameters                       map[string]string
	ViewOriginalText                 string
	ViewExpandedText                 string
	TableType                        string
	Temporary                        bool
	RewriteEnabled                   *bool
	AWSCreatedBy                     *string
	AWSDescription                   *string
	AWSIsRegisteredWithLakeFormation *bool
	AWSLastAnalyzedTime              *time.Time
	AWSTargetTable                   interface{}
	AWSUpdateTime                    *time.Time
	Privileges                       interface{}
}

// Partition is a metastore-agnostic representation of a table partition.
type Partition struct {
	Values              []string
	DBName              string
	TableName           string
	CreateTime          int
	LastAccessTime      int
	Sd                  *StorageDescriptor
	Parameters          map[string]string
	AWSLastAnalyzedTime *time.Time
	Privileges          interface{}
}

// StorageDescriptor describes the physical storage of a table or partition:
// its location, input/output formats and SerDe information.
type StorageDescriptor struct {
	Cols                   []*FieldSchema
	Location               string
	InputFormat            string
	OutputFormat           string
	Compressed             bool
	NumBuckets             int
	SerdeInfo              *SerDeInfo
	BucketCols             []string
	SortCols               []*Order
	Parameters             map[string]string
	SkewedInfo             *SkewedInfo
	StoredAsSubDirectories *bool
	AWSSchemaReference     interface{}
}

type SerDeInfo struct {
	Name             string
	SerializationLib string
	Parameters       map[string]string
}

type FieldSchema struct {
	Name    string
	Type    string
	Comment string
}

type Order struct {
	Col   string
	Order int
}

type SkewedInfo struct {
	SkewedColNames             []string
	SkewedColValues            [][]string
	AWSSkewedColValues         []string
	SkewedColValueLocationMaps map[string]string
}

// Name returns the partition name: the partition values joined with "-",
// e.g. Values ["2024", "05", "20"] yield "2024-05-20".
func (m Partition) Name() string {
	return strings.Join(m.Values, "-")
}