github.com/treeverse/lakefs@v1.24.1-0.20240520134607-95648127bfb0/pkg/metastore/hive/metastore_client.go (about)

     1  package hive
     2  
     3  import (
     4  	"context"
     5  	"crypto/tls"
     6  	"errors"
     7  	"fmt"
     8  	"strings"
     9  
    10  	"github.com/apache/thrift/lib/go/thrift"
    11  	"github.com/treeverse/lakefs/pkg/metastore"
    12  	mserrors "github.com/treeverse/lakefs/pkg/metastore/errors"
    13  	"github.com/treeverse/lakefs/pkg/metastore/hive/gen-go/hive_metastore"
    14  )
    15  
    16  type ThriftHiveMetastoreClient interface {
    17  	CreateTable(ctx context.Context, tbl *hive_metastore.Table) error
    18  	GetTable(ctx context.Context, dbname string, tableName string) (*hive_metastore.Table, error)
    19  	AlterTable(ctx context.Context, dbname string, tableName string, newTable *hive_metastore.Table) error
    20  	AddPartitions(ctx context.Context, newParts []*hive_metastore.Partition) (int32, error)
    21  	GetPartitions(ctx context.Context, dbName string, tableName string, maxPartitions int16) (r []*hive_metastore.Partition, err error)
    22  	GetPartition(ctx context.Context, dbName string, tableName string, values []string) (r *hive_metastore.Partition, err error)
    23  	AlterPartitions(ctx context.Context, dbName string, tableName string, newPartitions []*hive_metastore.Partition) error
    24  	AlterPartition(ctx context.Context, dbName string, tableName string, values *hive_metastore.Partition) error
    25  	AddPartition(ctx context.Context, newPartition *hive_metastore.Partition) (r *hive_metastore.Partition, err error)
    26  	DropPartition(ctx context.Context, dbName string, tableName string, values []string, deleteData bool) (bool, error)
    27  	GetDatabase(ctx context.Context, name string) (r *hive_metastore.Database, err error)
    28  	GetDatabases(ctx context.Context, pattern string) (r []string, err error)
    29  	GetAllDatabases(ctx context.Context) (r []string, err error)
    30  	CreateDatabase(ctx context.Context, database *hive_metastore.Database) error
    31  	GetTables(ctx context.Context, dbName string, pattern string) ([]string, error)
    32  }
    33  
    34  type MSClient struct {
    35  	Client          ThriftHiveMetastoreClient
    36  	transport       thrift.TTransport
    37  	baseLocationURI string
    38  }
    39  
    40  func (h *MSClient) GetDBLocation(dbName string) string {
    41  	return fmt.Sprintf("%s/%s.db", h.baseLocationURI, dbName)
    42  }
    43  
    44  func (h *MSClient) NormalizeDBName(name string) string {
    45  	return strings.ReplaceAll(name, "-", "_") // Table names including `-` are allowed in Glue but not in Hive
    46  }
    47  
    48  func NewMSClient(addr string, secure bool, baseLocationURI string) (*MSClient, error) {
    49  	msClient := &MSClient{
    50  		baseLocationURI: strings.TrimRight(baseLocationURI, "/"),
    51  	}
    52  	err := msClient.open(addr, secure)
    53  	if err != nil {
    54  		return nil, err
    55  	}
    56  	return msClient, nil
    57  }
    58  
    59  func (h *MSClient) open(addr string, secure bool) error {
    60  	var err error
    61  	cfg := &thrift.TConfiguration{}
    62  	if secure {
    63  		cfg.TLSConfig = &tls.Config{
    64  			//nolint:gosec
    65  			InsecureSkipVerify: true,
    66  		}
    67  		h.transport = thrift.NewTSSLSocketConf(addr, cfg)
    68  	} else {
    69  		h.transport = thrift.NewTSocketConf(addr, cfg)
    70  	}
    71  	err = h.transport.Open()
    72  	if err != nil {
    73  		return err
    74  	}
    75  
    76  	protocolFactory := thrift.NewTBinaryProtocolFactoryConf(cfg)
    77  	iprot := protocolFactory.GetProtocol(h.transport)
    78  	oprot := protocolFactory.GetProtocol(h.transport)
    79  	h.Client = hive_metastore.NewThriftHiveMetastoreClient(thrift.NewTStandardClient(iprot, oprot))
    80  	return nil
    81  }
    82  
    83  func (h *MSClient) Close() error {
    84  	if h.transport != nil {
    85  		return h.transport.Close()
    86  	}
    87  	return nil
    88  }
    89  
    90  func (h *MSClient) CreateTable(ctx context.Context, tbl *metastore.Table) error {
    91  	table := TableLocalToHive(tbl)
    92  	err := h.Client.CreateTable(ctx, table)
    93  	return err
    94  }
    95  
    96  func (h *MSClient) HasTable(ctx context.Context, dbname string, tableName string) (bool, error) {
    97  	table, err := h.GetTable(ctx, dbname, tableName)
    98  	var noSuchObjectErr *hive_metastore.NoSuchObjectException
    99  	if err != nil && !errors.As(err, &noSuchObjectErr) {
   100  		return false, err
   101  	}
   102  	return table != nil, nil
   103  }
   104  
   105  func (h *MSClient) GetTable(ctx context.Context, dbname string, tableName string) (*metastore.Table, error) {
   106  	tb, err := h.Client.GetTable(ctx, dbname, tableName)
   107  	if err != nil {
   108  		return nil, err
   109  	}
   110  	return TableHiveToLocal(tb), nil
   111  }
   112  
   113  func (h *MSClient) AlterTable(ctx context.Context, dbName string, tableName string, newTable *metastore.Table) error {
   114  	newHiveTable := TableLocalToHive(newTable)
   115  
   116  	return h.Client.AlterTable(ctx, dbName, tableName, newHiveTable)
   117  }
   118  
   119  func (h *MSClient) AddPartitions(ctx context.Context, _ string, _ string, newParts []*metastore.Partition) error {
   120  	newHivePartitions := PartitionsLocalToHive(newParts)
   121  	_, err := h.Client.AddPartitions(ctx, newHivePartitions)
   122  	return err
   123  }
   124  
   125  func (h *MSClient) GetPartitions(ctx context.Context, dbName string, tableName string) ([]*metastore.Partition, error) {
   126  	partitions, err := h.Client.GetPartitions(ctx, dbName, tableName, -1)
   127  	if err != nil {
   128  		return nil, err
   129  	}
   130  	return PartitionsHiveToLocal(partitions), nil
   131  }
   132  
   133  func (h *MSClient) GetPartition(ctx context.Context, dbName string, tableName string, values []string) (*metastore.Partition, error) {
   134  	partition, err := h.Client.GetPartition(ctx, dbName, tableName, values)
   135  	if err != nil {
   136  		return nil, err
   137  	}
   138  	return PartitionHiveToLocal(partition), nil
   139  }
   140  
   141  func (h *MSClient) AlterPartitions(ctx context.Context, dbName string, tableName string, newPartitions []*metastore.Partition) error {
   142  	partitions := PartitionsLocalToHive(newPartitions)
   143  	return h.Client.AlterPartitions(ctx, dbName, tableName, partitions)
   144  }
   145  
   146  func (h *MSClient) AlterPartition(ctx context.Context, dbName string, tableName string, partition *metastore.Partition) error {
   147  	hivePartition := PartitionLocalToHive(partition)
   148  	return h.Client.AlterPartition(ctx, dbName, tableName, hivePartition)
   149  }
   150  
   151  func (h *MSClient) AddPartition(ctx context.Context, _ string, _ string, newPartition *metastore.Partition) error {
   152  	hivePartition := PartitionLocalToHive(newPartition)
   153  	_, err := h.Client.AddPartition(ctx, hivePartition)
   154  	return err
   155  }
   156  
   157  func (h *MSClient) DropPartition(ctx context.Context, dbName string, tableName string, values []string) error {
   158  	_, err := h.Client.DropPartition(ctx, dbName, tableName, values, false)
   159  	return err
   160  }
   161  
   162  func (h *MSClient) GetDatabase(ctx context.Context, name string) (*metastore.Database, error) {
   163  	db, err := h.Client.GetDatabase(ctx, name)
   164  	if err != nil {
   165  		return nil, err
   166  	}
   167  	return DatabaseHiveToLocal(db), nil
   168  }
   169  
   170  func (h *MSClient) GetDatabases(ctx context.Context, pattern string) ([]*metastore.Database, error) {
   171  	databaseNames, err := h.Client.GetDatabases(ctx, pattern)
   172  	if err != nil {
   173  		return nil, err
   174  	}
   175  	return h.getDatabasesFromNames(ctx, databaseNames)
   176  }
   177  
   178  func (h *MSClient) getDatabasesFromNames(ctx context.Context, names []string) ([]*metastore.Database, error) {
   179  	databases := make([]*metastore.Database, len(names))
   180  	for i, dbName := range names {
   181  		hiveDatabase, err := h.Client.GetDatabase(ctx, dbName)
   182  		if err != nil {
   183  			return nil, err
   184  		}
   185  		database := DatabaseHiveToLocal(hiveDatabase)
   186  		databases[i] = database
   187  	}
   188  	return databases, nil
   189  }
   190  
   191  func (h *MSClient) GetAllDatabases(ctx context.Context) ([]*metastore.Database, error) {
   192  	databaseNames, err := h.Client.GetAllDatabases(ctx)
   193  	if err != nil {
   194  		return nil, err
   195  	}
   196  	return h.getDatabasesFromNames(ctx, databaseNames)
   197  }
   198  
   199  func (h *MSClient) CreateDatabase(ctx context.Context, database *metastore.Database) error {
   200  	hiveDatabase := DatabaseLocalToHive(database)
   201  	err := h.Client.CreateDatabase(ctx, hiveDatabase)
   202  	var ErrExists *hive_metastore.AlreadyExistsException
   203  	if errors.As(err, &ErrExists) {
   204  		return mserrors.ErrSchemaExists
   205  	}
   206  	return err
   207  }
   208  
   209  func (h *MSClient) GetTables(ctx context.Context, dbName string, pattern string) ([]*metastore.Table, error) {
   210  	tableNames, err := h.Client.GetTables(ctx, dbName, pattern)
   211  	if err != nil {
   212  		return nil, err
   213  	}
   214  	return h.getTablesFromNames(ctx, dbName, tableNames)
   215  }
   216  
   217  func (h *MSClient) getTablesFromNames(ctx context.Context, dbName string, names []string) ([]*metastore.Table, error) {
   218  	tables := make([]*metastore.Table, len(names))
   219  	for i, tableName := range names {
   220  		hiveTables, err := h.Client.GetTable(ctx, dbName, tableName)
   221  		if err != nil {
   222  			return nil, err
   223  		}
   224  		table := TableHiveToLocal(hiveTables)
   225  		tables[i] = table
   226  	}
   227  	return tables, nil
   228  }