github.com/rohankumardubey/aresdb@v0.0.2-0.20190517170215-e54e3ca06b9c/metastore/disk_metastore.go (about)

     1  //  Copyright (c) 2017-2018 Uber Technologies, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package metastore
    16  
    17  import (
    18  	"bytes"
    19  	"encoding/json"
    20  	"fmt"
    21  	"io"
    22  	"os"
    23  	"path/filepath"
    24  	"sort"
    25  	"strconv"
    26  	"strings"
    27  	"sync"
    28  
    29  	"github.com/uber/aresdb/metastore/common"
    30  	"github.com/uber/aresdb/utils"
    31  )
    32  
    33  // meaningful defaults of table configurations.
    34  const (
    35  	DefaultBatchSize                      = 2097152
    36  	DefaultArchivingDelayMinutes          = 1440
    37  	DefaultArchivingIntervalMinutes       = 180
    38  	DefaultBackfillIntervalMinutes        = 60
    39  	DefaultBackfillMaxBufferSize    int64 = 4294967296
    40  	DefaultBackfillThresholdInBytes int64 = 2097152
    41  	DefaultBackfillStoreBatchSize         = 20000
    42  	DefaultRecordRetentionInDays          = 90
    43  	DefaultSnapshotIntervalMinutes        = 360                  // 6 hours
    44  	DefaultSnapshotThreshold              = 3 * DefaultBatchSize // 3 batches
    45  	DefaultRedologRotationInterval        = 10800                // 3 hours
    46  	DefaultMaxRedoLogSize                 = 1 << 30              // 1 GB
    47  )
    48  
    49  // DefaultTableConfig represents default table config
    50  var DefaultTableConfig = common.TableConfig{
    51  	BatchSize:                DefaultBatchSize,
    52  	ArchivingIntervalMinutes: DefaultArchivingIntervalMinutes,
    53  	ArchivingDelayMinutes:    DefaultArchivingDelayMinutes,
    54  	BackfillMaxBufferSize:    DefaultBackfillMaxBufferSize,
    55  	BackfillIntervalMinutes:  DefaultBackfillIntervalMinutes,
    56  	BackfillThresholdInBytes: DefaultBackfillThresholdInBytes,
    57  	BackfillStoreBatchSize:   DefaultBackfillStoreBatchSize,
    58  	RecordRetentionInDays:    DefaultRecordRetentionInDays,
    59  	SnapshotIntervalMinutes:  DefaultSnapshotIntervalMinutes,
    60  	SnapshotThreshold:        DefaultSnapshotThreshold,
    61  	RedoLogRotationInterval:  DefaultRedologRotationInterval,
    62  	MaxRedoLogFileSize:       DefaultMaxRedoLogSize,
    63  }
    64  
// diskMetaStore is the disk-based metastore implementation.
// All validation of user input (e.g. table/column names and table/column structs) is pushed to the api layer,
// which is the earliest point of user input, so all schemas inside the system are already valid.
// Note:
// There are four types of write calls to MetaStore, and each is handled differently:
// 1. Schema changes
// 	synchronous; return after both writing to the watcher channel and reading from the done channel are done.
// 2. Update enum cases
// 	return after the changes are persisted on disk and written to the watcher channel; does not read from the done channel.
// 3. Adding watchers
// 	3.1 for enum cases, create channels and push existing enum cases, starting from the start case, to the channel if any.
// 	3.2 for table list and table schema channels, create channels and return.
// 4. Update configurations
// 	configuration updates include updates to the archiving cutoff, snapshot version, archive batch version etc.;
// 	these changes do not need to be pushed to memstore.
// For operations that write to watcher channels (cases 1 and 2) the order of changes pushed into the channel
// must be enforced; writeLock is introduced for that.
// For the other operations (cases 3 and 4) a lock is only needed to protect internal data structures, so the
// embedded read-write lock is used.
type diskMetaStore struct {
	sync.RWMutex
	utils.FileSystem

	// writeLock enforces a single writer at a time
	// so that schema changes are applied to MemStore
	// through the watcher channel in the same order.
	writeLock sync.Mutex

	// basePath is the base path for the MetaStore on disk.
	basePath string

	// tableListWatcher is the send side of the table list change channel.
	tableListWatcher chan<- []string
	// tableListDone is the channel for tracking whether the watcher has
	// successfully got the table list change;
	// a synchronous model is adopted for schema changes.
	tableListDone <-chan struct{}

	// tableSchemaWatcher is the send side of the table schema change channel.
	tableSchemaWatcher chan<- *common.Table
	// tableSchemaDone is the channel for tracking whether the watcher has
	// successfully got the table schema change.
	tableSchemaDone <-chan struct{}

	// enumDictWatchers
	// maps from tableName to columnName to watchers.
	enumDictWatchers map[string]map[string]chan<- string
	// enumDictDone holds the channels for tracking whether a watcher has
	// successfully got the enum case change; same keying as enumDictWatchers.
	enumDictDone map[string]map[string]<-chan struct{}

	// shardOwnershipWatcher is the send side of the shard ownership change channel.
	shardOwnershipWatcher chan<- common.ShardOwnership
	// shardOwnershipDone is used to block while waiting for the consumer to finish
	// processing each ownership change event.
	shardOwnershipDone <-chan struct{}
}
   121  
   122  // ListTables list existing table names
   123  func (dm *diskMetaStore) ListTables() ([]string, error) {
   124  	return dm.listTables()
   125  }
   126  
   127  // GetTable return the table schema stored in metastore given tablename,
   128  // return ErrTableDoesNotExist if table not exists.
   129  func (dm *diskMetaStore) GetTable(name string) (*common.Table, error) {
   130  	dm.RLock()
   131  	defer dm.RUnlock()
   132  	err := dm.tableExists(name)
   133  	if err != nil {
   134  		return nil, err
   135  	}
   136  	return dm.readSchemaFile(name)
   137  }
   138  
   139  // GetOwnedShards returns the list of shards that are owned by this instance.
   140  func (dm *diskMetaStore) GetOwnedShards(table string) ([]int, error) {
   141  	return []int{0}, nil
   142  }
   143  
   144  // GetEnumDict gets the enum cases for given tableName and columnName
   145  func (dm *diskMetaStore) GetEnumDict(tableName, columnName string) ([]string, error) {
   146  	dm.RLock()
   147  	defer dm.RUnlock()
   148  	if err := dm.enumColumnExists(tableName, columnName); err != nil {
   149  		return nil, err
   150  	}
   151  	return dm.readEnumFile(tableName, columnName)
   152  }
   153  
   154  // GetArchivingCutoff gets the latest archiving cutoff for given table and shard.
   155  func (dm *diskMetaStore) GetArchivingCutoff(tableName string, shard int) (uint32, error) {
   156  	dm.RLock()
   157  	defer dm.RUnlock()
   158  	err := dm.shardExists(tableName, shard)
   159  	if err != nil {
   160  		return 0, err
   161  	}
   162  	file := dm.getShardVersionFilePath(tableName, shard)
   163  	return dm.readVersion(file)
   164  }
   165  
   166  // GetSnapshotProgress gets the latest snapshot progress for given table and shard
   167  func (dm *diskMetaStore) GetSnapshotProgress(tableName string, shard int) (int64, uint32, int32, uint32, error) {
   168  	dm.RLock()
   169  	defer dm.RUnlock()
   170  	if err := dm.shardExists(tableName, shard); err != nil {
   171  		return 0, 0, 0, 0, err
   172  	}
   173  	file := dm.getSnapshotRedoLogVersionAndOffsetFilePath(tableName, shard)
   174  	return dm.readSnapshotRedoLogFileAndOffset(file)
   175  }
   176  
   177  // UpdateArchivingCutoff updates archiving cutoff for given table (fact table), shard
   178  func (dm *diskMetaStore) UpdateArchivingCutoff(tableName string, shard int, cutoff uint32) error {
   179  	dm.Lock()
   180  	defer dm.Unlock()
   181  	if err := dm.shardExists(tableName, shard); err != nil {
   182  		return err
   183  	}
   184  
   185  	schema, err := dm.readSchemaFile(tableName)
   186  	if err != nil {
   187  		return err
   188  	}
   189  
   190  	if !schema.IsFactTable {
   191  		return ErrNotFactTable
   192  	}
   193  
   194  	file := dm.getShardVersionFilePath(tableName, shard)
   195  	return dm.writeArchivingCutoff(file, cutoff)
   196  }
   197  
   198  // UpdateSnapshotProgress update snapshot version for given table (dimension table), shard.
   199  func (dm *diskMetaStore) UpdateSnapshotProgress(tableName string, shard int, redoLogFile int64, upsertBatchOffset uint32, lastReadBatchID int32, lastReadBatchOffset uint32) error {
   200  	dm.Lock()
   201  	defer dm.Unlock()
   202  	if err := dm.shardExists(tableName, shard); err != nil {
   203  		return err
   204  	}
   205  
   206  	schema, err := dm.readSchemaFile(tableName)
   207  	if err != nil {
   208  		return err
   209  	}
   210  
   211  	if schema.IsFactTable {
   212  		return ErrNotDimensionTable
   213  	}
   214  
   215  	file := dm.getSnapshotRedoLogVersionAndOffsetFilePath(tableName, shard)
   216  	return dm.writeSnapshotRedoLogVersionAndOffset(file, redoLogFile, upsertBatchOffset, lastReadBatchID, lastReadBatchOffset)
   217  }
   218  
   219  // Updates the latest redolog/offset that have been backfilled for the specified shard.
   220  func (dm *diskMetaStore) UpdateBackfillProgress(table string, shard int, redoFile int64, offset uint32) error {
   221  	utils.GetLogger().Debugf("Backfill checkpoint(table=%s shard=%d redoFile=%d offset=%d)", table, shard, redoFile, offset)
   222  
   223  	dm.Lock()
   224  	defer dm.Unlock()
   225  	if err := dm.shardExists(table, shard); err != nil {
   226  		return err
   227  	}
   228  
   229  	schema, err := dm.readSchemaFile(table)
   230  	if err != nil {
   231  		return err
   232  	}
   233  
   234  	if !schema.IsFactTable {
   235  		return ErrNotFactTable
   236  	}
   237  
   238  	file := dm.getRedoLogVersionAndOffsetFilePath(table, shard)
   239  	return dm.writeRedoLogVersionAndOffset(file, redoFile, offset)
   240  }
   241  
   242  // Retrieve the latest redolog/offset that have been backfilled for the specified shard.
   243  func (dm *diskMetaStore) GetBackfillProgressInfo(table string, shard int) (int64, uint32, error) {
   244  	dm.RLock()
   245  	defer dm.RUnlock()
   246  	if err := dm.shardExists(table, shard); err != nil {
   247  		return 0, 0, err
   248  	}
   249  
   250  	file := dm.getRedoLogVersionAndOffsetFilePath(table, shard)
   251  	return dm.readRedoLogFileAndOffset(file)
   252  }
   253  
   254  // WatchTableListEvents register a watcher to table list change events,
   255  // should only be called once,
   256  // returns ErrWatcherAlreadyExist once watcher already exists
   257  func (dm *diskMetaStore) WatchTableListEvents() (events <-chan []string, done chan<- struct{}, err error) {
   258  	dm.Lock()
   259  	defer dm.Unlock()
   260  	if dm.tableListWatcher != nil {
   261  		return nil, nil, ErrWatcherAlreadyExist
   262  	}
   263  
   264  	watcherChan, doneChan := make(chan []string), make(chan struct{})
   265  	dm.tableListWatcher, dm.tableListDone = watcherChan, doneChan
   266  	return watcherChan, doneChan, nil
   267  }
   268  
   269  // WatchTableSchemaEvents register a watcher to table schema change events,
   270  // should be only called once,
   271  // returns ErrWatcherAlreadyExist once watcher already exists
   272  func (dm *diskMetaStore) WatchTableSchemaEvents() (events <-chan *common.Table, done chan<- struct{}, err error) {
   273  	dm.Lock()
   274  	defer dm.Unlock()
   275  	if dm.tableSchemaWatcher != nil {
   276  		return nil, nil, ErrWatcherAlreadyExist
   277  	}
   278  
   279  	watcherChan, doneChan := make(chan *common.Table), make(chan struct{})
   280  	dm.tableSchemaWatcher, dm.tableSchemaDone = watcherChan, doneChan
   281  	return watcherChan, doneChan, nil
   282  }
   283  
   284  // WatchEnumDictEvents register a watcher to enum cases change events for given table and column,
   285  // returns
   286  // 	ErrTableDoesNotExist, ErrColumnDoesNotExist, ErrNotEnumColumn, ErrWatcherAlreadyExist.
   287  // if startCase is larger than the number of current existing enum cases, it will be just as if receiving from
   288  // latest.
   289  func (dm *diskMetaStore) WatchEnumDictEvents(table, column string, startCase int) (events <-chan string, done chan<- struct{}, err error) {
   290  	dm.Lock()
   291  	defer dm.Unlock()
   292  
   293  	if err = dm.enumColumnExists(table, column); err != nil {
   294  		return nil, nil, err
   295  	}
   296  
   297  	if _, exist := dm.enumDictWatchers[table]; !exist {
   298  		dm.enumDictWatchers[table] = make(map[string]chan<- string)
   299  		dm.enumDictDone[table] = make(map[string]<-chan struct{})
   300  	}
   301  
   302  	if _, exist := dm.enumDictWatchers[table][column]; exist {
   303  		return nil, nil, ErrWatcherAlreadyExist
   304  	}
   305  
   306  	existingEnumCases, err := dm.readEnumFile(table, column)
   307  	if err != nil {
   308  		return nil, nil, err
   309  	}
   310  
   311  	channelCapacity := len(existingEnumCases) - startCase
   312  	// if start is larger than length of existing enum cases
   313  	// will treat as if sending from latest
   314  	if channelCapacity <= 0 {
   315  		channelCapacity = 1
   316  	}
   317  
   318  	watcherChan, doneChan := make(chan string, channelCapacity), make(chan struct{})
   319  	dm.enumDictWatchers[table][column], dm.enumDictDone[table][column] = watcherChan, doneChan
   320  	for start := startCase; start < len(existingEnumCases); start++ {
   321  		watcherChan <- existingEnumCases[start]
   322  	}
   323  	return watcherChan, doneChan, nil
   324  }
   325  
   326  // WatchTableSchemaEvents register a watcher to table schema change events,
   327  // should be only called once,
   328  // returns ErrWatcherAlreadyExist once watcher already exists
   329  func (dm *diskMetaStore) WatchShardOwnershipEvents() (events <-chan common.ShardOwnership, done chan<- struct{}, err error) {
   330  	dm.Lock()
   331  	defer dm.Unlock()
   332  	if dm.shardOwnershipWatcher != nil {
   333  		return nil, nil, ErrWatcherAlreadyExist
   334  	}
   335  
   336  	watcherChan, doneChan := make(chan common.ShardOwnership), make(chan struct{})
   337  	dm.shardOwnershipWatcher, dm.shardOwnershipDone = watcherChan, doneChan
   338  	return watcherChan, doneChan, nil
   339  }
   340  
   341  // CreateTable creates a new Table,
   342  // returns
   343  // 	ErrTableAlreadyExist if table already exists
   344  func (dm *diskMetaStore) CreateTable(table *common.Table) (err error) {
   345  	dm.writeLock.Lock()
   346  	defer dm.writeLock.Unlock()
   347  
   348  	var existingTables []string
   349  	dm.Lock()
   350  	defer func() {
   351  		dm.Unlock()
   352  		if err == nil {
   353  			dm.pushSchemaChange(table)
   354  			dm.pushShardOwnershipChange(table.Name)
   355  		}
   356  	}()
   357  
   358  	existingTables, err = dm.listTables()
   359  	if err != nil {
   360  		return err
   361  	}
   362  
   363  	if utils.IndexOfStr(existingTables, table.Name) >= 0 {
   364  		return ErrTableAlreadyExist
   365  	}
   366  
   367  	validator := NewTableSchameValidator()
   368  	validator.SetNewTable(*table)
   369  	err = validator.Validate()
   370  	if err != nil {
   371  		return err
   372  	}
   373  
   374  	if err = dm.MkdirAll(dm.getTableDirPath(table.Name), 0755); err != nil {
   375  		return err
   376  	}
   377  
   378  	if err = dm.writeSchemaFile(table); err != nil {
   379  		return err
   380  	}
   381  
   382  	// append enum case for enum column with default value
   383  	for _, column := range table.Columns {
   384  		if column.DefaultValue != nil && column.IsEnumColumn() {
   385  			err = dm.writeEnumFile(table.Name, column.Name, []string{*column.DefaultValue})
   386  			if err != nil {
   387  				return err
   388  			}
   389  		}
   390  	}
   391  
   392  	// for single instance version, when creating table, create shard zero as well
   393  	return dm.createShard(table.Name, table.IsFactTable, 0)
   394  }
   395  
   396  // UpdateTable update table configurations
   397  // return
   398  //  ErrTableDoesNotExist if table does not exist
   399  func (dm *diskMetaStore) UpdateTableConfig(tableName string, config common.TableConfig) (err error) {
   400  	dm.writeLock.Lock()
   401  	defer dm.writeLock.Unlock()
   402  
   403  	var table *common.Table
   404  	dm.Lock()
   405  	defer func() {
   406  		dm.Unlock()
   407  		if err == nil && table != nil {
   408  			dm.pushSchemaChange(table)
   409  		}
   410  	}()
   411  
   412  	if err = dm.tableExists(tableName); err != nil {
   413  		return err
   414  	}
   415  
   416  	table, err = dm.readSchemaFile(tableName)
   417  	if err != nil {
   418  		return err
   419  	}
   420  
   421  	table.Config = config
   422  	return dm.writeSchemaFile(table)
   423  }
   424  
   425  // UpdateTable updates table schema and config
   426  // table passed in should have been validated against existing table schema
   427  func (dm *diskMetaStore) UpdateTable(table common.Table) (err error) {
   428  	dm.writeLock.Lock()
   429  	defer dm.writeLock.Unlock()
   430  
   431  	dm.Lock()
   432  	defer func() {
   433  		dm.Unlock()
   434  		if err == nil {
   435  			dm.pushSchemaChange(&table)
   436  		}
   437  	}()
   438  
   439  	var existingTable *common.Table
   440  	existingTable, err = dm.readSchemaFile(table.Name)
   441  	if err != nil {
   442  		return
   443  	}
   444  
   445  	if err = dm.writeSchemaFile(&table); err != nil {
   446  		return err
   447  	}
   448  
   449  	// append enum case for enum column with default value for new columns
   450  	for i := len(existingTable.Columns); i < len(table.Columns); i++ {
   451  		column := table.Columns[i]
   452  		if column.DefaultValue != nil && column.IsEnumColumn() {
   453  			err = dm.writeEnumFile(table.Name, column.Name, []string{*column.DefaultValue})
   454  			if err != nil {
   455  				return
   456  			}
   457  		}
   458  	}
   459  
   460  	return
   461  }
   462  
   463  // DeleteTable deletes a table
   464  // return
   465  // 	ErrTableDoesNotExist if table does not exist
   466  func (dm *diskMetaStore) DeleteTable(tableName string) (err error) {
   467  	dm.writeLock.Lock()
   468  	defer dm.writeLock.Unlock()
   469  
   470  	var existingTables []string
   471  	dm.Lock()
   472  	defer func() {
   473  		dm.Unlock()
   474  		if dm.tableListWatcher != nil {
   475  			dm.tableListWatcher <- existingTables
   476  			<-dm.tableListDone
   477  		}
   478  	}()
   479  
   480  	existingTables, err = dm.listTables()
   481  	if err != nil {
   482  		return utils.StackError(err, "Failed to list tables")
   483  	}
   484  
   485  	index := utils.IndexOfStr(existingTables, tableName)
   486  	if index < 0 {
   487  		return ErrTableDoesNotExist
   488  	}
   489  
   490  	if err = dm.removeTable(tableName); err != nil {
   491  		return err
   492  	}
   493  	existingTables = append(existingTables[:index], existingTables[index+1:]...)
   494  	return nil
   495  }
   496  
   497  // AddColumn adds a new column
   498  // returns
   499  // 	ErrTableDoesNotExist if table does not exist
   500  // 	ErrColumnAlreadyExist if column already exists
   501  func (dm *diskMetaStore) AddColumn(tableName string, column common.Column, appendToArchivingSortOrder bool) (err error) {
   502  	dm.writeLock.Lock()
   503  	defer dm.writeLock.Unlock()
   504  
   505  	var table *common.Table
   506  	dm.Lock()
   507  	defer func() {
   508  		dm.Unlock()
   509  		if err == nil {
   510  			dm.pushSchemaChange(table)
   511  		}
   512  	}()
   513  
   514  	if err = dm.tableExists(tableName); err != nil {
   515  		return err
   516  	}
   517  
   518  	if table, err = dm.readSchemaFile(tableName); err != nil {
   519  		return err
   520  	}
   521  	return dm.addColumn(table, column, appendToArchivingSortOrder)
   522  }
   523  
   524  // UpdateColumn deletes a column.
   525  // return
   526  // 	ErrTableDoesNotExist if table does not exist.
   527  // 	ErrColumnDoesNotExist if column does not exist.
   528  func (dm *diskMetaStore) UpdateColumn(tableName string, columnName string, config common.ColumnConfig) (err error) {
   529  	dm.writeLock.Lock()
   530  	defer dm.writeLock.Unlock()
   531  
   532  	var table *common.Table
   533  	dm.Lock()
   534  	defer func() {
   535  		dm.Unlock()
   536  		if err == nil {
   537  			dm.pushSchemaChange(table)
   538  		}
   539  	}()
   540  
   541  	if err = dm.tableExists(tableName); err != nil {
   542  		return err
   543  	}
   544  
   545  	if table, err = dm.readSchemaFile(tableName); err != nil {
   546  		return err
   547  	}
   548  
   549  	return dm.updateColumn(table, columnName, config)
   550  }
   551  
   552  // DeleteColumn deletes a column
   553  // return
   554  // 	ErrTableDoesNotExist if table not exist
   555  // 	ErrColumnDoesNotExist if column not exist
   556  func (dm *diskMetaStore) DeleteColumn(tableName string, columnName string) (err error) {
   557  	dm.writeLock.Lock()
   558  	defer dm.writeLock.Unlock()
   559  
   560  	var table *common.Table
   561  	dm.Lock()
   562  	defer func() {
   563  		dm.Unlock()
   564  		if err == nil {
   565  			dm.pushSchemaChange(table)
   566  		}
   567  	}()
   568  
   569  	if err = dm.tableExists(tableName); err != nil {
   570  		return err
   571  	}
   572  
   573  	if table, err = dm.readSchemaFile(tableName); err != nil {
   574  		return err
   575  	}
   576  
   577  	return dm.removeColumn(table, columnName)
   578  }
   579  
   580  // ExtendEnumDict extends enum cases for given table column
   581  func (dm *diskMetaStore) ExtendEnumDict(table, column string, enumCases []string) (enumIDs []int, err error) {
   582  	dm.writeLock.Lock()
   583  	defer dm.writeLock.Unlock()
   584  
   585  	var existingCases []string
   586  	newEnumCases := make([]string, 0, len(enumCases))
   587  
   588  	dm.Lock()
   589  	defer func() {
   590  		dm.Unlock()
   591  		if err == nil {
   592  			if _, tableExist := dm.enumDictWatchers[table]; tableExist {
   593  				if watcher, columnExist := dm.enumDictWatchers[table][column]; columnExist {
   594  					for _, enumCase := range newEnumCases {
   595  						watcher <- enumCase
   596  					}
   597  				}
   598  			}
   599  		}
   600  	}()
   601  
   602  	if err = dm.enumColumnExists(table, column); err != nil {
   603  		return nil, err
   604  	}
   605  
   606  	existingCases, err = dm.readEnumFile(table, column)
   607  	if err != nil {
   608  		return nil, err
   609  	}
   610  
   611  	enumDict := make(map[string]int)
   612  	for enumID, enumCase := range existingCases {
   613  		enumDict[enumCase] = enumID
   614  	}
   615  
   616  	newEnumID := len(existingCases)
   617  
   618  	enumIDs = make([]int, len(enumCases))
   619  	for index, newCase := range enumCases {
   620  		if enumID, exist := enumDict[newCase]; exist {
   621  			enumIDs[index] = enumID
   622  		} else {
   623  			enumDict[newCase] = newEnumID
   624  			newEnumCases = append(newEnumCases, newCase)
   625  			enumIDs[index] = newEnumID
   626  			newEnumID++
   627  		}
   628  	}
   629  
   630  	if err = dm.writeEnumFile(table, column, newEnumCases); err != nil {
   631  		return nil, err
   632  	}
   633  
   634  	utils.GetRootReporter().GetChildGauge(map[string]string{
   635  		"table":      table,
   636  		"columnName": column,
   637  	}, utils.NumberOfEnumCasesPerColumn).Update(float64(newEnumID))
   638  
   639  	return enumIDs, nil
   640  }
   641  
   642  // PurgeArchiveBatches deletes the archive batches' metadata with batchID within [batchIDStart, batchIDEnd)
   643  func (dm *diskMetaStore) PurgeArchiveBatches(tableName string, shard, batchIDStart, batchIDEnd int) error {
   644  	dm.Lock()
   645  	defer dm.Unlock()
   646  
   647  	if err := dm.shardExists(tableName, shard); err != nil {
   648  		return err
   649  	}
   650  
   651  	batchFiles, err := dm.ReadDir(dm.getArchiveBatchDirPath(tableName, shard))
   652  	if os.IsNotExist(err) {
   653  		utils.GetLogger().Warnf("table %s shard %d does not exist", tableName, shard)
   654  		return nil
   655  	} else if err != nil {
   656  		return utils.StackError(err, "failed to read batch dir, table: %s, shard: %d", tableName, shard)
   657  	}
   658  
   659  	for _, batchFile := range batchFiles {
   660  		batchID, err := strconv.ParseInt(batchFile.Name(), 10, 32)
   661  		if err != nil {
   662  			return err
   663  		}
   664  
   665  		if batchID < int64(batchIDEnd) && batchID >= int64(batchIDStart) {
   666  			path := dm.getArchiveBatchVersionFilePath(tableName, shard, int(batchID))
   667  			err := dm.Remove(path)
   668  			if os.IsNotExist(err) {
   669  				utils.GetLogger().Warnf("batch %d of table %s, shard %d does not exist", batchID, tableName, shard)
   670  			} else if err != nil {
   671  				return utils.StackError(err, "failed to delete metadata, table: %s, shard: %d, batch: %d", tableName, shard, batchID)
   672  			}
   673  		}
   674  	}
   675  
   676  	return nil
   677  }
   678  
   679  // AddArchiveBatchVersion adds a new version to archive batch.
   680  func (dm *diskMetaStore) AddArchiveBatchVersion(tableName string, shard, batchID int, version uint32, seqNum uint32, batchSize int) error {
   681  	dm.Lock()
   682  	defer dm.Unlock()
   683  
   684  	if err := dm.shardExists(tableName, shard); err != nil {
   685  		return err
   686  	}
   687  
   688  	path := dm.getArchiveBatchVersionFilePath(tableName, shard, batchID)
   689  
   690  	if err := dm.MkdirAll(filepath.Dir(path), 0755); err != nil {
   691  		return utils.StackError(err, "Failed to create archive batch version directory")
   692  	}
   693  
   694  	writer, err := dm.OpenFileForWrite(
   695  		path,
   696  		os.O_WRONLY|os.O_APPEND|os.O_CREATE,
   697  		0644,
   698  	)
   699  
   700  	if err != nil {
   701  		return utils.StackError(
   702  			err,
   703  			"Failed to open archive batch version file, table: %s, shard: %d, batch: %d",
   704  			tableName,
   705  			shard,
   706  			batchID,
   707  		)
   708  	}
   709  	defer writer.Close()
   710  
   711  	if seqNum > 0 {
   712  		_, err = io.WriteString(writer, fmt.Sprintf("%d-%d,%d\n", version, seqNum, batchSize))
   713  	} else {
   714  		_, err = io.WriteString(writer, fmt.Sprintf("%d,%d\n", version, batchSize))
   715  	}
   716  	if err != nil {
   717  		return utils.StackError(err, "Failed to write to batch version file, table: %s, shard: %d, batch: %d",
   718  			tableName,
   719  			shard,
   720  			batchID,
   721  		)
   722  	}
   723  
   724  	return nil
   725  }
   726  
   727  // GetArchiveBatchVersion gets the latest version <= given archiving/live cutoff
   728  // all cutoff and batch versions are sorted in file per batch
   729  // sample:
   730  // 	/root_path/metastore/{$table}/shards/{$shard_id}/batches/{$batch_id}
   731  //  version,size
   732  //  1-0,10
   733  //  2-0,20
   734  //  2-1,26
   735  //  4-0,20
   736  //  5-0,20
   737  //  5-1,25
   738  //  5-2,38
   739  // if given cutoff 6, returns 5-2,38
   740  // if given cutoff 4, returns 4-0,20
   741  // if given cutoff 0, returns 0-0, 0
   742  func (dm *diskMetaStore) GetArchiveBatchVersion(table string, shard, batchID int, cutoff uint32) (uint32, uint32, int, error) {
   743  	dm.RLock()
   744  	defer dm.RUnlock()
   745  
   746  	if err := dm.shardExists(table, shard); err != nil {
   747  		return 0, 0, 0, err
   748  	}
   749  
   750  	batchVersionBytes, err := dm.ReadFile(dm.getArchiveBatchVersionFilePath(table, shard, batchID))
   751  	if os.IsNotExist(err) {
   752  		return 0, 0, 0, nil
   753  	} else if err != nil {
   754  		return 0, 0, 0, utils.StackError(err, "Failed to read batch")
   755  	}
   756  
   757  	batchVersionSizes := strings.Split(strings.TrimSuffix(string(batchVersionBytes), "\n"), "\n")
   758  
   759  	var version uint64
   760  	// do binary search to find the first cutoff that is larger than the specified cutoff
   761  	firstIndex := sort.Search(len(batchVersionSizes), func(i int) bool {
   762  		versionSizePair := strings.Split(batchVersionSizes[i], ",")
   763  
   764  		// backward compatible: sequence number may not exist for old version
   765  		if !strings.Contains(versionSizePair[0], "-") {
   766  			version, err = strconv.ParseUint(versionSizePair[0], 10, 32)
   767  		} else {
   768  			versionSeqStr := strings.Split(versionSizePair[0], "-")
   769  			version, err = strconv.ParseUint(versionSeqStr[0], 10, 32)
   770  		}
   771  
   772  		if err != nil {
   773  			// this should never happen
   774  			utils.GetLogger().With(
   775  				"error", err.Error(),
   776  				"table", table,
   777  				"shard", shard,
   778  				"batchID", batchID).
   779  				Panic("Incorrect batch version")
   780  		}
   781  		return uint32(version) > cutoff
   782  	})
   783  
   784  	// all cutoffs larger than given cutoff
   785  	if firstIndex == 0 {
   786  		return 0, 0, 0, nil
   787  	}
   788  
   789  	versionSizePair := strings.Split(batchVersionSizes[firstIndex-1], ",")
   790  	if len(versionSizePair) != 2 {
   791  		return 0, 0, 0, utils.StackError(err, "Incorrect batch version and size pair, %s", batchVersionSizes[firstIndex-1])
   792  	}
   793  
   794  	var seqNum uint64
   795  	if !strings.Contains(versionSizePair[0], "-") {
   796  		version, err = strconv.ParseUint(versionSizePair[0], 10, 32)
   797  		seqNum = 0
   798  	} else {
   799  		versionSeqStr := strings.Split(versionSizePair[0], "-")
   800  		seqNum, err = strconv.ParseUint(versionSeqStr[1], 10, 32)
   801  		if err != nil {
   802  			return 0, 0, 0, utils.StackError(err, "Failed to parse batch sequence, %s", versionSizePair[0])
   803  		}
   804  		version, err = strconv.ParseUint(versionSeqStr[0], 10, 32)
   805  	}
   806  
   807  	if err != nil {
   808  		return 0, 0, 0, utils.StackError(err, "Failed to parse batchVersion, %s", versionSizePair[0])
   809  	}
   810  	batchSize, err := strconv.ParseInt(versionSizePair[1], 10, 32)
   811  	if err != nil {
   812  		return 0, 0, 0, utils.StackError(err, "Failed to parse batchSize, %s", versionSizePair[1])
   813  	}
   814  
   815  	return uint32(version), uint32(seqNum), int(batchSize), nil
   816  }
   817  
   818  func (dm *diskMetaStore) pushSchemaChange(table *common.Table) {
   819  	if dm.tableSchemaWatcher != nil {
   820  		dm.tableSchemaWatcher <- table
   821  		<-dm.tableSchemaDone
   822  	}
   823  }
   824  
   825  func (dm *diskMetaStore) pushShardOwnershipChange(tableName string) {
   826  	if dm.shardOwnershipWatcher != nil {
   827  		dm.shardOwnershipWatcher <- common.ShardOwnership{
   828  			TableName: tableName,
   829  			Shard:     0,
   830  			ShouldOwn: true}
   831  		<-dm.shardOwnershipDone
   832  	}
   833  }
   834  
   835  // listTable lists the table
   836  func (dm *diskMetaStore) listTables() ([]string, error) {
   837  	tableDirs, err := dm.ReadDir(dm.basePath)
   838  	if err != nil {
   839  		return nil, utils.StackError(err, "Failed to list tables")
   840  	}
   841  	tableNames := make([]string, len(tableDirs))
   842  	for id, tableDir := range tableDirs {
   843  		tableNames[id] = tableDir.Name()
   844  	}
   845  	return tableNames, nil
   846  }
   847  
   848  func (dm *diskMetaStore) removeTable(tableName string) error {
   849  	if err := dm.RemoveAll(dm.getTableDirPath(tableName)); err != nil {
   850  		return utils.StackError(err, "Failed to remove directory, table: %s", tableName)
   851  	}
   852  
   853  	// close all related enum dict watchers
   854  	// make sure all producer have done producing and detach
   855  	columnWatchers := dm.enumDictWatchers[tableName]
   856  	doneWatchers := dm.enumDictDone[tableName]
   857  	delete(dm.enumDictWatchers, tableName)
   858  	delete(dm.enumDictDone, tableName)
   859  
   860  	for columnName, watcher := range columnWatchers {
   861  		close(watcher)
   862  		// drain done channels for related enum watchers
   863  		// to make sure all previous changes are done
   864  		for range doneWatchers[columnName] {
   865  		}
   866  	}
   867  	return nil
   868  }
   869  
   870  func (dm *diskMetaStore) addColumn(table *common.Table, column common.Column, appendToArchivingSortOrder bool) error {
   871  	validator := NewTableSchameValidator()
   872  	validator.SetOldTable(*table)
   873  
   874  	newColumnID := len(table.Columns)
   875  	table.Columns = append(table.Columns, column)
   876  	if appendToArchivingSortOrder {
   877  		table.ArchivingSortColumns = append(table.ArchivingSortColumns, newColumnID)
   878  	}
   879  	validator.SetNewTable(*table)
   880  	err := validator.Validate()
   881  	if err != nil {
   882  		return err
   883  	}
   884  
   885  	if err := dm.writeSchemaFile(table); err != nil {
   886  		return utils.StackError(err, "Failed to write schema file, table: %s", table.Name)
   887  	}
   888  
   889  	// if enum column, append a enum case for default value
   890  	if column.DefaultValue != nil && column.IsEnumColumn() {
   891  		return dm.writeEnumFile(table.Name, column.Name, []string{*column.DefaultValue})
   892  	}
   893  
   894  	return nil
   895  }
   896  
   897  func (dm *diskMetaStore) updateColumn(table *common.Table, columnName string, config common.ColumnConfig) (err error) {
   898  	for id, column := range table.Columns {
   899  		if column.Name == columnName {
   900  			if column.Deleted {
   901  				// continue looking since there could be reused column name
   902  				// with different column id.
   903  				continue
   904  			}
   905  			column.Config = config
   906  			table.Columns[id] = column
   907  			return dm.writeSchemaFile(table)
   908  		}
   909  	}
   910  	return ErrColumnDoesNotExist
   911  }
   912  
   913  func (dm *diskMetaStore) removeColumn(table *common.Table, columnName string) error {
   914  	for id, column := range table.Columns {
   915  		if column.Name == columnName {
   916  			if column.Deleted {
   917  				// continue looking since there could be reused column name
   918  				// with different column id
   919  				continue
   920  			}
   921  
   922  			// trying to delete timestamp column from fact table
   923  			if table.IsFactTable && id == 0 {
   924  				return ErrDeleteTimeColumn
   925  			}
   926  
   927  			if utils.IndexOfInt(table.PrimaryKeyColumns, id) >= 0 {
   928  				return ErrDeletePrimaryKeyColumn
   929  			}
   930  
   931  			column.Deleted = true
   932  			table.Columns[id] = column
   933  			if err := dm.writeSchemaFile(table); err != nil {
   934  				return err
   935  			}
   936  
   937  			if column.IsEnumColumn() {
   938  				dm.removeEnumColumn(table.Name, column.Name)
   939  			}
   940  
   941  			return nil
   942  		}
   943  	}
   944  	return ErrColumnDoesNotExist
   945  }
   946  
   947  func (dm *diskMetaStore) getTableDirPath(tableName string) string {
   948  	return filepath.Join(dm.basePath, tableName)
   949  }
   950  
   951  func (dm *diskMetaStore) getEnumDirPath(tableName string) string {
   952  	return filepath.Join(dm.getTableDirPath(tableName), "enums")
   953  }
   954  
   955  func (dm *diskMetaStore) getEnumFilePath(tableName, columnName string) string {
   956  	return filepath.Join(dm.getEnumDirPath(tableName), columnName)
   957  }
   958  
   959  func (dm *diskMetaStore) getSchemaFilePath(tableName string) string {
   960  	return filepath.Join(dm.getTableDirPath(tableName), "schema")
   961  }
   962  
   963  func (dm *diskMetaStore) getShardsDirPath(tableName string) string {
   964  	return filepath.Join(dm.getTableDirPath(tableName), "shards")
   965  }
   966  
   967  func (dm *diskMetaStore) getShardDirPath(tableName string, shard int) string {
   968  	return filepath.Join(dm.getShardsDirPath(tableName), strconv.Itoa(shard))
   969  }
   970  
   971  func (dm *diskMetaStore) getShardVersionFilePath(tableName string, shard int) string {
   972  	return filepath.Join(dm.getShardDirPath(tableName, shard), "version")
   973  }
   974  
   975  func (dm *diskMetaStore) getArchiveBatchVersionFilePath(tableName string, shard, batchID int) string {
   976  	return filepath.Join(dm.getShardDirPath(tableName, shard), "batches", strconv.Itoa(batchID))
   977  }
   978  
   979  func (dm *diskMetaStore) getArchiveBatchDirPath(tableName string, shard int) string {
   980  	return filepath.Join(dm.getShardDirPath(tableName, shard), "batches")
   981  }
   982  
   983  func (dm *diskMetaStore) getRedoLogVersionAndOffsetFilePath(tableName string, shard int) string {
   984  	return filepath.Join(dm.getShardDirPath(tableName, shard), "redolog-offset")
   985  }
   986  
   987  func (dm *diskMetaStore) getSnapshotRedoLogVersionAndOffsetFilePath(tableName string, shard int) string {
   988  	return filepath.Join(dm.getShardDirPath(tableName, shard), "snapshot")
   989  }
   990  
   991  // readEnumFile reads the enum cases from file.
   992  func (dm *diskMetaStore) readEnumFile(tableName, columnName string) ([]string, error) {
   993  	enumBytes, err := dm.ReadFile(dm.getEnumFilePath(tableName, columnName))
   994  	if err != nil {
   995  		if os.IsNotExist(err) {
   996  			return []string{}, nil
   997  		}
   998  		return nil,
   999  			utils.StackError(err,
  1000  				"Failed to read enum file, table: %s, column: %s",
  1001  				tableName,
  1002  				columnName,
  1003  			)
  1004  	}
  1005  	return strings.Split(strings.TrimSuffix(string(enumBytes), common.EnumDelimiter), common.EnumDelimiter), nil
  1006  }
  1007  
  1008  // writeEnumFile append enum cases to existing file
  1009  func (dm *diskMetaStore) writeEnumFile(tableName, columnName string, enumCases []string) error {
  1010  	if len(enumCases) == 0 {
  1011  		return nil
  1012  	}
  1013  	err := dm.MkdirAll(dm.getEnumDirPath(tableName), 0755)
  1014  	if err != nil {
  1015  		return utils.StackError(err, "Failed to create enums directory")
  1016  	}
  1017  
  1018  	writer, err := dm.OpenFileForWrite(
  1019  		dm.getEnumFilePath(tableName, columnName),
  1020  		os.O_WRONLY|os.O_APPEND|os.O_CREATE,
  1021  		0644,
  1022  	)
  1023  	if err != nil {
  1024  		return utils.StackError(err, "Failed to open enum file, table: %s, column: %s", tableName, columnName)
  1025  	}
  1026  	defer writer.Close()
  1027  
  1028  	_, err = io.WriteString(writer, fmt.Sprintf("%s%s", strings.Join(enumCases, common.EnumDelimiter), common.EnumDelimiter))
  1029  	if err != nil {
  1030  		return utils.StackError(err, "Failed to write enum cases, table: %s, column: %s", tableName, columnName)
  1031  	}
  1032  
  1033  	return nil
  1034  }
  1035  
  1036  // readSchemaFile reads the schema file for given table.
  1037  func (dm *diskMetaStore) readSchemaFile(tableName string) (*common.Table, error) {
  1038  	jsonBytes, err := dm.ReadFile(dm.getSchemaFilePath(tableName))
  1039  	if err != nil {
  1040  		return nil, utils.StackError(
  1041  			err,
  1042  			"Failed to read schema file, table: %s",
  1043  			tableName,
  1044  		)
  1045  	}
  1046  	var table common.Table
  1047  	table.Config = DefaultTableConfig
  1048  
  1049  	err = json.Unmarshal(jsonBytes, &table)
  1050  	if err != nil {
  1051  		return nil, utils.StackError(
  1052  			err,
  1053  			"Failed to unmarshal table schema, table: %s",
  1054  			tableName,
  1055  		)
  1056  	}
  1057  
  1058  	return &table, nil
  1059  }
  1060  
  1061  // writeSchemaFile reads the schema file for given table.
  1062  func (dm *diskMetaStore) writeSchemaFile(table *common.Table) error {
  1063  	tableSchemaBytes, err := json.MarshalIndent(table, "", "  ")
  1064  	if err != nil {
  1065  		return utils.StackError(err, "Failed to marshal schema")
  1066  	}
  1067  
  1068  	writer, err := dm.OpenFileForWrite(
  1069  		dm.getSchemaFilePath(table.Name),
  1070  		os.O_WRONLY|os.O_TRUNC|os.O_CREATE,
  1071  		0644,
  1072  	)
  1073  
  1074  	if err != nil {
  1075  		return utils.StackError(
  1076  			err,
  1077  			"Failed to open schema file for write, table: %s",
  1078  			table.Name,
  1079  		)
  1080  	}
  1081  
  1082  	defer writer.Close()
  1083  	_, err = writer.Write(tableSchemaBytes)
  1084  	return err
  1085  }
  1086  
  1087  // readVersion reads the version from a given version file.
  1088  func (dm *diskMetaStore) readVersion(file string) (uint32, error) {
  1089  	fileBytes, err := dm.ReadFile(file)
  1090  	if os.IsNotExist(err) {
  1091  		return 0, nil
  1092  	}
  1093  	if err != nil {
  1094  		return 0, utils.StackError(err, "Failed to open version file %s", file)
  1095  	}
  1096  
  1097  	var version uint32
  1098  	_, err = fmt.Fscanln(bytes.NewBuffer(fileBytes), &version)
  1099  	if err != nil {
  1100  		return 0, utils.StackError(err, "Failed to read version file %s", file)
  1101  	}
  1102  	return version, nil
  1103  }
  1104  
  1105  // readRedoLogFileAndOffset reads the redo log file and offset from the file.
  1106  func (dm *diskMetaStore) readRedoLogFileAndOffset(filePath string) (int64, uint32, error) {
  1107  	bytes, err := dm.ReadFile(filePath)
  1108  	if os.IsNotExist(err) {
  1109  		return 0, 0, nil
  1110  	} else if err != nil {
  1111  		return 0, 0, utils.StackError(err, "Failed to read file:%s\n", filePath)
  1112  	}
  1113  
  1114  	redoLogAndOffset := strings.Split(strings.TrimSuffix(string(bytes), "\n"), ",")
  1115  
  1116  	if len(redoLogAndOffset) < 2 {
  1117  		return 0, 0, utils.StackError(nil, "Invalid redo log and offset file:%s:not enough strings\n", filePath)
  1118  	}
  1119  
  1120  	var redoLogVersion int64
  1121  	redoLogVersion, err = strconv.ParseInt(redoLogAndOffset[0], 10, 64)
  1122  	if err != nil {
  1123  		return 0, 0, utils.StackError(err, "Invalid redo log and offset file:%s:invalid redo log file\n", filePath)
  1124  	}
  1125  
  1126  	offset, err := strconv.ParseUint(redoLogAndOffset[1], 10, 32)
  1127  	if err != nil {
  1128  		return 0, 0, utils.StackError(err, "Invalid redo log and offset file:%s:invalid offset\n", filePath)
  1129  	}
  1130  
  1131  	return redoLogVersion, uint32(offset), nil
  1132  }
  1133  
  1134  // readSnapshotRedoLogFileAndOffset reads the redo log file and offset from the file.
  1135  func (dm *diskMetaStore) readSnapshotRedoLogFileAndOffset(filePath string) (int64, uint32, int32, uint32, error) {
  1136  	bytes, err := dm.ReadFile(filePath)
  1137  	if os.IsNotExist(err) {
  1138  		return 0, 0, 0, 0, nil
  1139  	} else if err != nil {
  1140  		return 0, 0, 0, 0, utils.StackError(err, "Failed to read file:%s\n", filePath)
  1141  	}
  1142  
  1143  	snapshotInfo := strings.Split(strings.TrimSuffix(string(bytes), "\n"), ",")
  1144  
  1145  	if len(snapshotInfo) < 4 {
  1146  		return 0, 0, 0, 0, utils.StackError(nil, "Invalid snapshot redolog file:%s:not enough strings\n", filePath)
  1147  	}
  1148  
  1149  	var redoLogVersion int64
  1150  	redoLogVersion, err = strconv.ParseInt(snapshotInfo[0], 10, 64)
  1151  	if err != nil {
  1152  		return 0, 0, 0, 0, utils.StackError(err, "Invalid sanshot redolog file:%s:invalid redo log file\n", filePath)
  1153  	}
  1154  
  1155  	offset, err := strconv.ParseUint(snapshotInfo[1], 10, 32)
  1156  	if err != nil {
  1157  		return 0, 0, 0, 0, utils.StackError(err, "Invalid snapshot redolog file:%s:invalid offset\n", filePath)
  1158  	}
  1159  
  1160  	batchID, err := strconv.ParseInt(snapshotInfo[2], 10, 64)
  1161  	if err != nil {
  1162  		return 0, 0, 0, 0, utils.StackError(err, "Invalid snapshot redolog file:%s:invalid batch id\n", filePath)
  1163  	}
  1164  
  1165  	index, err := strconv.ParseUint(snapshotInfo[3], 10, 32)
  1166  	if err != nil {
  1167  		return 0, 0, 0, 0, utils.StackError(err, "Invalid snapshot redolog file:%s:invalid index\n", filePath)
  1168  	}
  1169  
  1170  	return redoLogVersion, uint32(offset), int32(batchID), uint32(index), nil
  1171  }
  1172  
  1173  // writeArchivingCutoff writes the version to a given file.
  1174  func (dm *diskMetaStore) writeArchivingCutoff(file string, version uint32) error {
  1175  	if err := dm.MkdirAll(filepath.Dir(file), 0755); err != nil {
  1176  		return utils.StackError(err, "Failed to create version directory")
  1177  	}
  1178  
  1179  	writer, err := dm.OpenFileForWrite(
  1180  		file,
  1181  		os.O_CREATE|os.O_TRUNC|os.O_WRONLY,
  1182  		0644,
  1183  	)
  1184  
  1185  	if err != nil {
  1186  		return utils.StackError(err, "Failed to open version file %s for write", file)
  1187  	}
  1188  	defer writer.Close()
  1189  
  1190  	_, err = io.WriteString(writer, fmt.Sprintf("%d", version))
  1191  	return err
  1192  }
  1193  
  1194  // writeRedoLogVersionAndOffset writes redolog&offset to a given file.
  1195  func (dm *diskMetaStore) writeRedoLogVersionAndOffset(file string, redoLogFile int64, upsertBatchOffset uint32) error {
  1196  	if err := dm.MkdirAll(filepath.Dir(file), 0755); err != nil {
  1197  		return utils.StackError(err, "Failed to create redo log version and upsert batch offset directory")
  1198  	}
  1199  
  1200  	writer, err := dm.OpenFileForWrite(
  1201  		file,
  1202  		os.O_CREATE|os.O_TRUNC|os.O_WRONLY,
  1203  		0644,
  1204  	)
  1205  
  1206  	if err != nil {
  1207  		return utils.StackError(err, "Failed to open redo log version and upsert batch offset file %s for write", file)
  1208  	}
  1209  	defer writer.Close()
  1210  
  1211  	_, err = io.WriteString(writer, fmt.Sprintf("%d,%d", redoLogFile, upsertBatchOffset))
  1212  	return err
  1213  }
  1214  
  1215  // writeSnapshotRedoLogVersionAndOffset writes redolog&offset and last record position to a given file.
  1216  func (dm *diskMetaStore) writeSnapshotRedoLogVersionAndOffset(file string, redoLogFile int64, upsertBatchOffset uint32, lastReadBatchID int32, lastReadBatchOffset uint32) error {
  1217  	if err := dm.MkdirAll(filepath.Dir(file), 0755); err != nil {
  1218  		return utils.StackError(err, "Failed to create snapshot redo log version and upsert batch offset directory")
  1219  	}
  1220  
  1221  	writer, err := dm.OpenFileForWrite(
  1222  		file,
  1223  		os.O_CREATE|os.O_TRUNC|os.O_WRONLY,
  1224  		0644,
  1225  	)
  1226  
  1227  	if err != nil {
  1228  		return utils.StackError(err, "Failed to open snapshot redo log version and upsert batch offset file %s for write", file)
  1229  	}
  1230  	defer writer.Close()
  1231  
  1232  	_, err = io.WriteString(writer, fmt.Sprintf("%d,%d,%d,%d", redoLogFile, upsertBatchOffset, lastReadBatchID, lastReadBatchOffset))
  1233  	return err
  1234  }
  1235  
  1236  // closeEnumWatcher try to close enum watcher and delete enum file
  1237  func (dm *diskMetaStore) removeEnumColumn(tableName, columnName string) {
  1238  	if _, tableExist := dm.enumDictWatchers[tableName]; tableExist {
  1239  		watcher, watcherExist := dm.enumDictWatchers[tableName][columnName]
  1240  		if watcherExist {
  1241  			doneChan, _ := dm.enumDictDone[tableName][columnName]
  1242  			delete(dm.enumDictWatchers[tableName], columnName)
  1243  			delete(dm.enumDictDone[tableName], columnName)
  1244  			close(watcher)
  1245  			// drain up done channel for enum column
  1246  			// to make sure previous changes are processed
  1247  			for range doneChan {
  1248  			}
  1249  		}
  1250  	}
  1251  
  1252  	if err := dm.Remove(dm.getEnumFilePath(tableName, columnName)); err != nil {
  1253  		//TODO: log an error and alert.
  1254  	}
  1255  }
  1256  
  1257  // tableExists checks whether table exists,
  1258  // return ErrTableDoesNotExist.
  1259  func (dm *diskMetaStore) tableExists(tableName string) error {
  1260  	_, err := dm.Stat(dm.getSchemaFilePath(tableName))
  1261  	if os.IsNotExist(err) {
  1262  		return ErrTableDoesNotExist
  1263  	} else if err != nil {
  1264  		return utils.StackError(err, "Failed to read directory, table: %s", tableName)
  1265  	}
  1266  	return nil
  1267  }
  1268  
  1269  // enumColumnExists checks whether column exists and it is a enum column,
  1270  // return ErrTableDoesNotExist, ErrColumnDoesNotExist, ErrNotEnumColumn.
  1271  func (dm *diskMetaStore) enumColumnExists(tableName string, columnName string) error {
  1272  	if err := dm.tableExists(tableName); err != nil {
  1273  		return err
  1274  	}
  1275  
  1276  	table, err := dm.readSchemaFile(tableName)
  1277  	if err != nil {
  1278  		return err
  1279  	}
  1280  
  1281  	for _, column := range table.Columns {
  1282  		if column.Name == columnName {
  1283  			if column.Deleted {
  1284  				// continue since column name can be reused
  1285  				// with different id
  1286  				continue
  1287  			}
  1288  
  1289  			if !column.IsEnumColumn() {
  1290  				return ErrNotEnumColumn
  1291  			}
  1292  
  1293  			return nil
  1294  		}
  1295  	}
  1296  	return ErrColumnDoesNotExist
  1297  }
  1298  
  1299  // shardExists checks whether shard exists,
  1300  // return ErrShardDoesNotExist.
  1301  func (dm *diskMetaStore) shardExists(tableName string, shard int) error {
  1302  	if err := dm.tableExists(tableName); err != nil {
  1303  		return err
  1304  	}
  1305  
  1306  	_, err := dm.Stat(dm.getShardDirPath(tableName, shard))
  1307  	if os.IsNotExist(err) {
  1308  		return ErrShardDoesNotExist
  1309  	} else if err != nil {
  1310  		return utils.StackError(err, "Failed to read directory, table: %s, shard: %d", tableName, shard)
  1311  	}
  1312  
  1313  	return nil
  1314  }
  1315  
  1316  // createShard assume table is created already
  1317  func (dm *diskMetaStore) createShard(tableName string, isFactTable bool, shard int) error {
  1318  	var err error
  1319  	if isFactTable {
  1320  		// only fact table have archive batches directory
  1321  		err = dm.MkdirAll(filepath.Join(dm.getShardDirPath(tableName, 0), "batches"), 0755)
  1322  	} else {
  1323  		err = dm.MkdirAll(dm.getShardDirPath(tableName, 0), 0755)
  1324  	}
  1325  	if err != nil {
  1326  		return utils.StackError(err, "Failed to create shard directory, table: %s, shard: %d", tableName, shard)
  1327  	}
  1328  	return nil
  1329  }
  1330  
  1331  // NewDiskMetaStore creates a new disk based metastore
  1332  func NewDiskMetaStore(basePath string) (MetaStore, error) {
  1333  	metaStore := &diskMetaStore{
  1334  		FileSystem:       utils.OSFileSystem{},
  1335  		basePath:         basePath,
  1336  		writeLock:        sync.Mutex{},
  1337  		enumDictWatchers: make(map[string]map[string]chan<- string),
  1338  		enumDictDone:     make(map[string]map[string]<-chan struct{}),
  1339  	}
  1340  	err := metaStore.MkdirAll(basePath, 0755)
  1341  	if err != nil {
  1342  		return nil, utils.StackError(err, "Failed to make base directory for metastore, path: %s", basePath)
  1343  	}
  1344  	return metaStore, nil
  1345  }