github.com/rohankumardubey/aresdb@v0.0.2-0.20190517170215-e54e3ca06b9c/client/schema.go (about)

     1  package client
     2  
     3  import (
     4  	"bytes"
     5  	"encoding/json"
     6  	"fmt"
     7  	"io/ioutil"
     8  	"net/http"
     9  	"strconv"
    10  	"strings"
    11  	"sync"
    12  	"time"
    13  
    14  	"github.com/uber-go/tally"
    15  	metaCom "github.com/uber/aresdb/metastore/common"
    16  	"github.com/uber/aresdb/utils"
    17  	"go.uber.org/zap"
    18  )
    19  
// SchemaFetcher is the interface for fetching table schemas and enum cases.
type SchemaFetcher interface {
	// FetchAllSchemas fetches the schemas of all tables.
	FetchAllSchemas() ([]metaCom.Table, error)
	// FetchSchema fetches the schema of the given table.
	FetchSchema(table string) (*metaCom.Table, error)
	// FetchAllEnums fetches all enum cases of the given table column.
	// Callers in this file use the position of a case in the returned slice
	// as its enum ID.
	FetchAllEnums(tableName string, columnName string) ([]string, error)
	// ExtendEnumCases extends the enum cases of the given table column.
	// Callers in this file expect the returned IDs to line up by index with
	// enumCases.
	ExtendEnumCases(tableName, columnName string, enumCases []string) ([]int, error)
}
    31  
// httpSchemaFetcher is an HTTP based SchemaFetcher.
type httpSchemaFetcher struct {
	// httpClient issues the GET/POST requests against the schema endpoints.
	httpClient  http.Client
	// metricScope is used to count failed table fetches.
	metricScope tally.Scope
	// address is inserted after "http://" when endpoint URLs are built.
	address     string
}
    38  
// CachedSchemaHandler serves schema and enum requests from an in-memory cache
// that is filled through a SchemaFetcher.
type CachedSchemaHandler struct {
	// The embedded lock is taken by the methods of this type around accesses
	// to the cache maps below.
	*sync.RWMutex

	logger        *zap.SugaredLogger
	metricScope   tally.Scope
	schemaFetcher SchemaFetcher

	// schemas maps a table name to its cached table schema.
	schemas map[string]*TableSchema
	// enumMappings maps table name -> column ID -> enum dictionary.
	// Column IDs are used instead of column names because a column name can
	// be reused. Table names can be reused as well, but deleting and
	// re-adding a table requires a job restart anyway.
	enumMappings map[string]map[int]enumDict

	// enumDefaultValueMappings maps table name -> column ID -> enum ID of the
	// column's default value. An entry exists only when the column has a
	// non-nil default value that matches one of the fetched enum cases.
	enumDefaultValueMappings map[string]map[int]int
}
    59  
    60  // NewCachedSchemaHandler creates a new cached schema handler
    61  func NewCachedSchemaHandler(logger *zap.SugaredLogger, scope tally.Scope, schamaFetcher SchemaFetcher) *CachedSchemaHandler {
    62  	return &CachedSchemaHandler{
    63  		RWMutex:                  &sync.RWMutex{},
    64  		logger:                   logger,
    65  		metricScope:              scope,
    66  		schemaFetcher:            schamaFetcher,
    67  		schemas:                  make(map[string]*TableSchema),
    68  		enumMappings:             make(map[string]map[int]enumDict),
    69  		enumDefaultValueMappings: make(map[string]map[int]int),
    70  	}
    71  }
    72  
    73  // NewHttpSchemaFetcher creates a new http schema fetcher
    74  func NewHttpSchemaFetcher(httpClient http.Client, address string, scope tally.Scope) SchemaFetcher {
    75  	return &httpSchemaFetcher{
    76  		metricScope: scope,
    77  		address:     address,
    78  		httpClient:  httpClient,
    79  	}
    80  }
    81  
    82  // Start starts the CachedSchemaHandler, if interval > 0, will start periodical refresh
    83  func (cf *CachedSchemaHandler) Start(interval int) error {
    84  	err := cf.FetchAllSchema()
    85  	if err != nil {
    86  		return err
    87  	}
    88  
    89  	if interval <= 0 {
    90  		return nil
    91  	}
    92  
    93  	go func(refreshInterval int) {
    94  		ticks := time.Tick(time.Duration(refreshInterval) * time.Second)
    95  		for range ticks {
    96  			err = cf.FetchAllSchema()
    97  			if err != nil {
    98  				cf.logger.With(
    99  					"error", err.Error()).Errorf("Failed to fetch table schema")
   100  			}
   101  		}
   102  	}(interval)
   103  	return nil
   104  }
   105  
   106  // TranslateEnum translates given enum value to its enumID
   107  func (cf *CachedSchemaHandler) TranslateEnum(tableName string, columnID int, value interface{}, caseInsensitive bool) (enumID int, err error) {
   108  	if value == nil {
   109  		return -1, nil
   110  	}
   111  	enumCase, ok := value.(string)
   112  	if !ok {
   113  		return 0, utils.StackError(nil, "Enum value should be string, but got: %T", value)
   114  	}
   115  	if caseInsensitive {
   116  		enumCase = strings.ToLower(enumCase)
   117  	}
   118  	cf.RLock()
   119  	// here it already make sure the enum dictionary exists in cache
   120  	enumID, ok = cf.enumMappings[tableName][columnID][enumCase]
   121  	cf.RUnlock()
   122  	if !ok {
   123  		cf.metricScope.Tagged(
   124  			map[string]string{
   125  				"TableName": tableName,
   126  				"ColumnID":  strconv.Itoa(columnID),
   127  			},
   128  		).Counter("new_enum_case_rows_ignored").Inc(int64(1))
   129  		if defaultValue, ok := cf.enumDefaultValueMappings[tableName][columnID]; ok {
   130  			return defaultValue, nil
   131  		}
   132  		return -1, nil
   133  	}
   134  	return enumID, nil
   135  }
   136  
   137  // FetchAllSchema fetch all schemas
   138  func (cf *CachedSchemaHandler) FetchAllSchema() error {
   139  	tables, err := cf.schemaFetcher.FetchAllSchemas()
   140  	if err != nil {
   141  		return err
   142  	}
   143  
   144  	for _, table := range tables {
   145  		cf.setTable(&table)
   146  		err := cf.fetchAndSetEnumCases(&table)
   147  		if err != nil {
   148  			return err
   149  		}
   150  	}
   151  	return nil
   152  }
   153  
   154  // FetchSchema fetchs the schema of given table name
   155  func (cf *CachedSchemaHandler) FetchSchema(tableName string) (*TableSchema, error) {
   156  	cf.RLock()
   157  	schema, exist := cf.schemas[tableName]
   158  	cf.RUnlock()
   159  	if exist {
   160  		return schema, nil
   161  	}
   162  	table, err := cf.schemaFetcher.FetchSchema(tableName)
   163  	if err != nil {
   164  		return nil, err
   165  	}
   166  	schema = cf.setTable(table)
   167  	err = cf.fetchAndSetEnumCases(table)
   168  	return schema, err
   169  }
   170  
   171  // PrepareEnumCases prepares enum cases
   172  func (cf *CachedSchemaHandler) PrepareEnumCases(tableName, columnName string, enumCases []string) error {
   173  	newEnumCases := make([]string, 0, len(enumCases))
   174  	cf.RLock()
   175  	schema, exist := cf.schemas[tableName]
   176  	if !exist {
   177  		cf.RUnlock()
   178  		return nil
   179  	}
   180  	columnID, exist := schema.ColumnDict[columnName]
   181  	if !exist {
   182  		cf.RUnlock()
   183  		return nil
   184  	}
   185  
   186  	caseInsensitive := schema.Table.Columns[columnID].CaseInsensitive
   187  	disableAutoExpand := schema.Table.Columns[columnID].DisableAutoExpand
   188  	for _, enumCase := range enumCases {
   189  		if _, exist := cf.enumMappings[tableName][columnID][enumCase]; !exist {
   190  			newEnumCases = append(newEnumCases, enumCase)
   191  		}
   192  	}
   193  	cf.RUnlock()
   194  
   195  	if disableAutoExpand {
   196  		// It's recommended to set up elk or sentry logging to catch this error.
   197  		cf.logger.With(
   198  			"TableName", tableName,
   199  			"ColumnName", columnName,
   200  			"ColumnID", columnID,
   201  			"newEnumCasesSet", newEnumCases,
   202  			"caseInsensitive", caseInsensitive,
   203  		).Error("Finding new enum cases during ingestion but enum auto expansion is disabled")
   204  		cf.metricScope.Tagged(
   205  			map[string]string{
   206  				"TableName": tableName,
   207  				"ColumnID":  strconv.Itoa(columnID),
   208  			},
   209  		).Counter("new_enum_cases_ignored").Inc(int64(len(newEnumCases)))
   210  		return nil
   211  	}
   212  
   213  	enumIDs, err := cf.schemaFetcher.ExtendEnumCases(tableName, columnName, newEnumCases)
   214  	if err != nil {
   215  		return err
   216  	}
   217  
   218  	cf.Lock()
   219  	for index, enumCase := range newEnumCases {
   220  		if caseInsensitive {
   221  			enumCase = strings.ToLower(enumCase)
   222  		}
   223  		cf.enumMappings[tableName][columnID][enumCase] = enumIDs[index]
   224  	}
   225  	cf.Unlock()
   226  	return nil
   227  }
   228  
   229  func (cf *CachedSchemaHandler) fetchAndSetEnumCases(table *metaCom.Table) error {
   230  	enumMappings := make(map[int]enumDict)
   231  	enumDefaultValueMappings := make(map[int]int)
   232  	for columnID, column := range table.Columns {
   233  		if column.Deleted {
   234  			continue
   235  		}
   236  		enumMappings[columnID] = make(enumDict)
   237  		caseInsensitive := column.CaseInsensitive
   238  		var defValuePtr *string
   239  
   240  		if column.DefaultValue != nil {
   241  			var defValue = *column.DefaultValue
   242  			if caseInsensitive {
   243  				defValue = strings.ToLower(defValue)
   244  			}
   245  			defValuePtr = &defValue
   246  		}
   247  
   248  		if column.IsEnumColumn() {
   249  			enumCases, err := cf.schemaFetcher.FetchAllEnums(table.Name, column.Name)
   250  			if err == nil {
   251  				for enumID, enumCase := range enumCases {
   252  					// Convert to lower case for comparison during ingestion.
   253  					if caseInsensitive {
   254  						enumCase = strings.ToLower(enumCase)
   255  					}
   256  					// all mapping should be pre created
   257  					enumMappings[columnID][enumCase] = enumID
   258  
   259  					if defValuePtr != nil {
   260  						if *defValuePtr == enumCase {
   261  							enumDefaultValueMappings[columnID] = enumID
   262  						}
   263  					}
   264  				}
   265  			} else {
   266  				cf.metricScope.Tagged(map[string]string{
   267  					"table":    table.Name,
   268  					"columnID": strconv.Itoa(columnID),
   269  				}).Counter("err_fetch_enum_dict").Inc(1)
   270  				return utils.StackError(err, "Failed to fetch enum cases for table: %s, column: %d", table.Name, columnID)
   271  			}
   272  		}
   273  	}
   274  	cf.Lock()
   275  	cf.enumMappings[table.Name] = enumMappings
   276  	cf.enumDefaultValueMappings[table.Name] = enumDefaultValueMappings
   277  	cf.Unlock()
   278  	return nil
   279  }
   280  
   281  func (cf *CachedSchemaHandler) setTable(table *metaCom.Table) *TableSchema {
   282  	columnDict := make(map[string]int)
   283  	for columnID, column := range table.Columns {
   284  		if !column.Deleted {
   285  			columnDict[column.Name] = columnID
   286  		}
   287  	}
   288  
   289  	schema := &TableSchema{
   290  		Table:      table,
   291  		ColumnDict: columnDict,
   292  	}
   293  
   294  	cf.Lock()
   295  	cf.schemas[table.Name] = schema
   296  	if _, tableExist := cf.enumMappings[table.Name]; !tableExist {
   297  		cf.enumMappings[table.Name] = make(map[int]enumDict)
   298  		cf.enumDefaultValueMappings[table.Name] = make(map[int]int)
   299  	}
   300  	for columnID, column := range table.Columns {
   301  		if !column.Deleted && column.IsEnumColumn() {
   302  			if _, columnExist := cf.enumMappings[table.Name][columnID]; !columnExist {
   303  				cf.enumMappings[table.Name][columnID] = make(enumDict)
   304  			}
   305  		}
   306  	}
   307  	cf.Unlock()
   308  	return schema
   309  }
   310  
   311  func (hf *httpSchemaFetcher) FetchAllEnums(tableName, columnName string) ([]string, error) {
   312  	var enumDictReponse []string
   313  
   314  	resp, err := hf.httpClient.Get(hf.enumDictPath(tableName, columnName))
   315  
   316  	err = hf.readJSONResponse(resp, err, &enumDictReponse)
   317  
   318  	return enumDictReponse, err
   319  }
   320  
   321  func (hf *httpSchemaFetcher) ExtendEnumCases(tableName, columnName string, enumCases []string) ([]int, error) {
   322  	enumCasesRequest := enumCasesWrapper{
   323  		EnumCases: enumCases,
   324  	}
   325  
   326  	enumCasesBytes, err := json.Marshal(enumCasesRequest)
   327  	if err != nil {
   328  		return nil, utils.StackError(err, "Failed to marshal enum cases")
   329  	}
   330  
   331  	var enumIDs []int
   332  	resp, err := hf.httpClient.Post(hf.enumDictPath(tableName, columnName), applicationJSONHeader, bytes.NewReader(enumCasesBytes))
   333  	err = hf.readJSONResponse(resp, err, &enumIDs)
   334  	if err != nil {
   335  		return nil, err
   336  	}
   337  	return enumIDs, nil
   338  }
   339  
   340  func (hf *httpSchemaFetcher) FetchSchema(tableName string) (*metaCom.Table, error) {
   341  	var table metaCom.Table
   342  	resp, err := hf.httpClient.Get(hf.tablePath(tableName))
   343  	err = hf.readJSONResponse(resp, err, &table)
   344  	if err != nil {
   345  		return nil, err
   346  	}
   347  
   348  	return &table, nil
   349  }
   350  
   351  func (hf *httpSchemaFetcher) FetchAllSchemas() ([]metaCom.Table, error) {
   352  	var tables []string
   353  	resp, err := hf.httpClient.Get(hf.listTablesPath())
   354  	err = hf.readJSONResponse(resp, err, &tables)
   355  	if err != nil {
   356  		return nil, utils.StackError(err, "Failed to fetch table list")
   357  	}
   358  
   359  	var res []metaCom.Table
   360  	for _, tableName := range tables {
   361  		table, err := hf.FetchSchema(tableName)
   362  		if err != nil {
   363  			hf.metricScope.Tagged(map[string]string{
   364  				"table": tableName,
   365  			}).Counter("err_fetch_table").Inc(1)
   366  			return nil, utils.StackError(err, "Failed to fetch schema error")
   367  		}
   368  		res = append(res, *table)
   369  	}
   370  
   371  	return res, nil
   372  }
   373  
   374  func (hf *httpSchemaFetcher) readJSONResponse(response *http.Response, err error, data interface{}) error {
   375  	if err != nil {
   376  		return utils.StackError(err, "Failed call remote endpoint")
   377  	}
   378  
   379  	respBytes, err := ioutil.ReadAll(response.Body)
   380  	if err != nil {
   381  		return utils.StackError(err, "Failed to read response body")
   382  	}
   383  
   384  	if response.StatusCode != http.StatusOK {
   385  		return utils.StackError(nil, "Received error response %d:%s from remote endpoint", response.StatusCode, respBytes)
   386  	}
   387  
   388  	err = json.Unmarshal(respBytes, data)
   389  	if err != nil {
   390  		return utils.StackError(err, "Failed to unmarshal json")
   391  	}
   392  	return nil
   393  }
   394  
   395  func (hf *httpSchemaFetcher) tablePath(tableName string) string {
   396  	return fmt.Sprintf("%s/%s", hf.listTablesPath(), tableName)
   397  }
   398  
   399  func (hf *httpSchemaFetcher) listTablesPath() string {
   400  	return fmt.Sprintf("http://%s/schema/tables", hf.address)
   401  }
   402  
   403  func (hf *httpSchemaFetcher) enumDictPath(tableName, columnName string) string {
   404  	return fmt.Sprintf("%s/%s/columns/%s/enum-cases", hf.listTablesPath(), tableName, columnName)
   405  }