github.com/rohankumardubey/aresdb@v0.0.2-0.20190517170215-e54e3ca06b9c/client/schema.go (about) 1 package client 2 3 import ( 4 "bytes" 5 "encoding/json" 6 "fmt" 7 "io/ioutil" 8 "net/http" 9 "strconv" 10 "strings" 11 "sync" 12 "time" 13 14 "github.com/uber-go/tally" 15 metaCom "github.com/uber/aresdb/metastore/common" 16 "github.com/uber/aresdb/utils" 17 "go.uber.org/zap" 18 ) 19 20 // SchemaFetcher is the interface for fetch schema and enums 21 type SchemaFetcher interface { 22 // FetchAllSchemas fetches all schemas 23 FetchAllSchemas() ([]metaCom.Table, error) 24 // FetchSchema fetch one schema for given table 25 FetchSchema(table string) (*metaCom.Table, error) 26 // FetchAllEnums fetches all enums for given table and column 27 FetchAllEnums(tableName string, columnName string) ([]string, error) 28 // ExtendEnumCases extends enum cases to given table column 29 ExtendEnumCases(tableName, columnName string, enumCases []string) ([]int, error) 30 } 31 32 // httpSchemaFetcher is a http based schema fetcher 33 type httpSchemaFetcher struct { 34 httpClient http.Client 35 metricScope tally.Scope 36 address string 37 } 38 39 // CachedSchemaHandler handles schema and enum requests with cache 40 type CachedSchemaHandler struct { 41 *sync.RWMutex 42 43 logger *zap.SugaredLogger 44 metricScope tally.Scope 45 schemaFetcher SchemaFetcher 46 47 // mapping from table name to table schema 48 schemas map[string]*TableSchema 49 // map from table to columnID to enum dictionary 50 // use columnID instead of name since column name can be reused 51 // table names can be reused as well, deleting and adding a new table 52 // will anyway requires job restart 53 enumMappings map[string]map[int]enumDict 54 55 // map from table to columnID to default enum id. Initialized during bootstrap 56 // and will be set only if default value is non nil. 57 enumDefaultValueMappings map[string]map[int]int 58 } 59 60 // NewCachedSchemaHandler creates a new cached schema handler 61 func NewCachedSchemaHandler(logger *zap.SugaredLogger, scope tally.Scope, schamaFetcher SchemaFetcher) *CachedSchemaHandler { 62 return &CachedSchemaHandler{ 63 RWMutex: &sync.RWMutex{}, 64 logger: logger, 65 metricScope: scope, 66 schemaFetcher: schamaFetcher, 67 schemas: make(map[string]*TableSchema), 68 enumMappings: make(map[string]map[int]enumDict), 69 enumDefaultValueMappings: make(map[string]map[int]int), 70 } 71 } 72 73 // NewHttpSchemaFetcher creates a new http schema fetcher 74 func NewHttpSchemaFetcher(httpClient http.Client, address string, scope tally.Scope) SchemaFetcher { 75 return &httpSchemaFetcher{ 76 metricScope: scope, 77 address: address, 78 httpClient: httpClient, 79 } 80 } 81 82 // Start starts the CachedSchemaHandler, if interval > 0, will start periodical refresh 83 func (cf *CachedSchemaHandler) Start(interval int) error { 84 err := cf.FetchAllSchema() 85 if err != nil { 86 return err 87 } 88 89 if interval <= 0 { 90 return nil 91 } 92 93 go func(refreshInterval int) { 94 ticks := time.Tick(time.Duration(refreshInterval) * time.Second) 95 for range ticks { 96 err = cf.FetchAllSchema() 97 if err != nil { 98 cf.logger.With( 99 "error", err.Error()).Errorf("Failed to fetch table schema") 100 } 101 } 102 }(interval) 103 return nil 104 } 105 106 // TranslateEnum translates given enum value to its enumID 107 func (cf *CachedSchemaHandler) TranslateEnum(tableName string, columnID int, value interface{}, caseInsensitive bool) (enumID int, err error) { 108 if value == nil { 109 return -1, nil 110 } 111 enumCase, ok := value.(string) 112 if !ok { 113 return 0, utils.StackError(nil, "Enum value should be string, but got: %T", value) 114 } 115 if caseInsensitive { 116 enumCase = strings.ToLower(enumCase) 117 } 118 cf.RLock() 119 // here it already make sure the enum dictionary exists in cache 120 enumID, ok = cf.enumMappings[tableName][columnID][enumCase] 121 cf.RUnlock() 122 if !ok { 123 cf.metricScope.Tagged( 124 map[string]string{ 125 "TableName": tableName, 126 "ColumnID": strconv.Itoa(columnID), 127 }, 128 ).Counter("new_enum_case_rows_ignored").Inc(int64(1)) 129 if defaultValue, ok := cf.enumDefaultValueMappings[tableName][columnID]; ok { 130 return defaultValue, nil 131 } 132 return -1, nil 133 } 134 return enumID, nil 135 } 136 137 // FetchAllSchema fetch all schemas 138 func (cf *CachedSchemaHandler) FetchAllSchema() error { 139 tables, err := cf.schemaFetcher.FetchAllSchemas() 140 if err != nil { 141 return err 142 } 143 144 for _, table := range tables { 145 cf.setTable(&table) 146 err := cf.fetchAndSetEnumCases(&table) 147 if err != nil { 148 return err 149 } 150 } 151 return nil 152 } 153 154 // FetchSchema fetchs the schema of given table name 155 func (cf *CachedSchemaHandler) FetchSchema(tableName string) (*TableSchema, error) { 156 cf.RLock() 157 schema, exist := cf.schemas[tableName] 158 cf.RUnlock() 159 if exist { 160 return schema, nil 161 } 162 table, err := cf.schemaFetcher.FetchSchema(tableName) 163 if err != nil { 164 return nil, err 165 } 166 schema = cf.setTable(table) 167 err = cf.fetchAndSetEnumCases(table) 168 return schema, err 169 } 170 171 // PrepareEnumCases prepares enum cases 172 func (cf *CachedSchemaHandler) PrepareEnumCases(tableName, columnName string, enumCases []string) error { 173 newEnumCases := make([]string, 0, len(enumCases)) 174 cf.RLock() 175 schema, exist := cf.schemas[tableName] 176 if !exist { 177 cf.RUnlock() 178 return nil 179 } 180 columnID, exist := schema.ColumnDict[columnName] 181 if !exist { 182 cf.RUnlock() 183 return nil 184 } 185 186 caseInsensitive := schema.Table.Columns[columnID].CaseInsensitive 187 disableAutoExpand := schema.Table.Columns[columnID].DisableAutoExpand 188 for _, enumCase := range enumCases { 189 if _, exist := cf.enumMappings[tableName][columnID][enumCase]; !exist { 190 newEnumCases = append(newEnumCases, enumCase) 191 } 192 } 193 cf.RUnlock() 194 195 if disableAutoExpand { 196 // It's recommended to set up elk or sentry logging to catch this error. 197 cf.logger.With( 198 "TableName", tableName, 199 "ColumnName", columnName, 200 "ColumnID", columnID, 201 "newEnumCasesSet", newEnumCases, 202 "caseInsensitive", caseInsensitive, 203 ).Error("Finding new enum cases during ingestion but enum auto expansion is disabled") 204 cf.metricScope.Tagged( 205 map[string]string{ 206 "TableName": tableName, 207 "ColumnID": strconv.Itoa(columnID), 208 }, 209 ).Counter("new_enum_cases_ignored").Inc(int64(len(newEnumCases))) 210 return nil 211 } 212 213 enumIDs, err := cf.schemaFetcher.ExtendEnumCases(tableName, columnName, newEnumCases) 214 if err != nil { 215 return err 216 } 217 218 cf.Lock() 219 for index, enumCase := range newEnumCases { 220 if caseInsensitive { 221 enumCase = strings.ToLower(enumCase) 222 } 223 cf.enumMappings[tableName][columnID][enumCase] = enumIDs[index] 224 } 225 cf.Unlock() 226 return nil 227 } 228 229 func (cf *CachedSchemaHandler) fetchAndSetEnumCases(table *metaCom.Table) error { 230 enumMappings := make(map[int]enumDict) 231 enumDefaultValueMappings := make(map[int]int) 232 for columnID, column := range table.Columns { 233 if column.Deleted { 234 continue 235 } 236 enumMappings[columnID] = make(enumDict) 237 caseInsensitive := column.CaseInsensitive 238 var defValuePtr *string 239 240 if column.DefaultValue != nil { 241 var defValue = *column.DefaultValue 242 if caseInsensitive { 243 defValue = strings.ToLower(defValue) 244 } 245 defValuePtr = &defValue 246 } 247 248 if column.IsEnumColumn() { 249 enumCases, err := cf.schemaFetcher.FetchAllEnums(table.Name, column.Name) 250 if err == nil { 251 for enumID, enumCase := range enumCases { 252 // Convert to lower case for comparison during ingestion. 253 if caseInsensitive { 254 enumCase = strings.ToLower(enumCase) 255 } 256 // all mapping should be pre created 257 enumMappings[columnID][enumCase] = enumID 258 259 if defValuePtr != nil { 260 if *defValuePtr == enumCase { 261 enumDefaultValueMappings[columnID] = enumID 262 } 263 } 264 } 265 } else { 266 cf.metricScope.Tagged(map[string]string{ 267 "table": table.Name, 268 "columnID": strconv.Itoa(columnID), 269 }).Counter("err_fetch_enum_dict").Inc(1) 270 return utils.StackError(err, "Failed to fetch enum cases for table: %s, column: %d", table.Name, columnID) 271 } 272 } 273 } 274 cf.Lock() 275 cf.enumMappings[table.Name] = enumMappings 276 cf.enumDefaultValueMappings[table.Name] = enumDefaultValueMappings 277 cf.Unlock() 278 return nil 279 } 280 281 func (cf *CachedSchemaHandler) setTable(table *metaCom.Table) *TableSchema { 282 columnDict := make(map[string]int) 283 for columnID, column := range table.Columns { 284 if !column.Deleted { 285 columnDict[column.Name] = columnID 286 } 287 } 288 289 schema := &TableSchema{ 290 Table: table, 291 ColumnDict: columnDict, 292 } 293 294 cf.Lock() 295 cf.schemas[table.Name] = schema 296 if _, tableExist := cf.enumMappings[table.Name]; !tableExist { 297 cf.enumMappings[table.Name] = make(map[int]enumDict) 298 cf.enumDefaultValueMappings[table.Name] = make(map[int]int) 299 } 300 for columnID, column := range table.Columns { 301 if !column.Deleted && column.IsEnumColumn() { 302 if _, columnExist := cf.enumMappings[table.Name][columnID]; !columnExist { 303 cf.enumMappings[table.Name][columnID] = make(enumDict) 304 } 305 } 306 } 307 cf.Unlock() 308 return schema 309 } 310 311 func (hf *httpSchemaFetcher) FetchAllEnums(tableName, columnName string) ([]string, error) { 312 var enumDictReponse []string 313 314 resp, err := hf.httpClient.Get(hf.enumDictPath(tableName, columnName)) 315 316 err = hf.readJSONResponse(resp, err, &enumDictReponse) 317 318 return enumDictReponse, err 319 } 320 321 func (hf *httpSchemaFetcher) ExtendEnumCases(tableName, columnName string, enumCases []string) ([]int, error) { 322 enumCasesRequest := enumCasesWrapper{ 323 EnumCases: enumCases, 324 } 325 326 enumCasesBytes, err := json.Marshal(enumCasesRequest) 327 if err != nil { 328 return nil, utils.StackError(err, "Failed to marshal enum cases") 329 } 330 331 var enumIDs []int 332 resp, err := hf.httpClient.Post(hf.enumDictPath(tableName, columnName), applicationJSONHeader, bytes.NewReader(enumCasesBytes)) 333 err = hf.readJSONResponse(resp, err, &enumIDs) 334 if err != nil { 335 return nil, err 336 } 337 return enumIDs, nil 338 } 339 340 func (hf *httpSchemaFetcher) FetchSchema(tableName string) (*metaCom.Table, error) { 341 var table metaCom.Table 342 resp, err := hf.httpClient.Get(hf.tablePath(tableName)) 343 err = hf.readJSONResponse(resp, err, &table) 344 if err != nil { 345 return nil, err 346 } 347 348 return &table, nil 349 } 350 351 func (hf *httpSchemaFetcher) FetchAllSchemas() ([]metaCom.Table, error) { 352 var tables []string 353 resp, err := hf.httpClient.Get(hf.listTablesPath()) 354 err = hf.readJSONResponse(resp, err, &tables) 355 if err != nil { 356 return nil, utils.StackError(err, "Failed to fetch table list") 357 } 358 359 var res []metaCom.Table 360 for _, tableName := range tables { 361 table, err := hf.FetchSchema(tableName) 362 if err != nil { 363 hf.metricScope.Tagged(map[string]string{ 364 "table": tableName, 365 }).Counter("err_fetch_table").Inc(1) 366 return nil, utils.StackError(err, "Failed to fetch schema error") 367 } 368 res = append(res, *table) 369 } 370 371 return res, nil 372 } 373 374 func (hf *httpSchemaFetcher) readJSONResponse(response *http.Response, err error, data interface{}) error { 375 if err != nil { 376 return utils.StackError(err, "Failed call remote endpoint") 377 } 378 379 respBytes, err := ioutil.ReadAll(response.Body) 380 if err != nil { 381 return utils.StackError(err, "Failed to read response body") 382 } 383 384 if response.StatusCode != http.StatusOK { 385 return utils.StackError(nil, "Received error response %d:%s from remote endpoint", response.StatusCode, respBytes) 386 } 387 388 err = json.Unmarshal(respBytes, data) 389 if err != nil { 390 return utils.StackError(err, "Failed to unmarshal json") 391 } 392 return nil 393 } 394 395 func (hf *httpSchemaFetcher) tablePath(tableName string) string { 396 return fmt.Sprintf("%s/%s", hf.listTablesPath(), tableName) 397 } 398 399 func (hf *httpSchemaFetcher) listTablesPath() string { 400 return fmt.Sprintf("http://%s/schema/tables", hf.address) 401 } 402 403 func (hf *httpSchemaFetcher) enumDictPath(tableName, columnName string) string { 404 return fmt.Sprintf("%s/%s/columns/%s/enum-cases", hf.listTablesPath(), tableName, columnName) 405 }