github.com/klaytn/klaytn@v1.12.1/storage/database/dynamodb.go (about) 1 // Copyright 2020 The klaytn Authors 2 // This file is part of the klaytn library. 3 // 4 // The klaytn library is free software: you can redistribute it and/or modify 5 // it under the terms of the GNU Lesser General Public License as published by 6 // the Free Software Foundation, either version 3 of the License, or 7 // (at your option) any later version. 8 // 9 // The klaytn library is distributed in the hope that it will be useful, 10 // but WITHOUT ANY WARRANTY; without even the implied warranty of 11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 // GNU Lesser General Public License for more details. 13 // 14 // You should have received a copy of the GNU Lesser General Public License 15 // along with the klaytn library. If not, see <http://www.gnu.org/licenses/>. 16 // 17 // Database implementation of AWS DynamoDB. 18 // 19 // [WARN] Using this DB may cause pricing in your AWS account. 20 // [WARN] DynamoDB creates both Dynamo DB table and S3 bucket. 21 // 22 // You need to set AWS credentials to access to dynamoDB. 23 // $ export AWS_ACCESS_KEY_ID=YOUR_ACCESS_KEY 24 // $ export AWS_SECRET_ACCESS_KEY=YOUR_SECRET 25 26 package database 27 28 import ( 29 "bytes" 30 "net/http" 31 "strconv" 32 "strings" 33 "sync" 34 "time" 35 36 klaytnmetrics "github.com/klaytn/klaytn/metrics" 37 38 "github.com/aws/aws-sdk-go/aws" 39 "github.com/aws/aws-sdk-go/aws/client" 40 "github.com/aws/aws-sdk-go/aws/request" 41 "github.com/aws/aws-sdk-go/aws/session" 42 "github.com/aws/aws-sdk-go/service/dynamodb" 43 "github.com/aws/aws-sdk-go/service/dynamodb/dynamodbattribute" 44 "github.com/klaytn/klaytn/common/hexutil" 45 "github.com/klaytn/klaytn/log" 46 "github.com/pkg/errors" 47 "github.com/rcrowley/go-metrics" 48 ) 49 50 var overSizedDataPrefix = []byte("oversizeditem") 51 52 // Performance of batch operations of DynamoDB are collected by default. 53 var dynamoBatchWriteTimeMeter metrics.Meter = &metrics.NilMeter{} 54 55 // errors 56 var dataNotFoundErr = errors.New("data is not found with the given key") 57 58 var ( 59 nilDynamoConfigErr = errors.New("attempt to create DynamoDB with nil configuration") 60 noTableNameErr = errors.New("dynamoDB table name not provided") 61 ) 62 63 // batch write size 64 const dynamoWriteSizeLimit = 399 * 1024 // The maximum write size is 400KB including attribute names and values 65 const ( 66 dynamoBatchSize = 25 67 dynamoMaxRetry = 20 68 dynamoTimeout = 10 * time.Second 69 ) 70 71 // batch write 72 const ( 73 WorkerNum = 10 74 itemChanSize = WorkerNum * 2 75 ) 76 77 var ( 78 dynamoDBClient *dynamodb.DynamoDB // handles dynamoDB connections 79 dynamoWriteCh chan *batchWriteWorkerInput // use global write channel for shared worker 80 dynamoOnceWorker = &sync.Once{} // makes sure worker is created once 81 dynamoOpenedDBNum uint 82 ) 83 84 type DynamoDBConfig struct { 85 TableName string 86 Region string // AWS region 87 Endpoint string // Where DynamoDB reside (Used to specify the localstack endpoint on the test) 88 S3Endpoint string // Where S3 reside 89 IsProvisioned bool // Billing mode 90 ReadCapacityUnits int64 // read capacity when provisioned 91 WriteCapacityUnits int64 // write capacity when provisioned 92 ReadOnly bool // disables write 93 PerfCheck bool 94 } 95 96 type batchWriteWorkerInput struct { 97 tableName string 98 items []*dynamodb.WriteRequest 99 wg *sync.WaitGroup 100 } 101 102 // TODO-Klaytn refactor the structure : there are common configs that are placed separated 103 type dynamoDB struct { 104 config DynamoDBConfig 105 fdb fileDB // where over size items are stored 106 logger log.Logger // Contextual logger tracking the database path 107 108 // metrics 109 getTimer klaytnmetrics.HybridTimer 110 putTimer klaytnmetrics.HybridTimer 111 } 112 113 type DynamoData struct { 114 Key []byte `json:"Key" dynamodbav:"Key"` 115 Val []byte `json:"Val" dynamodbav:"Val"` 116 } 117 118 // CustomRetryer wraps AWS SDK's built in DefaultRetryer adding additional custom features. 119 // DefaultRetryer of AWS SDK has its own standard of retryable situation, 120 // but it's not proper when network environment is not stable. 121 // CustomRetryer conservatively retry in all error cases because DB failure of Klaytn is critical. 122 type CustomRetryer struct { 123 client.DefaultRetryer 124 } 125 126 // ShouldRetry overrides AWS SDK's built in DefaultRetryer to retry in all error cases. 127 func (r CustomRetryer) ShouldRetry(req *request.Request) bool { 128 logger.Debug("dynamoDB client retry", "error", req.Error, "retryCnt", req.RetryCount, "retryDelay", 129 req.RetryDelay, "maxRetry", r.MaxRetries()) 130 return req.Error != nil && req.RetryCount < r.MaxRetries() 131 } 132 133 // GetTestDynamoConfig gets dynamo config for actual aws DynamoDB test 134 // 135 // If you use this config, you will be charged for what you use. 136 // You need to set AWS credentials to access to dynamoDB. 137 // $ export AWS_ACCESS_KEY_ID=YOUR_ACCESS_KEY 138 // $ export AWS_SECRET_ACCESS_KEY=YOUR_SECRET 139 func GetDefaultDynamoDBConfig() *DynamoDBConfig { 140 return &DynamoDBConfig{ 141 Region: "ap-northeast-2", 142 Endpoint: "", // nil or "" means the default generated endpoint 143 TableName: "klaytn-default" + strconv.Itoa(time.Now().Nanosecond()), 144 IsProvisioned: false, 145 ReadCapacityUnits: 10000, 146 WriteCapacityUnits: 10000, 147 ReadOnly: false, 148 PerfCheck: true, 149 } 150 } 151 152 // NewDynamoDB creates either dynamoDB or dynamoDBReadOnly depending on config.ReadOnly. 153 func NewDynamoDB(config *DynamoDBConfig) (Database, error) { 154 if config.ReadOnly { 155 return newDynamoDBReadOnly(config) 156 } 157 return newDynamoDB(config) 158 } 159 160 // newDynamoDB creates dynamoDB. dynamoDB can be used to create dynamoDBReadOnly. 161 func newDynamoDB(config *DynamoDBConfig) (*dynamoDB, error) { 162 if config == nil { 163 return nil, nilDynamoConfigErr 164 } 165 if len(config.TableName) == 0 { 166 return nil, noTableNameErr 167 } 168 169 config.TableName = strings.ReplaceAll(config.TableName, "_", "-") 170 171 s3FileDB, err := newS3FileDB(config.Region, config.S3Endpoint, config.TableName) 172 if err != nil { 173 logger.Error("Unable to create/get S3FileDB", "DB", config.TableName) 174 return nil, err 175 } 176 177 if dynamoDBClient == nil { 178 dynamoDBClient = dynamodb.New(session.Must(session.NewSessionWithOptions(session.Options{ 179 Config: aws.Config{ 180 Retryer: CustomRetryer{ 181 DefaultRetryer: client.DefaultRetryer{ 182 NumMaxRetries: dynamoMaxRetry, 183 MaxRetryDelay: time.Second, 184 MaxThrottleDelay: time.Second, 185 }, 186 }, 187 Endpoint: aws.String(config.Endpoint), 188 Region: aws.String(config.Region), 189 S3ForcePathStyle: aws.Bool(true), 190 MaxRetries: aws.Int(dynamoMaxRetry), 191 HTTPClient: &http.Client{Timeout: dynamoTimeout}, // default client is &http.Client{} 192 }, 193 }))) 194 } 195 dynamoDB := &dynamoDB{ 196 config: *config, 197 fdb: s3FileDB, 198 } 199 200 dynamoDB.logger = logger.NewWith("region", config.Region, "tableName", dynamoDB.config.TableName) 201 202 // Check if the table is ready to serve 203 for { 204 tableStatus, err := dynamoDB.tableStatus() 205 if err != nil { 206 if !strings.Contains(err.Error(), "ResourceNotFoundException") { 207 dynamoDB.logger.Error("unable to get DynamoDB table status", "err", err.Error()) 208 return nil, err 209 } 210 211 dynamoDB.logger.Warn("creating a DynamoDB table. You will be CHARGED until the DB is deleted") 212 if err := dynamoDB.createTable(); err != nil { 213 dynamoDB.logger.Error("unable to create a DynamoDB table", "err", err.Error()) 214 return nil, err 215 } 216 } 217 218 switch tableStatus { 219 case dynamodb.TableStatusActive: 220 if !dynamoDB.config.ReadOnly { 221 // count successful table creating 222 dynamoOpenedDBNum++ 223 // create workers on the first successful table creation 224 dynamoOnceWorker.Do(func() { 225 createBatchWriteWorkerPool() 226 }) 227 } 228 dynamoDB.logger.Info("successfully created dynamoDB session") 229 return dynamoDB, nil 230 case dynamodb.TableStatusDeleting, dynamodb.TableStatusArchiving, dynamodb.TableStatusArchived: 231 return nil, errors.New("failed to get DynamoDB table, table status : " + tableStatus) 232 default: 233 dynamoDB.logger.Info("waiting for the table to be ready", "table status", tableStatus) 234 time.Sleep(1 * time.Second) 235 } 236 } 237 } 238 239 func (dynamo *dynamoDB) createTable() error { 240 input := &dynamodb.CreateTableInput{ 241 BillingMode: aws.String("PAY_PER_REQUEST"), 242 AttributeDefinitions: []*dynamodb.AttributeDefinition{ 243 { 244 AttributeName: aws.String("Key"), 245 AttributeType: aws.String("B"), // B - the attribute is of type Binary 246 }, 247 }, 248 KeySchema: []*dynamodb.KeySchemaElement{ 249 { 250 AttributeName: aws.String("Key"), 251 KeyType: aws.String("HASH"), // HASH - partition key, RANGE - sort key 252 }, 253 }, 254 255 TableName: aws.String(dynamo.config.TableName), 256 } 257 258 if dynamo.config.IsProvisioned { 259 input.BillingMode = aws.String("PROVISIONED") 260 input.ProvisionedThroughput = &dynamodb.ProvisionedThroughput{ 261 ReadCapacityUnits: aws.Int64(dynamo.config.ReadCapacityUnits), 262 WriteCapacityUnits: aws.Int64(dynamo.config.WriteCapacityUnits), 263 } 264 dynamo.logger.Warn("Billing mode is provisioned. You will be charged every hour.", "RCU", dynamo.config.ReadCapacityUnits, "WRU", dynamo.config.WriteCapacityUnits) 265 } 266 267 _, err := dynamoDBClient.CreateTable(input) 268 if err != nil { 269 dynamo.logger.Error("Error while creating the DynamoDB table", "err", err, "tableName", dynamo.config.TableName) 270 return err 271 } 272 dynamo.logger.Warn("Requesting create dynamoDB table. You will be charged until the table is deleted.") 273 return nil 274 } 275 276 func (dynamo *dynamoDB) deleteTable() error { 277 if _, err := dynamoDBClient.DeleteTable(&dynamodb.DeleteTableInput{TableName: &dynamo.config.TableName}); err != nil { 278 dynamo.logger.Error("Error while deleting the DynamoDB table", "tableName", dynamo.config.TableName) 279 return err 280 } 281 dynamo.logger.Info("Successfully deleted the DynamoDB table", "tableName", dynamo.config.TableName) 282 return nil 283 } 284 285 func (dynamo *dynamoDB) tableStatus() (string, error) { 286 desc, err := dynamo.tableDescription() 287 if err != nil { 288 return "", err 289 } 290 291 return *desc.TableStatus, nil 292 } 293 294 func (dynamo *dynamoDB) tableDescription() (*dynamodb.TableDescription, error) { 295 describe, err := dynamoDBClient.DescribeTable(&dynamodb.DescribeTableInput{TableName: aws.String(dynamo.config.TableName)}) 296 if describe == nil { 297 return nil, err 298 } 299 300 return describe.Table, err 301 } 302 303 func (dynamo *dynamoDB) Type() DBType { 304 return DynamoDB 305 } 306 307 // Put inserts the given key and value pair to the database. 308 func (dynamo *dynamoDB) Put(key []byte, val []byte) error { 309 if dynamo.config.PerfCheck { 310 start := time.Now() 311 err := dynamo.put(key, val) 312 dynamo.putTimer.Update(time.Since(start)) 313 return err 314 } 315 return dynamo.put(key, val) 316 } 317 318 func (dynamo *dynamoDB) put(key []byte, val []byte) error { 319 if len(key) == 0 { 320 return nil 321 } 322 323 if len(val) > dynamoWriteSizeLimit { 324 _, err := dynamo.fdb.write(item{key: key, val: val}) 325 if err != nil { 326 return err 327 } 328 return dynamo.Put(key, overSizedDataPrefix) 329 } 330 331 data := DynamoData{Key: key, Val: val} 332 marshaledData, err := dynamodbattribute.MarshalMap(data) 333 if err != nil { 334 return err 335 } 336 337 params := &dynamodb.PutItemInput{ 338 TableName: aws.String(dynamo.config.TableName), 339 Item: marshaledData, 340 } 341 342 _, err = dynamoDBClient.PutItem(params) 343 if err != nil { 344 dynamo.logger.Crit("failed to put an item", "err", err, "key", hexutil.Encode(data.Key)) 345 return err 346 } 347 348 return nil 349 } 350 351 // Has returns true if the corresponding value to the given key exists. 352 func (dynamo *dynamoDB) Has(key []byte) (bool, error) { 353 if _, err := dynamo.Get(key); err != nil { 354 if err == dataNotFoundErr { 355 return false, nil 356 } 357 return false, err 358 } 359 return true, nil 360 } 361 362 // Get returns the corresponding value to the given key if exists. 363 func (dynamo *dynamoDB) Get(key []byte) ([]byte, error) { 364 if dynamo.config.PerfCheck { 365 start := time.Now() 366 val, err := dynamo.get(key) 367 dynamo.getTimer.Update(time.Since(start)) 368 return val, err 369 } 370 return dynamo.get(key) 371 } 372 373 func (dynamo *dynamoDB) get(key []byte) ([]byte, error) { 374 params := &dynamodb.GetItemInput{ 375 TableName: aws.String(dynamo.config.TableName), 376 Key: map[string]*dynamodb.AttributeValue{ 377 "Key": { 378 B: key, 379 }, 380 }, 381 ConsistentRead: aws.Bool(true), 382 } 383 384 result, err := dynamoDBClient.GetItem(params) 385 if err != nil { 386 dynamo.logger.Crit("failed to get an item", "err", err, "key", hexutil.Encode(key)) 387 return nil, err 388 } 389 390 if result.Item == nil { 391 return nil, dataNotFoundErr 392 } 393 394 var data DynamoData 395 if err := dynamodbattribute.UnmarshalMap(result.Item, &data); err != nil { 396 dynamo.logger.Crit("failed to unmarshal dynamodb data", "err", err) 397 return nil, err 398 } 399 400 if data.Val == nil { 401 return []byte{}, nil 402 } 403 404 if bytes.Equal(data.Val, overSizedDataPrefix) { 405 ret, err := dynamo.fdb.read(key) 406 if err != nil { 407 dynamo.logger.Crit("failed to read filedb data", "err", err, "key", hexutil.Encode(key)) 408 } 409 return ret, err 410 } 411 412 return data.Val, nil 413 } 414 415 // Delete deletes the key from the queue and database 416 func (dynamo *dynamoDB) Delete(key []byte) error { 417 params := &dynamodb.DeleteItemInput{ 418 TableName: aws.String(dynamo.config.TableName), 419 Key: map[string]*dynamodb.AttributeValue{ 420 "Key": { 421 B: key, 422 }, 423 }, 424 } 425 426 _, err := dynamoDBClient.DeleteItem(params) 427 if err != nil { 428 dynamo.logger.Crit("failed to delete an item", "err", err, "key", hexutil.Encode(key)) 429 return err 430 } 431 return nil 432 } 433 434 func (dynamo *dynamoDB) Close() { 435 if dynamoOpenedDBNum > 0 { 436 dynamoOpenedDBNum-- 437 } 438 if dynamoOpenedDBNum == 0 && dynamoWriteCh != nil { 439 close(dynamoWriteCh) 440 } 441 } 442 443 func (dynamo *dynamoDB) Meter(prefix string) { 444 dynamo.getTimer = klaytnmetrics.NewRegisteredHybridTimer(prefix+"get/time", nil) 445 dynamo.putTimer = klaytnmetrics.NewRegisteredHybridTimer(prefix+"put/time", nil) 446 dynamoBatchWriteTimeMeter = metrics.NewRegisteredMeter(prefix+"batchwrite/time", nil) 447 } 448 449 func (dynamo *dynamoDB) TryCatchUpWithPrimary() error { 450 return nil 451 } 452 453 func (dynamo *dynamoDB) NewIterator(prefix []byte, start []byte) Iterator { 454 // TODO-Klaytn: implement this later. 455 return nil 456 } 457 458 func createBatchWriteWorkerPool() { 459 dynamoWriteCh = make(chan *batchWriteWorkerInput, itemChanSize) 460 for i := 0; i < WorkerNum; i++ { 461 go createBatchWriteWorker(dynamoWriteCh) 462 } 463 logger.Info("made dynamo batch write workers", "workerNum", WorkerNum) 464 } 465 466 func createBatchWriteWorker(writeCh <-chan *batchWriteWorkerInput) { 467 failCount := 0 468 logger.Debug("generate a dynamoDB batchWrite worker") 469 470 for batchInput := range writeCh { 471 batchWriteInput := &dynamodb.BatchWriteItemInput{ 472 RequestItems: map[string][]*dynamodb.WriteRequest{}, 473 } 474 batchWriteInput.RequestItems[batchInput.tableName] = batchInput.items 475 476 BatchWriteItemOutput, err := dynamoDBClient.BatchWriteItem(batchWriteInput) 477 numUnprocessed := len(BatchWriteItemOutput.UnprocessedItems[batchInput.tableName]) 478 for err != nil || numUnprocessed != 0 { 479 if err != nil { 480 // ValidationException occurs when a required parameter is missing, a value is out of range, 481 // or data types mismatch and so on. If this is the case, check if there is a duplicated key, 482 // batch length out of range, null value and so on. 483 // When ValidationException occurs, retrying won't fix the problem. 484 if strings.Contains(err.Error(), "ValidationException") { 485 logger.Crit("Invalid input for dynamoDB BatchWrite", 486 "err", err, "tableName", batchInput.tableName, "itemNum", len(batchInput.items)) 487 } 488 failCount++ 489 logger.Warn("dynamoDB failed to write batch items", 490 "tableName", batchInput.tableName, "err", err, "failCnt", failCount) 491 if failCount > dynamoMaxRetry { 492 logger.Error("dynamoDB failed many times. sleep a second and retry", 493 "tableName", batchInput.tableName, "failCnt", failCount) 494 time.Sleep(time.Second) 495 } 496 } 497 498 if numUnprocessed != 0 { 499 logger.Debug("dynamoDB batchWrite remains unprocessedItem", 500 "tableName", batchInput.tableName, "numUnprocessedItem", numUnprocessed) 501 batchWriteInput.RequestItems[batchInput.tableName] = BatchWriteItemOutput.UnprocessedItems[batchInput.tableName] 502 } 503 504 start := time.Now() 505 BatchWriteItemOutput, err = dynamoDBClient.BatchWriteItem(batchWriteInput) 506 dynamoBatchWriteTimeMeter.Mark(int64(time.Since(start))) 507 numUnprocessed = len(BatchWriteItemOutput.UnprocessedItems) 508 } 509 510 failCount = 0 511 batchInput.wg.Done() 512 } 513 logger.Debug("close a dynamoDB batchWrite worker") 514 } 515 516 func (dynamo *dynamoDB) NewBatch() Batch { 517 return &dynamoBatch{db: dynamo, tableName: dynamo.config.TableName, wg: &sync.WaitGroup{}, keyMap: map[string]struct{}{}} 518 } 519 520 type dynamoBatch struct { 521 db *dynamoDB 522 tableName string 523 batchItems []*dynamodb.WriteRequest 524 keyMap map[string]struct{} // checks duplication of keys 525 size int 526 wg *sync.WaitGroup 527 } 528 529 // Put adds an item to dynamo batch. 530 // If the number of items in batch reaches dynamoBatchSize, a write request to dynamoDB is made. 531 // Each batch write is executed in thread. (There is an worker pool for dynamo batch write) 532 // 533 // Note: If there is a duplicated key in a batch, only the first value is written. 534 func (batch *dynamoBatch) Put(key, val []byte) error { 535 // if there is an duplicated key in batch, skip 536 if _, exist := batch.keyMap[string(key)]; exist { 537 return nil 538 } 539 batch.keyMap[string(key)] = struct{}{} 540 541 data := DynamoData{Key: key, Val: val} 542 dataSize := len(val) 543 544 // If the size of the item is larger than the limit, it should be handled in different way 545 if dataSize > dynamoWriteSizeLimit { 546 batch.wg.Add(1) 547 go func() { 548 failCnt := 0 549 batch.db.logger.Debug("write large size data into fileDB") 550 551 _, err := batch.db.fdb.write(item{key: key, val: val}) 552 for err != nil { 553 failCnt++ 554 batch.db.logger.Error("cannot write an item into fileDB. check the status of s3", 555 "err", err, "numRetry", failCnt) 556 time.Sleep(time.Second) 557 558 batch.db.logger.Warn("retrying write an item into fileDB") 559 _, err = batch.db.fdb.write(item{key: key, val: val}) 560 } 561 batch.wg.Done() 562 }() 563 data.Val = overSizedDataPrefix 564 dataSize = len(data.Val) 565 } 566 567 marshaledData, err := dynamodbattribute.MarshalMap(data) 568 if err != nil { 569 batch.db.logger.Error("err while batch put", "err", err, "len(val)", len(val)) 570 return err 571 } 572 573 batch.batchItems = append(batch.batchItems, &dynamodb.WriteRequest{ 574 PutRequest: &dynamodb.PutRequest{Item: marshaledData}, 575 }) 576 batch.size += dataSize 577 578 if len(batch.batchItems) == dynamoBatchSize { 579 batch.wg.Add(1) 580 dynamoWriteCh <- &batchWriteWorkerInput{batch.tableName, batch.batchItems, batch.wg} 581 batch.Reset() 582 } 583 return nil 584 } 585 586 // Delete inserts the a key removal into the batch for later committing. 587 func (batch *dynamoBatch) Delete(key []byte) error { 588 logger.CritWithStack("Delete should not be called when using dynamodb batch") 589 return nil 590 } 591 592 func (batch *dynamoBatch) Write() error { 593 var writeRequest []*dynamodb.WriteRequest 594 numRemainedItems := len(batch.batchItems) 595 596 for numRemainedItems > 0 { 597 if numRemainedItems > dynamoBatchSize { 598 writeRequest = batch.batchItems[:dynamoBatchSize] 599 batch.batchItems = batch.batchItems[dynamoBatchSize:] 600 } else { 601 writeRequest = batch.batchItems 602 } 603 batch.wg.Add(1) 604 dynamoWriteCh <- &batchWriteWorkerInput{batch.tableName, writeRequest, batch.wg} 605 numRemainedItems -= len(writeRequest) 606 } 607 608 batch.wg.Wait() 609 return nil 610 } 611 612 func (batch *dynamoBatch) ValueSize() int { 613 return batch.size 614 } 615 616 func (batch *dynamoBatch) Reset() { 617 batch.batchItems = []*dynamodb.WriteRequest{} 618 batch.keyMap = map[string]struct{}{} 619 batch.size = 0 620 } 621 622 func (batch *dynamoBatch) Release() { 623 // nothing to do with dynamoBatch 624 } 625 626 func (batch *dynamoBatch) Replay(w KeyValueWriter) error { 627 logger.CritWithStack("Replay should not be called when using dynamodb batch") 628 return nil 629 } 630 631 func (dynamo *dynamoDB) Stat(property string) (string, error) { 632 return "", errors.New("unknown property") 633 } 634 635 func (dynamo *dynamoDB) Compact(start []byte, limit []byte) error { 636 return nil 637 }