github.com/klaytn/klaytn@v1.10.2/storage/database/dynamodb.go (about) 1 // Copyright 2020 The klaytn Authors 2 // This file is part of the klaytn library. 3 // 4 // The klaytn library is free software: you can redistribute it and/or modify 5 // it under the terms of the GNU Lesser General Public License as published by 6 // the Free Software Foundation, either version 3 of the License, or 7 // (at your option) any later version. 8 // 9 // The klaytn library is distributed in the hope that it will be useful, 10 // but WITHOUT ANY WARRANTY; without even the implied warranty of 11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 // GNU Lesser General Public License for more details. 13 // 14 // You should have received a copy of the GNU Lesser General Public License 15 // along with the klaytn library. If not, see <http://www.gnu.org/licenses/>. 16 // 17 // Database implementation of AWS DynamoDB. 18 // 19 // [WARN] Using this DB may cause pricing in your AWS account. 20 // [WARN] DynamoDB creates both Dynamo DB table and S3 bucket. 21 // 22 // You need to set AWS credentials to access to dynamoDB. 23 // $ export AWS_ACCESS_KEY_ID=YOUR_ACCESS_KEY 24 // $ export AWS_SECRET_ACCESS_KEY=YOUR_SECRET 25 26 package database 27 28 import ( 29 "bytes" 30 "net/http" 31 "strconv" 32 "strings" 33 "sync" 34 "time" 35 36 klaytnmetrics "github.com/klaytn/klaytn/metrics" 37 38 "github.com/aws/aws-sdk-go/aws" 39 "github.com/aws/aws-sdk-go/aws/client" 40 "github.com/aws/aws-sdk-go/aws/request" 41 "github.com/aws/aws-sdk-go/aws/session" 42 "github.com/aws/aws-sdk-go/service/dynamodb" 43 "github.com/aws/aws-sdk-go/service/dynamodb/dynamodbattribute" 44 "github.com/klaytn/klaytn/common/hexutil" 45 "github.com/klaytn/klaytn/log" 46 "github.com/pkg/errors" 47 "github.com/rcrowley/go-metrics" 48 ) 49 50 var overSizedDataPrefix = []byte("oversizeditem") 51 52 // Performance of batch operations of DynamoDB are collected by default. 53 var dynamoBatchWriteTimeMeter metrics.Meter = &metrics.NilMeter{} 54 55 // errors 56 var dataNotFoundErr = errors.New("data is not found with the given key") 57 58 var ( 59 nilDynamoConfigErr = errors.New("attempt to create DynamoDB with nil configuration") 60 noTableNameErr = errors.New("dynamoDB table name not provided") 61 ) 62 63 // batch write size 64 const dynamoWriteSizeLimit = 399 * 1024 // The maximum write size is 400KB including attribute names and values 65 const ( 66 dynamoBatchSize = 25 67 dynamoMaxRetry = 20 68 dynamoTimeout = 10 * time.Second 69 ) 70 71 // batch write 72 const WorkerNum = 10 73 const itemChanSize = WorkerNum * 2 74 75 var ( 76 dynamoDBClient *dynamodb.DynamoDB // handles dynamoDB connections 77 dynamoWriteCh chan *batchWriteWorkerInput // use global write channel for shared worker 78 dynamoOnceWorker = &sync.Once{} // makes sure worker is created once 79 dynamoOpenedDBNum uint 80 ) 81 82 type DynamoDBConfig struct { 83 TableName string 84 Region string // AWS region 85 Endpoint string // Where DynamoDB reside (Used to specify the localstack endpoint on the test) 86 S3Endpoint string // Where S3 reside 87 IsProvisioned bool // Billing mode 88 ReadCapacityUnits int64 // read capacity when provisioned 89 WriteCapacityUnits int64 // write capacity when provisioned 90 ReadOnly bool // disables write 91 PerfCheck bool 92 } 93 94 type batchWriteWorkerInput struct { 95 tableName string 96 items []*dynamodb.WriteRequest 97 wg *sync.WaitGroup 98 } 99 100 // TODO-Klaytn refactor the structure : there are common configs that are placed separated 101 type dynamoDB struct { 102 config DynamoDBConfig 103 fdb fileDB // where over size items are stored 104 logger log.Logger // Contextual logger tracking the database path 105 106 // metrics 107 getTimer klaytnmetrics.HybridTimer 108 putTimer klaytnmetrics.HybridTimer 109 } 110 111 type DynamoData struct { 112 Key []byte `json:"Key" dynamodbav:"Key"` 113 Val []byte `json:"Val" dynamodbav:"Val"` 114 } 115 116 // CustomRetryer wraps AWS SDK's built in DefaultRetryer adding additional custom features. 117 // DefaultRetryer of AWS SDK has its own standard of retryable situation, 118 // but it's not proper when network environment is not stable. 119 // CustomRetryer conservatively retry in all error cases because DB failure of Klaytn is critical. 120 type CustomRetryer struct { 121 client.DefaultRetryer 122 } 123 124 // ShouldRetry overrides AWS SDK's built in DefaultRetryer to retry in all error cases. 125 func (r CustomRetryer) ShouldRetry(req *request.Request) bool { 126 logger.Debug("dynamoDB client retry", "error", req.Error, "retryCnt", req.RetryCount, "retryDelay", 127 req.RetryDelay, "maxRetry", r.MaxRetries()) 128 return req.Error != nil && req.RetryCount < r.MaxRetries() 129 } 130 131 // GetTestDynamoConfig gets dynamo config for actual aws DynamoDB test 132 // 133 // If you use this config, you will be charged for what you use. 134 // You need to set AWS credentials to access to dynamoDB. 135 // $ export AWS_ACCESS_KEY_ID=YOUR_ACCESS_KEY 136 // $ export AWS_SECRET_ACCESS_KEY=YOUR_SECRET 137 func GetDefaultDynamoDBConfig() *DynamoDBConfig { 138 return &DynamoDBConfig{ 139 Region: "ap-northeast-2", 140 Endpoint: "", // nil or "" means the default generated endpoint 141 TableName: "klaytn-default" + strconv.Itoa(time.Now().Nanosecond()), 142 IsProvisioned: false, 143 ReadCapacityUnits: 10000, 144 WriteCapacityUnits: 10000, 145 ReadOnly: false, 146 PerfCheck: true, 147 } 148 } 149 150 // NewDynamoDB creates either dynamoDB or dynamoDBReadOnly depending on config.ReadOnly. 151 func NewDynamoDB(config *DynamoDBConfig) (Database, error) { 152 if config.ReadOnly { 153 return newDynamoDBReadOnly(config) 154 } 155 return newDynamoDB(config) 156 } 157 158 // newDynamoDB creates dynamoDB. dynamoDB can be used to create dynamoDBReadOnly. 159 func newDynamoDB(config *DynamoDBConfig) (*dynamoDB, error) { 160 if config == nil { 161 return nil, nilDynamoConfigErr 162 } 163 if len(config.TableName) == 0 { 164 return nil, noTableNameErr 165 } 166 167 config.TableName = strings.ReplaceAll(config.TableName, "_", "-") 168 169 s3FileDB, err := newS3FileDB(config.Region, config.S3Endpoint, config.TableName) 170 if err != nil { 171 logger.Error("Unable to create/get S3FileDB", "DB", config.TableName) 172 return nil, err 173 } 174 175 if dynamoDBClient == nil { 176 dynamoDBClient = dynamodb.New(session.Must(session.NewSessionWithOptions(session.Options{ 177 Config: aws.Config{ 178 Retryer: CustomRetryer{ 179 DefaultRetryer: client.DefaultRetryer{ 180 NumMaxRetries: dynamoMaxRetry, 181 MaxRetryDelay: time.Second, 182 MaxThrottleDelay: time.Second, 183 }, 184 }, 185 Endpoint: aws.String(config.Endpoint), 186 Region: aws.String(config.Region), 187 S3ForcePathStyle: aws.Bool(true), 188 MaxRetries: aws.Int(dynamoMaxRetry), 189 HTTPClient: &http.Client{Timeout: dynamoTimeout}, // default client is &http.Client{} 190 }, 191 }))) 192 } 193 dynamoDB := &dynamoDB{ 194 config: *config, 195 fdb: s3FileDB, 196 } 197 198 dynamoDB.logger = logger.NewWith("region", config.Region, "tableName", dynamoDB.config.TableName) 199 200 // Check if the table is ready to serve 201 for { 202 tableStatus, err := dynamoDB.tableStatus() 203 if err != nil { 204 if !strings.Contains(err.Error(), "ResourceNotFoundException") { 205 dynamoDB.logger.Error("unable to get DynamoDB table status", "err", err.Error()) 206 return nil, err 207 } 208 209 dynamoDB.logger.Warn("creating a DynamoDB table. You will be CHARGED until the DB is deleted") 210 if err := dynamoDB.createTable(); err != nil { 211 dynamoDB.logger.Error("unable to create a DynamoDB table", "err", err.Error()) 212 return nil, err 213 } 214 } 215 216 switch tableStatus { 217 case dynamodb.TableStatusActive: 218 if !dynamoDB.config.ReadOnly { 219 // count successful table creating 220 dynamoOpenedDBNum++ 221 // create workers on the first successful table creation 222 dynamoOnceWorker.Do(func() { 223 createBatchWriteWorkerPool() 224 }) 225 } 226 dynamoDB.logger.Info("successfully created dynamoDB session") 227 return dynamoDB, nil 228 case dynamodb.TableStatusDeleting, dynamodb.TableStatusArchiving, dynamodb.TableStatusArchived: 229 return nil, errors.New("failed to get DynamoDB table, table status : " + tableStatus) 230 default: 231 dynamoDB.logger.Info("waiting for the table to be ready", "table status", tableStatus) 232 time.Sleep(1 * time.Second) 233 } 234 } 235 } 236 237 func (dynamo *dynamoDB) createTable() error { 238 input := &dynamodb.CreateTableInput{ 239 BillingMode: aws.String("PAY_PER_REQUEST"), 240 AttributeDefinitions: []*dynamodb.AttributeDefinition{ 241 { 242 AttributeName: aws.String("Key"), 243 AttributeType: aws.String("B"), // B - the attribute is of type Binary 244 }, 245 }, 246 KeySchema: []*dynamodb.KeySchemaElement{ 247 { 248 AttributeName: aws.String("Key"), 249 KeyType: aws.String("HASH"), // HASH - partition key, RANGE - sort key 250 }, 251 }, 252 253 TableName: aws.String(dynamo.config.TableName), 254 } 255 256 if dynamo.config.IsProvisioned { 257 input.BillingMode = aws.String("PROVISIONED") 258 input.ProvisionedThroughput = &dynamodb.ProvisionedThroughput{ 259 ReadCapacityUnits: aws.Int64(dynamo.config.ReadCapacityUnits), 260 WriteCapacityUnits: aws.Int64(dynamo.config.WriteCapacityUnits), 261 } 262 dynamo.logger.Warn("Billing mode is provisioned. You will be charged every hour.", "RCU", dynamo.config.ReadCapacityUnits, "WRU", dynamo.config.WriteCapacityUnits) 263 } 264 265 _, err := dynamoDBClient.CreateTable(input) 266 if err != nil { 267 dynamo.logger.Error("Error while creating the DynamoDB table", "err", err, "tableName", dynamo.config.TableName) 268 return err 269 } 270 dynamo.logger.Warn("Requesting create dynamoDB table. You will be charged until the table is deleted.") 271 return nil 272 } 273 274 func (dynamo *dynamoDB) deleteTable() error { 275 if _, err := dynamoDBClient.DeleteTable(&dynamodb.DeleteTableInput{TableName: &dynamo.config.TableName}); err != nil { 276 dynamo.logger.Error("Error while deleting the DynamoDB table", "tableName", dynamo.config.TableName) 277 return err 278 } 279 dynamo.logger.Info("Successfully deleted the DynamoDB table", "tableName", dynamo.config.TableName) 280 return nil 281 } 282 283 func (dynamo *dynamoDB) tableStatus() (string, error) { 284 desc, err := dynamo.tableDescription() 285 if err != nil { 286 return "", err 287 } 288 289 return *desc.TableStatus, nil 290 } 291 292 func (dynamo *dynamoDB) tableDescription() (*dynamodb.TableDescription, error) { 293 describe, err := dynamoDBClient.DescribeTable(&dynamodb.DescribeTableInput{TableName: aws.String(dynamo.config.TableName)}) 294 if describe == nil { 295 return nil, err 296 } 297 298 return describe.Table, err 299 } 300 301 func (dynamo *dynamoDB) Type() DBType { 302 return DynamoDB 303 } 304 305 // Put inserts the given key and value pair to the database. 306 func (dynamo *dynamoDB) Put(key []byte, val []byte) error { 307 if dynamo.config.PerfCheck { 308 start := time.Now() 309 err := dynamo.put(key, val) 310 dynamo.putTimer.Update(time.Since(start)) 311 return err 312 } 313 return dynamo.put(key, val) 314 } 315 316 func (dynamo *dynamoDB) put(key []byte, val []byte) error { 317 if len(key) == 0 { 318 return nil 319 } 320 321 if len(val) > dynamoWriteSizeLimit { 322 _, err := dynamo.fdb.write(item{key: key, val: val}) 323 if err != nil { 324 return err 325 } 326 return dynamo.Put(key, overSizedDataPrefix) 327 } 328 329 data := DynamoData{Key: key, Val: val} 330 marshaledData, err := dynamodbattribute.MarshalMap(data) 331 if err != nil { 332 return err 333 } 334 335 params := &dynamodb.PutItemInput{ 336 TableName: aws.String(dynamo.config.TableName), 337 Item: marshaledData, 338 } 339 340 _, err = dynamoDBClient.PutItem(params) 341 if err != nil { 342 dynamo.logger.Crit("failed to put an item", "err", err, "key", hexutil.Encode(data.Key)) 343 return err 344 } 345 346 return nil 347 } 348 349 // Has returns true if the corresponding value to the given key exists. 350 func (dynamo *dynamoDB) Has(key []byte) (bool, error) { 351 if _, err := dynamo.Get(key); err != nil { 352 if err == dataNotFoundErr { 353 return false, nil 354 } 355 return false, err 356 } 357 return true, nil 358 } 359 360 // Get returns the corresponding value to the given key if exists. 361 func (dynamo *dynamoDB) Get(key []byte) ([]byte, error) { 362 if dynamo.config.PerfCheck { 363 start := time.Now() 364 val, err := dynamo.get(key) 365 dynamo.getTimer.Update(time.Since(start)) 366 return val, err 367 } 368 return dynamo.get(key) 369 } 370 371 func (dynamo *dynamoDB) get(key []byte) ([]byte, error) { 372 params := &dynamodb.GetItemInput{ 373 TableName: aws.String(dynamo.config.TableName), 374 Key: map[string]*dynamodb.AttributeValue{ 375 "Key": { 376 B: key, 377 }, 378 }, 379 ConsistentRead: aws.Bool(true), 380 } 381 382 result, err := dynamoDBClient.GetItem(params) 383 if err != nil { 384 dynamo.logger.Crit("failed to get an item", "err", err, "key", hexutil.Encode(key)) 385 return nil, err 386 } 387 388 if result.Item == nil { 389 return nil, dataNotFoundErr 390 } 391 392 var data DynamoData 393 if err := dynamodbattribute.UnmarshalMap(result.Item, &data); err != nil { 394 dynamo.logger.Crit("failed to unmarshal dynamodb data", "err", err) 395 return nil, err 396 } 397 398 if data.Val == nil { 399 return []byte{}, nil 400 } 401 402 if bytes.Equal(data.Val, overSizedDataPrefix) { 403 ret, err := dynamo.fdb.read(key) 404 if err != nil { 405 dynamo.logger.Crit("failed to read filedb data", "err", err, "key", hexutil.Encode(key)) 406 } 407 return ret, err 408 } 409 410 return data.Val, nil 411 } 412 413 // Delete deletes the key from the queue and database 414 func (dynamo *dynamoDB) Delete(key []byte) error { 415 params := &dynamodb.DeleteItemInput{ 416 TableName: aws.String(dynamo.config.TableName), 417 Key: map[string]*dynamodb.AttributeValue{ 418 "Key": { 419 B: key, 420 }, 421 }, 422 } 423 424 _, err := dynamoDBClient.DeleteItem(params) 425 if err != nil { 426 dynamo.logger.Crit("failed to delete an item", "err", err, "key", hexutil.Encode(key)) 427 return err 428 } 429 return nil 430 } 431 432 func (dynamo *dynamoDB) Close() { 433 if dynamoOpenedDBNum > 0 { 434 dynamoOpenedDBNum-- 435 } 436 if dynamoOpenedDBNum == 0 && dynamoWriteCh != nil { 437 close(dynamoWriteCh) 438 } 439 } 440 441 func (dynamo *dynamoDB) Meter(prefix string) { 442 dynamo.getTimer = klaytnmetrics.NewRegisteredHybridTimer(prefix+"get/time", nil) 443 dynamo.putTimer = klaytnmetrics.NewRegisteredHybridTimer(prefix+"put/time", nil) 444 dynamoBatchWriteTimeMeter = metrics.NewRegisteredMeter(prefix+"batchwrite/time", nil) 445 } 446 447 func (dynamo *dynamoDB) NewIterator(prefix []byte, start []byte) Iterator { 448 // TODO-Klaytn: implement this later. 449 return nil 450 } 451 452 func createBatchWriteWorkerPool() { 453 dynamoWriteCh = make(chan *batchWriteWorkerInput, itemChanSize) 454 for i := 0; i < WorkerNum; i++ { 455 go createBatchWriteWorker(dynamoWriteCh) 456 } 457 logger.Info("made dynamo batch write workers", "workerNum", WorkerNum) 458 } 459 460 func createBatchWriteWorker(writeCh <-chan *batchWriteWorkerInput) { 461 failCount := 0 462 logger.Debug("generate a dynamoDB batchWrite worker") 463 464 for batchInput := range writeCh { 465 batchWriteInput := &dynamodb.BatchWriteItemInput{ 466 RequestItems: map[string][]*dynamodb.WriteRequest{}, 467 } 468 batchWriteInput.RequestItems[batchInput.tableName] = batchInput.items 469 470 BatchWriteItemOutput, err := dynamoDBClient.BatchWriteItem(batchWriteInput) 471 numUnprocessed := len(BatchWriteItemOutput.UnprocessedItems[batchInput.tableName]) 472 for err != nil || numUnprocessed != 0 { 473 if err != nil { 474 // ValidationException occurs when a required parameter is missing, a value is out of range, 475 // or data types mismatch and so on. If this is the case, check if there is a duplicated key, 476 // batch length out of range, null value and so on. 477 // When ValidationException occurs, retrying won't fix the problem. 478 if strings.Contains(err.Error(), "ValidationException") { 479 logger.Crit("Invalid input for dynamoDB BatchWrite", 480 "err", err, "tableName", batchInput.tableName, "itemNum", len(batchInput.items)) 481 } 482 failCount++ 483 logger.Warn("dynamoDB failed to write batch items", 484 "tableName", batchInput.tableName, "err", err, "failCnt", failCount) 485 if failCount > dynamoMaxRetry { 486 logger.Error("dynamoDB failed many times. sleep a second and retry", 487 "tableName", batchInput.tableName, "failCnt", failCount) 488 time.Sleep(time.Second) 489 } 490 } 491 492 if numUnprocessed != 0 { 493 logger.Debug("dynamoDB batchWrite remains unprocessedItem", 494 "tableName", batchInput.tableName, "numUnprocessedItem", numUnprocessed) 495 batchWriteInput.RequestItems[batchInput.tableName] = BatchWriteItemOutput.UnprocessedItems[batchInput.tableName] 496 } 497 498 start := time.Now() 499 BatchWriteItemOutput, err = dynamoDBClient.BatchWriteItem(batchWriteInput) 500 dynamoBatchWriteTimeMeter.Mark(int64(time.Since(start))) 501 numUnprocessed = len(BatchWriteItemOutput.UnprocessedItems) 502 } 503 504 failCount = 0 505 batchInput.wg.Done() 506 } 507 logger.Debug("close a dynamoDB batchWrite worker") 508 } 509 510 func (dynamo *dynamoDB) NewBatch() Batch { 511 return &dynamoBatch{db: dynamo, tableName: dynamo.config.TableName, wg: &sync.WaitGroup{}, keyMap: map[string]struct{}{}} 512 } 513 514 type dynamoBatch struct { 515 db *dynamoDB 516 tableName string 517 batchItems []*dynamodb.WriteRequest 518 keyMap map[string]struct{} // checks duplication of keys 519 size int 520 wg *sync.WaitGroup 521 } 522 523 // Put adds an item to dynamo batch. 524 // If the number of items in batch reaches dynamoBatchSize, a write request to dynamoDB is made. 525 // Each batch write is executed in thread. (There is an worker pool for dynamo batch write) 526 // 527 // Note: If there is a duplicated key in a batch, only the first value is written. 528 func (batch *dynamoBatch) Put(key, val []byte) error { 529 // if there is an duplicated key in batch, skip 530 if _, exist := batch.keyMap[string(key)]; exist { 531 return nil 532 } 533 batch.keyMap[string(key)] = struct{}{} 534 535 data := DynamoData{Key: key, Val: val} 536 dataSize := len(val) 537 538 // If the size of the item is larger than the limit, it should be handled in different way 539 if dataSize > dynamoWriteSizeLimit { 540 batch.wg.Add(1) 541 go func() { 542 failCnt := 0 543 batch.db.logger.Debug("write large size data into fileDB") 544 545 _, err := batch.db.fdb.write(item{key: key, val: val}) 546 for err != nil { 547 failCnt++ 548 batch.db.logger.Error("cannot write an item into fileDB. check the status of s3", 549 "err", err, "numRetry", failCnt) 550 time.Sleep(time.Second) 551 552 batch.db.logger.Warn("retrying write an item into fileDB") 553 _, err = batch.db.fdb.write(item{key: key, val: val}) 554 } 555 batch.wg.Done() 556 }() 557 data.Val = overSizedDataPrefix 558 dataSize = len(data.Val) 559 } 560 561 marshaledData, err := dynamodbattribute.MarshalMap(data) 562 if err != nil { 563 batch.db.logger.Error("err while batch put", "err", err, "len(val)", len(val)) 564 return err 565 } 566 567 batch.batchItems = append(batch.batchItems, &dynamodb.WriteRequest{ 568 PutRequest: &dynamodb.PutRequest{Item: marshaledData}, 569 }) 570 batch.size += dataSize 571 572 if len(batch.batchItems) == dynamoBatchSize { 573 batch.wg.Add(1) 574 dynamoWriteCh <- &batchWriteWorkerInput{batch.tableName, batch.batchItems, batch.wg} 575 batch.Reset() 576 } 577 return nil 578 } 579 580 // Delete inserts the a key removal into the batch for later committing. 581 func (batch *dynamoBatch) Delete(key []byte) error { 582 logger.CritWithStack("Delete should not be called when using dynamodb batch") 583 return nil 584 } 585 586 func (batch *dynamoBatch) Write() error { 587 var writeRequest []*dynamodb.WriteRequest 588 numRemainedItems := len(batch.batchItems) 589 590 for numRemainedItems > 0 { 591 if numRemainedItems > dynamoBatchSize { 592 writeRequest = batch.batchItems[:dynamoBatchSize] 593 batch.batchItems = batch.batchItems[dynamoBatchSize:] 594 } else { 595 writeRequest = batch.batchItems 596 } 597 batch.wg.Add(1) 598 dynamoWriteCh <- &batchWriteWorkerInput{batch.tableName, writeRequest, batch.wg} 599 numRemainedItems -= len(writeRequest) 600 } 601 602 batch.wg.Wait() 603 return nil 604 } 605 606 func (batch *dynamoBatch) ValueSize() int { 607 return batch.size 608 } 609 610 func (batch *dynamoBatch) Reset() { 611 batch.batchItems = []*dynamodb.WriteRequest{} 612 batch.keyMap = map[string]struct{}{} 613 batch.size = 0 614 } 615 616 func (batch *dynamoBatch) Replay(w KeyValueWriter) error { 617 logger.CritWithStack("Replay should not be called when using dynamodb batch") 618 return nil 619 }