storj.io/minio@v0.0.0-20230509071714-0cbc90f649b1/cmd/gateway/hdfs/gateway-hdfs.go

/*
 * Minio Cloud Storage, (C) 2019 Minio, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package hdfs

import (
	"context"
	"errors"
	"fmt"
	"io"
	"net"
	"net/http"
	"os"
	"os/user"
	"path"
	"sort"
	"strings"
	"syscall"
	"time"

	"github.com/colinmarc/hdfs/v2"
	"github.com/colinmarc/hdfs/v2/hadoopconf"
	krb "github.com/jcmturner/gokrb5/v8/client"
	"github.com/jcmturner/gokrb5/v8/config"
	"github.com/jcmturner/gokrb5/v8/credentials"
	"github.com/jcmturner/gokrb5/v8/keytab"
	"github.com/minio/cli"
	"github.com/minio/minio-go/v7/pkg/s3utils"

	minio "storj.io/minio/cmd"
	"storj.io/minio/cmd/logger"
	"storj.io/minio/pkg/auth"
	"storj.io/minio/pkg/env"
	"storj.io/minio/pkg/madmin"
	xnet "storj.io/minio/pkg/net"
)

const (
	hdfsSeparator = minio.SlashSeparator
)

func init() {
	const hdfsGatewayTemplate = `NAME:
  {{.HelpName}} - {{.Usage}}

USAGE:
  {{.HelpName}} {{if .VisibleFlags}}[FLAGS]{{end}} HDFS-NAMENODE [HDFS-NAMENODE...]
{{if .VisibleFlags}}
FLAGS:
  {{range .VisibleFlags}}{{.}}
  {{end}}{{end}}
HDFS-NAMENODE:
  HDFS namenode URI

EXAMPLES:
  1. Start minio gateway server for HDFS backend
     {{.Prompt}} {{.EnvVarSetCommand}} MINIO_ROOT_USER{{.AssignmentOperator}}accesskey
     {{.Prompt}} {{.EnvVarSetCommand}} MINIO_ROOT_PASSWORD{{.AssignmentOperator}}secretkey
     {{.Prompt}} {{.HelpName}} hdfs://namenode:8200

  2. Start minio gateway server for HDFS with edge caching enabled
     {{.Prompt}} {{.EnvVarSetCommand}} MINIO_ROOT_USER{{.AssignmentOperator}}accesskey
     {{.Prompt}} {{.EnvVarSetCommand}} MINIO_ROOT_PASSWORD{{.AssignmentOperator}}secretkey
     {{.Prompt}} {{.EnvVarSetCommand}} MINIO_CACHE_DRIVES{{.AssignmentOperator}}"/mnt/drive1,/mnt/drive2,/mnt/drive3,/mnt/drive4"
     {{.Prompt}} {{.EnvVarSetCommand}} MINIO_CACHE_EXCLUDE{{.AssignmentOperator}}"bucket1/*,*.png"
     {{.Prompt}} {{.EnvVarSetCommand}} MINIO_CACHE_QUOTA{{.AssignmentOperator}}90
     {{.Prompt}} {{.EnvVarSetCommand}} MINIO_CACHE_AFTER{{.AssignmentOperator}}3
     {{.Prompt}} {{.EnvVarSetCommand}} MINIO_CACHE_WATERMARK_LOW{{.AssignmentOperator}}75
     {{.Prompt}} {{.EnvVarSetCommand}} MINIO_CACHE_WATERMARK_HIGH{{.AssignmentOperator}}85
     {{.Prompt}} {{.HelpName}} hdfs://namenode:8200
`

	minio.RegisterGatewayCommand(cli.Command{
		Name:               minio.HDFSBackendGateway,
		Usage:              "Hadoop Distributed File System (HDFS)",
		Action:             hdfsGatewayMain,
		CustomHelpTemplate: hdfsGatewayTemplate,
		HideHelpCommand:    true,
	})
}

// Handler for 'minio gateway hdfs' command line.
func hdfsGatewayMain(ctx *cli.Context) {
	// Validate gateway arguments.
	if ctx.Args().First() == "help" {
		cli.ShowCommandHelpAndExit(ctx, minio.HDFSBackendGateway, 1)
	}

	minio.StartGateway(ctx, &HDFS{args: ctx.Args()})
}
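
// A typical invocation, mirroring the EXAMPLES section of the help template
// above (the namenode address is illustrative):
//
//	export MINIO_ROOT_USER=accesskey
//	export MINIO_ROOT_PASSWORD=secretkey
//	minio gateway hdfs hdfs://namenode:8200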

// HDFS implements Gateway.
type HDFS struct {
	args []string
}

// Name implements the Gateway interface.
func (g *HDFS) Name() string {
	return minio.HDFSBackendGateway
}

func getKerberosClient() (*krb.Client, error) {
	cfg, err := config.Load(env.Get("KRB5_CONFIG", "/etc/krb5.conf"))
	if err != nil {
		return nil, err
	}

	u, err := user.Current()
	if err != nil {
		return nil, err
	}

	keytabPath := env.Get("KRB5KEYTAB", "")
	if keytabPath != "" {
		kt, err := keytab.Load(keytabPath)
		if err != nil {
			return nil, err
		}

		username := env.Get("KRB5USERNAME", "")
		realm := env.Get("KRB5REALM", "")
		if username == "" || realm == "" {
			return nil, errors.New("empty KRB5USERNAME or KRB5REALM")
		}

		return krb.NewWithKeytab(username, realm, kt, cfg), nil
	}

	// Determine the ccache location from the environment, falling back to the default location.
	ccachePath := env.Get("KRB5CCNAME", fmt.Sprintf("/tmp/krb5cc_%s", u.Uid))
	if strings.Contains(ccachePath, ":") {
		if strings.HasPrefix(ccachePath, "FILE:") {
			ccachePath = strings.TrimPrefix(ccachePath, "FILE:")
		} else {
			return nil, fmt.Errorf("unable to use kerberos ccache: %s", ccachePath)
		}
	}

	ccache, err := credentials.LoadCCache(ccachePath)
	if err != nil {
		return nil, err
	}

	return krb.NewFromCCache(ccache, cfg)
}

// NewGatewayLayer returns the hdfs gateway layer.
func (g *HDFS) NewGatewayLayer(creds auth.Credentials) (minio.ObjectLayer, error) {
	dialFunc := (&net.Dialer{
		Timeout:   30 * time.Second,
		KeepAlive: 30 * time.Second,
		DualStack: true,
	}).DialContext

	hconfig, err := hadoopconf.LoadFromEnvironment()
	if err != nil {
		return nil, err
	}

	opts := hdfs.ClientOptionsFromConf(hconfig)
	opts.NamenodeDialFunc = dialFunc
	opts.DatanodeDialFunc = dialFunc

	// No addresses found in the Hadoop configuration, load them from the command line.
	var commonPath string
	if len(opts.Addresses) == 0 {
		var addresses []string
		for _, s := range g.args {
			u, err := xnet.ParseURL(s)
			if err != nil {
				return nil, err
			}
			if u.Scheme != "hdfs" {
				return nil, fmt.Errorf("unsupported scheme %s, only supports hdfs://", u)
			}
			if commonPath != "" && commonPath != u.Path {
				return nil, fmt.Errorf("all namenode paths should be the same: %s", g.args)
			}
			if commonPath == "" {
				commonPath = u.Path
			}
			addresses = append(addresses, u.Host)
		}
		opts.Addresses = addresses
	}

	u, err := user.Current()
	if err != nil {
		return nil, fmt.Errorf("unable to lookup local user: %s", err)
	}

	if opts.KerberosClient != nil {
		opts.KerberosClient, err = getKerberosClient()
		if err != nil {
			return nil, fmt.Errorf("unable to initialize kerberos client: %s", err)
		}
	} else {
		opts.User = env.Get("HADOOP_USER_NAME", u.Username)
	}

	clnt, err := hdfs.NewClient(opts)
	if err != nil {
		return nil, fmt.Errorf("unable to initialize hdfsClient: %v", err)
	}

	if err = clnt.MkdirAll(minio.PathJoin(commonPath, hdfsSeparator, minioMetaTmpBucket), os.FileMode(0755)); err != nil {
		return nil, err
	}

	return &hdfsObjects{clnt: clnt, subPath: commonPath, listPool: minio.NewTreeWalkPool(time.Minute * 30)}, nil
}
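
// getKerberosClient above resolves credentials from the environment in one of
// two ways: if KRB5KEYTAB is set it authenticates with a keytab (KRB5USERNAME
// and KRB5REALM are then required); otherwise it loads an existing credential
// cache from KRB5CCNAME, accepting only FILE: paths. A hypothetical keytab
// setup might look like:
//
//	export KRB5_CONFIG=/etc/krb5.conf
//	export KRB5KEYTAB=/etc/security/minio.keytab
//	export KRB5USERNAME=minio
//	export KRB5REALM=EXAMPLE.COM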

// Production - hdfs gateway is production ready.
func (g *HDFS) Production() bool {
	return true
}

func (n *hdfsObjects) Shutdown(ctx context.Context) error {
	return n.clnt.Close()
}

func (n *hdfsObjects) LocalStorageInfo(ctx context.Context) (si minio.StorageInfo, errs []error) {
	return n.StorageInfo(ctx)
}

func (n *hdfsObjects) StorageInfo(ctx context.Context) (si minio.StorageInfo, errs []error) {
	fsInfo, err := n.clnt.StatFs()
	if err != nil {
		return minio.StorageInfo{}, []error{err}
	}
	si.Disks = []madmin.Disk{{
		UsedSpace: fsInfo.Used,
	}}
	si.Backend.Type = madmin.Gateway
	si.Backend.GatewayOnline = true
	return si, nil
}

// hdfsObjects implements gateway for Minio and S3 compatible object storage servers.
type hdfsObjects struct {
	minio.GatewayUnsupported
	clnt     *hdfs.Client
	subPath  string
	listPool *minio.TreeWalkPool
}

func hdfsToObjectErr(ctx context.Context, err error, params ...string) error {
	if err == nil {
		return nil
	}
	bucket := ""
	object := ""
	uploadID := ""
	switch len(params) {
	case 3:
		uploadID = params[2]
		fallthrough
	case 2:
		object = params[1]
		fallthrough
	case 1:
		bucket = params[0]
	}

	switch {
	case os.IsNotExist(err):
		if uploadID != "" {
			return minio.InvalidUploadID{
				UploadID: uploadID,
			}
		}
		if object != "" {
			return minio.ObjectNotFound{Bucket: bucket, Object: object}
		}
		return minio.BucketNotFound{Bucket: bucket}
	case os.IsExist(err):
		if object != "" {
			return minio.PrefixAccessDenied{Bucket: bucket, Object: object}
		}
		return minio.BucketAlreadyOwnedByYou{Bucket: bucket}
	case errors.Is(err, syscall.ENOTEMPTY):
		if object != "" {
			return minio.PrefixAccessDenied{Bucket: bucket, Object: object}
		}
		return minio.BucketNotEmpty{Bucket: bucket}
	default:
		logger.LogIf(ctx, err)
		return err
	}
}

// hdfsIsValidBucketName verifies whether a bucket name is valid.
func hdfsIsValidBucketName(bucket string) bool {
	return s3utils.CheckValidBucketNameStrict(bucket) == nil
}

func (n *hdfsObjects) hdfsPathJoin(args ...string) string {
	return minio.PathJoin(append([]string{n.subPath, hdfsSeparator}, args...)...)
}

func (n *hdfsObjects) DeleteBucket(ctx context.Context, bucket string, forceDelete bool) error {
	if !hdfsIsValidBucketName(bucket) {
		return minio.BucketNameInvalid{Bucket: bucket}
	}
	if forceDelete {
		return hdfsToObjectErr(ctx, n.clnt.RemoveAll(n.hdfsPathJoin(bucket)), bucket)
	}
	return hdfsToObjectErr(ctx, n.clnt.Remove(n.hdfsPathJoin(bucket)), bucket)
}

func (n *hdfsObjects) MakeBucketWithLocation(ctx context.Context, bucket string, opts minio.BucketOptions) error {
	if opts.LockEnabled || opts.VersioningEnabled {
		return minio.NotImplemented{}
	}

	if !hdfsIsValidBucketName(bucket) {
		return minio.BucketNameInvalid{Bucket: bucket}
	}
	return hdfsToObjectErr(ctx, n.clnt.Mkdir(n.hdfsPathJoin(bucket), os.FileMode(0755)), bucket)
}
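
// For illustration: with a sub-path of "/data" parsed from the namenode URI,
// n.hdfsPathJoin("bucket", "object") resolves to "/data/bucket/object", so
// every bucket maps to a top-level HDFS directory under that sub-path (the
// names here are hypothetical).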

func (n *hdfsObjects) GetBucketInfo(ctx context.Context, bucket string) (bi minio.BucketInfo, err error) {
	fi, err := n.clnt.Stat(n.hdfsPathJoin(bucket))
	if err != nil {
		return bi, hdfsToObjectErr(ctx, err, bucket)
	}
	// As hdfs.Stat() doesn't carry anything other than ModTime(), use ModTime() as CreatedTime.
	return minio.BucketInfo{
		Name:    bucket,
		Created: fi.ModTime(),
	}, nil
}

func (n *hdfsObjects) ListBuckets(ctx context.Context) (buckets []minio.BucketInfo, err error) {
	entries, err := n.clnt.ReadDir(n.hdfsPathJoin())
	if err != nil {
		logger.LogIf(ctx, err)
		return nil, hdfsToObjectErr(ctx, err)
	}

	for _, entry := range entries {
		// Ignore all reserved bucket names and invalid bucket names.
		if isReservedOrInvalidBucket(entry.Name(), false) {
			continue
		}
		buckets = append(buckets, minio.BucketInfo{
			Name: entry.Name(),
			// As hdfs.Stat() doesn't carry CreatedTime, use ModTime() as CreatedTime.
			Created: entry.ModTime(),
		})
	}

	// Sort bucket infos by bucket name.
	sort.Sort(byBucketName(buckets))
	return buckets, nil
}

func (n *hdfsObjects) isLeafDir(bucket, leafPath string) bool {
	return n.isObjectDir(context.Background(), bucket, leafPath)
}

func (n *hdfsObjects) isLeaf(bucket, leafPath string) bool {
	return !strings.HasSuffix(leafPath, hdfsSeparator)
}

func (n *hdfsObjects) listDirFactory() minio.ListDirFunc {
	// listDir - lists all the entries at a given prefix and given entry in the prefix.
	listDir := func(bucket, prefixDir, prefixEntry string) (emptyDir bool, entries []string, delayIsLeaf bool) {
		f, err := n.clnt.Open(n.hdfsPathJoin(bucket, prefixDir))
		if err != nil {
			if os.IsNotExist(err) {
				err = nil
			}
			logger.LogIf(minio.GlobalContext, err)
			return
		}
		defer f.Close()
		fis, err := f.Readdir(0)
		if err != nil {
			logger.LogIf(minio.GlobalContext, err)
			return
		}
		if len(fis) == 0 {
			return true, nil, false
		}
		for _, fi := range fis {
			if fi.IsDir() {
				entries = append(entries, fi.Name()+hdfsSeparator)
			} else {
				entries = append(entries, fi.Name())
			}
		}
		entries, delayIsLeaf = minio.FilterListEntries(bucket, prefixDir, entries, prefixEntry, n.isLeaf)
		return false, entries, delayIsLeaf
	}

	// Return list factory instance.
	return listDir
}
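
// The listDir function returned above drives minio's generic tree walker:
// ListObjects below hands it to minio.ListObjects together with the
// TreeWalkPool, which keeps walkers alive for up to 30 minutes so paginated
// listings can resume where the previous page stopped. Directory entries are
// reported with a trailing slash, which is how isLeaf distinguishes objects
// from prefixes.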

// ListObjects lists all blobs in HDFS bucket filtered by prefix.
func (n *hdfsObjects) ListObjects(ctx context.Context, bucket, prefix, marker, delimiter string, maxKeys int) (loi minio.ListObjectsInfo, err error) {
	fileInfos := make(map[string]os.FileInfo)
	targetPath := n.hdfsPathJoin(bucket, prefix)

	var targetFileInfo os.FileInfo

	if targetFileInfo, err = n.populateDirectoryListing(targetPath, fileInfos); err != nil {
		return loi, hdfsToObjectErr(ctx, err, bucket)
	}

	// If the user is trying to list a single file, bypass the entire directory-walking code below
	// and just return the single file's information.
	if !targetFileInfo.IsDir() {
		return minio.ListObjectsInfo{
			IsTruncated: false,
			NextMarker:  "",
			Objects: []minio.ObjectInfo{
				fileInfoToObjectInfo(bucket, prefix, targetFileInfo),
			},
			Prefixes: []string{},
		}, nil
	}

	getObjectInfo := func(ctx context.Context, bucket, entry string) (minio.ObjectInfo, error) {
		filePath := path.Clean(n.hdfsPathJoin(bucket, entry))
		fi, ok := fileInfos[filePath]

		// If the file info is not known, this may be a recursive listing and filePath is a
		// child of a sub-directory. In this case, obtain that sub-directory's listing.
		if !ok {
			parentPath := path.Dir(filePath)

			if _, err := n.populateDirectoryListing(parentPath, fileInfos); err != nil {
				return minio.ObjectInfo{}, hdfsToObjectErr(ctx, err, bucket)
			}

			fi, ok = fileInfos[filePath]

			if !ok {
				err = fmt.Errorf("could not get FileInfo for path '%s'", filePath)
				return minio.ObjectInfo{}, hdfsToObjectErr(ctx, err, bucket, entry)
			}
		}

		objectInfo := fileInfoToObjectInfo(bucket, entry, fi)

		delete(fileInfos, filePath)

		return objectInfo, nil
	}

	return minio.ListObjects(ctx, n, bucket, prefix, marker, delimiter, maxKeys, n.listPool, n.listDirFactory(), n.isLeaf, n.isLeafDir, getObjectInfo, getObjectInfo)
}

func fileInfoToObjectInfo(bucket string, entry string, fi os.FileInfo) minio.ObjectInfo {
	return minio.ObjectInfo{
		Bucket:  bucket,
		Name:    entry,
		ModTime: fi.ModTime(),
		Size:    fi.Size(),
		IsDir:   fi.IsDir(),
		AccTime: fi.(*hdfs.FileInfo).AccessTime(),
	}
}

// Lists a path's direct, first-level entries and populates them in the `fileInfos` cache which maps
// a path entry to an `os.FileInfo`. It also saves the listed path's `os.FileInfo` in the cache.
func (n *hdfsObjects) populateDirectoryListing(filePath string, fileInfos map[string]os.FileInfo) (os.FileInfo, error) {
	dirReader, err := n.clnt.Open(filePath)
	if err != nil {
		return nil, err
	}

	dirStat := dirReader.Stat()
	key := path.Clean(filePath)

	if !dirStat.IsDir() {
		return dirStat, nil
	}

	fileInfos[key] = dirStat
	infos, err := dirReader.Readdir(0)
	if err != nil {
		return nil, err
	}

	for _, fileInfo := range infos {
		filePath := minio.PathJoin(filePath, fileInfo.Name())
		fileInfos[filePath] = fileInfo
	}

	return dirStat, nil
}

// deleteObject deletes a file path if it's empty. If it's successfully deleted,
// it will recursively move up the tree, deleting empty parent directories
// until it finds one with files in it. Returns nil for a non-empty directory.
func (n *hdfsObjects) deleteObject(basePath, deletePath string) error {
	if basePath == deletePath {
		return nil
	}

	// Attempt to remove path.
	if err := n.clnt.Remove(deletePath); err != nil {
		if errors.Is(err, syscall.ENOTEMPTY) {
			// Ignore errors if the directory is not empty. The server relies on
			// this functionality, and sometimes uses recursion that should not
			// error on parent directories.
			return nil
		}
		return err
	}

	// Trailing slash is removed when found to ensure
	// slashpath.Dir() works as intended.
	deletePath = strings.TrimSuffix(deletePath, hdfsSeparator)
	deletePath = path.Dir(deletePath)

	// Delete parent directory. Errors for parent directories shouldn't trickle down.
	n.deleteObject(basePath, deletePath)

	return nil
}
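
// A sketch of deleteObject's upward pruning (paths hypothetical): removing
// "bucket/a/b/obj" deletes the file, then tries "bucket/a/b" and "bucket/a"
// in turn, stopping at the first non-empty parent or once basePath itself is
// reached.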

// ListObjectsV2 lists all blobs in HDFS bucket filtered by prefix.
func (n *hdfsObjects) ListObjectsV2(ctx context.Context, bucket, prefix, continuationToken, delimiter string, maxKeys int,
	fetchOwner bool, startAfter string) (loi minio.ListObjectsV2Info, err error) {
	// fetchOwner is not supported and unused.
	marker := continuationToken
	if marker == "" {
		marker = startAfter
	}
	resultV1, err := n.ListObjects(ctx, bucket, prefix, marker, delimiter, maxKeys)
	if err != nil {
		return loi, err
	}
	return minio.ListObjectsV2Info{
		Objects:               resultV1.Objects,
		Prefixes:              resultV1.Prefixes,
		ContinuationToken:     continuationToken,
		NextContinuationToken: resultV1.NextMarker,
		IsTruncated:           resultV1.IsTruncated,
	}, nil
}

func (n *hdfsObjects) DeleteObject(ctx context.Context, bucket, object string, opts minio.ObjectOptions) (minio.ObjectInfo, error) {
	err := hdfsToObjectErr(ctx, n.deleteObject(n.hdfsPathJoin(bucket), n.hdfsPathJoin(bucket, object)), bucket, object)
	return minio.ObjectInfo{
		Bucket: bucket,
		Name:   object,
	}, err
}

func (n *hdfsObjects) DeleteObjects(ctx context.Context, bucket string, objects []minio.ObjectToDelete, opts minio.ObjectOptions) ([]minio.DeletedObject, []error) {
	errs := make([]error, len(objects))
	dobjects := make([]minio.DeletedObject, len(objects))
	for idx, object := range objects {
		_, errs[idx] = n.DeleteObject(ctx, bucket, object.ObjectName, opts)
		if errs[idx] == nil {
			dobjects[idx] = minio.DeletedObject{
				ObjectName: object.ObjectName,
			}
		}
	}
	return dobjects, errs
}

func (n *hdfsObjects) GetObjectNInfo(ctx context.Context, bucket, object string, rs *minio.HTTPRangeSpec, h http.Header, lockType minio.LockType, opts minio.ObjectOptions) (gr *minio.GetObjectReader, err error) {
	objInfo, err := n.GetObjectInfo(ctx, bucket, object, opts)
	if err != nil {
		return nil, err
	}

	var startOffset, length int64
	startOffset, length, err = rs.GetOffsetLength(objInfo.Size)
	if err != nil {
		return nil, err
	}

	pr, pw := io.Pipe()
	go func() {
		nerr := n.getObject(ctx, bucket, object, startOffset, length, pw, objInfo.ETag, opts)
		pw.CloseWithError(nerr)
	}()

	// Set up a cleanup function so the goroutine above
	// exits in case of a partial read.
	pipeCloser := func() { pr.Close() }
	return minio.NewGetObjectReaderFromReader(pr, objInfo, opts, pipeCloser)
}

func (n *hdfsObjects) CopyObject(ctx context.Context, srcBucket, srcObject, dstBucket, dstObject string, srcInfo minio.ObjectInfo, srcOpts, dstOpts minio.ObjectOptions) (minio.ObjectInfo, error) {
	cpSrcDstSame := minio.IsStringEqual(n.hdfsPathJoin(srcBucket, srcObject), n.hdfsPathJoin(dstBucket, dstObject))
	if cpSrcDstSame {
		return n.GetObjectInfo(ctx, srcBucket, srcObject, minio.ObjectOptions{})
	}

	return n.PutObject(ctx, dstBucket, dstObject, srcInfo.PutObjReader, minio.ObjectOptions{
		ServerSideEncryption: dstOpts.ServerSideEncryption,
		UserDefined:          srcInfo.UserDefined,
	})
}
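
// getObject below serves ranged reads by wrapping the HDFS file reader in
// io.NewSectionReader(rd, startOffset, length); hdfs.FileReader implements
// io.ReaderAt, so only the requested byte window is copied to the client.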

func (n *hdfsObjects) getObject(ctx context.Context, bucket, key string, startOffset, length int64, writer io.Writer, etag string, opts minio.ObjectOptions) error {
	if _, err := n.clnt.Stat(n.hdfsPathJoin(bucket)); err != nil {
		return hdfsToObjectErr(ctx, err, bucket)
	}
	rd, err := n.clnt.Open(n.hdfsPathJoin(bucket, key))
	if err != nil {
		return hdfsToObjectErr(ctx, err, bucket, key)
	}
	defer rd.Close()
	_, err = io.Copy(writer, io.NewSectionReader(rd, startOffset, length))
	if err == io.ErrClosedPipe {
		// hdfs library doesn't send EOF correctly, so io.Copy attempts
		// to write which returns io.ErrClosedPipe - just ignore
		// this for now.
		err = nil
	}
	return hdfsToObjectErr(ctx, err, bucket, key)
}

func (n *hdfsObjects) isObjectDir(ctx context.Context, bucket, object string) bool {
	f, err := n.clnt.Open(n.hdfsPathJoin(bucket, object))
	if err != nil {
		if os.IsNotExist(err) {
			return false
		}
		logger.LogIf(ctx, err)
		return false
	}
	defer f.Close()
	fis, err := f.Readdir(1)
	if err != nil && err != io.EOF {
		logger.LogIf(ctx, err)
		return false
	}
	// Readdir returns io.EOF when len(fis) == 0.
	return len(fis) == 0
}

// GetObjectInfo reads object info and replies back ObjectInfo.
func (n *hdfsObjects) GetObjectInfo(ctx context.Context, bucket, object string, opts minio.ObjectOptions) (objInfo minio.ObjectInfo, err error) {
	_, err = n.clnt.Stat(n.hdfsPathJoin(bucket))
	if err != nil {
		return objInfo, hdfsToObjectErr(ctx, err, bucket)
	}
	if strings.HasSuffix(object, hdfsSeparator) && !n.isObjectDir(ctx, bucket, object) {
		return objInfo, hdfsToObjectErr(ctx, os.ErrNotExist, bucket, object)
	}

	fi, err := n.clnt.Stat(n.hdfsPathJoin(bucket, object))
	if err != nil {
		return objInfo, hdfsToObjectErr(ctx, err, bucket, object)
	}
	return minio.ObjectInfo{
		Bucket:  bucket,
		Name:    object,
		ModTime: fi.ModTime(),
		Size:    fi.Size(),
		IsDir:   fi.IsDir(),
		AccTime: fi.(*hdfs.FileInfo).AccessTime(),
	}, nil
}

func (n *hdfsObjects) PutObject(ctx context.Context, bucket string, object string, r *minio.PutObjReader, opts minio.ObjectOptions) (objInfo minio.ObjectInfo, err error) {
	_, err = n.clnt.Stat(n.hdfsPathJoin(bucket))
	if err != nil {
		return objInfo, hdfsToObjectErr(ctx, err, bucket)
	}

	name := n.hdfsPathJoin(bucket, object)

	// If it's a directory, create a prefix.
	if strings.HasSuffix(object, hdfsSeparator) && r.Size() == 0 {
		if err = n.clnt.MkdirAll(name, os.FileMode(0755)); err != nil {
			n.deleteObject(n.hdfsPathJoin(bucket), name)
			return objInfo, hdfsToObjectErr(ctx, err, bucket, object)
		}
	} else {
		tmpname := n.hdfsPathJoin(minioMetaTmpBucket, minio.MustGetUUID())
		var w *hdfs.FileWriter
		w, err = n.clnt.Create(tmpname)
		if err != nil {
			return objInfo, hdfsToObjectErr(ctx, err, bucket, object)
		}
		defer n.deleteObject(n.hdfsPathJoin(minioMetaTmpBucket), tmpname)
		if _, err = io.Copy(w, r); err != nil {
			w.Close()
			return objInfo, hdfsToObjectErr(ctx, err, bucket, object)
		}
		dir := path.Dir(name)
		if dir != "" {
			if err = n.clnt.MkdirAll(dir, os.FileMode(0755)); err != nil {
				w.Close()
				n.deleteObject(n.hdfsPathJoin(bucket), dir)
				return objInfo, hdfsToObjectErr(ctx, err, bucket, object)
			}
		}
		w.Close()
		if err = n.clnt.Rename(tmpname, name); err != nil {
			return objInfo, hdfsToObjectErr(ctx, err, bucket, object)
		}
	}
	fi, err := n.clnt.Stat(name)
	if err != nil {
		return objInfo, hdfsToObjectErr(ctx, err, bucket, object)
	}
	return minio.ObjectInfo{
		Bucket:  bucket,
		Name:    object,
		ETag:    r.MD5CurrentHexString(),
		ModTime: fi.ModTime(),
		Size:    fi.Size(),
		IsDir:   fi.IsDir(),
		AccTime: fi.(*hdfs.FileInfo).AccessTime(),
	}, nil
}
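
// PutObject above uses a write-then-rename pattern: the payload streams into
// a uniquely named file under the minio metadata tmp bucket and is renamed
// into place only after the copy succeeds, so readers never observe a
// partially written object.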

func (n *hdfsObjects) NewMultipartUpload(ctx context.Context, bucket string, object string, opts minio.ObjectOptions) (uploadID string, err error) {
	_, err = n.clnt.Stat(n.hdfsPathJoin(bucket))
	if err != nil {
		return uploadID, hdfsToObjectErr(ctx, err, bucket)
	}

	uploadID = minio.MustGetUUID()
	if err = n.clnt.CreateEmptyFile(n.hdfsPathJoin(minioMetaTmpBucket, uploadID)); err != nil {
		return uploadID, hdfsToObjectErr(ctx, err, bucket)
	}

	return uploadID, nil
}

func (n *hdfsObjects) ListMultipartUploads(ctx context.Context, bucket string, prefix string, keyMarker string, uploadIDMarker string, delimiter string, maxUploads int) (lmi minio.ListMultipartsInfo, err error) {
	_, err = n.clnt.Stat(n.hdfsPathJoin(bucket))
	if err != nil {
		return lmi, hdfsToObjectErr(ctx, err, bucket)
	}

	// It was decided not to support listing multipart uploads, hence returning an empty result.
	return lmi, nil
}

func (n *hdfsObjects) checkUploadIDExists(ctx context.Context, bucket, object, uploadID string) (err error) {
	_, err = n.clnt.Stat(n.hdfsPathJoin(minioMetaTmpBucket, uploadID))
	if err != nil {
		return hdfsToObjectErr(ctx, err, bucket, object, uploadID)
	}
	return nil
}

// GetMultipartInfo returns multipart info of the uploadID of the object.
func (n *hdfsObjects) GetMultipartInfo(ctx context.Context, bucket, object, uploadID string, opts minio.ObjectOptions) (result minio.MultipartInfo, err error) {
	_, err = n.clnt.Stat(n.hdfsPathJoin(bucket))
	if err != nil {
		return result, hdfsToObjectErr(ctx, err, bucket)
	}

	if err = n.checkUploadIDExists(ctx, bucket, object, uploadID); err != nil {
		return result, err
	}

	result.Bucket = bucket
	result.Object = object
	result.UploadID = uploadID
	return result, nil
}
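
// The multipart flow is append-only: NewMultipartUpload creates an empty tmp
// file keyed by the upload ID, each PutObjectPart appends to that file (which
// implicitly assumes parts are uploaded in order), and
// CompleteMultipartUpload renames the tmp file to the final object path.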

func (n *hdfsObjects) ListObjectParts(ctx context.Context, bucket, object, uploadID string, partNumberMarker int, maxParts int, opts minio.ObjectOptions) (result minio.ListPartsInfo, err error) {
	_, err = n.clnt.Stat(n.hdfsPathJoin(bucket))
	if err != nil {
		return result, hdfsToObjectErr(ctx, err, bucket)
	}

	if err = n.checkUploadIDExists(ctx, bucket, object, uploadID); err != nil {
		return result, err
	}

	// It was decided not to support listing parts, hence returning an empty result.
	return result, nil
}

func (n *hdfsObjects) CopyObjectPart(ctx context.Context, srcBucket, srcObject, dstBucket, dstObject, uploadID string, partID int,
	startOffset int64, length int64, srcInfo minio.ObjectInfo, srcOpts, dstOpts minio.ObjectOptions) (minio.PartInfo, error) {
	return n.PutObjectPart(ctx, dstBucket, dstObject, uploadID, partID, srcInfo.PutObjReader, dstOpts)
}

func (n *hdfsObjects) PutObjectPart(ctx context.Context, bucket, object, uploadID string, partID int, r *minio.PutObjReader, opts minio.ObjectOptions) (info minio.PartInfo, err error) {
	_, err = n.clnt.Stat(n.hdfsPathJoin(bucket))
	if err != nil {
		return info, hdfsToObjectErr(ctx, err, bucket)
	}

	var w *hdfs.FileWriter
	w, err = n.clnt.Append(n.hdfsPathJoin(minioMetaTmpBucket, uploadID))
	if err != nil {
		return info, hdfsToObjectErr(ctx, err, bucket, object, uploadID)
	}
	defer w.Close()
	_, err = io.Copy(w, r.Reader)
	if err != nil {
		return info, hdfsToObjectErr(ctx, err, bucket, object, uploadID)
	}

	info.PartNumber = partID
	info.ETag = r.MD5CurrentHexString()
	info.LastModified = minio.UTCNow()
	info.Size = r.Reader.Size()

	return info, nil
}

func (n *hdfsObjects) CompleteMultipartUpload(ctx context.Context, bucket, object, uploadID string, parts []minio.CompletePart, opts minio.ObjectOptions) (objInfo minio.ObjectInfo, err error) {
	_, err = n.clnt.Stat(n.hdfsPathJoin(bucket))
	if err != nil {
		return objInfo, hdfsToObjectErr(ctx, err, bucket)
	}

	if err = n.checkUploadIDExists(ctx, bucket, object, uploadID); err != nil {
		return objInfo, err
	}

	name := n.hdfsPathJoin(bucket, object)
	dir := path.Dir(name)
	if dir != "" {
		if err = n.clnt.MkdirAll(dir, os.FileMode(0755)); err != nil {
			return objInfo, hdfsToObjectErr(ctx, err, bucket, object)
		}
	}

	err = n.clnt.Rename(n.hdfsPathJoin(minioMetaTmpBucket, uploadID), name)
	// An object that already exists is an error on HDFS;
	// remove it and then create it again.
	if os.IsExist(err) {
		if err = n.clnt.Remove(name); err != nil {
			if dir != "" {
				n.deleteObject(n.hdfsPathJoin(bucket), dir)
			}
			return objInfo, hdfsToObjectErr(ctx, err, bucket, object)
		}
		if err = n.clnt.Rename(n.hdfsPathJoin(minioMetaTmpBucket, uploadID), name); err != nil {
			if dir != "" {
				n.deleteObject(n.hdfsPathJoin(bucket), dir)
			}
			return objInfo, hdfsToObjectErr(ctx, err, bucket, object)
		}
	}
	fi, err := n.clnt.Stat(name)
	if err != nil {
		return objInfo, hdfsToObjectErr(ctx, err, bucket, object)
	}

	// Calculate s3 compatible md5sum for complete multipart.
	s3MD5 := minio.ComputeCompleteMultipartMD5(parts)

	return minio.ObjectInfo{
		Bucket:  bucket,
		Name:    object,
		ETag:    s3MD5,
		ModTime: fi.ModTime(),
		Size:    fi.Size(),
		IsDir:   fi.IsDir(),
		AccTime: fi.(*hdfs.FileInfo).AccessTime(),
	}, nil
}

func (n *hdfsObjects) AbortMultipartUpload(ctx context.Context, bucket, object, uploadID string, opts minio.ObjectOptions) (err error) {
	_, err = n.clnt.Stat(n.hdfsPathJoin(bucket))
	if err != nil {
		return hdfsToObjectErr(ctx, err, bucket)
	}
	return hdfsToObjectErr(ctx, n.clnt.Remove(n.hdfsPathJoin(minioMetaTmpBucket, uploadID)), bucket, object, uploadID)
}