github.com/NVIDIA/aistore@v1.3.23-0.20240517131212-7df6609be51d/ext/dload/utils.go (about)

     1  // Package dload implements functionality to download resources into AIS cluster from external source.
     2  /*
     3   * Copyright (c) 2018-2022, NVIDIA CORPORATION. All rights reserved.
     4   */
     5  package dload
     6  
     7  import (
     8  	"context"
     9  	"errors"
    10  	"net/http"
    11  	"net/url"
    12  	"path"
    13  	"strings"
    14  	"time"
    15  
    16  	"github.com/NVIDIA/aistore/api/apc"
    17  	"github.com/NVIDIA/aistore/cmn"
    18  	"github.com/NVIDIA/aistore/cmn/cos"
    19  	"github.com/NVIDIA/aistore/cmn/debug"
    20  	"github.com/NVIDIA/aistore/cmn/nlog"
    21  	"github.com/NVIDIA/aistore/core"
    22  	"github.com/NVIDIA/aistore/core/meta"
    23  	jsoniter "github.com/json-iterator/go"
    24  )
    25  
// headReqTimeout bounds the context of the HEAD request issued by headLink.
const headReqTimeout = 5 * time.Second

// errInvalidTarget is returned by makeDlObj when the object name maps (via
// smap.HrwName2T) to a target whose ID differs from the given one; buildDlObjs
// skips such objects.
var errInvalidTarget = errors.New("downloader: invalid target")
    29  
    30  func clientForURL(u string) *http.Client {
    31  	if cos.IsHTTPS(u) {
    32  		return g.clientTLS
    33  	}
    34  	return g.clientH
    35  }
    36  
    37  //nolint:gocritic // need a copy of cos.ParsedTemplate
    38  func countObjects(pt cos.ParsedTemplate, dir string, bck *meta.Bck) (cnt int, err error) {
    39  	var (
    40  		smap = core.T.Sowner().Get()
    41  		sid  = core.T.SID()
    42  		si   *meta.Snode
    43  	)
    44  	pt.InitIter()
    45  	for link, ok := pt.Next(); ok; link, ok = pt.Next() {
    46  		name := path.Join(dir, path.Base(link))
    47  		name, err = NormalizeObjName(name)
    48  		if err != nil {
    49  			return
    50  		}
    51  		si, err = smap.HrwName2T(bck.MakeUname(name))
    52  		if err != nil {
    53  			return
    54  		}
    55  		if si.ID() == sid {
    56  			cnt++
    57  		}
    58  	}
    59  	return cnt, nil
    60  }
    61  
    62  // buildDlObjs returns list of objects that must be downloaded by target.
    63  func buildDlObjs(bck *meta.Bck, objects cos.StrKVs) ([]dlObj, error) {
    64  	var (
    65  		smap = core.T.Sowner().Get()
    66  		sid  = core.T.SID()
    67  	)
    68  
    69  	objs := make([]dlObj, 0, len(objects))
    70  	for name, link := range objects {
    71  		obj, err := makeDlObj(smap, sid, bck, name, link)
    72  		if err != nil {
    73  			if err == errInvalidTarget {
    74  				continue
    75  			}
    76  			return nil, err
    77  		}
    78  		objs = append(objs, obj)
    79  	}
    80  	return objs, nil
    81  }
    82  
    83  func makeDlObj(smap *meta.Smap, sid string, bck *meta.Bck, objName, link string) (dlObj, error) {
    84  	objName, err := NormalizeObjName(objName)
    85  	if err != nil {
    86  		return dlObj{}, err
    87  	}
    88  
    89  	si, err := smap.HrwName2T(bck.MakeUname(objName))
    90  	if err != nil {
    91  		return dlObj{}, err
    92  	}
    93  	if si.ID() != sid {
    94  		return dlObj{}, errInvalidTarget
    95  	}
    96  
    97  	return dlObj{
    98  		objName: objName,
    99  		// Make sure that link contains protocol (absence of protocol can result in errors).
   100  		link:       cmn.PrependProtocol(link),
   101  		fromRemote: link == "",
   102  	}, nil
   103  }
   104  
   105  // Removes everything that goes after '?', eg. "?query=key..." so it will not
   106  // be part of final object name.
   107  func NormalizeObjName(objName string) (string, error) {
   108  	u, err := url.Parse(objName)
   109  	if err != nil {
   110  		return "", nil
   111  	}
   112  
   113  	if u.Path == "" {
   114  		return objName, nil
   115  	}
   116  
   117  	return url.PathUnescape(u.Path)
   118  }
   119  
   120  func ParseStartRequest(bck *meta.Bck, id string, dlb Body, xdl *Xact) (jobif, error) {
   121  	switch dlb.Type {
   122  	case TypeBackend:
   123  		dp := &BackendBody{}
   124  		err := jsoniter.Unmarshal(dlb.RawMessage, dp)
   125  		if err != nil {
   126  			return nil, err
   127  		}
   128  		if err := dp.Validate(); err != nil {
   129  			return nil, err
   130  		}
   131  		return newBackendDlJob(id, bck, dp, xdl)
   132  	case TypeMulti:
   133  		dp := &MultiBody{}
   134  		err := jsoniter.Unmarshal(dlb.RawMessage, dp)
   135  		if err != nil {
   136  			return nil, err
   137  		}
   138  		if err := dp.Validate(); err != nil {
   139  			return nil, err
   140  		}
   141  		return newMultiDlJob(id, bck, dp, xdl)
   142  	case TypeRange:
   143  		dp := &RangeBody{}
   144  		err := jsoniter.Unmarshal(dlb.RawMessage, dp)
   145  		if err != nil {
   146  			return nil, err
   147  		}
   148  		if err := dp.Validate(); err != nil {
   149  			return nil, err
   150  		}
   151  		return newRangeDlJob(id, bck, dp, xdl)
   152  	case TypeSingle:
   153  		dp := &SingleBody{}
   154  		err := jsoniter.Unmarshal(dlb.RawMessage, dp)
   155  		if err != nil {
   156  			return nil, err
   157  		}
   158  		if err := dp.Validate(); err != nil {
   159  			return nil, err
   160  		}
   161  		return newSingleDlJob(id, bck, dp, xdl)
   162  	default:
   163  		return nil, errors.New("input does not match any of the supported formats (single, range, multi, backend)")
   164  	}
   165  }
   166  
   167  // Given URL (link) and response header parse object attrs for GCP, S3 and Azure.
   168  func attrsFromLink(link string, resp *http.Response, oah cos.OAH) (size int64) {
   169  	u, err := url.Parse(link)
   170  	debug.AssertNoErr(err)
   171  	switch {
   172  	case cos.IsGoogleStorageURL(u) || cos.IsGoogleAPIURL(u):
   173  		h := cmn.BackendHelpers.Google
   174  		oah.SetCustomKey(cmn.SourceObjMD, apc.GCP)
   175  		if v, ok := h.EncodeVersion(resp.Header.Get(cos.GsVersionHeader)); ok {
   176  			oah.SetCustomKey(cmn.VersionObjMD, v)
   177  		}
   178  		if hdr := resp.Header[http.CanonicalHeaderKey(cos.GsCksumHeader)]; len(hdr) > 0 {
   179  			for cksumType, cksumValue := range parseGoogleCksumHeader(hdr) {
   180  				switch cksumType {
   181  				case cos.ChecksumMD5:
   182  					oah.SetCustomKey(cmn.MD5ObjMD, cksumValue)
   183  				case cos.ChecksumCRC32C:
   184  					oah.SetCustomKey(cmn.CRC32CObjMD, cksumValue)
   185  				default:
   186  					nlog.Errorf("unimplemented cksum type for custom metadata: %s", cksumType)
   187  				}
   188  			}
   189  		}
   190  	case cos.IsS3URL(link):
   191  		h := cmn.BackendHelpers.Amazon
   192  		oah.SetCustomKey(cmn.SourceObjMD, apc.AWS)
   193  		if v, ok := h.EncodeVersion(resp.Header.Get(cos.S3VersionHeader)); ok {
   194  			oah.SetCustomKey(cmn.VersionObjMD, v)
   195  		}
   196  		if v, ok := h.EncodeCksum(resp.Header.Get(cos.S3CksumHeader)); ok {
   197  			oah.SetCustomKey(cmn.MD5ObjMD, v)
   198  		}
   199  	case cos.IsAzureURL(u):
   200  		h := cmn.BackendHelpers.Azure
   201  		oah.SetCustomKey(cmn.SourceObjMD, apc.Azure)
   202  		if v, ok := h.EncodeVersion(resp.Header.Get(cos.AzVersionHeader)); ok {
   203  			oah.SetCustomKey(cmn.VersionObjMD, v)
   204  		}
   205  		if v, ok := h.EncodeCksum(resp.Header.Get(cos.AzCksumHeader)); ok {
   206  			oah.SetCustomKey(cmn.MD5ObjMD, v)
   207  		}
   208  	default:
   209  		oah.SetCustomKey(cmn.SourceObjMD, cmn.WebObjMD)
   210  	}
   211  	return resp.ContentLength
   212  }
   213  
   214  func parseGoogleCksumHeader(hdr []string) cos.StrKVs {
   215  	var (
   216  		h      = cmn.BackendHelpers.Google
   217  		cksums = make(cos.StrKVs, 2)
   218  	)
   219  	for _, v := range hdr {
   220  		entry := strings.SplitN(v, "=", 2)
   221  		debug.Assert(len(entry) == 2)
   222  		if v, ok := h.EncodeCksum(entry[1]); ok {
   223  			cksums[entry[0]] = v
   224  		}
   225  	}
   226  	return cksums
   227  }
   228  
   229  func headLink(link string) (resp *http.Response, err error) {
   230  	var (
   231  		req         *http.Request
   232  		ctx, cancel = context.WithTimeout(context.Background(), headReqTimeout)
   233  	)
   234  	req, err = http.NewRequestWithContext(ctx, http.MethodHead, link, http.NoBody)
   235  	if err == nil {
   236  		resp, err = clientForURL(link).Do(req)
   237  	}
   238  	cancel()
   239  	return
   240  }
   241  
   242  // Use all available metadata including {size, version, ETag, MD5, CRC}
   243  // to compare local object with its remote counterpart (source).
   244  func CompareObjects(lom *core.LOM, dst *DstElement) (bool /*equal*/, error) {
   245  	if dst.Link == "" {
   246  		res := lom.CheckRemoteMD(true /*rlocked*/, false /*sync*/, nil /*origReq*/) // TODO: use job.Sync()
   247  		return res.Eq, res.Err
   248  		// TODO: make use of res.ObjAttrs
   249  	}
   250  
   251  	resp, err := headLink(dst.Link) //nolint:bodyclose // cos.Close
   252  	if err != nil {
   253  		return false, err
   254  	}
   255  	cos.Close(resp.Body)
   256  
   257  	oa := &cmn.ObjAttrs{}
   258  	oa.Size = attrsFromLink(dst.Link, resp, oa) // fill in from resp
   259  
   260  	return lom.Equal(oa), nil
   261  }
   262  
   263  // called via ais/prxnotifs generic mechanism
   264  func AbortReq(jobID string) cmn.HreqArgs {
   265  	var (
   266  		xid    = "nabrt-" + cos.GenUUID()
   267  		q      = url.Values{apc.QparamUUID: []string{xid}} // ditto
   268  		args   = cmn.HreqArgs{Method: http.MethodDelete, Query: q}
   269  		dlBody = AdminBody{
   270  			ID: jobID,
   271  		}
   272  	)
   273  	args.Path = apc.URLPathDownloadAbort.S
   274  	args.Body = cos.MustMarshal(dlBody)
   275  	return args
   276  }