github.com/NVIDIA/aistore@v1.3.23-0.20240517131212-7df6609be51d/ext/dload/utils.go (about) 1 // Package dload implements functionality to download resources into AIS cluster from external source. 2 /* 3 * Copyright (c) 2018-2022, NVIDIA CORPORATION. All rights reserved. 4 */ 5 package dload 6 7 import ( 8 "context" 9 "errors" 10 "net/http" 11 "net/url" 12 "path" 13 "strings" 14 "time" 15 16 "github.com/NVIDIA/aistore/api/apc" 17 "github.com/NVIDIA/aistore/cmn" 18 "github.com/NVIDIA/aistore/cmn/cos" 19 "github.com/NVIDIA/aistore/cmn/debug" 20 "github.com/NVIDIA/aistore/cmn/nlog" 21 "github.com/NVIDIA/aistore/core" 22 "github.com/NVIDIA/aistore/core/meta" 23 jsoniter "github.com/json-iterator/go" 24 ) 25 26 const headReqTimeout = 5 * time.Second 27 28 var errInvalidTarget = errors.New("downloader: invalid target") 29 30 func clientForURL(u string) *http.Client { 31 if cos.IsHTTPS(u) { 32 return g.clientTLS 33 } 34 return g.clientH 35 } 36 37 //nolint:gocritic // need a copy of cos.ParsedTemplate 38 func countObjects(pt cos.ParsedTemplate, dir string, bck *meta.Bck) (cnt int, err error) { 39 var ( 40 smap = core.T.Sowner().Get() 41 sid = core.T.SID() 42 si *meta.Snode 43 ) 44 pt.InitIter() 45 for link, ok := pt.Next(); ok; link, ok = pt.Next() { 46 name := path.Join(dir, path.Base(link)) 47 name, err = NormalizeObjName(name) 48 if err != nil { 49 return 50 } 51 si, err = smap.HrwName2T(bck.MakeUname(name)) 52 if err != nil { 53 return 54 } 55 if si.ID() == sid { 56 cnt++ 57 } 58 } 59 return cnt, nil 60 } 61 62 // buildDlObjs returns list of objects that must be downloaded by target. 63 func buildDlObjs(bck *meta.Bck, objects cos.StrKVs) ([]dlObj, error) { 64 var ( 65 smap = core.T.Sowner().Get() 66 sid = core.T.SID() 67 ) 68 69 objs := make([]dlObj, 0, len(objects)) 70 for name, link := range objects { 71 obj, err := makeDlObj(smap, sid, bck, name, link) 72 if err != nil { 73 if err == errInvalidTarget { 74 continue 75 } 76 return nil, err 77 } 78 objs = append(objs, obj) 79 } 80 return objs, nil 81 } 82 83 func makeDlObj(smap *meta.Smap, sid string, bck *meta.Bck, objName, link string) (dlObj, error) { 84 objName, err := NormalizeObjName(objName) 85 if err != nil { 86 return dlObj{}, err 87 } 88 89 si, err := smap.HrwName2T(bck.MakeUname(objName)) 90 if err != nil { 91 return dlObj{}, err 92 } 93 if si.ID() != sid { 94 return dlObj{}, errInvalidTarget 95 } 96 97 return dlObj{ 98 objName: objName, 99 // Make sure that link contains protocol (absence of protocol can result in errors). 100 link: cmn.PrependProtocol(link), 101 fromRemote: link == "", 102 }, nil 103 } 104 105 // Removes everything that goes after '?', eg. "?query=key..." so it will not 106 // be part of final object name. 107 func NormalizeObjName(objName string) (string, error) { 108 u, err := url.Parse(objName) 109 if err != nil { 110 return "", nil 111 } 112 113 if u.Path == "" { 114 return objName, nil 115 } 116 117 return url.PathUnescape(u.Path) 118 } 119 120 func ParseStartRequest(bck *meta.Bck, id string, dlb Body, xdl *Xact) (jobif, error) { 121 switch dlb.Type { 122 case TypeBackend: 123 dp := &BackendBody{} 124 err := jsoniter.Unmarshal(dlb.RawMessage, dp) 125 if err != nil { 126 return nil, err 127 } 128 if err := dp.Validate(); err != nil { 129 return nil, err 130 } 131 return newBackendDlJob(id, bck, dp, xdl) 132 case TypeMulti: 133 dp := &MultiBody{} 134 err := jsoniter.Unmarshal(dlb.RawMessage, dp) 135 if err != nil { 136 return nil, err 137 } 138 if err := dp.Validate(); err != nil { 139 return nil, err 140 } 141 return newMultiDlJob(id, bck, dp, xdl) 142 case TypeRange: 143 dp := &RangeBody{} 144 err := jsoniter.Unmarshal(dlb.RawMessage, dp) 145 if err != nil { 146 return nil, err 147 } 148 if err := dp.Validate(); err != nil { 149 return nil, err 150 } 151 return newRangeDlJob(id, bck, dp, xdl) 152 case TypeSingle: 153 dp := &SingleBody{} 154 err := jsoniter.Unmarshal(dlb.RawMessage, dp) 155 if err != nil { 156 return nil, err 157 } 158 if err := dp.Validate(); err != nil { 159 return nil, err 160 } 161 return newSingleDlJob(id, bck, dp, xdl) 162 default: 163 return nil, errors.New("input does not match any of the supported formats (single, range, multi, backend)") 164 } 165 } 166 167 // Given URL (link) and response header parse object attrs for GCP, S3 and Azure. 168 func attrsFromLink(link string, resp *http.Response, oah cos.OAH) (size int64) { 169 u, err := url.Parse(link) 170 debug.AssertNoErr(err) 171 switch { 172 case cos.IsGoogleStorageURL(u) || cos.IsGoogleAPIURL(u): 173 h := cmn.BackendHelpers.Google 174 oah.SetCustomKey(cmn.SourceObjMD, apc.GCP) 175 if v, ok := h.EncodeVersion(resp.Header.Get(cos.GsVersionHeader)); ok { 176 oah.SetCustomKey(cmn.VersionObjMD, v) 177 } 178 if hdr := resp.Header[http.CanonicalHeaderKey(cos.GsCksumHeader)]; len(hdr) > 0 { 179 for cksumType, cksumValue := range parseGoogleCksumHeader(hdr) { 180 switch cksumType { 181 case cos.ChecksumMD5: 182 oah.SetCustomKey(cmn.MD5ObjMD, cksumValue) 183 case cos.ChecksumCRC32C: 184 oah.SetCustomKey(cmn.CRC32CObjMD, cksumValue) 185 default: 186 nlog.Errorf("unimplemented cksum type for custom metadata: %s", cksumType) 187 } 188 } 189 } 190 case cos.IsS3URL(link): 191 h := cmn.BackendHelpers.Amazon 192 oah.SetCustomKey(cmn.SourceObjMD, apc.AWS) 193 if v, ok := h.EncodeVersion(resp.Header.Get(cos.S3VersionHeader)); ok { 194 oah.SetCustomKey(cmn.VersionObjMD, v) 195 } 196 if v, ok := h.EncodeCksum(resp.Header.Get(cos.S3CksumHeader)); ok { 197 oah.SetCustomKey(cmn.MD5ObjMD, v) 198 } 199 case cos.IsAzureURL(u): 200 h := cmn.BackendHelpers.Azure 201 oah.SetCustomKey(cmn.SourceObjMD, apc.Azure) 202 if v, ok := h.EncodeVersion(resp.Header.Get(cos.AzVersionHeader)); ok { 203 oah.SetCustomKey(cmn.VersionObjMD, v) 204 } 205 if v, ok := h.EncodeCksum(resp.Header.Get(cos.AzCksumHeader)); ok { 206 oah.SetCustomKey(cmn.MD5ObjMD, v) 207 } 208 default: 209 oah.SetCustomKey(cmn.SourceObjMD, cmn.WebObjMD) 210 } 211 return resp.ContentLength 212 } 213 214 func parseGoogleCksumHeader(hdr []string) cos.StrKVs { 215 var ( 216 h = cmn.BackendHelpers.Google 217 cksums = make(cos.StrKVs, 2) 218 ) 219 for _, v := range hdr { 220 entry := strings.SplitN(v, "=", 2) 221 debug.Assert(len(entry) == 2) 222 if v, ok := h.EncodeCksum(entry[1]); ok { 223 cksums[entry[0]] = v 224 } 225 } 226 return cksums 227 } 228 229 func headLink(link string) (resp *http.Response, err error) { 230 var ( 231 req *http.Request 232 ctx, cancel = context.WithTimeout(context.Background(), headReqTimeout) 233 ) 234 req, err = http.NewRequestWithContext(ctx, http.MethodHead, link, http.NoBody) 235 if err == nil { 236 resp, err = clientForURL(link).Do(req) 237 } 238 cancel() 239 return 240 } 241 242 // Use all available metadata including {size, version, ETag, MD5, CRC} 243 // to compare local object with its remote counterpart (source). 244 func CompareObjects(lom *core.LOM, dst *DstElement) (bool /*equal*/, error) { 245 if dst.Link == "" { 246 res := lom.CheckRemoteMD(true /*rlocked*/, false /*sync*/, nil /*origReq*/) // TODO: use job.Sync() 247 return res.Eq, res.Err 248 // TODO: make use of res.ObjAttrs 249 } 250 251 resp, err := headLink(dst.Link) //nolint:bodyclose // cos.Close 252 if err != nil { 253 return false, err 254 } 255 cos.Close(resp.Body) 256 257 oa := &cmn.ObjAttrs{} 258 oa.Size = attrsFromLink(dst.Link, resp, oa) // fill in from resp 259 260 return lom.Equal(oa), nil 261 } 262 263 // called via ais/prxnotifs generic mechanism 264 func AbortReq(jobID string) cmn.HreqArgs { 265 var ( 266 xid = "nabrt-" + cos.GenUUID() 267 q = url.Values{apc.QparamUUID: []string{xid}} // ditto 268 args = cmn.HreqArgs{Method: http.MethodDelete, Query: q} 269 dlBody = AdminBody{ 270 ID: jobID, 271 } 272 ) 273 args.Path = apc.URLPathDownloadAbort.S 274 args.Body = cos.MustMarshal(dlBody) 275 return args 276 }