// Package ais provides core functionality for the AIStore object storage.
/*
 * Copyright (c) 2018-2024, NVIDIA CORPORATION. All rights reserved.
 */
package ais

import (
	"context"
	"crypto/tls"
	"crypto/x509"
	"errors"
	"fmt"
	"io"
	"log"
	"net"
	"net/http"
	"net/url"
	"os"
	rdebug "runtime/debug"
	"strings"
	"sync"
	"syscall"
	"time"

	"github.com/NVIDIA/aistore/3rdparty/golang/mux"
	"github.com/NVIDIA/aistore/api/apc"
	"github.com/NVIDIA/aistore/cmn"
	"github.com/NVIDIA/aistore/cmn/cifl"
	"github.com/NVIDIA/aistore/cmn/cos"
	"github.com/NVIDIA/aistore/cmn/debug"
	"github.com/NVIDIA/aistore/cmn/nlog"
	"github.com/NVIDIA/aistore/core/meta"
	"github.com/NVIDIA/aistore/ext/etl"
	"github.com/NVIDIA/aistore/memsys"
	"github.com/NVIDIA/aistore/xact/xreg"
	jsoniter "github.com/json-iterator/go"
	"github.com/tinylib/msgp/msgp"
)

// NOTE(review): presumably the User-Agent value of intra-cluster requests;
// the usage is not visible in this file - confirm.
const ua = "aisnode"

const unknownDaemonID = "unknown"

const whatRenamedLB = "renamedlb"

// common error formats
const (
	fmtErrInsuffMpaths1 = "%s: not enough mountpaths (%d) to configure %s as %d-way mirror"
	fmtErrInsuffMpaths2 = "%s: not enough mountpaths (%d) to replicate %s (configured) %d times"
	fmtErrInvaldAction  = "invalid action %q (expected one of %v)"
	fmtUnknownQue       = "unexpected query [what=%s]"
	fmtNested           = "%s: nested (%v): failed to %s %q: %v"
	fmtOutside          = "%s is present (vs requested 'flt-outside'(%d))"
	fmtFailedRejoin     = "%s failed to rejoin cluster: %v(%d)"
	fmtSelfNotPresent   = "%s (self) not present in %s"
)

// intra-cluster control messages
type (
	// cluster-wide control information - replicated, versioned, and synchronized
	// usage: elect new primary, join cluster, ...
	cluMeta struct {
		Smap      *smapX        `json:"smap"`
		BMD       *bucketMD     `json:"bmd"`
		RMD       *rebMD        `json:"rmd"`
		EtlMD     *etlMD        `json:"etlMD"`
		Config    *globalConfig `json:"config"`
		SI        *meta.Snode   `json:"si"`
		PrimeTime int64         `json:"prime_time"`
		Flags     cifl.Flags    `json:"flags"`
	}

	// extend control msg: ActionMsg with an extra information for node <=> node control plane communications
	// (both versions are serialized as JSON strings, per the `,string` tag option)
	aisMsg struct {
		apc.ActMsg
		UUID       string `json:"uuid"` // cluster-wide ID of this action (operation, transaction)
		BMDVersion int64  `json:"bmdversion,string"`
		RMDVersion int64  `json:"rmdversion,string"`
	}

	// cleanmark: a pair of versions plus interrupted/restarted indicators
	// (versions are serialized as JSON strings, per the `,string` tag option)
	cleanmark struct {
		OldVer      int64 `json:"oldver,string"`
		NewVer      int64 `json:"newver,string"`
		Interrupted bool  `json:"interrupted"`
		Restarted   bool  `json:"restarted"`
	}
)

type (
	byteRanges struct {
		Range string // cos.HdrRange, see https://www.rfc-editor.org/rfc/rfc7233#section-2.1
		Size  int64  // size, in bytes
	}

	// callResult contains HTTP response.
	callResult struct {
		v       any // unmarshalled value (only when requested via `callArgs.cresv`)
		err     error
		si      *meta.Snode
		header  http.Header
		details string
		bytes   []byte // response bytes (raw)
		status  int
	}

	// sliceResults is pooled and recycled via allocBcastRes/freeBcastRes
	sliceResults []*callResult
	bcastResults struct {
		s  sliceResults
		mu sync.Mutex
	}

	// cresv: call result value factory and result-type specific decoder
	// (used in both callArgs and bcastArgs)
	cresv interface {
		newV() any
		read(*callResult, io.Reader)
	}

	// callArgs: unicast control-plane call arguments
	callArgs struct {
		cresv   cresv
		si      *meta.Snode
		req     cmn.HreqArgs
		timeout time.Duration
	}

	// bcastArgs: intra-cluster broadcast call args
	bcastArgs struct {
		cresv             cresv          // call result value (comment above)
		smap              *smapX         // Smap to use
		network           string         // one of the cmn.KnownNetworks
		req               cmn.HreqArgs   // h.call args
		nodes             []meta.NodeMap // broadcast destinations - map(s)
		selected          meta.Nodes     // broadcast destinations - slice of selected few
		timeout           time.Duration  // call timeout
		to                int            // (all targets, all proxies, all nodes) enum
		nodeCount         int            // m.b. greater or equal destination count
		ignoreMaintenance bool           // do not skip nodes in maintenance mode
		async             bool           // ignore results
	}

	networkHandler struct {
		r   string           // resource
		h   http.HandlerFunc // handler
		net netAccess        // handler network access
	}

	nodeRegPool []cluMeta

	// what data to omit when sending request/response (join-cluster, kalive)
	cmetaFillOpt struct {
		htext         htext
		skipSmap      bool
		skipBMD       bool
		skipRMD       bool
		skipConfig    bool
		skipEtlMD     bool
		fillRebMarker bool
		skipPrimeTime bool
	}

	// getMaxCii accumulates health responses (see its `do` method):
	// `mu` is held while `maxCii`, `cnt`, and `maxConfVer` are updated
	getMaxCii struct {
		h          *htrun
		maxCii     *cifl.Info // the highest-Smap-version cluster info seen so far
		query      url.Values
		maxConfVer int64 // the highest config version seen so far
		timeout    time.Duration
		mu         sync.Mutex
		cnt        int // confirmation count (compared with maxVerConfirmations in haveEnough)
		checkAll   bool
	}

	httpMuxers map[string]*mux.ServeMux // by http.Method

	// http server and http runner (common for proxy and target)
	// (the embedded mutex is held by `listen` and `shutdown` when accessing `s`)
	netServer struct {
		sync.Mutex
		s             *http.Server
		muxers        httpMuxers
		sndRcvBufSize int
	}

	// nlogWriter forwards everything written to it to nlog (see its Write method)
	nlogWriter struct{}
)

// error types
type (
	errTgtBmdUUIDDiffer struct{ detail string } // BMD & its uuid
	errPrxBmdUUIDDiffer struct{ detail string }
	errBmdUUIDSplit     struct{ detail string }
	errSmapUUIDDiffer   struct{ detail string } // ditto Smap
	errNodeNotFound     struct {
		msg  string
		id   string
		si   *meta.Snode
		smap *smapX
	}
	errNotEnoughTargets struct {
		si       *meta.Snode
		smap     *smapX
		required int // should at least contain
	}
	// NOTE: field order matters - constructed positionally in newErrDowngrade
	errDowngrade struct {
		si       *meta.Snode
		from, to string
	}
	// NOTE: field order matters - constructed positionally in newErrNotPrimary
	errNotPrimary struct {
		si     *meta.Snode
		smap   *smapX
		detail string
	}
	errNoUnregister struct {
		action string
	}
)

// all HTTP methods that get their own muxer (see newMuxers)
var allHTTPverbs = []string{
	http.MethodGet, http.MethodHead, http.MethodPost, http.MethodPut, http.MethodPatch,
	http.MethodDelete, http.MethodConnect, http.MethodOptions, http.MethodTrace,
}

var (
	errRebalanceDisabled = errors.New("rebalance is disabled")
	errForwarded         = errors.New("forwarded")
	errSendingResp       = errors.New("err-sending-resp")
	errFastKalive        = errors.New("cannot fast-keepalive")
)

// BMD uuid errs
var errNoBMD = errors.New("no bucket metadata")

// the following four errors simply return their preformatted `detail`
func (e *errTgtBmdUUIDDiffer) Error() string { return e.detail }
func (e *errBmdUUIDSplit) Error() string     { return e.detail }
func (e *errPrxBmdUUIDDiffer) Error() string { return e.detail }
func (e *errSmapUUIDDiffer) Error() string   { return e.detail }
func (e *errNodeNotFound) Error() string {
	return fmt.Sprintf("%s: %s node %s not present in the %s", e.si, e.msg, e.id, e.smap)
}

/////////////////////
// errNoUnregister //
/////////////////////

func (e *errNoUnregister) Error() string { return e.action }

// isErrNoUnregister reports whether err itself is an *errNoUnregister
// (plain type assertion: wrapped errors are not unwrapped).
func isErrNoUnregister(err error) (ok bool) {
	_, ok = err.(*errNoUnregister)
	return
}

//////////////////
// errDowngrade //
//////////////////

func newErrDowngrade(si *meta.Snode, from, to string) *errDowngrade {
	return &errDowngrade{si, from, to}
}

func (e *errDowngrade) Error() string {
	return fmt.Sprintf("%s: attempt to downgrade %s to %s", e.si, e.from, e.to)
}

// isErrDowngrade reports whether err is an *errDowngrade or wraps one.
func isErrDowngrade(err error) bool {
	// fast path: direct type match
	if _, ok := err.(*errDowngrade); ok {
		return true
	}
	// otherwise, walk the chain of wrapped errors
	erd := &errDowngrade{}
	return errors.As(err, &erd)
}

/////////////////////////
// errNotEnoughTargets //
/////////////////////////

// Error reports the required number of targets vs. the current
// `e.smap.CountActiveTs()`.
func (e *errNotEnoughTargets) Error() string {
	return fmt.Sprintf("%s: not enough targets: %s, need %d, have %d",
		e.si, e.smap, e.required, e.smap.CountActiveTs())
}

///////////////////
// errNotPrimary //
///////////////////

// newErrNotPrimary builds *errNotPrimary; `detail` is optional and
// only its first element (if any) is used.
func newErrNotPrimary(si *meta.Snode, smap *smapX, detail ...string) *errNotPrimary {
	if len(detail) == 0 {
		return &errNotPrimary{si, smap, ""}
	}
	return &errNotPrimary{si, smap, detail[0]}
}
290 291 func (e *errNotPrimary) Error() string { 292 var present, detail string 293 if !e.smap.isPresent(e.si) { 294 present = "not present in the " 295 } 296 if e.detail != "" { 297 detail = ": " + e.detail 298 } 299 return fmt.Sprintf("%s is not primary [%s%s]%s", e.si, present, e.smap.StringEx(), detail) 300 } 301 302 /////////////// 303 // bargsPool & callArgsPool 304 /////////////// 305 306 var ( 307 bargsPool, cargsPool sync.Pool 308 bargs0 bcastArgs 309 cargs0 callArgs 310 ) 311 312 func allocBcArgs() (a *bcastArgs) { 313 if v := bargsPool.Get(); v != nil { 314 a = v.(*bcastArgs) 315 return 316 } 317 return &bcastArgs{} 318 } 319 320 func freeBcArgs(a *bcastArgs) { 321 sel := a.selected 322 *a = bargs0 323 if sel != nil { 324 a.selected = sel[:0] 325 } 326 bargsPool.Put(a) 327 } 328 329 func allocCargs() (a *callArgs) { 330 if v := cargsPool.Get(); v != nil { 331 a = v.(*callArgs) 332 return 333 } 334 return &callArgs{} 335 } 336 337 func freeCargs(a *callArgs) { 338 *a = cargs0 339 cargsPool.Put(a) 340 } 341 342 /////////////////////// 343 // call result pools // 344 /////////////////////// 345 346 var ( 347 resultsPool sync.Pool 348 callResPool sync.Pool 349 callRes0 callResult 350 ) 351 352 func allocCR() (a *callResult) { 353 if v := callResPool.Get(); v != nil { 354 a = v.(*callResult) 355 debug.Assert(a.si == nil) 356 return 357 } 358 return &callResult{} 359 } 360 361 func freeCR(res *callResult) { 362 *res = callRes0 363 callResPool.Put(res) 364 } 365 366 func allocBcastRes(n int) sliceResults { 367 if v := resultsPool.Get(); v != nil { 368 a := v.(*sliceResults) 369 return *a 370 } 371 return make(sliceResults, 0, n) 372 } 373 374 func freeBcastRes(results sliceResults) { 375 for _, res := range results { 376 freeCR(res) 377 } 378 results = results[:0] 379 resultsPool.Put(&results) 380 } 381 382 // 383 // all `cresv` implementations 384 // and common read-body methods w/ optional value-unmarshaling 385 // 386 387 type ( 388 cresCM struct{} // -> 
cluMeta; selectively and alternatively, via `recvCluMetaBytes` 389 cresSM struct{} // -> smapX 390 cresND struct{} // -> meta.Snode 391 cresBA struct{} // -> cmn.BackendInfoAIS 392 cresEI struct{} // -> etl.InfoList 393 cresEL struct{} // -> etl.Logs 394 cresEM struct{} // -> etl.CPUMemUsed 395 cresIC struct{} // -> icBundle 396 cresBM struct{} // -> bucketMD 397 398 cresLso struct{} // -> cmn.LsoRes 399 cresBsumm struct{} // -> cmn.AllBsummResults 400 ) 401 402 var ( 403 _ cresv = cresCM{} 404 _ cresv = cresLso{} 405 _ cresv = cresSM{} 406 _ cresv = cresND{} 407 _ cresv = cresBA{} 408 _ cresv = cresEI{} 409 _ cresv = cresEL{} 410 _ cresv = cresEM{} 411 _ cresv = cresIC{} 412 _ cresv = cresBM{} 413 _ cresv = cresBsumm{} 414 ) 415 416 func (res *callResult) read(body io.Reader) { res.bytes, res.err = io.ReadAll(body) } 417 func (res *callResult) jread(body io.Reader) { res.err = jsoniter.NewDecoder(body).Decode(res.v) } 418 419 func (res *callResult) mread(body io.Reader) { 420 vv, ok := res.v.(msgp.Decodable) 421 debug.Assert(ok) 422 buf, slab := memsys.PageMM().AllocSize(cmn.MsgpLsoBufSize) 423 res.err = vv.DecodeMsg(msgp.NewReaderBuf(body, buf)) 424 slab.Free(buf) 425 } 426 427 func (cresCM) newV() any { return &cluMeta{} } 428 func (c cresCM) read(res *callResult, body io.Reader) { res.v = c.newV(); res.jread(body) } 429 430 func (cresLso) newV() any { return &cmn.LsoRes{} } 431 func (c cresLso) read(res *callResult, body io.Reader) { res.v = c.newV(); res.mread(body) } 432 433 func (cresSM) newV() any { return &smapX{} } 434 func (c cresSM) read(res *callResult, body io.Reader) { res.v = c.newV(); res.jread(body) } 435 436 func (cresND) newV() any { return &meta.Snode{} } 437 func (c cresND) read(res *callResult, body io.Reader) { res.v = c.newV(); res.jread(body) } 438 439 func (cresBA) newV() any { return &meta.RemAisVec{} } 440 func (c cresBA) read(res *callResult, body io.Reader) { res.v = c.newV(); res.jread(body) } 441 442 func (cresEI) newV() any { 
return &etl.InfoList{} } 443 func (c cresEI) read(res *callResult, body io.Reader) { res.v = c.newV(); res.jread(body) } 444 445 func (cresEL) newV() any { return &etl.Logs{} } 446 func (c cresEL) read(res *callResult, body io.Reader) { res.v = c.newV(); res.jread(body) } 447 448 func (cresEM) newV() any { return &etl.CPUMemUsed{} } 449 func (c cresEM) read(res *callResult, body io.Reader) { res.v = c.newV(); res.jread(body) } 450 451 func (cresIC) newV() any { return &icBundle{} } 452 func (c cresIC) read(res *callResult, body io.Reader) { res.v = c.newV(); res.jread(body) } 453 454 func (cresBM) newV() any { return &bucketMD{} } 455 func (c cresBM) read(res *callResult, body io.Reader) { res.v = c.newV(); res.jread(body) } 456 457 func (cresBsumm) newV() any { return &cmn.AllBsummResults{} } 458 func (c cresBsumm) read(res *callResult, body io.Reader) { res.v = c.newV(); res.jread(body) } 459 460 //////////////// 461 // nlogWriter // 462 //////////////// 463 464 const tlsHandshakeErrorPrefix = "http: TLS handshake error" 465 466 func (*nlogWriter) Write(p []byte) (int, error) { 467 s := string(p) 468 // Ignore TLS handshake errors (see: https://github.com/golang/go/issues/26918). 469 if strings.Contains(s, tlsHandshakeErrorPrefix) { 470 return len(p), nil 471 } 472 473 nlog.Errorln(s) 474 475 stacktrace := rdebug.Stack() 476 nlog.Errorln(string(stacktrace)) 477 return len(p), nil 478 } 479 480 /////////////// 481 // netServer // 482 /////////////// 483 484 // Override muxer ServeHTTP to support proxying HTTPS requests. Clients 485 // initiate all HTTPS requests with CONNECT method instead of GET/PUT etc. 
486 func (server *netServer) ServeHTTP(w http.ResponseWriter, r *http.Request) { 487 if r.Method != http.MethodConnect { 488 server.muxers.ServeHTTP(w, r) 489 return 490 } 491 492 // TODO: add support for caching HTTPS requests 493 destConn, err := net.DialTimeout("tcp", r.Host, 10*time.Second) 494 if err != nil { 495 cmn.WriteErr(w, r, err, http.StatusServiceUnavailable) 496 return 497 } 498 499 // Second, hijack the connection. A kind of man-in-the-middle attack 500 // From this point on, this function is responsible for HTTP connection 501 hijacker, ok := w.(http.Hijacker) 502 if !ok { 503 cmn.WriteErr(w, r, errors.New("response writer does not support hijacking"), 504 http.StatusInternalServerError) 505 return 506 } 507 508 // First, send that everything is OK. Trying to write a header after 509 // hijacking generates a warning and nothing works 510 w.WriteHeader(http.StatusOK) 511 512 clientConn, _, err := hijacker.Hijack() 513 if err != nil { 514 // NOTE: cannot send error because we have already written a header. 515 nlog.Errorln(err) 516 return 517 } 518 519 // Third, start transparently sending data between source and destination 520 // by creating a tunnel between them 521 transfer := func(destination io.WriteCloser, source io.ReadCloser) { 522 io.Copy(destination, source) 523 source.Close() 524 destination.Close() 525 } 526 527 // NOTE: it looks like double closing both connections. 
528 // Need to check how the tunnel works 529 go transfer(destConn, clientConn) 530 go transfer(clientConn, destConn) 531 } 532 533 func (server *netServer) listen(addr string, logger *log.Logger, tlsConf *tls.Config, config *cmn.Config) (err error) { 534 var ( 535 httpHandler = server.muxers 536 tag = "HTTP" 537 retried bool 538 ) 539 server.Lock() 540 server.s = &http.Server{ 541 Addr: addr, 542 Handler: httpHandler, 543 ErrorLog: logger, 544 ReadHeaderTimeout: apc.ReadHeaderTimeout, 545 } 546 if timeout, isSet := cmn.ParseReadHeaderTimeout(); isSet { // optional env var 547 server.s.ReadHeaderTimeout = timeout 548 } 549 if server.sndRcvBufSize > 0 && !config.Net.HTTP.UseHTTPS { 550 server.s.ConnState = server.connStateListener // setsockopt; see also cmn.NewTransport 551 } 552 server.s.TLSConfig = tlsConf 553 server.Unlock() 554 retry: 555 if config.Net.HTTP.UseHTTPS { 556 tag = "HTTPS" 557 err = server.s.ListenAndServeTLS(config.Net.HTTP.Certificate, config.Net.HTTP.CertKey) 558 } else { 559 err = server.s.ListenAndServe() 560 } 561 if err == http.ErrServerClosed { 562 return nil 563 } 564 if errors.Is(err, syscall.EADDRINUSE) && !retried { 565 nlog.Warningf("%q - shutting-down-and-restarting or else? 
will retry once...", err) 566 time.Sleep(max(5*time.Second, config.Timeout.MaxKeepalive.D())) 567 retried = true 568 goto retry 569 } 570 nlog.Errorf("%s terminated with error: %v", tag, err) 571 return 572 } 573 574 func newTLS(conf *cmn.HTTPConf) (tlsConf *tls.Config, err error) { 575 var ( 576 pool *x509.CertPool 577 caCert []byte 578 clientAuth = tls.ClientAuthType(conf.ClientAuthTLS) 579 ) 580 if clientAuth > tls.RequestClientCert { 581 if caCert, err = os.ReadFile(conf.ClientCA); err != nil { 582 return 583 } 584 pool = x509.NewCertPool() 585 if ok := pool.AppendCertsFromPEM(caCert); !ok { 586 return nil, fmt.Errorf("tls: failed to append CA certs from PEM: %q", conf.ClientCA) 587 } 588 } 589 tlsConf = &tls.Config{ClientAuth: clientAuth, ClientCAs: pool} 590 return 591 } 592 593 func (server *netServer) connStateListener(c net.Conn, cs http.ConnState) { 594 if cs != http.StateNew { 595 return 596 } 597 tcpconn, ok := c.(*net.TCPConn) 598 cos.Assert(ok) 599 rawconn, _ := tcpconn.SyscallConn() 600 args := cmn.TransportArgs{SndRcvBufSize: server.sndRcvBufSize} 601 rawconn.Control(args.ConnControl(rawconn)) 602 } 603 604 func (server *netServer) shutdown(config *cmn.Config) { 605 server.Lock() 606 defer server.Unlock() 607 if server.s == nil { 608 return 609 } 610 ctx, cancel := context.WithTimeout(context.Background(), config.Timeout.MaxHostBusy.D()) 611 if err := server.s.Shutdown(ctx); err != nil { 612 nlog.Infoln("http server shutdown err:", err) 613 } 614 cancel() 615 } 616 617 //////////////// 618 // httpMuxers // 619 //////////////// 620 621 // interface guard 622 var _ http.Handler = (*httpMuxers)(nil) 623 624 func newMuxers() httpMuxers { 625 m := make(httpMuxers, len(allHTTPverbs)) 626 for _, v := range allHTTPverbs { 627 m[v] = mux.NewServeMux() 628 } 629 return m 630 } 631 632 // ServeHTTP dispatches the request to the handler whose 633 // pattern most closely matches the request URL. 
634 func (m httpMuxers) ServeHTTP(w http.ResponseWriter, r *http.Request) { 635 if sm, ok := m[r.Method]; ok { 636 sm.ServeHTTP(w, r) 637 return 638 } 639 w.WriteHeader(http.StatusBadRequest) 640 } 641 642 ///////////////// 643 // clusterInfo // 644 ///////////////// 645 646 func (p *proxy) ciiFill(cii *cifl.Info) { 647 p.htrun.fill(cii) 648 onl := true 649 flt := nlFilter{Kind: apc.ActRebalance, OnlyRunning: &onl} 650 if nl := p.notifs.find(flt); nl != nil { 651 cii.Flags = cii.Flags.Set(cifl.Rebalancing) 652 } 653 } 654 655 func (t *target) ciiFill(cii *cifl.Info) { 656 t.htrun.fill(cii) 657 marked := xreg.GetRebMarked() 658 if marked.Xact != nil { 659 cii.Flags = cii.Flags.Set(cifl.Rebalancing) 660 } 661 if marked.Interrupted { 662 cii.Flags = cii.Flags.Set(cifl.RebalanceInterrupted) 663 } 664 if marked.Restarted { 665 cii.Flags = cii.Flags.Set(cifl.Restarted) 666 } 667 marked = xreg.GetResilverMarked() 668 if marked.Xact != nil { 669 cii.Flags = cii.Flags.Set(cifl.Resilvering) 670 } 671 if marked.Interrupted { 672 cii.Flags = cii.Flags.Set(cifl.ResilverInterrupted) 673 } 674 } 675 676 func (h *htrun) fill(cii *cifl.Info) { 677 var ( 678 smap = h.owner.smap.get() 679 bmd = h.owner.bmd.get() 680 rmd = h.owner.rmd.get() 681 etl = h.owner.etl.get() 682 ) 683 smap.fill(cii) 684 cii.BMD.Version = bmd.version() 685 cii.BMD.UUID = bmd.UUID 686 cii.RMD.Version = rmd.Version 687 cii.Config.Version = h.owner.config.version() 688 cii.EtlMD.Version = etl.version() 689 if h.ClusterStarted() { 690 cii.Flags = cii.Flags.Set(cifl.ClusterStarted) 691 } 692 if h.NodeStarted() { 693 cii.Flags = cii.Flags.Set(cifl.NodeStarted) 694 } 695 } 696 697 func (smap *smapX) fill(cii *cifl.Info) { 698 cii.Smap.Version = smap.version() 699 cii.Smap.UUID = smap.UUID 700 if smap.Primary != nil { 701 cii.Smap.Primary.CtrlURL = smap.Primary.URL(cmn.NetIntraControl) 702 cii.Smap.Primary.PubURL = smap.Primary.URL(cmn.NetPublic) 703 cii.Smap.Primary.ID = smap.Primary.ID() 704 if voteInProgress() != 
nil { 705 cii.Flags = cii.Flags.Set(cifl.VoteInProgress) 706 } 707 } 708 } 709 710 /////////////// 711 // getMaxCii // 712 /////////////// 713 714 func (c *getMaxCii) do(si *meta.Snode, wg cos.WG, smap *smapX) { 715 var cii *cifl.Info 716 body, _, err := c.h.reqHealth(si, c.timeout, c.query, smap) 717 if err != nil { 718 goto ret 719 } 720 if cii = extractCii(body, smap, c.h.si, si); cii == nil { 721 goto ret 722 } 723 if cii.Smap.UUID != smap.UUID { 724 if cii.Smap.UUID == "" { 725 goto ret 726 } 727 if smap.UUID != "" { 728 // FATAL: cluster integrity error (cie) 729 cos.ExitLogf("%s: split-brain uuid [%s %s] vs %+v", ciError(10), c.h, smap.StringEx(), cii.Smap) 730 } 731 } 732 c.mu.Lock() 733 if c.maxCii.Smap.Version < cii.Smap.Version { 734 // reset confirmation count if there's any sign of disagreement 735 if c.maxCii.Smap.Primary.ID != cii.Smap.Primary.ID || cii.Flags.IsSet(cifl.VoteInProgress) { 736 c.cnt = 1 737 } else { 738 c.cnt++ 739 } 740 c.maxCii = cii 741 } else if c.maxCii.SmapEqual(cii) { 742 c.cnt++ 743 } 744 if c.maxConfVer < cii.Config.Version { 745 c.maxConfVer = cii.Config.Version 746 } 747 c.mu.Unlock() 748 ret: 749 wg.Done() 750 } 751 752 // have enough confirmations? 
753 func (c *getMaxCii) haveEnough() (yes bool) { 754 c.mu.Lock() 755 yes = c.cnt >= maxVerConfirmations 756 c.mu.Unlock() 757 return 758 } 759 760 func extractCii(body []byte, smap *smapX, self, si *meta.Snode) *cifl.Info { 761 var cii cifl.Info 762 if err := jsoniter.Unmarshal(body, &cii); err != nil { 763 nlog.Errorf("%s: failed to unmarshal clusterInfo, err: %v", self, err) 764 return nil 765 } 766 if smap.UUID != cii.Smap.UUID { 767 nlog.Errorf("%s: Smap have different UUIDs: %s and %s from %s", self, smap.UUID, cii.Smap.UUID, si) 768 return nil 769 } 770 return &cii 771 } 772 773 //////////////// 774 // apiRequest // 775 //////////////// 776 777 type apiRequest struct { 778 bck *meta.Bck // out: initialized bucket 779 780 // URL query: the conventional/slow and 781 // the fast alternative tailored exclusively for the datapath (either/or) 782 dpq *dpq 783 query url.Values 784 785 prefix []string // in: URL must start with these items 786 items []string // out: URL items after the prefix 787 788 after int // in: the number of items after the prefix 789 bckIdx int // in: ordinal number of bucket in URL (some paths starts with extra items: EC & ETL) 790 } 791 792 var ( 793 apiReqPool sync.Pool 794 apireq0 apiRequest 795 ) 796 797 func apiReqAlloc(after int, prefix []string, useDpq bool) (a *apiRequest) { 798 if v := apiReqPool.Get(); v != nil { 799 a = v.(*apiRequest) 800 } else { 801 a = &apiRequest{} 802 } 803 a.after, a.prefix = after, prefix 804 if useDpq { 805 a.dpq = dpqAlloc() 806 } 807 return a 808 } 809 810 func apiReqFree(a *apiRequest) { 811 if a.dpq != nil { 812 dpqFree(a.dpq) 813 } 814 *a = apireq0 815 apiReqPool.Put(a) 816 } 817 818 // 819 // misc helpers 820 // 821 822 func newBckFromQ(bckName string, query url.Values, dpq *dpq) (*meta.Bck, error) { 823 bck := _bckFromQ(bckName, query, dpq) 824 normp, err := cmn.NormalizeProvider(bck.Provider) 825 if err == nil { 826 bck.Provider = normp 827 err = bck.Validate() 828 } 829 return bck, err 830 } 831 
832 func newQbckFromQ(bckName string, query url.Values, dpq *dpq) (*cmn.QueryBcks, error) { 833 qbck := (*cmn.QueryBcks)(_bckFromQ(bckName, query, dpq)) 834 return qbck, qbck.Validate() 835 } 836 837 func _bckFromQ(bckName string, query url.Values, dpq *dpq) *meta.Bck { 838 var ( 839 provider string 840 namespace cmn.Ns 841 ) 842 if query != nil { 843 debug.Assert(dpq == nil) 844 provider = query.Get(apc.QparamProvider) 845 namespace = cmn.ParseNsUname(query.Get(apc.QparamNamespace)) 846 } else { 847 provider = dpq.bck.provider 848 namespace = cmn.ParseNsUname(dpq.bck.namespace) 849 } 850 return &meta.Bck{Name: bckName, Provider: provider, Ns: namespace} 851 } 852 853 func newBckFromQuname(query url.Values, required bool) (*meta.Bck, error) { 854 uname := query.Get(apc.QparamBckTo) 855 if uname == "" { 856 if required { 857 return nil, fmt.Errorf("missing %q query parameter", apc.QparamBckTo) 858 } 859 return nil, nil 860 } 861 bck, objName := cmn.ParseUname(uname) 862 if objName != "" { 863 return nil, fmt.Errorf("bucket %s: unexpected non-empty object name %q", bck, objName) 864 } 865 if err := bck.Validate(); err != nil { 866 return nil, err 867 } 868 return meta.CloneBck(&bck), nil 869 } 870 871 func _reMirror(bprops, nprops *cmn.Bprops) bool { 872 if !bprops.Mirror.Enabled && nprops.Mirror.Enabled { 873 return true 874 } 875 if bprops.Mirror.Enabled && nprops.Mirror.Enabled { 876 return bprops.Mirror.Copies != nprops.Mirror.Copies 877 } 878 return false 879 } 880 881 func _reEC(bprops, nprops *cmn.Bprops, bck *meta.Bck, smap *smapX) (targetCnt int, yes bool) { 882 if !nprops.EC.Enabled { 883 if bprops.EC.Enabled { 884 // abort running ec-encode xaction, if exists 885 flt := xreg.Flt{Kind: apc.ActECEncode, Bck: bck} 886 xreg.DoAbort(flt, errors.New("ec-disabled")) 887 } 888 return 889 } 890 if smap != nil { 891 targetCnt = smap.CountActiveTs() 892 } 893 if !bprops.EC.Enabled || 894 (bprops.EC.DataSlices != nprops.EC.DataSlices || bprops.EC.ParitySlices != 
nprops.EC.ParitySlices) { 895 yes = true 896 } 897 return 898 }