github.com/minio/madmin-go@v1.7.5/heal-commands.go (about)

     1  //
     2  // MinIO Object Storage (c) 2021 MinIO, Inc.
     3  //
     4  // Licensed under the Apache License, Version 2.0 (the "License");
     5  // you may not use this file except in compliance with the License.
     6  // You may obtain a copy of the License at
     7  //
     8  //      http://www.apache.org/licenses/LICENSE-2.0
     9  //
    10  // Unless required by applicable law or agreed to in writing, software
    11  // distributed under the License is distributed on an "AS IS" BASIS,
    12  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  // See the License for the specific language governing permissions and
    14  // limitations under the License.
    15  //
    16  
    17  package madmin
    18  
    19  import (
    20  	"context"
    21  	"encoding/json"
    22  	"fmt"
    23  	"io/ioutil"
    24  	"net/http"
    25  	"net/url"
    26  	"sort"
    27  	"time"
    28  )
    29  
// HealScanMode represents the type of healing scan.
// It is carried in HealOpts.ScanMode and serialized as a JSON number.
type HealScanMode int

// Scan modes. The values are assigned by iota, so HealUnknownScan is 0
// (the zero value of HealScanMode), HealNormalScan is 1 and HealDeepScan
// is 2.
const (
	// HealUnknownScan default is unknown
	HealUnknownScan HealScanMode = iota

	// HealNormalScan checks if parts are present and not outdated
	HealNormalScan

	// HealDeepScan checks for parts bitrot checksums
	HealDeepScan
)
    43  
    44  // HealOpts - collection of options for a heal sequence
    45  type HealOpts struct {
    46  	Recursive bool         `json:"recursive"`
    47  	DryRun    bool         `json:"dryRun"`
    48  	Remove    bool         `json:"remove"`
    49  	Recreate  bool         `json:"recreate"` // Rewrite all resources specified at the bucket or prefix.
    50  	ScanMode  HealScanMode `json:"scanMode"`
    51  	NoLock    bool         `json:"nolock"`
    52  }
    53  
    54  // Equal returns true if no is same as o.
    55  func (o HealOpts) Equal(no HealOpts) bool {
    56  	if o.Recursive != no.Recursive {
    57  		return false
    58  	}
    59  	if o.DryRun != no.DryRun {
    60  		return false
    61  	}
    62  	if o.Remove != no.Remove {
    63  		return false
    64  	}
    65  	if o.Recreate != no.Recreate {
    66  		return false
    67  	}
    68  	return o.ScanMode == no.ScanMode
    69  }
    70  
// HealStartSuccess - holds information about a successfully started
// heal operation.
//
// AdminClient.Heal decodes the server response into this struct when it
// is called with an empty clientToken.
type HealStartSuccess struct {
	ClientToken   string    `json:"clientToken"`
	ClientAddress string    `json:"clientAddress"`
	StartTime     time.Time `json:"startTime"`
}
    78  
// HealStopSuccess - holds information about a successfully stopped
// heal operation. It is defined from HealStartSuccess and therefore has
// exactly the same fields and JSON layout.
type HealStopSuccess HealStartSuccess
    82  
// HealTaskStatus - status struct for a heal task.
//
// AdminClient.Heal decodes the server response into this struct when it
// is called with a non-empty clientToken.
type HealTaskStatus struct {
	Summary       string    `json:"summary"`
	FailureDetail string    `json:"detail"` // note: the JSON key is "detail", not "failureDetail"
	StartTime     time.Time `json:"startTime"`
	HealSettings  HealOpts  `json:"settings"`

	// Items is left out of the encoded JSON when it is empty.
	Items []HealResultItem `json:"items,omitempty"`
}
    92  
// HealItemType - specify the type of heal operation in a healing
// result
type HealItemType string

// HealItemType constants.
//
// NOTE(review): only HealItemMetadata is declared with the explicit
// HealItemType type. The other three have their own "= ..." expression,
// so they are untyped string constants; they convert implicitly where a
// HealItemType is expected but are not HealItemType values by
// declaration. Confirm this is intended before depending on their
// static type.
const (
	HealItemMetadata       HealItemType = "metadata"
	HealItemBucket                      = "bucket"
	HealItemBucketMetadata              = "bucket-metadata"
	HealItemObject                      = "object"
)
   104  
// Drive state constants.
//
// These are the values compared against HealDriveInfo.State by the
// HealResultItem counting helpers (GetMissingCounts, GetOfflineCounts,
// GetCorruptedCounts and GetOnlineCounts).
//
// DriveStateOk is declared as string; the remaining entries are untyped
// string constants, whose default type is also string.
const (
	DriveStateOk          string = "ok"
	DriveStateOffline            = "offline"
	DriveStateCorrupt            = "corrupt"
	DriveStateMissing            = "missing"
	DriveStatePermission         = "permission-denied"
	DriveStateFaulty             = "faulty"
	DriveStateUnknown            = "unknown"
	DriveStateUnformatted        = "unformatted" // only returned by disk
)
   116  
// HealDriveInfo - struct for an individual drive info item.
// It is the element type of the Before.Drives and After.Drives lists of
// HealResultItem.
type HealDriveInfo struct {
	UUID     string `json:"uuid"`
	Endpoint string `json:"endpoint"`
	// State is compared against the DriveState* constants by the
	// HealResultItem counting helpers.
	State string `json:"state"`
}
   123  
   124  // HealResultItem - struct for an individual heal result item
   125  type HealResultItem struct {
   126  	ResultIndex  int64        `json:"resultId"`
   127  	Type         HealItemType `json:"type"`
   128  	Bucket       string       `json:"bucket"`
   129  	Object       string       `json:"object"`
   130  	VersionID    string       `json:"versionId"`
   131  	Detail       string       `json:"detail"`
   132  	ParityBlocks int          `json:"parityBlocks,omitempty"`
   133  	DataBlocks   int          `json:"dataBlocks,omitempty"`
   134  	DiskCount    int          `json:"diskCount"`
   135  	SetCount     int          `json:"setCount"`
   136  	// below slices are from drive info.
   137  	Before struct {
   138  		Drives []HealDriveInfo `json:"drives"`
   139  	} `json:"before"`
   140  	After struct {
   141  		Drives []HealDriveInfo `json:"drives"`
   142  	} `json:"after"`
   143  	ObjectSize int64 `json:"objectSize"`
   144  }
   145  
   146  // GetMissingCounts - returns the number of missing disks before
   147  // and after heal
   148  func (hri *HealResultItem) GetMissingCounts() (b, a int) {
   149  	if hri == nil {
   150  		return
   151  	}
   152  	for _, v := range hri.Before.Drives {
   153  		if v.State == DriveStateMissing {
   154  			b++
   155  		}
   156  	}
   157  	for _, v := range hri.After.Drives {
   158  		if v.State == DriveStateMissing {
   159  			a++
   160  		}
   161  	}
   162  	return
   163  }
   164  
   165  // GetOfflineCounts - returns the number of offline disks before
   166  // and after heal
   167  func (hri *HealResultItem) GetOfflineCounts() (b, a int) {
   168  	if hri == nil {
   169  		return
   170  	}
   171  	for _, v := range hri.Before.Drives {
   172  		if v.State == DriveStateOffline {
   173  			b++
   174  		}
   175  	}
   176  	for _, v := range hri.After.Drives {
   177  		if v.State == DriveStateOffline {
   178  			a++
   179  		}
   180  	}
   181  	return
   182  }
   183  
   184  // GetCorruptedCounts - returns the number of corrupted disks before
   185  // and after heal
   186  func (hri *HealResultItem) GetCorruptedCounts() (b, a int) {
   187  	if hri == nil {
   188  		return
   189  	}
   190  	for _, v := range hri.Before.Drives {
   191  		if v.State == DriveStateCorrupt {
   192  			b++
   193  		}
   194  	}
   195  	for _, v := range hri.After.Drives {
   196  		if v.State == DriveStateCorrupt {
   197  			a++
   198  		}
   199  	}
   200  	return
   201  }
   202  
   203  // GetOnlineCounts - returns the number of online disks before
   204  // and after heal
   205  func (hri *HealResultItem) GetOnlineCounts() (b, a int) {
   206  	if hri == nil {
   207  		return
   208  	}
   209  	for _, v := range hri.Before.Drives {
   210  		if v.State == DriveStateOk {
   211  			b++
   212  		}
   213  	}
   214  	for _, v := range hri.After.Drives {
   215  		if v.State == DriveStateOk {
   216  			a++
   217  		}
   218  	}
   219  	return
   220  }
   221  
   222  // Heal - API endpoint to start heal and to fetch status
   223  // forceStart and forceStop are mutually exclusive, you can either
   224  // set one of them to 'true'. If both are set 'forceStart' will be
   225  // honored.
   226  func (adm *AdminClient) Heal(ctx context.Context, bucket, prefix string,
   227  	healOpts HealOpts, clientToken string, forceStart, forceStop bool) (
   228  	healStart HealStartSuccess, healTaskStatus HealTaskStatus, err error,
   229  ) {
   230  	if forceStart && forceStop {
   231  		return healStart, healTaskStatus, ErrInvalidArgument("forceStart and forceStop set to true is not allowed")
   232  	}
   233  
   234  	body, err := json.Marshal(healOpts)
   235  	if err != nil {
   236  		return healStart, healTaskStatus, err
   237  	}
   238  
   239  	path := fmt.Sprintf(adminAPIPrefix+"/heal/%s", bucket)
   240  	if bucket != "" && prefix != "" {
   241  		path += "/" + prefix
   242  	}
   243  
   244  	// execute POST request to heal api
   245  	queryVals := make(url.Values)
   246  	if clientToken != "" {
   247  		queryVals.Set("clientToken", clientToken)
   248  		body = []byte{}
   249  	}
   250  
   251  	// Anyone can be set, either force start or forceStop.
   252  	if forceStart {
   253  		queryVals.Set("forceStart", "true")
   254  	} else if forceStop {
   255  		queryVals.Set("forceStop", "true")
   256  	}
   257  
   258  	resp, err := adm.executeMethod(ctx,
   259  		http.MethodPost, requestData{
   260  			relPath:     path,
   261  			content:     body,
   262  			queryValues: queryVals,
   263  		})
   264  	defer closeResponse(resp)
   265  	if err != nil {
   266  		return healStart, healTaskStatus, err
   267  	}
   268  
   269  	if resp.StatusCode != http.StatusOK {
   270  		return healStart, healTaskStatus, httpRespToErrorResponse(resp)
   271  	}
   272  
   273  	respBytes, err := ioutil.ReadAll(resp.Body)
   274  	if err != nil {
   275  		return healStart, healTaskStatus, err
   276  	}
   277  
   278  	// Was it a status request?
   279  	if clientToken == "" {
   280  		// As a special operation forceStop would return a
   281  		// similar struct as healStart will have the
   282  		// heal sequence information about the heal which
   283  		// was stopped.
   284  		err = json.Unmarshal(respBytes, &healStart)
   285  	} else {
   286  		err = json.Unmarshal(respBytes, &healTaskStatus)
   287  	}
   288  	if err != nil {
   289  		// May be the server responded with error after success
   290  		// message, handle it separately here.
   291  		var errResp ErrorResponse
   292  		err = json.Unmarshal(respBytes, &errResp)
   293  		if err != nil {
   294  			// Unknown structure return error anyways.
   295  			return healStart, healTaskStatus, err
   296  		}
   297  		return healStart, healTaskStatus, errResp
   298  	}
   299  	return healStart, healTaskStatus, nil
   300  }
   301  
// MRFStatus exposes MRF metrics of a server.
// BgHealState.MRF holds one MRFStatus per endpoint.
type MRFStatus struct {
	// NOTE(review): presumably the *Healed counters are progress so far
	// and the Total* counters the overall amount of work — confirm
	// against the server implementation.
	BytesHealed uint64 `json:"bytes_healed"`
	ItemsHealed uint64 `json:"items_healed"`

	TotalItems uint64 `json:"total_items"`
	TotalBytes uint64 `json:"total_bytes"`

	Started time.Time `json:"started"`
}
   312  
// BgHealState represents the status of the background heal.
// It is what AdminClient.BackgroundHealStatus decodes the server response
// into; several values can be combined with Merge.
type BgHealState struct {
	// List of offline endpoints with no background heal state info
	OfflineEndpoints []string `json:"offline_nodes"`
	// Total items scanned by the continuous background healing.
	// There is no JSON tag, so encoding/json uses the field name
	// "ScannedItemsCount" as the key.
	ScannedItemsCount int64
	// Disks currently in heal states.
	// There is no JSON tag, so encoding/json uses the field name
	// "HealDisks" as the key. Merge does not touch this field.
	HealDisks []string
	// SetStatus contains information for each set.
	Sets []SetStatus `json:"sets"`
	// Endpoint -> MRF Status
	MRF map[string]MRFStatus `json:"mrf"`
	// Parity per storage class
	SCParity map[string]int `json:"sc_parity"`
}
   328  
// SetStatus contains information about the heal status of a set.
//
// BgHealState.Merge identifies a set by ID and orders the merged list by
// PoolIndex and then SetIndex.
type SetStatus struct {
	ID           string `json:"id"`
	PoolIndex    int    `json:"pool_index"`
	SetIndex     int    `json:"set_index"`
	HealStatus   string `json:"heal_status"`
	HealPriority string `json:"heal_priority"`
	TotalObjects int    `json:"total_objects"`
	Disks        []Disk `json:"disks"` // Disk is declared outside this file.
}
   339  
// HealingDisk contains information about the healing progress of a
// single disk.
type HealingDisk struct {
	// Copied from cmd/background-newdisks-heal-ops.go
	// When adding new field, update (*healingTracker).toHealingDisk

	ID         string    `json:"id"`
	PoolIndex  int       `json:"pool_index"`
	SetIndex   int       `json:"set_index"`
	DiskIndex  int       `json:"disk_index"`
	Endpoint   string    `json:"endpoint"`
	Path       string    `json:"path"`
	Started    time.Time `json:"started"`
	LastUpdate time.Time `json:"last_update"`

	ObjectsTotalCount uint64 `json:"objects_total_count"`
	ObjectsTotalSize  uint64 `json:"objects_total_size"`

	ItemsHealed uint64 `json:"items_healed"`
	ItemsFailed uint64 `json:"items_failed"`
	BytesDone   uint64 `json:"bytes_done"`
	BytesFailed uint64 `json:"bytes_failed"`

	ObjectsHealed uint64 `json:"objects_healed"` // Deprecated July 2021
	ObjectsFailed uint64 `json:"objects_failed"` // Deprecated July 2021

	// Last object scanned.
	Bucket string `json:"current_bucket"`
	Object string `json:"current_object"`

	// Filled on startup/restarts.
	QueuedBuckets []string `json:"queued_buckets"`

	// Filled during heal.
	HealedBuckets []string `json:"healed_buckets"`
	// future add more tracking capabilities
}
   376  
   377  // Merge others into b.
   378  func (b *BgHealState) Merge(others ...BgHealState) {
   379  	// SCParity is the same from all nodes, just pick
   380  	// the information from the first node.
   381  	if b.SCParity == nil && len(others) > 0 {
   382  		b.SCParity = make(map[string]int)
   383  		for k, v := range others[0].SCParity {
   384  			b.SCParity[k] = v
   385  		}
   386  	}
   387  	if b.MRF == nil {
   388  		b.MRF = make(map[string]MRFStatus)
   389  	}
   390  	for _, other := range others {
   391  		b.OfflineEndpoints = append(b.OfflineEndpoints, other.OfflineEndpoints...)
   392  		for k, v := range other.MRF {
   393  			b.MRF[k] = v
   394  		}
   395  		b.ScannedItemsCount += other.ScannedItemsCount
   396  		if len(b.Sets) == 0 {
   397  			b.Sets = make([]SetStatus, len(other.Sets))
   398  			copy(b.Sets, other.Sets)
   399  			continue
   400  		}
   401  
   402  		// Add disk if not present.
   403  		// If present select the one with latest lastupdate.
   404  		addSet := func(set SetStatus) {
   405  			for eSetIdx, existing := range b.Sets {
   406  				if existing.ID != set.ID {
   407  					continue
   408  				}
   409  				if len(existing.Disks) < len(set.Disks) {
   410  					b.Sets[eSetIdx].Disks = set.Disks
   411  				}
   412  				if len(existing.Disks) < len(set.Disks) {
   413  					return
   414  				}
   415  				for i, disk := range set.Disks {
   416  					// Disks should be the same.
   417  					if disk.HealInfo != nil {
   418  						existing.Disks[i].HealInfo = disk.HealInfo
   419  					}
   420  				}
   421  				return
   422  			}
   423  			b.Sets = append(b.Sets, set)
   424  		}
   425  		for _, disk := range other.Sets {
   426  			addSet(disk)
   427  		}
   428  	}
   429  	sort.Slice(b.Sets, func(i, j int) bool {
   430  		if b.Sets[i].PoolIndex != b.Sets[j].PoolIndex {
   431  			return b.Sets[i].PoolIndex < b.Sets[j].PoolIndex
   432  		}
   433  		return b.Sets[i].SetIndex < b.Sets[j].SetIndex
   434  	})
   435  }
   436  
   437  // BackgroundHealStatus returns the background heal status of the
   438  // current server or cluster.
   439  func (adm *AdminClient) BackgroundHealStatus(ctx context.Context) (BgHealState, error) {
   440  	// Execute POST request to background heal status api
   441  	resp, err := adm.executeMethod(ctx,
   442  		http.MethodPost,
   443  		requestData{relPath: adminAPIPrefix + "/background-heal/status"})
   444  	if err != nil {
   445  		return BgHealState{}, err
   446  	}
   447  	defer closeResponse(resp)
   448  
   449  	if resp.StatusCode != http.StatusOK {
   450  		return BgHealState{}, httpRespToErrorResponse(resp)
   451  	}
   452  
   453  	respBytes, err := ioutil.ReadAll(resp.Body)
   454  	if err != nil {
   455  		return BgHealState{}, err
   456  	}
   457  
   458  	var healState BgHealState
   459  
   460  	err = json.Unmarshal(respBytes, &healState)
   461  	if err != nil {
   462  		return BgHealState{}, err
   463  	}
   464  	return healState, nil
   465  }