github.com/minio/madmin-go@v1.7.5/heal-commands.go (about) 1 // 2 // MinIO Object Storage (c) 2021 MinIO, Inc. 3 // 4 // Licensed under the Apache License, Version 2.0 (the "License"); 5 // you may not use this file except in compliance with the License. 6 // You may obtain a copy of the License at 7 // 8 // http://www.apache.org/licenses/LICENSE-2.0 9 // 10 // Unless required by applicable law or agreed to in writing, software 11 // distributed under the License is distributed on an "AS IS" BASIS, 12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 // See the License for the specific language governing permissions and 14 // limitations under the License. 15 // 16 17 package madmin 18 19 import ( 20 "context" 21 "encoding/json" 22 "fmt" 23 "io/ioutil" 24 "net/http" 25 "net/url" 26 "sort" 27 "time" 28 ) 29 30 // HealScanMode represents the type of healing scan 31 type HealScanMode int 32 33 const ( 34 // HealUnknownScan default is unknown 35 HealUnknownScan HealScanMode = iota 36 37 // HealNormalScan checks if parts are present and not outdated 38 HealNormalScan 39 40 // HealDeepScan checks for parts bitrot checksums 41 HealDeepScan 42 ) 43 44 // HealOpts - collection of options for a heal sequence 45 type HealOpts struct { 46 Recursive bool `json:"recursive"` 47 DryRun bool `json:"dryRun"` 48 Remove bool `json:"remove"` 49 Recreate bool `json:"recreate"` // Rewrite all resources specified at the bucket or prefix. 50 ScanMode HealScanMode `json:"scanMode"` 51 NoLock bool `json:"nolock"` 52 } 53 54 // Equal returns true if no is same as o. 55 func (o HealOpts) Equal(no HealOpts) bool { 56 if o.Recursive != no.Recursive { 57 return false 58 } 59 if o.DryRun != no.DryRun { 60 return false 61 } 62 if o.Remove != no.Remove { 63 return false 64 } 65 if o.Recreate != no.Recreate { 66 return false 67 } 68 return o.ScanMode == no.ScanMode 69 } 70 71 // HealStartSuccess - holds information about a successfully started 72 // heal operation 73 type HealStartSuccess struct { 74 ClientToken string `json:"clientToken"` 75 ClientAddress string `json:"clientAddress"` 76 StartTime time.Time `json:"startTime"` 77 } 78 79 // HealStopSuccess - holds information about a successfully stopped 80 // heal operation. 81 type HealStopSuccess HealStartSuccess 82 83 // HealTaskStatus - status struct for a heal task 84 type HealTaskStatus struct { 85 Summary string `json:"summary"` 86 FailureDetail string `json:"detail"` 87 StartTime time.Time `json:"startTime"` 88 HealSettings HealOpts `json:"settings"` 89 90 Items []HealResultItem `json:"items,omitempty"` 91 } 92 93 // HealItemType - specify the type of heal operation in a healing 94 // result 95 type HealItemType string 96 97 // HealItemType constants 98 const ( 99 HealItemMetadata HealItemType = "metadata" 100 HealItemBucket = "bucket" 101 HealItemBucketMetadata = "bucket-metadata" 102 HealItemObject = "object" 103 ) 104 105 // Drive state constants 106 const ( 107 DriveStateOk string = "ok" 108 DriveStateOffline = "offline" 109 DriveStateCorrupt = "corrupt" 110 DriveStateMissing = "missing" 111 DriveStatePermission = "permission-denied" 112 DriveStateFaulty = "faulty" 113 DriveStateUnknown = "unknown" 114 DriveStateUnformatted = "unformatted" // only returned by disk 115 ) 116 117 // HealDriveInfo - struct for an individual drive info item. 118 type HealDriveInfo struct { 119 UUID string `json:"uuid"` 120 Endpoint string `json:"endpoint"` 121 State string `json:"state"` 122 } 123 124 // HealResultItem - struct for an individual heal result item 125 type HealResultItem struct { 126 ResultIndex int64 `json:"resultId"` 127 Type HealItemType `json:"type"` 128 Bucket string `json:"bucket"` 129 Object string `json:"object"` 130 VersionID string `json:"versionId"` 131 Detail string `json:"detail"` 132 ParityBlocks int `json:"parityBlocks,omitempty"` 133 DataBlocks int `json:"dataBlocks,omitempty"` 134 DiskCount int `json:"diskCount"` 135 SetCount int `json:"setCount"` 136 // below slices are from drive info. 137 Before struct { 138 Drives []HealDriveInfo `json:"drives"` 139 } `json:"before"` 140 After struct { 141 Drives []HealDriveInfo `json:"drives"` 142 } `json:"after"` 143 ObjectSize int64 `json:"objectSize"` 144 } 145 146 // GetMissingCounts - returns the number of missing disks before 147 // and after heal 148 func (hri *HealResultItem) GetMissingCounts() (b, a int) { 149 if hri == nil { 150 return 151 } 152 for _, v := range hri.Before.Drives { 153 if v.State == DriveStateMissing { 154 b++ 155 } 156 } 157 for _, v := range hri.After.Drives { 158 if v.State == DriveStateMissing { 159 a++ 160 } 161 } 162 return 163 } 164 165 // GetOfflineCounts - returns the number of offline disks before 166 // and after heal 167 func (hri *HealResultItem) GetOfflineCounts() (b, a int) { 168 if hri == nil { 169 return 170 } 171 for _, v := range hri.Before.Drives { 172 if v.State == DriveStateOffline { 173 b++ 174 } 175 } 176 for _, v := range hri.After.Drives { 177 if v.State == DriveStateOffline { 178 a++ 179 } 180 } 181 return 182 } 183 184 // GetCorruptedCounts - returns the number of corrupted disks before 185 // and after heal 186 func (hri *HealResultItem) GetCorruptedCounts() (b, a int) { 187 if hri == nil { 188 return 189 } 190 for _, v := range hri.Before.Drives { 191 if v.State == DriveStateCorrupt { 192 b++ 193 } 194 } 195 for _, v := range hri.After.Drives { 196 if v.State == DriveStateCorrupt { 197 a++ 198 } 199 } 200 return 201 } 202 203 // GetOnlineCounts - returns the number of online disks before 204 // and after heal 205 func (hri *HealResultItem) GetOnlineCounts() (b, a int) { 206 if hri == nil { 207 return 208 } 209 for _, v := range hri.Before.Drives { 210 if v.State == DriveStateOk { 211 b++ 212 } 213 } 214 for _, v := range hri.After.Drives { 215 if v.State == DriveStateOk { 216 a++ 217 } 218 } 219 return 220 } 221 222 // Heal - API endpoint to start heal and to fetch status 223 // forceStart and forceStop are mutually exclusive, you can either 224 // set one of them to 'true'. If both are set 'forceStart' will be 225 // honored. 226 func (adm *AdminClient) Heal(ctx context.Context, bucket, prefix string, 227 healOpts HealOpts, clientToken string, forceStart, forceStop bool) ( 228 healStart HealStartSuccess, healTaskStatus HealTaskStatus, err error, 229 ) { 230 if forceStart && forceStop { 231 return healStart, healTaskStatus, ErrInvalidArgument("forceStart and forceStop set to true is not allowed") 232 } 233 234 body, err := json.Marshal(healOpts) 235 if err != nil { 236 return healStart, healTaskStatus, err 237 } 238 239 path := fmt.Sprintf(adminAPIPrefix+"/heal/%s", bucket) 240 if bucket != "" && prefix != "" { 241 path += "/" + prefix 242 } 243 244 // execute POST request to heal api 245 queryVals := make(url.Values) 246 if clientToken != "" { 247 queryVals.Set("clientToken", clientToken) 248 body = []byte{} 249 } 250 251 // Anyone can be set, either force start or forceStop. 252 if forceStart { 253 queryVals.Set("forceStart", "true") 254 } else if forceStop { 255 queryVals.Set("forceStop", "true") 256 } 257 258 resp, err := adm.executeMethod(ctx, 259 http.MethodPost, requestData{ 260 relPath: path, 261 content: body, 262 queryValues: queryVals, 263 }) 264 defer closeResponse(resp) 265 if err != nil { 266 return healStart, healTaskStatus, err 267 } 268 269 if resp.StatusCode != http.StatusOK { 270 return healStart, healTaskStatus, httpRespToErrorResponse(resp) 271 } 272 273 respBytes, err := ioutil.ReadAll(resp.Body) 274 if err != nil { 275 return healStart, healTaskStatus, err 276 } 277 278 // Was it a status request? 279 if clientToken == "" { 280 // As a special operation forceStop would return a 281 // similar struct as healStart will have the 282 // heal sequence information about the heal which 283 // was stopped. 284 err = json.Unmarshal(respBytes, &healStart) 285 } else { 286 err = json.Unmarshal(respBytes, &healTaskStatus) 287 } 288 if err != nil { 289 // May be the server responded with error after success 290 // message, handle it separately here. 291 var errResp ErrorResponse 292 err = json.Unmarshal(respBytes, &errResp) 293 if err != nil { 294 // Unknown structure return error anyways. 295 return healStart, healTaskStatus, err 296 } 297 return healStart, healTaskStatus, errResp 298 } 299 return healStart, healTaskStatus, nil 300 } 301 302 // MRFStatus exposes MRF metrics of a server 303 type MRFStatus struct { 304 BytesHealed uint64 `json:"bytes_healed"` 305 ItemsHealed uint64 `json:"items_healed"` 306 307 TotalItems uint64 `json:"total_items"` 308 TotalBytes uint64 `json:"total_bytes"` 309 310 Started time.Time `json:"started"` 311 } 312 313 // BgHealState represents the status of the background heal 314 type BgHealState struct { 315 // List of offline endpoints with no background heal state info 316 OfflineEndpoints []string `json:"offline_nodes"` 317 // Total items scanned by the continuous background healing 318 ScannedItemsCount int64 319 // Disks currently in heal states 320 HealDisks []string 321 // SetStatus contains information for each set. 322 Sets []SetStatus `json:"sets"` 323 // Endpoint -> MRF Status 324 MRF map[string]MRFStatus `json:"mrf"` 325 // Parity per storage class 326 SCParity map[string]int `json:"sc_parity"` 327 } 328 329 // SetStatus contains information about the heal status of a set. 330 type SetStatus struct { 331 ID string `json:"id"` 332 PoolIndex int `json:"pool_index"` 333 SetIndex int `json:"set_index"` 334 HealStatus string `json:"heal_status"` 335 HealPriority string `json:"heal_priority"` 336 TotalObjects int `json:"total_objects"` 337 Disks []Disk `json:"disks"` 338 } 339 340 // HealingDisk contains information about 341 type HealingDisk struct { 342 // Copied from cmd/background-newdisks-heal-ops.go 343 // When adding new field, update (*healingTracker).toHealingDisk 344 345 ID string `json:"id"` 346 PoolIndex int `json:"pool_index"` 347 SetIndex int `json:"set_index"` 348 DiskIndex int `json:"disk_index"` 349 Endpoint string `json:"endpoint"` 350 Path string `json:"path"` 351 Started time.Time `json:"started"` 352 LastUpdate time.Time `json:"last_update"` 353 354 ObjectsTotalCount uint64 `json:"objects_total_count"` 355 ObjectsTotalSize uint64 `json:"objects_total_size"` 356 357 ItemsHealed uint64 `json:"items_healed"` 358 ItemsFailed uint64 `json:"items_failed"` 359 BytesDone uint64 `json:"bytes_done"` 360 BytesFailed uint64 `json:"bytes_failed"` 361 362 ObjectsHealed uint64 `json:"objects_healed"` // Deprecated July 2021 363 ObjectsFailed uint64 `json:"objects_failed"` // Deprecated July 2021 364 365 // Last object scanned. 366 Bucket string `json:"current_bucket"` 367 Object string `json:"current_object"` 368 369 // Filled on startup/restarts. 370 QueuedBuckets []string `json:"queued_buckets"` 371 372 // Filled during heal. 373 HealedBuckets []string `json:"healed_buckets"` 374 // future add more tracking capabilities 375 } 376 377 // Merge others into b. 378 func (b *BgHealState) Merge(others ...BgHealState) { 379 // SCParity is the same from all nodes, just pick 380 // the information from the first node. 381 if b.SCParity == nil && len(others) > 0 { 382 b.SCParity = make(map[string]int) 383 for k, v := range others[0].SCParity { 384 b.SCParity[k] = v 385 } 386 } 387 if b.MRF == nil { 388 b.MRF = make(map[string]MRFStatus) 389 } 390 for _, other := range others { 391 b.OfflineEndpoints = append(b.OfflineEndpoints, other.OfflineEndpoints...) 392 for k, v := range other.MRF { 393 b.MRF[k] = v 394 } 395 b.ScannedItemsCount += other.ScannedItemsCount 396 if len(b.Sets) == 0 { 397 b.Sets = make([]SetStatus, len(other.Sets)) 398 copy(b.Sets, other.Sets) 399 continue 400 } 401 402 // Add disk if not present. 403 // If present select the one with latest lastupdate. 404 addSet := func(set SetStatus) { 405 for eSetIdx, existing := range b.Sets { 406 if existing.ID != set.ID { 407 continue 408 } 409 if len(existing.Disks) < len(set.Disks) { 410 b.Sets[eSetIdx].Disks = set.Disks 411 } 412 if len(existing.Disks) < len(set.Disks) { 413 return 414 } 415 for i, disk := range set.Disks { 416 // Disks should be the same. 417 if disk.HealInfo != nil { 418 existing.Disks[i].HealInfo = disk.HealInfo 419 } 420 } 421 return 422 } 423 b.Sets = append(b.Sets, set) 424 } 425 for _, disk := range other.Sets { 426 addSet(disk) 427 } 428 } 429 sort.Slice(b.Sets, func(i, j int) bool { 430 if b.Sets[i].PoolIndex != b.Sets[j].PoolIndex { 431 return b.Sets[i].PoolIndex < b.Sets[j].PoolIndex 432 } 433 return b.Sets[i].SetIndex < b.Sets[j].SetIndex 434 }) 435 } 436 437 // BackgroundHealStatus returns the background heal status of the 438 // current server or cluster. 439 func (adm *AdminClient) BackgroundHealStatus(ctx context.Context) (BgHealState, error) { 440 // Execute POST request to background heal status api 441 resp, err := adm.executeMethod(ctx, 442 http.MethodPost, 443 requestData{relPath: adminAPIPrefix + "/background-heal/status"}) 444 if err != nil { 445 return BgHealState{}, err 446 } 447 defer closeResponse(resp) 448 449 if resp.StatusCode != http.StatusOK { 450 return BgHealState{}, httpRespToErrorResponse(resp) 451 } 452 453 respBytes, err := ioutil.ReadAll(resp.Body) 454 if err != nil { 455 return BgHealState{}, err 456 } 457 458 var healState BgHealState 459 460 err = json.Unmarshal(respBytes, &healState) 461 if err != nil { 462 return BgHealState{}, err 463 } 464 return healState, nil 465 }