storj.io/minio@v0.0.0-20230509071714-0cbc90f649b1/cmd/erasure-healing-common_test.go

/*
 * MinIO Cloud Storage, (C) 2016-2020 MinIO, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package cmd

import (
	"bytes"
	"context"
	"fmt"
	"os"
	"path/filepath"
	"testing"
	"time"

	"storj.io/minio/pkg/madmin"
)

// Validates the functionality provided to find the most common
// time occurrence in a list of times.
func TestCommonTime(t *testing.T) {
	// List of test cases for common modTime.
	testCases := []struct {
		times []time.Time
		time  time.Time
	}{
		{
			// 1. Tests common times when slice has varying time elements.
			[]time.Time{
				time.Unix(0, 1).UTC(),
				time.Unix(0, 2).UTC(),
				time.Unix(0, 3).UTC(),
				time.Unix(0, 3).UTC(),
				time.Unix(0, 2).UTC(),
				time.Unix(0, 3).UTC(),
				time.Unix(0, 1).UTC(),
			}, time.Unix(0, 3).UTC(),
		},
		{
			// 2. Tests common time obtained when all elements are equal.
			[]time.Time{
				time.Unix(0, 3).UTC(),
				time.Unix(0, 3).UTC(),
				time.Unix(0, 3).UTC(),
				time.Unix(0, 3).UTC(),
				time.Unix(0, 3).UTC(),
				time.Unix(0, 3).UTC(),
				time.Unix(0, 3).UTC(),
			}, time.Unix(0, 3).UTC(),
		},
		{
			// 3. Tests common time obtained when elements have a mixture
			// of sentinel values.
			[]time.Time{
				time.Unix(0, 3).UTC(),
				time.Unix(0, 3).UTC(),
				time.Unix(0, 2).UTC(),
				time.Unix(0, 1).UTC(),
				time.Unix(0, 3).UTC(),
				time.Unix(0, 4).UTC(),
				time.Unix(0, 3).UTC(),
				timeSentinel,
				timeSentinel,
				timeSentinel,
			}, time.Unix(0, 3).UTC(),
		},
	}

	// Tests all the testcases, and validates them against expected
	// common modtime. Tests fail if modtime does not match.
	for i, testCase := range testCases {
		// Obtain a common mod time from modTimes slice.
		ctime, _ := commonTime(testCase.times, nil)
		if !testCase.time.Equal(ctime) {
			t.Fatalf("Test case %d, expect to pass but failed. Wanted modTime: %s, got modTime: %s\n", i+1, testCase.time, ctime)
		}
	}
}

// TestListOnlineDisks - checks if listOnlineDisks and outDatedDisks
// are consistent with each other.
func TestListOnlineDisks(t *testing.T) {
	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	obj, disks, err := prepareErasure16(ctx)
	if err != nil {
		t.Fatalf("Prepare Erasure backend failed - %v", err)
	}
	defer obj.Shutdown(context.Background())
	defer removeRoots(disks)

	type tamperKind int
	const (
		noTamper    tamperKind = iota
		deletePart  tamperKind = iota
		corruptPart tamperKind = iota
	)
	threeNanoSecs := time.Unix(0, 3).UTC()
	fourNanoSecs := time.Unix(0, 4).UTC()
	modTimesThreeNone := []time.Time{
		threeNanoSecs, threeNanoSecs, threeNanoSecs, threeNanoSecs,
		threeNanoSecs, threeNanoSecs, threeNanoSecs,
		timeSentinel, timeSentinel, timeSentinel, timeSentinel,
		timeSentinel, timeSentinel, timeSentinel, timeSentinel,
		timeSentinel,
	}
	modTimesThreeFour := []time.Time{
		threeNanoSecs, threeNanoSecs, threeNanoSecs, threeNanoSecs,
		threeNanoSecs, threeNanoSecs, threeNanoSecs, threeNanoSecs,
		fourNanoSecs, fourNanoSecs, fourNanoSecs, fourNanoSecs,
		fourNanoSecs, fourNanoSecs, fourNanoSecs, fourNanoSecs,
	}
	testCases := []struct {
		modTimes       []time.Time
		expectedTime   time.Time
		errs           []error
		_tamperBackend tamperKind
	}{
		{
			modTimes:     modTimesThreeFour,
			expectedTime: fourNanoSecs,
			errs: []error{
				nil, nil, nil, nil, nil, nil, nil, nil, nil,
				nil, nil, nil, nil, nil, nil, nil,
			},
			_tamperBackend: noTamper,
		},
		{
			modTimes:     modTimesThreeNone,
			expectedTime: threeNanoSecs,
			errs: []error{
				// Disks that have a valid xl.meta.
				nil, nil, nil, nil, nil, nil, nil,
				// Majority of disks don't have xl.meta.
				errFileNotFound, errFileNotFound,
				errFileNotFound, errFileNotFound,
				errFileNotFound, errDiskAccessDenied,
				errDiskNotFound, errFileNotFound,
				errFileNotFound,
			},
			_tamperBackend: deletePart,
		},
		{
			modTimes:     modTimesThreeNone,
			expectedTime: threeNanoSecs,
			errs: []error{
				// Disks that have a valid xl.meta.
				nil, nil, nil, nil, nil, nil, nil,
				// Majority of disks don't have xl.meta.
				errFileNotFound, errFileNotFound,
				errFileNotFound, errFileNotFound,
				errFileNotFound, errDiskAccessDenied,
				errDiskNotFound, errFileNotFound,
				errFileNotFound,
			},
			_tamperBackend: corruptPart,
		},
	}

	bucket := "bucket"
	err = obj.MakeBucketWithLocation(ctx, "bucket", BucketOptions{})
	if err != nil {
		t.Fatalf("Failed to make a bucket %v", err)
	}

	object := "object"
	data := bytes.Repeat([]byte("a"), smallFileThreshold*16)
	z := obj.(*erasureServerPools)
	erasureDisks := z.serverPools[0].sets[0].getDisks()
	for i, test := range testCases {
		test := test
		t.Run(fmt.Sprintf("case-%d", i), func(t *testing.T) {
			_, err = obj.PutObject(ctx, bucket, object, mustGetPutObjReader(t, bytes.NewReader(data), int64(len(data)), "", ""), ObjectOptions{})
			if err != nil {
				t.Fatalf("Failed to putObject %v", err)
			}

			partsMetadata, errs := readAllFileInfo(ctx, erasureDisks, bucket, object, "", false)
			fi, err := getLatestFileInfo(ctx, partsMetadata, errs)
			if err != nil {
				t.Fatalf("Failed to getLatestFileInfo %v", err)
			}

			for j := range partsMetadata {
				if errs[j] != nil {
					t.Fatalf("expected error to be nil: %s", errs[j])
				}
				partsMetadata[j].ModTime = test.modTimes[j]
			}

			tamperedIndex := -1
			switch test._tamperBackend {
			case deletePart:
				for index, err := range test.errs {
					if err != nil {
						continue
					}
					// Remove a part from a disk
					// which has a valid xl.meta,
					// and check if that disk
					// appears in outDatedDisks.
					tamperedIndex = index
					dErr := erasureDisks[index].Delete(context.Background(), bucket, pathJoin(object, fi.DataDir, "part.1"), false)
					if dErr != nil {
						t.Fatalf("Failed to delete %s - %v", filepath.Join(object, "part.1"), dErr)
					}
					break
				}
			case corruptPart:
				for index, err := range test.errs {
					if err != nil {
						continue
					}
					// Corrupt a part from a disk
					// which has a valid xl.meta,
					// and check if that disk
					// appears in outDatedDisks.
					tamperedIndex = index
					filePath := pathJoin(erasureDisks[index].String(), bucket, object, fi.DataDir, "part.1")
					f, err := os.OpenFile(filePath, os.O_WRONLY|os.O_SYNC, 0)
					if err != nil {
						t.Fatalf("Failed to open %s: %s\n", filePath, err)
					}
					f.Write([]byte("oops")) // Will cause bitrot error
					f.Close()
					break
				}

			}

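			// The expected behavior, asserted below: listOnlineDisks picks the
			// most common modTime from the metadata and returns the matching
			// disks, that modTime, and the dataDir recorded for the object.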
			onlineDisks, modTime, dataDir := listOnlineDisks(erasureDisks, partsMetadata, test.errs)
			if !modTime.Equal(test.expectedTime) {
				t.Fatalf("Expected modTime to be equal to %v but was found to be %v",
					test.expectedTime, modTime)
			}
			if fi.DataDir != dataDir {
				t.Fatalf("Expected dataDir to be equal to %v but was found to be %v",
					fi.DataDir, dataDir)
			}
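			// disksWithAllParts should drop any disk whose data was tampered
			// with; madmin.HealDeepScan additionally bitrot-verifies the part
			// contents, which is what catches the corruptPart case.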
			availableDisks, newErrs := disksWithAllParts(ctx, onlineDisks, partsMetadata, test.errs, bucket, object, madmin.HealDeepScan)
			test.errs = newErrs

			if test._tamperBackend != noTamper {
				if tamperedIndex != -1 && availableDisks[tamperedIndex] != nil {
					t.Fatalf("disk (%v) with part.1 tampered with should not be reported as a disk with all data available",
						erasureDisks[tamperedIndex])
				}
			}
		})
	}
}

// TestListOnlineDisksSmallObjects - checks if listOnlineDisks and outDatedDisks
// are consistent with each other for small objects.
func TestListOnlineDisksSmallObjects(t *testing.T) {
	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	obj, disks, err := prepareErasure16(ctx)
	if err != nil {
		t.Fatalf("Prepare Erasure backend failed - %v", err)
	}
	defer obj.Shutdown(context.Background())
	defer removeRoots(disks)

	type tamperKind int
	const (
		noTamper    tamperKind = iota
		deletePart  tamperKind = iota
		corruptPart tamperKind = iota
	)
	timeSentinel := time.Unix(1, 0).UTC()
	threeNanoSecs := time.Unix(3, 0).UTC()
	fourNanoSecs := time.Unix(4, 0).UTC()
	modTimesThreeNone := []time.Time{
		threeNanoSecs, threeNanoSecs, threeNanoSecs, threeNanoSecs,
		threeNanoSecs, threeNanoSecs, threeNanoSecs,
		timeSentinel, timeSentinel, timeSentinel, timeSentinel,
		timeSentinel, timeSentinel, timeSentinel, timeSentinel,
		timeSentinel,
	}
	modTimesThreeFour := []time.Time{
		threeNanoSecs, threeNanoSecs, threeNanoSecs, threeNanoSecs,
		threeNanoSecs, threeNanoSecs, threeNanoSecs, threeNanoSecs,
		fourNanoSecs, fourNanoSecs, fourNanoSecs, fourNanoSecs,
		fourNanoSecs, fourNanoSecs, fourNanoSecs, fourNanoSecs,
	}
	testCases := []struct {
		modTimes       []time.Time
		expectedTime   time.Time
		errs           []error
		_tamperBackend tamperKind
	}{
		{
			modTimes:     modTimesThreeFour,
			expectedTime: fourNanoSecs,
			errs: []error{
				nil, nil, nil, nil, nil, nil, nil, nil, nil,
				nil, nil, nil, nil, nil, nil, nil,
			},
			_tamperBackend: noTamper,
		},
		{
			modTimes:     modTimesThreeNone,
			expectedTime: threeNanoSecs,
			errs: []error{
				// Disks that have a valid xl.meta.
				nil, nil, nil, nil, nil, nil, nil,
				// Majority of disks don't have xl.meta.
				errFileNotFound, errFileNotFound,
				errFileNotFound, errFileNotFound,
				errFileNotFound, errDiskAccessDenied,
				errDiskNotFound, errFileNotFound,
				errFileNotFound,
			},
			_tamperBackend: deletePart,
		},
		{
			modTimes:     modTimesThreeNone,
			expectedTime: threeNanoSecs,
			errs: []error{
				// Disks that have a valid xl.meta.
				nil, nil, nil, nil, nil, nil, nil,
				// Majority of disks don't have xl.meta.
				errFileNotFound, errFileNotFound,
				errFileNotFound, errFileNotFound,
				errFileNotFound, errDiskAccessDenied,
				errDiskNotFound, errFileNotFound,
				errFileNotFound,
			},
			_tamperBackend: corruptPart,
		},
	}

	bucket := "bucket"
	err = obj.MakeBucketWithLocation(ctx, "bucket", BucketOptions{})
	if err != nil {
		t.Fatalf("Failed to make a bucket %v", err)
	}

	object := "object"
	data := bytes.Repeat([]byte("a"), smallFileThreshold/2)
	z := obj.(*erasureServerPools)
	erasureDisks := z.serverPools[0].sets[0].getDisks()
	for i, test := range testCases {
		test := test
		t.Run(fmt.Sprintf("case-%d", i), func(t *testing.T) {
			_, err = obj.PutObject(ctx, bucket, object, mustGetPutObjReader(t, bytes.NewReader(data), int64(len(data)), "", ""), ObjectOptions{})
			if err != nil {
				t.Fatalf("Failed to putObject %v", err)
			}

			partsMetadata, errs := readAllFileInfo(ctx, erasureDisks, bucket, object, "", true)
			fi, err := getLatestFileInfo(ctx, partsMetadata, errs)
			if err != nil {
				t.Fatalf("Failed to getLatestFileInfo %v", err)
			}

			for j := range partsMetadata {
				if errs[j] != nil {
					t.Fatalf("expected error to be nil: %s", errs[j])
				}
				partsMetadata[j].ModTime = test.modTimes[j]
			}

			if erasureDisks, err = writeUniqueFileInfo(ctx, erasureDisks, bucket, object, partsMetadata, diskCount(erasureDisks)); err != nil {
				t.Fatal(ctx, err)
			}

			tamperedIndex := -1
			switch test._tamperBackend {
			case deletePart:
				for index, err := range test.errs {
					if err != nil {
						continue
					}
					// Remove the xl.meta from a disk
					// which has a valid one,
					// and check if that disk
					// appears in outDatedDisks.
					tamperedIndex = index
					dErr := erasureDisks[index].Delete(context.Background(), bucket, pathJoin(object, xlStorageFormatFile), false)
					if dErr != nil {
						t.Fatalf("Failed to delete %s - %v", pathJoin(object, xlStorageFormatFile), dErr)
					}
					break
				}
			case corruptPart:
				for index, err := range test.errs {
					if err != nil {
						continue
					}
					// Corrupt the xl.meta of a disk
					// which has a valid one,
					// and check if that disk
					// appears in outDatedDisks.
					tamperedIndex = index
					filePath := pathJoin(erasureDisks[index].String(), bucket, object, xlStorageFormatFile)
					f, err := os.OpenFile(filePath, os.O_WRONLY|os.O_SYNC, 0)
					if err != nil {
						t.Fatalf("Failed to open %s: %s\n", filePath, err)
					}
					f.Write([]byte("oops")) // Will cause bitrot error
					f.Close()
					break
				}

			}
			partsMetadata, errs = readAllFileInfo(ctx, erasureDisks, bucket, object, "", true)
			_, err = getLatestFileInfo(ctx, partsMetadata, errs)
			if err != nil {
				t.Fatalf("Failed to getLatestFileInfo %v", err)
			}

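			// As in TestListOnlineDisks, the most common modTime and the
			// recorded dataDir are expected back from listOnlineDisks.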
			onlineDisks, modTime, dataDir := listOnlineDisks(erasureDisks, partsMetadata, test.errs)
			if !modTime.Equal(test.expectedTime) {
				t.Fatalf("Expected modTime to be equal to %v but was found to be %v",
					test.expectedTime, modTime)
			}

			if fi.DataDir != dataDir {
				t.Fatalf("Expected dataDir to be equal to %v but was found to be %v",
					fi.DataDir, dataDir)
			}

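			// The disk whose xl.meta was deleted or corrupted above must be
			// dropped by disksWithAllParts during the deep scan.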
			availableDisks, newErrs := disksWithAllParts(ctx, onlineDisks, partsMetadata, test.errs, bucket, object, madmin.HealDeepScan)
			test.errs = newErrs

			if test._tamperBackend != noTamper {
				if tamperedIndex != -1 && availableDisks[tamperedIndex] != nil {
					t.Fatalf("disk (%v) with xl.meta tampered with should not be reported as a disk with all data available",
						erasureDisks[tamperedIndex])
				}
			}
		})
	}
}

func TestDisksWithAllParts(t *testing.T) {
	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()
	obj, disks, err := prepareErasure16(ctx)
	if err != nil {
		t.Fatalf("Prepare Erasure backend failed - %v", err)
	}
	defer obj.Shutdown(context.Background())
	defer removeRoots(disks)

	bucket := "bucket"
	object := "object"
	// make data with more than one part
	partCount := 3
	data := bytes.Repeat([]byte("a"), 6*1024*1024*partCount)
	z := obj.(*erasureServerPools)
	s := z.serverPools[0].sets[0]
	erasureDisks := s.getDisks()
	err = obj.MakeBucketWithLocation(ctx, "bucket", BucketOptions{})
	if err != nil {
		t.Fatalf("Failed to make a bucket %v", err)
	}

	_, err = obj.PutObject(ctx, bucket, object, mustGetPutObjReader(t, bytes.NewReader(data), int64(len(data)), "", ""), ObjectOptions{})
	if err != nil {
		t.Fatalf("Failed to putObject %v", err)
	}

	_, errs := readAllFileInfo(ctx, erasureDisks, bucket, object, "", false)
	readQuorum := len(erasureDisks) / 2
	if reducedErr := reduceReadQuorumErrs(ctx, errs, objectOpIgnoredErrs, readQuorum); reducedErr != nil {
		t.Fatalf("Failed to read xl meta data %v", reducedErr)
	}

	// Test that all disks are returned without any failures with
	// unmodified meta data.
	partsMetadata, errs := readAllFileInfo(ctx, erasureDisks, bucket, object, "", false)
	if reducedErr := reduceReadQuorumErrs(ctx, errs, objectOpIgnoredErrs, readQuorum); reducedErr != nil {
		t.Fatalf("Failed to read xl meta data %v", reducedErr)
	}

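	// With pristine metadata and no per-disk errors, disksWithAllParts is
	// expected to return every disk, as asserted below.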
	filteredDisks, errs := disksWithAllParts(ctx, erasureDisks, partsMetadata, errs, bucket, object, madmin.HealDeepScan)

	if len(filteredDisks) != len(erasureDisks) {
		t.Errorf("Unexpected number of disks: %d", len(filteredDisks))
	}

	for diskIndex, disk := range filteredDisks {
		if errs[diskIndex] != nil {
			t.Errorf("Unexpected error %s", errs[diskIndex])
		}

		if disk == nil {
			t.Errorf("Disk erroneously filtered, diskIndex: %d", diskIndex)
		}
	}

	diskFailures := make(map[int]string)
	// key = disk index, value = part name with hash mismatch
	diskFailures[0] = "part.1"
	diskFailures[3] = "part.1"
	diskFailures[15] = "part.1"

	for diskIndex, partName := range diskFailures {
		for i := range partsMetadata[diskIndex].Erasure.Checksums {
			if fmt.Sprintf("part.%d", i+1) == partName {
				filePath := pathJoin(erasureDisks[diskIndex].String(), bucket, object, partsMetadata[diskIndex].DataDir, partName)
				f, err := os.OpenFile(filePath, os.O_WRONLY|os.O_SYNC, 0)
				if err != nil {
					t.Fatalf("Failed to open %s: %s\n", filePath, err)
				}
				f.Write([]byte("oops")) // Will cause bitrot error
				f.Close()
			}
		}
	}

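	// After corrupting part.1 on the disks listed in diskFailures, a deep-scan
	// disksWithAllParts call with fresh (nil) errors should filter out exactly
	// those disks and leave the rest untouched.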
	errs = make([]error, len(erasureDisks))
	filteredDisks, errs = disksWithAllParts(ctx, erasureDisks, partsMetadata, errs, bucket, object, madmin.HealDeepScan)

	if len(filteredDisks) != len(erasureDisks) {
		t.Errorf("Unexpected number of disks: %d", len(filteredDisks))
	}

	for diskIndex, disk := range filteredDisks {
		if _, ok := diskFailures[diskIndex]; ok {
			if disk != nil {
				t.Errorf("Disk not filtered as expected, disk: %d", diskIndex)
			}
			if errs[diskIndex] == nil {
				t.Errorf("Expected error not received, diskIndex: %d", diskIndex)
			}
		} else {
			if disk == nil {
				t.Errorf("Disk erroneously filtered, diskIndex: %d", diskIndex)
			}
			if errs[diskIndex] != nil {
				t.Errorf("Unexpected error, %s, diskIndex: %d", errs[diskIndex], diskIndex)
			}
		}
	}
}