github.com/m3db/m3@v1.5.0/src/cmd/tools/split_index_shards/main/main.go (about)

     1  // Copyright (c) 2021 Uber Technologies, Inc.
     2  //
     3  // Permission is hereby granted, free of charge, to any person obtaining a copy
     4  // of this software and associated documentation files (the "Software"), to deal
     5  // in the Software without restriction, including without limitation the rights
     6  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
     7  // copies of the Software, and to permit persons to whom the Software is
     8  // furnished to do so, subject to the following conditions:
     9  //
    10  // The above copyright notice and this permission notice shall be included in
    11  // all copies or substantial portions of the Software.
    12  //
    13  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    14  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    15  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    16  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    17  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    18  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    19  // THE SOFTWARE.
    20  
    21  package main
    22  
    23  import (
    24  	"fmt"
    25  	iofs "io/fs"
    26  	"io/ioutil"
    27  	"log"
    28  	"os"
    29  	"path/filepath"
    30  	"regexp"
    31  	"strconv"
    32  	"strings"
    33  	"time"
    34  
    35  	"github.com/pborman/getopt"
    36  	"go.uber.org/zap"
    37  
    38  	"github.com/m3db/m3/src/dbnode/digest"
    39  	"github.com/m3db/m3/src/dbnode/generated/proto/index"
    40  	"github.com/m3db/m3/src/dbnode/persist/fs"
    41  	xerrors "github.com/m3db/m3/src/x/errors"
    42  	"github.com/m3db/m3/src/x/ident"
    43  	xos "github.com/m3db/m3/src/x/os"
    44  	xtime "github.com/m3db/m3/src/x/time"
    45  )
    46  
    47  var (
    48  	checkpointPattern = regexp.MustCompile(`/index/data/(\w+)/fileset-([0-9]+)-([0-9]+)-checkpoint\.db$`)
    49  	genericPattern    = regexp.MustCompile(`/index/data/(\w+)/fileset-([0-9]+)-([0-9]+)-.*\.db$`)
    50  )
    51  
    52  func main() {
    53  	var (
    54  		optPath       = getopt.StringLong("path", 'p', "", "Index path [e.g. /temp/lib/m3db/index]")
    55  		optBlockUntil = getopt.Int64Long("block-until", 'b', 0, "Block Until Time, exclusive [in nsec]")
    56  		optSrcShards  = getopt.Uint32Long("src-shards", 'h', 0, "Original (source) number of shards")
    57  		optFactor     = getopt.IntLong("factor", 'f', 0, "Integer factor to increase the number of shards by")
    58  	)
    59  	getopt.Parse()
    60  
    61  	rawLogger, err := zap.NewDevelopment()
    62  	if err != nil {
    63  		log.Fatalf("unable to create logger: %+v", err)
    64  	}
    65  	logger := rawLogger.Sugar()
    66  
    67  	if *optPath == "" ||
    68  		*optBlockUntil <= 0 ||
    69  		*optSrcShards == 0 ||
    70  		*optFactor <= 0 {
    71  		getopt.Usage()
    72  		os.Exit(1)
    73  	}
    74  
    75  	var (
    76  		filesetLocation = dropIndexSuffix(*optPath)
    77  		fsOpts          = fs.NewOptions().SetFilePathPrefix(filesetLocation)
    78  	)
    79  
    80  	start := time.Now()
    81  
    82  	// Delete filesets of all blocks starting from optBlockUntil (inclusive).
    83  	if err := filepath.WalkDir(*optPath, func(path string, d iofs.DirEntry, err error) error {
    84  		if err != nil || d.IsDir() {
    85  			return err
    86  		}
    87  		pathParts := genericPattern.FindStringSubmatch(path)
    88  		if len(pathParts) != 4 {
    89  			return fmt.Errorf("failed to parse path %s", path)
    90  		}
    91  
    92  		blockStart, err := strconv.Atoi(pathParts[2])
    93  		if err != nil {
    94  			return err
    95  		}
    96  
    97  		if blockStart >= int(*optBlockUntil) {
    98  			fmt.Printf("%s - deleting too recent %s\n", time.Now().Local(), path) // nolint: forbidigo
    99  			return os.Remove(path)
   100  		}
   101  
   102  		return nil
   103  	}); err != nil {
   104  		logger.Fatalf("unable to walk the source dir: %+v", err)
   105  	}
   106  
   107  	// Update info files of all blocks before optBlockUntil.
   108  	if err := filepath.WalkDir(*optPath, func(path string, d iofs.DirEntry, err error) error {
   109  		if err != nil || d.IsDir() || !strings.HasSuffix(d.Name(), "-checkpoint.db") {
   110  			return err
   111  		}
   112  		fmt.Printf("%s - updating fileset of %s\n", time.Now().Local(), path) // nolint: forbidigo
   113  		pathParts := checkpointPattern.FindStringSubmatch(path)
   114  		if len(pathParts) != 4 {
   115  			return fmt.Errorf("failed to parse path %s", path)
   116  		}
   117  
   118  		var (
   119  			namespace        = pathParts[1]
   120  			blockStart, err1 = strconv.Atoi(pathParts[2])
   121  			volume, err2     = strconv.Atoi(pathParts[3])
   122  		)
   123  		if err = xerrors.FirstError(err1, err2); err != nil {
   124  			return err
   125  		}
   126  
   127  		if blockStart >= int(*optBlockUntil) {
   128  			return fmt.Errorf(
   129  				"encountered fileset too recent to split (should have been deleted in previous step): %s",
   130  				path)
   131  		}
   132  
   133  		namespaceDir := fs.NamespaceIndexDataDirPath(filesetLocation, ident.StringID(namespace))
   134  
   135  		if err = updateIndexInfoFile(
   136  			namespaceDir, xtime.UnixNano(blockStart), volume, *optSrcShards, *optFactor, fsOpts); err != nil {
   137  			if strings.Contains(err.Error(), "no such file or directory") {
   138  				fmt.Println(" - skip (incomplete fileset)") // nolint: forbidigo
   139  				return nil
   140  			}
   141  			return err
   142  		}
   143  
   144  		return err
   145  	}); err != nil {
   146  		logger.Fatalf("unable to walk the source dir: %+v", err)
   147  	}
   148  
   149  	runTime := time.Since(start)
   150  	fmt.Printf("Running time: %s\n", runTime) // nolint: forbidigo
   151  }
   152  
   153  func updateIndexInfoFile(
   154  	namespaceDir string,
   155  	blockStart xtime.UnixNano,
   156  	volume int,
   157  	srcNumShards uint32,
   158  	factor int,
   159  	fsOpts fs.Options,
   160  ) error {
   161  	var (
   162  		infoFilePath       = fs.FilesetPathFromTimeAndIndex(namespaceDir, blockStart, volume, fs.InfoFileSuffix)
   163  		digestFilePath     = fs.FilesetPathFromTimeAndIndex(namespaceDir, blockStart, volume, fs.DigestFileSuffix)
   164  		checkpointFilePath = fs.FilesetPathFromTimeAndIndex(namespaceDir, blockStart, volume, fs.CheckpointFileSuffix)
   165  
   166  		info    = index.IndexVolumeInfo{}
   167  		digests = index.IndexDigests{}
   168  	)
   169  
   170  	digestsData, err := ioutil.ReadFile(digestFilePath) // nolint: gosec
   171  	if err != nil {
   172  		return err
   173  	}
   174  	if err = digests.Unmarshal(digestsData); err != nil {
   175  		return err
   176  	}
   177  
   178  	infoData, err := ioutil.ReadFile(infoFilePath) // nolint: gosec
   179  	if err != nil {
   180  		return err
   181  	}
   182  	if err = info.Unmarshal(infoData); err != nil {
   183  		return err
   184  	}
   185  
   186  	var newShards []uint32
   187  	for _, srcShard := range info.Shards {
   188  		if srcShard >= srcNumShards {
   189  			return fmt.Errorf("unexpected source shard ID %d (must be under %d)", srcShard, srcNumShards)
   190  		}
   191  		for i := 0; i < factor; i++ {
   192  			dstShard := mapToDstShard(srcNumShards, i, srcShard)
   193  			newShards = append(newShards, dstShard)
   194  		}
   195  	}
   196  	info.Shards = newShards
   197  
   198  	newInfoData, err := info.Marshal()
   199  	if err != nil {
   200  		return err
   201  	}
   202  
   203  	if err = xos.WriteFileSync(infoFilePath, newInfoData, fsOpts.NewFileMode()); err != nil {
   204  		return err
   205  	}
   206  
   207  	digests.InfoDigest = digest.Checksum(newInfoData)
   208  	newDigestsData, err := digests.Marshal()
   209  	if err != nil {
   210  		return err
   211  	}
   212  
   213  	if err = ioutil.WriteFile(digestFilePath, newDigestsData, fsOpts.NewFileMode()); err != nil {
   214  		return err
   215  	}
   216  
   217  	digestBuffer := digest.NewBuffer()
   218  	digestBuffer.WriteDigest(digest.Checksum(newDigestsData))
   219  
   220  	return ioutil.WriteFile(checkpointFilePath, digestBuffer, fsOpts.NewFileMode())
   221  }
   222  
   223  func mapToDstShard(srcNumShards uint32, i int, srcShard uint32) uint32 {
   224  	return srcNumShards*uint32(i) + srcShard
   225  }
   226  
   227  func dropIndexSuffix(path string) string {
   228  	idx := strings.LastIndex(path, "/index")
   229  	if idx < 0 {
   230  		return path
   231  	}
   232  	return path[:idx]
   233  }