github.com/driusan/dgit@v0.0.0-20221118233547-f39f0c15edbb/git/fsck.go (about)

     1  package git
     2  
     3  import (
     4  	"compress/zlib"
     5  	"crypto/sha1"
     6  	"fmt"
     7  	"io"
     8  	"io/ioutil"
     9  	"math/big"
    10  	"os"
    11  	"path/filepath"
    12  	"regexp"
    13  	"strings"
    14  )
    15  
    16  type FsckOptions struct {
    17  	Unreachable      bool
    18  	NoDangling       bool
    19  	Root             bool
    20  	Tags             bool
    21  	Cache            bool
    22  	NoReflogs        bool
    23  	NoFull           bool
    24  	ConnectivityOnly bool
    25  	Strict           bool
    26  	Verbose          bool
    27  	LostFound        bool
    28  	NameObjects      bool
    29  	NoProgress       bool
    30  }
    31  
    32  // Fsck implements the "git fsck" subcommand. It prints any error encountered to
    33  // the stderr argument, and returns an array of said errors.
    34  func Fsck(c *Client, stderr io.Writer, opts FsckOptions, objects []string) (errs []error) {
    35  	addErr := func(err error) {
    36  		fmt.Fprintln(stderr, err)
    37  		errs = append(errs, err)
    38  
    39  	}
    40  
    41  	if err := verifyHead(c, stderr, opts); err != nil {
    42  		addErr(err)
    43  	}
    44  
    45  	if opts.Verbose {
    46  		fmt.Fprintln(stderr, "Checking object directory")
    47  	}
    48  
    49  	// HaveObject doesn't do any validation, so we keep track of things
    50  	// we found that are corrupted so we can include error messages if
    51  	// they're used.
    52  	corrupted := make(map[Sha1]struct{})
    53  	objdir := c.GetObjectsDir().String()
    54  	objprefixes, err := ioutil.ReadDir(objdir)
    55  	if err != nil {
    56  		addErr(err)
    57  	} else {
    58  		// FIXME: This should verify the hashes in pack indexes too.
    59  		for _, prefixdir := range objprefixes {
    60  			// We wrap the loop in a closure function so that defers
    61  			// (ie file.Close()) don't need to wait until the entire repo
    62  			// is finished.
    63  			err := func() error {
    64  				// We only want the 2 character prefix directories so that we
    65  				// can check the objects inside of them.
    66  				if !prefixdir.IsDir() {
    67  					return nil
    68  				}
    69  				if len(prefixdir.Name()) != 2 {
    70  					return nil
    71  				}
    72  				objects, err := ioutil.ReadDir(
    73  					filepath.Join(objdir, prefixdir.Name()),
    74  				)
    75  				if err != nil {
    76  					return err
    77  				}
    78  				for _, object := range objects {
    79  					wantsha1 := fmt.Sprintf("%s%s", prefixdir.Name(), object.Name())
    80  					oid, err := Sha1FromString(wantsha1)
    81  					if err != nil {
    82  						return err
    83  					}
    84  
    85  					// The type of verifications done on blobs
    86  					// (ie. sha1 mismatch) are valid for all object types
    87  					if err := verifyBlob(c, opts, stderr, oid); err != nil {
    88  						corrupted[oid] = struct{}{}
    89  						return err
    90  					}
    91  					switch ty := oid.Type(c); ty {
    92  					case "commit":
    93  						if err := verifyCommit(c, opts, CommitID(oid)); err != nil {
    94  							return fmt.Errorf("error in commit %v: %v", oid, err)
    95  						}
    96  					case "tree":
    97  						if err := verifyTree(c, opts, TreeID(oid)); err != nil {
    98  							return fmt.Errorf("error in tree %v: %v", oid, err)
    99  						}
   100  					case "tag":
   101  						if errs := verifyTag(c, opts, oid); errs != nil {
   102  							for _, err := range errs {
   103  								addErr(err)
   104  							}
   105  							return nil
   106  						}
   107  					case "blob":
   108  						// There's not much to verify for a blob, but it's
   109  						// a known type.
   110  					default:
   111  						return fmt.Errorf("Unknown object type %v", ty)
   112  					}
   113  
   114  				}
   115  				return nil
   116  			}()
   117  			if err != nil {
   118  				addErr(err)
   119  			}
   120  		}
   121  	}
   122  
   123  	var hc []Commitish
   124  	// Either use RevParse or ShowRef to get a list of all commits that
   125  	// we want to be checking, depending on if anything was passed as
   126  	// an argument.
   127  	if len(objects) != 0 {
   128  		heads, err := RevParse(c, RevParseOptions{}, objects)
   129  		if err != nil {
   130  			addErr(err)
   131  			// We can't do much more if we can't figure out which objects
   132  			// we're supposed to be validating.
   133  			return errs
   134  		}
   135  		for _, head := range heads {
   136  			h, err := head.CommitID(c)
   137  			if err != nil {
   138  				addErr(err)
   139  			}
   140  			hc = append(hc, h)
   141  		}
   142  	} else {
   143  		heads, err := ShowRef(c, ShowRefOptions{}, nil)
   144  		if err != nil {
   145  			addErr(err)
   146  		}
   147  		for _, head := range heads {
   148  			t, err := c.GetObject(head.Value)
   149  			if err != nil {
   150  				addErr(err)
   151  			}
   152  			if t.GetType() == "tag" {
   153  				// This was verified by verifytag
   154  				continue
   155  			}
   156  			h, err := head.CommitID(c)
   157  			if err != nil {
   158  				addErr(fmt.Errorf("not a commit"))
   159  			}
   160  			hc = append(hc, h)
   161  		}
   162  
   163  	}
   164  
   165  	// Get a list of all reachable objects from the heads.
   166  	reachables, err := RevList(c, RevListOptions{Quiet: true, Objects: true}, nil, hc, nil)
   167  	if err != nil {
   168  		errs = append(errs, err)
   169  		return errs
   170  	}
   171  	for _, obj := range reachables {
   172  		if opts.Verbose {
   173  			fmt.Fprintf(stderr, "Checking %v\n", obj)
   174  		}
   175  		if _, ok := corrupted[obj]; ok {
   176  			addErr(fmt.Errorf("%v corrupt or missing", obj))
   177  			continue
   178  		}
   179  		o, _, err := c.HaveObject(obj)
   180  		if err != nil {
   181  			addErr(err)
   182  			continue
   183  		}
   184  		if !o {
   185  			addErr(fmt.Errorf("%v corrupt or missing", obj))
   186  			continue
   187  		}
   188  	}
   189  	return errs
   190  }
   191  
   192  // Verifies the HEAD pointer for fsck.
   193  func verifyHead(c *Client, stderr io.Writer, opts FsckOptions) error {
   194  	if opts.Verbose {
   195  		fmt.Fprintln(stderr, "Checking HEAD link")
   196  	}
   197  
   198  	hfile := c.GitDir.File("HEAD")
   199  	if !hfile.Exists() {
   200  		return fmt.Errorf("Missing head link")
   201  	}
   202  
   203  	line, err := hfile.ReadFirstLine()
   204  	if err != nil {
   205  		// this shouldn't happen since we already verified it exists
   206  		return err
   207  	}
   208  
   209  	sha1, err := Sha1FromString(line)
   210  	if err != nil {
   211  		// we couldn't convert it to a sha1, so it must be a ref
   212  		// pointer and should point to a head (not a tag or a remote)
   213  		if !strings.HasPrefix(line, "ref: refs/heads") {
   214  			return fmt.Errorf("error: HEAD points to something strange")
   215  		}
   216  		return nil
   217  	}
   218  
   219  	// We could convert the line to a Sha1, it's a detached head.
   220  	if sha1 == (Sha1{}) {
   221  		return fmt.Errorf("error: HEAD: detached HEAD points at nothing")
   222  	}
   223  	have, _, err := c.HaveObject(sha1)
   224  	if err != nil || !have {
   225  		return fmt.Errorf("error: invalid sha1 pointer %v", sha1)
   226  	}
   227  	return nil
   228  }
   229  
   230  func validatePerson(obj GitObject, typ string) error {
   231  	s := getObjectHeader(obj.GetContent(), typ)
   232  	// 0 = whole match
   233  	// 1 = name
   234  	// 2 = email
   235  	// 3 = timestamp
   236  	personRe := regexp.MustCompile(`(.*?)\<(.*?)\>(.*)`)
   237  	pieces := personRe.FindStringSubmatch(s)
   238  	if len(pieces) != 4 {
   239  		// This is mostly just to get the same error messages
   240  		// as git when running the official test suite"
   241  		// "foo asdf> 1234" is reported as bad name
   242  		// "foo 1234" is reported as bad email.
   243  		if strings.Count(s, ">") == 0 {
   244  			return fmt.Errorf("missingEmail: invalid %v line - missing email", typ)
   245  		}
   246  		return fmt.Errorf("badName: invalid %v line - bad name", typ)
   247  	}
   248  	if strings.Count(pieces[1], ">") > 0 {
   249  		return fmt.Errorf("badName: invalid %v line - bad name", typ)
   250  	}
   251  	if !strings.HasPrefix(pieces[3], " ") {
   252  		return fmt.Errorf("missingSpaceBeforeDate: invalid %v line - missing space before date", typ)
   253  	}
   254  
   255  	timestampRe := regexp.MustCompile(`^ (\d+) (\+|\-)(\d+)$`)
   256  	timepieces := timestampRe.FindStringSubmatch(pieces[3])
   257  	if len(timepieces) == 0 {
   258  		return fmt.Errorf("invalidateDate: invalid %v line - timestamp is not a valid date", typ)
   259  	}
   260  	// check for overflow of uint64
   261  	bignum, ok := new(big.Int).SetString(timepieces[1], 10)
   262  	if !ok {
   263  		// This shouldn't happen since the regexp validated
   264  		// that it was a string of digits.
   265  		panic("Could not convert integer to bignum")
   266  	}
   267  
   268  	// can't use math.Newint because it takes an int64, not a uint64
   269  	maxuint64, ok := new(big.Int).SetString("18446744073709551615", 10)
   270  	if !ok {
   271  		// This shouldn't happen since we're dealing with a const
   272  		panic("Could not convert max uint64 to bignum")
   273  	}
   274  	if bignum.Cmp(maxuint64) > 0 {
   275  		return fmt.Errorf("badDateOverflow: invalid %v line - date causes integer overflow", typ)
   276  	}
   277  	return nil
   278  }
   279  
   280  // Verifies a commit for fsck or rev-parse --verify-objects
   281  func verifyCommit(c *Client, opts FsckOptions, cmt CommitID) error {
   282  	obj, err := c.GetCommitObject(cmt)
   283  	if err != nil {
   284  		return err
   285  	}
   286  
   287  	if err := validatePerson(obj, "author"); err != nil {
   288  		return err
   289  	}
   290  	if err := validatePerson(obj, "committer"); err != nil {
   291  		return err
   292  	}
   293  
   294  	content := obj.GetContent()
   295  	for i, c := range content {
   296  		if c == 0 {
   297  			return fmt.Errorf("nulInHeader: unterminated header: NUL at offset %v", i)
   298  		}
   299  		if c == '\n' && i > 0 && content[i-1] == '\n' {
   300  			// reached the end of the headers.
   301  			break
   302  		}
   303  	}
   304  	if c.GetConfig("fsck.multipleAuthors") != "ignore" {
   305  		headers := objectHeaderCount(content)
   306  		if headers["author"] > 1 {
   307  			return fmt.Errorf("multipleAuthors: invalid format - multiple 'author' lines")
   308  		}
   309  	}
   310  	return nil
   311  }
   312  
   313  // Verifies a tree for fsck or rev-parse --verify-objects
   314  func verifyTree(c *Client, opts FsckOptions, tid TreeID) error {
   315  	paths := make(map[IndexPath]struct{})
   316  	obj, err := c.GetObject(Sha1(tid))
   317  	if err != nil {
   318  		return err
   319  	}
   320  	content := obj.GetContent()
   321  	i := 0
   322  	for i < len(content) {
   323  		name, entry, size, err := parseRawTreeLine(i, content)
   324  		if err != nil {
   325  			return err
   326  		}
   327  		if entry.Sha1 == (Sha1{}) {
   328  			fmt.Fprintf(os.Stderr, "warning in tree %v: nullSha1: contains entries pointing to null sha1\n", tid)
   329  		}
   330  		if _, ok := paths[name]; ok {
   331  			return fmt.Errorf("duplicateEntries: contains duplicate file entries")
   332  		}
   333  
   334  		// I don't know why these are warnings instead of errors, but
   335  		// git fsck is stupid that way.
   336  		sanitizedName := strings.Replace(name.String(), "\u200c", "", -1)
   337  		sanitizedName = strings.ToLower(sanitizedName)
   338  		switch sanitizedName {
   339  		case ".":
   340  			fmt.Fprintf(os.Stderr, "warning in tree %v: hasDot: contains '.'\n", tid)
   341  		case "..":
   342  			fmt.Fprintf(os.Stderr, "warning in tree %v: hasDotdot: contains '..'\n", tid)
   343  		case ".git", ".git.":
   344  			fmt.Fprintf(os.Stderr, "warning in tree %v: hasDotgit: contains '.git'\n", tid)
   345  		}
   346  		if strings.Index(sanitizedName, `\.git\`) >= 0 || strings.HasPrefix(sanitizedName, `.git\`) {
   347  
   348  			// Equivalent to .git on Windows
   349  			fmt.Fprintf(os.Stderr, "warning in tree %v: hasDotgit: contains '.git'\n", tid)
   350  		}
   351  		if strings.HasPrefix(sanitizedName, "git~") {
   352  			// Equivalent to .git on Windows
   353  			fmt.Fprintf(os.Stderr, "warning in tree %v: hasDotgit: contains '.git'\n", tid)
   354  		}
   355  		paths[name] = struct{}{}
   356  		i += size
   357  
   358  	}
   359  	return nil
   360  }
   361  
   362  func verifyTag(c *Client, opts FsckOptions, tid Sha1) []error {
   363  	var errs []error
   364  	tag, err := c.GetTagObject(tid)
   365  	if err != nil {
   366  		return []error{err}
   367  	}
   368  	objid := tag.GetHeader("object")
   369  	objsha, err := Sha1FromString(objid)
   370  	if err != nil {
   371  		return []error{err}
   372  	}
   373  
   374  	_, err = c.GetCommitObject(CommitID(objsha))
   375  	if err != nil {
   376  		// This is really stupid, but t1450.17 expects
   377  		// this one particular error on stdout instead
   378  		// of stderr, so we just print it instead of
   379  		// returning it.
   380  		fmt.Printf(
   381  			`broken link from tag %v
   382                to commit %v
   383  `, tid, objid,
   384  		)
   385  		errs = append(errs, fmt.Errorf(""))
   386  	}
   387  	if tg := tag.GetHeader("tag"); tg != "" {
   388  		words := strings.Fields(tg)
   389  		if len(words) > 1 {
   390  			// Similar stupidity to t1450.17, t1450.18
   391  			// expects these on stderr, but also expects
   392  			// that these leave an exit status of 0.
   393  			fmt.Fprintf(os.Stderr, "warning in tag %v: badTagName: invalid 'tag' name: wrong name format\n", tid)
   394  		}
   395  	}
   396  	tagger := tag.GetHeader("tagger")
   397  	if tagger == "" {
   398  		fmt.Fprintf(os.Stderr, "warning in tag %v: missingTaggerEntry: invalid format - expected 'tagger' line\n", tid)
   399  	} else if err := validatePerson(tag, "tagger"); err != nil {
   400  		errs = append(errs, fmt.Errorf("error in tag %v: invalid author/committer", tid))
   401  	}
   402  
   403  	content := tag.GetContent()
   404  	for i, c := range content {
   405  		if c == 0 {
   406  			errs = append(errs, fmt.Errorf("error in tag %v: nulInHeader: unterminated header: NUL at offset %v", tid, i))
   407  		}
   408  		if c == '\n' && i > 0 && content[i-1] == '\n' {
   409  			// reached the end of the headers.
   410  			break
   411  		}
   412  	}
   413  	return errs
   414  }
   415  
   416  func verifyBlob(c *Client, opts FsckOptions, stderr io.Writer, s Sha1) error {
   417  	// FIXME: Check blobs that are in packs too.
   418  	objdir := c.GetObjectsDir().String()
   419  	prefixdir := fmt.Sprintf("%0.2x", s[0:1])
   420  	fname := fmt.Sprintf("%0.38x", s[1:])
   421  	filename := filepath.Join(objdir, prefixdir, fname)
   422  	if opts.Verbose {
   423  		fmt.Fprintf(stderr, "Checking %s %s\n", s.Type(c), s)
   424  	}
   425  	f, err := os.Open(filepath.Join(filename))
   426  	if err != nil {
   427  		return err
   428  	}
   429  	defer f.Close()
   430  	zr, err := zlib.NewReader(f)
   431  	if err != nil {
   432  		return err
   433  	}
   434  	h := sha1.New()
   435  	if _, err := io.Copy(h, zr); err != nil {
   436  		return err
   437  	}
   438  	sum := h.Sum(nil)
   439  	sumsha1, err := Sha1FromSlice(sum)
   440  	if err != nil {
   441  		// This should never happen, a sha1 from crypto/sha1
   442  		// should always be convertable to our Sha1 type
   443  		panic(err)
   444  	}
   445  	if sumsha1 != s {
   446  		return fmt.Errorf("error: sha1 mismatch %v", s)
   447  	}
   448  	return nil
   449  }