github.com/driusan/dgit@v0.0.0-20221118233547-f39f0c15edbb/git/fsck.go (about) 1 package git 2 3 import ( 4 "compress/zlib" 5 "crypto/sha1" 6 "fmt" 7 "io" 8 "io/ioutil" 9 "math/big" 10 "os" 11 "path/filepath" 12 "regexp" 13 "strings" 14 ) 15 16 type FsckOptions struct { 17 Unreachable bool 18 NoDangling bool 19 Root bool 20 Tags bool 21 Cache bool 22 NoReflogs bool 23 NoFull bool 24 ConnectivityOnly bool 25 Strict bool 26 Verbose bool 27 LostFound bool 28 NameObjects bool 29 NoProgress bool 30 } 31 32 // Fsck implements the "git fsck" subcommand. It prints any error encountered to 33 // the stderr argument, and returns an array of said errors. 34 func Fsck(c *Client, stderr io.Writer, opts FsckOptions, objects []string) (errs []error) { 35 addErr := func(err error) { 36 fmt.Fprintln(stderr, err) 37 errs = append(errs, err) 38 39 } 40 41 if err := verifyHead(c, stderr, opts); err != nil { 42 addErr(err) 43 } 44 45 if opts.Verbose { 46 fmt.Fprintln(stderr, "Checking object directory") 47 } 48 49 // HaveObject doesn't do any validation, so we keep track of things 50 // we found that are corrupted so we can include error messages if 51 // they're used. 52 corrupted := make(map[Sha1]struct{}) 53 objdir := c.GetObjectsDir().String() 54 objprefixes, err := ioutil.ReadDir(objdir) 55 if err != nil { 56 addErr(err) 57 } else { 58 // FIXME: This should verify the hashes in pack indexes too. 59 for _, prefixdir := range objprefixes { 60 // We wrap the loop in a closure function so that defers 61 // (ie file.Close()) don't need to wait until the entire repo 62 // is finished. 63 err := func() error { 64 // We only want the 2 character prefix directories so that we 65 // can check the objects inside of them. 66 if !prefixdir.IsDir() { 67 return nil 68 } 69 if len(prefixdir.Name()) != 2 { 70 return nil 71 } 72 objects, err := ioutil.ReadDir( 73 filepath.Join(objdir, prefixdir.Name()), 74 ) 75 if err != nil { 76 return err 77 } 78 for _, object := range objects { 79 wantsha1 := fmt.Sprintf("%s%s", prefixdir.Name(), object.Name()) 80 oid, err := Sha1FromString(wantsha1) 81 if err != nil { 82 return err 83 } 84 85 // The type of verifications done on blobs 86 // (ie. sha1 mismatch) are valid for all object types 87 if err := verifyBlob(c, opts, stderr, oid); err != nil { 88 corrupted[oid] = struct{}{} 89 return err 90 } 91 switch ty := oid.Type(c); ty { 92 case "commit": 93 if err := verifyCommit(c, opts, CommitID(oid)); err != nil { 94 return fmt.Errorf("error in commit %v: %v", oid, err) 95 } 96 case "tree": 97 if err := verifyTree(c, opts, TreeID(oid)); err != nil { 98 return fmt.Errorf("error in tree %v: %v", oid, err) 99 } 100 case "tag": 101 if errs := verifyTag(c, opts, oid); errs != nil { 102 for _, err := range errs { 103 addErr(err) 104 } 105 return nil 106 } 107 case "blob": 108 // There's not much to verify for a blob, but it's 109 // a known type. 110 default: 111 return fmt.Errorf("Unknown object type %v", ty) 112 } 113 114 } 115 return nil 116 }() 117 if err != nil { 118 addErr(err) 119 } 120 } 121 } 122 123 var hc []Commitish 124 // Either use RevParse or ShowRef to get a list of all commits that 125 // we want to be checking, depending on if anything was passed as 126 // an argument. 127 if len(objects) != 0 { 128 heads, err := RevParse(c, RevParseOptions{}, objects) 129 if err != nil { 130 addErr(err) 131 // We can't do much more if we can't figure out which objects 132 // we're supposed to be validating. 133 return errs 134 } 135 for _, head := range heads { 136 h, err := head.CommitID(c) 137 if err != nil { 138 addErr(err) 139 } 140 hc = append(hc, h) 141 } 142 } else { 143 heads, err := ShowRef(c, ShowRefOptions{}, nil) 144 if err != nil { 145 addErr(err) 146 } 147 for _, head := range heads { 148 t, err := c.GetObject(head.Value) 149 if err != nil { 150 addErr(err) 151 } 152 if t.GetType() == "tag" { 153 // This was verified by verifytag 154 continue 155 } 156 h, err := head.CommitID(c) 157 if err != nil { 158 addErr(fmt.Errorf("not a commit")) 159 } 160 hc = append(hc, h) 161 } 162 163 } 164 165 // Get a list of all reachable objects from the heads. 166 reachables, err := RevList(c, RevListOptions{Quiet: true, Objects: true}, nil, hc, nil) 167 if err != nil { 168 errs = append(errs, err) 169 return errs 170 } 171 for _, obj := range reachables { 172 if opts.Verbose { 173 fmt.Fprintf(stderr, "Checking %v\n", obj) 174 } 175 if _, ok := corrupted[obj]; ok { 176 addErr(fmt.Errorf("%v corrupt or missing", obj)) 177 continue 178 } 179 o, _, err := c.HaveObject(obj) 180 if err != nil { 181 addErr(err) 182 continue 183 } 184 if !o { 185 addErr(fmt.Errorf("%v corrupt or missing", obj)) 186 continue 187 } 188 } 189 return errs 190 } 191 192 // Verifies the HEAD pointer for fsck. 193 func verifyHead(c *Client, stderr io.Writer, opts FsckOptions) error { 194 if opts.Verbose { 195 fmt.Fprintln(stderr, "Checking HEAD link") 196 } 197 198 hfile := c.GitDir.File("HEAD") 199 if !hfile.Exists() { 200 return fmt.Errorf("Missing head link") 201 } 202 203 line, err := hfile.ReadFirstLine() 204 if err != nil { 205 // this shouldn't happen since we already verified it exists 206 return err 207 } 208 209 sha1, err := Sha1FromString(line) 210 if err != nil { 211 // we couldn't convert it to a sha1, so it must be a ref 212 // pointer and should point to a head (not a tag or a remote) 213 if !strings.HasPrefix(line, "ref: refs/heads") { 214 return fmt.Errorf("error: HEAD points to something strange") 215 } 216 return nil 217 } 218 219 // We could convert the line to a Sha1, it's a detached head. 220 if sha1 == (Sha1{}) { 221 return fmt.Errorf("error: HEAD: detached HEAD points at nothing") 222 } 223 have, _, err := c.HaveObject(sha1) 224 if err != nil || !have { 225 return fmt.Errorf("error: invalid sha1 pointer %v", sha1) 226 } 227 return nil 228 } 229 230 func validatePerson(obj GitObject, typ string) error { 231 s := getObjectHeader(obj.GetContent(), typ) 232 // 0 = whole match 233 // 1 = name 234 // 2 = email 235 // 3 = timestamp 236 personRe := regexp.MustCompile(`(.*?)\<(.*?)\>(.*)`) 237 pieces := personRe.FindStringSubmatch(s) 238 if len(pieces) != 4 { 239 // This is mostly just to get the same error messages 240 // as git when running the official test suite" 241 // "foo asdf> 1234" is reported as bad name 242 // "foo 1234" is reported as bad email. 243 if strings.Count(s, ">") == 0 { 244 return fmt.Errorf("missingEmail: invalid %v line - missing email", typ) 245 } 246 return fmt.Errorf("badName: invalid %v line - bad name", typ) 247 } 248 if strings.Count(pieces[1], ">") > 0 { 249 return fmt.Errorf("badName: invalid %v line - bad name", typ) 250 } 251 if !strings.HasPrefix(pieces[3], " ") { 252 return fmt.Errorf("missingSpaceBeforeDate: invalid %v line - missing space before date", typ) 253 } 254 255 timestampRe := regexp.MustCompile(`^ (\d+) (\+|\-)(\d+)$`) 256 timepieces := timestampRe.FindStringSubmatch(pieces[3]) 257 if len(timepieces) == 0 { 258 return fmt.Errorf("invalidateDate: invalid %v line - timestamp is not a valid date", typ) 259 } 260 // check for overflow of uint64 261 bignum, ok := new(big.Int).SetString(timepieces[1], 10) 262 if !ok { 263 // This shouldn't happen since the regexp validated 264 // that it was a string of digits. 265 panic("Could not convert integer to bignum") 266 } 267 268 // can't use math.Newint because it takes an int64, not a uint64 269 maxuint64, ok := new(big.Int).SetString("18446744073709551615", 10) 270 if !ok { 271 // This shouldn't happen since we're dealing with a const 272 panic("Could not convert max uint64 to bignum") 273 } 274 if bignum.Cmp(maxuint64) > 0 { 275 return fmt.Errorf("badDateOverflow: invalid %v line - date causes integer overflow", typ) 276 } 277 return nil 278 } 279 280 // Verifies a commit for fsck or rev-parse --verify-objects 281 func verifyCommit(c *Client, opts FsckOptions, cmt CommitID) error { 282 obj, err := c.GetCommitObject(cmt) 283 if err != nil { 284 return err 285 } 286 287 if err := validatePerson(obj, "author"); err != nil { 288 return err 289 } 290 if err := validatePerson(obj, "committer"); err != nil { 291 return err 292 } 293 294 content := obj.GetContent() 295 for i, c := range content { 296 if c == 0 { 297 return fmt.Errorf("nulInHeader: unterminated header: NUL at offset %v", i) 298 } 299 if c == '\n' && i > 0 && content[i-1] == '\n' { 300 // reached the end of the headers. 301 break 302 } 303 } 304 if c.GetConfig("fsck.multipleAuthors") != "ignore" { 305 headers := objectHeaderCount(content) 306 if headers["author"] > 1 { 307 return fmt.Errorf("multipleAuthors: invalid format - multiple 'author' lines") 308 } 309 } 310 return nil 311 } 312 313 // Verifies a tree for fsck or rev-parse --verify-objects 314 func verifyTree(c *Client, opts FsckOptions, tid TreeID) error { 315 paths := make(map[IndexPath]struct{}) 316 obj, err := c.GetObject(Sha1(tid)) 317 if err != nil { 318 return err 319 } 320 content := obj.GetContent() 321 i := 0 322 for i < len(content) { 323 name, entry, size, err := parseRawTreeLine(i, content) 324 if err != nil { 325 return err 326 } 327 if entry.Sha1 == (Sha1{}) { 328 fmt.Fprintf(os.Stderr, "warning in tree %v: nullSha1: contains entries pointing to null sha1\n", tid) 329 } 330 if _, ok := paths[name]; ok { 331 return fmt.Errorf("duplicateEntries: contains duplicate file entries") 332 } 333 334 // I don't know why these are warnings instead of errors, but 335 // git fsck is stupid that way. 336 sanitizedName := strings.Replace(name.String(), "\u200c", "", -1) 337 sanitizedName = strings.ToLower(sanitizedName) 338 switch sanitizedName { 339 case ".": 340 fmt.Fprintf(os.Stderr, "warning in tree %v: hasDot: contains '.'\n", tid) 341 case "..": 342 fmt.Fprintf(os.Stderr, "warning in tree %v: hasDotdot: contains '..'\n", tid) 343 case ".git", ".git.": 344 fmt.Fprintf(os.Stderr, "warning in tree %v: hasDotgit: contains '.git'\n", tid) 345 } 346 if strings.Index(sanitizedName, `\.git\`) >= 0 || strings.HasPrefix(sanitizedName, `.git\`) { 347 348 // Equivalent to .git on Windows 349 fmt.Fprintf(os.Stderr, "warning in tree %v: hasDotgit: contains '.git'\n", tid) 350 } 351 if strings.HasPrefix(sanitizedName, "git~") { 352 // Equivalent to .git on Windows 353 fmt.Fprintf(os.Stderr, "warning in tree %v: hasDotgit: contains '.git'\n", tid) 354 } 355 paths[name] = struct{}{} 356 i += size 357 358 } 359 return nil 360 } 361 362 func verifyTag(c *Client, opts FsckOptions, tid Sha1) []error { 363 var errs []error 364 tag, err := c.GetTagObject(tid) 365 if err != nil { 366 return []error{err} 367 } 368 objid := tag.GetHeader("object") 369 objsha, err := Sha1FromString(objid) 370 if err != nil { 371 return []error{err} 372 } 373 374 _, err = c.GetCommitObject(CommitID(objsha)) 375 if err != nil { 376 // This is really stupid, but t1450.17 expects 377 // this one particular error on stdout instead 378 // of stderr, so we just print it instead of 379 // returning it. 380 fmt.Printf( 381 `broken link from tag %v 382 to commit %v 383 `, tid, objid, 384 ) 385 errs = append(errs, fmt.Errorf("")) 386 } 387 if tg := tag.GetHeader("tag"); tg != "" { 388 words := strings.Fields(tg) 389 if len(words) > 1 { 390 // Similar stupidity to t1450.17, t1450.18 391 // expects these on stderr, but also expects 392 // that these leave an exit status of 0. 393 fmt.Fprintf(os.Stderr, "warning in tag %v: badTagName: invalid 'tag' name: wrong name format\n", tid) 394 } 395 } 396 tagger := tag.GetHeader("tagger") 397 if tagger == "" { 398 fmt.Fprintf(os.Stderr, "warning in tag %v: missingTaggerEntry: invalid format - expected 'tagger' line\n", tid) 399 } else if err := validatePerson(tag, "tagger"); err != nil { 400 errs = append(errs, fmt.Errorf("error in tag %v: invalid author/committer", tid)) 401 } 402 403 content := tag.GetContent() 404 for i, c := range content { 405 if c == 0 { 406 errs = append(errs, fmt.Errorf("error in tag %v: nulInHeader: unterminated header: NUL at offset %v", tid, i)) 407 } 408 if c == '\n' && i > 0 && content[i-1] == '\n' { 409 // reached the end of the headers. 410 break 411 } 412 } 413 return errs 414 } 415 416 func verifyBlob(c *Client, opts FsckOptions, stderr io.Writer, s Sha1) error { 417 // FIXME: Check blobs that are in packs too. 418 objdir := c.GetObjectsDir().String() 419 prefixdir := fmt.Sprintf("%0.2x", s[0:1]) 420 fname := fmt.Sprintf("%0.38x", s[1:]) 421 filename := filepath.Join(objdir, prefixdir, fname) 422 if opts.Verbose { 423 fmt.Fprintf(stderr, "Checking %s %s\n", s.Type(c), s) 424 } 425 f, err := os.Open(filepath.Join(filename)) 426 if err != nil { 427 return err 428 } 429 defer f.Close() 430 zr, err := zlib.NewReader(f) 431 if err != nil { 432 return err 433 } 434 h := sha1.New() 435 if _, err := io.Copy(h, zr); err != nil { 436 return err 437 } 438 sum := h.Sum(nil) 439 sumsha1, err := Sha1FromSlice(sum) 440 if err != nil { 441 // This should never happen, a sha1 from crypto/sha1 442 // should always be convertable to our Sha1 type 443 panic(err) 444 } 445 if sumsha1 != s { 446 return fmt.Errorf("error: sha1 mismatch %v", s) 447 } 448 return nil 449 }