github.com/maruel/nin@v0.0.0-20220112143044-f35891e3ce7e/build_log.go (about) 1 // Copyright 2011 Google Inc. All Rights Reserved. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package nin 16 17 import ( 18 "bytes" 19 "errors" 20 "fmt" 21 "io" 22 "io/ioutil" 23 "os" 24 "reflect" 25 "strconv" 26 "strings" 27 "unsafe" 28 ) 29 30 // LogEntry is an entry in BuildLog. 31 type LogEntry struct { 32 output string 33 commandHash uint64 34 startTime int32 35 endTime int32 36 mtime TimeStamp 37 } 38 39 // Equal compares two LogEntry. 40 func (l *LogEntry) Equal(r *LogEntry) bool { 41 return l.output == r.output && l.commandHash == r.commandHash && 42 l.startTime == r.startTime && l.endTime == r.endTime && 43 l.mtime == r.mtime 44 } 45 46 // Serialize writes an entry into a log file as a text form. 47 func (l *LogEntry) Serialize(w io.Writer) error { 48 _, err := fmt.Fprintf(w, "%d\t%d\t%d\t%s\t%x\n", l.startTime, l.endTime, l.mtime, l.output, l.commandHash) 49 return err 50 } 51 52 // Implementation details: 53 // Each run's log appends to the log file. 54 // To load, we run through all log entries in series, throwing away 55 // older runs. 56 // Once the number of redundant entries exceeds a threshold, we write 57 // out a new file and replace the existing one with it. 58 59 const ( 60 buildLogFileSignature = "# ninja log v%d\n" 61 buildLogOldestSupportedVersion = 4 62 buildLogCurrentVersion = 5 63 ) 64 65 // unsafeByteSlice converts string to a byte slice without memory allocation. 66 func unsafeByteSlice(s string) (b []byte) { 67 /* #nosec G103 */ 68 bh := (*reflect.SliceHeader)(unsafe.Pointer(&b)) 69 /* #nosec G103 */ 70 sh := *(*reflect.StringHeader)(unsafe.Pointer(&s)) 71 bh.Data = sh.Data 72 bh.Len = sh.Len 73 bh.Cap = sh.Len 74 return 75 } 76 77 // unsafeUint64Slice converts string to a byte slice without memory allocation. 78 func unsafeUint64Slice(s string) (b []uint64) { 79 /* #nosec G103 */ 80 bh := (*reflect.SliceHeader)(unsafe.Pointer(&b)) 81 /* #nosec G103 */ 82 sh := *(*reflect.StringHeader)(unsafe.Pointer(&s)) 83 bh.Data = sh.Data 84 bh.Len = sh.Len / 8 85 bh.Cap = sh.Len / 8 86 return 87 } 88 89 // HashCommand hashes a command using the MurmurHash2 algorithm by Austin 90 // Appleby. 91 func HashCommand(command string) uint64 { 92 seed := uint64(0xDECAFBADDECAFBAD) 93 const m = 0xc6a4a7935bd1e995 94 r := 47 95 l := len(command) 96 h := seed ^ (uint64(l) * m) 97 i := 0 98 if l > 7 { 99 // I tried a few combinations (data as []byte) and this one seemed to be the 100 // best. Feel free to micro-optimize. 101 //data := (*[0x7fff0000]uint64)(unsafe.Pointer((*reflect.StringHeader)(unsafe.Pointer(&command)).Data))[:l/8] 102 data := unsafeUint64Slice(command) 103 for ; i < len(data); i++ { 104 k := data[i] 105 k *= m 106 k ^= k >> r 107 k *= m 108 h ^= k 109 h *= m 110 } 111 } 112 113 //data2 := (*[0x7fff0000]byte)(unsafe.Pointer((*reflect.StringHeader)(unsafe.Pointer(&command)).Data))[8*i : 8*(i+1)] 114 data2 := unsafeByteSlice(command[i*8:]) 115 //switch (l - 8*i) & 7 { 116 switch (l - 8*i) & 7 { 117 case 7: 118 h ^= uint64(data2[6]) << 48 119 fallthrough 120 case 6: 121 h ^= uint64(data2[5]) << 40 122 fallthrough 123 case 5: 124 h ^= uint64(data2[4]) << 32 125 fallthrough 126 case 4: 127 h ^= uint64(data2[3]) << 24 128 fallthrough 129 case 3: 130 h ^= uint64(data2[2]) << 16 131 fallthrough 132 case 2: 133 h ^= uint64(data2[1]) << 8 134 fallthrough 135 case 1: 136 h ^= uint64(data2[0]) 137 h *= m 138 case 0: 139 } 140 h ^= h >> r 141 h *= m 142 h ^= h >> r 143 return h 144 } 145 146 // 147 148 // BuildLogUser answers questions about the manifest for the BuildLog. 149 type BuildLogUser interface { 150 IsPathDead(s string) bool 151 } 152 153 // BuildLog stores a log of every command ran for every build. 154 // 155 // It has a few uses: 156 // 157 // 1) (hashes of) command lines for existing output files, so we know when we 158 // need to rebuild due to the command changing. 159 // 160 // 2) timing information, perhaps for generating reports. 161 // 162 // 3) restat information. 163 type BuildLog struct { 164 Entries map[string]*LogEntry 165 logFile *os.File 166 logFilePath string 167 needsRecompaction bool 168 } 169 170 // Note: the C++ version uses ExternalStringHashMap<LogEntry*> for 171 // BuildLog.entries. 172 173 // NewBuildLog returns an initialized BuidLog. 174 func NewBuildLog() BuildLog { 175 return BuildLog{Entries: map[string]*LogEntry{}} 176 } 177 178 // OpenForWrite prepares writing to the log file without actually opening it - 179 // that will happen when/if it's needed. 180 func (b *BuildLog) OpenForWrite(path string, user BuildLogUser) error { 181 if b.needsRecompaction { 182 if err := b.Recompact(path, user); err != nil { 183 return err 184 } 185 } 186 187 if b.logFile != nil { 188 panic("oops") 189 } 190 b.logFilePath = path 191 // We don't actually open the file right now, but will 192 // do so on the first write attempt. 193 return nil 194 } 195 196 // RecordCommand records an edge. 197 func (b *BuildLog) RecordCommand(edge *Edge, startTime, endTime int32, mtime TimeStamp) error { 198 command := edge.EvaluateCommand(true) 199 commandHash := HashCommand(command) 200 for _, out := range edge.Outputs { 201 path := out.Path 202 i, ok := b.Entries[path] 203 var logEntry *LogEntry 204 if ok { 205 logEntry = i 206 } else { 207 logEntry = &LogEntry{output: path} 208 b.Entries[logEntry.output] = logEntry 209 } 210 logEntry.commandHash = commandHash 211 logEntry.startTime = startTime 212 logEntry.endTime = endTime 213 logEntry.mtime = mtime 214 215 if err := b.openForWriteIfNeeded(); err != nil { 216 return err 217 } 218 if b.logFile != nil { 219 if err := logEntry.Serialize(b.logFile); err != nil { 220 return err 221 } 222 // The C++ code does an fsync on the handle but the Go version doesn't 223 // buffer so it is unnecessary. 224 } 225 } 226 return nil 227 } 228 229 // Close closes the file handle. 230 func (b *BuildLog) Close() error { 231 err := b.openForWriteIfNeeded() // create the file even if nothing has been recorded 232 if b.logFile != nil { 233 _ = b.logFile.Close() 234 } 235 b.logFile = nil 236 return err 237 } 238 239 // openForWriteIfNeeded should be called before using logFile. 240 func (b *BuildLog) openForWriteIfNeeded() error { 241 if b.logFile != nil || b.logFilePath == "" { 242 return nil 243 } 244 var err error 245 b.logFile, err = os.OpenFile(b.logFilePath, os.O_APPEND|os.O_CREATE|os.O_RDWR, 0o0666) 246 if b.logFile == nil { 247 return err 248 } 249 /*if setvbuf(b.logFile, nil, _IOLBF, BUFSIZ) != 0 { 250 return false 251 } 252 SetCloseOnExec(fileno(b.logFile)) 253 */ 254 255 // TODO(maruel): Confirm, I'm pretty sure it's not true on Go. 256 // Opening a file in append mode doesn't set the file pointer to the file's 257 // end on Windows. Do that explicitly. 258 p, err := b.logFile.Seek(0, os.SEEK_END) 259 if err != nil { 260 return err 261 } 262 if p == 0 { 263 // If the file was empty, write the header. 264 if _, err := fmt.Fprintf(b.logFile, buildLogFileSignature, buildLogCurrentVersion); err != nil { 265 return err 266 } 267 } 268 return nil 269 } 270 271 /* 272 type LineReader struct { 273 274 file *FILE 275 char buf[256 << 10] 276 bufEnd *char // Points one past the last valid byte in |buf|. 277 278 lineStart *char 279 // Points at the next \n in buf after lineStart, or NULL. 280 lineEnd *char 281 } 282 func NewLineReader(file *FILE) LineReader { 283 return LineReader{ 284 file: file, 285 bufEnd: buf, 286 lineStart: buf, 287 lineEnd: nil, 288 } 289 { memset(buf, 0, sizeof(buf)); } 290 } 291 // Reads a \n-terminated line from the file passed to the constructor. 292 // On return, *lineStart points to the beginning of the next line, and 293 // *lineEnd points to the \n at the end of the line. If no newline is seen 294 // in a fixed buffer size, *lineEnd is set to NULL. Returns false on EOF. 295 func (l *LineReader) ReadLine(lineStart *char*, lineEnd *char*) bool { 296 if l.lineStart >= l.bufEnd || !l.lineEnd { 297 // Buffer empty, refill. 298 sizeRead := fread(l.buf, 1, sizeof(l.buf), l.file) 299 if !sizeRead { 300 return false 301 } 302 l.lineStart = l.buf 303 l.bufEnd = l.buf + sizeRead 304 } else { 305 // Advance to next line in buffer. 306 l.lineStart = l.lineEnd + 1 307 } 308 309 l.lineEnd = (char*)memchr(l.lineStart, '\n', l.bufEnd - l.lineStart) 310 if !l.lineEnd { 311 // No newline. Move rest of data to start of buffer, fill rest. 312 sizeT alreadyConsumed = l.lineStart - l.buf 313 sizeT sizeRest = (l.bufEnd - l.buf) - alreadyConsumed 314 memmove(l.buf, l.lineStart, sizeRest) 315 316 sizeT read = fread(l.buf + sizeRest, 1, sizeof(l.buf) - sizeRest, l.file) 317 l.bufEnd = l.buf + sizeRest + read 318 l.lineStart = l.buf 319 l.lineEnd = (char*)memchr(l.lineStart, '\n', l.bufEnd - l.lineStart) 320 } 321 322 *lineStart = l.lineStart 323 *lineEnd = l.lineEnd 324 return true 325 } 326 */ 327 328 // Load the on-disk log. 329 // 330 // It can return a warning with success and an error. 331 // 332 // LoadNotFound is only returned when os.IsNotExist(err) is true. 333 func (b *BuildLog) Load(path string) (LoadStatus, error) { 334 defer metricRecord(".ninja_log load")() 335 file, err := ioutil.ReadFile(path) 336 if file == nil { 337 if os.IsNotExist(err) { 338 return LoadNotFound, err 339 } 340 return LoadError, err 341 } 342 343 if len(file) == 0 { 344 // File was empty. 345 return LoadSuccess, nil 346 } 347 348 logVersion := 0 349 uniqueEntryCount := 0 350 totalEntryCount := 0 351 352 // TODO(maruel): The LineReader implementation above is significantly faster 353 // because it modifies the data in-place. 354 reader := bytes.NewBuffer(file) 355 for { 356 line, e := reader.ReadString('\n') 357 if e != nil { 358 break 359 } 360 line = line[:len(line)-1] 361 if logVersion == 0 { 362 _, _ = fmt.Sscanf(line, buildLogFileSignature, &logVersion) 363 364 if logVersion < buildLogOldestSupportedVersion { 365 _ = os.Remove(path) 366 // Don't report this as a failure. An empty build log will cause 367 // us to rebuild the outputs anyway. 368 return LoadSuccess, errors.New("build log version invalid, perhaps due to being too old; starting over") 369 } 370 } 371 const fieldSeparator = byte('\t') 372 end := strings.IndexByte(line, fieldSeparator) 373 if end == -1 { 374 continue 375 } 376 377 startTime, err := strconv.ParseInt(line[:end], 10, 32) 378 if err != nil { 379 return LoadError, fmt.Errorf("invalid build log: %w", err) 380 } 381 line = line[end+1:] 382 end = strings.IndexByte(line, fieldSeparator) 383 if end == -1 { 384 continue 385 } 386 endTime, err := strconv.ParseInt(line[:end], 10, 32) 387 if err != nil { 388 return LoadError, fmt.Errorf("invalid build log: %w", err) 389 } 390 line = line[end+1:] 391 end = strings.IndexByte(line, fieldSeparator) 392 if end == -1 { 393 continue 394 } 395 restatMtime, err := strconv.ParseInt(line[:end], 10, 64) 396 if err != nil { 397 return LoadError, fmt.Errorf("invalid build log: %w", err) 398 } 399 line = line[end+1:] 400 end = strings.IndexByte(line, fieldSeparator) 401 if end == -1 { 402 continue 403 } 404 output := line[:end] 405 line = line[end+1:] 406 var entry *LogEntry 407 i, ok := b.Entries[output] 408 if ok { 409 entry = i 410 } else { 411 entry = &LogEntry{output: output} 412 b.Entries[entry.output] = entry 413 uniqueEntryCount++ 414 } 415 totalEntryCount++ 416 417 // TODO(maruel): Check overflows. 418 entry.startTime = int32(startTime) 419 entry.endTime = int32(endTime) 420 entry.mtime = TimeStamp(restatMtime) 421 if logVersion >= 5 { 422 entry.commandHash, _ = strconv.ParseUint(line, 16, 64) 423 } else { 424 entry.commandHash = HashCommand(line) 425 } 426 } 427 428 // Decide whether it's time to rebuild the log: 429 // - if we're upgrading versions 430 // - if it's getting large 431 const minCompactionEntryCount = 100 432 const compactionRatio = 3 433 if logVersion < buildLogCurrentVersion { 434 b.needsRecompaction = true 435 } else if totalEntryCount > minCompactionEntryCount && totalEntryCount > uniqueEntryCount*compactionRatio { 436 b.needsRecompaction = true 437 } 438 439 return LoadSuccess, nil 440 } 441 442 // Recompact rewrites the known log entries, throwing away old data. 443 func (b *BuildLog) Recompact(path string, user BuildLogUser) error { 444 defer metricRecord(".ninja_log recompact")() 445 _ = b.Close() 446 // TODO(maruel): Instead of truncating, overwrite the data, then adjust the 447 // size. 448 tempPath := path + ".recompact" 449 f, err := os.OpenFile(tempPath, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0o666) 450 if f == nil { 451 return err 452 } 453 454 if _, err = fmt.Fprintf(f, buildLogFileSignature, buildLogCurrentVersion); err != nil { 455 _ = f.Close() 456 return err 457 } 458 459 var deadOutputs []string 460 // TODO(maruel): Save in order? 461 for name, entry := range b.Entries { 462 if user.IsPathDead(name) { 463 deadOutputs = append(deadOutputs, name) 464 continue 465 } 466 467 if err = entry.Serialize(f); err != nil { 468 _ = f.Close() 469 return err 470 } 471 } 472 473 for _, name := range deadOutputs { 474 delete(b.Entries, name) 475 } 476 477 _ = f.Close() 478 if err = os.Remove(path); err != nil { 479 return err 480 } 481 482 if err = os.Rename(tempPath, path); err != nil { 483 return err 484 } 485 return err 486 } 487 488 // Restat recompacts but stat()'s all outputs in the log. 489 func (b *BuildLog) Restat(path string, di DiskInterface, outputs []string) error { 490 defer metricRecord(".ninja_log restat")() 491 _ = b.Close() 492 tempPath := path + ".restat" 493 f, err := os.OpenFile(tempPath, os.O_CREATE|os.O_WRONLY, 0o666) 494 if f == nil { 495 return err 496 } 497 498 if _, err := fmt.Fprintf(f, buildLogFileSignature, buildLogCurrentVersion); err != nil { 499 _ = f.Close() 500 return err 501 } 502 for _, i := range b.Entries { 503 skip := len(outputs) > 0 504 // TODO(maruel): Sort plus binary search or create a map[string]struct{}? 505 for j := 0; j < len(outputs); j++ { 506 if i.output == outputs[j] { 507 skip = false 508 break 509 } 510 } 511 if !skip { 512 mtime, err := di.Stat(i.output) 513 if mtime == -1 { 514 _ = f.Close() 515 return err 516 } 517 i.mtime = mtime 518 } 519 520 if err := i.Serialize(f); err != nil { 521 _ = f.Close() 522 return err 523 } 524 } 525 526 _ = f.Close() 527 if err := os.Remove(path); err != nil { 528 return err 529 } 530 531 return os.Rename(tempPath, path) 532 }