github.com/maruel/nin@v0.0.0-20220112143044-f35891e3ce7e/build_log.go (about)

     1  // Copyright 2011 Google Inc. All Rights Reserved.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package nin
    16  
    17  import (
    18  	"bytes"
    19  	"errors"
    20  	"fmt"
    21  	"io"
    22  	"io/ioutil"
    23  	"os"
    24  	"reflect"
    25  	"strconv"
    26  	"strings"
    27  	"unsafe"
    28  )
    29  
    30  // LogEntry is an entry in BuildLog.
    31  type LogEntry struct {
    32  	output      string
    33  	commandHash uint64
    34  	startTime   int32
    35  	endTime     int32
    36  	mtime       TimeStamp
    37  }
    38  
    39  // Equal compares two LogEntry.
    40  func (l *LogEntry) Equal(r *LogEntry) bool {
    41  	return l.output == r.output && l.commandHash == r.commandHash &&
    42  		l.startTime == r.startTime && l.endTime == r.endTime &&
    43  		l.mtime == r.mtime
    44  }
    45  
    46  // Serialize writes an entry into a log file as a text form.
    47  func (l *LogEntry) Serialize(w io.Writer) error {
    48  	_, err := fmt.Fprintf(w, "%d\t%d\t%d\t%s\t%x\n", l.startTime, l.endTime, l.mtime, l.output, l.commandHash)
    49  	return err
    50  }
    51  
    52  // Implementation details:
    53  // Each run's log appends to the log file.
    54  // To load, we run through all log entries in series, throwing away
    55  // older runs.
    56  // Once the number of redundant entries exceeds a threshold, we write
    57  // out a new file and replace the existing one with it.
    58  
    59  const (
    60  	buildLogFileSignature          = "# ninja log v%d\n"
    61  	buildLogOldestSupportedVersion = 4
    62  	buildLogCurrentVersion         = 5
    63  )
    64  
    65  // unsafeByteSlice converts string to a byte slice without memory allocation.
    66  func unsafeByteSlice(s string) (b []byte) {
    67  	/* #nosec G103 */
    68  	bh := (*reflect.SliceHeader)(unsafe.Pointer(&b))
    69  	/* #nosec G103 */
    70  	sh := *(*reflect.StringHeader)(unsafe.Pointer(&s))
    71  	bh.Data = sh.Data
    72  	bh.Len = sh.Len
    73  	bh.Cap = sh.Len
    74  	return
    75  }
    76  
    77  // unsafeUint64Slice converts string to a byte slice without memory allocation.
    78  func unsafeUint64Slice(s string) (b []uint64) {
    79  	/* #nosec G103 */
    80  	bh := (*reflect.SliceHeader)(unsafe.Pointer(&b))
    81  	/* #nosec G103 */
    82  	sh := *(*reflect.StringHeader)(unsafe.Pointer(&s))
    83  	bh.Data = sh.Data
    84  	bh.Len = sh.Len / 8
    85  	bh.Cap = sh.Len / 8
    86  	return
    87  }
    88  
    89  // HashCommand hashes a command using the MurmurHash2 algorithm by Austin
    90  // Appleby.
    91  func HashCommand(command string) uint64 {
    92  	seed := uint64(0xDECAFBADDECAFBAD)
    93  	const m = 0xc6a4a7935bd1e995
    94  	r := 47
    95  	l := len(command)
    96  	h := seed ^ (uint64(l) * m)
    97  	i := 0
    98  	if l > 7 {
    99  		// I tried a few combinations (data as []byte) and this one seemed to be the
   100  		// best. Feel free to micro-optimize.
   101  		//data := (*[0x7fff0000]uint64)(unsafe.Pointer((*reflect.StringHeader)(unsafe.Pointer(&command)).Data))[:l/8]
   102  		data := unsafeUint64Slice(command)
   103  		for ; i < len(data); i++ {
   104  			k := data[i]
   105  			k *= m
   106  			k ^= k >> r
   107  			k *= m
   108  			h ^= k
   109  			h *= m
   110  		}
   111  	}
   112  
   113  	//data2 := (*[0x7fff0000]byte)(unsafe.Pointer((*reflect.StringHeader)(unsafe.Pointer(&command)).Data))[8*i : 8*(i+1)]
   114  	data2 := unsafeByteSlice(command[i*8:])
   115  	//switch (l - 8*i) & 7 {
   116  	switch (l - 8*i) & 7 {
   117  	case 7:
   118  		h ^= uint64(data2[6]) << 48
   119  		fallthrough
   120  	case 6:
   121  		h ^= uint64(data2[5]) << 40
   122  		fallthrough
   123  	case 5:
   124  		h ^= uint64(data2[4]) << 32
   125  		fallthrough
   126  	case 4:
   127  		h ^= uint64(data2[3]) << 24
   128  		fallthrough
   129  	case 3:
   130  		h ^= uint64(data2[2]) << 16
   131  		fallthrough
   132  	case 2:
   133  		h ^= uint64(data2[1]) << 8
   134  		fallthrough
   135  	case 1:
   136  		h ^= uint64(data2[0])
   137  		h *= m
   138  	case 0:
   139  	}
   140  	h ^= h >> r
   141  	h *= m
   142  	h ^= h >> r
   143  	return h
   144  }
   145  
   146  //
   147  
// BuildLogUser answers questions about the manifest for the BuildLog.
type BuildLogUser interface {
	// IsPathDead reports whether the output path s is to be treated as dead.
	// Recompact drops the log entry of every path for which it returns true.
	IsPathDead(s string) bool
}
   152  
// BuildLog stores a log of every command run for every build.
//
// It has a few uses:
//
// 1) (hashes of) command lines for existing output files, so we know when we
// need to rebuild due to the command changing.
//
// 2) timing information, perhaps for generating reports.
//
// 3) restat information.
//
// Use NewBuildLog to obtain a value whose Entries map is initialized.
type BuildLog struct {
	Entries           map[string]*LogEntry // latest entry per output path
	logFile           *os.File             // open log handle; nil until the first write or after Close
	logFilePath       string               // set by OpenForWrite; empty means nothing is written to disk
	needsRecompaction bool                 // set by Load; makes OpenForWrite call Recompact first
}
   169  
   170  // Note: the C++ version uses ExternalStringHashMap<LogEntry*> for
   171  // BuildLog.entries.
   172  
   173  // NewBuildLog returns an initialized BuidLog.
   174  func NewBuildLog() BuildLog {
   175  	return BuildLog{Entries: map[string]*LogEntry{}}
   176  }
   177  
   178  // OpenForWrite prepares writing to the log file without actually opening it -
   179  // that will happen when/if it's needed.
   180  func (b *BuildLog) OpenForWrite(path string, user BuildLogUser) error {
   181  	if b.needsRecompaction {
   182  		if err := b.Recompact(path, user); err != nil {
   183  			return err
   184  		}
   185  	}
   186  
   187  	if b.logFile != nil {
   188  		panic("oops")
   189  	}
   190  	b.logFilePath = path
   191  	// We don't actually open the file right now, but will
   192  	// do so on the first write attempt.
   193  	return nil
   194  }
   195  
   196  // RecordCommand records an edge.
   197  func (b *BuildLog) RecordCommand(edge *Edge, startTime, endTime int32, mtime TimeStamp) error {
   198  	command := edge.EvaluateCommand(true)
   199  	commandHash := HashCommand(command)
   200  	for _, out := range edge.Outputs {
   201  		path := out.Path
   202  		i, ok := b.Entries[path]
   203  		var logEntry *LogEntry
   204  		if ok {
   205  			logEntry = i
   206  		} else {
   207  			logEntry = &LogEntry{output: path}
   208  			b.Entries[logEntry.output] = logEntry
   209  		}
   210  		logEntry.commandHash = commandHash
   211  		logEntry.startTime = startTime
   212  		logEntry.endTime = endTime
   213  		logEntry.mtime = mtime
   214  
   215  		if err := b.openForWriteIfNeeded(); err != nil {
   216  			return err
   217  		}
   218  		if b.logFile != nil {
   219  			if err := logEntry.Serialize(b.logFile); err != nil {
   220  				return err
   221  			}
   222  			// The C++ code does an fsync on the handle but the Go version doesn't
   223  			// buffer so it is unnecessary.
   224  		}
   225  	}
   226  	return nil
   227  }
   228  
   229  // Close closes the file handle.
   230  func (b *BuildLog) Close() error {
   231  	err := b.openForWriteIfNeeded() // create the file even if nothing has been recorded
   232  	if b.logFile != nil {
   233  		_ = b.logFile.Close()
   234  	}
   235  	b.logFile = nil
   236  	return err
   237  }
   238  
   239  // openForWriteIfNeeded should be called before using logFile.
   240  func (b *BuildLog) openForWriteIfNeeded() error {
   241  	if b.logFile != nil || b.logFilePath == "" {
   242  		return nil
   243  	}
   244  	var err error
   245  	b.logFile, err = os.OpenFile(b.logFilePath, os.O_APPEND|os.O_CREATE|os.O_RDWR, 0o0666)
   246  	if b.logFile == nil {
   247  		return err
   248  	}
   249  	/*if setvbuf(b.logFile, nil, _IOLBF, BUFSIZ) != 0 {
   250  		return false
   251  	}
   252  	SetCloseOnExec(fileno(b.logFile))
   253  	*/
   254  
   255  	// TODO(maruel): Confirm, I'm pretty sure it's not true on Go.
   256  	// Opening a file in append mode doesn't set the file pointer to the file's
   257  	// end on Windows. Do that explicitly.
   258  	p, err := b.logFile.Seek(0, os.SEEK_END)
   259  	if err != nil {
   260  		return err
   261  	}
   262  	if p == 0 {
   263  		// If the file was empty, write the header.
   264  		if _, err := fmt.Fprintf(b.logFile, buildLogFileSignature, buildLogCurrentVersion); err != nil {
   265  			return err
   266  		}
   267  	}
   268  	return nil
   269  }
   270  
   271  /*
   272  type LineReader struct {
   273  
   274    file *FILE
   275    char buf[256 << 10]
   276    bufEnd *char  // Points one past the last valid byte in |buf|.
   277  
   278    lineStart *char
   279    // Points at the next \n in buf after lineStart, or NULL.
   280    lineEnd *char
   281  }
   282  func NewLineReader(file *FILE) LineReader {
   283  	return LineReader{
   284  		file: file,
   285  		bufEnd: buf,
   286  		lineStart: buf,
   287  		lineEnd: nil,
   288  	}
   289  	{ memset(buf, 0, sizeof(buf)); }
   290  }
   291  // Reads a \n-terminated line from the file passed to the constructor.
   292  // On return, *lineStart points to the beginning of the next line, and
   293  // *lineEnd points to the \n at the end of the line. If no newline is seen
   294  // in a fixed buffer size, *lineEnd is set to NULL. Returns false on EOF.
   295  func (l *LineReader) ReadLine(lineStart *char*, lineEnd *char*) bool {
   296    if l.lineStart >= l.bufEnd || !l.lineEnd {
   297      // Buffer empty, refill.
   298      sizeRead := fread(l.buf, 1, sizeof(l.buf), l.file)
   299      if !sizeRead {
   300        return false
   301      }
   302      l.lineStart = l.buf
   303      l.bufEnd = l.buf + sizeRead
   304    } else {
   305      // Advance to next line in buffer.
   306      l.lineStart = l.lineEnd + 1
   307    }
   308  
   309    l.lineEnd = (char*)memchr(l.lineStart, '\n', l.bufEnd - l.lineStart)
   310    if !l.lineEnd {
   311      // No newline. Move rest of data to start of buffer, fill rest.
   312      sizeT alreadyConsumed = l.lineStart - l.buf
   313      sizeT sizeRest = (l.bufEnd - l.buf) - alreadyConsumed
   314      memmove(l.buf, l.lineStart, sizeRest)
   315  
   316      sizeT read = fread(l.buf + sizeRest, 1, sizeof(l.buf) - sizeRest, l.file)
   317      l.bufEnd = l.buf + sizeRest + read
   318      l.lineStart = l.buf
   319      l.lineEnd = (char*)memchr(l.lineStart, '\n', l.bufEnd - l.lineStart)
   320    }
   321  
   322    *lineStart = l.lineStart
   323    *lineEnd = l.lineEnd
   324    return true
   325  }
   326  */
   327  
   328  // Load the on-disk log.
   329  //
   330  // It can return a warning with success and an error.
   331  //
   332  // LoadNotFound is only returned when os.IsNotExist(err) is true.
   333  func (b *BuildLog) Load(path string) (LoadStatus, error) {
   334  	defer metricRecord(".ninja_log load")()
   335  	file, err := ioutil.ReadFile(path)
   336  	if file == nil {
   337  		if os.IsNotExist(err) {
   338  			return LoadNotFound, err
   339  		}
   340  		return LoadError, err
   341  	}
   342  
   343  	if len(file) == 0 {
   344  		// File was empty.
   345  		return LoadSuccess, nil
   346  	}
   347  
   348  	logVersion := 0
   349  	uniqueEntryCount := 0
   350  	totalEntryCount := 0
   351  
   352  	// TODO(maruel): The LineReader implementation above is significantly faster
   353  	// because it modifies the data in-place.
   354  	reader := bytes.NewBuffer(file)
   355  	for {
   356  		line, e := reader.ReadString('\n')
   357  		if e != nil {
   358  			break
   359  		}
   360  		line = line[:len(line)-1]
   361  		if logVersion == 0 {
   362  			_, _ = fmt.Sscanf(line, buildLogFileSignature, &logVersion)
   363  
   364  			if logVersion < buildLogOldestSupportedVersion {
   365  				_ = os.Remove(path)
   366  				// Don't report this as a failure.  An empty build log will cause
   367  				// us to rebuild the outputs anyway.
   368  				return LoadSuccess, errors.New("build log version invalid, perhaps due to being too old; starting over")
   369  			}
   370  		}
   371  		const fieldSeparator = byte('\t')
   372  		end := strings.IndexByte(line, fieldSeparator)
   373  		if end == -1 {
   374  			continue
   375  		}
   376  
   377  		startTime, err := strconv.ParseInt(line[:end], 10, 32)
   378  		if err != nil {
   379  			return LoadError, fmt.Errorf("invalid build log: %w", err)
   380  		}
   381  		line = line[end+1:]
   382  		end = strings.IndexByte(line, fieldSeparator)
   383  		if end == -1 {
   384  			continue
   385  		}
   386  		endTime, err := strconv.ParseInt(line[:end], 10, 32)
   387  		if err != nil {
   388  			return LoadError, fmt.Errorf("invalid build log: %w", err)
   389  		}
   390  		line = line[end+1:]
   391  		end = strings.IndexByte(line, fieldSeparator)
   392  		if end == -1 {
   393  			continue
   394  		}
   395  		restatMtime, err := strconv.ParseInt(line[:end], 10, 64)
   396  		if err != nil {
   397  			return LoadError, fmt.Errorf("invalid build log: %w", err)
   398  		}
   399  		line = line[end+1:]
   400  		end = strings.IndexByte(line, fieldSeparator)
   401  		if end == -1 {
   402  			continue
   403  		}
   404  		output := line[:end]
   405  		line = line[end+1:]
   406  		var entry *LogEntry
   407  		i, ok := b.Entries[output]
   408  		if ok {
   409  			entry = i
   410  		} else {
   411  			entry = &LogEntry{output: output}
   412  			b.Entries[entry.output] = entry
   413  			uniqueEntryCount++
   414  		}
   415  		totalEntryCount++
   416  
   417  		// TODO(maruel): Check overflows.
   418  		entry.startTime = int32(startTime)
   419  		entry.endTime = int32(endTime)
   420  		entry.mtime = TimeStamp(restatMtime)
   421  		if logVersion >= 5 {
   422  			entry.commandHash, _ = strconv.ParseUint(line, 16, 64)
   423  		} else {
   424  			entry.commandHash = HashCommand(line)
   425  		}
   426  	}
   427  
   428  	// Decide whether it's time to rebuild the log:
   429  	// - if we're upgrading versions
   430  	// - if it's getting large
   431  	const minCompactionEntryCount = 100
   432  	const compactionRatio = 3
   433  	if logVersion < buildLogCurrentVersion {
   434  		b.needsRecompaction = true
   435  	} else if totalEntryCount > minCompactionEntryCount && totalEntryCount > uniqueEntryCount*compactionRatio {
   436  		b.needsRecompaction = true
   437  	}
   438  
   439  	return LoadSuccess, nil
   440  }
   441  
   442  // Recompact rewrites the known log entries, throwing away old data.
   443  func (b *BuildLog) Recompact(path string, user BuildLogUser) error {
   444  	defer metricRecord(".ninja_log recompact")()
   445  	_ = b.Close()
   446  	// TODO(maruel): Instead of truncating, overwrite the data, then adjust the
   447  	// size.
   448  	tempPath := path + ".recompact"
   449  	f, err := os.OpenFile(tempPath, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0o666)
   450  	if f == nil {
   451  		return err
   452  	}
   453  
   454  	if _, err = fmt.Fprintf(f, buildLogFileSignature, buildLogCurrentVersion); err != nil {
   455  		_ = f.Close()
   456  		return err
   457  	}
   458  
   459  	var deadOutputs []string
   460  	// TODO(maruel): Save in order?
   461  	for name, entry := range b.Entries {
   462  		if user.IsPathDead(name) {
   463  			deadOutputs = append(deadOutputs, name)
   464  			continue
   465  		}
   466  
   467  		if err = entry.Serialize(f); err != nil {
   468  			_ = f.Close()
   469  			return err
   470  		}
   471  	}
   472  
   473  	for _, name := range deadOutputs {
   474  		delete(b.Entries, name)
   475  	}
   476  
   477  	_ = f.Close()
   478  	if err = os.Remove(path); err != nil {
   479  		return err
   480  	}
   481  
   482  	if err = os.Rename(tempPath, path); err != nil {
   483  		return err
   484  	}
   485  	return err
   486  }
   487  
   488  // Restat recompacts but stat()'s all outputs in the log.
   489  func (b *BuildLog) Restat(path string, di DiskInterface, outputs []string) error {
   490  	defer metricRecord(".ninja_log restat")()
   491  	_ = b.Close()
   492  	tempPath := path + ".restat"
   493  	f, err := os.OpenFile(tempPath, os.O_CREATE|os.O_WRONLY, 0o666)
   494  	if f == nil {
   495  		return err
   496  	}
   497  
   498  	if _, err := fmt.Fprintf(f, buildLogFileSignature, buildLogCurrentVersion); err != nil {
   499  		_ = f.Close()
   500  		return err
   501  	}
   502  	for _, i := range b.Entries {
   503  		skip := len(outputs) > 0
   504  		// TODO(maruel): Sort plus binary search or create a map[string]struct{}?
   505  		for j := 0; j < len(outputs); j++ {
   506  			if i.output == outputs[j] {
   507  				skip = false
   508  				break
   509  			}
   510  		}
   511  		if !skip {
   512  			mtime, err := di.Stat(i.output)
   513  			if mtime == -1 {
   514  				_ = f.Close()
   515  				return err
   516  			}
   517  			i.mtime = mtime
   518  		}
   519  
   520  		if err := i.Serialize(f); err != nil {
   521  			_ = f.Close()
   522  			return err
   523  		}
   524  	}
   525  
   526  	_ = f.Close()
   527  	if err := os.Remove(path); err != nil {
   528  		return err
   529  	}
   530  
   531  	return os.Rename(tempPath, path)
   532  }