golang.org/x/tools@v0.21.1-0.20240520172518-788d39e776b1/internal/bisect/bisect.go (about)

     1  // Copyright 2023 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // Package bisect can be used by compilers and other programs
     6  // to serve as a target for the bisect debugging tool.
     7  // See [golang.org/x/tools/cmd/bisect] for details about using the tool.
     8  //
     9  // To be a bisect target, allowing bisect to help determine which of a set of independent
    10  // changes provokes a failure, a program needs to:
    11  //
    12  //  1. Define a way to accept a change pattern on its command line or in its environment.
    13  //     The most common mechanism is a command-line flag.
    14  //     The pattern can be passed to [New] to create a [Matcher], the compiled form of a pattern.
    15  //
    16  //  2. Assign each change a unique ID. One possibility is to use a sequence number,
    17  //     but the most common mechanism is to hash some kind of identifying information
    18  //     like the file and line number where the change might be applied.
    19  //     [Hash] hashes its arguments to compute an ID.
    20  //
    21  //  3. Enable each change that the pattern says should be enabled.
    22  //     The [Matcher.Enable] method answers this question for a given change ID.
    23  //
    24  //  4. Report each change that the pattern says should be reported.
    25  //     The [Matcher.Report] method answers this question for a given change ID.
    26  //     The report consists of one more lines on standard error or standard output
    27  //     that contain a “match marker”. [Marker] returns the match marker for a given ID.
    28  //     When bisect reports a change as causing the failure, it identifies the change
    29  //     by printing those report lines, with the match marker removed.
    30  //
    31  // # Example Usage
    32  //
    33  // A program starts by defining how it receives the pattern. In this example, we will assume a flag.
    34  // The next step is to compile the pattern:
    35  //
    36  //	m, err := bisect.New(patternFlag)
    37  //	if err != nil {
    38  //		log.Fatal(err)
    39  //	}
    40  //
    41  // Then, each time a potential change is considered, the program computes
    42  // a change ID by hashing identifying information (source file and line, in this case)
    43  // and then calls m.ShouldEnable and m.ShouldReport to decide whether to
    44  // enable and report the change, respectively:
    45  //
    46  //	for each change {
    47  //		h := bisect.Hash(file, line)
    48  //		if m.ShouldEnable(h) {
    49  //			enableChange()
    50  //		}
    51  //		if m.ShouldReport(h) {
    52  //			log.Printf("%v %s:%d", bisect.Marker(h), file, line)
    53  //		}
    54  //	}
    55  //
    56  // Note that the two return different values when bisect is searching for a
    57  // minimal set of changes to disable to provoke a failure.
    58  //
    59  // Finally, note that New returns a nil Matcher when there is no pattern,
    60  // meaning that the target is not running under bisect at all.
    61  // In that common case, the computation of the hash can be avoided entirely
    62  // by checking for m == nil first:
    63  //
    64  //	for each change {
    65  //		if m == nil {
    66  //			enableChange()
    67  //		} else {
    68  //			h := bisect.Hash(file, line)
    69  //			if m.ShouldEnable(h) {
    70  //				enableChange()
    71  //			}
    72  //			if m.ShouldReport(h) {
    73  //				log.Printf("%v %s:%d", bisect.Marker(h), file, line)
    74  //			}
    75  //		}
    76  //	}
    77  //
    78  // # Pattern Syntax
    79  //
    80  // Patterns are generated by the bisect tool and interpreted by [New].
    81  // Users should not have to understand the patterns except when
    82  // debugging a target's bisect support or debugging the bisect tool itself.
    83  //
    84  // The pattern syntax selecting a change is a sequence of bit strings
    85  // separated by + and - operators. Each bit string denotes the set of
    86  // changes with IDs ending in those bits, + is set addition, - is set subtraction,
    87  // and the expression is evaluated in the usual left-to-right order.
    88  // The special binary number “y” denotes the set of all changes,
    89  // standing in for the empty bit string.
    90  // In the expression, all the + operators must appear before all the - operators.
    91  // A leading + adds to an empty set. A leading - subtracts from the set of all
    92  // possible suffixes.
    93  //
    94  // For example:
    95  //
    96  //   - “01+10” and “+01+10” both denote the set of changes
    97  //     with IDs ending with the bits 01 or 10.
    98  //
    99  //   - “01+10-1001” denotes the set of changes with IDs
   100  //     ending with the bits 01 or 10, but excluding those ending in 1001.
   101  //
   102  //   - “-01-1000” and “y-01-1000 both denote the set of all changes
   103  //     with IDs not ending in 01 nor 1000.
   104  //
   105  //   - “0+1-01+001” is not a valid pattern, because all the + operators do not
   106  //     appear before all the - operators.
   107  //
   108  // In the syntaxes described so far, the pattern specifies the changes to
   109  // enable and report. If a pattern is prefixed by a “!”, the meaning
   110  // changes: the pattern specifies the changes to DISABLE and report. This
   111  // mode of operation is needed when a program passes with all changes
   112  // enabled but fails with no changes enabled. In this case, bisect
   113  // searches for minimal sets of changes to disable.
   114  // Put another way, the leading “!” inverts the result from [Matcher.ShouldEnable]
   115  // but does not invert the result from [Matcher.ShouldReport].
   116  //
   117  // As a convenience for manual debugging, “n” is an alias for “!y”,
   118  // meaning to disable and report all changes.
   119  //
   120  // Finally, a leading “v” in the pattern indicates that the reports will be shown
   121  // to the user of bisect to describe the changes involved in a failure.
   122  // At the API level, the leading “v” causes [Matcher.Visible] to return true.
   123  // See the next section for details.
   124  //
   125  // # Match Reports
   126  //
   127  // The target program must enable only those changed matched
   128  // by the pattern, and it must print a match report for each such change.
   129  // A match report consists of one or more lines of text that will be
   130  // printed by the bisect tool to describe a change implicated in causing
   131  // a failure. Each line in the report for a given change must contain a
   132  // match marker with that change ID, as returned by [Marker].
   133  // The markers are elided when displaying the lines to the user.
   134  //
   135  // A match marker has the form “[bisect-match 0x1234]” where
   136  // 0x1234 is the change ID in hexadecimal.
   137  // An alternate form is “[bisect-match 010101]”, giving the change ID in binary.
   138  //
   139  // When [Matcher.Visible] returns false, the match reports are only
   140  // being processed by bisect to learn the set of enabled changes,
   141  // not shown to the user, meaning that each report can be a match
   142  // marker on a line by itself, eliding the usual textual description.
   143  // When the textual description is expensive to compute,
   144  // checking [Matcher.Visible] can help the avoid that expense
   145  // in most runs.
   146  package bisect
   147  
   148  // New creates and returns a new Matcher implementing the given pattern.
   149  // The pattern syntax is defined in the package doc comment.
   150  //
   151  // In addition to the pattern syntax syntax, New("") returns nil, nil.
   152  // The nil *Matcher is valid for use: it returns true from ShouldEnable
   153  // and false from ShouldReport for all changes. Callers can avoid calling
   154  // [Hash], [Matcher.ShouldEnable], and [Matcher.ShouldPrint] entirely
   155  // when they recognize the nil Matcher.
   156  func New(pattern string) (*Matcher, error) {
   157  	if pattern == "" {
   158  		return nil, nil
   159  	}
   160  
   161  	m := new(Matcher)
   162  
   163  	// Allow multiple v, so that “bisect cmd vPATTERN” can force verbose all the time.
   164  	p := pattern
   165  	for len(p) > 0 && p[0] == 'v' {
   166  		m.verbose = true
   167  		p = p[1:]
   168  		if p == "" {
   169  			return nil, &parseError{"invalid pattern syntax: " + pattern}
   170  		}
   171  	}
   172  
   173  	// Allow multiple !, each negating the last, so that “bisect cmd !PATTERN” works
   174  	// even when bisect chooses to add its own !.
   175  	m.enable = true
   176  	for len(p) > 0 && p[0] == '!' {
   177  		m.enable = !m.enable
   178  		p = p[1:]
   179  		if p == "" {
   180  			return nil, &parseError{"invalid pattern syntax: " + pattern}
   181  		}
   182  	}
   183  
   184  	if p == "n" {
   185  		// n is an alias for !y.
   186  		m.enable = !m.enable
   187  		p = "y"
   188  	}
   189  
   190  	// Parse actual pattern syntax.
   191  	result := true
   192  	bits := uint64(0)
   193  	start := 0
   194  	wid := 1 // 1-bit (binary); sometimes 4-bit (hex)
   195  	for i := 0; i <= len(p); i++ {
   196  		// Imagine a trailing - at the end of the pattern to flush final suffix
   197  		c := byte('-')
   198  		if i < len(p) {
   199  			c = p[i]
   200  		}
   201  		if i == start && wid == 1 && c == 'x' { // leading x for hex
   202  			start = i + 1
   203  			wid = 4
   204  			continue
   205  		}
   206  		switch c {
   207  		default:
   208  			return nil, &parseError{"invalid pattern syntax: " + pattern}
   209  		case '2', '3', '4', '5', '6', '7', '8', '9':
   210  			if wid != 4 {
   211  				return nil, &parseError{"invalid pattern syntax: " + pattern}
   212  			}
   213  			fallthrough
   214  		case '0', '1':
   215  			bits <<= wid
   216  			bits |= uint64(c - '0')
   217  		case 'a', 'b', 'c', 'd', 'e', 'f', 'A', 'B', 'C', 'D', 'E', 'F':
   218  			if wid != 4 {
   219  				return nil, &parseError{"invalid pattern syntax: " + pattern}
   220  			}
   221  			bits <<= 4
   222  			bits |= uint64(c&^0x20 - 'A' + 10)
   223  		case 'y':
   224  			if i+1 < len(p) && (p[i+1] == '0' || p[i+1] == '1') {
   225  				return nil, &parseError{"invalid pattern syntax: " + pattern}
   226  			}
   227  			bits = 0
   228  		case '+', '-':
   229  			if c == '+' && result == false {
   230  				// Have already seen a -. Should be - from here on.
   231  				return nil, &parseError{"invalid pattern syntax (+ after -): " + pattern}
   232  			}
   233  			if i > 0 {
   234  				n := (i - start) * wid
   235  				if n > 64 {
   236  					return nil, &parseError{"pattern bits too long: " + pattern}
   237  				}
   238  				if n <= 0 {
   239  					return nil, &parseError{"invalid pattern syntax: " + pattern}
   240  				}
   241  				if p[start] == 'y' {
   242  					n = 0
   243  				}
   244  				mask := uint64(1)<<n - 1
   245  				m.list = append(m.list, cond{mask, bits, result})
   246  			} else if c == '-' {
   247  				// leading - subtracts from complete set
   248  				m.list = append(m.list, cond{0, 0, true})
   249  			}
   250  			bits = 0
   251  			result = c == '+'
   252  			start = i + 1
   253  			wid = 1
   254  		}
   255  	}
   256  	return m, nil
   257  }
   258  
   259  // A Matcher is the parsed, compiled form of a PATTERN string.
   260  // The nil *Matcher is valid: it has all changes enabled but none reported.
   261  type Matcher struct {
   262  	verbose bool
   263  	enable  bool   // when true, list is for “enable and report” (when false, “disable and report”)
   264  	list    []cond // conditions; later ones win over earlier ones
   265  }
   266  
   267  // A cond is a single condition in the matcher.
   268  // Given an input id, if id&mask == bits, return the result.
   269  type cond struct {
   270  	mask   uint64
   271  	bits   uint64
   272  	result bool
   273  }
   274  
   275  // Verbose reports whether the reports will be shown to users
   276  // and need to include a human-readable change description.
   277  // If not, the target can print just the Marker on a line by itself
   278  // and perhaps save some computation.
   279  func (m *Matcher) Verbose() bool {
   280  	return m.verbose
   281  }
   282  
   283  // ShouldEnable reports whether the change with the given id should be enabled.
   284  func (m *Matcher) ShouldEnable(id uint64) bool {
   285  	if m == nil {
   286  		return true
   287  	}
   288  	for i := len(m.list) - 1; i >= 0; i-- {
   289  		c := &m.list[i]
   290  		if id&c.mask == c.bits {
   291  			return c.result == m.enable
   292  		}
   293  	}
   294  	return false == m.enable
   295  }
   296  
   297  // ShouldReport reports whether the change with the given id should be reported.
   298  func (m *Matcher) ShouldReport(id uint64) bool {
   299  	if m == nil {
   300  		return false
   301  	}
   302  	for i := len(m.list) - 1; i >= 0; i-- {
   303  		c := &m.list[i]
   304  		if id&c.mask == c.bits {
   305  			return c.result
   306  		}
   307  	}
   308  	return false
   309  }
   310  
   311  // Marker returns the match marker text to use on any line reporting details
   312  // about a match of the given ID.
   313  // It always returns the hexadecimal format.
   314  func Marker(id uint64) string {
   315  	return string(AppendMarker(nil, id))
   316  }
   317  
   318  // AppendMarker is like [Marker] but appends the marker to dst.
   319  func AppendMarker(dst []byte, id uint64) []byte {
   320  	const prefix = "[bisect-match 0x"
   321  	var buf [len(prefix) + 16 + 1]byte
   322  	copy(buf[:], prefix)
   323  	for i := 0; i < 16; i++ {
   324  		buf[len(prefix)+i] = "0123456789abcdef"[id>>60]
   325  		id <<= 4
   326  	}
   327  	buf[len(prefix)+16] = ']'
   328  	return append(dst, buf[:]...)
   329  }
   330  
   331  // CutMarker finds the first match marker in line and removes it,
   332  // returning the shortened line (with the marker removed),
   333  // the ID from the match marker,
   334  // and whether a marker was found at all.
   335  // If there is no marker, CutMarker returns line, 0, false.
   336  func CutMarker(line string) (short string, id uint64, ok bool) {
   337  	// Find first instance of prefix.
   338  	prefix := "[bisect-match "
   339  	i := 0
   340  	for ; ; i++ {
   341  		if i >= len(line)-len(prefix) {
   342  			return line, 0, false
   343  		}
   344  		if line[i] == '[' && line[i:i+len(prefix)] == prefix {
   345  			break
   346  		}
   347  	}
   348  
   349  	// Scan to ].
   350  	j := i + len(prefix)
   351  	for j < len(line) && line[j] != ']' {
   352  		j++
   353  	}
   354  	if j >= len(line) {
   355  		return line, 0, false
   356  	}
   357  
   358  	// Parse id.
   359  	idstr := line[i+len(prefix) : j]
   360  	if len(idstr) >= 3 && idstr[:2] == "0x" {
   361  		// parse hex
   362  		if len(idstr) > 2+16 { // max 0x + 16 digits
   363  			return line, 0, false
   364  		}
   365  		for i := 2; i < len(idstr); i++ {
   366  			id <<= 4
   367  			switch c := idstr[i]; {
   368  			case '0' <= c && c <= '9':
   369  				id |= uint64(c - '0')
   370  			case 'a' <= c && c <= 'f':
   371  				id |= uint64(c - 'a' + 10)
   372  			case 'A' <= c && c <= 'F':
   373  				id |= uint64(c - 'A' + 10)
   374  			}
   375  		}
   376  	} else {
   377  		if idstr == "" || len(idstr) > 64 { // min 1 digit, max 64 digits
   378  			return line, 0, false
   379  		}
   380  		// parse binary
   381  		for i := 0; i < len(idstr); i++ {
   382  			id <<= 1
   383  			switch c := idstr[i]; c {
   384  			default:
   385  				return line, 0, false
   386  			case '0', '1':
   387  				id |= uint64(c - '0')
   388  			}
   389  		}
   390  	}
   391  
   392  	// Construct shortened line.
   393  	// Remove at most one space from around the marker,
   394  	// so that "foo [marker] bar" shortens to "foo bar".
   395  	j++ // skip ]
   396  	if i > 0 && line[i-1] == ' ' {
   397  		i--
   398  	} else if j < len(line) && line[j] == ' ' {
   399  		j++
   400  	}
   401  	short = line[:i] + line[j:]
   402  	return short, id, true
   403  }
   404  
   405  // Hash computes a hash of the data arguments,
   406  // each of which must be of type string, byte, int, uint, int32, uint32, int64, uint64, uintptr, or a slice of one of those types.
   407  func Hash(data ...any) uint64 {
   408  	h := offset64
   409  	for _, v := range data {
   410  		switch v := v.(type) {
   411  		default:
   412  			// Note: Not printing the type, because reflect.ValueOf(v)
   413  			// would make the interfaces prepared by the caller escape
   414  			// and therefore allocate. This way, Hash(file, line) runs
   415  			// without any allocation. It should be clear from the
   416  			// source code calling Hash what the bad argument was.
   417  			panic("bisect.Hash: unexpected argument type")
   418  		case string:
   419  			h = fnvString(h, v)
   420  		case byte:
   421  			h = fnv(h, v)
   422  		case int:
   423  			h = fnvUint64(h, uint64(v))
   424  		case uint:
   425  			h = fnvUint64(h, uint64(v))
   426  		case int32:
   427  			h = fnvUint32(h, uint32(v))
   428  		case uint32:
   429  			h = fnvUint32(h, v)
   430  		case int64:
   431  			h = fnvUint64(h, uint64(v))
   432  		case uint64:
   433  			h = fnvUint64(h, v)
   434  		case uintptr:
   435  			h = fnvUint64(h, uint64(v))
   436  		case []string:
   437  			for _, x := range v {
   438  				h = fnvString(h, x)
   439  			}
   440  		case []byte:
   441  			for _, x := range v {
   442  				h = fnv(h, x)
   443  			}
   444  		case []int:
   445  			for _, x := range v {
   446  				h = fnvUint64(h, uint64(x))
   447  			}
   448  		case []uint:
   449  			for _, x := range v {
   450  				h = fnvUint64(h, uint64(x))
   451  			}
   452  		case []int32:
   453  			for _, x := range v {
   454  				h = fnvUint32(h, uint32(x))
   455  			}
   456  		case []uint32:
   457  			for _, x := range v {
   458  				h = fnvUint32(h, x)
   459  			}
   460  		case []int64:
   461  			for _, x := range v {
   462  				h = fnvUint64(h, uint64(x))
   463  			}
   464  		case []uint64:
   465  			for _, x := range v {
   466  				h = fnvUint64(h, x)
   467  			}
   468  		case []uintptr:
   469  			for _, x := range v {
   470  				h = fnvUint64(h, uint64(x))
   471  			}
   472  		}
   473  	}
   474  	return h
   475  }
   476  
   477  // Trivial error implementation, here to avoid importing errors.
   478  
   479  type parseError struct{ text string }
   480  
   481  func (e *parseError) Error() string { return e.text }
   482  
   483  // FNV-1a implementation. See Go's hash/fnv/fnv.go.
   484  // Copied here for simplicity (can handle uints directly)
   485  // and to avoid the dependency.
   486  
   487  const (
   488  	offset64 uint64 = 14695981039346656037
   489  	prime64  uint64 = 1099511628211
   490  )
   491  
   492  func fnv(h uint64, x byte) uint64 {
   493  	h ^= uint64(x)
   494  	h *= prime64
   495  	return h
   496  }
   497  
   498  func fnvString(h uint64, x string) uint64 {
   499  	for i := 0; i < len(x); i++ {
   500  		h ^= uint64(x[i])
   501  		h *= prime64
   502  	}
   503  	return h
   504  }
   505  
   506  func fnvUint64(h uint64, x uint64) uint64 {
   507  	for i := 0; i < 8; i++ {
   508  		h ^= uint64(x & 0xFF)
   509  		x >>= 8
   510  		h *= prime64
   511  	}
   512  	return h
   513  }
   514  
   515  func fnvUint32(h uint64, x uint32) uint64 {
   516  	for i := 0; i < 4; i++ {
   517  		h ^= uint64(x & 0xFF)
   518  		x >>= 8
   519  		h *= prime64
   520  	}
   521  	return h
   522  }