github.com/amarpal/go-tools@v0.0.0-20240422043104-40142f59f616/lintcmd/sarif.go (about)

     1  package lintcmd
     2  
     3  // Notes on GitHub-specific restrictions:
     4  //
     5  // Result.Message needs to either have ID or Text set. Markdown
     6  // gets ignored. Text isn't treated verbatim however: Markdown
     7  // formatting gets stripped, except for links.
     8  //
     9  // GitHub does not display RelatedLocations. The only way to make
    10  // use of them is to link to them (via their ID) in the
    11  // Result.Message. And even then, it will only show the referred
    12  // line of code, not the message. We can duplicate the messages in
    13  // the Result.Message, but we can't even indent them, because
    14  // leading whitespace gets stripped.
    15  //
    16  // GitHub does use the Markdown version of rule help, but it
    17  // renders it the way it renders comments on issues – that is, it
    18  // turns line breaks into hard line breaks, even though it
    19  // shouldn't.
    20  //
    21  // GitHub doesn't make use of the tool's URI or version, nor of
    22  // the help URIs of rules.
    23  //
    24  // There does not seem to be a way of using SARIF for "normal" CI,
    25  // without results showing up as code scanning alerts. Also, a
    26  // SARIF file containing only warnings, no errors, will not fail
    27  // CI by default, but this is configurable.
    28  // GitHub does display some parts of SARIF results in PRs, but
    29  // most of the useful parts of SARIF, such as help text of rules,
    30  // are only accessible via the code scanning alerts, which are only
    31  // accessible by users with write permissions.
    32  //
    33  // Result.Suppressions is being ignored.
    34  //
    35  //
    36  // Notes on other tools
    37  //
    38  // VS Code Sarif viewer
    39  //
    40  // The Sarif viewer in VS Code displays the full message in the
    41  // tabular view, removing newlines. That makes our multi-line
    42  // messages (which we use as a workaround for missing related
    43  // information) very ugly.
    44  //
    45  // Much like GitHub, the Sarif viewer does not make related
    46  // information visible unless we explicitly refer to it in the
    47  // message.
    48  //
    49  // Suggested fixes are not exposed in any way.
    50  //
    51  // It only shows the shortDescription or fullDescription of a
    52  // rule, not its help. We can't put the help in fullDescription,
    53  // because the fullDescription isn't meant to be that long. For
    54  // example, GitHub displays it in a single line, under the
    55  // shortDescription.
    56  //
    57  // VS Code can filter based on Result.Suppressions, but it doesn't
    58  // display our suppression message. Also, by default, suppressed
    59  // results get shown, and the column indicating that a result is
    60  // suppressed is hidden, which makes for a confusing experience.
    61  //
    62  // When a rule has only an ID, no name, VS Code displays a
    63  // prominent dash in place of the name. When the name and ID are
    64  // identical, it prints both. However, we can't make them
    65  // identical, as SARIF requires that either the ID and name are
    66  // different, or that the name is omitted.
    67  
    68  // FIXME(dh): we're currently reporting column information using UTF-8
    69  // byte offsets, not using Unicode code points or UTF-16, which are
    70  // the only two ways allowed by SARIF.
    71  
    72  // TODO(dh) set properties.tags – we can use different tags for the
    73  // staticcheck, simple, stylecheck and unused checks, so users can
    74  // filter their results
    75  
    76  import (
    77  	"encoding/json"
    78  	"fmt"
    79  	"net/url"
    80  	"os"
    81  	"path/filepath"
    82  	"regexp"
    83  	"strings"
    84  
    85  	"github.com/amarpal/go-tools/analysis/lint"
    86  	"github.com/amarpal/go-tools/sarif"
    87  )
    88  
    89  type sarifFormatter struct {
    90  	driverName    string
    91  	driverVersion string
    92  	driverWebsite string
    93  }
    94  
    95  func sarifLevel(severity lint.Severity) string {
    96  	switch severity {
    97  	case lint.SeverityNone:
    98  		// no configured severity, default to warning
    99  		return "warning"
   100  	case lint.SeverityError:
   101  		return "error"
   102  	case lint.SeverityDeprecated:
   103  		return "warning"
   104  	case lint.SeverityWarning:
   105  		return "warning"
   106  	case lint.SeverityInfo:
   107  		return "note"
   108  	case lint.SeverityHint:
   109  		return "note"
   110  	default:
   111  		// unreachable
   112  		return "none"
   113  	}
   114  }
   115  
   116  func encodePath(path string) string {
   117  	return (&url.URL{Path: path}).EscapedPath()
   118  }
   119  
   120  func sarifURI(path string) string {
   121  	u := url.URL{
   122  		Scheme: "file",
   123  		Path:   path,
   124  	}
   125  	return u.String()
   126  }
   127  
   128  func sarifArtifactLocation(name string) sarif.ArtifactLocation {
   129  	// Ideally we use relative paths so that GitHub can resolve them
   130  	name = shortPath(name)
   131  	if filepath.IsAbs(name) {
   132  		return sarif.ArtifactLocation{
   133  			URI: sarifURI(name),
   134  		}
   135  	} else {
   136  		return sarif.ArtifactLocation{
   137  			URI:       encodePath(name),
   138  			URIBaseID: "%SRCROOT%", // This is specific to GitHub,
   139  		}
   140  	}
   141  }
   142  
   143  func sarifFormatText(s string) string {
   144  	// GitHub doesn't ignore line breaks, even though it should, so we remove them.
   145  
   146  	var out strings.Builder
   147  	lines := strings.Split(s, "\n")
   148  	for i, line := range lines[:len(lines)-1] {
   149  		out.WriteString(line)
   150  		if line == "" {
   151  			out.WriteString("\n")
   152  		} else {
   153  			nextLine := lines[i+1]
   154  			if nextLine == "" || strings.HasPrefix(line, "> ") || strings.HasPrefix(line, "    ") {
   155  				out.WriteString("\n")
   156  			} else {
   157  				out.WriteString(" ")
   158  			}
   159  		}
   160  	}
   161  	out.WriteString(lines[len(lines)-1])
   162  	return convertCodeBlocks(out.String())
   163  }
   164  
   165  func moreCodeFollows(lines []string) bool {
   166  	for _, line := range lines {
   167  		if line == "" {
   168  			continue
   169  		}
   170  		if strings.HasPrefix(line, "    ") {
   171  			return true
   172  		} else {
   173  			return false
   174  		}
   175  	}
   176  	return false
   177  }
   178  
   179  var alpha = regexp.MustCompile(`^[a-zA-Z ]+$`)
   180  
   181  func convertCodeBlocks(text string) string {
   182  	var buf strings.Builder
   183  	lines := strings.Split(text, "\n")
   184  
   185  	inCode := false
   186  	empties := 0
   187  	for i, line := range lines {
   188  		if inCode {
   189  			if !moreCodeFollows(lines[i:]) {
   190  				if inCode {
   191  					fmt.Fprintln(&buf, "```")
   192  					inCode = false
   193  				}
   194  			}
   195  		}
   196  
   197  		prevEmpties := empties
   198  		if line == "" && !inCode {
   199  			empties++
   200  		} else {
   201  			empties = 0
   202  		}
   203  
   204  		if line == "" {
   205  			fmt.Fprintln(&buf)
   206  			continue
   207  		}
   208  
   209  		if strings.HasPrefix(line, "    ") {
   210  			line = line[4:]
   211  			if !inCode {
   212  				fmt.Fprintln(&buf, "```go")
   213  				inCode = true
   214  			}
   215  		}
   216  
   217  		onlyAlpha := alpha.MatchString(line)
   218  		out := line
   219  		if !inCode && prevEmpties >= 2 && onlyAlpha {
   220  			fmt.Fprintf(&buf, "## %s\n", out)
   221  		} else {
   222  			fmt.Fprint(&buf, out)
   223  			fmt.Fprintln(&buf)
   224  		}
   225  	}
   226  	if inCode {
   227  		fmt.Fprintln(&buf, "```")
   228  	}
   229  
   230  	return buf.String()
   231  }
   232  
   233  func (o *sarifFormatter) Format(checks []*lint.Analyzer, diagnostics []diagnostic) {
   234  	// TODO(dh): some diagnostics shouldn't be reported as results. For example, when the user specifies a package on the command line that doesn't exist.
   235  
   236  	cwd, _ := os.Getwd()
   237  	run := sarif.Run{
   238  		Tool: sarif.Tool{
   239  			Driver: sarif.ToolComponent{
   240  				Name:           o.driverName,
   241  				Version:        o.driverVersion,
   242  				InformationURI: o.driverWebsite,
   243  			},
   244  		},
   245  		Invocations: []sarif.Invocation{{
   246  			Arguments: os.Args[1:],
   247  			WorkingDirectory: sarif.ArtifactLocation{
   248  				URI: sarifURI(cwd),
   249  			},
   250  			ExecutionSuccessful: true,
   251  		}},
   252  	}
   253  	for _, c := range checks {
   254  		run.Tool.Driver.Rules = append(run.Tool.Driver.Rules,
   255  			sarif.ReportingDescriptor{
   256  				// We don't set Name, as Name and ID mustn't be identical.
   257  				ID: c.Analyzer.Name,
   258  				ShortDescription: sarif.Message{
   259  					Text:     c.Doc.Title,
   260  					Markdown: c.Doc.TitleMarkdown,
   261  				},
   262  				HelpURI: "https://staticcheck.dev/docs/checks#" + c.Analyzer.Name,
   263  				// We use our markdown as the plain text version, too. We
   264  				// use very little markdown, primarily quotations,
   265  				// indented code blocks and backticks. All of these are
   266  				// fine as plain text, too.
   267  				Help: sarif.Message{
   268  					Text:     sarifFormatText(c.Doc.Format(false)),
   269  					Markdown: sarifFormatText(c.Doc.FormatMarkdown(false)),
   270  				},
   271  				DefaultConfiguration: sarif.ReportingConfiguration{
   272  					// TODO(dh): we could figure out which checks were disabled globally
   273  					Enabled: true,
   274  					Level:   sarifLevel(c.Doc.Severity),
   275  				},
   276  			})
   277  	}
   278  
   279  	for _, p := range diagnostics {
   280  		r := sarif.Result{
   281  			RuleID: p.Category,
   282  			Kind:   sarif.Fail,
   283  			Message: sarif.Message{
   284  				Text: p.Message,
   285  			},
   286  		}
   287  		r.Locations = []sarif.Location{{
   288  			PhysicalLocation: sarif.PhysicalLocation{
   289  				ArtifactLocation: sarifArtifactLocation(p.Position.Filename),
   290  				Region: sarif.Region{
   291  					StartLine:   p.Position.Line,
   292  					StartColumn: p.Position.Column,
   293  					EndLine:     p.End.Line,
   294  					EndColumn:   p.End.Column,
   295  				},
   296  			},
   297  		}}
   298  		for _, fix := range p.SuggestedFixes {
   299  			sfix := sarif.Fix{
   300  				Description: sarif.Message{
   301  					Text: fix.Message,
   302  				},
   303  			}
   304  			// file name -> replacements
   305  			changes := map[string][]sarif.Replacement{}
   306  			for _, edit := range fix.TextEdits {
   307  				changes[edit.Position.Filename] = append(changes[edit.Position.Filename], sarif.Replacement{
   308  					DeletedRegion: sarif.Region{
   309  						StartLine:   edit.Position.Line,
   310  						StartColumn: edit.Position.Column,
   311  						EndLine:     edit.End.Line,
   312  						EndColumn:   edit.End.Column,
   313  					},
   314  					InsertedContent: sarif.ArtifactContent{
   315  						Text: string(edit.NewText),
   316  					},
   317  				})
   318  			}
   319  			for path, replacements := range changes {
   320  				sfix.ArtifactChanges = append(sfix.ArtifactChanges, sarif.ArtifactChange{
   321  					ArtifactLocation: sarifArtifactLocation(path),
   322  					Replacements:     replacements,
   323  				})
   324  			}
   325  			r.Fixes = append(r.Fixes, sfix)
   326  		}
   327  		for i, related := range p.Related {
   328  			r.Message.Text += fmt.Sprintf("\n\t[%s](%d)", related.Message, i+1)
   329  
   330  			r.RelatedLocations = append(r.RelatedLocations,
   331  				sarif.Location{
   332  					ID: i + 1,
   333  					Message: &sarif.Message{
   334  						Text: related.Message,
   335  					},
   336  					PhysicalLocation: sarif.PhysicalLocation{
   337  						ArtifactLocation: sarifArtifactLocation(related.Position.Filename),
   338  						Region: sarif.Region{
   339  							StartLine:   related.Position.Line,
   340  							StartColumn: related.Position.Column,
   341  							EndLine:     related.End.Line,
   342  							EndColumn:   related.End.Column,
   343  						},
   344  					},
   345  				})
   346  		}
   347  
   348  		if p.Severity == severityIgnored {
   349  			// Note that GitHub does not support suppressions, which is why Staticcheck still requires the -show-ignored flag to be set for us to emit ignored diagnostics.
   350  
   351  			r.Suppressions = []sarif.Suppression{{
   352  				Kind: "inSource",
   353  				// TODO(dh): populate the Justification field
   354  			}}
   355  		} else {
   356  			// We want an empty slice, not nil. SARIF differentiates
   357  			// between the two. An empty slice means that the diagnostic
   358  			// wasn't suppressed, while nil means that we don't have the
   359  			// information available.
   360  			r.Suppressions = []sarif.Suppression{}
   361  		}
   362  		run.Results = append(run.Results, r)
   363  	}
   364  
   365  	json.NewEncoder(os.Stdout).Encode(sarif.Log{
   366  		Version: sarif.Version,
   367  		Schema:  sarif.Schema,
   368  		Runs:    []sarif.Run{run},
   369  	})
   370  }