honnef.co/go/tools@v0.5.0-0.dev.0.20240520180541-dcae280a5e87/stylecheck/st1018/st1018.go (about) 1 package st1018 2 3 import ( 4 "fmt" 5 "go/ast" 6 "go/token" 7 "strconv" 8 "unicode" 9 "unicode/utf8" 10 11 "honnef.co/go/tools/analysis/code" 12 "honnef.co/go/tools/analysis/lint" 13 "honnef.co/go/tools/analysis/report" 14 15 "golang.org/x/tools/go/analysis" 16 "golang.org/x/tools/go/analysis/passes/inspect" 17 ) 18 19 var SCAnalyzer = lint.InitializeAnalyzer(&lint.Analyzer{ 20 Analyzer: &analysis.Analyzer{ 21 Name: "ST1018", 22 Run: run, 23 Requires: []*analysis.Analyzer{inspect.Analyzer}, 24 }, 25 Doc: &lint.Documentation{ 26 Title: `Avoid zero-width and control characters in string literals`, 27 Since: "2019.2", 28 MergeIf: lint.MergeIfAny, 29 }, 30 }) 31 32 var Analyzer = SCAnalyzer.Analyzer 33 34 func run(pass *analysis.Pass) (interface{}, error) { 35 fn := func(node ast.Node) { 36 lit := node.(*ast.BasicLit) 37 if lit.Kind != token.STRING { 38 return 39 } 40 41 type invalid struct { 42 r rune 43 off int 44 } 45 var invalids []invalid 46 hasFormat := false 47 hasControl := false 48 prev := rune(-1) 49 const zwj = '\u200d' 50 for off, r := range lit.Value { 51 if unicode.Is(unicode.Cf, r) { 52 if r >= '\U000e0020' && r <= '\U000e007f' { 53 // These are used for spelling out country codes for flag emoji 54 } else if unicode.Is(unicode.Variation_Selector, r) { 55 // Always allow variation selectors 56 } else if r == zwj && (unicode.Is(unicode.S, prev) || unicode.Is(unicode.Variation_Selector, prev)) { 57 // Allow zero-width joiner in emoji, including those that use variation selectors. 58 59 // Technically some foreign scripts make valid use of zero-width joiners, too, but for now we'll err 60 // on the side of flagging all non-emoji uses of ZWJ. 61 } else { 62 switch r { 63 case '\u0600', '\u0601', '\u0602', '\u0603', '\u0604', '\u0605', '\u0890', '\u0891', '\u08e2': 64 // Arabic characters that are not actually invisible. If anyone knows why these are in the 65 // Other, Format category please let me know. 66 case '\u061c', '\u202A', '\u202B', '\u202D', '\u202E', '\u2066', '\u2067', '\u2068', '\u202C', '\u2069': 67 // Bidirectional formatting characters. At best they will render confusingly, at worst they're used 68 // to cause confusion. 69 fallthrough 70 default: 71 invalids = append(invalids, invalid{r, off}) 72 hasFormat = true 73 } 74 } 75 } else if unicode.Is(unicode.Cc, r) && r != '\n' && r != '\t' && r != '\r' { 76 invalids = append(invalids, invalid{r, off}) 77 hasControl = true 78 } 79 prev = r 80 } 81 82 switch len(invalids) { 83 case 0: 84 return 85 case 1: 86 var kind string 87 if hasFormat { 88 kind = "format" 89 } else if hasControl { 90 kind = "control" 91 } else { 92 panic("unreachable") 93 } 94 95 r := invalids[0] 96 msg := fmt.Sprintf("string literal contains the Unicode %s character %U, consider using the %q escape sequence instead", kind, r.r, r.r) 97 98 replacement := strconv.QuoteRune(r.r) 99 replacement = replacement[1 : len(replacement)-1] 100 edit := analysis.SuggestedFix{ 101 Message: fmt.Sprintf("replace %s character %U with %q", kind, r.r, r.r), 102 TextEdits: []analysis.TextEdit{{ 103 Pos: lit.Pos() + token.Pos(r.off), 104 End: lit.Pos() + token.Pos(r.off) + token.Pos(utf8.RuneLen(r.r)), 105 NewText: []byte(replacement), 106 }}, 107 } 108 delete := analysis.SuggestedFix{ 109 Message: fmt.Sprintf("delete %s character %U", kind, r.r), 110 TextEdits: []analysis.TextEdit{{ 111 Pos: lit.Pos() + token.Pos(r.off), 112 End: lit.Pos() + token.Pos(r.off) + token.Pos(utf8.RuneLen(r.r)), 113 }}, 114 } 115 report.Report(pass, lit, msg, report.Fixes(edit, delete)) 116 default: 117 var kind string 118 if hasFormat && hasControl { 119 kind = "format and control" 120 } else if hasFormat { 121 kind = "format" 122 } else if hasControl { 123 kind = "control" 124 } else { 125 panic("unreachable") 126 } 127 128 msg := fmt.Sprintf("string literal contains Unicode %s characters, consider using escape sequences instead", kind) 129 var edits []analysis.TextEdit 130 var deletions []analysis.TextEdit 131 for _, r := range invalids { 132 replacement := strconv.QuoteRune(r.r) 133 replacement = replacement[1 : len(replacement)-1] 134 edits = append(edits, analysis.TextEdit{ 135 Pos: lit.Pos() + token.Pos(r.off), 136 End: lit.Pos() + token.Pos(r.off) + token.Pos(utf8.RuneLen(r.r)), 137 NewText: []byte(replacement), 138 }) 139 deletions = append(deletions, analysis.TextEdit{ 140 Pos: lit.Pos() + token.Pos(r.off), 141 End: lit.Pos() + token.Pos(r.off) + token.Pos(utf8.RuneLen(r.r)), 142 }) 143 } 144 edit := analysis.SuggestedFix{ 145 Message: fmt.Sprintf("replace all %s characters with escape sequences", kind), 146 TextEdits: edits, 147 } 148 delete := analysis.SuggestedFix{ 149 Message: fmt.Sprintf("delete all %s characters", kind), 150 TextEdits: deletions, 151 } 152 report.Report(pass, lit, msg, report.Fixes(edit, delete)) 153 } 154 } 155 code.Preorder(pass, fn, (*ast.BasicLit)(nil)) 156 return nil, nil 157 }