github.com/ari-anchor/sei-tendermint@v0.0.0-20230519144642-dc826b7b56bb/scripts/linkpatch/linkpatch.go (about) 1 // Program linkpatch rewrites absolute URLs pointing to targets in GitHub in 2 // Markdown link tags to target a different branch. 3 // 4 // This is used to update documentation links for backport branches. 5 // See https://github.com/ari-anchor/sei-tendermint/issues/7675 for context. 6 package main 7 8 import ( 9 "bytes" 10 "flag" 11 "fmt" 12 "io/fs" 13 "log" 14 "os" 15 "path/filepath" 16 "regexp" 17 "strings" 18 19 "github.com/creachadair/atomicfile" 20 ) 21 22 var ( 23 repoName = flag.String("repo", "tendermint/tendermint", "Repository name to match") 24 sourceBranch = flag.String("source", "master", "Source branch name (required)") 25 targetBranch = flag.String("target", "", "Target branch name (required)") 26 doRecur = flag.Bool("recur", false, "Recur into subdirectories") 27 28 skipPath stringList 29 skipMatch regexpFlag 30 31 // Match markdown links pointing to absolute URLs. 32 // This only works for "inline" links, not referenced links. 33 // The submetch selects the URL. 34 linkRE = regexp.MustCompile(`(?m)\[.*?\]\((https?://.*?)\)`) 35 ) 36 37 func init() { 38 flag.Var(&skipPath, "skip-path", "Skip these paths (comma-separated)") 39 flag.Var(&skipMatch, "skip-match", "Skip URLs matching this regexp (RE2)") 40 41 flag.Usage = func() { 42 fmt.Fprintf(os.Stderr, `Usage: %[1]s [options] <file/dir>... 43 44 Rewrite absolute Markdown links targeting the specified GitHub repository 45 and source branch name to point to the target branch instead. Matching 46 files are updated in-place. 47 48 Each path names either a directory to list, or a single file path to 49 rewrite. By default, only the top level of a directory is scanned; use -recur 50 to recur into subdirectories. 
51 52 Options: 53 `, filepath.Base(os.Args[0])) 54 flag.PrintDefaults() 55 } 56 } 57 58 func main() { 59 flag.Parse() 60 switch { 61 case *repoName == "": 62 log.Fatal("You must specify a non-empty -repo name (org/repo)") 63 case *targetBranch == "": 64 log.Fatal("You must specify a non-empty -target branch") 65 case *sourceBranch == "": 66 log.Fatal("You must specify a non-empty -source branch") 67 case *sourceBranch == *targetBranch: 68 log.Fatalf("Source and target branch are the same (%q)", *sourceBranch) 69 case flag.NArg() == 0: 70 log.Fatal("You must specify at least one file/directory to rewrite") 71 } 72 73 r, err := regexp.Compile(fmt.Sprintf(`^https?://github.com/%s/(?:blob|tree)/%s`, 74 *repoName, *sourceBranch)) 75 if err != nil { 76 log.Fatalf("Compiling regexp: %v", err) 77 } 78 for _, path := range flag.Args() { 79 if err := processPath(r, path); err != nil { 80 log.Fatalf("Processing %q failed: %v", path, err) 81 } 82 } 83 } 84 85 func processPath(r *regexp.Regexp, path string) error { 86 fi, err := os.Lstat(path) 87 if err != nil { 88 return err 89 } 90 if fi.Mode().IsDir() { 91 return processDir(r, path) 92 } else if fi.Mode().IsRegular() { 93 return processFile(r, path) 94 } 95 return nil // nothing to do with links, device files, sockets, etc. 
96 } 97 98 func processDir(r *regexp.Regexp, root string) error { 99 return filepath.Walk(root, func(path string, fi fs.FileInfo, err error) error { 100 if err != nil { 101 return err 102 } 103 if fi.IsDir() { 104 if skipPath.Contains(path) { 105 log.Printf("Skipping %q (per -skip-path)", path) 106 return filepath.SkipDir // explicitly skipped 107 } else if !*doRecur && path != root { 108 return filepath.SkipDir // skipped because we aren't recurring 109 } 110 return nil // nothing else to do for directories 111 } else if skipPath.Contains(path) { 112 log.Printf("Skipping %q (per -skip-path)", path) 113 return nil // explicitly skipped 114 } else if filepath.Ext(path) != ".md" { 115 return nil // nothing to do for non-Markdown files 116 } 117 118 return processFile(r, path) 119 }) 120 } 121 122 func processFile(r *regexp.Regexp, path string) error { 123 log.Printf("Processing file %q", path) 124 input, err := os.ReadFile(path) 125 if err != nil { 126 return err 127 } 128 129 pos := 0 130 var output bytes.Buffer 131 for _, m := range linkRE.FindAllSubmatchIndex(input, -1) { 132 href := string(input[m[2]:m[3]]) 133 u := r.FindStringIndex(href) 134 if u == nil || skipMatch.MatchString(href) { 135 if u != nil { 136 log.Printf("Skipped URL %q (by -skip-match)", href) 137 } 138 output.Write(input[pos:m[1]]) // copy the existing data as-is 139 pos = m[1] 140 continue 141 } 142 143 // Copy everything before the URL as-is, then write the replacement. 144 output.Write(input[pos:m[2]]) // everything up to the URL 145 fmt.Fprintf(&output, `https://github.com/%s/blob/%s%s`, *repoName, *targetBranch, href[u[1]:]) 146 147 // Write out the tail of the match, everything after the URL. 148 output.Write(input[m[3]:m[1]]) 149 pos = m[1] 150 } 151 output.Write(input[pos:]) // the rest of the file 152 153 _, err = atomicfile.WriteAll(path, &output, 0644) 154 return err 155 } 156 157 // stringList implements the flag.Value interface for a comma-separated list of strings. 
type stringList []string

// Set replaces the contents of lst with the comma-separated elements of s.
// Whitespace around each element is trimmed and empty elements are dropped,
// so "a, b," yields the two elements "a" and "b". An empty s yields a nil
// list. Set never reports an error.
func (lst *stringList) Set(s string) error {
	var elts []string
	for _, elt := range strings.Split(s, ",") {
		if elt = strings.TrimSpace(elt); elt != "" {
			elts = append(elts, elt)
		}
	}
	*lst = elts
	return nil
}

// Contains reports whether lst contains s.
func (lst stringList) Contains(s string) bool {
	for _, elt := range lst {
		if s == elt {
			return true
		}
	}
	return false
}

// String renders lst in the comma-separated form accepted by Set.
func (lst stringList) String() string { return strings.Join([]string(lst), ",") }

// regexpFlag implements the flag.Value interface for a regular expression.
// The zero value holds no expression and matches nothing.
type regexpFlag struct{ *regexp.Regexp }

// MatchString reports whether s matches the expression held by r. It returns
// false when no expression has been set.
func (r regexpFlag) MatchString(s string) bool {
	if r.Regexp == nil {
		return false
	}
	return r.Regexp.MatchString(s)
}

// Set compiles s and stores the result in r. If s does not compile, the
// error is returned and r is left unchanged.
func (r *regexpFlag) Set(s string) error {
	c, err := regexp.Compile(s)
	if err != nil {
		return err
	}
	r.Regexp = c
	return nil
}

// String returns the source text of the expression held by r, or "" when no
// expression has been set.
func (r regexpFlag) String() string {
	if r.Regexp == nil {
		return ""
	}
	return r.Regexp.String()
}