github.com/ari-anchor/sei-tendermint@v0.0.0-20230519144642-dc826b7b56bb/scripts/linkpatch/linkpatch.go (about)

     1  // Program linkpatch rewrites absolute URLs pointing to targets in GitHub in
     2  // Markdown link tags to target a different branch.
     3  //
     4  // This is used to update documentation links for backport branches.
     5  // See https://github.com/ari-anchor/sei-tendermint/issues/7675 for context.
     6  package main
     7  
     8  import (
     9  	"bytes"
    10  	"flag"
    11  	"fmt"
    12  	"io/fs"
    13  	"log"
    14  	"os"
    15  	"path/filepath"
    16  	"regexp"
    17  	"strings"
    18  
    19  	"github.com/creachadair/atomicfile"
    20  )
    21  
    22  var (
    23  	repoName     = flag.String("repo", "tendermint/tendermint", "Repository name to match")
    24  	sourceBranch = flag.String("source", "master", "Source branch name (required)")
    25  	targetBranch = flag.String("target", "", "Target branch name (required)")
    26  	doRecur      = flag.Bool("recur", false, "Recur into subdirectories")
    27  
    28  	skipPath  stringList
    29  	skipMatch regexpFlag
    30  
    31  	// Match markdown links pointing to absolute URLs.
    32  	// This only works for "inline" links, not referenced links.
    33  	// The submetch selects the URL.
    34  	linkRE = regexp.MustCompile(`(?m)\[.*?\]\((https?://.*?)\)`)
    35  )
    36  
    37  func init() {
    38  	flag.Var(&skipPath, "skip-path", "Skip these paths (comma-separated)")
    39  	flag.Var(&skipMatch, "skip-match", "Skip URLs matching this regexp (RE2)")
    40  
    41  	flag.Usage = func() {
    42  		fmt.Fprintf(os.Stderr, `Usage: %[1]s [options] <file/dir>...
    43  
    44  Rewrite absolute Markdown links targeting the specified GitHub repository
    45  and source branch name to point to the target branch instead. Matching
    46  files are updated in-place.
    47  
    48  Each path names either a directory to list, or a single file path to
    49  rewrite. By default, only the top level of a directory is scanned; use -recur
    50  to recur into subdirectories.
    51  
    52  Options:
    53  `, filepath.Base(os.Args[0]))
    54  		flag.PrintDefaults()
    55  	}
    56  }
    57  
    58  func main() {
    59  	flag.Parse()
    60  	switch {
    61  	case *repoName == "":
    62  		log.Fatal("You must specify a non-empty -repo name (org/repo)")
    63  	case *targetBranch == "":
    64  		log.Fatal("You must specify a non-empty -target branch")
    65  	case *sourceBranch == "":
    66  		log.Fatal("You must specify a non-empty -source branch")
    67  	case *sourceBranch == *targetBranch:
    68  		log.Fatalf("Source and target branch are the same (%q)", *sourceBranch)
    69  	case flag.NArg() == 0:
    70  		log.Fatal("You must specify at least one file/directory to rewrite")
    71  	}
    72  
    73  	r, err := regexp.Compile(fmt.Sprintf(`^https?://github.com/%s/(?:blob|tree)/%s`,
    74  		*repoName, *sourceBranch))
    75  	if err != nil {
    76  		log.Fatalf("Compiling regexp: %v", err)
    77  	}
    78  	for _, path := range flag.Args() {
    79  		if err := processPath(r, path); err != nil {
    80  			log.Fatalf("Processing %q failed: %v", path, err)
    81  		}
    82  	}
    83  }
    84  
    85  func processPath(r *regexp.Regexp, path string) error {
    86  	fi, err := os.Lstat(path)
    87  	if err != nil {
    88  		return err
    89  	}
    90  	if fi.Mode().IsDir() {
    91  		return processDir(r, path)
    92  	} else if fi.Mode().IsRegular() {
    93  		return processFile(r, path)
    94  	}
    95  	return nil // nothing to do with links, device files, sockets, etc.
    96  }
    97  
    98  func processDir(r *regexp.Regexp, root string) error {
    99  	return filepath.Walk(root, func(path string, fi fs.FileInfo, err error) error {
   100  		if err != nil {
   101  			return err
   102  		}
   103  		if fi.IsDir() {
   104  			if skipPath.Contains(path) {
   105  				log.Printf("Skipping %q (per -skip-path)", path)
   106  				return filepath.SkipDir // explicitly skipped
   107  			} else if !*doRecur && path != root {
   108  				return filepath.SkipDir // skipped because we aren't recurring
   109  			}
   110  			return nil // nothing else to do for directories
   111  		} else if skipPath.Contains(path) {
   112  			log.Printf("Skipping %q (per -skip-path)", path)
   113  			return nil // explicitly skipped
   114  		} else if filepath.Ext(path) != ".md" {
   115  			return nil // nothing to do for non-Markdown files
   116  		}
   117  
   118  		return processFile(r, path)
   119  	})
   120  }
   121  
   122  func processFile(r *regexp.Regexp, path string) error {
   123  	log.Printf("Processing file %q", path)
   124  	input, err := os.ReadFile(path)
   125  	if err != nil {
   126  		return err
   127  	}
   128  
   129  	pos := 0
   130  	var output bytes.Buffer
   131  	for _, m := range linkRE.FindAllSubmatchIndex(input, -1) {
   132  		href := string(input[m[2]:m[3]])
   133  		u := r.FindStringIndex(href)
   134  		if u == nil || skipMatch.MatchString(href) {
   135  			if u != nil {
   136  				log.Printf("Skipped URL %q (by -skip-match)", href)
   137  			}
   138  			output.Write(input[pos:m[1]]) // copy the existing data as-is
   139  			pos = m[1]
   140  			continue
   141  		}
   142  
   143  		// Copy everything before the URL as-is, then write the replacement.
   144  		output.Write(input[pos:m[2]]) // everything up to the URL
   145  		fmt.Fprintf(&output, `https://github.com/%s/blob/%s%s`, *repoName, *targetBranch, href[u[1]:])
   146  
   147  		// Write out the tail of the match, everything after the URL.
   148  		output.Write(input[m[3]:m[1]])
   149  		pos = m[1]
   150  	}
   151  	output.Write(input[pos:]) // the rest of the file
   152  
   153  	_, err = atomicfile.WriteAll(path, &output, 0644)
   154  	return err
   155  }
   156  
   157  // stringList implements the flag.Value interface for a comma-separated list of strings.
   158  type stringList []string
   159  
   160  func (lst *stringList) Set(s string) error {
   161  	if s == "" {
   162  		*lst = nil
   163  	} else {
   164  		*lst = strings.Split(s, ",")
   165  	}
   166  	return nil
   167  }
   168  
   169  // Contains reports whether lst contains s.
   170  func (lst stringList) Contains(s string) bool {
   171  	for _, elt := range lst {
   172  		if s == elt {
   173  			return true
   174  		}
   175  	}
   176  	return false
   177  }
   178  
   179  func (lst stringList) String() string { return strings.Join([]string(lst), ",") }
   180  
   181  // regexpFlag implements the flag.Value interface for a regular expression.
   182  type regexpFlag struct{ *regexp.Regexp }
   183  
   184  func (r regexpFlag) MatchString(s string) bool {
   185  	if r.Regexp == nil {
   186  		return false
   187  	}
   188  	return r.Regexp.MatchString(s)
   189  }
   190  
   191  func (r *regexpFlag) Set(s string) error {
   192  	c, err := regexp.Compile(s)
   193  	if err != nil {
   194  		return err
   195  	}
   196  	r.Regexp = c
   197  	return nil
   198  }
   199  
   200  func (r regexpFlag) String() string {
   201  	if r.Regexp == nil {
   202  		return ""
   203  	}
   204  	return r.Regexp.String()
   205  }