github.com/rogpeppe/go-internal@v1.12.1-0.20240509064211-c8567cf8e95f/txtar/archive.go (about)

     1  // Copyright 2018 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // Package txtar implements a trivial text-based file archive format.
     6  //
     7  // The goals for the format are:
     8  //
     9  //   - be trivial enough to create and edit by hand.
    10  //   - be able to store trees of text files describing go command test cases.
    11  //   - diff nicely in git history and code reviews.
    12  //
    13  // Non-goals include being a completely general archive format,
    14  // storing binary data, storing file modes, storing special files like
    15  // symbolic links, and so on.
    16  //
    17  // # Txtar format
    18  //
    19  // A txtar archive is zero or more comment lines and then a sequence of file entries.
    20  // Each file entry begins with a file marker line of the form "-- FILENAME --"
    21  // and is followed by zero or more file content lines making up the file data.
    22  // The comment or file content ends at the next file marker line.
    23  // The file marker line must begin with the three-byte sequence "-- "
    24  // and end with the three-byte sequence " --", but the enclosed
    25  // file name can be surrounded by additional white space,
    26  // all of which is stripped.
    27  //
    28  // If the txtar file is missing a trailing newline on the final line,
    29  // parsers should consider a final newline to be present anyway.
    30  //
    31  // There are no possible syntax errors in a txtar archive.
    32  package txtar
    33  
    34  import (
    35  	"bytes"
    36  	"errors"
    37  	"fmt"
    38  	"os"
    39  	"path/filepath"
    40  	"strings"
    41  	"unicode/utf8"
    42  
    43  	"golang.org/x/tools/txtar"
    44  )
    45  
// An Archive is a collection of files.
//
// It is an alias for golang.org/x/tools/txtar.Archive, so values of the
// two types are interchangeable.
type Archive = txtar.Archive
    48  
// A File is a single file in an archive.
//
// It is an alias for golang.org/x/tools/txtar.File, so values of the
// two types are interchangeable.
type File = txtar.File
    51  
// Format returns the serialized form of an Archive.
// It is assumed that the Archive data structure is well-formed:
// a.Comment and all a.Files[i].Data contain no file marker lines,
// and all a.Files[i].Name are non-empty.
//
// Format delegates to golang.org/x/tools/txtar.Format.
func Format(a *Archive) []byte {
	return txtar.Format(a)
}
    59  
    60  // ParseFile parses the named file as an archive.
    61  func ParseFile(file string) (*Archive, error) {
    62  	data, err := os.ReadFile(file)
    63  	if err != nil {
    64  		return nil, err
    65  	}
    66  	return Parse(data), nil
    67  }
    68  
    69  // Parse parses the serialized form of an Archive.
    70  // The returned Archive holds slices of data.
    71  //
    72  // TODO use golang.org/x/tools/txtar.Parse when https://github.com/golang/go/issues/59264
    73  // is fixed.
    74  func Parse(data []byte) *Archive {
    75  	a := new(Archive)
    76  	var name string
    77  	a.Comment, name, data = findFileMarker(data)
    78  	for name != "" {
    79  		f := File{name, nil}
    80  		f.Data, name, data = findFileMarker(data)
    81  		a.Files = append(a.Files, f)
    82  	}
    83  	return a
    84  }
    85  
    86  // NeedsQuote reports whether the given data needs to
    87  // be quoted before it's included as a txtar file.
    88  func NeedsQuote(data []byte) bool {
    89  	_, _, after := findFileMarker(data)
    90  	return after != nil
    91  }
    92  
    93  // Quote quotes the data so that it can be safely stored in a txtar
    94  // file. This copes with files that contain lines that look like txtar
    95  // separators.
    96  //
    97  // The original data can be recovered with Unquote. It returns an error
    98  // if the data cannot be quoted (for example because it has no final
    99  // newline or it holds unprintable characters)
   100  func Quote(data []byte) ([]byte, error) {
   101  	if len(data) == 0 {
   102  		return nil, nil
   103  	}
   104  	if data[len(data)-1] != '\n' {
   105  		return nil, errors.New("data has no final newline")
   106  	}
   107  	if !utf8.Valid(data) {
   108  		return nil, fmt.Errorf("data contains non-UTF-8 characters")
   109  	}
   110  	var nd []byte
   111  	prev := byte('\n')
   112  	for _, b := range data {
   113  		if prev == '\n' {
   114  			nd = append(nd, '>')
   115  		}
   116  		nd = append(nd, b)
   117  		prev = b
   118  	}
   119  	return nd, nil
   120  }
   121  
   122  // Unquote unquotes data as quoted by Quote.
   123  func Unquote(data []byte) ([]byte, error) {
   124  	if len(data) == 0 {
   125  		return nil, nil
   126  	}
   127  	if data[0] != '>' || data[len(data)-1] != '\n' {
   128  		return nil, errors.New("data does not appear to be quoted")
   129  	}
   130  	data = bytes.Replace(data, []byte("\n>"), []byte("\n"), -1)
   131  	data = bytes.TrimPrefix(data, []byte(">"))
   132  	return data, nil
   133  }
   134  
// Byte sequences used to recognize file marker lines of the form
// "-- FILENAME --".
var (
	newlineMarker = []byte("\n-- ") // a newline followed by the marker prefix; searched for by findFileMarker
	marker        = []byte("-- ")   // prefix that a file marker line must start with
	markerEnd     = []byte(" --")   // suffix that a file marker line must end with
)
   140  
   141  // findFileMarker finds the next file marker in data,
   142  // extracts the file name, and returns the data before the marker,
   143  // the file name, and the data after the marker.
   144  // If there is no next marker, findFileMarker returns before = fixNL(data), name = "", after = nil.
   145  func findFileMarker(data []byte) (before []byte, name string, after []byte) {
   146  	var i int
   147  	for {
   148  		if name, after = isMarker(data[i:]); name != "" {
   149  			return data[:i], name, after
   150  		}
   151  		j := bytes.Index(data[i:], newlineMarker)
   152  		if j < 0 {
   153  			return fixNL(data), "", nil
   154  		}
   155  		i += j + 1 // positioned at start of new possible marker
   156  	}
   157  }
   158  
   159  // isMarker checks whether data begins with a file marker line.
   160  // If so, it returns the name from the line and the data after the line.
   161  // Otherwise it returns name == "" with an unspecified after.
   162  func isMarker(data []byte) (name string, after []byte) {
   163  	if !bytes.HasPrefix(data, marker) {
   164  		return "", nil
   165  	}
   166  	if i := bytes.IndexByte(data, '\n'); i >= 0 {
   167  		data, after = data[:i], data[i+1:]
   168  		if data[i-1] == '\r' {
   169  			data = data[:len(data)-1]
   170  		}
   171  	}
   172  	if !bytes.HasSuffix(data, markerEnd) {
   173  		return "", nil
   174  	}
   175  	return strings.TrimSpace(string(data[len(marker) : len(data)-len(markerEnd)])), after
   176  }
   177  
   178  // If data is empty or ends in \n, fixNL returns data.
   179  // Otherwise fixNL returns a new slice consisting of data with a final \n added.
   180  func fixNL(data []byte) []byte {
   181  	if len(data) == 0 || data[len(data)-1] == '\n' {
   182  		return data
   183  	}
   184  	d := make([]byte, len(data)+1)
   185  	copy(d, data)
   186  	d[len(data)] = '\n'
   187  	return d
   188  }
   189  
   190  // Write writes each File in an Archive to the given directory, returning any
   191  // errors encountered. An error is also returned in the event a file would be
   192  // written outside of dir.
   193  func Write(a *Archive, dir string) error {
   194  	for _, f := range a.Files {
   195  		fp := filepath.Clean(filepath.FromSlash(f.Name))
   196  		if isAbs(fp) || strings.HasPrefix(fp, ".."+string(filepath.Separator)) {
   197  			return fmt.Errorf("%q: outside parent directory", f.Name)
   198  		}
   199  		fp = filepath.Join(dir, fp)
   200  
   201  		if err := os.MkdirAll(filepath.Dir(fp), 0o777); err != nil {
   202  			return err
   203  		}
   204  		// Avoid overwriting existing files by using O_EXCL.
   205  		out, err := os.OpenFile(fp, os.O_WRONLY|os.O_CREATE|os.O_EXCL, 0o666)
   206  		if err != nil {
   207  			return err
   208  		}
   209  
   210  		_, err = out.Write(f.Data)
   211  		cerr := out.Close()
   212  		if err != nil {
   213  			return err
   214  		}
   215  		if cerr != nil {
   216  			return cerr
   217  		}
   218  	}
   219  	return nil
   220  }
   221  
   222  func isAbs(p string) bool {
   223  	// Note: under Windows, filepath.IsAbs(`\foo`) returns false,
   224  	// so we need to check for that case specifically.
   225  	return filepath.IsAbs(p) || strings.HasPrefix(p, string(filepath.Separator))
   226  }