github.com/slspeek/camlistore_namedsearch@v0.0.0-20140519202248-ed6f70f7721a/pkg/fileembed/genfileembed/genfileembed.go (about)

     1  /*
     2  Copyright 2012 Google Inc.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8       http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  // The genfileembed command embeds resources into Go files, to eliminate run-time
    18  // dependencies on files on the filesystem.
    19  package main
    20  
    21  import (
    22  	"bytes"
    23  	"compress/zlib"
    24  	"crypto/sha1"
    25  	"encoding/base64"
    26  	"flag"
    27  	"fmt"
    28  	"go/parser"
    29  	"go/printer"
    30  	"go/token"
    31  	"io"
    32  	"io/ioutil"
    33  	"log"
    34  	"os"
    35  	"path/filepath"
    36  	"regexp"
    37  	"strings"
    38  	"time"
    39  
    40  	"camlistore.org/pkg/rollsum"
    41  )
    42  
    43  var (
    44  	processAll = flag.Bool("all", false, "process all files (if false, only process modified files)")
    45  
    46  	fileEmbedPkgPath = flag.String("fileembed-package", "camlistore.org/pkg/fileembed", "the Go package name for fileembed. If you have vendored fileembed (e.g. with goven), you can use this flag to ensure that generated code imports the vendored package.")
    47  
    48  	chunkThreshold = flag.Int64("chunk-threshold", 0, "If non-zero, the maximum size of a file before it's cut up into content-addressable chunks with a rolling checksum")
    49  	chunkPackage   = flag.String("chunk-package", "", "Package to hold chunks")
    50  )
    51  
    52  const (
    53  	maxUncompressed = 50 << 10 // 50KB
    54  	// Threshold ratio for compression.
    55  	// Files which don't compress at least as well are kept uncompressed.
    56  	zRatio = 0.5
    57  )
    58  
    59  func usage() {
    60  	fmt.Fprintf(os.Stderr, "usage: genfileembed [flags] [<dir>]\n")
    61  	flag.PrintDefaults()
    62  	os.Exit(2)
    63  }
    64  
    65  func main() {
    66  	flag.Usage = usage
    67  	flag.Parse()
    68  
    69  	dir := "."
    70  	switch flag.NArg() {
    71  	case 0:
    72  	case 1:
    73  		dir = flag.Arg(0)
    74  		if err := os.Chdir(dir); err != nil {
    75  			log.Fatalf("chdir(%q) = %v", dir, err)
    76  		}
    77  	default:
    78  		flag.Usage()
    79  	}
    80  
    81  	pkgName, filePattern, fileEmbedModTime, err := parseFileEmbed()
    82  	if err != nil {
    83  		log.Fatalf("Error parsing %s/fileembed.go: %v", dir, err)
    84  	}
    85  
    86  	for _, fileName := range matchingFiles(filePattern) {
    87  		fi, err := os.Stat(fileName)
    88  		if err != nil {
    89  			log.Fatal(err)
    90  		}
    91  
    92  		embedName := "zembed_" + strings.Replace(fileName, string(filepath.Separator), "_", -1) + ".go"
    93  		zfi, zerr := os.Stat(embedName)
    94  		genFile := func() bool {
    95  			if *processAll || zerr != nil {
    96  				return true
    97  			}
    98  			if zfi.ModTime().Before(fi.ModTime()) {
    99  				return true
   100  			}
   101  			if zfi.ModTime().Before(fileEmbedModTime) {
   102  				return true
   103  			}
   104  			return false
   105  		}
   106  		if !genFile() {
   107  			continue
   108  		}
   109  		log.Printf("Updating %s (package %s)", embedName, pkgName)
   110  
   111  		bs, err := ioutil.ReadFile(fileName)
   112  		if err != nil {
   113  			log.Fatal(err)
   114  		}
   115  
   116  		zb, fileSize := compressFile(bytes.NewReader(bs))
   117  		ratio := float64(len(zb)) / float64(fileSize)
   118  		byteStreamType := ""
   119  		var qb []byte // quoted string, or Go expression evaluating to a string
   120  		var imports string
   121  		if *chunkThreshold > 0 && int64(len(bs)) > *chunkThreshold {
   122  			byteStreamType = "fileembed.Multi"
   123  			qb = chunksOf(bs)
   124  			if *chunkPackage == "" {
   125  				log.Fatalf("Must provide a --chunk-package value with --chunk-threshold")
   126  			}
   127  			imports = fmt.Sprintf("import chunkpkg \"%s\"\n", *chunkPackage)
   128  		} else if fileSize < maxUncompressed || ratio > zRatio {
   129  			byteStreamType = "fileembed.String"
   130  			qb = quote(bs)
   131  		} else {
   132  			byteStreamType = "fileembed.ZlibCompressedBase64"
   133  			qb = quote([]byte(base64.StdEncoding.EncodeToString(zb)))
   134  		}
   135  
   136  		var b bytes.Buffer
   137  		fmt.Fprintf(&b, "// THIS FILE IS AUTO-GENERATED FROM %s\n", fileName)
   138  		fmt.Fprintf(&b, "// DO NOT EDIT.\n\n")
   139  		fmt.Fprintf(&b, "package %s\n\n", pkgName)
   140  		fmt.Fprintf(&b, "import \"time\"\n\n")
   141  		fmt.Fprintf(&b, "import \""+*fileEmbedPkgPath+"\"\n\n")
   142  		b.WriteString(imports)
   143  		fmt.Fprintf(&b, "func init() {\n\tFiles.Add(%q, %d, time.Unix(0, %d), %s(%s));\n}\n",
   144  			fileName, fileSize, fi.ModTime().UnixNano(), byteStreamType, qb)
   145  
   146  		// gofmt it
   147  		fset := token.NewFileSet()
   148  		ast, err := parser.ParseFile(fset, "", b.Bytes(), parser.ParseComments)
   149  		if err != nil {
   150  			log.Fatal(err)
   151  		}
   152  
   153  		var clean bytes.Buffer
   154  		config := &printer.Config{
   155  			Mode:     printer.TabIndent | printer.UseSpaces,
   156  			Tabwidth: 8,
   157  		}
   158  		err = config.Fprint(&clean, fset, ast)
   159  		if err != nil {
   160  			log.Fatal(err)
   161  		}
   162  
   163  		if err := writeFileIfDifferent(embedName, clean.Bytes()); err != nil {
   164  			log.Fatal(err)
   165  		}
   166  	}
   167  }
   168  
   169  func writeFileIfDifferent(filename string, contents []byte) error {
   170  	fi, err := os.Stat(filename)
   171  	if err == nil && fi.Size() == int64(len(contents)) && contentsEqual(filename, contents) {
   172  		os.Chtimes(filename, time.Now(), time.Now())
   173  		return nil
   174  	}
   175  	return ioutil.WriteFile(filename, contents, 0644)
   176  }
   177  
   178  func contentsEqual(filename string, contents []byte) bool {
   179  	got, err := ioutil.ReadFile(filename)
   180  	if err != nil {
   181  		return false
   182  	}
   183  	return bytes.Equal(got, contents)
   184  }
   185  
   186  func compressFile(r io.Reader) ([]byte, int64) {
   187  	var zb bytes.Buffer
   188  	w := zlib.NewWriter(&zb)
   189  	n, err := io.Copy(w, r)
   190  	if err != nil {
   191  		log.Fatal(err)
   192  	}
   193  	w.Close()
   194  	return zb.Bytes(), n
   195  }
   196  
   197  func quote(bs []byte) []byte {
   198  	var qb bytes.Buffer
   199  	qb.WriteByte('"')
   200  	run := 0
   201  	for _, b := range bs {
   202  		if b == '\n' {
   203  			qb.WriteString(`\n`)
   204  		}
   205  		if b == '\n' || run > 80 {
   206  			qb.WriteString("\" +\n\t\"")
   207  			run = 0
   208  		}
   209  		if b == '\n' {
   210  			continue
   211  		}
   212  		run++
   213  		if b == '\\' {
   214  			qb.WriteString(`\\`)
   215  			continue
   216  		}
   217  		if b == '"' {
   218  			qb.WriteString(`\"`)
   219  			continue
   220  		}
   221  		if (b >= 32 && b <= 126) || b == '\t' {
   222  			qb.WriteByte(b)
   223  			continue
   224  		}
   225  		fmt.Fprintf(&qb, "\\x%02x", b)
   226  	}
   227  	qb.WriteByte('"')
   228  	return qb.Bytes()
   229  }
   230  
   231  // matchingFiles finds all files matching a regex that should be embedded. This
   232  // skips files prefixed with "zembed_", since those are an implementation
   233  // detail of the embedding process itself.
   234  func matchingFiles(p *regexp.Regexp) []string {
   235  	var f []string
   236  	err := filepath.Walk(".", func(path string, fi os.FileInfo, err error) error {
   237  		if err != nil {
   238  			return err
   239  		}
   240  		n := filepath.Base(path)
   241  		if !fi.IsDir() && !strings.HasPrefix(n, "zembed_") && p.MatchString(n) {
   242  			f = append(f, path)
   243  		}
   244  		return nil
   245  	})
   246  	if err != nil {
   247  		log.Fatalf("Error walking directory tree: %s", err)
   248  		return nil
   249  	}
   250  	return f
   251  }
   252  
   253  func parseFileEmbed() (pkgName string, filePattern *regexp.Regexp, modTime time.Time, err error) {
   254  	fe, err := os.Open("fileembed.go")
   255  	if err != nil {
   256  		return
   257  	}
   258  	defer fe.Close()
   259  
   260  	fi, err := fe.Stat()
   261  	if err != nil {
   262  		return
   263  	}
   264  	modTime = fi.ModTime()
   265  
   266  	fs := token.NewFileSet()
   267  	astf, err := parser.ParseFile(fs, "fileembed.go", fe, parser.PackageClauseOnly|parser.ParseComments)
   268  	if err != nil {
   269  		return
   270  	}
   271  	pkgName = astf.Name.Name
   272  
   273  	if astf.Doc == nil {
   274  		err = fmt.Errorf("no package comment before the %q line", "package "+pkgName)
   275  		return
   276  	}
   277  
   278  	pkgComment := astf.Doc.Text()
   279  	findPattern := regexp.MustCompile(`(?m)^#fileembed\s+pattern\s+(\S+)\s*$`)
   280  	m := findPattern.FindStringSubmatch(pkgComment)
   281  	if m == nil {
   282  		err = fmt.Errorf("package comment lacks line of form: #fileembed pattern <pattern>")
   283  		return
   284  	}
   285  	pattern := m[1]
   286  	filePattern, err = regexp.Compile(pattern)
   287  	if err != nil {
   288  		err = fmt.Errorf("bad regexp %q: %v", pattern, err)
   289  		return
   290  	}
   291  	return
   292  }
   293  
   294  // chunksOf takes a (presumably large) file's uncompressed input,
   295  // rolling-checksum splits it into ~514 byte chunks, compresses each,
   296  // base64s each, and writes chunk files out, with each file just
   297  // defining an exported fileembed.Opener variable named C<xxxx> where
   298  // xxxx is the first 8 lowercase hex digits of the SHA-1 of the chunk
   299  // value pre-compression.  The return value is a Go expression
   300  // referencing each of those chunks concatenated together.
   301  func chunksOf(in []byte) (stringExpression []byte) {
   302  	var multiParts [][]byte
   303  	rs := rollsum.New()
   304  	const nBits = 9 // ~512 byte chunks
   305  	last := 0
   306  	for i, b := range in {
   307  		rs.Roll(b)
   308  		if rs.OnSplitWithBits(nBits) || i == len(in)-1 {
   309  			raw := in[last : i+1] // inclusive
   310  			last = i + 1
   311  			s1 := sha1.New()
   312  			s1.Write(raw)
   313  			sha1hex := fmt.Sprintf("%x", s1.Sum(nil))[:8]
   314  			writeChunkFile(sha1hex, raw)
   315  			multiParts = append(multiParts, []byte(fmt.Sprintf("chunkpkg.C%s", sha1hex)))
   316  		}
   317  	}
   318  	return bytes.Join(multiParts, []byte(",\n\t"))
   319  }
   320  
   321  func writeChunkFile(hex string, raw []byte) {
   322  	path := os.Getenv("GOPATH")
   323  	if path == "" {
   324  		log.Fatalf("No GOPATH set")
   325  	}
   326  	path = filepath.SplitList(path)[0]
   327  	file := filepath.Join(path, "src", filepath.FromSlash(*chunkPackage), "chunk_"+hex+".go")
   328  	zb, _ := compressFile(bytes.NewReader(raw))
   329  	var buf bytes.Buffer
   330  	buf.WriteString("// THIS FILE IS AUTO-GENERATED. SEE README.\n\n")
   331  	buf.WriteString("package chunkpkg\n")
   332  	buf.WriteString("import \"" + *fileEmbedPkgPath + "\"\n\n")
   333  	fmt.Fprintf(&buf, "var C%s fileembed.Opener\n\nfunc init() { C%s = fileembed.ZlibCompressedBase64(%s)\n }\n",
   334  		hex,
   335  		hex,
   336  		quote([]byte(base64.StdEncoding.EncodeToString(zb))))
   337  	err := writeFileIfDifferent(file, buf.Bytes())
   338  	if err != nil {
   339  		log.Fatalf("Error writing chunk %s to %v: %v", hex, file, err)
   340  	}
   341  }