go.starlark.net@v0.0.0-20231101134539-556fd59b42f6/internal/compile/serial.go (about)

     1  package compile
     2  
     3  // This file defines functions to read and write a compile.Program to a file.
     4  //
     5  // It is the client's responsibility to avoid version skew between the
     6  // compiler used to produce a file and the interpreter that consumes it.
     7  // The version number is provided as a constant.
     8  // Incompatible protocol changes should also increment the version number.
     9  //
    10  // Encoding
    11  //
    12  // Program:
    13  //	"!sky"		[4]byte		# magic number
    14  //	str		uint32le	# offset of <strings> section
    15  //	version		varint		# must match Version
    16  //	filename	string
    17  //	numloads	varint
    18  //	loads		[]Ident
    19  //	numnames	varint
    20  //	names		[]string
    21  //	numconsts	varint
    22  //	consts		[]Constant
    23  //	numglobals	varint
    24  //	globals		[]Ident
    25  //	toplevel	Funcode
    26  //	numfuncs	varint
    27  //	funcs		[]Funcode
    28  //	recursion	varint (0 or 1)
    29  //	<strings>	[]byte		# concatenation of all referenced strings
    30  //	EOF
    31  //
    32  // Funcode:
    33  //	id		Ident
        //	doc		string
    34  //	code		[]byte
    35  //	pclinetablen	varint
    36  //	pclinetab	[]varint
    37  //	numlocals	varint
    38  //	locals		[]Ident
    39  //	numcells	varint
    40  //	cells		[]int
    41  //	numfreevars	varint
    42  //	freevar		[]Ident
    43  //	maxstack	varint
    44  //	numparams	varint
    45  //	numkwonlyparams	varint
    46  //	hasvarargs	varint (0 or 1)
    47  //	haskwargs	varint (0 or 1)
    48  //
    49  // Ident:
    50  //	name		string
    51  //	line, col	varint
    52  //
    53  // Constant:                            # type      data
    54  //      type            varint          # 0=string  string
    55  //      data            ...             # 1=bytes   string
    56  //                                      # 2=int     varint
    57  //                                      # 3=float   uvarint (bits as uint64)
    58  //                                      # 4=bigint  string (decimal ASCII text)
    59  //
    60  // The encoding starts with a four-byte magic number.
    61  // The next four bytes are a little-endian uint32
    62  // that provides the offset of the string section
    63  // at the end of the file, which contains the ordered
    64  // concatenation of all strings referenced by the
    65  // program. This design permits the decoder to read
    66  // the first and second parts of the file into different
    67  // memory allocations: the first (the encoded program)
    68  // is transient, but the second (the strings) persists
    69  // for the life of the Program.
    70  //
    71  // Within the encoded program, all strings are referred
    72  // to by their length. As the encoder and decoder process
    73  // the entire file sequentially, they are in lock step,
    74  // so the start offset of each string is implicit.
    75  //
    76  // Funcode.Code is represented as a []byte slice to permit
    77  // modification when breakpoints are set. All other strings
    78  // are represented as strings. They all (unsafely) share the
    79  // same backing byte slice.
    80  //
    81  // Aside from the str field, all integers are encoded as varints.
    82  
    83  import (
    84  	"encoding/binary"
    85  	"fmt"
    86  	"math"
    87  	"math/big"
    88  	debugpkg "runtime/debug"
    89  	"unsafe"
    90  
    91  	"go.starlark.net/syntax"
    92  )
    93  
// magic is the four-byte signature written at the start of every encoded
// program by Encode and checked by DecodeProgram before anything else is read.
const magic = "!sky"
    95  
    96  // Encode encodes a compiled Starlark program.
    97  func (prog *Program) Encode() []byte {
    98  	var e encoder
    99  	e.p = append(e.p, magic...)
   100  	e.p = append(e.p, "????"...) // string data offset; filled in later
   101  	e.int(Version)
   102  	e.string(prog.Toplevel.Pos.Filename())
   103  	e.bindings(prog.Loads)
   104  	e.int(len(prog.Names))
   105  	for _, name := range prog.Names {
   106  		e.string(name)
   107  	}
   108  	e.int(len(prog.Constants))
   109  	for _, c := range prog.Constants {
   110  		switch c := c.(type) {
   111  		case string:
   112  			e.int(0)
   113  			e.string(c)
   114  		case Bytes:
   115  			e.int(1)
   116  			e.string(string(c))
   117  		case int64:
   118  			e.int(2)
   119  			e.int64(c)
   120  		case float64:
   121  			e.int(3)
   122  			e.uint64(math.Float64bits(c))
   123  		case *big.Int:
   124  			e.int(4)
   125  			e.string(c.Text(10))
   126  		}
   127  	}
   128  	e.bindings(prog.Globals)
   129  	e.function(prog.Toplevel)
   130  	e.int(len(prog.Functions))
   131  	for _, fn := range prog.Functions {
   132  		e.function(fn)
   133  	}
   134  	e.int(b2i(prog.Recursion))
   135  
   136  	// Patch in the offset of the string data section.
   137  	binary.LittleEndian.PutUint32(e.p[4:8], uint32(len(e.p)))
   138  
   139  	return append(e.p, e.s...)
   140  }
   141  
   142  type encoder struct {
   143  	p   []byte // encoded program
   144  	s   []byte // strings
   145  	tmp [binary.MaxVarintLen64]byte
   146  }
   147  
   148  func (e *encoder) int(x int) {
   149  	e.int64(int64(x))
   150  }
   151  
   152  func (e *encoder) int64(x int64) {
   153  	n := binary.PutVarint(e.tmp[:], x)
   154  	e.p = append(e.p, e.tmp[:n]...)
   155  }
   156  
   157  func (e *encoder) uint64(x uint64) {
   158  	n := binary.PutUvarint(e.tmp[:], x)
   159  	e.p = append(e.p, e.tmp[:n]...)
   160  }
   161  
   162  func (e *encoder) string(s string) {
   163  	e.int(len(s))
   164  	e.s = append(e.s, s...)
   165  }
   166  
   167  func (e *encoder) bytes(b []byte) {
   168  	e.int(len(b))
   169  	e.s = append(e.s, b...)
   170  }
   171  
   172  func (e *encoder) binding(bind Binding) {
   173  	e.string(bind.Name)
   174  	e.int(int(bind.Pos.Line))
   175  	e.int(int(bind.Pos.Col))
   176  }
   177  
   178  func (e *encoder) bindings(binds []Binding) {
   179  	e.int(len(binds))
   180  	for _, bind := range binds {
   181  		e.binding(bind)
   182  	}
   183  }
   184  
   185  func (e *encoder) function(fn *Funcode) {
   186  	e.binding(Binding{fn.Name, fn.Pos})
   187  	e.string(fn.Doc)
   188  	e.bytes(fn.Code)
   189  	e.int(len(fn.pclinetab))
   190  	for _, x := range fn.pclinetab {
   191  		e.int64(int64(x))
   192  	}
   193  	e.bindings(fn.Locals)
   194  	e.int(len(fn.Cells))
   195  	for _, index := range fn.Cells {
   196  		e.int(index)
   197  	}
   198  	e.bindings(fn.Freevars)
   199  	e.int(fn.MaxStack)
   200  	e.int(fn.NumParams)
   201  	e.int(fn.NumKwonlyParams)
   202  	e.int(b2i(fn.HasVarargs))
   203  	e.int(b2i(fn.HasKwargs))
   204  }
   205  
   206  func b2i(b bool) int {
   207  	if b {
   208  		return 1
   209  	} else {
   210  		return 0
   211  	}
   212  }
   213  
   214  // DecodeProgram decodes a compiled Starlark program from data.
   215  func DecodeProgram(data []byte) (_ *Program, err error) {
   216  	if len(data) < len(magic) {
   217  		return nil, fmt.Errorf("not a compiled module: no magic number")
   218  	}
   219  	if got := string(data[:4]); got != magic {
   220  		return nil, fmt.Errorf("not a compiled module: got magic number %q, want %q",
   221  			got, magic)
   222  	}
   223  	defer func() {
   224  		if x := recover(); x != nil {
   225  			debugpkg.PrintStack()
   226  			err = fmt.Errorf("internal error while decoding program: %v", x)
   227  		}
   228  	}()
   229  
   230  	offset := binary.LittleEndian.Uint32(data[4:8])
   231  	d := decoder{
   232  		p: data[8:offset],
   233  		s: append([]byte(nil), data[offset:]...), // allocate a copy, which will persist
   234  	}
   235  
   236  	if v := d.int(); v != Version {
   237  		return nil, fmt.Errorf("version mismatch: read %d, want %d", v, Version)
   238  	}
   239  
   240  	filename := d.string()
   241  	d.filename = &filename
   242  
   243  	loads := d.bindings()
   244  
   245  	names := make([]string, d.int())
   246  	for i := range names {
   247  		names[i] = d.string()
   248  	}
   249  
   250  	// constants
   251  	constants := make([]interface{}, d.int())
   252  	for i := range constants {
   253  		var c interface{}
   254  		switch d.int() {
   255  		case 0:
   256  			c = d.string()
   257  		case 1:
   258  			c = Bytes(d.string())
   259  		case 2:
   260  			c = d.int64()
   261  		case 3:
   262  			c = math.Float64frombits(d.uint64())
   263  		case 4:
   264  			c, _ = new(big.Int).SetString(d.string(), 10)
   265  		}
   266  		constants[i] = c
   267  	}
   268  
   269  	globals := d.bindings()
   270  	toplevel := d.function()
   271  	funcs := make([]*Funcode, d.int())
   272  	for i := range funcs {
   273  		funcs[i] = d.function()
   274  	}
   275  	recursion := d.int() != 0
   276  
   277  	prog := &Program{
   278  		Loads:     loads,
   279  		Names:     names,
   280  		Constants: constants,
   281  		Globals:   globals,
   282  		Functions: funcs,
   283  		Toplevel:  toplevel,
   284  		Recursion: recursion,
   285  	}
   286  	toplevel.Prog = prog
   287  	for _, f := range funcs {
   288  		f.Prog = prog
   289  	}
   290  
   291  	if len(d.p)+len(d.s) > 0 {
   292  		return nil, fmt.Errorf("internal error: unconsumed data during decoding")
   293  	}
   294  
   295  	return prog, nil
   296  }
   297  
   298  type decoder struct {
   299  	p        []byte  // encoded program
   300  	s        []byte  // strings
   301  	filename *string // (indirect to avoid keeping decoder live)
   302  }
   303  
   304  func (d *decoder) int() int {
   305  	return int(d.int64())
   306  }
   307  
   308  func (d *decoder) int64() int64 {
   309  	x, len := binary.Varint(d.p[:])
   310  	d.p = d.p[len:]
   311  	return x
   312  }
   313  
   314  func (d *decoder) uint64() uint64 {
   315  	x, len := binary.Uvarint(d.p[:])
   316  	d.p = d.p[len:]
   317  	return x
   318  }
   319  
   320  func (d *decoder) string() (s string) {
   321  	if slice := d.bytes(); len(slice) > 0 {
   322  		// Avoid a memory allocation for each string
   323  		// by unsafely aliasing slice.
   324  		type string struct {
   325  			data *byte
   326  			len  int
   327  		}
   328  		ptr := (*string)(unsafe.Pointer(&s))
   329  		ptr.data = &slice[0]
   330  		ptr.len = len(slice)
   331  	}
   332  	return s
   333  }
   334  
   335  func (d *decoder) bytes() []byte {
   336  	len := d.int()
   337  	r := d.s[:len:len]
   338  	d.s = d.s[len:]
   339  	return r
   340  }
   341  
   342  func (d *decoder) binding() Binding {
   343  	name := d.string()
   344  	line := int32(d.int())
   345  	col := int32(d.int())
   346  	return Binding{Name: name, Pos: syntax.MakePosition(d.filename, line, col)}
   347  }
   348  
   349  func (d *decoder) bindings() []Binding {
   350  	bindings := make([]Binding, d.int())
   351  	for i := range bindings {
   352  		bindings[i] = d.binding()
   353  	}
   354  	return bindings
   355  }
   356  
   357  func (d *decoder) ints() []int {
   358  	ints := make([]int, d.int())
   359  	for i := range ints {
   360  		ints[i] = d.int()
   361  	}
   362  	return ints
   363  }
   364  
   365  func (d *decoder) bool() bool { return d.int() != 0 }
   366  
   367  func (d *decoder) function() *Funcode {
   368  	id := d.binding()
   369  	doc := d.string()
   370  	code := d.bytes()
   371  	pclinetab := make([]uint16, d.int())
   372  	for i := range pclinetab {
   373  		pclinetab[i] = uint16(d.int())
   374  	}
   375  	locals := d.bindings()
   376  	cells := d.ints()
   377  	freevars := d.bindings()
   378  	maxStack := d.int()
   379  	numParams := d.int()
   380  	numKwonlyParams := d.int()
   381  	hasVarargs := d.int() != 0
   382  	hasKwargs := d.int() != 0
   383  	return &Funcode{
   384  		// Prog is filled in later.
   385  		Pos:             id.Pos,
   386  		Name:            id.Name,
   387  		Doc:             doc,
   388  		Code:            code,
   389  		pclinetab:       pclinetab,
   390  		Locals:          locals,
   391  		Cells:           cells,
   392  		Freevars:        freevars,
   393  		MaxStack:        maxStack,
   394  		NumParams:       numParams,
   395  		NumKwonlyParams: numKwonlyParams,
   396  		HasVarargs:      hasVarargs,
   397  		HasKwargs:       hasKwargs,
   398  	}
   399  }