github.com/lab47/exprcore@v0.0.0-20210525052339-fb7d6bd9331e/internal/compile/serial.go (about)

     1  package compile
     2  
     3  // This file defines functions to read and write a compile.Program to a file.
     4  //
     5  // It is the client's responsibility to avoid version skew between the
     6  // compiler used to produce a file and the interpreter that consumes it.
     7  // The version number is provided as a constant.
     8  // Incompatible protocol changes should also increment the version number.
     9  //
    10  // Encoding
    11  //
    12  // Program:
    13  //	"!sky"		[4]byte		# magic number
    14  //	str		uint32le	# offset of <strings> section
    15  //	version		varint		# must match Version
    16  //	filename	string
    17  //	numloads	varint
    18  //	loads		[]Ident
    19  //	numnames	varint
    20  //	names		[]string
    21  //	numconsts	varint
    22  //	consts		[]Constant
    23  //	numglobals	varint
    24  //	globals		[]Ident
    25  //	toplevel	Funcode
    26  //	numfuncs	varint
    27  //	funcs		[]Funcode
    28  //	<strings>	[]byte		# concatenation of all referenced strings
    29  //	EOF
    30  //
    31  // Funcode:
    32  //	id		Ident
    33  //	code		[]byte
    34  //	pclinetablen	varint
    35  //	pclinetab	[]varint
    36  //	numlocals	varint
    37  //	locals		[]Ident
    38  //	numcells	varint
    39  //	cells		[]int
    40  //	numfreevars	varint
    41  //	freevar		[]Ident
    42  //	maxstack	varint
    43  //	numparams	varint
    44  //	numkwonlyparams	varint
    45  //	hasvarargs	varint (0 or 1)
    46  //	haskwargs	varint (0 or 1)
    47  //
    48  // Ident:
    49  //	name		string
    50  //	line, col	varint
    51  //
    52  // Constant:                            # type      data
    53  //      type            varint          # 0=string  string
    54  //      data            ...             # 1=int     varint
    55  //                                      # 2=float   varint (bits as uint64)
    56  //                                      # 3=bigint  string (decimal ASCII text)
    57  //
    58  // The encoding starts with a four-byte magic number.
    59  // The next four bytes are a little-endian uint32
    60  // that provides the offset of the string section
    61  // at the end of the file, which contains the ordered
    62  // concatenation of all strings referenced by the
    63  // program. This design permits the decoder to read
    64  // the first and second parts of the file into different
    65  // memory allocations: the first (the encoded program)
    66  // is transient, but the second (the strings) persists
    67  // for the life of the Program.
    68  //
    69  // Within the encoded program, all strings are referred
    70  // to by their length. As the encoder and decoder process
    71  // the entire file sequentially, they are in lock step,
    72  // so the start offset of each string is implicit.
    73  //
    74  // Funcode.Code is represented as a []byte slice to permit
    75  // modification when breakpoints are set. All other strings
    76  // are represented as strings. They all (unsafely) share the
    77  // same backing byte slice.
    78  //
    79  // Aside from the str field, all integers are encoded as varints.
    80  
    81  import (
    82  	"encoding/binary"
    83  	"fmt"
    84  	"math"
    85  	"math/big"
    86  	debugpkg "runtime/debug"
    87  	"unsafe"
    88  
    89  	"github.com/lab47/exprcore/syntax"
    90  )
    91  
    92  const magic = "!sky"
    93  
    94  // Encode encodes a compiled exprcore program.
    95  func (prog *Program) Encode() []byte {
    96  	var e encoder
    97  	e.p = append(e.p, magic...)
    98  	e.p = append(e.p, "????"...) // string data offset; filled in later
    99  	e.int(Version)
   100  	e.string(prog.Toplevel.Pos.Filename())
   101  	e.bindings(prog.Loads)
   102  	e.int(len(prog.Names))
   103  	for _, name := range prog.Names {
   104  		e.string(name)
   105  	}
   106  	e.int(len(prog.Constants))
   107  	for _, c := range prog.Constants {
   108  		switch c := c.(type) {
   109  		case string:
   110  			e.int(0)
   111  			e.string(c)
   112  		case int64:
   113  			e.int(1)
   114  			e.int64(c)
   115  		case float64:
   116  			e.int(2)
   117  			e.uint64(math.Float64bits(c))
   118  		case *big.Int:
   119  			e.int(3)
   120  			e.string(c.Text(10))
   121  		}
   122  	}
   123  	e.bindings(prog.Globals)
   124  	e.function(prog.Toplevel)
   125  	e.int(len(prog.Functions))
   126  	for _, fn := range prog.Functions {
   127  		e.function(fn)
   128  	}
   129  
   130  	// Patch in the offset of the string data section.
   131  	binary.LittleEndian.PutUint32(e.p[4:8], uint32(len(e.p)))
   132  
   133  	return append(e.p, e.s...)
   134  }
   135  
   136  type encoder struct {
   137  	p   []byte // encoded program
   138  	s   []byte // strings
   139  	tmp [binary.MaxVarintLen64]byte
   140  }
   141  
   142  func (e *encoder) int(x int) {
   143  	e.int64(int64(x))
   144  }
   145  
   146  func (e *encoder) int64(x int64) {
   147  	n := binary.PutVarint(e.tmp[:], x)
   148  	e.p = append(e.p, e.tmp[:n]...)
   149  }
   150  
   151  func (e *encoder) uint64(x uint64) {
   152  	n := binary.PutUvarint(e.tmp[:], x)
   153  	e.p = append(e.p, e.tmp[:n]...)
   154  }
   155  
   156  func (e *encoder) string(s string) {
   157  	e.int(len(s))
   158  	e.s = append(e.s, s...)
   159  }
   160  
   161  func (e *encoder) bytes(b []byte) {
   162  	e.int(len(b))
   163  	e.s = append(e.s, b...)
   164  }
   165  
   166  func (e *encoder) binding(bind Binding) {
   167  	e.string(bind.Name)
   168  	e.int(int(bind.Pos.Line))
   169  	e.int(int(bind.Pos.Col))
   170  }
   171  
   172  func (e *encoder) bindings(binds []Binding) {
   173  	e.int(len(binds))
   174  	for _, bind := range binds {
   175  		e.binding(bind)
   176  	}
   177  }
   178  
   179  func (e *encoder) function(fn *Funcode) {
   180  	e.binding(Binding{fn.Name, fn.Pos})
   181  	e.string(fn.Doc)
   182  	e.bytes(fn.Code)
   183  	e.int(len(fn.pclinetab))
   184  	for _, x := range fn.pclinetab {
   185  		e.int64(int64(x))
   186  	}
   187  	e.bindings(fn.Locals)
   188  	e.int(len(fn.Cells))
   189  	for _, index := range fn.Cells {
   190  		e.int(index)
   191  	}
   192  	e.bindings(fn.Freevars)
   193  	e.int(fn.MaxStack)
   194  	e.int(fn.NumParams)
   195  	e.int(fn.NumKwonlyParams)
   196  	e.int(b2i(fn.HasVarargs))
   197  	e.int(b2i(fn.HasKwargs))
   198  }
   199  
   200  func b2i(b bool) int {
   201  	if b {
   202  		return 1
   203  	} else {
   204  		return 0
   205  	}
   206  }
   207  
   208  // DecodeProgram decodes a compiled exprcore program from data.
   209  func DecodeProgram(data []byte) (_ *Program, err error) {
   210  	if len(data) < len(magic) {
   211  		return nil, fmt.Errorf("not a compiled module: no magic number")
   212  	}
   213  	if got := string(data[:4]); got != magic {
   214  		return nil, fmt.Errorf("not a compiled module: got magic number %q, want %q",
   215  			got, magic)
   216  	}
   217  	defer func() {
   218  		if x := recover(); x != nil {
   219  			debugpkg.PrintStack()
   220  			err = fmt.Errorf("internal error while decoding program: %v", x)
   221  		}
   222  	}()
   223  
   224  	offset := binary.LittleEndian.Uint32(data[4:8])
   225  	d := decoder{
   226  		p: data[8:offset],
   227  		s: append([]byte(nil), data[offset:]...), // allocate a copy, which will persist
   228  	}
   229  
   230  	if v := d.int(); v != Version {
   231  		return nil, fmt.Errorf("version mismatch: read %d, want %d", v, Version)
   232  	}
   233  
   234  	filename := d.string()
   235  	d.filename = &filename
   236  
   237  	loads := d.bindings()
   238  
   239  	names := make([]string, d.int())
   240  	for i := range names {
   241  		names[i] = d.string()
   242  	}
   243  
   244  	// constants
   245  	constants := make([]interface{}, d.int())
   246  	for i := range constants {
   247  		var c interface{}
   248  		switch d.int() {
   249  		case 0:
   250  			c = d.string()
   251  		case 1:
   252  			c = d.int64()
   253  		case 2:
   254  			c = math.Float64frombits(d.uint64())
   255  		case 3:
   256  			c, _ = new(big.Int).SetString(d.string(), 10)
   257  		}
   258  		constants[i] = c
   259  	}
   260  
   261  	globals := d.bindings()
   262  	toplevel := d.function()
   263  	funcs := make([]*Funcode, d.int())
   264  	for i := range funcs {
   265  		funcs[i] = d.function()
   266  	}
   267  
   268  	prog := &Program{
   269  		Loads:     loads,
   270  		Names:     names,
   271  		Constants: constants,
   272  		Globals:   globals,
   273  		Functions: funcs,
   274  		Toplevel:  toplevel,
   275  	}
   276  	toplevel.Prog = prog
   277  	for _, f := range funcs {
   278  		f.Prog = prog
   279  	}
   280  
   281  	if len(d.p)+len(d.s) > 0 {
   282  		return nil, fmt.Errorf("internal error: unconsumed data during decoding")
   283  	}
   284  
   285  	return prog, nil
   286  }
   287  
   288  type decoder struct {
   289  	p        []byte  // encoded program
   290  	s        []byte  // strings
   291  	filename *string // (indirect to avoid keeping decoder live)
   292  }
   293  
   294  func (d *decoder) int() int {
   295  	return int(d.int64())
   296  }
   297  
   298  func (d *decoder) int64() int64 {
   299  	x, len := binary.Varint(d.p[:])
   300  	d.p = d.p[len:]
   301  	return x
   302  }
   303  
   304  func (d *decoder) uint64() uint64 {
   305  	x, len := binary.Uvarint(d.p[:])
   306  	d.p = d.p[len:]
   307  	return x
   308  }
   309  
   310  func (d *decoder) string() (s string) {
   311  	if slice := d.bytes(); len(slice) > 0 {
   312  		// Avoid a memory allocation for each string
   313  		// by unsafely aliasing slice.
   314  		type string struct {
   315  			data *byte
   316  			len  int
   317  		}
   318  		ptr := (*string)(unsafe.Pointer(&s))
   319  		ptr.data = &slice[0]
   320  		ptr.len = len(slice)
   321  	}
   322  	return s
   323  }
   324  
   325  func (d *decoder) bytes() []byte {
   326  	len := d.int()
   327  	r := d.s[:len:len]
   328  	d.s = d.s[len:]
   329  	return r
   330  }
   331  
   332  func (d *decoder) binding() Binding {
   333  	name := d.string()
   334  	line := int32(d.int())
   335  	col := int32(d.int())
   336  	return Binding{Name: name, Pos: syntax.MakePosition(d.filename, line, col)}
   337  }
   338  
   339  func (d *decoder) bindings() []Binding {
   340  	bindings := make([]Binding, d.int())
   341  	for i := range bindings {
   342  		bindings[i] = d.binding()
   343  	}
   344  	return bindings
   345  }
   346  
   347  func (d *decoder) ints() []int {
   348  	ints := make([]int, d.int())
   349  	for i := range ints {
   350  		ints[i] = d.int()
   351  	}
   352  	return ints
   353  }
   354  
   355  func (d *decoder) bool() bool { return d.int() != 0 }
   356  
   357  func (d *decoder) function() *Funcode {
   358  	id := d.binding()
   359  	doc := d.string()
   360  	code := d.bytes()
   361  	pclinetab := make([]uint16, d.int())
   362  	for i := range pclinetab {
   363  		pclinetab[i] = uint16(d.int())
   364  	}
   365  	locals := d.bindings()
   366  	cells := d.ints()
   367  	freevars := d.bindings()
   368  	maxStack := d.int()
   369  	numParams := d.int()
   370  	numKwonlyParams := d.int()
   371  	hasVarargs := d.int() != 0
   372  	hasKwargs := d.int() != 0
   373  	return &Funcode{
   374  		// Prog is filled in later.
   375  		Pos:             id.Pos,
   376  		Name:            id.Name,
   377  		Doc:             doc,
   378  		Code:            code,
   379  		pclinetab:       pclinetab,
   380  		Locals:          locals,
   381  		Cells:           cells,
   382  		Freevars:        freevars,
   383  		MaxStack:        maxStack,
   384  		NumParams:       numParams,
   385  		NumKwonlyParams: numKwonlyParams,
   386  		HasVarargs:      hasVarargs,
   387  		HasKwargs:       hasKwargs,
   388  	}
   389  }