github.com/lab47/exprcore@v0.0.0-20210525052339-fb7d6bd9331e/internal/compile/serial.go (about) 1 package compile 2 3 // This file defines functions to read and write a compile.Program to a file. 4 // 5 // It is the client's responsibility to avoid version skew between the 6 // compiler used to produce a file and the interpreter that consumes it. 7 // The version number is provided as a constant. 8 // Incompatible protocol changes should also increment the version number. 9 // 10 // Encoding 11 // 12 // Program: 13 // "sky!" [4]byte # magic number 14 // str uint32le # offset of <strings> section 15 // version varint # must match Version 16 // filename string 17 // numloads varint 18 // loads []Ident 19 // numnames varint 20 // names []string 21 // numconsts varint 22 // consts []Constant 23 // numglobals varint 24 // globals []Ident 25 // toplevel Funcode 26 // numfuncs varint 27 // funcs []Funcode 28 // <strings> []byte # concatenation of all referenced strings 29 // EOF 30 // 31 // Funcode: 32 // id Ident 33 // code []byte 34 // pclinetablen varint 35 // pclinetab []varint 36 // numlocals varint 37 // locals []Ident 38 // numcells varint 39 // cells []int 40 // numfreevars varint 41 // freevar []Ident 42 // maxstack varint 43 // numparams varint 44 // numkwonlyparams varint 45 // hasvarargs varint (0 or 1) 46 // haskwargs varint (0 or 1) 47 // 48 // Ident: 49 // filename string 50 // line, col varint 51 // 52 // Constant: # type data 53 // type varint # 0=string string 54 // data ... # 1=int varint 55 // # 2=float varint (bits as uint64) 56 // # 3=bigint string (decimal ASCII text) 57 // 58 // The encoding starts with a four-byte magic number. 59 // The next four bytes are a little-endian uint32 60 // that provides the offset of the string section 61 // at the end of the file, which contains the ordered 62 // concatenation of all strings referenced by the 63 // program. This design permits the decoder to read 64 // the first and second parts of the file into different 65 // memory allocations: the first (the encoded program) 66 // is transient, but the second (the strings) persists 67 // for the life of the Program. 68 // 69 // Within the encoded program, all strings are referred 70 // to by their length. As the encoder and decoder process 71 // the entire file sequentially, they are in lock step, 72 // so the start offset of each string is implicit. 73 // 74 // Program.Code is represented as a []byte slice to permit 75 // modification when breakpoints are set. All other strings 76 // are represented as strings. They all (unsafely) share the 77 // same backing byte slice. 78 // 79 // Aside from the str field, all integers are encoded as varints. 80 81 import ( 82 "encoding/binary" 83 "fmt" 84 "math" 85 "math/big" 86 debugpkg "runtime/debug" 87 "unsafe" 88 89 "github.com/lab47/exprcore/syntax" 90 ) 91 92 const magic = "!sky" 93 94 // Encode encodes a compiled exprcore program. 95 func (prog *Program) Encode() []byte { 96 var e encoder 97 e.p = append(e.p, magic...) 98 e.p = append(e.p, "????"...) // string data offset; filled in later 99 e.int(Version) 100 e.string(prog.Toplevel.Pos.Filename()) 101 e.bindings(prog.Loads) 102 e.int(len(prog.Names)) 103 for _, name := range prog.Names { 104 e.string(name) 105 } 106 e.int(len(prog.Constants)) 107 for _, c := range prog.Constants { 108 switch c := c.(type) { 109 case string: 110 e.int(0) 111 e.string(c) 112 case int64: 113 e.int(1) 114 e.int64(c) 115 case float64: 116 e.int(2) 117 e.uint64(math.Float64bits(c)) 118 case *big.Int: 119 e.int(3) 120 e.string(c.Text(10)) 121 } 122 } 123 e.bindings(prog.Globals) 124 e.function(prog.Toplevel) 125 e.int(len(prog.Functions)) 126 for _, fn := range prog.Functions { 127 e.function(fn) 128 } 129 130 // Patch in the offset of the string data section. 131 binary.LittleEndian.PutUint32(e.p[4:8], uint32(len(e.p))) 132 133 return append(e.p, e.s...) 134 } 135 136 type encoder struct { 137 p []byte // encoded program 138 s []byte // strings 139 tmp [binary.MaxVarintLen64]byte 140 } 141 142 func (e *encoder) int(x int) { 143 e.int64(int64(x)) 144 } 145 146 func (e *encoder) int64(x int64) { 147 n := binary.PutVarint(e.tmp[:], x) 148 e.p = append(e.p, e.tmp[:n]...) 149 } 150 151 func (e *encoder) uint64(x uint64) { 152 n := binary.PutUvarint(e.tmp[:], x) 153 e.p = append(e.p, e.tmp[:n]...) 154 } 155 156 func (e *encoder) string(s string) { 157 e.int(len(s)) 158 e.s = append(e.s, s...) 159 } 160 161 func (e *encoder) bytes(b []byte) { 162 e.int(len(b)) 163 e.s = append(e.s, b...) 164 } 165 166 func (e *encoder) binding(bind Binding) { 167 e.string(bind.Name) 168 e.int(int(bind.Pos.Line)) 169 e.int(int(bind.Pos.Col)) 170 } 171 172 func (e *encoder) bindings(binds []Binding) { 173 e.int(len(binds)) 174 for _, bind := range binds { 175 e.binding(bind) 176 } 177 } 178 179 func (e *encoder) function(fn *Funcode) { 180 e.binding(Binding{fn.Name, fn.Pos}) 181 e.string(fn.Doc) 182 e.bytes(fn.Code) 183 e.int(len(fn.pclinetab)) 184 for _, x := range fn.pclinetab { 185 e.int64(int64(x)) 186 } 187 e.bindings(fn.Locals) 188 e.int(len(fn.Cells)) 189 for _, index := range fn.Cells { 190 e.int(index) 191 } 192 e.bindings(fn.Freevars) 193 e.int(fn.MaxStack) 194 e.int(fn.NumParams) 195 e.int(fn.NumKwonlyParams) 196 e.int(b2i(fn.HasVarargs)) 197 e.int(b2i(fn.HasKwargs)) 198 } 199 200 func b2i(b bool) int { 201 if b { 202 return 1 203 } else { 204 return 0 205 } 206 } 207 208 // DecodeProgram decodes a compiled exprcore program from data. 209 func DecodeProgram(data []byte) (_ *Program, err error) { 210 if len(data) < len(magic) { 211 return nil, fmt.Errorf("not a compiled module: no magic number") 212 } 213 if got := string(data[:4]); got != magic { 214 return nil, fmt.Errorf("not a compiled module: got magic number %q, want %q", 215 got, magic) 216 } 217 defer func() { 218 if x := recover(); x != nil { 219 debugpkg.PrintStack() 220 err = fmt.Errorf("internal error while decoding program: %v", x) 221 } 222 }() 223 224 offset := binary.LittleEndian.Uint32(data[4:8]) 225 d := decoder{ 226 p: data[8:offset], 227 s: append([]byte(nil), data[offset:]...), // allocate a copy, which will persist 228 } 229 230 if v := d.int(); v != Version { 231 return nil, fmt.Errorf("version mismatch: read %d, want %d", v, Version) 232 } 233 234 filename := d.string() 235 d.filename = &filename 236 237 loads := d.bindings() 238 239 names := make([]string, d.int()) 240 for i := range names { 241 names[i] = d.string() 242 } 243 244 // constants 245 constants := make([]interface{}, d.int()) 246 for i := range constants { 247 var c interface{} 248 switch d.int() { 249 case 0: 250 c = d.string() 251 case 1: 252 c = d.int64() 253 case 2: 254 c = math.Float64frombits(d.uint64()) 255 case 3: 256 c, _ = new(big.Int).SetString(d.string(), 10) 257 } 258 constants[i] = c 259 } 260 261 globals := d.bindings() 262 toplevel := d.function() 263 funcs := make([]*Funcode, d.int()) 264 for i := range funcs { 265 funcs[i] = d.function() 266 } 267 268 prog := &Program{ 269 Loads: loads, 270 Names: names, 271 Constants: constants, 272 Globals: globals, 273 Functions: funcs, 274 Toplevel: toplevel, 275 } 276 toplevel.Prog = prog 277 for _, f := range funcs { 278 f.Prog = prog 279 } 280 281 if len(d.p)+len(d.s) > 0 { 282 return nil, fmt.Errorf("internal error: unconsumed data during decoding") 283 } 284 285 return prog, nil 286 } 287 288 type decoder struct { 289 p []byte // encoded program 290 s []byte // strings 291 filename *string // (indirect to avoid keeping decoder live) 292 } 293 294 func (d *decoder) int() int { 295 return int(d.int64()) 296 } 297 298 func (d *decoder) int64() int64 { 299 x, len := binary.Varint(d.p[:]) 300 d.p = d.p[len:] 301 return x 302 } 303 304 func (d *decoder) uint64() uint64 { 305 x, len := binary.Uvarint(d.p[:]) 306 d.p = d.p[len:] 307 return x 308 } 309 310 func (d *decoder) string() (s string) { 311 if slice := d.bytes(); len(slice) > 0 { 312 // Avoid a memory allocation for each string 313 // by unsafely aliasing slice. 314 type string struct { 315 data *byte 316 len int 317 } 318 ptr := (*string)(unsafe.Pointer(&s)) 319 ptr.data = &slice[0] 320 ptr.len = len(slice) 321 } 322 return s 323 } 324 325 func (d *decoder) bytes() []byte { 326 len := d.int() 327 r := d.s[:len:len] 328 d.s = d.s[len:] 329 return r 330 } 331 332 func (d *decoder) binding() Binding { 333 name := d.string() 334 line := int32(d.int()) 335 col := int32(d.int()) 336 return Binding{Name: name, Pos: syntax.MakePosition(d.filename, line, col)} 337 } 338 339 func (d *decoder) bindings() []Binding { 340 bindings := make([]Binding, d.int()) 341 for i := range bindings { 342 bindings[i] = d.binding() 343 } 344 return bindings 345 } 346 347 func (d *decoder) ints() []int { 348 ints := make([]int, d.int()) 349 for i := range ints { 350 ints[i] = d.int() 351 } 352 return ints 353 } 354 355 func (d *decoder) bool() bool { return d.int() != 0 } 356 357 func (d *decoder) function() *Funcode { 358 id := d.binding() 359 doc := d.string() 360 code := d.bytes() 361 pclinetab := make([]uint16, d.int()) 362 for i := range pclinetab { 363 pclinetab[i] = uint16(d.int()) 364 } 365 locals := d.bindings() 366 cells := d.ints() 367 freevars := d.bindings() 368 maxStack := d.int() 369 numParams := d.int() 370 numKwonlyParams := d.int() 371 hasVarargs := d.int() != 0 372 hasKwargs := d.int() != 0 373 return &Funcode{ 374 // Prog is filled in later. 375 Pos: id.Pos, 376 Name: id.Name, 377 Doc: doc, 378 Code: code, 379 pclinetab: pclinetab, 380 Locals: locals, 381 Cells: cells, 382 Freevars: freevars, 383 MaxStack: maxStack, 384 NumParams: numParams, 385 NumKwonlyParams: numKwonlyParams, 386 HasVarargs: hasVarargs, 387 HasKwargs: hasKwargs, 388 } 389 }