// Source: go.starlark.net@v0.0.0-20231101134539-556fd59b42f6/internal/compile/serial.go

package compile

// This file defines functions to read and write a compile.Program to a file.
//
// It is the client's responsibility to avoid version skew between the
// compiler used to produce a file and the interpreter that consumes it.
// The version number is provided as a constant.
// Incompatible protocol changes should also increment the version number.
//
// Encoding
//
// Program:
//	"!sky"		[4]byte		# magic number
//	str		uint32le	# offset of <strings> section
//	version		varint		# must match Version
//	filename	string
//	numloads	varint
//	loads		[]Ident
//	numnames	varint
//	names		[]string
//	numconsts	varint
//	consts		[]Constant
//	numglobals	varint
//	globals		[]Ident
//	toplevel	Funcode
//	numfuncs	varint
//	funcs		[]Funcode
//	recursion	varint (0 or 1)
//	<strings>	[]byte		# concatenation of all referenced strings
//	EOF
//
// Funcode:
//	id		Ident
//	doc		string
//	code		[]byte
//	pclinetablen	varint
//	pclinetab	[]varint
//	numlocals	varint
//	locals		[]Ident
//	numcells	varint
//	cells		[]int
//	numfreevars	varint
//	freevar		[]Ident
//	maxstack	varint
//	numparams	varint
//	numkwonlyparams	varint
//	hasvarargs	varint (0 or 1)
//	haskwargs	varint (0 or 1)
//
// Ident:
//	name		string
//	line, col	varint
//
// Constant:				# type		data
//	type		varint		# 0=string	string
//	data		...		# 1=bytes	string
//					# 2=int		varint
//					# 3=float	varint (bits as uint64)
//					# 4=bigint	string (decimal ASCII text)
//
// The encoding starts with a four-byte magic number.
// The next four bytes are a little-endian uint32
// that provides the offset of the string section
// at the end of the file, which contains the ordered
// concatenation of all strings referenced by the
// program.
This design permits the decoder to read 66 // the first and second parts of the file into different 67 // memory allocations: the first (the encoded program) 68 // is transient, but the second (the strings) persists 69 // for the life of the Program. 70 // 71 // Within the encoded program, all strings are referred 72 // to by their length. As the encoder and decoder process 73 // the entire file sequentially, they are in lock step, 74 // so the start offset of each string is implicit. 75 // 76 // Program.Code is represented as a []byte slice to permit 77 // modification when breakpoints are set. All other strings 78 // are represented as strings. They all (unsafely) share the 79 // same backing byte slice. 80 // 81 // Aside from the str field, all integers are encoded as varints. 82 83 import ( 84 "encoding/binary" 85 "fmt" 86 "math" 87 "math/big" 88 debugpkg "runtime/debug" 89 "unsafe" 90 91 "go.starlark.net/syntax" 92 ) 93 94 const magic = "!sky" 95 96 // Encode encodes a compiled Starlark program. 97 func (prog *Program) Encode() []byte { 98 var e encoder 99 e.p = append(e.p, magic...) 100 e.p = append(e.p, "????"...) // string data offset; filled in later 101 e.int(Version) 102 e.string(prog.Toplevel.Pos.Filename()) 103 e.bindings(prog.Loads) 104 e.int(len(prog.Names)) 105 for _, name := range prog.Names { 106 e.string(name) 107 } 108 e.int(len(prog.Constants)) 109 for _, c := range prog.Constants { 110 switch c := c.(type) { 111 case string: 112 e.int(0) 113 e.string(c) 114 case Bytes: 115 e.int(1) 116 e.string(string(c)) 117 case int64: 118 e.int(2) 119 e.int64(c) 120 case float64: 121 e.int(3) 122 e.uint64(math.Float64bits(c)) 123 case *big.Int: 124 e.int(4) 125 e.string(c.Text(10)) 126 } 127 } 128 e.bindings(prog.Globals) 129 e.function(prog.Toplevel) 130 e.int(len(prog.Functions)) 131 for _, fn := range prog.Functions { 132 e.function(fn) 133 } 134 e.int(b2i(prog.Recursion)) 135 136 // Patch in the offset of the string data section. 
137 binary.LittleEndian.PutUint32(e.p[4:8], uint32(len(e.p))) 138 139 return append(e.p, e.s...) 140 } 141 142 type encoder struct { 143 p []byte // encoded program 144 s []byte // strings 145 tmp [binary.MaxVarintLen64]byte 146 } 147 148 func (e *encoder) int(x int) { 149 e.int64(int64(x)) 150 } 151 152 func (e *encoder) int64(x int64) { 153 n := binary.PutVarint(e.tmp[:], x) 154 e.p = append(e.p, e.tmp[:n]...) 155 } 156 157 func (e *encoder) uint64(x uint64) { 158 n := binary.PutUvarint(e.tmp[:], x) 159 e.p = append(e.p, e.tmp[:n]...) 160 } 161 162 func (e *encoder) string(s string) { 163 e.int(len(s)) 164 e.s = append(e.s, s...) 165 } 166 167 func (e *encoder) bytes(b []byte) { 168 e.int(len(b)) 169 e.s = append(e.s, b...) 170 } 171 172 func (e *encoder) binding(bind Binding) { 173 e.string(bind.Name) 174 e.int(int(bind.Pos.Line)) 175 e.int(int(bind.Pos.Col)) 176 } 177 178 func (e *encoder) bindings(binds []Binding) { 179 e.int(len(binds)) 180 for _, bind := range binds { 181 e.binding(bind) 182 } 183 } 184 185 func (e *encoder) function(fn *Funcode) { 186 e.binding(Binding{fn.Name, fn.Pos}) 187 e.string(fn.Doc) 188 e.bytes(fn.Code) 189 e.int(len(fn.pclinetab)) 190 for _, x := range fn.pclinetab { 191 e.int64(int64(x)) 192 } 193 e.bindings(fn.Locals) 194 e.int(len(fn.Cells)) 195 for _, index := range fn.Cells { 196 e.int(index) 197 } 198 e.bindings(fn.Freevars) 199 e.int(fn.MaxStack) 200 e.int(fn.NumParams) 201 e.int(fn.NumKwonlyParams) 202 e.int(b2i(fn.HasVarargs)) 203 e.int(b2i(fn.HasKwargs)) 204 } 205 206 func b2i(b bool) int { 207 if b { 208 return 1 209 } else { 210 return 0 211 } 212 } 213 214 // DecodeProgram decodes a compiled Starlark program from data. 
215 func DecodeProgram(data []byte) (_ *Program, err error) { 216 if len(data) < len(magic) { 217 return nil, fmt.Errorf("not a compiled module: no magic number") 218 } 219 if got := string(data[:4]); got != magic { 220 return nil, fmt.Errorf("not a compiled module: got magic number %q, want %q", 221 got, magic) 222 } 223 defer func() { 224 if x := recover(); x != nil { 225 debugpkg.PrintStack() 226 err = fmt.Errorf("internal error while decoding program: %v", x) 227 } 228 }() 229 230 offset := binary.LittleEndian.Uint32(data[4:8]) 231 d := decoder{ 232 p: data[8:offset], 233 s: append([]byte(nil), data[offset:]...), // allocate a copy, which will persist 234 } 235 236 if v := d.int(); v != Version { 237 return nil, fmt.Errorf("version mismatch: read %d, want %d", v, Version) 238 } 239 240 filename := d.string() 241 d.filename = &filename 242 243 loads := d.bindings() 244 245 names := make([]string, d.int()) 246 for i := range names { 247 names[i] = d.string() 248 } 249 250 // constants 251 constants := make([]interface{}, d.int()) 252 for i := range constants { 253 var c interface{} 254 switch d.int() { 255 case 0: 256 c = d.string() 257 case 1: 258 c = Bytes(d.string()) 259 case 2: 260 c = d.int64() 261 case 3: 262 c = math.Float64frombits(d.uint64()) 263 case 4: 264 c, _ = new(big.Int).SetString(d.string(), 10) 265 } 266 constants[i] = c 267 } 268 269 globals := d.bindings() 270 toplevel := d.function() 271 funcs := make([]*Funcode, d.int()) 272 for i := range funcs { 273 funcs[i] = d.function() 274 } 275 recursion := d.int() != 0 276 277 prog := &Program{ 278 Loads: loads, 279 Names: names, 280 Constants: constants, 281 Globals: globals, 282 Functions: funcs, 283 Toplevel: toplevel, 284 Recursion: recursion, 285 } 286 toplevel.Prog = prog 287 for _, f := range funcs { 288 f.Prog = prog 289 } 290 291 if len(d.p)+len(d.s) > 0 { 292 return nil, fmt.Errorf("internal error: unconsumed data during decoding") 293 } 294 295 return prog, nil 296 } 297 298 type decoder 
struct { 299 p []byte // encoded program 300 s []byte // strings 301 filename *string // (indirect to avoid keeping decoder live) 302 } 303 304 func (d *decoder) int() int { 305 return int(d.int64()) 306 } 307 308 func (d *decoder) int64() int64 { 309 x, len := binary.Varint(d.p[:]) 310 d.p = d.p[len:] 311 return x 312 } 313 314 func (d *decoder) uint64() uint64 { 315 x, len := binary.Uvarint(d.p[:]) 316 d.p = d.p[len:] 317 return x 318 } 319 320 func (d *decoder) string() (s string) { 321 if slice := d.bytes(); len(slice) > 0 { 322 // Avoid a memory allocation for each string 323 // by unsafely aliasing slice. 324 type string struct { 325 data *byte 326 len int 327 } 328 ptr := (*string)(unsafe.Pointer(&s)) 329 ptr.data = &slice[0] 330 ptr.len = len(slice) 331 } 332 return s 333 } 334 335 func (d *decoder) bytes() []byte { 336 len := d.int() 337 r := d.s[:len:len] 338 d.s = d.s[len:] 339 return r 340 } 341 342 func (d *decoder) binding() Binding { 343 name := d.string() 344 line := int32(d.int()) 345 col := int32(d.int()) 346 return Binding{Name: name, Pos: syntax.MakePosition(d.filename, line, col)} 347 } 348 349 func (d *decoder) bindings() []Binding { 350 bindings := make([]Binding, d.int()) 351 for i := range bindings { 352 bindings[i] = d.binding() 353 } 354 return bindings 355 } 356 357 func (d *decoder) ints() []int { 358 ints := make([]int, d.int()) 359 for i := range ints { 360 ints[i] = d.int() 361 } 362 return ints 363 } 364 365 func (d *decoder) bool() bool { return d.int() != 0 } 366 367 func (d *decoder) function() *Funcode { 368 id := d.binding() 369 doc := d.string() 370 code := d.bytes() 371 pclinetab := make([]uint16, d.int()) 372 for i := range pclinetab { 373 pclinetab[i] = uint16(d.int()) 374 } 375 locals := d.bindings() 376 cells := d.ints() 377 freevars := d.bindings() 378 maxStack := d.int() 379 numParams := d.int() 380 numKwonlyParams := d.int() 381 hasVarargs := d.int() != 0 382 hasKwargs := d.int() != 0 383 return &Funcode{ 384 // Prog 
is filled in later. 385 Pos: id.Pos, 386 Name: id.Name, 387 Doc: doc, 388 Code: code, 389 pclinetab: pclinetab, 390 Locals: locals, 391 Cells: cells, 392 Freevars: freevars, 393 MaxStack: maxStack, 394 NumParams: numParams, 395 NumKwonlyParams: numKwonlyParams, 396 HasVarargs: hasVarargs, 397 HasKwargs: hasKwargs, 398 } 399 }