github.com/jingcheng-WU/gonum@v0.9.1-0.20210323123734-f1a2a11a8f7b/mat/io.go (about) 1 // Copyright ©2015 The Gonum Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package mat 6 7 import ( 8 "bytes" 9 "encoding/binary" 10 "errors" 11 "fmt" 12 "io" 13 "math" 14 ) 15 16 // version is the current on-disk codec version. 17 const version uint32 = 0x1 18 19 // maxLen is the biggest slice/array len one can create on a 32/64b platform. 20 const maxLen = int64(int(^uint(0) >> 1)) 21 22 var ( 23 headerSize = binary.Size(storage{}) 24 sizeFloat64 = binary.Size(float64(0)) 25 26 errWrongType = errors.New("mat: wrong data type") 27 28 errTooBig = errors.New("mat: resulting data slice too big") 29 errTooSmall = errors.New("mat: input slice too small") 30 errBadBuffer = errors.New("mat: data buffer size mismatch") 31 errBadSize = errors.New("mat: invalid dimension") 32 ) 33 34 // Type encoding scheme: 35 // 36 // Type Form Packing Uplo Unit Rows Columns kU kL 37 // uint8 [GST] uint8 [BPF] uint8 [AUL] bool int64 int64 int64 int64 38 // General 'G' 'F' 'A' false r c 0 0 39 // Band 'G' 'B' 'A' false r c kU kL 40 // Symmetric 'S' 'F' ul false n n 0 0 41 // SymmetricBand 'S' 'B' ul false n n k k 42 // SymmetricPacked 'S' 'P' ul false n n 0 0 43 // Triangular 'T' 'F' ul Diag==Unit n n 0 0 44 // TriangularBand 'T' 'B' ul Diag==Unit n n k k 45 // TriangularPacked 'T' 'P' ul Diag==Unit n n 0 0 46 // 47 // G - general, S - symmetric, T - triangular 48 // F - full, B - band, P - packed 49 // A - all, U - upper, L - lower 50 51 // MarshalBinary encodes the receiver into a binary form and returns the result. 52 // 53 // Dense is little-endian encoded as follows: 54 // 0 - 3 Version = 1 (uint32) 55 // 4 'G' (byte) 56 // 5 'F' (byte) 57 // 6 'A' (byte) 58 // 7 0 (byte) 59 // 8 - 15 number of rows (int64) 60 // 16 - 23 number of columns (int64) 61 // 24 - 31 0 (int64) 62 // 32 - 39 0 (int64) 63 // 40 - .. matrix data elements (float64) 64 // [0,0] [0,1] ... [0,ncols-1] 65 // [1,0] [1,1] ... [1,ncols-1] 66 // ... 67 // [nrows-1,0] ... [nrows-1,ncols-1] 68 func (m Dense) MarshalBinary() ([]byte, error) { 69 bufLen := int64(headerSize) + int64(m.mat.Rows)*int64(m.mat.Cols)*int64(sizeFloat64) 70 if bufLen <= 0 { 71 // bufLen is too big and has wrapped around. 72 return nil, errTooBig 73 } 74 75 header := storage{ 76 Form: 'G', Packing: 'F', Uplo: 'A', 77 Rows: int64(m.mat.Rows), Cols: int64(m.mat.Cols), 78 Version: version, 79 } 80 buf := make([]byte, bufLen) 81 n, err := header.marshalBinaryTo(bytes.NewBuffer(buf[:0])) 82 if err != nil { 83 return buf[:n], err 84 } 85 86 p := headerSize 87 r, c := m.Dims() 88 for i := 0; i < r; i++ { 89 for j := 0; j < c; j++ { 90 binary.LittleEndian.PutUint64(buf[p:p+sizeFloat64], math.Float64bits(m.at(i, j))) 91 p += sizeFloat64 92 } 93 } 94 95 return buf, nil 96 } 97 98 // MarshalBinaryTo encodes the receiver into a binary form and writes it into w. 99 // MarshalBinaryTo returns the number of bytes written into w and an error, if any. 100 // 101 // See MarshalBinary for the on-disk layout. 102 func (m Dense) MarshalBinaryTo(w io.Writer) (int, error) { 103 header := storage{ 104 Form: 'G', Packing: 'F', Uplo: 'A', 105 Rows: int64(m.mat.Rows), Cols: int64(m.mat.Cols), 106 Version: version, 107 } 108 n, err := header.marshalBinaryTo(w) 109 if err != nil { 110 return n, err 111 } 112 113 r, c := m.Dims() 114 var b [8]byte 115 for i := 0; i < r; i++ { 116 for j := 0; j < c; j++ { 117 binary.LittleEndian.PutUint64(b[:], math.Float64bits(m.at(i, j))) 118 nn, err := w.Write(b[:]) 119 n += nn 120 if err != nil { 121 return n, err 122 } 123 } 124 } 125 126 return n, nil 127 } 128 129 // UnmarshalBinary decodes the binary form into the receiver. 130 // It panics if the receiver is a non-empty Dense matrix. 131 // 132 // See MarshalBinary for the on-disk layout. 133 // 134 // Limited checks on the validity of the binary input are performed: 135 // - matrix.ErrShape is returned if the number of rows or columns is negative, 136 // - an error is returned if the resulting Dense matrix is too 137 // big for the current architecture (e.g. a 16GB matrix written by a 138 // 64b application and read back from a 32b application.) 139 // UnmarshalBinary does not limit the size of the unmarshaled matrix, and so 140 // it should not be used on untrusted data. 141 func (m *Dense) UnmarshalBinary(data []byte) error { 142 if !m.IsEmpty() { 143 panic("mat: unmarshal into non-empty matrix") 144 } 145 146 if len(data) < headerSize { 147 return errTooSmall 148 } 149 150 var header storage 151 err := header.unmarshalBinary(data[:headerSize]) 152 if err != nil { 153 return err 154 } 155 rows := header.Rows 156 cols := header.Cols 157 header.Version = 0 158 header.Rows = 0 159 header.Cols = 0 160 if (header != storage{Form: 'G', Packing: 'F', Uplo: 'A'}) { 161 return errWrongType 162 } 163 if rows < 0 || cols < 0 { 164 return errBadSize 165 } 166 size := rows * cols 167 if size == 0 { 168 return ErrZeroLength 169 } 170 if int(size) < 0 || size > maxLen { 171 return errTooBig 172 } 173 if len(data) != headerSize+int(rows*cols)*sizeFloat64 { 174 return errBadBuffer 175 } 176 177 p := headerSize 178 m.reuseAsNonZeroed(int(rows), int(cols)) 179 for i := range m.mat.Data { 180 m.mat.Data[i] = math.Float64frombits(binary.LittleEndian.Uint64(data[p : p+sizeFloat64])) 181 p += sizeFloat64 182 } 183 184 return nil 185 } 186 187 // UnmarshalBinaryFrom decodes the binary form into the receiver and returns 188 // the number of bytes read and an error if any. 189 // It panics if the receiver is a non-empty Dense matrix. 190 // 191 // See MarshalBinary for the on-disk layout. 192 // 193 // Limited checks on the validity of the binary input are performed: 194 // - matrix.ErrShape is returned if the number of rows or columns is negative, 195 // - an error is returned if the resulting Dense matrix is too 196 // big for the current architecture (e.g. a 16GB matrix written by a 197 // 64b application and read back from a 32b application.) 198 // UnmarshalBinary does not limit the size of the unmarshaled matrix, and so 199 // it should not be used on untrusted data. 200 func (m *Dense) UnmarshalBinaryFrom(r io.Reader) (int, error) { 201 if !m.IsEmpty() { 202 panic("mat: unmarshal into non-empty matrix") 203 } 204 205 var header storage 206 n, err := header.unmarshalBinaryFrom(r) 207 if err != nil { 208 return n, err 209 } 210 rows := header.Rows 211 cols := header.Cols 212 header.Version = 0 213 header.Rows = 0 214 header.Cols = 0 215 if (header != storage{Form: 'G', Packing: 'F', Uplo: 'A'}) { 216 return n, errWrongType 217 } 218 if rows < 0 || cols < 0 { 219 return n, errBadSize 220 } 221 size := rows * cols 222 if size == 0 { 223 return n, ErrZeroLength 224 } 225 if int(size) < 0 || size > maxLen { 226 return n, errTooBig 227 } 228 229 m.reuseAsNonZeroed(int(rows), int(cols)) 230 var b [8]byte 231 for i := range m.mat.Data { 232 nn, err := readFull(r, b[:]) 233 n += nn 234 if err != nil { 235 if err == io.EOF { 236 return n, io.ErrUnexpectedEOF 237 } 238 return n, err 239 } 240 m.mat.Data[i] = math.Float64frombits(binary.LittleEndian.Uint64(b[:])) 241 } 242 243 return n, nil 244 } 245 246 // MarshalBinary encodes the receiver into a binary form and returns the result. 247 // 248 // VecDense is little-endian encoded as follows: 249 // 250 // 0 - 3 Version = 1 (uint32) 251 // 4 'G' (byte) 252 // 5 'F' (byte) 253 // 6 'A' (byte) 254 // 7 0 (byte) 255 // 8 - 15 number of elements (int64) 256 // 16 - 23 1 (int64) 257 // 24 - 31 0 (int64) 258 // 32 - 39 0 (int64) 259 // 40 - .. vector's data elements (float64) 260 func (v VecDense) MarshalBinary() ([]byte, error) { 261 bufLen := int64(headerSize) + int64(v.mat.N)*int64(sizeFloat64) 262 if bufLen <= 0 { 263 // bufLen is too big and has wrapped around. 264 return nil, errTooBig 265 } 266 267 header := storage{ 268 Form: 'G', Packing: 'F', Uplo: 'A', 269 Rows: int64(v.mat.N), Cols: 1, 270 Version: version, 271 } 272 buf := make([]byte, bufLen) 273 n, err := header.marshalBinaryTo(bytes.NewBuffer(buf[:0])) 274 if err != nil { 275 return buf[:n], err 276 } 277 278 p := headerSize 279 for i := 0; i < v.mat.N; i++ { 280 binary.LittleEndian.PutUint64(buf[p:p+sizeFloat64], math.Float64bits(v.at(i))) 281 p += sizeFloat64 282 } 283 284 return buf, nil 285 } 286 287 // MarshalBinaryTo encodes the receiver into a binary form, writes it to w and 288 // returns the number of bytes written and an error if any. 289 // 290 // See MarshalBainry for the on-disk format. 291 func (v VecDense) MarshalBinaryTo(w io.Writer) (int, error) { 292 header := storage{ 293 Form: 'G', Packing: 'F', Uplo: 'A', 294 Rows: int64(v.mat.N), Cols: 1, 295 Version: version, 296 } 297 n, err := header.marshalBinaryTo(w) 298 if err != nil { 299 return n, err 300 } 301 302 var buf [8]byte 303 for i := 0; i < v.mat.N; i++ { 304 binary.LittleEndian.PutUint64(buf[:], math.Float64bits(v.at(i))) 305 nn, err := w.Write(buf[:]) 306 n += nn 307 if err != nil { 308 return n, err 309 } 310 } 311 312 return n, nil 313 } 314 315 // UnmarshalBinary decodes the binary form into the receiver. 316 // It panics if the receiver is a non-empty VecDense. 317 // 318 // See MarshalBinary for the on-disk layout. 319 // 320 // Limited checks on the validity of the binary input are performed: 321 // - matrix.ErrShape is returned if the number of rows is negative, 322 // - an error is returned if the resulting VecDense is too 323 // big for the current architecture (e.g. a 16GB vector written by a 324 // 64b application and read back from a 32b application.) 325 // UnmarshalBinary does not limit the size of the unmarshaled vector, and so 326 // it should not be used on untrusted data. 327 func (v *VecDense) UnmarshalBinary(data []byte) error { 328 if !v.IsEmpty() { 329 panic("mat: unmarshal into non-empty vector") 330 } 331 332 if len(data) < headerSize { 333 return errTooSmall 334 } 335 336 var header storage 337 err := header.unmarshalBinary(data[:headerSize]) 338 if err != nil { 339 return err 340 } 341 if header.Cols != 1 { 342 return ErrShape 343 } 344 n := header.Rows 345 header.Version = 0 346 header.Rows = 0 347 header.Cols = 0 348 if (header != storage{Form: 'G', Packing: 'F', Uplo: 'A'}) { 349 return errWrongType 350 } 351 if n == 0 { 352 return ErrZeroLength 353 } 354 if n < 0 { 355 return errBadSize 356 } 357 if int64(maxLen) < n { 358 return errTooBig 359 } 360 if len(data) != headerSize+int(n)*sizeFloat64 { 361 return errBadBuffer 362 } 363 364 p := headerSize 365 v.reuseAsNonZeroed(int(n)) 366 for i := range v.mat.Data { 367 v.mat.Data[i] = math.Float64frombits(binary.LittleEndian.Uint64(data[p : p+sizeFloat64])) 368 p += sizeFloat64 369 } 370 371 return nil 372 } 373 374 // UnmarshalBinaryFrom decodes the binary form into the receiver, from the 375 // io.Reader and returns the number of bytes read and an error if any. 376 // It panics if the receiver is a non-empty VecDense. 377 // 378 // See MarshalBinary for the on-disk layout. 379 // See UnmarshalBinary for the list of sanity checks performed on the input. 380 func (v *VecDense) UnmarshalBinaryFrom(r io.Reader) (int, error) { 381 if !v.IsEmpty() { 382 panic("mat: unmarshal into non-empty vector") 383 } 384 385 var header storage 386 n, err := header.unmarshalBinaryFrom(r) 387 if err != nil { 388 return n, err 389 } 390 if header.Cols != 1 { 391 return n, ErrShape 392 } 393 l := header.Rows 394 header.Version = 0 395 header.Rows = 0 396 header.Cols = 0 397 if (header != storage{Form: 'G', Packing: 'F', Uplo: 'A'}) { 398 return n, errWrongType 399 } 400 if l == 0 { 401 return n, ErrZeroLength 402 } 403 if l < 0 { 404 return n, errBadSize 405 } 406 if int64(maxLen) < l { 407 return n, errTooBig 408 } 409 410 v.reuseAsNonZeroed(int(l)) 411 var b [8]byte 412 for i := range v.mat.Data { 413 nn, err := readFull(r, b[:]) 414 n += nn 415 if err != nil { 416 if err == io.EOF { 417 return n, io.ErrUnexpectedEOF 418 } 419 return n, err 420 } 421 v.mat.Data[i] = math.Float64frombits(binary.LittleEndian.Uint64(b[:])) 422 } 423 424 return n, nil 425 } 426 427 // storage is the internal representation of the storage format of a 428 // serialised matrix. 429 type storage struct { 430 Version uint32 // Keep this first. 431 Form byte // [GST] 432 Packing byte // [BPF] 433 Uplo byte // [AUL] 434 Unit bool 435 Rows int64 436 Cols int64 437 KU int64 438 KL int64 439 } 440 441 // TODO(kortschak): Consider replacing these with calls to direct 442 // encoding/decoding of fields rather than to binary.Write/binary.Read. 443 444 func (s storage) marshalBinaryTo(w io.Writer) (int, error) { 445 buf := bytes.NewBuffer(make([]byte, 0, headerSize)) 446 err := binary.Write(buf, binary.LittleEndian, s) 447 if err != nil { 448 return 0, err 449 } 450 return w.Write(buf.Bytes()) 451 } 452 453 func (s *storage) unmarshalBinary(buf []byte) error { 454 err := binary.Read(bytes.NewReader(buf), binary.LittleEndian, s) 455 if err != nil { 456 return err 457 } 458 if s.Version != version { 459 return fmt.Errorf("mat: incorrect version: %d", s.Version) 460 } 461 return nil 462 } 463 464 func (s *storage) unmarshalBinaryFrom(r io.Reader) (int, error) { 465 buf := make([]byte, headerSize) 466 n, err := readFull(r, buf) 467 if err != nil { 468 return n, err 469 } 470 return n, s.unmarshalBinary(buf[:n]) 471 } 472 473 // readFull reads from r into buf until it has read len(buf). 474 // It returns the number of bytes copied and an error if fewer bytes were read. 475 // If an EOF happens after reading fewer than len(buf) bytes, io.ErrUnexpectedEOF is returned. 476 func readFull(r io.Reader, buf []byte) (int, error) { 477 var n int 478 var err error 479 for n < len(buf) && err == nil { 480 var nn int 481 nn, err = r.Read(buf[n:]) 482 n += nn 483 } 484 if n == len(buf) { 485 return n, nil 486 } 487 if err == io.EOF { 488 return n, io.ErrUnexpectedEOF 489 } 490 return n, err 491 }