github.com/jingcheng-WU/gonum@v0.9.1-0.20210323123734-f1a2a11a8f7b/mat/io.go (about)

     1  // Copyright ©2015 The Gonum Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package mat
     6  
     7  import (
     8  	"bytes"
     9  	"encoding/binary"
    10  	"errors"
    11  	"fmt"
    12  	"io"
    13  	"math"
    14  )
    15  
    16  // version is the current on-disk codec version.
    17  const version uint32 = 0x1
    18  
    19  // maxLen is the biggest slice/array len one can create on a 32/64b platform.
    20  const maxLen = int64(int(^uint(0) >> 1))
    21  
    22  var (
    23  	headerSize  = binary.Size(storage{})
    24  	sizeFloat64 = binary.Size(float64(0))
    25  
    26  	errWrongType = errors.New("mat: wrong data type")
    27  
    28  	errTooBig    = errors.New("mat: resulting data slice too big")
    29  	errTooSmall  = errors.New("mat: input slice too small")
    30  	errBadBuffer = errors.New("mat: data buffer size mismatch")
    31  	errBadSize   = errors.New("mat: invalid dimension")
    32  )
    33  
    34  // Type encoding scheme:
    35  //
    36  // Type 		Form 	Packing 	Uplo 		Unit 		Rows 	Columns kU 	kL
    37  // uint8 		[GST] 	uint8 [BPF] 	uint8 [AUL] 	bool 		int64 	int64 	int64 	int64
    38  // General 		'G' 	'F' 		'A' 		false 		r 	c 	0 	0
    39  // Band 		'G' 	'B' 		'A' 		false 		r 	c 	kU 	kL
    40  // Symmetric 		'S' 	'F' 		ul 		false 		n 	n 	0 	0
    41  // SymmetricBand 	'S' 	'B' 		ul 		false 		n 	n 	k 	k
    42  // SymmetricPacked 	'S' 	'P' 		ul 		false 		n 	n 	0 	0
    43  // Triangular 		'T' 	'F' 		ul 		Diag==Unit 	n 	n 	0 	0
    44  // TriangularBand 	'T' 	'B' 		ul 		Diag==Unit 	n 	n 	k 	k
    45  // TriangularPacked 	'T' 	'P' 		ul	 	Diag==Unit 	n 	n 	0 	0
    46  //
    47  // G - general, S - symmetric, T - triangular
    48  // F - full, B - band, P - packed
    49  // A - all, U - upper, L - lower
    50  
    51  // MarshalBinary encodes the receiver into a binary form and returns the result.
    52  //
    53  // Dense is little-endian encoded as follows:
    54  //   0 -  3  Version = 1          (uint32)
    55  //   4       'G'                  (byte)
    56  //   5       'F'                  (byte)
    57  //   6       'A'                  (byte)
    58  //   7       0                    (byte)
    59  //   8 - 15  number of rows       (int64)
    60  //  16 - 23  number of columns    (int64)
    61  //  24 - 31  0                    (int64)
    62  //  32 - 39  0                    (int64)
    63  //  40 - ..  matrix data elements (float64)
    64  //           [0,0] [0,1] ... [0,ncols-1]
    65  //           [1,0] [1,1] ... [1,ncols-1]
    66  //           ...
    67  //           [nrows-1,0] ... [nrows-1,ncols-1]
    68  func (m Dense) MarshalBinary() ([]byte, error) {
    69  	bufLen := int64(headerSize) + int64(m.mat.Rows)*int64(m.mat.Cols)*int64(sizeFloat64)
    70  	if bufLen <= 0 {
    71  		// bufLen is too big and has wrapped around.
    72  		return nil, errTooBig
    73  	}
    74  
    75  	header := storage{
    76  		Form: 'G', Packing: 'F', Uplo: 'A',
    77  		Rows: int64(m.mat.Rows), Cols: int64(m.mat.Cols),
    78  		Version: version,
    79  	}
    80  	buf := make([]byte, bufLen)
    81  	n, err := header.marshalBinaryTo(bytes.NewBuffer(buf[:0]))
    82  	if err != nil {
    83  		return buf[:n], err
    84  	}
    85  
    86  	p := headerSize
    87  	r, c := m.Dims()
    88  	for i := 0; i < r; i++ {
    89  		for j := 0; j < c; j++ {
    90  			binary.LittleEndian.PutUint64(buf[p:p+sizeFloat64], math.Float64bits(m.at(i, j)))
    91  			p += sizeFloat64
    92  		}
    93  	}
    94  
    95  	return buf, nil
    96  }
    97  
    98  // MarshalBinaryTo encodes the receiver into a binary form and writes it into w.
    99  // MarshalBinaryTo returns the number of bytes written into w and an error, if any.
   100  //
   101  // See MarshalBinary for the on-disk layout.
   102  func (m Dense) MarshalBinaryTo(w io.Writer) (int, error) {
   103  	header := storage{
   104  		Form: 'G', Packing: 'F', Uplo: 'A',
   105  		Rows: int64(m.mat.Rows), Cols: int64(m.mat.Cols),
   106  		Version: version,
   107  	}
   108  	n, err := header.marshalBinaryTo(w)
   109  	if err != nil {
   110  		return n, err
   111  	}
   112  
   113  	r, c := m.Dims()
   114  	var b [8]byte
   115  	for i := 0; i < r; i++ {
   116  		for j := 0; j < c; j++ {
   117  			binary.LittleEndian.PutUint64(b[:], math.Float64bits(m.at(i, j)))
   118  			nn, err := w.Write(b[:])
   119  			n += nn
   120  			if err != nil {
   121  				return n, err
   122  			}
   123  		}
   124  	}
   125  
   126  	return n, nil
   127  }
   128  
   129  // UnmarshalBinary decodes the binary form into the receiver.
   130  // It panics if the receiver is a non-empty Dense matrix.
   131  //
   132  // See MarshalBinary for the on-disk layout.
   133  //
   134  // Limited checks on the validity of the binary input are performed:
   135  //  - matrix.ErrShape is returned if the number of rows or columns is negative,
   136  //  - an error is returned if the resulting Dense matrix is too
   137  //  big for the current architecture (e.g. a 16GB matrix written by a
   138  //  64b application and read back from a 32b application.)
   139  // UnmarshalBinary does not limit the size of the unmarshaled matrix, and so
   140  // it should not be used on untrusted data.
   141  func (m *Dense) UnmarshalBinary(data []byte) error {
   142  	if !m.IsEmpty() {
   143  		panic("mat: unmarshal into non-empty matrix")
   144  	}
   145  
   146  	if len(data) < headerSize {
   147  		return errTooSmall
   148  	}
   149  
   150  	var header storage
   151  	err := header.unmarshalBinary(data[:headerSize])
   152  	if err != nil {
   153  		return err
   154  	}
   155  	rows := header.Rows
   156  	cols := header.Cols
   157  	header.Version = 0
   158  	header.Rows = 0
   159  	header.Cols = 0
   160  	if (header != storage{Form: 'G', Packing: 'F', Uplo: 'A'}) {
   161  		return errWrongType
   162  	}
   163  	if rows < 0 || cols < 0 {
   164  		return errBadSize
   165  	}
   166  	size := rows * cols
   167  	if size == 0 {
   168  		return ErrZeroLength
   169  	}
   170  	if int(size) < 0 || size > maxLen {
   171  		return errTooBig
   172  	}
   173  	if len(data) != headerSize+int(rows*cols)*sizeFloat64 {
   174  		return errBadBuffer
   175  	}
   176  
   177  	p := headerSize
   178  	m.reuseAsNonZeroed(int(rows), int(cols))
   179  	for i := range m.mat.Data {
   180  		m.mat.Data[i] = math.Float64frombits(binary.LittleEndian.Uint64(data[p : p+sizeFloat64]))
   181  		p += sizeFloat64
   182  	}
   183  
   184  	return nil
   185  }
   186  
   187  // UnmarshalBinaryFrom decodes the binary form into the receiver and returns
   188  // the number of bytes read and an error if any.
   189  // It panics if the receiver is a non-empty Dense matrix.
   190  //
   191  // See MarshalBinary for the on-disk layout.
   192  //
   193  // Limited checks on the validity of the binary input are performed:
   194  //  - matrix.ErrShape is returned if the number of rows or columns is negative,
   195  //  - an error is returned if the resulting Dense matrix is too
   196  //  big for the current architecture (e.g. a 16GB matrix written by a
   197  //  64b application and read back from a 32b application.)
   198  // UnmarshalBinary does not limit the size of the unmarshaled matrix, and so
   199  // it should not be used on untrusted data.
   200  func (m *Dense) UnmarshalBinaryFrom(r io.Reader) (int, error) {
   201  	if !m.IsEmpty() {
   202  		panic("mat: unmarshal into non-empty matrix")
   203  	}
   204  
   205  	var header storage
   206  	n, err := header.unmarshalBinaryFrom(r)
   207  	if err != nil {
   208  		return n, err
   209  	}
   210  	rows := header.Rows
   211  	cols := header.Cols
   212  	header.Version = 0
   213  	header.Rows = 0
   214  	header.Cols = 0
   215  	if (header != storage{Form: 'G', Packing: 'F', Uplo: 'A'}) {
   216  		return n, errWrongType
   217  	}
   218  	if rows < 0 || cols < 0 {
   219  		return n, errBadSize
   220  	}
   221  	size := rows * cols
   222  	if size == 0 {
   223  		return n, ErrZeroLength
   224  	}
   225  	if int(size) < 0 || size > maxLen {
   226  		return n, errTooBig
   227  	}
   228  
   229  	m.reuseAsNonZeroed(int(rows), int(cols))
   230  	var b [8]byte
   231  	for i := range m.mat.Data {
   232  		nn, err := readFull(r, b[:])
   233  		n += nn
   234  		if err != nil {
   235  			if err == io.EOF {
   236  				return n, io.ErrUnexpectedEOF
   237  			}
   238  			return n, err
   239  		}
   240  		m.mat.Data[i] = math.Float64frombits(binary.LittleEndian.Uint64(b[:]))
   241  	}
   242  
   243  	return n, nil
   244  }
   245  
   246  // MarshalBinary encodes the receiver into a binary form and returns the result.
   247  //
   248  // VecDense is little-endian encoded as follows:
   249  //
   250  //   0 -  3  Version = 1            (uint32)
   251  //   4       'G'                    (byte)
   252  //   5       'F'                    (byte)
   253  //   6       'A'                    (byte)
   254  //   7       0                      (byte)
   255  //   8 - 15  number of elements     (int64)
   256  //  16 - 23  1                      (int64)
   257  //  24 - 31  0                      (int64)
   258  //  32 - 39  0                      (int64)
   259  //  40 - ..  vector's data elements (float64)
   260  func (v VecDense) MarshalBinary() ([]byte, error) {
   261  	bufLen := int64(headerSize) + int64(v.mat.N)*int64(sizeFloat64)
   262  	if bufLen <= 0 {
   263  		// bufLen is too big and has wrapped around.
   264  		return nil, errTooBig
   265  	}
   266  
   267  	header := storage{
   268  		Form: 'G', Packing: 'F', Uplo: 'A',
   269  		Rows: int64(v.mat.N), Cols: 1,
   270  		Version: version,
   271  	}
   272  	buf := make([]byte, bufLen)
   273  	n, err := header.marshalBinaryTo(bytes.NewBuffer(buf[:0]))
   274  	if err != nil {
   275  		return buf[:n], err
   276  	}
   277  
   278  	p := headerSize
   279  	for i := 0; i < v.mat.N; i++ {
   280  		binary.LittleEndian.PutUint64(buf[p:p+sizeFloat64], math.Float64bits(v.at(i)))
   281  		p += sizeFloat64
   282  	}
   283  
   284  	return buf, nil
   285  }
   286  
   287  // MarshalBinaryTo encodes the receiver into a binary form, writes it to w and
   288  // returns the number of bytes written and an error if any.
   289  //
   290  // See MarshalBainry for the on-disk format.
   291  func (v VecDense) MarshalBinaryTo(w io.Writer) (int, error) {
   292  	header := storage{
   293  		Form: 'G', Packing: 'F', Uplo: 'A',
   294  		Rows: int64(v.mat.N), Cols: 1,
   295  		Version: version,
   296  	}
   297  	n, err := header.marshalBinaryTo(w)
   298  	if err != nil {
   299  		return n, err
   300  	}
   301  
   302  	var buf [8]byte
   303  	for i := 0; i < v.mat.N; i++ {
   304  		binary.LittleEndian.PutUint64(buf[:], math.Float64bits(v.at(i)))
   305  		nn, err := w.Write(buf[:])
   306  		n += nn
   307  		if err != nil {
   308  			return n, err
   309  		}
   310  	}
   311  
   312  	return n, nil
   313  }
   314  
   315  // UnmarshalBinary decodes the binary form into the receiver.
   316  // It panics if the receiver is a non-empty VecDense.
   317  //
   318  // See MarshalBinary for the on-disk layout.
   319  //
   320  // Limited checks on the validity of the binary input are performed:
   321  //  - matrix.ErrShape is returned if the number of rows is negative,
   322  //  - an error is returned if the resulting VecDense is too
   323  //  big for the current architecture (e.g. a 16GB vector written by a
   324  //  64b application and read back from a 32b application.)
   325  // UnmarshalBinary does not limit the size of the unmarshaled vector, and so
   326  // it should not be used on untrusted data.
   327  func (v *VecDense) UnmarshalBinary(data []byte) error {
   328  	if !v.IsEmpty() {
   329  		panic("mat: unmarshal into non-empty vector")
   330  	}
   331  
   332  	if len(data) < headerSize {
   333  		return errTooSmall
   334  	}
   335  
   336  	var header storage
   337  	err := header.unmarshalBinary(data[:headerSize])
   338  	if err != nil {
   339  		return err
   340  	}
   341  	if header.Cols != 1 {
   342  		return ErrShape
   343  	}
   344  	n := header.Rows
   345  	header.Version = 0
   346  	header.Rows = 0
   347  	header.Cols = 0
   348  	if (header != storage{Form: 'G', Packing: 'F', Uplo: 'A'}) {
   349  		return errWrongType
   350  	}
   351  	if n == 0 {
   352  		return ErrZeroLength
   353  	}
   354  	if n < 0 {
   355  		return errBadSize
   356  	}
   357  	if int64(maxLen) < n {
   358  		return errTooBig
   359  	}
   360  	if len(data) != headerSize+int(n)*sizeFloat64 {
   361  		return errBadBuffer
   362  	}
   363  
   364  	p := headerSize
   365  	v.reuseAsNonZeroed(int(n))
   366  	for i := range v.mat.Data {
   367  		v.mat.Data[i] = math.Float64frombits(binary.LittleEndian.Uint64(data[p : p+sizeFloat64]))
   368  		p += sizeFloat64
   369  	}
   370  
   371  	return nil
   372  }
   373  
   374  // UnmarshalBinaryFrom decodes the binary form into the receiver, from the
   375  // io.Reader and returns the number of bytes read and an error if any.
   376  // It panics if the receiver is a non-empty VecDense.
   377  //
   378  // See MarshalBinary for the on-disk layout.
   379  // See UnmarshalBinary for the list of sanity checks performed on the input.
   380  func (v *VecDense) UnmarshalBinaryFrom(r io.Reader) (int, error) {
   381  	if !v.IsEmpty() {
   382  		panic("mat: unmarshal into non-empty vector")
   383  	}
   384  
   385  	var header storage
   386  	n, err := header.unmarshalBinaryFrom(r)
   387  	if err != nil {
   388  		return n, err
   389  	}
   390  	if header.Cols != 1 {
   391  		return n, ErrShape
   392  	}
   393  	l := header.Rows
   394  	header.Version = 0
   395  	header.Rows = 0
   396  	header.Cols = 0
   397  	if (header != storage{Form: 'G', Packing: 'F', Uplo: 'A'}) {
   398  		return n, errWrongType
   399  	}
   400  	if l == 0 {
   401  		return n, ErrZeroLength
   402  	}
   403  	if l < 0 {
   404  		return n, errBadSize
   405  	}
   406  	if int64(maxLen) < l {
   407  		return n, errTooBig
   408  	}
   409  
   410  	v.reuseAsNonZeroed(int(l))
   411  	var b [8]byte
   412  	for i := range v.mat.Data {
   413  		nn, err := readFull(r, b[:])
   414  		n += nn
   415  		if err != nil {
   416  			if err == io.EOF {
   417  				return n, io.ErrUnexpectedEOF
   418  			}
   419  			return n, err
   420  		}
   421  		v.mat.Data[i] = math.Float64frombits(binary.LittleEndian.Uint64(b[:]))
   422  	}
   423  
   424  	return n, nil
   425  }
   426  
   427  // storage is the internal representation of the storage format of a
   428  // serialised matrix.
   429  type storage struct {
   430  	Version uint32 // Keep this first.
   431  	Form    byte   // [GST]
   432  	Packing byte   // [BPF]
   433  	Uplo    byte   // [AUL]
   434  	Unit    bool
   435  	Rows    int64
   436  	Cols    int64
   437  	KU      int64
   438  	KL      int64
   439  }
   440  
   441  // TODO(kortschak): Consider replacing these with calls to direct
   442  // encoding/decoding of fields rather than to binary.Write/binary.Read.
   443  
   444  func (s storage) marshalBinaryTo(w io.Writer) (int, error) {
   445  	buf := bytes.NewBuffer(make([]byte, 0, headerSize))
   446  	err := binary.Write(buf, binary.LittleEndian, s)
   447  	if err != nil {
   448  		return 0, err
   449  	}
   450  	return w.Write(buf.Bytes())
   451  }
   452  
   453  func (s *storage) unmarshalBinary(buf []byte) error {
   454  	err := binary.Read(bytes.NewReader(buf), binary.LittleEndian, s)
   455  	if err != nil {
   456  		return err
   457  	}
   458  	if s.Version != version {
   459  		return fmt.Errorf("mat: incorrect version: %d", s.Version)
   460  	}
   461  	return nil
   462  }
   463  
   464  func (s *storage) unmarshalBinaryFrom(r io.Reader) (int, error) {
   465  	buf := make([]byte, headerSize)
   466  	n, err := readFull(r, buf)
   467  	if err != nil {
   468  		return n, err
   469  	}
   470  	return n, s.unmarshalBinary(buf[:n])
   471  }
   472  
   473  // readFull reads from r into buf until it has read len(buf).
   474  // It returns the number of bytes copied and an error if fewer bytes were read.
   475  // If an EOF happens after reading fewer than len(buf) bytes, io.ErrUnexpectedEOF is returned.
   476  func readFull(r io.Reader, buf []byte) (int, error) {
   477  	var n int
   478  	var err error
   479  	for n < len(buf) && err == nil {
   480  		var nn int
   481  		nn, err = r.Read(buf[n:])
   482  		n += nn
   483  	}
   484  	if n == len(buf) {
   485  		return n, nil
   486  	}
   487  	if err == io.EOF {
   488  		return n, io.ErrUnexpectedEOF
   489  	}
   490  	return n, err
   491  }