gonum.org/v1/gonum@v0.14.0/mat/io.go (about)

     1  // Copyright ©2015 The Gonum Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package mat
     6  
     7  import (
     8  	"bytes"
     9  	"encoding/binary"
    10  	"errors"
    11  	"fmt"
    12  	"io"
    13  	"math"
    14  )
    15  
    16  // version is the current on-disk codec version.
    17  const version uint32 = 0x1
    18  
    19  // maxLen is the biggest slice/array len one can create on a 32/64b platform.
    20  const maxLen = int64(int(^uint(0) >> 1))
    21  
    22  var (
    23  	headerSize  = binary.Size(storage{})
    24  	sizeFloat64 = binary.Size(float64(0))
    25  
    26  	errWrongType = errors.New("mat: wrong data type")
    27  
    28  	errTooBig    = errors.New("mat: resulting data slice too big")
    29  	errTooSmall  = errors.New("mat: input slice too small")
    30  	errBadBuffer = errors.New("mat: data buffer size mismatch")
    31  	errBadSize   = errors.New("mat: invalid dimension")
    32  )
    33  
    34  // Type encoding scheme:
    35  //
    36  // Type 		Form 	Packing 	Uplo 		Unit 		Rows 	Columns kU 	kL
    37  // uint8 		[GST] 	uint8 [BPF] 	uint8 [AUL] 	bool 		int64 	int64 	int64 	int64
    38  // General 		'G' 	'F' 		'A' 		false 		r 	c 	0 	0
    39  // Band 		'G' 	'B' 		'A' 		false 		r 	c 	kU 	kL
    40  // Symmetric 		'S' 	'F' 		ul 		false 		n 	n 	0 	0
    41  // SymmetricBand 	'S' 	'B' 		ul 		false 		n 	n 	k 	k
    42  // SymmetricPacked 	'S' 	'P' 		ul 		false 		n 	n 	0 	0
    43  // Triangular 		'T' 	'F' 		ul 		Diag==Unit 	n 	n 	0 	0
    44  // TriangularBand 	'T' 	'B' 		ul 		Diag==Unit 	n 	n 	k 	k
    45  // TriangularPacked 	'T' 	'P' 		ul	 	Diag==Unit 	n 	n 	0 	0
    46  //
    47  // G - general, S - symmetric, T - triangular
    48  // F - full, B - band, P - packed
    49  // A - all, U - upper, L - lower
    50  
    51  // MarshalBinary encodes the receiver into a binary form and returns the result.
    52  //
    53  // Dense is little-endian encoded as follows:
    54  //
    55  //	 0 -  3  Version = 1          (uint32)
    56  //	 4       'G'                  (byte)
    57  //	 5       'F'                  (byte)
    58  //	 6       'A'                  (byte)
    59  //	 7       0                    (byte)
    60  //	 8 - 15  number of rows       (int64)
    61  //	16 - 23  number of columns    (int64)
    62  //	24 - 31  0                    (int64)
    63  //	32 - 39  0                    (int64)
    64  //	40 - ..  matrix data elements (float64)
    65  //	         [0,0] [0,1] ... [0,ncols-1]
    66  //	         [1,0] [1,1] ... [1,ncols-1]
    67  //	         ...
    68  //	         [nrows-1,0] ... [nrows-1,ncols-1]
    69  func (m Dense) MarshalBinary() ([]byte, error) {
    70  	bufLen := int64(headerSize) + int64(m.mat.Rows)*int64(m.mat.Cols)*int64(sizeFloat64)
    71  	if bufLen <= 0 {
    72  		// bufLen is too big and has wrapped around.
    73  		return nil, errTooBig
    74  	}
    75  
    76  	header := storage{
    77  		Form: 'G', Packing: 'F', Uplo: 'A',
    78  		Rows: int64(m.mat.Rows), Cols: int64(m.mat.Cols),
    79  		Version: version,
    80  	}
    81  	buf := make([]byte, bufLen)
    82  	n, err := header.marshalBinaryTo(bytes.NewBuffer(buf[:0]))
    83  	if err != nil {
    84  		return buf[:n], err
    85  	}
    86  
    87  	p := headerSize
    88  	r, c := m.Dims()
    89  	for i := 0; i < r; i++ {
    90  		for j := 0; j < c; j++ {
    91  			binary.LittleEndian.PutUint64(buf[p:p+sizeFloat64], math.Float64bits(m.at(i, j)))
    92  			p += sizeFloat64
    93  		}
    94  	}
    95  
    96  	return buf, nil
    97  }
    98  
    99  // MarshalBinaryTo encodes the receiver into a binary form and writes it into w.
   100  // MarshalBinaryTo returns the number of bytes written into w and an error, if any.
   101  //
   102  // See MarshalBinary for the on-disk layout.
   103  func (m Dense) MarshalBinaryTo(w io.Writer) (int, error) {
   104  	header := storage{
   105  		Form: 'G', Packing: 'F', Uplo: 'A',
   106  		Rows: int64(m.mat.Rows), Cols: int64(m.mat.Cols),
   107  		Version: version,
   108  	}
   109  	n, err := header.marshalBinaryTo(w)
   110  	if err != nil {
   111  		return n, err
   112  	}
   113  
   114  	r, c := m.Dims()
   115  	var b [8]byte
   116  	for i := 0; i < r; i++ {
   117  		for j := 0; j < c; j++ {
   118  			binary.LittleEndian.PutUint64(b[:], math.Float64bits(m.at(i, j)))
   119  			nn, err := w.Write(b[:])
   120  			n += nn
   121  			if err != nil {
   122  				return n, err
   123  			}
   124  		}
   125  	}
   126  
   127  	return n, nil
   128  }
   129  
   130  // UnmarshalBinary decodes the binary form into the receiver.
   131  // It panics if the receiver is a non-empty Dense matrix.
   132  //
   133  // See MarshalBinary for the on-disk layout.
   134  //
   135  // Limited checks on the validity of the binary input are performed:
   136  //   - ErrShape is returned if the number of rows or columns is negative,
   137  //   - an error is returned if the resulting Dense matrix is too
   138  //     big for the current architecture (e.g. a 16GB matrix written by a
   139  //     64b application and read back from a 32b application.)
   140  //
   141  // UnmarshalBinary does not limit the size of the unmarshaled matrix, and so
   142  // it should not be used on untrusted data.
   143  func (m *Dense) UnmarshalBinary(data []byte) error {
   144  	if !m.IsEmpty() {
   145  		panic("mat: unmarshal into non-empty matrix")
   146  	}
   147  
   148  	if len(data) < headerSize {
   149  		return errTooSmall
   150  	}
   151  
   152  	var header storage
   153  	err := header.unmarshalBinary(data[:headerSize])
   154  	if err != nil {
   155  		return err
   156  	}
   157  	rows := header.Rows
   158  	cols := header.Cols
   159  	header.Version = 0
   160  	header.Rows = 0
   161  	header.Cols = 0
   162  	if (header != storage{Form: 'G', Packing: 'F', Uplo: 'A'}) {
   163  		return errWrongType
   164  	}
   165  	if rows < 0 || cols < 0 {
   166  		return errBadSize
   167  	}
   168  	size := rows * cols
   169  	if size == 0 {
   170  		return ErrZeroLength
   171  	}
   172  	if int(size) < 0 || size > maxLen {
   173  		return errTooBig
   174  	}
   175  	if len(data) != headerSize+int(rows*cols)*sizeFloat64 {
   176  		return errBadBuffer
   177  	}
   178  
   179  	p := headerSize
   180  	m.reuseAsNonZeroed(int(rows), int(cols))
   181  	for i := range m.mat.Data {
   182  		m.mat.Data[i] = math.Float64frombits(binary.LittleEndian.Uint64(data[p : p+sizeFloat64]))
   183  		p += sizeFloat64
   184  	}
   185  
   186  	return nil
   187  }
   188  
   189  // UnmarshalBinaryFrom decodes the binary form into the receiver and returns
   190  // the number of bytes read and an error if any.
   191  // It panics if the receiver is a non-empty Dense matrix.
   192  //
   193  // See MarshalBinary for the on-disk layout.
   194  //
   195  // Limited checks on the validity of the binary input are performed:
   196  //   - ErrShape is returned if the number of rows or columns is negative,
   197  //   - an error is returned if the resulting Dense matrix is too
   198  //     big for the current architecture (e.g. a 16GB matrix written by a
   199  //     64b application and read back from a 32b application.)
   200  //
   201  // UnmarshalBinary does not limit the size of the unmarshaled matrix, and so
   202  // it should not be used on untrusted data.
   203  func (m *Dense) UnmarshalBinaryFrom(r io.Reader) (int, error) {
   204  	if !m.IsEmpty() {
   205  		panic("mat: unmarshal into non-empty matrix")
   206  	}
   207  
   208  	var header storage
   209  	n, err := header.unmarshalBinaryFrom(r)
   210  	if err != nil {
   211  		return n, err
   212  	}
   213  	rows := header.Rows
   214  	cols := header.Cols
   215  	header.Version = 0
   216  	header.Rows = 0
   217  	header.Cols = 0
   218  	if (header != storage{Form: 'G', Packing: 'F', Uplo: 'A'}) {
   219  		return n, errWrongType
   220  	}
   221  	if rows < 0 || cols < 0 {
   222  		return n, errBadSize
   223  	}
   224  	size := rows * cols
   225  	if size == 0 {
   226  		return n, ErrZeroLength
   227  	}
   228  	if int(size) < 0 || size > maxLen {
   229  		return n, errTooBig
   230  	}
   231  
   232  	m.reuseAsNonZeroed(int(rows), int(cols))
   233  	var b [8]byte
   234  	for i := range m.mat.Data {
   235  		nn, err := readFull(r, b[:])
   236  		n += nn
   237  		if err != nil {
   238  			if err == io.EOF {
   239  				return n, io.ErrUnexpectedEOF
   240  			}
   241  			return n, err
   242  		}
   243  		m.mat.Data[i] = math.Float64frombits(binary.LittleEndian.Uint64(b[:]))
   244  	}
   245  
   246  	return n, nil
   247  }
   248  
   249  // MarshalBinary encodes the receiver into a binary form and returns the result.
   250  //
   251  // VecDense is little-endian encoded as follows:
   252  //
   253  //	 0 -  3  Version = 1            (uint32)
   254  //	 4       'G'                    (byte)
   255  //	 5       'F'                    (byte)
   256  //	 6       'A'                    (byte)
   257  //	 7       0                      (byte)
   258  //	 8 - 15  number of elements     (int64)
   259  //	16 - 23  1                      (int64)
   260  //	24 - 31  0                      (int64)
   261  //	32 - 39  0                      (int64)
   262  //	40 - ..  vector's data elements (float64)
   263  func (v VecDense) MarshalBinary() ([]byte, error) {
   264  	bufLen := int64(headerSize) + int64(v.mat.N)*int64(sizeFloat64)
   265  	if bufLen <= 0 {
   266  		// bufLen is too big and has wrapped around.
   267  		return nil, errTooBig
   268  	}
   269  
   270  	header := storage{
   271  		Form: 'G', Packing: 'F', Uplo: 'A',
   272  		Rows: int64(v.mat.N), Cols: 1,
   273  		Version: version,
   274  	}
   275  	buf := make([]byte, bufLen)
   276  	n, err := header.marshalBinaryTo(bytes.NewBuffer(buf[:0]))
   277  	if err != nil {
   278  		return buf[:n], err
   279  	}
   280  
   281  	p := headerSize
   282  	for i := 0; i < v.mat.N; i++ {
   283  		binary.LittleEndian.PutUint64(buf[p:p+sizeFloat64], math.Float64bits(v.at(i)))
   284  		p += sizeFloat64
   285  	}
   286  
   287  	return buf, nil
   288  }
   289  
   290  // MarshalBinaryTo encodes the receiver into a binary form, writes it to w and
   291  // returns the number of bytes written and an error if any.
   292  //
   293  // See MarshalBinary for the on-disk format.
   294  func (v VecDense) MarshalBinaryTo(w io.Writer) (int, error) {
   295  	header := storage{
   296  		Form: 'G', Packing: 'F', Uplo: 'A',
   297  		Rows: int64(v.mat.N), Cols: 1,
   298  		Version: version,
   299  	}
   300  	n, err := header.marshalBinaryTo(w)
   301  	if err != nil {
   302  		return n, err
   303  	}
   304  
   305  	var buf [8]byte
   306  	for i := 0; i < v.mat.N; i++ {
   307  		binary.LittleEndian.PutUint64(buf[:], math.Float64bits(v.at(i)))
   308  		nn, err := w.Write(buf[:])
   309  		n += nn
   310  		if err != nil {
   311  			return n, err
   312  		}
   313  	}
   314  
   315  	return n, nil
   316  }
   317  
   318  // UnmarshalBinary decodes the binary form into the receiver.
   319  // It panics if the receiver is a non-empty VecDense.
   320  //
   321  // See MarshalBinary for the on-disk layout.
   322  //
   323  // Limited checks on the validity of the binary input are performed:
   324  //   - ErrShape is returned if the number of rows is negative,
   325  //   - an error is returned if the resulting VecDense is too
   326  //     big for the current architecture (e.g. a 16GB vector written by a
   327  //     64b application and read back from a 32b application.)
   328  //
   329  // UnmarshalBinary does not limit the size of the unmarshaled vector, and so
   330  // it should not be used on untrusted data.
   331  func (v *VecDense) UnmarshalBinary(data []byte) error {
   332  	if !v.IsEmpty() {
   333  		panic("mat: unmarshal into non-empty vector")
   334  	}
   335  
   336  	if len(data) < headerSize {
   337  		return errTooSmall
   338  	}
   339  
   340  	var header storage
   341  	err := header.unmarshalBinary(data[:headerSize])
   342  	if err != nil {
   343  		return err
   344  	}
   345  	if header.Cols != 1 {
   346  		return ErrShape
   347  	}
   348  	n := header.Rows
   349  	header.Version = 0
   350  	header.Rows = 0
   351  	header.Cols = 0
   352  	if (header != storage{Form: 'G', Packing: 'F', Uplo: 'A'}) {
   353  		return errWrongType
   354  	}
   355  	if n == 0 {
   356  		return ErrZeroLength
   357  	}
   358  	if n < 0 {
   359  		return errBadSize
   360  	}
   361  	if int64(maxLen) < n {
   362  		return errTooBig
   363  	}
   364  	if len(data) != headerSize+int(n)*sizeFloat64 {
   365  		return errBadBuffer
   366  	}
   367  
   368  	p := headerSize
   369  	v.reuseAsNonZeroed(int(n))
   370  	for i := range v.mat.Data {
   371  		v.mat.Data[i] = math.Float64frombits(binary.LittleEndian.Uint64(data[p : p+sizeFloat64]))
   372  		p += sizeFloat64
   373  	}
   374  
   375  	return nil
   376  }
   377  
   378  // UnmarshalBinaryFrom decodes the binary form into the receiver, from the
   379  // io.Reader and returns the number of bytes read and an error if any.
   380  // It panics if the receiver is a non-empty VecDense.
   381  //
   382  // See MarshalBinary for the on-disk layout.
   383  // See UnmarshalBinary for the list of sanity checks performed on the input.
   384  func (v *VecDense) UnmarshalBinaryFrom(r io.Reader) (int, error) {
   385  	if !v.IsEmpty() {
   386  		panic("mat: unmarshal into non-empty vector")
   387  	}
   388  
   389  	var header storage
   390  	n, err := header.unmarshalBinaryFrom(r)
   391  	if err != nil {
   392  		return n, err
   393  	}
   394  	if header.Cols != 1 {
   395  		return n, ErrShape
   396  	}
   397  	l := header.Rows
   398  	header.Version = 0
   399  	header.Rows = 0
   400  	header.Cols = 0
   401  	if (header != storage{Form: 'G', Packing: 'F', Uplo: 'A'}) {
   402  		return n, errWrongType
   403  	}
   404  	if l == 0 {
   405  		return n, ErrZeroLength
   406  	}
   407  	if l < 0 {
   408  		return n, errBadSize
   409  	}
   410  	if int64(maxLen) < l {
   411  		return n, errTooBig
   412  	}
   413  
   414  	v.reuseAsNonZeroed(int(l))
   415  	var b [8]byte
   416  	for i := range v.mat.Data {
   417  		nn, err := readFull(r, b[:])
   418  		n += nn
   419  		if err != nil {
   420  			if err == io.EOF {
   421  				return n, io.ErrUnexpectedEOF
   422  			}
   423  			return n, err
   424  		}
   425  		v.mat.Data[i] = math.Float64frombits(binary.LittleEndian.Uint64(b[:]))
   426  	}
   427  
   428  	return n, nil
   429  }
   430  
   431  // storage is the internal representation of the storage format of a
   432  // serialised matrix.
   433  type storage struct {
   434  	Version uint32 // Keep this first.
   435  	Form    byte   // [GST]
   436  	Packing byte   // [BPF]
   437  	Uplo    byte   // [AUL]
   438  	Unit    bool
   439  	Rows    int64
   440  	Cols    int64
   441  	KU      int64
   442  	KL      int64
   443  }
   444  
   445  // TODO(kortschak): Consider replacing these with calls to direct
   446  // encoding/decoding of fields rather than to binary.Write/binary.Read.
   447  
   448  func (s storage) marshalBinaryTo(w io.Writer) (int, error) {
   449  	buf := bytes.NewBuffer(make([]byte, 0, headerSize))
   450  	err := binary.Write(buf, binary.LittleEndian, s)
   451  	if err != nil {
   452  		return 0, err
   453  	}
   454  	return w.Write(buf.Bytes())
   455  }
   456  
   457  func (s *storage) unmarshalBinary(buf []byte) error {
   458  	err := binary.Read(bytes.NewReader(buf), binary.LittleEndian, s)
   459  	if err != nil {
   460  		return err
   461  	}
   462  	if s.Version != version {
   463  		return fmt.Errorf("mat: incorrect version: %d", s.Version)
   464  	}
   465  	return nil
   466  }
   467  
   468  func (s *storage) unmarshalBinaryFrom(r io.Reader) (int, error) {
   469  	buf := make([]byte, headerSize)
   470  	n, err := readFull(r, buf)
   471  	if err != nil {
   472  		return n, err
   473  	}
   474  	return n, s.unmarshalBinary(buf[:n])
   475  }
   476  
   477  // readFull reads from r into buf until it has read len(buf).
   478  // It returns the number of bytes copied and an error if fewer bytes were read.
   479  // If an EOF happens after reading fewer than len(buf) bytes, io.ErrUnexpectedEOF is returned.
   480  func readFull(r io.Reader, buf []byte) (int, error) {
   481  	var n int
   482  	var err error
   483  	for n < len(buf) && err == nil {
   484  		var nn int
   485  		nn, err = r.Read(buf[n:])
   486  		n += nn
   487  	}
   488  	if n == len(buf) {
   489  		return n, nil
   490  	}
   491  	if err == io.EOF {
   492  		return n, io.ErrUnexpectedEOF
   493  	}
   494  	return n, err
   495  }