go-hep.org/x/hep@v0.38.1/rio/rio.go (about)

     1  // Copyright ©2015 The go-hep Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package rio // import "go-hep.org/x/hep/rio"
     6  
     7  import (
     8  	"bytes"
     9  	"compress/flate"
    10  	"encoding/binary"
    11  	"fmt"
    12  	"io"
    13  	"reflect"
    14  )
    15  
const (
	// gAlign is a mask with the two low bits set; rioHdrVersion is the
	// Version value returned by the RioVersion methods of the header,
	// record and footer types in this file.
	// NOTE(review): gAlign is presumably consumed by rioAlign (defined
	// elsewhere) to round sizes up to a four-byte boundary -- confirm.
	gAlign        = 0x00000003
	rioHdrVersion = Version(0)

	// Bit masks splitting an Options word into its three parts:
	// codec (bits 0-11), compression level (bits 12-15) and
	// compressor kind (bits 16-31).
	gMaskCodec = Options(0x00000fff)
	gMaskLevel = Options(0x0000f000)
	gMaskCompr = Options(0xffff0000)

	// MetaRecord is the name of the record holding the Metadata
	// information about the rio stream.
	MetaRecord = ".rio.meta"
)
    27  
// Version describes the on-disk version of a serialized rio block.
type Version uint32
    30  
// frameType is the four-byte marker stored in a rioHeader to identify
// the kind of payload that follows: a record, a block or a footer.
type frameType [4]byte
    33  
var (
	// rioMagic is the four-byte sequence "rio\x00".
	// NOTE(review): presumably written at the very start of a rio
	// stream (rioFooter.Meta is documented as an offset "from rio-magic")
	// -- confirm against the writer.
	rioMagic = [4]byte{'r', 'i', 'o', '\x00'}

	// Frame markers compared against rioHeader.Frame when decoding.
	recFrame = frameType{0xab, 0xad, 0xca, 0xfe} // 0xabadcafe
	blkFrame = frameType{0xde, 0xad, 0xbe, 0xef} // 0xdeadbeef
	ftrFrame = frameType{0xca, 0xfe, 0xba, 0xbe} // 0xcafebabe

	// Endian exposes the endianness of rio streams
	Endian = binary.LittleEndian

	// hdrSize = int(reflect.TypeOf(rioHeader{}).Size())
	// blkSize = int(reflect.TypeOf(rioBlock{}).Size())

	// recSize and ftrSize are the in-memory sizes of the Go structs as
	// reported by reflect; in this file they only serve as initial
	// capacity hints for marshaling buffers.
	recSize = int(reflect.TypeOf(rioRecord{}).Size())
	ftrSize = int(reflect.TypeOf(rioFooter{}).Size())
)
    50  
// Marshaler is the interface implemented by an object that can
// marshal itself into a rio-binary form.
//
// RioMarshal marshals the receiver into a rio-binary form, writes that
// binary form to the io.Writer and returns an error, if any.
type Marshaler interface {
	RioMarshal(w io.Writer) error
}
    59  
// Unmarshaler is the interface implemented by an object that can
// unmarshal a rio-binary representation of itself.
//
// RioUnmarshal must be able to unmarshal the form generated by RioMarshal.
type Unmarshaler interface {
	RioUnmarshal(r io.Reader) error
}
    67  
// Streamer is the interface implemented by an object that can
// marshal/unmarshal a rio-binary representation of itself
// to/from an io.Writer/io.Reader.
//
// In addition to Marshaler and Unmarshaler, a Streamer reports the
// Version of its rio-binary representation through RioVersion.
type Streamer interface {
	Marshaler
	Unmarshaler
	RioVersion() Version
}
    76  
// rioHeader is the common prefix of records, blocks and footers:
// a payload length followed by a frame marker.
type rioHeader struct {
	// Length of payload in bytes (not counting Len nor Frame).
	// Always a multiple of four.
	Len uint32

	// Framing used to try identifying what kind of payload follows
	// (record, block or footer)
	Frame frameType
}
    87  
    88  func (hdr *rioHeader) RioMarshal(w io.Writer) error {
    89  	var err error
    90  
    91  	err = binary.Write(w, Endian, hdr.Len)
    92  	if err != nil {
    93  		return fmt.Errorf("rio: write header length failed: %w", err)
    94  	}
    95  
    96  	err = binary.Write(w, Endian, hdr.Frame)
    97  	if err != nil {
    98  		return fmt.Errorf("rio: write header frame failed: %w", err)
    99  	}
   100  
   101  	return err
   102  }
   103  
   104  func (hdr *rioHeader) RioUnmarshal(r io.Reader) error {
   105  	var err error
   106  
   107  	err = binary.Read(r, Endian, &hdr.Len)
   108  	if err != nil {
   109  		if err == io.EOF {
   110  			return err
   111  		}
   112  		return fmt.Errorf("rio: read header length failed: %w", err)
   113  	}
   114  
   115  	err = binary.Read(r, Endian, &hdr.Frame)
   116  	if err != nil {
   117  		return fmt.Errorf("rio: read header frame failed: %w", err)
   118  	}
   119  
   120  	return err
   121  }
   122  
// RioVersion returns rioHdrVersion, the version reported for rio headers.
func (hdr *rioHeader) RioVersion() Version {
	return rioHdrVersion
}
   126  
// Options describes the various options attached to a rio stream
// such as: compression method, compression level, codec, ...
//
// The three parts are packed into a single 32-bit word; see the
// CompressorKind, CompressorLevel and CompressorCodec accessors.
type Options uint32
   130  
   131  // CompressorKind extracts the CompressorKind from the Options value
   132  func (o Options) CompressorKind() CompressorKind {
   133  	return CompressorKind((o & gMaskCompr) >> 16)
   134  }
   135  
   136  // CompressorLevel extracts the compression level from the Options value
   137  func (o Options) CompressorLevel() int {
   138  	lvl := int((o & gMaskLevel) >> 12)
   139  	if lvl == 0xf {
   140  		return flate.DefaultCompression
   141  	}
   142  	return lvl
   143  }
   144  
   145  // CompressorCodec extracts the compression codec from the Options value
   146  func (o Options) CompressorCodec() int {
   147  	return int(o & gMaskCodec)
   148  }
   149  
   150  // NewOptions returns a new Options value carefully crafted from the CompressorKind and
   151  // compression level
   152  func NewOptions(compr CompressorKind, lvl int, codec int) Options {
   153  	if lvl <= flate.DefaultCompression || lvl >= 0xf {
   154  		lvl = 0xf
   155  	}
   156  
   157  	if compr == CompressDefault {
   158  		compr = CompressZlib
   159  	}
   160  
   161  	// FIXME(sbinet): decide on how to handle different codecs (gob|cbor|xdr|riobin|...)
   162  	opts := Options(Options(compr)<<16) |
   163  		Options(Options(lvl)<<12) |
   164  		Options(Options(codec)&gMaskCodec)
   165  	return opts
   166  }
   167  
// rioRecord is the descriptor of a record: a header followed by the
// options word, the compressed and uncompressed content lengths and the
// record name.
type rioRecord struct {
	Header rioHeader

	Options Options // options word (compression method, compression level, codec, ...)

	// Length of the compressed record content.
	// Total length in bytes for all the blocks in the record.
	// Always a multiple of four.
	// If the record is not compressed, same value as XLen.
	CLen uint32

	// Length of the uncompressed record content.
	// Total length in bytes for all the blocks in the record when decompressed.
	// Always a multiple of four.
	// When the record is not compressed, it is a count of the bytes that follow in the
	// record content.
	// When the record is compressed, this number is used to allocate a buffer into which
	// the record is decompressed.
	XLen uint32

	// Name of the record. Padded with zeros to a four-byte boundary on disk.
	Name string
}
   192  
   193  func (rec *rioRecord) MarshalBinary() ([]byte, error) {
   194  	buf := bytes.NewBuffer(make([]byte, 0, recSize))
   195  	err := rec.RioMarshal(buf)
   196  	if err != nil {
   197  		return nil, err
   198  	}
   199  
   200  	return buf.Bytes(), err
   201  }
   202  
   203  func (rec *rioRecord) UnmarshalBinary(data []byte) error {
   204  	r := bytes.NewReader(data)
   205  	return rec.RioUnmarshal(r)
   206  }
   207  
   208  func (rec *rioRecord) RioMarshal(w io.Writer) error {
   209  	var err error
   210  
   211  	err = rec.Header.RioMarshal(w)
   212  	if err != nil {
   213  		return fmt.Errorf("rio: write record header failed: %w", err)
   214  	}
   215  
   216  	err = binary.Write(w, Endian, rec.Options)
   217  	if err != nil {
   218  		return fmt.Errorf("rio: write record options failed: %w", err)
   219  	}
   220  
   221  	err = binary.Write(w, Endian, rec.CLen)
   222  	if err != nil {
   223  		return fmt.Errorf("rio: write record compr-len failed: %w", err)
   224  	}
   225  
   226  	err = binary.Write(w, Endian, rec.XLen)
   227  	if err != nil {
   228  		return fmt.Errorf("rio: write record len failed: %w", err)
   229  	}
   230  
   231  	err = binary.Write(w, Endian, uint32(len(rec.Name)))
   232  	if err != nil {
   233  		return fmt.Errorf("rio: write record name-len failed: %w", err)
   234  	}
   235  
   236  	name := []byte(rec.Name)
   237  	_, err = w.Write(name)
   238  	if err != nil {
   239  		return fmt.Errorf("rio: write record name failed: %w", err)
   240  	}
   241  
   242  	size := rioAlign(len(name))
   243  	if size > len(name) {
   244  		_, err = w.Write(make([]byte, size-len(name)))
   245  		if err != nil {
   246  			return fmt.Errorf("rio: write record name-padding failed: %w", err)
   247  		}
   248  	}
   249  
   250  	return err
   251  }
   252  
   253  func (rec *rioRecord) RioUnmarshal(r io.Reader) error {
   254  	var err error
   255  
   256  	err = rec.unmarshalHeader(r)
   257  	if err != nil {
   258  		return err
   259  	}
   260  
   261  	err = rec.unmarshalData(r)
   262  	if err != nil {
   263  		return err
   264  	}
   265  	return err
   266  }
   267  
   268  func (rec *rioRecord) unmarshalHeader(r io.Reader) error {
   269  	err := rec.Header.RioUnmarshal(r)
   270  	if err != nil {
   271  		if err == io.EOF || err == io.ErrUnexpectedEOF {
   272  			return err
   273  		}
   274  		return fmt.Errorf("rio: read record header failed: %w", err)
   275  	}
   276  
   277  	if rec.Header.Frame != recFrame {
   278  		return fmt.Errorf("rio: read record header corrupted (frame=%#v)", rec.Header.Frame)
   279  	}
   280  
   281  	return nil
   282  }
   283  
   284  func (rec *rioRecord) unmarshalData(r io.Reader) error {
   285  	err := binary.Read(r, Endian, &rec.Options)
   286  	if err != nil {
   287  		return fmt.Errorf("rio: read record options failed: %w", err)
   288  	}
   289  
   290  	err = binary.Read(r, Endian, &rec.CLen)
   291  	if err != nil {
   292  		return fmt.Errorf("rio: read record compr-len failed: %w", err)
   293  	}
   294  
   295  	err = binary.Read(r, Endian, &rec.XLen)
   296  	if err != nil {
   297  		return fmt.Errorf("rio: read record len failed failed: %w", err)
   298  	}
   299  
   300  	nsize := uint32(0)
   301  	err = binary.Read(r, Endian, &nsize)
   302  	if err != nil {
   303  		return fmt.Errorf("rio: read record name-len failed: %w", err)
   304  	}
   305  
   306  	buf := make([]byte, rioAlign(int(nsize)))
   307  	_, err = r.Read(buf)
   308  	if err != nil {
   309  		return fmt.Errorf("rio: read record name failed: %w", err)
   310  	}
   311  
   312  	rec.Name = string(buf[:int(nsize)])
   313  
   314  	return nil
   315  }
   316  
// RioVersion returns rioHdrVersion, the version reported for record descriptors.
func (rec *rioRecord) RioVersion() Version {
	return rioHdrVersion
}
   320  
// rioBlock is a named, versioned chunk of payload preceded by a header.
// When decoding, Header.Len gives the number of payload bytes kept in Data.
type rioBlock struct {
	Header  rioHeader
	Version Version // block version
	Name    string  // block name
	Data    []byte  // block payload
}
   328  
   329  func (blk *rioBlock) MarshalBinary() ([]byte, error) {
   330  	buf := bytes.NewBuffer(make([]byte, 0, recSize))
   331  	err := blk.RioMarshal(buf)
   332  	if err != nil {
   333  		return nil, err
   334  	}
   335  
   336  	return buf.Bytes(), err
   337  }
   338  
   339  func (blk *rioBlock) UnmarshalBinary(data []byte) error {
   340  	r := bytes.NewReader(data)
   341  	return blk.RioUnmarshal(r)
   342  }
   343  
   344  func (blk *rioBlock) RioMarshal(w io.Writer) error {
   345  	var err error
   346  
   347  	err = blk.Header.RioMarshal(w)
   348  	if err != nil {
   349  		return fmt.Errorf("rio: write block header failed: %w", err)
   350  	}
   351  
   352  	err = binary.Write(w, Endian, blk.Version)
   353  	if err != nil {
   354  		return fmt.Errorf("rio: write block version failed: %w", err)
   355  	}
   356  
   357  	name := []byte(blk.Name)
   358  	err = binary.Write(w, Endian, uint32(len(name)))
   359  	if err != nil {
   360  		return fmt.Errorf("rio: write block name-len failed: %w", err)
   361  	}
   362  
   363  	nb, err := w.Write(name)
   364  	if err != nil {
   365  		return fmt.Errorf("rio: write block name failed: %w", err)
   366  	}
   367  	if nb != len(name) {
   368  		return fmt.Errorf("rio: wrote too few bytes (want=%d. got=%d)", len(name), nb)
   369  	}
   370  
   371  	nsize := rioAlign(len(name))
   372  	if nsize > len(name) {
   373  		nb, err = w.Write(make([]byte, nsize-len(name)))
   374  		if err != nil {
   375  			return fmt.Errorf("rio: write block name-padding failed: %w", err)
   376  		}
   377  		if nb != nsize-len(name) {
   378  			return fmt.Errorf("rio: wrote too few bytes (want=%d. got=%d)", nsize-len(name), nb)
   379  		}
   380  	}
   381  
   382  	nb, err = w.Write(blk.Data)
   383  	if err != nil {
   384  		return fmt.Errorf("rio: write block data failed: %w", err)
   385  	}
   386  	if nb != len(blk.Data) {
   387  		return fmt.Errorf("rio: wrote too few bytes (want=%d. got=%d)", len(blk.Data), nb)
   388  	}
   389  
   390  	dsize := rioAlign(len(blk.Data))
   391  	if dsize > len(blk.Data) {
   392  		nb, err = w.Write(make([]byte, dsize-len(blk.Data)))
   393  		if err != nil {
   394  			return fmt.Errorf("rio: write block data-padding failed: %w", err)
   395  		}
   396  		if nb != dsize-len(blk.Data) {
   397  			return fmt.Errorf("rio: wrote too few bytes (want=%d. got=%d)", dsize-len(blk.Data), nb)
   398  		}
   399  	}
   400  
   401  	return err
   402  }
   403  
   404  func (blk *rioBlock) RioUnmarshal(r io.Reader) error {
   405  	var err error
   406  
   407  	err = blk.Header.RioUnmarshal(r)
   408  	if err != nil {
   409  		if err == io.EOF || err == io.ErrUnexpectedEOF {
   410  			return err
   411  		}
   412  		return fmt.Errorf("rio: read block header failed: %w", err)
   413  	}
   414  
   415  	if blk.Header.Frame != blkFrame {
   416  		return fmt.Errorf("rio: read block header corrupted (frame=%#v)", blk.Header.Frame)
   417  	}
   418  
   419  	err = binary.Read(r, Endian, &blk.Version)
   420  	if err != nil {
   421  		return fmt.Errorf("rio: read block version failed: %w", err)
   422  	}
   423  
   424  	nsize := uint32(0)
   425  	err = binary.Read(r, Endian, &nsize)
   426  	if err != nil {
   427  		return fmt.Errorf("rio: read block name-len failed: %w", err)
   428  	}
   429  	name := make([]byte, rioAlign(int(nsize)))
   430  
   431  	nb, err := io.ReadFull(r, name)
   432  	if err != nil {
   433  		return fmt.Errorf("rio: read block name failed: %w", err)
   434  	}
   435  	if int(nb) != len(name) {
   436  		return fmt.Errorf("rio: read too few bytes for name (want=%d. got=%d)", len(name), nb)
   437  	}
   438  
   439  	blk.Name = string(name[:int(nsize)])
   440  
   441  	data := make([]byte, rioAlign(int(blk.Header.Len)))
   442  	nb, err = io.ReadFull(r, data)
   443  	if err != nil {
   444  		return fmt.Errorf("rio: read block data failed: %w", err)
   445  	}
   446  	if int(nb) != len(data) {
   447  		return fmt.Errorf("rio: read too few bytes for data (want=%d. got=%d)", len(data), nb)
   448  	}
   449  	blk.Data = data[:int(blk.Header.Len)]
   450  
   451  	return err
   452  }
   453  
// RioVersion returns the version stored in the block.
func (blk *rioBlock) RioVersion() Version {
	return blk.Version
}
   457  
// rioFooter marks the end of a rio stream. It consists of a header
// (whose frame marker is ftrFrame) followed by the Meta offset.
type rioFooter struct {
	Header rioHeader
	Meta   int64 // position of the record holding stream metadata, in bytes from rio-magic
}
   463  
   464  func (ftr *rioFooter) MarshalBinary() ([]byte, error) {
   465  	buf := bytes.NewBuffer(make([]byte, 0, recSize))
   466  	err := ftr.RioMarshal(buf)
   467  	if err != nil {
   468  		return nil, err
   469  	}
   470  
   471  	return buf.Bytes(), err
   472  }
   473  
   474  func (ftr *rioFooter) UnmarshalBinary(data []byte) error {
   475  	r := bytes.NewReader(data)
   476  	return ftr.RioUnmarshal(r)
   477  }
   478  
// RioVersion returns rioHdrVersion, the version reported for footers.
func (ftr *rioFooter) RioVersion() Version {
	return rioHdrVersion
}
   482  
   483  func (ftr *rioFooter) RioMarshal(w io.Writer) error {
   484  	var err error
   485  
   486  	err = ftr.Header.RioMarshal(w)
   487  	if err != nil {
   488  		return fmt.Errorf("rio: write footer header failed: %w", err)
   489  	}
   490  
   491  	err = binary.Write(w, Endian, ftr.Meta)
   492  	if err != nil {
   493  		return fmt.Errorf("rio: write footer meta failed: %w", err)
   494  	}
   495  
   496  	return err
   497  }
   498  
   499  func (ftr *rioFooter) RioUnmarshal(r io.Reader) error {
   500  	var err error
   501  
   502  	err = ftr.unmarshalHeader(r)
   503  	if err != nil {
   504  		return err
   505  	}
   506  
   507  	err = ftr.unmarshalData(r)
   508  	if err != nil {
   509  		return err
   510  	}
   511  
   512  	return err
   513  }
   514  
   515  func (ftr *rioFooter) unmarshalHeader(r io.Reader) error {
   516  	err := ftr.Header.RioUnmarshal(r)
   517  	if err != nil {
   518  		if err == io.EOF {
   519  			return err
   520  		}
   521  		return fmt.Errorf("rio: read footer header failed: %w", err)
   522  	}
   523  
   524  	if ftr.Header.Frame != ftrFrame {
   525  		return fmt.Errorf("rio: read footer header corrupted (frame=%#v)", ftr.Header.Frame)
   526  	}
   527  
   528  	return nil
   529  }
   530  
   531  func (ftr *rioFooter) unmarshalData(r io.Reader) error {
   532  	err := binary.Read(r, Endian, &ftr.Meta)
   533  	if err != nil {
   534  		return fmt.Errorf("rio: read footer meta failed: %w", err)
   535  	}
   536  
   537  	return nil
   538  }
   539  
// Metadata stores metadata about a rio stream: the descriptions of its
// records and, per name, a list of spans.
// NOTE(review): Offsets is presumably keyed by record name, each Span
// locating one occurrence of that record in the stream -- confirm
// against the writer.
type Metadata struct {
	Records []RecordDesc
	Offsets map[string][]Span
}
   545  
// RecordDesc provides high-level information about a Record:
// its name and the descriptions of its blocks.
type RecordDesc struct {
	Name   string
	Blocks []BlockDesc
}
   551  
// BlockDesc provides high-level information about a Block:
// its name and a string describing its type.
type BlockDesc struct {
	Name string
	Type string
}
   557  
// Span is a pair (position, length).
// NOTE(review): both values are presumably expressed in bytes -- confirm.
type Span struct {
	Pos int64
	Len int64
}
   563  
   564  type recordsByName []RecordDesc
   565  
   566  func (p recordsByName) Len() int           { return len(p) }
   567  func (p recordsByName) Swap(i, j int)      { p[i], p[j] = p[j], p[i] }
   568  func (p recordsByName) Less(i, j int) bool { return p[i].Name < p[j].Name }