github.com/shakinm/xlsReader@v0.9.12/cfb/cfb.go (about)

     1  package cfb
     2  
     3  import (
     4  	"bytes"
     5  	"encoding/binary"
     6  	"github.com/shakinm/xlsReader/helpers"
     7  	"io"
     8  	"os"
     9  	"path/filepath"
    10  )
    11  
// Cfb - Compound File Binary
//
// Cfb carries the state built up while a compound file is opened: the decoded
// header, the reader the sectors are fetched from, the two position tables
// used to follow sector chains, and the directory entries found in the file.
type Cfb struct {
	// header is filled in by getHeader and consulted for every offset calculation.
	header Header
	// file is the source that getData seeks into and reads from.
	file io.ReadSeeker
	// fLink is assigned only by OpenFile; CloseFile closes it.
	fLink *os.File
	// difatPositions is appended to by getFatSectors and indexed by sector id
	// in getDataFromFatChain to find the next sector of a chain.
	difatPositions []uint32
	// miniFatPositions is appended to by getMiniFATSectors and indexed by
	// mini-sector id in getDataFromMiniFat to find the next mini sector.
	miniFatPositions []uint32
	// dirs holds the directory entries collected by getDirectories.
	dirs []*Directory
}
    21  
    22  // EntrySize - Directory array entry length
    23  var EntrySize = 128
    24  
    25  // DefaultDIFATEntries -Number FAT locations in DIFAT
    26  var DefaultDIFATEntries = uint32(109)
    27  
    28  // GetDirs - Get a list of directories
    29  func (cfb *Cfb) GetDirs() []*Directory {
    30  	return cfb.dirs
    31  }
    32  
    33  func (cfb *Cfb) CloseFile() error {
    34  	return cfb.fLink.Close()
    35  }
    36  
    37  // OpenFile - Open document from the file
    38  func OpenFile(filename string) (cfb Cfb, err error) {
    39  
    40  	cfb.fLink, err = os.Open(filepath.Clean(filename))
    41  
    42  	if err != nil {
    43  		return cfb, err
    44  	}
    45  
    46  	cfb.file = cfb.fLink
    47  
    48  	err = open(&cfb)
    49  
    50  	return cfb, err
    51  }
    52  
    53  // OpenReader - Open document from the reader
    54  func OpenReader(reader io.ReadSeeker) (cfb Cfb, err error) {
    55  
    56  	cfb.file = reader
    57  
    58  	if err != nil {
    59  		return
    60  	}
    61  
    62  	err = open(&cfb)
    63  
    64  	return
    65  }
    66  
    67  func open(cfb *Cfb) (err error) {
    68  
    69  	err = cfb.getHeader()
    70  
    71  	if err != nil {
    72  		return err
    73  	}
    74  
    75  	err = cfb.getMiniFATSectors()
    76  
    77  	if err != nil {
    78  		return err
    79  	}
    80  
    81  	err = cfb.getFatSectors()
    82  
    83  	if err != nil {
    84  		return err
    85  	}
    86  
    87  	err = cfb.getDirectories()
    88  
    89  	return err
    90  }
    91  
    92  func (cfb *Cfb) getHeader() (err error) {
    93  
    94  	var bHeader = make([]byte, 4096)
    95  
    96  	_, err = cfb.file.Read(bHeader)
    97  
    98  	if err != nil {
    99  		return
   100  	}
   101  
   102  	err = binary.Read(bytes.NewBuffer(bHeader), binary.LittleEndian, &cfb.header)
   103  
   104  	if err != nil {
   105  		return
   106  	}
   107  
   108  	err = cfb.header.validate()
   109  
   110  	return
   111  }
   112  
   113  func (cfb *Cfb) getDirectories() (err error) {
   114  
   115  	stream, err := cfb.getDataFromFatChain(helpers.BytesToUint32(cfb.header.FirstDirectorySectorLocation[:]))
   116  
   117  	if err != nil {
   118  		return err
   119  	}
   120  	var section = make([]byte, 0)
   121  
   122  	for _, value := range stream {
   123  		section = append(section, value)
   124  		if len(section) == EntrySize {
   125  			var dir Directory
   126  			err = binary.Read(bytes.NewBuffer(section), binary.LittleEndian, &dir)
   127  			if err == nil && dir.ObjectType != 0x00 {
   128  				cfb.dirs = append(cfb.dirs, &dir)
   129  			}
   130  
   131  			section = make([]byte, 0)
   132  		}
   133  
   134  	}
   135  
   136  	return
   137  
   138  }
   139  
   140  func (cfb *Cfb) getMiniFATSectors() (err error) {
   141  
   142  	var section = make([]byte, 0)
   143  
   144  	position := cfb.calculateOffset(cfb.header.FirstMiniFATSectorLocation[:])
   145  
   146  	for i := uint32(0); i < helpers.BytesToUint32(cfb.header.NumberMiniFATSectors[:]); i++ {
   147  		sector := NewSector(&cfb.header)
   148  		err := cfb.getData(position, &sector.Data)
   149  
   150  		if err != nil {
   151  			return err
   152  		}
   153  
   154  		for _, value := range sector.getMiniFatFATSectorLocations() {
   155  			section = append(section, value)
   156  			if len(section) == 4 {
   157  				cfb.miniFatPositions = append(cfb.miniFatPositions, helpers.BytesToUint32(section))
   158  				section = make([]byte, 0)
   159  			}
   160  		}
   161  		position = position + sector.SectorSize
   162  	}
   163  
   164  	return
   165  }
   166  
   167  func (cfb *Cfb) getFatSectors() (err error) { // nolint: gocyclo
   168  
   169  	entries := DefaultDIFATEntries
   170  
   171  	if helpers.BytesToUint32(cfb.header.NumberFATSectors[:]) < DefaultDIFATEntries {
   172  		entries = helpers.BytesToUint32(cfb.header.NumberFATSectors[:])
   173  	}
   174  
   175  	for i := uint32(0); i < entries; i++ {
   176  
   177  		position := cfb.calculateOffset(cfb.header.getDIFATEntry(i))
   178  		sector := NewSector(&cfb.header)
   179  
   180  		err := cfb.getData(position, &sector.Data)
   181  
   182  		if err != nil {
   183  			return err
   184  		}
   185  
   186  		cfb.difatPositions = append(cfb.difatPositions, sector.values(EntrySize)...)
   187  
   188  	}
   189  
   190  	if bytes.Compare(cfb.header.FirstDIFATSectorLocation[:], ENDOFCHAIN) == 0 {
   191  		return
   192  	}
   193  
   194  	position := cfb.calculateOffset(cfb.header.FirstDIFATSectorLocation[:])
   195  	var section = make([]byte, 0)
   196  	for i := uint32(0); i < helpers.BytesToUint32(cfb.header.NumberDIFATSectors[:]); i++ {
   197  		sector := NewSector(&cfb.header)
   198  		err := cfb.getData(position, &sector.Data)
   199  
   200  		if err != nil {
   201  			return err
   202  		}
   203  
   204  		for _, value := range sector.getFATSectorLocations() {
   205  			section = append(section, value)
   206  			if len(section) == 4 {
   207  
   208  				position = cfb.calculateOffset(section)
   209  				sectorF := NewSector(&cfb.header)
   210  				err := cfb.getData(position, &sectorF.Data)
   211  
   212  				if err != nil {
   213  					return err
   214  				}
   215  				cfb.difatPositions = append(cfb.difatPositions, sectorF.values(EntrySize)...)
   216  
   217  				section = make([]byte, 0)
   218  			}
   219  
   220  		}
   221  
   222  		position = cfb.calculateOffset(sector.getNextDIFATSectorLocation())
   223  
   224  	}
   225  
   226  	return
   227  }
   228  func (cfb *Cfb) getDataFromMiniFat(miniFatSectorLocation uint32, offset uint32) (data []byte, err error) {
   229  
   230  	sPoint := cfb.sectorOffset(miniFatSectorLocation)
   231  	point := sPoint + cfb.calculateMiniFatOffset(offset)
   232  
   233  	for {
   234  
   235  		sector := NewMiniFatSector(&cfb.header)
   236  
   237  		err = cfb.getData(point, &sector.Data)
   238  
   239  		if err != nil {
   240  			return data, err
   241  		}
   242  
   243  		data = append(data, sector.Data...)
   244  
   245  		if cfb.miniFatPositions[offset] == helpers.BytesToUint32(ENDOFCHAIN) {
   246  			break
   247  		}
   248  
   249  		offset = cfb.miniFatPositions[offset]
   250  
   251  		point = sPoint + cfb.calculateMiniFatOffset(offset)
   252  
   253  	}
   254  
   255  	return data, err
   256  }
   257  
   258  func (cfb *Cfb) getDataFromFatChain(offset uint32) (data []byte, err error) {
   259  
   260  	for {
   261  		sector := NewSector(&cfb.header)
   262  		point := cfb.sectorOffset(offset)
   263  
   264  		err = cfb.getData(point, &sector.Data)
   265  
   266  		if err != nil {
   267  			return data, err
   268  		}
   269  
   270  		data = append(data, sector.Data...)
   271  		offset = cfb.difatPositions[offset]
   272  		if offset == helpers.BytesToUint32(ENDOFCHAIN) {
   273  			break
   274  		}
   275  	}
   276  
   277  	return data, err
   278  }
   279  
   280  // OpenObject - Get object stream
   281  func (cfb *Cfb) OpenObject(object *Directory, root *Directory) (reader io.ReadSeeker, err error) {
   282  
   283  	if helpers.BytesToUint32(object.StreamSize[:]) < uint32(helpers.BytesToUint16(cfb.header.MiniStreamCutoffSize[:])) {
   284  
   285  		data, err := cfb.getDataFromMiniFat(root.GetStartingSectorLocation(), object.GetStartingSectorLocation())
   286  
   287  		if err != nil {
   288  			return reader, err
   289  		}
   290  
   291  		reader = bytes.NewReader(data)
   292  	} else {
   293  
   294  		data, err := cfb.getDataFromFatChain(object.GetStartingSectorLocation())
   295  
   296  		if err != nil {
   297  			return reader, err
   298  		}
   299  
   300  		reader = bytes.NewReader(data)
   301  
   302  	}
   303  
   304  	return reader, err
   305  }
   306  
   307  func (cfb *Cfb) getData(offset uint32, data *[]byte) (err error) {
   308  
   309  	_, err = cfb.file.Seek(int64(offset), 0)
   310  
   311  	if err != nil {
   312  		return
   313  	}
   314  
   315  	_, err = cfb.file.Read(*data)
   316  
   317  	if err != nil {
   318  		return
   319  	}
   320  	return
   321  
   322  }
   323  
   324  func (cfb *Cfb) sectorOffset(sid uint32) uint32 {
   325  	return (sid + 1) * cfb.header.sectorSize()
   326  }
   327  
   328  func (cfb *Cfb) calculateMiniFatOffset(sid uint32) (n uint32) {
   329  
   330  	return sid * 64
   331  }
   332  
   333  func (cfb *Cfb) calculateOffset(sectorID []byte) (n uint32) {
   334  
   335  	if len(sectorID) == 4 {
   336  		n = helpers.BytesToUint32(sectorID)
   337  	}
   338  	if len(sectorID) == 2 {
   339  		n = uint32(binary.LittleEndian.Uint16(sectorID))
   340  	}
   341  	return (n * cfb.header.sectorSize()) + cfb.header.sectorSize()
   342  }