github.com/shakinm/xlsReader@v0.9.12/cfb/cfb.go (about) 1 package cfb 2 3 import ( 4 "bytes" 5 "encoding/binary" 6 "github.com/shakinm/xlsReader/helpers" 7 "io" 8 "os" 9 "path/filepath" 10 ) 11 12 // Cfb - Compound File Binary 13 type Cfb struct { 14 header Header 15 file io.ReadSeeker 16 fLink *os.File 17 difatPositions []uint32 18 miniFatPositions []uint32 19 dirs []*Directory 20 } 21 22 // EntrySize - Directory array entry length 23 var EntrySize = 128 24 25 // DefaultDIFATEntries -Number FAT locations in DIFAT 26 var DefaultDIFATEntries = uint32(109) 27 28 // GetDirs - Get a list of directories 29 func (cfb *Cfb) GetDirs() []*Directory { 30 return cfb.dirs 31 } 32 33 func (cfb *Cfb) CloseFile() error { 34 return cfb.fLink.Close() 35 } 36 37 // OpenFile - Open document from the file 38 func OpenFile(filename string) (cfb Cfb, err error) { 39 40 cfb.fLink, err = os.Open(filepath.Clean(filename)) 41 42 if err != nil { 43 return cfb, err 44 } 45 46 cfb.file = cfb.fLink 47 48 err = open(&cfb) 49 50 return cfb, err 51 } 52 53 // OpenReader - Open document from the reader 54 func OpenReader(reader io.ReadSeeker) (cfb Cfb, err error) { 55 56 cfb.file = reader 57 58 if err != nil { 59 return 60 } 61 62 err = open(&cfb) 63 64 return 65 } 66 67 func open(cfb *Cfb) (err error) { 68 69 err = cfb.getHeader() 70 71 if err != nil { 72 return err 73 } 74 75 err = cfb.getMiniFATSectors() 76 77 if err != nil { 78 return err 79 } 80 81 err = cfb.getFatSectors() 82 83 if err != nil { 84 return err 85 } 86 87 err = cfb.getDirectories() 88 89 return err 90 } 91 92 func (cfb *Cfb) getHeader() (err error) { 93 94 var bHeader = make([]byte, 4096) 95 96 _, err = cfb.file.Read(bHeader) 97 98 if err != nil { 99 return 100 } 101 102 err = binary.Read(bytes.NewBuffer(bHeader), binary.LittleEndian, &cfb.header) 103 104 if err != nil { 105 return 106 } 107 108 err = cfb.header.validate() 109 110 return 111 } 112 113 func (cfb *Cfb) getDirectories() (err error) { 114 115 stream, err := cfb.getDataFromFatChain(helpers.BytesToUint32(cfb.header.FirstDirectorySectorLocation[:])) 116 117 if err != nil { 118 return err 119 } 120 var section = make([]byte, 0) 121 122 for _, value := range stream { 123 section = append(section, value) 124 if len(section) == EntrySize { 125 var dir Directory 126 err = binary.Read(bytes.NewBuffer(section), binary.LittleEndian, &dir) 127 if err == nil && dir.ObjectType != 0x00 { 128 cfb.dirs = append(cfb.dirs, &dir) 129 } 130 131 section = make([]byte, 0) 132 } 133 134 } 135 136 return 137 138 } 139 140 func (cfb *Cfb) getMiniFATSectors() (err error) { 141 142 var section = make([]byte, 0) 143 144 position := cfb.calculateOffset(cfb.header.FirstMiniFATSectorLocation[:]) 145 146 for i := uint32(0); i < helpers.BytesToUint32(cfb.header.NumberMiniFATSectors[:]); i++ { 147 sector := NewSector(&cfb.header) 148 err := cfb.getData(position, §or.Data) 149 150 if err != nil { 151 return err 152 } 153 154 for _, value := range sector.getMiniFatFATSectorLocations() { 155 section = append(section, value) 156 if len(section) == 4 { 157 cfb.miniFatPositions = append(cfb.miniFatPositions, helpers.BytesToUint32(section)) 158 section = make([]byte, 0) 159 } 160 } 161 position = position + sector.SectorSize 162 } 163 164 return 165 } 166 167 func (cfb *Cfb) getFatSectors() (err error) { // nolint: gocyclo 168 169 entries := DefaultDIFATEntries 170 171 if helpers.BytesToUint32(cfb.header.NumberFATSectors[:]) < DefaultDIFATEntries { 172 entries = helpers.BytesToUint32(cfb.header.NumberFATSectors[:]) 173 } 174 175 for i := uint32(0); i < entries; i++ { 176 177 position := cfb.calculateOffset(cfb.header.getDIFATEntry(i)) 178 sector := NewSector(&cfb.header) 179 180 err := cfb.getData(position, §or.Data) 181 182 if err != nil { 183 return err 184 } 185 186 cfb.difatPositions = append(cfb.difatPositions, sector.values(EntrySize)...) 187 188 } 189 190 if bytes.Compare(cfb.header.FirstDIFATSectorLocation[:], ENDOFCHAIN) == 0 { 191 return 192 } 193 194 position := cfb.calculateOffset(cfb.header.FirstDIFATSectorLocation[:]) 195 var section = make([]byte, 0) 196 for i := uint32(0); i < helpers.BytesToUint32(cfb.header.NumberDIFATSectors[:]); i++ { 197 sector := NewSector(&cfb.header) 198 err := cfb.getData(position, §or.Data) 199 200 if err != nil { 201 return err 202 } 203 204 for _, value := range sector.getFATSectorLocations() { 205 section = append(section, value) 206 if len(section) == 4 { 207 208 position = cfb.calculateOffset(section) 209 sectorF := NewSector(&cfb.header) 210 err := cfb.getData(position, §orF.Data) 211 212 if err != nil { 213 return err 214 } 215 cfb.difatPositions = append(cfb.difatPositions, sectorF.values(EntrySize)...) 216 217 section = make([]byte, 0) 218 } 219 220 } 221 222 position = cfb.calculateOffset(sector.getNextDIFATSectorLocation()) 223 224 } 225 226 return 227 } 228 func (cfb *Cfb) getDataFromMiniFat(miniFatSectorLocation uint32, offset uint32) (data []byte, err error) { 229 230 sPoint := cfb.sectorOffset(miniFatSectorLocation) 231 point := sPoint + cfb.calculateMiniFatOffset(offset) 232 233 for { 234 235 sector := NewMiniFatSector(&cfb.header) 236 237 err = cfb.getData(point, §or.Data) 238 239 if err != nil { 240 return data, err 241 } 242 243 data = append(data, sector.Data...) 244 245 if cfb.miniFatPositions[offset] == helpers.BytesToUint32(ENDOFCHAIN) { 246 break 247 } 248 249 offset = cfb.miniFatPositions[offset] 250 251 point = sPoint + cfb.calculateMiniFatOffset(offset) 252 253 } 254 255 return data, err 256 } 257 258 func (cfb *Cfb) getDataFromFatChain(offset uint32) (data []byte, err error) { 259 260 for { 261 sector := NewSector(&cfb.header) 262 point := cfb.sectorOffset(offset) 263 264 err = cfb.getData(point, §or.Data) 265 266 if err != nil { 267 return data, err 268 } 269 270 data = append(data, sector.Data...) 271 offset = cfb.difatPositions[offset] 272 if offset == helpers.BytesToUint32(ENDOFCHAIN) { 273 break 274 } 275 } 276 277 return data, err 278 } 279 280 // OpenObject - Get object stream 281 func (cfb *Cfb) OpenObject(object *Directory, root *Directory) (reader io.ReadSeeker, err error) { 282 283 if helpers.BytesToUint32(object.StreamSize[:]) < uint32(helpers.BytesToUint16(cfb.header.MiniStreamCutoffSize[:])) { 284 285 data, err := cfb.getDataFromMiniFat(root.GetStartingSectorLocation(), object.GetStartingSectorLocation()) 286 287 if err != nil { 288 return reader, err 289 } 290 291 reader = bytes.NewReader(data) 292 } else { 293 294 data, err := cfb.getDataFromFatChain(object.GetStartingSectorLocation()) 295 296 if err != nil { 297 return reader, err 298 } 299 300 reader = bytes.NewReader(data) 301 302 } 303 304 return reader, err 305 } 306 307 func (cfb *Cfb) getData(offset uint32, data *[]byte) (err error) { 308 309 _, err = cfb.file.Seek(int64(offset), 0) 310 311 if err != nil { 312 return 313 } 314 315 _, err = cfb.file.Read(*data) 316 317 if err != nil { 318 return 319 } 320 return 321 322 } 323 324 func (cfb *Cfb) sectorOffset(sid uint32) uint32 { 325 return (sid + 1) * cfb.header.sectorSize() 326 } 327 328 func (cfb *Cfb) calculateMiniFatOffset(sid uint32) (n uint32) { 329 330 return sid * 64 331 } 332 333 func (cfb *Cfb) calculateOffset(sectorID []byte) (n uint32) { 334 335 if len(sectorID) == 4 { 336 n = helpers.BytesToUint32(sectorID) 337 } 338 if len(sectorID) == 2 { 339 n = uint32(binary.LittleEndian.Uint16(sectorID)) 340 } 341 return (n * cfb.header.sectorSize()) + cfb.header.sectorSize() 342 }