github.com/richardwilkes/toolbox@v1.121.0/formats/xlsx/sheet.go (about)

     1  // Copyright (c) 2016-2024 by Richard A. Wilkes. All rights reserved.
     2  //
     3  // This Source Code Form is subject to the terms of the Mozilla Public
     4  // License, version 2.0. If a copy of the MPL was not distributed with
     5  // this file, You can obtain one at http://mozilla.org/MPL/2.0/.
     6  //
     7  // This Source Code Form is "Incompatible With Secondary Licenses", as
     8  // defined by the Mozilla Public License, version 2.0.
     9  
    10  // Package xlsx provides the ability to extract text from Excel spreadsheets.
    11  package xlsx
    12  
    13  import (
    14  	"archive/zip"
    15  	"encoding/xml"
    16  	"fmt"
    17  	"io"
    18  	"math"
    19  	"sort"
    20  	"strconv"
    21  	"strings"
    22  
    23  	"github.com/richardwilkes/toolbox/errs"
    24  	"github.com/richardwilkes/toolbox/txt"
    25  	"github.com/richardwilkes/toolbox/xio"
    26  )
    27  
    28  // Sheet holds the data contained in a single worksheet.
    29  type Sheet struct {
    30  	Cells map[Ref]Cell
    31  	Name  string
    32  	Min   Ref
    33  	Max   Ref
    34  }
    35  
    36  // Load sheets from an .xlsx file.
    37  func Load(path string) ([]Sheet, error) {
    38  	r, err := zip.OpenReader(path)
    39  	if err != nil {
    40  		return nil, errs.Wrap(err)
    41  	}
    42  	defer xio.CloseIgnoringErrors(r)
    43  	return load(&r.Reader)
    44  }
    45  
    46  // Read sheets from an .xlsx stream.
    47  func Read(in io.ReaderAt, size int64) ([]Sheet, error) {
    48  	r, err := zip.NewReader(in, size)
    49  	if err != nil {
    50  		return nil, errs.Wrap(err)
    51  	}
    52  	return load(r)
    53  }
    54  
    55  func load(r *zip.Reader) ([]Sheet, error) {
    56  	var sheetNames []string
    57  	var strs []string
    58  	var files []*zip.File
    59  	var err error
    60  	for _, f := range r.File {
    61  		switch {
    62  		case f.Name == "docProps/app.xml":
    63  			if sheetNames, err = loadSheetNames(f); err != nil {
    64  				return nil, err
    65  			}
    66  		case f.Name == "xl/sharedStrings.xml":
    67  			if strs, err = loadStrings(f); err != nil {
    68  				return nil, err
    69  			}
    70  		case strings.HasPrefix(f.Name, "xl/worksheets/sheet"):
    71  			files = append(files, f)
    72  		}
    73  	}
    74  	sort.Slice(files, func(i, j int) bool {
    75  		return txt.NaturalLess(files[i].Name, files[j].Name, true)
    76  	})
    77  	sheets := make([]Sheet, 0, len(files))
    78  	for i, f := range files {
    79  		var sheet *Sheet
    80  		if sheet, err = loadSheet(f, strs); err != nil {
    81  			return nil, err
    82  		}
    83  		if i < len(sheetNames) {
    84  			sheet.Name = sheetNames[i]
    85  		} else {
    86  			sheet.Name = fmt.Sprintf("Sheet%d", i+1)
    87  		}
    88  		sheets = append(sheets, *sheet)
    89  	}
    90  	return sheets, nil
    91  }
    92  
    93  func loadSheetNames(f *zip.File) ([]string, error) {
    94  	fr, err := f.Open()
    95  	if err != nil {
    96  		return nil, errs.Wrap(err)
    97  	}
    98  	defer xio.CloseIgnoringErrors(fr)
    99  	decoder := xml.NewDecoder(fr)
   100  	var data struct {
   101  		Names []string `xml:"TitlesOfParts>vector>lpstr"`
   102  	}
   103  	if err = decoder.Decode(&data); err != nil {
   104  		return nil, errs.Wrap(err)
   105  	}
   106  	return data.Names, nil
   107  }
   108  
   109  func loadStrings(f *zip.File) ([]string, error) {
   110  	fr, err := f.Open()
   111  	if err != nil {
   112  		return nil, errs.Wrap(err)
   113  	}
   114  	defer xio.CloseIgnoringErrors(fr)
   115  	decoder := xml.NewDecoder(fr)
   116  	var data struct {
   117  		SST []string `xml:"si>t"`
   118  	}
   119  	if err = decoder.Decode(&data); err != nil {
   120  		return nil, errs.Wrap(err)
   121  	}
   122  	return data.SST, nil
   123  }
   124  
   125  func loadSheet(f *zip.File, strs []string) (*Sheet, error) {
   126  	fr, err := f.Open()
   127  	if err != nil {
   128  		return nil, errs.Wrap(err)
   129  	}
   130  	defer xio.CloseIgnoringErrors(fr)
   131  	decoder := xml.NewDecoder(fr)
   132  	var data struct {
   133  		Cells []struct {
   134  			Value *string `xml:"v"`
   135  			Label string  `xml:"r,attr"`
   136  			Type  string  `xml:"t,attr"`
   137  		} `xml:"sheetData>row>c"`
   138  	}
   139  	if err = decoder.Decode(&data); err != nil {
   140  		return nil, errs.Wrap(err)
   141  	}
   142  	sheet := &Sheet{
   143  		Min:   Ref{Row: math.MaxInt32, Col: math.MaxInt32},
   144  		Max:   Ref{},
   145  		Cells: make(map[Ref]Cell, len(data.Cells)),
   146  	}
   147  	for _, one := range data.Cells {
   148  		if one.Value == nil {
   149  			continue
   150  		}
   151  		ref := ParseRef(one.Label)
   152  		cell := Cell{Value: *one.Value}
   153  		switch one.Type {
   154  		case "s": // String
   155  			var v int
   156  			if v, err = strconv.Atoi(cell.Value); err != nil {
   157  				return nil, errs.Wrap(err)
   158  			}
   159  			if v >= 0 && v < len(strs) {
   160  				cell.Value = strs[v]
   161  			} else {
   162  				cell.Value = "#REF!"
   163  			}
   164  			cell.Type = String
   165  		case "b": // Boolean
   166  			cell.Type = Boolean
   167  		default: // Number
   168  			cell.Type = Number
   169  		}
   170  		if sheet.Min.Row > ref.Row {
   171  			sheet.Min.Row = ref.Row
   172  		}
   173  		if sheet.Min.Col > ref.Col {
   174  			sheet.Min.Col = ref.Col
   175  		}
   176  		if sheet.Max.Row < ref.Row {
   177  			sheet.Max.Row = ref.Row
   178  		}
   179  		if sheet.Max.Col < ref.Col {
   180  			sheet.Max.Col = ref.Col
   181  		}
   182  		sheet.Cells[ref] = cell
   183  	}
   184  	if sheet.Min.Row > sheet.Max.Row {
   185  		sheet.Min.Row = sheet.Max.Row
   186  	}
   187  	if sheet.Min.Col > sheet.Max.Col {
   188  		sheet.Min.Col = sheet.Max.Col
   189  	}
   190  	return sheet, nil
   191  }