github.com/richardwilkes/toolbox@v1.121.0/formats/xlsx/sheet.go (about) 1 // Copyright (c) 2016-2024 by Richard A. Wilkes. All rights reserved. 2 // 3 // This Source Code Form is subject to the terms of the Mozilla Public 4 // License, version 2.0. If a copy of the MPL was not distributed with 5 // this file, You can obtain one at http://mozilla.org/MPL/2.0/. 6 // 7 // This Source Code Form is "Incompatible With Secondary Licenses", as 8 // defined by the Mozilla Public License, version 2.0. 9 10 // Package xlsx provides the ability to extract text from Excel spreadsheets. 11 package xlsx 12 13 import ( 14 "archive/zip" 15 "encoding/xml" 16 "fmt" 17 "io" 18 "math" 19 "sort" 20 "strconv" 21 "strings" 22 23 "github.com/richardwilkes/toolbox/errs" 24 "github.com/richardwilkes/toolbox/txt" 25 "github.com/richardwilkes/toolbox/xio" 26 ) 27 28 // Sheet holds the data contained in a single worksheet. 29 type Sheet struct { 30 Cells map[Ref]Cell 31 Name string 32 Min Ref 33 Max Ref 34 } 35 36 // Load sheets from an .xlsx file. 37 func Load(path string) ([]Sheet, error) { 38 r, err := zip.OpenReader(path) 39 if err != nil { 40 return nil, errs.Wrap(err) 41 } 42 defer xio.CloseIgnoringErrors(r) 43 return load(&r.Reader) 44 } 45 46 // Read sheets from an .xlsx stream. 47 func Read(in io.ReaderAt, size int64) ([]Sheet, error) { 48 r, err := zip.NewReader(in, size) 49 if err != nil { 50 return nil, errs.Wrap(err) 51 } 52 return load(r) 53 } 54 55 func load(r *zip.Reader) ([]Sheet, error) { 56 var sheetNames []string 57 var strs []string 58 var files []*zip.File 59 var err error 60 for _, f := range r.File { 61 switch { 62 case f.Name == "docProps/app.xml": 63 if sheetNames, err = loadSheetNames(f); err != nil { 64 return nil, err 65 } 66 case f.Name == "xl/sharedStrings.xml": 67 if strs, err = loadStrings(f); err != nil { 68 return nil, err 69 } 70 case strings.HasPrefix(f.Name, "xl/worksheets/sheet"): 71 files = append(files, f) 72 } 73 } 74 sort.Slice(files, func(i, j int) bool { 75 return txt.NaturalLess(files[i].Name, files[j].Name, true) 76 }) 77 sheets := make([]Sheet, 0, len(files)) 78 for i, f := range files { 79 var sheet *Sheet 80 if sheet, err = loadSheet(f, strs); err != nil { 81 return nil, err 82 } 83 if i < len(sheetNames) { 84 sheet.Name = sheetNames[i] 85 } else { 86 sheet.Name = fmt.Sprintf("Sheet%d", i+1) 87 } 88 sheets = append(sheets, *sheet) 89 } 90 return sheets, nil 91 } 92 93 func loadSheetNames(f *zip.File) ([]string, error) { 94 fr, err := f.Open() 95 if err != nil { 96 return nil, errs.Wrap(err) 97 } 98 defer xio.CloseIgnoringErrors(fr) 99 decoder := xml.NewDecoder(fr) 100 var data struct { 101 Names []string `xml:"TitlesOfParts>vector>lpstr"` 102 } 103 if err = decoder.Decode(&data); err != nil { 104 return nil, errs.Wrap(err) 105 } 106 return data.Names, nil 107 } 108 109 func loadStrings(f *zip.File) ([]string, error) { 110 fr, err := f.Open() 111 if err != nil { 112 return nil, errs.Wrap(err) 113 } 114 defer xio.CloseIgnoringErrors(fr) 115 decoder := xml.NewDecoder(fr) 116 var data struct { 117 SST []string `xml:"si>t"` 118 } 119 if err = decoder.Decode(&data); err != nil { 120 return nil, errs.Wrap(err) 121 } 122 return data.SST, nil 123 } 124 125 func loadSheet(f *zip.File, strs []string) (*Sheet, error) { 126 fr, err := f.Open() 127 if err != nil { 128 return nil, errs.Wrap(err) 129 } 130 defer xio.CloseIgnoringErrors(fr) 131 decoder := xml.NewDecoder(fr) 132 var data struct { 133 Cells []struct { 134 Value *string `xml:"v"` 135 Label string `xml:"r,attr"` 136 Type string `xml:"t,attr"` 137 } `xml:"sheetData>row>c"` 138 } 139 if err = decoder.Decode(&data); err != nil { 140 return nil, errs.Wrap(err) 141 } 142 sheet := &Sheet{ 143 Min: Ref{Row: math.MaxInt32, Col: math.MaxInt32}, 144 Max: Ref{}, 145 Cells: make(map[Ref]Cell, len(data.Cells)), 146 } 147 for _, one := range data.Cells { 148 if one.Value == nil { 149 continue 150 } 151 ref := ParseRef(one.Label) 152 cell := Cell{Value: *one.Value} 153 switch one.Type { 154 case "s": // String 155 var v int 156 if v, err = strconv.Atoi(cell.Value); err != nil { 157 return nil, errs.Wrap(err) 158 } 159 if v >= 0 && v < len(strs) { 160 cell.Value = strs[v] 161 } else { 162 cell.Value = "#REF!" 163 } 164 cell.Type = String 165 case "b": // Boolean 166 cell.Type = Boolean 167 default: // Number 168 cell.Type = Number 169 } 170 if sheet.Min.Row > ref.Row { 171 sheet.Min.Row = ref.Row 172 } 173 if sheet.Min.Col > ref.Col { 174 sheet.Min.Col = ref.Col 175 } 176 if sheet.Max.Row < ref.Row { 177 sheet.Max.Row = ref.Row 178 } 179 if sheet.Max.Col < ref.Col { 180 sheet.Max.Col = ref.Col 181 } 182 sheet.Cells[ref] = cell 183 } 184 if sheet.Min.Row > sheet.Max.Row { 185 sheet.Min.Row = sheet.Max.Row 186 } 187 if sheet.Min.Col > sheet.Max.Col { 188 sheet.Min.Col = sheet.Max.Col 189 } 190 return sheet, nil 191 }