github.com/vugu/vugu@v0.3.6-0.20240430171613-3f6f402e014b/vugufmt/formatter.go (about) 1 package vugufmt 2 3 import ( 4 "bytes" 5 "fmt" 6 "io" 7 "path/filepath" 8 "strings" 9 "unicode" 10 11 "github.com/vugu/vugu/internal/htmlx" 12 "github.com/vugu/vugu/internal/htmlx/atom" 13 ) 14 15 // Formatter allows you to format vugu files. 16 type Formatter struct { 17 // ScriptFormatters maps script blocks to formatting 18 // functions. 19 // For each type of script block, 20 // we can run it through the supplied function. 21 // If the function returns error, we should 22 // not accept the output written to the writer. 23 // You can add your own custom one for JS, for 24 // example. If you want to use gofmt or goimports, 25 // see how to apply options in NewFormatter. 26 ScriptFormatters map[string]func([]byte) ([]byte, *FmtError) 27 // StyleFormatter handles CSS blocks. 28 StyleFormatter func([]byte) ([]byte, *FmtError) 29 } 30 31 // NewFormatter creates a new formatter. 32 // Pass in vugufmt.UseGoFmt to use gofmt. 33 // Pass in vugufmt.UseGoImports to use goimports. 34 func NewFormatter(opts ...func(*Formatter)) *Formatter { 35 f := &Formatter{ 36 ScriptFormatters: make(map[string](func([]byte) ([]byte, *FmtError))), 37 } 38 39 // apply options 40 for _, opt := range opts { 41 opt(f) 42 } 43 44 return f 45 } 46 47 // FormatScript formats script text nodes. 48 func (f *Formatter) FormatScript(scriptType string, scriptContent []byte) ([]byte, *FmtError) { 49 if f.ScriptFormatters == nil { 50 return scriptContent, nil 51 } 52 fn, ok := f.ScriptFormatters[strings.ToLower(scriptType)] 53 if !ok { 54 return scriptContent, nil 55 } 56 return fn(scriptContent) 57 } 58 59 // FormatStyle formats script text nodes. 60 func (f *Formatter) FormatStyle(styleContent []byte) ([]byte, *FmtError) { 61 if f.StyleFormatter == nil { 62 return styleContent, nil 63 } 64 return f.StyleFormatter(styleContent) 65 } 66 67 // breaks returns the number of newlines if all input 68 // text is whitespace. Otherwise returns 0. 69 func breaks(input string) int { 70 numBreaks := 0 71 for _, s := range input { 72 if !unicode.IsSpace(s) { 73 return 0 74 } 75 if s == '\n' { 76 numBreaks++ 77 } 78 } 79 return numBreaks 80 } 81 82 // FormatHTML formats script and css nodes. 83 func (f *Formatter) FormatHTML(filename string, in io.Reader, out io.Writer) error { 84 izer := htmlx.NewTokenizer(in) 85 ts := tokenStack{} 86 87 curTok := htmlx.Token{} 88 89 previousLineBreak := false 90 91 loop: 92 for { 93 curTokType := izer.Next() 94 95 // quit on errors. 96 if curTokType == htmlx.ErrorToken { 97 if err := izer.Err(); err != nil { 98 if err != io.EOF { 99 return &FmtError{ 100 Msg: err.Error(), 101 Line: curTok.Line, 102 Column: curTok.Column, 103 } 104 } 105 // it's ok if we hit the end, 106 // provided the stack is empty 107 if len(ts) == 0 { 108 return nil 109 } 110 tagNames := make([]string, len(ts)) 111 for i, t := range ts { 112 tagNames[i] = t.Data 113 } 114 return &FmtError{ 115 Msg: fmt.Sprintf("missing end tags (%s)", strings.Join(tagNames, ", ")), 116 Line: curTok.Line, 117 Column: curTok.Column, 118 } 119 } 120 return &FmtError{ 121 Msg: "tokenization error", 122 Line: curTok.Line, 123 Column: curTok.Column, 124 } 125 } 126 127 curTok := izer.Token() 128 129 // do indentation if we broke the line before this token. 130 if previousLineBreak { 131 indentLevel := len(ts) 132 if curTokType == htmlx.EndTagToken && indentLevel > 0 { 133 indentLevel-- 134 } 135 for i := 0; i < indentLevel; i++ { 136 _, err := out.Write([]byte{'\t'}) 137 if err != nil { 138 return &FmtError{ 139 Msg: err.Error(), 140 Line: curTok.Line, 141 Column: curTok.Column, 142 } 143 } 144 } 145 } 146 previousLineBreak = false 147 148 raw := izer.Raw() 149 raws := string(raw) 150 // add or remove tokens from the stack 151 switch curTokType { 152 case htmlx.StartTagToken: 153 ts.push(&curTok) 154 _, err := out.Write(raw) 155 if err != nil { 156 return &FmtError{ 157 Msg: err.Error(), 158 Line: curTok.Line, 159 Column: curTok.Column, 160 } 161 } 162 case htmlx.EndTagToken: 163 lastPushed := ts.pop() 164 if lastPushed.DataAtom != curTok.DataAtom { 165 return &FmtError{ 166 Msg: fmt.Sprintf("mismatched ending tag (expected %s, found %s)", lastPushed.Data, curTok.Data), 167 Line: curTok.Line, 168 Column: curTok.Column, 169 } 170 } 171 _, err := out.Write(raw) 172 if err != nil { 173 return &FmtError{ 174 Msg: err.Error(), 175 Line: curTok.Line, 176 Column: curTok.Column, 177 } 178 } 179 case htmlx.TextToken: 180 parent := ts.top() 181 182 if breakCount := breaks(raws); breakCount > 0 { 183 // This is a break between tags. 184 for i := 0; i < breakCount; i++ { 185 _, err := out.Write([]byte{'\n'}) 186 if err != nil { 187 return &FmtError{ 188 Msg: err.Error(), 189 Line: curTok.Line, 190 Column: curTok.Column, 191 } 192 } 193 } 194 previousLineBreak = true 195 continue loop 196 } 197 198 if parent == nil { 199 _, err := out.Write(raw) 200 if err != nil { 201 return &FmtError{ 202 Msg: err.Error(), 203 Line: curTok.Line, 204 Column: curTok.Column, 205 } 206 } 207 //return fmt.Errorf("%s:%v:%v: orphaned text node", 208 // filename, curTok.Line, curTok.Column) 209 } else if parent.DataAtom == atom.Script { 210 // determine the type of the script 211 scriptType := "" 212 for _, st := range parent.Attr { 213 if st.Key == "type" { 214 scriptType = st.Val 215 } 216 } 217 218 // hey we are in a script text node 219 fmtr, err := f.FormatScript(scriptType, raw) 220 // Exit out on error. 221 if err != nil { 222 err.Line += curTok.Line 223 err.FileName = filename 224 return err 225 } 226 _, fmtrErr := out.Write(fmtr) 227 if fmtrErr != nil { 228 return &FmtError{ 229 Msg: fmtrErr.Error(), 230 Line: curTok.Line, 231 Column: curTok.Column, 232 } 233 } 234 } else if parent.DataAtom == atom.Style { 235 // hey we are in a CSS text node 236 fmtr, err := f.FormatStyle(raw) 237 if err != nil { 238 return &FmtError{ 239 Msg: err.Error(), 240 Line: curTok.Line, 241 Column: curTok.Column, 242 } 243 } 244 _, fmtrErr := out.Write(fmtr) 245 if fmtrErr != nil { 246 return &FmtError{ 247 Msg: fmtrErr.Error(), 248 Line: curTok.Line, 249 Column: curTok.Column, 250 } 251 } 252 } else { 253 // we are in some other text node we don't care about. 254 _, err := out.Write(raw) 255 if err != nil { 256 return &FmtError{ 257 Msg: err.Error(), 258 Line: curTok.Line, 259 Column: curTok.Column, 260 } 261 } 262 } 263 default: 264 _, err := out.Write(raw) 265 if err != nil { 266 return &FmtError{ 267 Msg: err.Error(), 268 Line: curTok.Line, 269 Column: curTok.Column, 270 } 271 } 272 } 273 } 274 } 275 276 // Diff will show differences between input and what 277 // Format() would do. It will return (true, nil) if there 278 // is a difference, (false, nil) if there is no difference, 279 // and (*, notnil) when the difference can't be determined. 280 // filename is optional, but helps with generating useful output. 281 func (f *Formatter) Diff(filename string, input io.Reader, output io.Writer) (bool, error) { 282 if filename == "" { 283 filename = "<not set>" 284 } 285 286 var resBuff bytes.Buffer 287 src, err := io.ReadAll(input) 288 if err != nil { 289 return false, err 290 } 291 if err := f.FormatHTML(filename, bytes.NewReader(src), &resBuff); err != nil { 292 return false, err 293 } 294 res := resBuff.Bytes() 295 296 // No difference! 297 if bytes.Equal(src, res) { 298 return false, nil 299 } 300 301 // There is a difference, so what is it? 302 data, err := diff(src, res, filename) 303 if err != nil { 304 return true, fmt.Errorf("computing diff: %s", err) 305 } 306 _, err = output.Write([]byte(fmt.Sprintf("diff -u %s %s\n", filepath.ToSlash(filename+".orig"), filepath.ToSlash(filename)))) 307 if err != nil { 308 return false, err 309 } 310 _, err = output.Write(data) 311 if err != nil { 312 return false, err 313 } 314 return true, nil 315 }