github.com/vugu/vugu@v0.3.6-0.20240430171613-3f6f402e014b/vugufmt/formatter.go (about)

     1  package vugufmt
     2  
     3  import (
     4  	"bytes"
     5  	"fmt"
     6  	"io"
     7  	"path/filepath"
     8  	"strings"
     9  	"unicode"
    10  
    11  	"github.com/vugu/vugu/internal/htmlx"
    12  	"github.com/vugu/vugu/internal/htmlx/atom"
    13  )
    14  
    15  // Formatter allows you to format vugu files.
    16  type Formatter struct {
    17  	// ScriptFormatters maps script blocks to formatting
    18  	// functions.
    19  	// For each type of script block,
    20  	// we can run it through the supplied function.
    21  	// If the function returns error, we should
    22  	// not accept the output written to the writer.
    23  	// You can add your own custom one for JS, for
    24  	// example. If you want to use gofmt or goimports,
    25  	// see how to apply options in NewFormatter.
    26  	ScriptFormatters map[string]func([]byte) ([]byte, *FmtError)
    27  	// StyleFormatter handles CSS blocks.
    28  	StyleFormatter func([]byte) ([]byte, *FmtError)
    29  }
    30  
    31  // NewFormatter creates a new formatter.
    32  // Pass in vugufmt.UseGoFmt to use gofmt.
    33  // Pass in vugufmt.UseGoImports to use goimports.
    34  func NewFormatter(opts ...func(*Formatter)) *Formatter {
    35  	f := &Formatter{
    36  		ScriptFormatters: make(map[string](func([]byte) ([]byte, *FmtError))),
    37  	}
    38  
    39  	// apply options
    40  	for _, opt := range opts {
    41  		opt(f)
    42  	}
    43  
    44  	return f
    45  }
    46  
    47  // FormatScript formats script text nodes.
    48  func (f *Formatter) FormatScript(scriptType string, scriptContent []byte) ([]byte, *FmtError) {
    49  	if f.ScriptFormatters == nil {
    50  		return scriptContent, nil
    51  	}
    52  	fn, ok := f.ScriptFormatters[strings.ToLower(scriptType)]
    53  	if !ok {
    54  		return scriptContent, nil
    55  	}
    56  	return fn(scriptContent)
    57  }
    58  
    59  // FormatStyle formats script text nodes.
    60  func (f *Formatter) FormatStyle(styleContent []byte) ([]byte, *FmtError) {
    61  	if f.StyleFormatter == nil {
    62  		return styleContent, nil
    63  	}
    64  	return f.StyleFormatter(styleContent)
    65  }
    66  
    67  // breaks returns the number of newlines if all input
    68  // text is whitespace. Otherwise returns 0.
    69  func breaks(input string) int {
    70  	numBreaks := 0
    71  	for _, s := range input {
    72  		if !unicode.IsSpace(s) {
    73  			return 0
    74  		}
    75  		if s == '\n' {
    76  			numBreaks++
    77  		}
    78  	}
    79  	return numBreaks
    80  }
    81  
    82  // FormatHTML formats script and css nodes.
    83  func (f *Formatter) FormatHTML(filename string, in io.Reader, out io.Writer) error {
    84  	izer := htmlx.NewTokenizer(in)
    85  	ts := tokenStack{}
    86  
    87  	curTok := htmlx.Token{}
    88  
    89  	previousLineBreak := false
    90  
    91  loop:
    92  	for {
    93  		curTokType := izer.Next()
    94  
    95  		// quit on errors.
    96  		if curTokType == htmlx.ErrorToken {
    97  			if err := izer.Err(); err != nil {
    98  				if err != io.EOF {
    99  					return &FmtError{
   100  						Msg:    err.Error(),
   101  						Line:   curTok.Line,
   102  						Column: curTok.Column,
   103  					}
   104  				}
   105  				// it's ok if we hit the end,
   106  				// provided the stack is empty
   107  				if len(ts) == 0 {
   108  					return nil
   109  				}
   110  				tagNames := make([]string, len(ts))
   111  				for i, t := range ts {
   112  					tagNames[i] = t.Data
   113  				}
   114  				return &FmtError{
   115  					Msg:    fmt.Sprintf("missing end tags (%s)", strings.Join(tagNames, ", ")),
   116  					Line:   curTok.Line,
   117  					Column: curTok.Column,
   118  				}
   119  			}
   120  			return &FmtError{
   121  				Msg:    "tokenization error",
   122  				Line:   curTok.Line,
   123  				Column: curTok.Column,
   124  			}
   125  		}
   126  
   127  		curTok := izer.Token()
   128  
   129  		// do indentation if we broke the line before this token.
   130  		if previousLineBreak {
   131  			indentLevel := len(ts)
   132  			if curTokType == htmlx.EndTagToken && indentLevel > 0 {
   133  				indentLevel--
   134  			}
   135  			for i := 0; i < indentLevel; i++ {
   136  				_, err := out.Write([]byte{'\t'})
   137  				if err != nil {
   138  					return &FmtError{
   139  						Msg:    err.Error(),
   140  						Line:   curTok.Line,
   141  						Column: curTok.Column,
   142  					}
   143  				}
   144  			}
   145  		}
   146  		previousLineBreak = false
   147  
   148  		raw := izer.Raw()
   149  		raws := string(raw)
   150  		// add or remove tokens from the stack
   151  		switch curTokType {
   152  		case htmlx.StartTagToken:
   153  			ts.push(&curTok)
   154  			_, err := out.Write(raw)
   155  			if err != nil {
   156  				return &FmtError{
   157  					Msg:    err.Error(),
   158  					Line:   curTok.Line,
   159  					Column: curTok.Column,
   160  				}
   161  			}
   162  		case htmlx.EndTagToken:
   163  			lastPushed := ts.pop()
   164  			if lastPushed.DataAtom != curTok.DataAtom {
   165  				return &FmtError{
   166  					Msg:    fmt.Sprintf("mismatched ending tag (expected %s, found %s)", lastPushed.Data, curTok.Data),
   167  					Line:   curTok.Line,
   168  					Column: curTok.Column,
   169  				}
   170  			}
   171  			_, err := out.Write(raw)
   172  			if err != nil {
   173  				return &FmtError{
   174  					Msg:    err.Error(),
   175  					Line:   curTok.Line,
   176  					Column: curTok.Column,
   177  				}
   178  			}
   179  		case htmlx.TextToken:
   180  			parent := ts.top()
   181  
   182  			if breakCount := breaks(raws); breakCount > 0 {
   183  				// This is a break between tags.
   184  				for i := 0; i < breakCount; i++ {
   185  					_, err := out.Write([]byte{'\n'})
   186  					if err != nil {
   187  						return &FmtError{
   188  							Msg:    err.Error(),
   189  							Line:   curTok.Line,
   190  							Column: curTok.Column,
   191  						}
   192  					}
   193  				}
   194  				previousLineBreak = true
   195  				continue loop
   196  			}
   197  
   198  			if parent == nil {
   199  				_, err := out.Write(raw)
   200  				if err != nil {
   201  					return &FmtError{
   202  						Msg:    err.Error(),
   203  						Line:   curTok.Line,
   204  						Column: curTok.Column,
   205  					}
   206  				}
   207  				//return fmt.Errorf("%s:%v:%v: orphaned text node",
   208  				//	filename, curTok.Line, curTok.Column)
   209  			} else if parent.DataAtom == atom.Script {
   210  				// determine the type of the script
   211  				scriptType := ""
   212  				for _, st := range parent.Attr {
   213  					if st.Key == "type" {
   214  						scriptType = st.Val
   215  					}
   216  				}
   217  
   218  				// hey we are in a script text node
   219  				fmtr, err := f.FormatScript(scriptType, raw)
   220  				// Exit out on error.
   221  				if err != nil {
   222  					err.Line += curTok.Line
   223  					err.FileName = filename
   224  					return err
   225  				}
   226  				_, fmtrErr := out.Write(fmtr)
   227  				if fmtrErr != nil {
   228  					return &FmtError{
   229  						Msg:    fmtrErr.Error(),
   230  						Line:   curTok.Line,
   231  						Column: curTok.Column,
   232  					}
   233  				}
   234  			} else if parent.DataAtom == atom.Style {
   235  				// hey we are in a CSS text node
   236  				fmtr, err := f.FormatStyle(raw)
   237  				if err != nil {
   238  					return &FmtError{
   239  						Msg:    err.Error(),
   240  						Line:   curTok.Line,
   241  						Column: curTok.Column,
   242  					}
   243  				}
   244  				_, fmtrErr := out.Write(fmtr)
   245  				if fmtrErr != nil {
   246  					return &FmtError{
   247  						Msg:    fmtrErr.Error(),
   248  						Line:   curTok.Line,
   249  						Column: curTok.Column,
   250  					}
   251  				}
   252  			} else {
   253  				// we are in some other text node we don't care about.
   254  				_, err := out.Write(raw)
   255  				if err != nil {
   256  					return &FmtError{
   257  						Msg:    err.Error(),
   258  						Line:   curTok.Line,
   259  						Column: curTok.Column,
   260  					}
   261  				}
   262  			}
   263  		default:
   264  			_, err := out.Write(raw)
   265  			if err != nil {
   266  				return &FmtError{
   267  					Msg:    err.Error(),
   268  					Line:   curTok.Line,
   269  					Column: curTok.Column,
   270  				}
   271  			}
   272  		}
   273  	}
   274  }
   275  
   276  // Diff will show differences between input and what
   277  // Format() would do. It will return (true, nil) if there
   278  // is a difference, (false, nil) if there is no difference,
   279  // and (*, notnil) when the difference can't be determined.
   280  // filename is optional, but helps with generating useful output.
   281  func (f *Formatter) Diff(filename string, input io.Reader, output io.Writer) (bool, error) {
   282  	if filename == "" {
   283  		filename = "<not set>"
   284  	}
   285  
   286  	var resBuff bytes.Buffer
   287  	src, err := io.ReadAll(input)
   288  	if err != nil {
   289  		return false, err
   290  	}
   291  	if err := f.FormatHTML(filename, bytes.NewReader(src), &resBuff); err != nil {
   292  		return false, err
   293  	}
   294  	res := resBuff.Bytes()
   295  
   296  	// No difference!
   297  	if bytes.Equal(src, res) {
   298  		return false, nil
   299  	}
   300  
   301  	// There is a difference, so what is it?
   302  	data, err := diff(src, res, filename)
   303  	if err != nil {
   304  		return true, fmt.Errorf("computing diff: %s", err)
   305  	}
   306  	_, err = output.Write([]byte(fmt.Sprintf("diff -u %s %s\n", filepath.ToSlash(filename+".orig"), filepath.ToSlash(filename))))
   307  	if err != nil {
   308  		return false, err
   309  	}
   310  	_, err = output.Write(data)
   311  	if err != nil {
   312  		return false, err
   313  	}
   314  	return true, nil
   315  }