github.com/vugu/vugu@v0.3.5/vugufmt/formatter.go (about)

     1  package vugufmt
     2  
     3  import (
     4  	"bytes"
     5  	"fmt"
     6  	"io"
     7  	"io/ioutil"
     8  	"path/filepath"
     9  	"strings"
    10  	"unicode"
    11  
    12  	"github.com/vugu/vugu/internal/htmlx"
    13  	"github.com/vugu/vugu/internal/htmlx/atom"
    14  )
    15  
// Formatter allows you to format vugu files.
//
// Both fields are optional: FormatScript returns its input unchanged when
// ScriptFormatters is nil or has no entry for the script type, and
// FormatStyle returns its input unchanged when StyleFormatter is nil.
type Formatter struct {
	// ScriptFormatters maps script blocks to formatting
	// functions. FormatScript looks entries up by the
	// lower-cased script type, so keys should be lower case.
	// For each type of script block,
	// we can run it through the supplied function.
	// If the function returns error, we should
	// not accept the output written to the writer.
	// You can add your own custom one for JS, for
	// example. If you want to use gofmt or goimports,
	// see how to apply options in NewFormatter.
	ScriptFormatters map[string]func([]byte) ([]byte, *FmtError)
	// StyleFormatter handles CSS blocks. It receives the raw
	// content of a style block and returns the formatted bytes.
	StyleFormatter func([]byte) ([]byte, *FmtError)
}
    31  
    32  // NewFormatter creates a new formatter.
    33  // Pass in vugufmt.UseGoFmt to use gofmt.
    34  // Pass in vugufmt.UseGoImports to use goimports.
    35  func NewFormatter(opts ...func(*Formatter)) *Formatter {
    36  	f := &Formatter{
    37  		ScriptFormatters: make(map[string](func([]byte) ([]byte, *FmtError))),
    38  	}
    39  
    40  	// apply options
    41  	for _, opt := range opts {
    42  		opt(f)
    43  	}
    44  
    45  	return f
    46  }
    47  
    48  // FormatScript formats script text nodes.
    49  func (f *Formatter) FormatScript(scriptType string, scriptContent []byte) ([]byte, *FmtError) {
    50  	if f.ScriptFormatters == nil {
    51  		return scriptContent, nil
    52  	}
    53  	fn, ok := f.ScriptFormatters[strings.ToLower(scriptType)]
    54  	if !ok {
    55  		return scriptContent, nil
    56  	}
    57  	return fn(scriptContent)
    58  }
    59  
    60  // FormatStyle formats script text nodes.
    61  func (f *Formatter) FormatStyle(styleContent []byte) ([]byte, *FmtError) {
    62  	if f.StyleFormatter == nil {
    63  		return styleContent, nil
    64  	}
    65  	return f.StyleFormatter(styleContent)
    66  }
    67  
    68  // breaks returns the number of newlines if all input
    69  // text is whitespace. Otherwise returns 0.
    70  func breaks(input string) int {
    71  	numBreaks := 0
    72  	for _, s := range input {
    73  		if !unicode.IsSpace(s) {
    74  			return 0
    75  		}
    76  		if s == '\n' {
    77  			numBreaks++
    78  		}
    79  	}
    80  	return numBreaks
    81  }
    82  
    83  // FormatHTML formats script and css nodes.
    84  func (f *Formatter) FormatHTML(filename string, in io.Reader, out io.Writer) error {
    85  	izer := htmlx.NewTokenizer(in)
    86  	ts := tokenStack{}
    87  
    88  	curTok := htmlx.Token{}
    89  
    90  	previousLineBreak := false
    91  
    92  loop:
    93  	for {
    94  		curTokType := izer.Next()
    95  
    96  		// quit on errors.
    97  		if curTokType == htmlx.ErrorToken {
    98  			if err := izer.Err(); err != nil {
    99  				if err != io.EOF {
   100  					return &FmtError{
   101  						Msg:    err.Error(),
   102  						Line:   curTok.Line,
   103  						Column: curTok.Column,
   104  					}
   105  				}
   106  				// it's ok if we hit the end,
   107  				// provided the stack is empty
   108  				if len(ts) == 0 {
   109  					return nil
   110  				}
   111  				tagNames := make([]string, len(ts))
   112  				for i, t := range ts {
   113  					tagNames[i] = t.Data
   114  				}
   115  				return &FmtError{
   116  					Msg:    fmt.Sprintf("missing end tags (%s)", strings.Join(tagNames, ", ")),
   117  					Line:   curTok.Line,
   118  					Column: curTok.Column,
   119  				}
   120  			}
   121  			return &FmtError{
   122  				Msg:    "tokenization error",
   123  				Line:   curTok.Line,
   124  				Column: curTok.Column,
   125  			}
   126  		}
   127  
   128  		curTok := izer.Token()
   129  
   130  		// do indentation if we broke the line before this token.
   131  		if previousLineBreak {
   132  			indentLevel := len(ts)
   133  			if curTokType == htmlx.EndTagToken && indentLevel > 0 {
   134  				indentLevel--
   135  			}
   136  			for i := 0; i < indentLevel; i++ {
   137  				out.Write([]byte{'\t'})
   138  			}
   139  		}
   140  		previousLineBreak = false
   141  
   142  		raw := izer.Raw()
   143  		raws := string(raw)
   144  		// add or remove tokens from the stack
   145  		switch curTokType {
   146  		case htmlx.StartTagToken:
   147  			ts.push(&curTok)
   148  			out.Write(raw)
   149  		case htmlx.EndTagToken:
   150  			lastPushed := ts.pop()
   151  			if lastPushed.DataAtom != curTok.DataAtom {
   152  				return &FmtError{
   153  					Msg:    fmt.Sprintf("mismatched ending tag (expected %s, found %s)", lastPushed.Data, curTok.Data),
   154  					Line:   curTok.Line,
   155  					Column: curTok.Column,
   156  				}
   157  			}
   158  			out.Write(raw)
   159  		case htmlx.TextToken:
   160  			parent := ts.top()
   161  
   162  			if breakCount := breaks(raws); breakCount > 0 {
   163  				// This is a break between tags.
   164  				for i := 0; i < breakCount; i++ {
   165  					out.Write([]byte{'\n'})
   166  				}
   167  				previousLineBreak = true
   168  				continue loop
   169  			}
   170  
   171  			if parent == nil {
   172  				out.Write(raw)
   173  				//return fmt.Errorf("%s:%v:%v: orphaned text node",
   174  				//	filename, curTok.Line, curTok.Column)
   175  			} else if parent.DataAtom == atom.Script {
   176  				// determine the type of the script
   177  				scriptType := ""
   178  				for _, st := range parent.Attr {
   179  					if st.Key == "type" {
   180  						scriptType = st.Val
   181  					}
   182  				}
   183  
   184  				// hey we are in a script text node
   185  				fmtr, err := f.FormatScript(scriptType, raw)
   186  				// Exit out on error.
   187  				if err != nil {
   188  					err.Line += curTok.Line
   189  					err.FileName = filename
   190  					return err
   191  				}
   192  				out.Write(fmtr)
   193  
   194  			} else if parent.DataAtom == atom.Style {
   195  				// hey we are in a CSS text node
   196  				fmtr, err := f.FormatStyle(raw)
   197  				if err != nil {
   198  					return &FmtError{
   199  						Msg:    err.Error(),
   200  						Line:   curTok.Line,
   201  						Column: curTok.Column,
   202  					}
   203  				}
   204  				out.Write(fmtr)
   205  			} else {
   206  				// we are in some other text node we don't care about.
   207  				out.Write(raw)
   208  			}
   209  		default:
   210  			out.Write(raw)
   211  		}
   212  	}
   213  }
   214  
   215  // Diff will show differences between input and what
   216  // Format() would do. It will return (true, nil) if there
   217  // is a difference, (false, nil) if there is no difference,
   218  // and (*, notnil) when the difference can't be determined.
   219  // filename is optional, but helps with generating useful output.
   220  func (f *Formatter) Diff(filename string, input io.Reader, output io.Writer) (bool, error) {
   221  	if filename == "" {
   222  		filename = "<not set>"
   223  	}
   224  
   225  	var resBuff bytes.Buffer
   226  	src, err := ioutil.ReadAll(input)
   227  	if err != nil {
   228  		return false, err
   229  	}
   230  	if err := f.FormatHTML(filename, bytes.NewReader(src), &resBuff); err != nil {
   231  		return false, err
   232  	}
   233  	res := resBuff.Bytes()
   234  
   235  	// No difference!
   236  	if bytes.Equal(src, res) {
   237  		return false, nil
   238  	}
   239  
   240  	// There is a difference, so what is it?
   241  	data, err := diff(src, res, filename)
   242  	if err != nil {
   243  		return true, fmt.Errorf("computing diff: %s", err)
   244  	}
   245  	output.Write([]byte(fmt.Sprintf("diff -u %s %s\n", filepath.ToSlash(filename+".orig"), filepath.ToSlash(filename))))
   246  	output.Write(data)
   247  	return true, nil
   248  }