github.com/SuCicada/su-hugo@v1.0.0/parser/metadecoders/decoder.go (about)

     1  // Copyright 2018 The Hugo Authors. All rights reserved.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  // http://www.apache.org/licenses/LICENSE-2.0
     7  //
     8  // Unless required by applicable law or agreed to in writing, software
     9  // distributed under the License is distributed on an "AS IS" BASIS,
    10  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  
    14  package metadecoders
    15  
    16  import (
    17  	"bytes"
    18  	"encoding/csv"
    19  	"encoding/json"
    20  	"fmt"
    21  	"regexp"
    22  	"strings"
    23  
    24  	"github.com/gohugoio/hugo/common/herrors"
    25  	"github.com/niklasfasching/go-org/org"
    26  
    27  	xml "github.com/clbanning/mxj/v2"
    28  	toml "github.com/pelletier/go-toml/v2"
    29  	"github.com/spf13/afero"
    30  	"github.com/spf13/cast"
    31  	jww "github.com/spf13/jwalterweatherman"
    32  	yaml "gopkg.in/yaml.v2"
    33  )
    34  
// Decoder provides some configuration options for the decoders.
type Decoder struct {
	// Delimiter is the field delimiter used in the CSV decoder. The Default
	// decoder sets it to ','.
	Delimiter rune

	// Comment, if not 0, is the comment character used in the CSV decoder. Lines beginning with the
	// Comment character without preceding whitespace are ignored.
	Comment rune
}
    44  
    45  // OptionsKey is used in cache keys.
    46  func (d Decoder) OptionsKey() string {
    47  	var sb strings.Builder
    48  	sb.WriteRune(d.Delimiter)
    49  	sb.WriteRune(d.Comment)
    50  	return sb.String()
    51  }
    52  
    53  // Default is a Decoder in its default configuration.
    54  var Default = Decoder{
    55  	Delimiter: ',',
    56  }
    57  
    58  // UnmarshalToMap will unmarshall data in format f into a new map. This is
    59  // what's needed for Hugo's front matter decoding.
    60  func (d Decoder) UnmarshalToMap(data []byte, f Format) (map[string]any, error) {
    61  	m := make(map[string]any)
    62  	if data == nil {
    63  		return m, nil
    64  	}
    65  
    66  	err := d.UnmarshalTo(data, f, &m)
    67  
    68  	return m, err
    69  }
    70  
    71  // UnmarshalFileToMap is the same as UnmarshalToMap, but reads the data from
    72  // the given filename.
    73  func (d Decoder) UnmarshalFileToMap(fs afero.Fs, filename string) (map[string]any, error) {
    74  	format := FormatFromString(filename)
    75  	if format == "" {
    76  		return nil, fmt.Errorf("%q is not a valid configuration format", filename)
    77  	}
    78  
    79  	data, err := afero.ReadFile(fs, filename)
    80  	if err != nil {
    81  		return nil, err
    82  	}
    83  	return d.UnmarshalToMap(data, format)
    84  }
    85  
    86  // UnmarshalStringTo tries to unmarshal data to a new instance of type typ.
    87  func (d Decoder) UnmarshalStringTo(data string, typ any) (any, error) {
    88  	data = strings.TrimSpace(data)
    89  	// We only check for the possible types in YAML, JSON and TOML.
    90  	switch typ.(type) {
    91  	case string:
    92  		return data, nil
    93  	case map[string]any:
    94  		format := d.FormatFromContentString(data)
    95  		return d.UnmarshalToMap([]byte(data), format)
    96  	case []any:
    97  		// A standalone slice. Let YAML handle it.
    98  		return d.Unmarshal([]byte(data), YAML)
    99  	case bool:
   100  		return cast.ToBoolE(data)
   101  	case int:
   102  		return cast.ToIntE(data)
   103  	case int64:
   104  		return cast.ToInt64E(data)
   105  	case float64:
   106  		return cast.ToFloat64E(data)
   107  	default:
   108  		return nil, fmt.Errorf("unmarshal: %T not supported", typ)
   109  	}
   110  }
   111  
   112  // Unmarshal will unmarshall data in format f into an interface{}.
   113  // This is what's needed for Hugo's /data handling.
   114  func (d Decoder) Unmarshal(data []byte, f Format) (any, error) {
   115  	if data == nil {
   116  		switch f {
   117  		case CSV:
   118  			return make([][]string, 0), nil
   119  		default:
   120  			return make(map[string]any), nil
   121  		}
   122  	}
   123  	var v any
   124  	err := d.UnmarshalTo(data, f, &v)
   125  
   126  	return v, err
   127  }
   128  
   129  // UnmarshalTo unmarshals data in format f into v.
   130  func (d Decoder) UnmarshalTo(data []byte, f Format, v any) error {
   131  	var err error
   132  
   133  	switch f {
   134  	case ORG:
   135  		err = d.unmarshalORG(data, v)
   136  	case JSON:
   137  		err = json.Unmarshal(data, v)
   138  	case XML:
   139  		var xmlRoot xml.Map
   140  		xmlRoot, err = xml.NewMapXml(data)
   141  
   142  		var xmlValue map[string]any
   143  		if err == nil {
   144  			xmlRootName, err := xmlRoot.Root()
   145  			if err != nil {
   146  				return toFileError(f, data, fmt.Errorf("failed to unmarshal XML: %w", err))
   147  			}
   148  			xmlValue = xmlRoot[xmlRootName].(map[string]any)
   149  		}
   150  
   151  		switch v := v.(type) {
   152  		case *map[string]any:
   153  			*v = xmlValue
   154  		case *any:
   155  			*v = xmlValue
   156  		}
   157  	case TOML:
   158  		err = toml.Unmarshal(data, v)
   159  	case YAML:
   160  		err = yaml.Unmarshal(data, v)
   161  		if err != nil {
   162  			return toFileError(f, data, fmt.Errorf("failed to unmarshal YAML: %w", err))
   163  		}
   164  
   165  		// To support boolean keys, the YAML package unmarshals maps to
   166  		// map[interface{}]interface{}. Here we recurse through the result
   167  		// and change all maps to map[string]interface{} like we would've
   168  		// gotten from `json`.
   169  		var ptr any
   170  		switch v.(type) {
   171  		case *map[string]any:
   172  			ptr = *v.(*map[string]any)
   173  		case *any:
   174  			ptr = *v.(*any)
   175  		default:
   176  			// Not a map.
   177  		}
   178  
   179  		if ptr != nil {
   180  			if mm, changed := stringifyMapKeys(ptr); changed {
   181  				switch v.(type) {
   182  				case *map[string]any:
   183  					*v.(*map[string]any) = mm.(map[string]any)
   184  				case *any:
   185  					*v.(*any) = mm
   186  				}
   187  			}
   188  		}
   189  	case CSV:
   190  		return d.unmarshalCSV(data, v)
   191  
   192  	default:
   193  		return fmt.Errorf("unmarshal of format %q is not supported", f)
   194  	}
   195  
   196  	if err == nil {
   197  		return nil
   198  	}
   199  
   200  	return toFileError(f, data, fmt.Errorf("unmarshal failed: %w", err))
   201  }
   202  
   203  func (d Decoder) unmarshalCSV(data []byte, v any) error {
   204  	r := csv.NewReader(bytes.NewReader(data))
   205  	r.Comma = d.Delimiter
   206  	r.Comment = d.Comment
   207  
   208  	records, err := r.ReadAll()
   209  	if err != nil {
   210  		return err
   211  	}
   212  
   213  	switch v.(type) {
   214  	case *any:
   215  		*v.(*any) = records
   216  	default:
   217  		return fmt.Errorf("CSV cannot be unmarshaled into %T", v)
   218  
   219  	}
   220  
   221  	return nil
   222  }
   223  
   224  func parseORGDate(s string) string {
   225  	r := regexp.MustCompile(`[<\[](\d{4}-\d{2}-\d{2}) .*[>\]]`)
   226  	if m := r.FindStringSubmatch(s); m != nil {
   227  		return m[1]
   228  	}
   229  	return s
   230  }
   231  
   232  func (d Decoder) unmarshalORG(data []byte, v any) error {
   233  	config := org.New()
   234  	config.Log = jww.WARN
   235  	document := config.Parse(bytes.NewReader(data), "")
   236  	if document.Error != nil {
   237  		return document.Error
   238  	}
   239  	frontMatter := make(map[string]any, len(document.BufferSettings))
   240  	for k, v := range document.BufferSettings {
   241  		k = strings.ToLower(k)
   242  		if strings.HasSuffix(k, "[]") {
   243  			frontMatter[k[:len(k)-2]] = strings.Fields(v)
   244  		} else if k == "tags" || k == "categories" || k == "aliases" {
   245  			jww.WARN.Printf("Please use '#+%s[]:' notation, automatic conversion is deprecated.", k)
   246  			frontMatter[k] = strings.Fields(v)
   247  		} else if k == "date" {
   248  			frontMatter[k] = parseORGDate(v)
   249  		} else {
   250  			frontMatter[k] = v
   251  		}
   252  	}
   253  	switch v.(type) {
   254  	case *map[string]any:
   255  		*v.(*map[string]any) = frontMatter
   256  	default:
   257  		*v.(*any) = frontMatter
   258  	}
   259  	return nil
   260  }
   261  
   262  func toFileError(f Format, data []byte, err error) error {
   263  	return herrors.NewFileErrorFromName(err, fmt.Sprintf("_stream.%s", f)).UpdateContent(bytes.NewReader(data), nil)
   264  }
   265  
   266  // stringifyMapKeys recurses into in and changes all instances of
   267  // map[interface{}]interface{} to map[string]interface{}. This is useful to
   268  // work around the impedance mismatch between JSON and YAML unmarshaling that's
   269  // described here: https://github.com/go-yaml/yaml/issues/139
   270  //
   271  // Inspired by https://github.com/stripe/stripe-mock, MIT licensed
   272  func stringifyMapKeys(in any) (any, bool) {
   273  	switch in := in.(type) {
   274  	case []any:
   275  		for i, v := range in {
   276  			if vv, replaced := stringifyMapKeys(v); replaced {
   277  				in[i] = vv
   278  			}
   279  		}
   280  	case map[string]any:
   281  		for k, v := range in {
   282  			if vv, changed := stringifyMapKeys(v); changed {
   283  				in[k] = vv
   284  			}
   285  		}
   286  	case map[any]any:
   287  		res := make(map[string]any)
   288  		var (
   289  			ok  bool
   290  			err error
   291  		)
   292  		for k, v := range in {
   293  			var ks string
   294  
   295  			if ks, ok = k.(string); !ok {
   296  				ks, err = cast.ToStringE(k)
   297  				if err != nil {
   298  					ks = fmt.Sprintf("%v", k)
   299  				}
   300  			}
   301  			if vv, replaced := stringifyMapKeys(v); replaced {
   302  				res[ks] = vv
   303  			} else {
   304  				res[ks] = v
   305  			}
   306  		}
   307  		return res, true
   308  	}
   309  
   310  	return nil, false
   311  }