github.com/kovansky/hugo@v0.92.3-0.20220224232819-63076e4ff19f/parser/metadecoders/decoder.go (about)

     1  // Copyright 2018 The Hugo Authors. All rights reserved.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  // http://www.apache.org/licenses/LICENSE-2.0
     7  //
     8  // Unless required by applicable law or agreed to in writing, software
     9  // distributed under the License is distributed on an "AS IS" BASIS,
    10  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  
    14  package metadecoders
    15  
    16  import (
    17  	"bytes"
    18  	"encoding/csv"
    19  	"encoding/json"
    20  	"fmt"
    21  	"regexp"
    22  	"strings"
    23  
    24  	"github.com/gohugoio/hugo/common/herrors"
    25  	"github.com/niklasfasching/go-org/org"
    26  
    27  	xml "github.com/clbanning/mxj/v2"
    28  	toml "github.com/pelletier/go-toml/v2"
    29  	"github.com/pkg/errors"
    30  	"github.com/spf13/afero"
    31  	"github.com/spf13/cast"
    32  	jww "github.com/spf13/jwalterweatherman"
    33  	yaml "gopkg.in/yaml.v2"
    34  )
    35  
// Decoder provides some configuration options for the decoders.
// Both options are consumed by the CSV decoder and are folded into the
// string returned by OptionsKey.
type Decoder struct {
	// Delimiter is the field delimiter used in the CSV decoder. The
	// package-level Default decoder sets it to ','.
	Delimiter rune

	// Comment, if not 0, is the comment character used in the CSV decoder. Lines beginning with the
	// Comment character without preceding whitespace are ignored.
	Comment rune
}
    45  
    46  // OptionsKey is used in cache keys.
    47  func (d Decoder) OptionsKey() string {
    48  	var sb strings.Builder
    49  	sb.WriteRune(d.Delimiter)
    50  	sb.WriteRune(d.Comment)
    51  	return sb.String()
    52  }
    53  
// Default is a Decoder in its default configuration: ',' is the field
// delimiter and no comment character is set.
var Default = Decoder{
	Delimiter: ',',
}
    58  
    59  // UnmarshalToMap will unmarshall data in format f into a new map. This is
    60  // what's needed for Hugo's front matter decoding.
    61  func (d Decoder) UnmarshalToMap(data []byte, f Format) (map[string]interface{}, error) {
    62  	m := make(map[string]interface{})
    63  	if data == nil {
    64  		return m, nil
    65  	}
    66  
    67  	err := d.UnmarshalTo(data, f, &m)
    68  
    69  	return m, err
    70  }
    71  
    72  // UnmarshalFileToMap is the same as UnmarshalToMap, but reads the data from
    73  // the given filename.
    74  func (d Decoder) UnmarshalFileToMap(fs afero.Fs, filename string) (map[string]interface{}, error) {
    75  	format := FormatFromString(filename)
    76  	if format == "" {
    77  		return nil, errors.Errorf("%q is not a valid configuration format", filename)
    78  	}
    79  
    80  	data, err := afero.ReadFile(fs, filename)
    81  	if err != nil {
    82  		return nil, err
    83  	}
    84  	return d.UnmarshalToMap(data, format)
    85  }
    86  
    87  // UnmarshalStringTo tries to unmarshal data to a new instance of type typ.
    88  func (d Decoder) UnmarshalStringTo(data string, typ interface{}) (interface{}, error) {
    89  	data = strings.TrimSpace(data)
    90  	// We only check for the possible types in YAML, JSON and TOML.
    91  	switch typ.(type) {
    92  	case string:
    93  		return data, nil
    94  	case map[string]interface{}:
    95  		format := d.FormatFromContentString(data)
    96  		return d.UnmarshalToMap([]byte(data), format)
    97  	case []interface{}:
    98  		// A standalone slice. Let YAML handle it.
    99  		return d.Unmarshal([]byte(data), YAML)
   100  	case bool:
   101  		return cast.ToBoolE(data)
   102  	case int:
   103  		return cast.ToIntE(data)
   104  	case int64:
   105  		return cast.ToInt64E(data)
   106  	case float64:
   107  		return cast.ToFloat64E(data)
   108  	default:
   109  		return nil, errors.Errorf("unmarshal: %T not supported", typ)
   110  	}
   111  }
   112  
   113  // Unmarshal will unmarshall data in format f into an interface{}.
   114  // This is what's needed for Hugo's /data handling.
   115  func (d Decoder) Unmarshal(data []byte, f Format) (interface{}, error) {
   116  	if data == nil {
   117  		switch f {
   118  		case CSV:
   119  			return make([][]string, 0), nil
   120  		default:
   121  			return make(map[string]interface{}), nil
   122  		}
   123  	}
   124  	var v interface{}
   125  	err := d.UnmarshalTo(data, f, &v)
   126  
   127  	return v, err
   128  }
   129  
   130  // UnmarshalTo unmarshals data in format f into v.
   131  func (d Decoder) UnmarshalTo(data []byte, f Format, v interface{}) error {
   132  	var err error
   133  
   134  	switch f {
   135  	case ORG:
   136  		err = d.unmarshalORG(data, v)
   137  	case JSON:
   138  		err = json.Unmarshal(data, v)
   139  	case XML:
   140  		var xmlRoot xml.Map
   141  		xmlRoot, err = xml.NewMapXml(data)
   142  
   143  		var xmlValue map[string]interface{}
   144  		if err == nil {
   145  			xmlRootName, err := xmlRoot.Root()
   146  			if err != nil {
   147  				return toFileError(f, errors.Wrap(err, "failed to unmarshal XML"))
   148  			}
   149  			xmlValue = xmlRoot[xmlRootName].(map[string]interface{})
   150  		}
   151  
   152  		switch v := v.(type) {
   153  		case *map[string]interface{}:
   154  			*v = xmlValue
   155  		case *interface{}:
   156  			*v = xmlValue
   157  		}
   158  	case TOML:
   159  		err = toml.Unmarshal(data, v)
   160  	case YAML:
   161  		err = yaml.Unmarshal(data, v)
   162  		if err != nil {
   163  			return toFileError(f, errors.Wrap(err, "failed to unmarshal YAML"))
   164  		}
   165  
   166  		// To support boolean keys, the YAML package unmarshals maps to
   167  		// map[interface{}]interface{}. Here we recurse through the result
   168  		// and change all maps to map[string]interface{} like we would've
   169  		// gotten from `json`.
   170  		var ptr interface{}
   171  		switch v.(type) {
   172  		case *map[string]interface{}:
   173  			ptr = *v.(*map[string]interface{})
   174  		case *interface{}:
   175  			ptr = *v.(*interface{})
   176  		default:
   177  			// Not a map.
   178  		}
   179  
   180  		if ptr != nil {
   181  			if mm, changed := stringifyMapKeys(ptr); changed {
   182  				switch v.(type) {
   183  				case *map[string]interface{}:
   184  					*v.(*map[string]interface{}) = mm.(map[string]interface{})
   185  				case *interface{}:
   186  					*v.(*interface{}) = mm
   187  				}
   188  			}
   189  		}
   190  	case CSV:
   191  		return d.unmarshalCSV(data, v)
   192  
   193  	default:
   194  		return errors.Errorf("unmarshal of format %q is not supported", f)
   195  	}
   196  
   197  	if err == nil {
   198  		return nil
   199  	}
   200  
   201  	return toFileError(f, errors.Wrap(err, "unmarshal failed"))
   202  }
   203  
   204  func (d Decoder) unmarshalCSV(data []byte, v interface{}) error {
   205  	r := csv.NewReader(bytes.NewReader(data))
   206  	r.Comma = d.Delimiter
   207  	r.Comment = d.Comment
   208  
   209  	records, err := r.ReadAll()
   210  	if err != nil {
   211  		return err
   212  	}
   213  
   214  	switch v.(type) {
   215  	case *interface{}:
   216  		*v.(*interface{}) = records
   217  	default:
   218  		return errors.Errorf("CSV cannot be unmarshaled into %T", v)
   219  
   220  	}
   221  
   222  	return nil
   223  }
   224  
   225  func parseORGDate(s string) string {
   226  	r := regexp.MustCompile(`[<\[](\d{4}-\d{2}-\d{2}) .*[>\]]`)
   227  	if m := r.FindStringSubmatch(s); m != nil {
   228  		return m[1]
   229  	}
   230  	return s
   231  }
   232  
   233  func (d Decoder) unmarshalORG(data []byte, v interface{}) error {
   234  	config := org.New()
   235  	config.Log = jww.WARN
   236  	document := config.Parse(bytes.NewReader(data), "")
   237  	if document.Error != nil {
   238  		return document.Error
   239  	}
   240  	frontMatter := make(map[string]interface{}, len(document.BufferSettings))
   241  	for k, v := range document.BufferSettings {
   242  		k = strings.ToLower(k)
   243  		if strings.HasSuffix(k, "[]") {
   244  			frontMatter[k[:len(k)-2]] = strings.Fields(v)
   245  		} else if k == "tags" || k == "categories" || k == "aliases" {
   246  			jww.WARN.Printf("Please use '#+%s[]:' notation, automatic conversion is deprecated.", k)
   247  			frontMatter[k] = strings.Fields(v)
   248  		} else if k == "date" {
   249  			frontMatter[k] = parseORGDate(v)
   250  		} else {
   251  			frontMatter[k] = v
   252  		}
   253  	}
   254  	switch v.(type) {
   255  	case *map[string]interface{}:
   256  		*v.(*map[string]interface{}) = frontMatter
   257  	default:
   258  		*v.(*interface{}) = frontMatter
   259  	}
   260  	return nil
   261  }
   262  
// toFileError passes err to herrors.ToFileError together with the string
// form of format f and returns the result.
func toFileError(f Format, err error) error {
	return herrors.ToFileError(string(f), err)
}
   266  
   267  // stringifyMapKeys recurses into in and changes all instances of
   268  // map[interface{}]interface{} to map[string]interface{}. This is useful to
   269  // work around the impedance mismatch between JSON and YAML unmarshaling that's
   270  // described here: https://github.com/go-yaml/yaml/issues/139
   271  //
   272  // Inspired by https://github.com/stripe/stripe-mock, MIT licensed
   273  func stringifyMapKeys(in interface{}) (interface{}, bool) {
   274  	switch in := in.(type) {
   275  	case []interface{}:
   276  		for i, v := range in {
   277  			if vv, replaced := stringifyMapKeys(v); replaced {
   278  				in[i] = vv
   279  			}
   280  		}
   281  	case map[string]interface{}:
   282  		for k, v := range in {
   283  			if vv, changed := stringifyMapKeys(v); changed {
   284  				in[k] = vv
   285  			}
   286  		}
   287  	case map[interface{}]interface{}:
   288  		res := make(map[string]interface{})
   289  		var (
   290  			ok  bool
   291  			err error
   292  		)
   293  		for k, v := range in {
   294  			var ks string
   295  
   296  			if ks, ok = k.(string); !ok {
   297  				ks, err = cast.ToStringE(k)
   298  				if err != nil {
   299  					ks = fmt.Sprintf("%v", k)
   300  				}
   301  			}
   302  			if vv, replaced := stringifyMapKeys(v); replaced {
   303  				res[ks] = vv
   304  			} else {
   305  				res[ks] = v
   306  			}
   307  		}
   308  		return res, true
   309  	}
   310  
   311  	return nil, false
   312  }