github.com/graemephi/kahugo@v0.62.3-0.20211121071557-d78c0423784d/parser/metadecoders/decoder.go (about)

     1  // Copyright 2018 The Hugo Authors. All rights reserved.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  // http://www.apache.org/licenses/LICENSE-2.0
     7  //
     8  // Unless required by applicable law or agreed to in writing, software
     9  // distributed under the License is distributed on an "AS IS" BASIS,
    10  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  
    14  package metadecoders
    15  
    16  import (
    17  	"bytes"
    18  	"encoding/csv"
    19  	"encoding/json"
    20  	"fmt"
    21  	"regexp"
    22  	"strings"
    23  
    24  	"github.com/gohugoio/hugo/common/herrors"
    25  	"github.com/niklasfasching/go-org/org"
    26  
    27  	toml "github.com/pelletier/go-toml/v2"
    28  	"github.com/pkg/errors"
    29  	"github.com/spf13/afero"
    30  	"github.com/spf13/cast"
    31  	jww "github.com/spf13/jwalterweatherman"
    32  	yaml "gopkg.in/yaml.v2"
    33  )
    34  
// Decoder provides some configuration options for the decoders.
//
// Both fields are only consulted by the CSV decoder, where they are copied
// onto the csv.Reader's Comma and Comment fields.
type Decoder struct {
	// Delimiter is the field delimiter used in the CSV decoder. It defaults to ','.
	Delimiter rune

	// Comment, if not 0, is the comment character used in the CSV decoder. Lines beginning with the
	// Comment character without preceding whitespace are ignored.
	Comment rune
}
    44  
    45  // OptionsKey is used in cache keys.
    46  func (d Decoder) OptionsKey() string {
    47  	var sb strings.Builder
    48  	sb.WriteRune(d.Delimiter)
    49  	sb.WriteRune(d.Comment)
    50  	return sb.String()
    51  }
    52  
// Default is a Decoder in its default configuration: fields are delimited
// by ',' and Comment is left at its zero value, so no comment character is set.
var Default = Decoder{
	Delimiter: ',',
}
    57  
    58  // UnmarshalToMap will unmarshall data in format f into a new map. This is
    59  // what's needed for Hugo's front matter decoding.
    60  func (d Decoder) UnmarshalToMap(data []byte, f Format) (map[string]interface{}, error) {
    61  	m := make(map[string]interface{})
    62  	if data == nil {
    63  		return m, nil
    64  	}
    65  
    66  	err := d.UnmarshalTo(data, f, &m)
    67  
    68  	return m, err
    69  }
    70  
    71  // UnmarshalFileToMap is the same as UnmarshalToMap, but reads the data from
    72  // the given filename.
    73  func (d Decoder) UnmarshalFileToMap(fs afero.Fs, filename string) (map[string]interface{}, error) {
    74  	format := FormatFromString(filename)
    75  	if format == "" {
    76  		return nil, errors.Errorf("%q is not a valid configuration format", filename)
    77  	}
    78  
    79  	data, err := afero.ReadFile(fs, filename)
    80  	if err != nil {
    81  		return nil, err
    82  	}
    83  	return d.UnmarshalToMap(data, format)
    84  }
    85  
    86  // UnmarshalStringTo tries to unmarshal data to a new instance of type typ.
    87  func (d Decoder) UnmarshalStringTo(data string, typ interface{}) (interface{}, error) {
    88  	data = strings.TrimSpace(data)
    89  	// We only check for the possible types in YAML, JSON and TOML.
    90  	switch typ.(type) {
    91  	case string:
    92  		return data, nil
    93  	case map[string]interface{}:
    94  		format := d.FormatFromContentString(data)
    95  		return d.UnmarshalToMap([]byte(data), format)
    96  	case []interface{}:
    97  		// A standalone slice. Let YAML handle it.
    98  		return d.Unmarshal([]byte(data), YAML)
    99  	case bool:
   100  		return cast.ToBoolE(data)
   101  	case int:
   102  		return cast.ToIntE(data)
   103  	case int64:
   104  		return cast.ToInt64E(data)
   105  	case float64:
   106  		return cast.ToFloat64E(data)
   107  	default:
   108  		return nil, errors.Errorf("unmarshal: %T not supported", typ)
   109  	}
   110  }
   111  
   112  // Unmarshal will unmarshall data in format f into an interface{}.
   113  // This is what's needed for Hugo's /data handling.
   114  func (d Decoder) Unmarshal(data []byte, f Format) (interface{}, error) {
   115  	if data == nil {
   116  		switch f {
   117  		case CSV:
   118  			return make([][]string, 0), nil
   119  		default:
   120  			return make(map[string]interface{}), nil
   121  		}
   122  	}
   123  	var v interface{}
   124  	err := d.UnmarshalTo(data, f, &v)
   125  
   126  	return v, err
   127  }
   128  
   129  // UnmarshalTo unmarshals data in format f into v.
   130  func (d Decoder) UnmarshalTo(data []byte, f Format, v interface{}) error {
   131  	var err error
   132  
   133  	switch f {
   134  	case ORG:
   135  		err = d.unmarshalORG(data, v)
   136  	case JSON:
   137  		err = json.Unmarshal(data, v)
   138  	case TOML:
   139  		err = toml.Unmarshal(data, v)
   140  	case YAML:
   141  		err = yaml.Unmarshal(data, v)
   142  		if err != nil {
   143  			return toFileError(f, errors.Wrap(err, "failed to unmarshal YAML"))
   144  		}
   145  
   146  		// To support boolean keys, the YAML package unmarshals maps to
   147  		// map[interface{}]interface{}. Here we recurse through the result
   148  		// and change all maps to map[string]interface{} like we would've
   149  		// gotten from `json`.
   150  		var ptr interface{}
   151  		switch v.(type) {
   152  		case *map[string]interface{}:
   153  			ptr = *v.(*map[string]interface{})
   154  		case *interface{}:
   155  			ptr = *v.(*interface{})
   156  		default:
   157  			// Not a map.
   158  		}
   159  
   160  		if ptr != nil {
   161  			if mm, changed := stringifyMapKeys(ptr); changed {
   162  				switch v.(type) {
   163  				case *map[string]interface{}:
   164  					*v.(*map[string]interface{}) = mm.(map[string]interface{})
   165  				case *interface{}:
   166  					*v.(*interface{}) = mm
   167  				}
   168  			}
   169  		}
   170  	case CSV:
   171  		return d.unmarshalCSV(data, v)
   172  
   173  	default:
   174  		return errors.Errorf("unmarshal of format %q is not supported", f)
   175  	}
   176  
   177  	if err == nil {
   178  		return nil
   179  	}
   180  
   181  	return toFileError(f, errors.Wrap(err, "unmarshal failed"))
   182  }
   183  
   184  func (d Decoder) unmarshalCSV(data []byte, v interface{}) error {
   185  	r := csv.NewReader(bytes.NewReader(data))
   186  	r.Comma = d.Delimiter
   187  	r.Comment = d.Comment
   188  
   189  	records, err := r.ReadAll()
   190  	if err != nil {
   191  		return err
   192  	}
   193  
   194  	switch v.(type) {
   195  	case *interface{}:
   196  		*v.(*interface{}) = records
   197  	default:
   198  		return errors.Errorf("CSV cannot be unmarshaled into %T", v)
   199  
   200  	}
   201  
   202  	return nil
   203  }
   204  
   205  func parseORGDate(s string) string {
   206  	r := regexp.MustCompile(`[<\[](\d{4}-\d{2}-\d{2}) .*[>\]]`)
   207  	if m := r.FindStringSubmatch(s); m != nil {
   208  		return m[1]
   209  	}
   210  	return s
   211  }
   212  
   213  func (d Decoder) unmarshalORG(data []byte, v interface{}) error {
   214  	config := org.New()
   215  	config.Log = jww.WARN
   216  	document := config.Parse(bytes.NewReader(data), "")
   217  	if document.Error != nil {
   218  		return document.Error
   219  	}
   220  	frontMatter := make(map[string]interface{}, len(document.BufferSettings))
   221  	for k, v := range document.BufferSettings {
   222  		k = strings.ToLower(k)
   223  		if strings.HasSuffix(k, "[]") {
   224  			frontMatter[k[:len(k)-2]] = strings.Fields(v)
   225  		} else if k == "tags" || k == "categories" || k == "aliases" {
   226  			jww.WARN.Printf("Please use '#+%s[]:' notation, automatic conversion is deprecated.", k)
   227  			frontMatter[k] = strings.Fields(v)
   228  		} else if k == "date" {
   229  			frontMatter[k] = parseORGDate(v)
   230  		} else {
   231  			frontMatter[k] = v
   232  		}
   233  	}
   234  	switch v.(type) {
   235  	case *map[string]interface{}:
   236  		*v.(*map[string]interface{}) = frontMatter
   237  	default:
   238  		*v.(*interface{}) = frontMatter
   239  	}
   240  	return nil
   241  }
   242  
// toFileError passes err to herrors.ToFileError, supplying the format f
// converted to a plain string as the first argument.
func toFileError(f Format, err error) error {
	return herrors.ToFileError(string(f), err)
}
   246  
   247  // stringifyMapKeys recurses into in and changes all instances of
   248  // map[interface{}]interface{} to map[string]interface{}. This is useful to
   249  // work around the impedance mismatch between JSON and YAML unmarshaling that's
   250  // described here: https://github.com/go-yaml/yaml/issues/139
   251  //
   252  // Inspired by https://github.com/stripe/stripe-mock, MIT licensed
   253  func stringifyMapKeys(in interface{}) (interface{}, bool) {
   254  	switch in := in.(type) {
   255  	case []interface{}:
   256  		for i, v := range in {
   257  			if vv, replaced := stringifyMapKeys(v); replaced {
   258  				in[i] = vv
   259  			}
   260  		}
   261  	case map[string]interface{}:
   262  		for k, v := range in {
   263  			if vv, changed := stringifyMapKeys(v); changed {
   264  				in[k] = vv
   265  			}
   266  		}
   267  	case map[interface{}]interface{}:
   268  		res := make(map[string]interface{})
   269  		var (
   270  			ok  bool
   271  			err error
   272  		)
   273  		for k, v := range in {
   274  			var ks string
   275  
   276  			if ks, ok = k.(string); !ok {
   277  				ks, err = cast.ToStringE(k)
   278  				if err != nil {
   279  					ks = fmt.Sprintf("%v", k)
   280  				}
   281  			}
   282  			if vv, replaced := stringifyMapKeys(v); replaced {
   283  				res[ks] = vv
   284  			} else {
   285  				res[ks] = v
   286  			}
   287  		}
   288  		return res, true
   289  	}
   290  
   291  	return nil, false
   292  }