github.com/eliastor/durgaform@v0.0.0-20220816172711-d0ab2d17673e/internal/lang/funcs/encoding.go (about)

     1  package funcs
     2  
     3  import (
     4  	"bytes"
     5  	"compress/gzip"
     6  	"encoding/base64"
     7  	"fmt"
     8  	"log"
     9  	"net/url"
    10  	"unicode/utf8"
    11  
    12  	"github.com/zclconf/go-cty/cty"
    13  	"github.com/zclconf/go-cty/cty/function"
    14  	"golang.org/x/text/encoding/ianaindex"
    15  )
    16  
    17  // Base64DecodeFunc constructs a function that decodes a string containing a base64 sequence.
    18  var Base64DecodeFunc = function.New(&function.Spec{
    19  	Params: []function.Parameter{
    20  		{
    21  			Name:        "str",
    22  			Type:        cty.String,
    23  			AllowMarked: true,
    24  		},
    25  	},
    26  	Type: function.StaticReturnType(cty.String),
    27  	Impl: func(args []cty.Value, retType cty.Type) (cty.Value, error) {
    28  		str, strMarks := args[0].Unmark()
    29  		s := str.AsString()
    30  		sDec, err := base64.StdEncoding.DecodeString(s)
    31  		if err != nil {
    32  			return cty.UnknownVal(cty.String), fmt.Errorf("failed to decode base64 data %s", redactIfSensitive(s, strMarks))
    33  		}
    34  		if !utf8.Valid([]byte(sDec)) {
    35  			log.Printf("[DEBUG] the result of decoding the provided string is not valid UTF-8: %s", redactIfSensitive(sDec, strMarks))
    36  			return cty.UnknownVal(cty.String), fmt.Errorf("the result of decoding the provided string is not valid UTF-8")
    37  		}
    38  		return cty.StringVal(string(sDec)).WithMarks(strMarks), nil
    39  	},
    40  })
    41  
    42  // Base64EncodeFunc constructs a function that encodes a string to a base64 sequence.
    43  var Base64EncodeFunc = function.New(&function.Spec{
    44  	Params: []function.Parameter{
    45  		{
    46  			Name: "str",
    47  			Type: cty.String,
    48  		},
    49  	},
    50  	Type: function.StaticReturnType(cty.String),
    51  	Impl: func(args []cty.Value, retType cty.Type) (cty.Value, error) {
    52  		return cty.StringVal(base64.StdEncoding.EncodeToString([]byte(args[0].AsString()))), nil
    53  	},
    54  })
    55  
    56  // TextEncodeBase64Func constructs a function that encodes a string to a target encoding and then to a base64 sequence.
    57  var TextEncodeBase64Func = function.New(&function.Spec{
    58  	Params: []function.Parameter{
    59  		{
    60  			Name: "string",
    61  			Type: cty.String,
    62  		},
    63  		{
    64  			Name: "encoding",
    65  			Type: cty.String,
    66  		},
    67  	},
    68  	Type: function.StaticReturnType(cty.String),
    69  	Impl: func(args []cty.Value, retType cty.Type) (cty.Value, error) {
    70  		encoding, err := ianaindex.IANA.Encoding(args[1].AsString())
    71  		if err != nil || encoding == nil {
    72  			return cty.UnknownVal(cty.String), function.NewArgErrorf(1, "%q is not a supported IANA encoding name or alias in this Durgaform version", args[1].AsString())
    73  		}
    74  
    75  		encName, err := ianaindex.IANA.Name(encoding)
    76  		if err != nil { // would be weird, since we just read this encoding out
    77  			encName = args[1].AsString()
    78  		}
    79  
    80  		encoder := encoding.NewEncoder()
    81  		encodedInput, err := encoder.Bytes([]byte(args[0].AsString()))
    82  		if err != nil {
    83  			// The string representations of "err" disclose implementation
    84  			// details of the underlying library, and the main error we might
    85  			// like to return a special message for is unexported as
    86  			// golang.org/x/text/encoding/internal.RepertoireError, so this
    87  			// is just a generic error message for now.
    88  			//
    89  			// We also don't include the string itself in the message because
    90  			// it can typically be very large, contain newline characters,
    91  			// etc.
    92  			return cty.UnknownVal(cty.String), function.NewArgErrorf(0, "the given string contains characters that cannot be represented in %s", encName)
    93  		}
    94  
    95  		return cty.StringVal(base64.StdEncoding.EncodeToString(encodedInput)), nil
    96  	},
    97  })
    98  
    99  // TextDecodeBase64Func constructs a function that decodes a base64 sequence to a target encoding.
   100  var TextDecodeBase64Func = function.New(&function.Spec{
   101  	Params: []function.Parameter{
   102  		{
   103  			Name: "source",
   104  			Type: cty.String,
   105  		},
   106  		{
   107  			Name: "encoding",
   108  			Type: cty.String,
   109  		},
   110  	},
   111  	Type: function.StaticReturnType(cty.String),
   112  	Impl: func(args []cty.Value, retType cty.Type) (cty.Value, error) {
   113  		encoding, err := ianaindex.IANA.Encoding(args[1].AsString())
   114  		if err != nil || encoding == nil {
   115  			return cty.UnknownVal(cty.String), function.NewArgErrorf(1, "%q is not a supported IANA encoding name or alias in this Durgaform version", args[1].AsString())
   116  		}
   117  
   118  		encName, err := ianaindex.IANA.Name(encoding)
   119  		if err != nil { // would be weird, since we just read this encoding out
   120  			encName = args[1].AsString()
   121  		}
   122  
   123  		s := args[0].AsString()
   124  		sDec, err := base64.StdEncoding.DecodeString(s)
   125  		if err != nil {
   126  			switch err := err.(type) {
   127  			case base64.CorruptInputError:
   128  				return cty.UnknownVal(cty.String), function.NewArgErrorf(0, "the given value is has an invalid base64 symbol at offset %d", int(err))
   129  			default:
   130  				return cty.UnknownVal(cty.String), function.NewArgErrorf(0, "invalid source string: %w", err)
   131  			}
   132  
   133  		}
   134  
   135  		decoder := encoding.NewDecoder()
   136  		decoded, err := decoder.Bytes(sDec)
   137  		if err != nil || bytes.ContainsRune(decoded, '�') {
   138  			return cty.UnknownVal(cty.String), function.NewArgErrorf(0, "the given string contains symbols that are not defined for %s", encName)
   139  		}
   140  
   141  		return cty.StringVal(string(decoded)), nil
   142  	},
   143  })
   144  
   145  // Base64GzipFunc constructs a function that compresses a string with gzip and then encodes the result in
   146  // Base64 encoding.
   147  var Base64GzipFunc = function.New(&function.Spec{
   148  	Params: []function.Parameter{
   149  		{
   150  			Name: "str",
   151  			Type: cty.String,
   152  		},
   153  	},
   154  	Type: function.StaticReturnType(cty.String),
   155  	Impl: func(args []cty.Value, retType cty.Type) (cty.Value, error) {
   156  		s := args[0].AsString()
   157  
   158  		var b bytes.Buffer
   159  		gz := gzip.NewWriter(&b)
   160  		if _, err := gz.Write([]byte(s)); err != nil {
   161  			return cty.UnknownVal(cty.String), fmt.Errorf("failed to write gzip raw data: %w", err)
   162  		}
   163  		if err := gz.Flush(); err != nil {
   164  			return cty.UnknownVal(cty.String), fmt.Errorf("failed to flush gzip writer: %w", err)
   165  		}
   166  		if err := gz.Close(); err != nil {
   167  			return cty.UnknownVal(cty.String), fmt.Errorf("failed to close gzip writer: %w", err)
   168  		}
   169  		return cty.StringVal(base64.StdEncoding.EncodeToString(b.Bytes())), nil
   170  	},
   171  })
   172  
   173  // URLEncodeFunc constructs a function that applies URL encoding to a given string.
   174  var URLEncodeFunc = function.New(&function.Spec{
   175  	Params: []function.Parameter{
   176  		{
   177  			Name: "str",
   178  			Type: cty.String,
   179  		},
   180  	},
   181  	Type: function.StaticReturnType(cty.String),
   182  	Impl: func(args []cty.Value, retType cty.Type) (cty.Value, error) {
   183  		return cty.StringVal(url.QueryEscape(args[0].AsString())), nil
   184  	},
   185  })
   186  
   187  // Base64Decode decodes a string containing a base64 sequence.
   188  //
   189  // Durgaform uses the "standard" Base64 alphabet as defined in RFC 4648 section 4.
   190  //
   191  // Strings in the Durgaform language are sequences of unicode characters rather
   192  // than bytes, so this function will also interpret the resulting bytes as
   193  // UTF-8. If the bytes after Base64 decoding are _not_ valid UTF-8, this function
   194  // produces an error.
   195  func Base64Decode(str cty.Value) (cty.Value, error) {
   196  	return Base64DecodeFunc.Call([]cty.Value{str})
   197  }
   198  
   199  // Base64Encode applies Base64 encoding to a string.
   200  //
   201  // Durgaform uses the "standard" Base64 alphabet as defined in RFC 4648 section 4.
   202  //
   203  // Strings in the Durgaform language are sequences of unicode characters rather
   204  // than bytes, so this function will first encode the characters from the string
   205  // as UTF-8, and then apply Base64 encoding to the result.
   206  func Base64Encode(str cty.Value) (cty.Value, error) {
   207  	return Base64EncodeFunc.Call([]cty.Value{str})
   208  }
   209  
   210  // Base64Gzip compresses a string with gzip and then encodes the result in
   211  // Base64 encoding.
   212  //
   213  // Durgaform uses the "standard" Base64 alphabet as defined in RFC 4648 section 4.
   214  //
   215  // Strings in the Durgaform language are sequences of unicode characters rather
   216  // than bytes, so this function will first encode the characters from the string
   217  // as UTF-8, then apply gzip compression, and then finally apply Base64 encoding.
   218  func Base64Gzip(str cty.Value) (cty.Value, error) {
   219  	return Base64GzipFunc.Call([]cty.Value{str})
   220  }
   221  
   222  // URLEncode applies URL encoding to a given string.
   223  //
   224  // This function identifies characters in the given string that would have a
   225  // special meaning when included as a query string argument in a URL and
   226  // escapes them using RFC 3986 "percent encoding".
   227  //
   228  // If the given string contains non-ASCII characters, these are first encoded as
   229  // UTF-8 and then percent encoding is applied separately to each UTF-8 byte.
   230  func URLEncode(str cty.Value) (cty.Value, error) {
   231  	return URLEncodeFunc.Call([]cty.Value{str})
   232  }
   233  
   234  // TextEncodeBase64 applies Base64 encoding to a string that was encoded before with a target encoding.
   235  //
   236  // Durgaform uses the "standard" Base64 alphabet as defined in RFC 4648 section 4.
   237  //
   238  // First step is to apply the target IANA encoding (e.g. UTF-16LE).
   239  // Strings in the Durgaform language are sequences of unicode characters rather
   240  // than bytes, so this function will first encode the characters from the string
   241  // as UTF-8, and then apply Base64 encoding to the result.
   242  func TextEncodeBase64(str, enc cty.Value) (cty.Value, error) {
   243  	return TextEncodeBase64Func.Call([]cty.Value{str, enc})
   244  }
   245  
   246  // TextDecodeBase64 decodes a string containing a base64 sequence whereas a specific encoding of the string is expected.
   247  //
   248  // Durgaform uses the "standard" Base64 alphabet as defined in RFC 4648 section 4.
   249  //
   250  // Strings in the Durgaform language are sequences of unicode characters rather
   251  // than bytes, so this function will also interpret the resulting bytes as
   252  // the target encoding.
   253  func TextDecodeBase64(str, enc cty.Value) (cty.Value, error) {
   254  	return TextDecodeBase64Func.Call([]cty.Value{str, enc})
   255  }