github.com/verrazzano/verrazzano@v1.7.1/tools/vz/pkg/helpers/vzsanitize.go (about)

     1  // Copyright (c) 2022, 2024, Oracle and/or its affiliates.
     2  // Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl.
     3  
     4  package helpers
     5  
     6  import (
     7  	"crypto/sha256"
     8  	"encoding/csv"
     9  	"encoding/hex"
    10  	"fmt"
    11  	"os"
    12  	"regexp"
    13  	"strings"
    14  	"sync"
    15  
    16  	"github.com/verrazzano/verrazzano/tools/vz/pkg/constants"
    17  )
    18  
    19  type regexPlan struct {
    20  	preprocess  func(string) string
    21  	regex       string
    22  	postprocess func(string) string
    23  }
    24  
    25  var regexToReplacementList = []regexPlan{}
    26  var KnownHostNames = make(map[string]bool)
    27  var knownHostNamesMutex = &sync.Mutex{}
    28  
    29  // A map to keep track of all the strings that have been redacted.
    30  var redactedValues = make(map[string]string)
    31  var redactedValuesMutex = &sync.Mutex{}
    32  
    33  var ipv4Regex = regexPlan{regex: "[[:digit:]]{1,3}\\.[[:digit:]]{1,3}\\.[[:digit:]]{1,3}\\.[[:digit:]]{1,3}"}
    34  var userData = regexPlan{regex: "\"user_data\":\\s+\"[A-Za-z0-9=+]+\""}
    35  var sshAuthKeys = regexPlan{regex: "(sk-)?(ssh|ecdsa)-[a-zA-Z0-9\\-\\.@]+\\s+AAAA[A-Za-z0-9\\-\\/\\+]+[=]{0,3}( .*)*"}
    36  var ocid = regexPlan{regex: "ocid1\\.[[:lower:]]+\\.[[:alnum:]]+\\.[[:alnum:]]*\\.[[:alnum:]]+"}
    37  var opcid = regexPlan{
    38  	preprocess: func(s string) string {
    39  		return strings.Trim(strings.TrimPrefix(s, "Opc request id:"), " ")
    40  	},
    41  	regex: "(?:Opc request id:) *[A-Z,a-z,/,0-9]+",
    42  	postprocess: func(s string) string {
    43  		return "Opc request id: " + s
    44  	},
    45  }
    46  
    47  // InitRegexToReplacementMap Initialize the regex string to replacement string map
    48  // Append to this map for any future additions
    49  func InitRegexToReplacementMap() {
    50  	regexToReplacementList = append(regexToReplacementList, ipv4Regex)
    51  	regexToReplacementList = append(regexToReplacementList, userData)
    52  	regexToReplacementList = append(regexToReplacementList, sshAuthKeys)
    53  	regexToReplacementList = append(regexToReplacementList, ocid)
    54  	regexToReplacementList = append(regexToReplacementList, opcid)
    55  }
    56  
    57  // SanitizeString sanitizes each line in a given file,
    58  // Sanitizes based on the regex map initialized above, which is currently filtering for IPv4 addresses and hostnames
    59  //
    60  // The redactedValuesOverride parameter can be used to override the default redactedValues map for keeping track of
    61  // redacted strings.
    62  func SanitizeString(l string, redactedValuesOverride map[string]string) string {
    63  	redactedValuesMutex.Lock()
    64  	if len(regexToReplacementList) == 0 {
    65  		InitRegexToReplacementMap()
    66  	}
    67  	redactedValuesMutex.Unlock()
    68  	knownHostNamesMutex.Lock()
    69  	for knownHost := range KnownHostNames {
    70  		wholeOccurrenceHostPattern := "\"" + knownHost + "\""
    71  		l = regexp.MustCompile(wholeOccurrenceHostPattern).ReplaceAllString(l, "\""+getSha256Hash(knownHost)+"\"")
    72  	}
    73  	knownHostNamesMutex.Unlock()
    74  	for _, eachRegex := range regexToReplacementList {
    75  		redactedValuesMutex.Lock()
    76  		l = regexp.MustCompile(eachRegex.regex).ReplaceAllStringFunc(l, eachRegex.compilePlan(redactedValuesOverride))
    77  		redactedValuesMutex.Unlock()
    78  	}
    79  	return l
    80  }
    81  
    82  // WriteRedactionMapFile creates a CSV file at the provided outputFilePath to document all the values this tool has
    83  // redacted so far, stored in the redactedValues (or redactedValuesOverride) map.
    84  func WriteRedactionMapFile(outputFilePath string, redactedValuesOverride map[string]string) error {
    85  	f, err := os.OpenFile(outputFilePath, os.O_APPEND|os.O_WRONLY|os.O_CREATE, 0644)
    86  	if err != nil {
    87  		return fmt.Errorf(createFileError, outputFilePath, err.Error())
    88  	}
    89  	defer f.Close()
    90  
    91  	redactedValuesMutex.Lock()
    92  	redactedValues := determineRedactedValuesMap(redactedValuesOverride)
    93  	csvWriter := csv.NewWriter(f)
    94  	for s, r := range redactedValues {
    95  		if err = csvWriter.Write([]string{r, s}); err != nil {
    96  			LogError(fmt.Sprintf("An error occurred while writing the file %s: %s\n", outputFilePath, err.Error()))
    97  			return err
    98  		}
    99  	}
   100  	redactedValuesMutex.Unlock()
   101  	csvWriter.Flush()
   102  	return nil
   103  }
   104  
   105  // compilePlan returns a function which processes strings according the the regexPlan rp.
   106  func (rp regexPlan) compilePlan(redactedValuesOverride map[string]string) func(string) string {
   107  	return func(s string) string {
   108  		if rp.preprocess != nil {
   109  			s = rp.preprocess(s)
   110  		}
   111  		s = redact(s, redactedValuesOverride)
   112  		if rp.postprocess != nil {
   113  			return rp.postprocess(s)
   114  		}
   115  		return s
   116  	}
   117  }
   118  
   119  // redact outputs a string, representing a piece of redacted text.
   120  // If a new string is encountered, keep track of it.
   121  func redact(s string, redactedValuesOverride map[string]string) string {
   122  	redactedValues := determineRedactedValuesMap(redactedValuesOverride)
   123  	if r, ok := redactedValues[s]; ok {
   124  		return r
   125  	}
   126  	r := constants.RedactionPrefix + getSha256Hash(s)
   127  	redactedValues[s] = r
   128  	return r
   129  }
   130  
   131  // getSha256Hash generates the one way hash for the input string
   132  func getSha256Hash(line string) string {
   133  	data := []byte(line)
   134  	hashedVal := sha256.Sum256(data)
   135  	hexString := hex.EncodeToString(hashedVal[:])
   136  	return hexString
   137  }
   138  
   139  // determineRedactedValuesMap returns the map of redacted values to use, according to the override provided
   140  func determineRedactedValuesMap(redactedValuesOverride map[string]string) map[string]string {
   141  	if redactedValuesOverride != nil {
   142  		return redactedValuesOverride
   143  	}
   144  	return redactedValues
   145  }