github.com/cyverse/go-irodsclient@v0.13.2/irods/connection/xml.go (about)

     1  package connection
     2  
     3  import (
     4  	"bytes"
     5  	"strconv"
     6  	"strings"
     7  	"unicode/utf8"
     8  
     9  	"github.com/cyverse/go-irodsclient/irods/message"
    10  	"golang.org/x/xerrors"
    11  )
    12  
    13  var (
    14  	// escapes from xml.Encode
    15  	escQuot = []byte(""") // shorter than """, \"
    16  	escApos = []byte("'") // shorter than "'", \'
    17  	escTab  = []byte("	")
    18  	escNL   = []byte("
")
    19  	escCR   = []byte("
")
    20  	escFFFD = []byte("\uFFFD") // Unicode replacement character
    21  
    22  	// escapes for irods
    23  	irodsEscQuot = []byte(""")
    24  	irodsEscApos = []byte("'")
    25  )
    26  
// ErrInvalidUTF8 is returned by the XML pre- and postprocessing functions in
// this file when the input contains a byte sequence that is not valid UTF-8.
var ErrInvalidUTF8 = xerrors.Errorf("invalid utf-8 character")
    29  
    30  func (conn *IRODSConnection) talksCorrectXML() bool {
    31  	if conn.serverVersion == nil {
    32  		// We don't know the server version yet, assume the best
    33  		return true
    34  	}
    35  
    36  	if !strings.HasPrefix(conn.serverVersion.ReleaseVersion, "rods") {
    37  		// Strange, but hopefully it talks correct xml
    38  		return true
    39  	}
    40  
    41  	version := strings.Split(conn.serverVersion.ReleaseVersion[4:], ".")
    42  
    43  	if len(version) != 3 {
    44  		// Strange, but hopefully it talks correct xml
    45  		return true
    46  	}
    47  
    48  	major, _ := strconv.Atoi(version[0])
    49  	minor, _ := strconv.Atoi(version[1])
    50  	release, _ := strconv.Atoi(version[2])
    51  
    52  	return major > 4 || (major == 4 && minor > 2) || (major == 4 && minor == 2 && release > 8)
    53  }
    54  
    55  // PostprocessMessage prepares a message that is received from irods for XML parsing.
    56  func (conn *IRODSConnection) PostprocessMessage(msg *message.IRODSMessage) error {
    57  	if msg.Body == nil || msg.Body.Message == nil {
    58  		return nil
    59  	}
    60  
    61  	var err error
    62  
    63  	msg.Body.Message, err = conn.PostprocessXML(msg.Body.Message)
    64  	msg.Header.MessageLen = uint32(len(msg.Body.Message))
    65  
    66  	return err
    67  }
    68  
    69  // PostprocessXML translates IRODS XML into valid XML.
    70  // We fix the invalid encoding of ` as &quot.
    71  func (conn *IRODSConnection) PostprocessXML(in []byte) ([]byte, error) {
    72  	buf := in
    73  	out := &bytes.Buffer{}
    74  
    75  	for len(buf) > 0 {
    76  		switch {
    77  		// turn " into `
    78  		case bytes.HasPrefix(buf, irodsEscQuot) && !conn.talksCorrectXML():
    79  			out.WriteByte('`')
    80  			buf = buf[len(irodsEscQuot):]
    81  		// turn ' into '
    82  		case buf[0] == '\'' && !conn.talksCorrectXML():
    83  			out.Write(escApos)
    84  			buf = buf[1:]
    85  		// check utf8 characters for validity
    86  		default:
    87  			r, size := utf8.DecodeRune(buf)
    88  			if r == utf8.RuneError && size == 1 {
    89  				return in, ErrInvalidUTF8
    90  			}
    91  
    92  			if isValidChar(r) {
    93  				out.Write(buf[:size])
    94  			} else {
    95  				out.Write(escFFFD)
    96  			}
    97  
    98  			buf = buf[size:]
    99  		}
   100  	}
   101  
   102  	return out.Bytes(), nil
   103  }
   104  
   105  // PreprocessMessage modifies a request message to use irods dialect for XML.
   106  func (conn *IRODSConnection) PreprocessMessage(msg *message.IRODSMessage, forPassword bool) error {
   107  	if msg.Body == nil || msg.Body.Message == nil {
   108  		return nil
   109  	}
   110  
   111  	var err error
   112  
   113  	if forPassword {
   114  		msg.Body.Message, err = conn.PreprocessXMLForPassword(msg.Body.Message)
   115  	} else {
   116  		msg.Body.Message, err = conn.PreprocessXML(msg.Body.Message)
   117  	}
   118  
   119  	msg.Header.MessageLen = uint32(len(msg.Body.Message))
   120  
   121  	return err
   122  }
   123  
   124  // PreprocessXML translates output of xml.Marshal into XML that IRODS understands.
   125  func (conn *IRODSConnection) PreprocessXML(in []byte) ([]byte, error) {
   126  	buf := in
   127  	out := &bytes.Buffer{}
   128  
   129  	for len(buf) > 0 {
   130  		switch {
   131  		// turn " into "
   132  		case bytes.HasPrefix(buf, escQuot):
   133  			out.Write(irodsEscQuot)
   134  			buf = buf[len(escQuot):]
   135  		// turn &#39 into ' or '
   136  		case bytes.HasPrefix(buf, escApos):
   137  			if conn.talksCorrectXML() {
   138  				out.Write(irodsEscApos)
   139  			} else {
   140  				out.WriteByte('\'')
   141  			}
   142  			buf = buf[len(escApos):]
   143  		// irods does not decode encoded tabs
   144  		case bytes.HasPrefix(buf, escTab):
   145  			out.WriteByte('\t')
   146  			buf = buf[len(escTab):]
   147  		// irods does not decode encoded carriage returns
   148  		case bytes.HasPrefix(buf, escCR):
   149  			out.WriteByte('\r')
   150  			buf = buf[len(escCR):]
   151  		// irods does not decode encoded newlines
   152  		case bytes.HasPrefix(buf, escNL):
   153  			out.WriteByte('\n')
   154  			buf = buf[len(escNL):]
   155  		// turn ` into '
   156  		case buf[0] == '`' && !conn.talksCorrectXML():
   157  			out.Write(irodsEscApos)
   158  			buf = buf[1:]
   159  		// pass utf8 characters
   160  		default:
   161  			r, size := utf8.DecodeRune(buf)
   162  			if r == utf8.RuneError && size == 1 {
   163  				return in, ErrInvalidUTF8
   164  			}
   165  
   166  			out.Write(buf[:size])
   167  			buf = buf[size:]
   168  		}
   169  	}
   170  
   171  	return out.Bytes(), nil
   172  }
   173  
   174  // PreprocessXMLForPassword translates output of xml.Marshal into XML that IRODS understands.
   175  func (conn *IRODSConnection) PreprocessXMLForPassword(in []byte) ([]byte, error) {
   176  	buf := in
   177  	out := &bytes.Buffer{}
   178  
   179  	for len(buf) > 0 {
   180  		switch {
   181  		// turn " into \"
   182  		case bytes.HasPrefix(buf, escQuot):
   183  			out.WriteByte('"')
   184  			buf = buf[len(escQuot):]
   185  		// turn ' into \'
   186  		case bytes.HasPrefix(buf, escApos):
   187  			out.WriteByte('\'')
   188  			buf = buf[len(escApos):]
   189  		// irods does not decode encoded tabs
   190  		case bytes.HasPrefix(buf, escTab):
   191  			out.WriteByte('\t')
   192  			buf = buf[len(escTab):]
   193  		// irods does not decode encoded carriage returns
   194  		case bytes.HasPrefix(buf, escCR):
   195  			out.WriteByte('\r')
   196  			buf = buf[len(escCR):]
   197  		// irods does not decode encoded newlines
   198  		case bytes.HasPrefix(buf, escNL):
   199  			out.WriteByte('\n')
   200  			buf = buf[len(escNL):]
   201  		// pass utf8 characters
   202  		default:
   203  			r, size := utf8.DecodeRune(buf)
   204  			if r == utf8.RuneError && size == 1 {
   205  				return in, ErrInvalidUTF8
   206  			}
   207  
   208  			out.Write(buf[:size])
   209  			buf = buf[size:]
   210  		}
   211  	}
   212  
   213  	return out.Bytes(), nil
   214  }
   215  
   216  func isValidChar(r rune) bool {
   217  	return r == 0x09 ||
   218  		r == 0x0A ||
   219  		r == 0x0D ||
   220  		r >= 0x20 && r <= 0xD7FF ||
   221  		r >= 0xE000 && r <= 0xFFFD ||
   222  		r >= 0x10000 && r <= 0x10FFFF
   223  }