github.com/qri-io/qri@v0.10.1-0.20220104210721-c771715036cb/repo/parse.go (about)

     1  package repo
     2  
     3  import (
     4  	"fmt"
     5  	"strings"
     6  
     7  	"github.com/mr-tron/base58/base58"
     8  	"github.com/multiformats/go-multihash"
     9  	"github.com/qri-io/qri/profile"
    10  	reporef "github.com/qri-io/qri/repo/ref"
    11  )
    12  
    13  // ParseDatasetRef decodes a dataset reference from a string value
    14  // It’s possible to refer to a dataset in a number of ways.
    15  // The full definition of a dataset reference is as follows:
    16  //     dataset_reference = peer_name/dataset_name@peer_id/network/hash
    17  //
    18  // we swap in defaults as follows, all of which are represented as
    19  // empty strings:
    20  //     network - defaults to /ipfs/
    21  //     hash - tip of version history (latest known commit)
    22  //
    23  // these defaults are currently enforced by convention.
    24  // TODO - make Dataset Ref parsing the responisiblity of the Repo
    25  // interface, replacing empty strings with actual defaults
    26  //
    27  // dataset names & hashes are disambiguated by checking if the input
    28  // parses to a valid multihash after base58 decoding.
    29  // through defaults & base58 checking the following should all parse:
    30  //     peer_name/dataset_name
    31  //     /network/hash
    32  //     peername
    33  //     peer_id
    34  //     @peer_id
    35  //     @peer_id/network/hash
    36  //
    37  // see tests for more exmples
    38  //
    39  // TODO - add validation that prevents peernames from being
    40  // valid base58 multihashes and makes sure hashes are actually valid base58 multihashes
    41  // TODO - figure out how IPFS CID's play into this
    42  func ParseDatasetRef(ref string) (reporef.DatasetRef, error) {
    43  	if ref == "" {
    44  		return reporef.DatasetRef{}, ErrEmptyRef
    45  	}
    46  
    47  	var (
    48  		// nameRefString string
    49  		dsr = reporef.DatasetRef{}
    50  		err error
    51  	)
    52  
    53  	// if there is an @ symbol, we are dealing with a reporef.DatasetRef
    54  	// with an identifier
    55  	atIndex := strings.Index(ref, "@")
    56  
    57  	if atIndex != -1 {
    58  
    59  		dsr.Peername, dsr.Name = parseAlias(ref[:atIndex])
    60  		dsr.ProfileID, dsr.Path, err = parseIdentifiers(ref[atIndex+1:])
    61  
    62  	} else {
    63  
    64  		var peername, datasetname, pid bool
    65  		toks := strings.Split(ref, "/")
    66  
    67  		for i, tok := range toks {
    68  			if isBase58Multihash(tok) {
    69  				// first hash we encounter is a peerID
    70  				if !pid {
    71  					dsr.ProfileID, _ = profile.IDB58Decode(tok)
    72  					pid = true
    73  					continue
    74  				}
    75  
    76  				if !isBase58Multihash(toks[i-1]) {
    77  					dsr.Path = fmt.Sprintf("/%s/%s", toks[i-1], strings.Join(toks[i:], "/"))
    78  				} else {
    79  					dsr.Path = fmt.Sprintf("/ipfs/%s", strings.Join(toks[i:], "/"))
    80  				}
    81  				break
    82  			}
    83  
    84  			if !peername {
    85  				dsr.Peername = tok
    86  				peername = true
    87  				continue
    88  			}
    89  
    90  			if !datasetname {
    91  				dsr.Name = tok
    92  				datasetname = true
    93  				continue
    94  			}
    95  
    96  			dsr.Path = strings.Join(toks[i:], "/")
    97  			break
    98  		}
    99  	}
   100  
   101  	if dsr.ProfileID == "" && dsr.Peername == "" && dsr.Name == "" && dsr.Path == "" {
   102  		err = fmt.Errorf("malformed reporef.DatasetRef string: %s", ref)
   103  		return dsr, err
   104  	}
   105  
   106  	// if dsr.ProfileID != "" {
   107  	// 	if !isBase58Multihash(dsr.ProfileID) {
   108  	// 		err = fmt.Errorf("invalid ProfileID: '%s'", dsr.ProfileID)
   109  	// 		return dsr, err
   110  	// 	}
   111  	// }
   112  
   113  	return dsr, err
   114  }
   115  
   116  func parseAlias(alias string) (peer, dataset string) {
   117  	for i, tok := range strings.Split(alias, "/") {
   118  		switch i {
   119  		case 0:
   120  			peer = tok
   121  		case 1:
   122  			dataset = tok
   123  		}
   124  	}
   125  	return
   126  }
   127  
   128  func parseIdentifiers(ids string) (profileID profile.ID, path string, err error) {
   129  
   130  	toks := strings.Split(ids, "/")
   131  	switch len(toks) {
   132  	case 0:
   133  		err = fmt.Errorf("malformed reporef.DatasetRef identifier: %s", ids)
   134  	case 1:
   135  		if toks[0] != "" {
   136  			profileID, err = profile.IDB58Decode(toks[0])
   137  			// if !isBase58Multihash(toks[0]) {
   138  			// 	err = fmt.Errorf("'%s' is not a base58 multihash", ids)
   139  			// }
   140  
   141  			return
   142  		}
   143  	case 2:
   144  		if pid, e := profile.IDB58Decode(toks[0]); e == nil {
   145  			profileID = pid
   146  		}
   147  
   148  		if isBase58Multihash(toks[0]) && isBase58Multihash(toks[1]) {
   149  			toks[1] = fmt.Sprintf("/ipfs/%s", toks[1])
   150  		}
   151  
   152  		path = toks[1]
   153  	default:
   154  		if pid, e := profile.IDB58Decode(toks[0]); e == nil {
   155  			profileID = pid
   156  		}
   157  
   158  		path = fmt.Sprintf("/%s/%s", toks[1], toks[2])
   159  		return
   160  	}
   161  
   162  	return
   163  }
   164  
   165  // TODO - this could be more robust?
   166  func stripProtocol(ref string) string {
   167  	if strings.HasPrefix(ref, "/ipfs/") {
   168  		return ref[len("/ipfs/"):]
   169  	}
   170  	return ref
   171  }
   172  
   173  func isBase58Multihash(hash string) bool {
   174  	data, err := base58.Decode(hash)
   175  	if err != nil {
   176  		return false
   177  	}
   178  	if _, err := multihash.Decode(data); err != nil {
   179  		return false
   180  	}
   181  
   182  	return true
   183  }