github.com/filecoin-project/bacalhau@v0.3.23-0.20230228154132-45c989550ace/pkg/test/devstack/extract_car_test.go (about)

     1  //go:build integration
     2  
     3  package devstack
     4  
     5  import (
     6  	"bytes"
     7  	"context"
     8  	"errors"
     9  	"fmt"
    10  	"io"
    11  	"os"
    12  	"path"
    13  	"path/filepath"
    14  
    15  	"github.com/ipfs/go-cid"
    16  	"github.com/ipfs/go-unixfsnode"
    17  	"github.com/ipfs/go-unixfsnode/data"
    18  	"github.com/ipfs/go-unixfsnode/file"
    19  	"github.com/ipld/go-car/v2/blockstore"
    20  	dagpb "github.com/ipld/go-codec-dagpb"
    21  	"github.com/ipld/go-ipld-prime"
    22  	cidlink "github.com/ipld/go-ipld-prime/linking/cid"
    23  	basicnode "github.com/ipld/go-ipld-prime/node/basic"
    24  )
    25  
    26  // copied from https://github.com/ipld/go-car/blob/master/cmd/car/extract.go
    27  
    28  var ErrNotDir = fmt.Errorf("not a directory")
    29  
    30  // ExtractCar pulls files and directories out of a car
    31  func ExtractCar(ctx context.Context, file string, outputDir string) error {
    32  	bs, err := blockstore.OpenReadOnly(file)
    33  	if err != nil {
    34  		return err
    35  	}
    36  
    37  	ls := cidlink.DefaultLinkSystem()
    38  	ls.TrustedStorage = true
    39  	ls.StorageReadOpener = func(_ ipld.LinkContext, l ipld.Link) (io.Reader, error) {
    40  		cl, ok := l.(cidlink.Link)
    41  		if !ok {
    42  			return nil, fmt.Errorf("not a cidlink")
    43  		}
    44  		blk, err := bs.Get(ctx, cl.Cid)
    45  		if err != nil {
    46  			return nil, err
    47  		}
    48  		return bytes.NewBuffer(blk.RawData()), nil
    49  	}
    50  
    51  	roots, err := bs.Roots()
    52  	if err != nil {
    53  		return err
    54  	}
    55  
    56  	for _, root := range roots {
    57  		if err := extractRoot(ctx, &ls, root, outputDir); err != nil {
    58  			return err
    59  		}
    60  	}
    61  
    62  	return nil
    63  }
    64  
    65  func extractRoot(ctx context.Context, ls *ipld.LinkSystem, root cid.Cid, outputDir string) error {
    66  	if root.Prefix().Codec == cid.Raw {
    67  		return nil
    68  	}
    69  
    70  	pbn, err := ls.Load(ipld.LinkContext{}, cidlink.Link{Cid: root}, dagpb.Type.PBNode)
    71  	if err != nil {
    72  		return err
    73  	}
    74  	pbnode := pbn.(dagpb.PBNode)
    75  
    76  	ufn, err := unixfsnode.Reify(ipld.LinkContext{}, pbnode, ls)
    77  	if err != nil {
    78  		return err
    79  	}
    80  
    81  	outputResolvedDir, err := filepath.EvalSymlinks(outputDir)
    82  	if err != nil {
    83  		return err
    84  	}
    85  	if _, err := os.Stat(outputResolvedDir); os.IsNotExist(err) {
    86  		if err := os.Mkdir(outputResolvedDir, 0755); err != nil {
    87  			return err
    88  		}
    89  	}
    90  	if err := extractDir(ctx, ls, ufn, outputResolvedDir, "/"); err != nil {
    91  		if !errors.Is(err, ErrNotDir) {
    92  			return fmt.Errorf("%s: %w", root, err)
    93  		}
    94  		ufsData, err := pbnode.LookupByString("Data")
    95  		if err != nil {
    96  			return err
    97  		}
    98  		ufsBytes, err := ufsData.AsBytes()
    99  		if err != nil {
   100  			return err
   101  		}
   102  		ufsNode, err := data.DecodeUnixFSData(ufsBytes)
   103  		if err != nil {
   104  			return err
   105  		}
   106  		if ufsNode.DataType.Int() == data.Data_File || ufsNode.DataType.Int() == data.Data_Raw {
   107  			if err := extractFile(ctx, ls, pbnode, filepath.Join(outputResolvedDir, "unknown")); err != nil {
   108  				return err
   109  			}
   110  		}
   111  		return nil
   112  	}
   113  
   114  	return nil
   115  }
   116  
   117  func resolvePath(root, pth string) (string, error) {
   118  	rp, err := filepath.Rel("/", pth)
   119  	if err != nil {
   120  		return "", fmt.Errorf("couldn't check relative-ness of %s: %w", pth, err)
   121  	}
   122  	joined := path.Join(root, rp)
   123  
   124  	basename := path.Dir(joined)
   125  	final, err := filepath.EvalSymlinks(basename)
   126  	if err != nil {
   127  		return "", fmt.Errorf("couldn't eval symlinks in %s: %w", basename, err)
   128  	}
   129  	if final != path.Clean(basename) {
   130  		return "", fmt.Errorf("path attempts to redirect through symlinks")
   131  	}
   132  	return joined, nil
   133  }
   134  
   135  func extractDir(ctx context.Context, ls *ipld.LinkSystem, n ipld.Node, outputRoot, outputPath string) error {
   136  	dirPath, err := resolvePath(outputRoot, outputPath)
   137  	if err != nil {
   138  		return err
   139  	}
   140  	// make the directory.
   141  	if err := os.MkdirAll(dirPath, 0755); err != nil {
   142  		return err
   143  	}
   144  
   145  	if n.Kind() == ipld.Kind_Map {
   146  		mi := n.MapIterator()
   147  		for !mi.Done() {
   148  			key, val, err := mi.Next()
   149  			if err != nil {
   150  				return err
   151  			}
   152  			ks, err := key.AsString()
   153  			if err != nil {
   154  				return err
   155  			}
   156  			nextRes, err := resolvePath(outputRoot, path.Join(outputPath, ks))
   157  			if err != nil {
   158  				return err
   159  			}
   160  
   161  			if val.Kind() != ipld.Kind_Link {
   162  				return fmt.Errorf("unexpected map value for %s at %s", ks, outputPath)
   163  			}
   164  			// a directory may be represented as a map of name:<link> if unixADL is applied
   165  			vl, err := val.AsLink()
   166  			if err != nil {
   167  				return err
   168  			}
   169  			dest, err := ls.Load(ipld.LinkContext{}, vl, basicnode.Prototype.Any)
   170  			if err != nil {
   171  				return err
   172  			}
   173  			// degenerate files are handled here.
   174  			if dest.Kind() == ipld.Kind_Bytes {
   175  				if err := extractFile(ctx, ls, dest, nextRes); err != nil {
   176  					return err
   177  				}
   178  				continue
   179  			} else {
   180  				// dir / pbnode
   181  				pbb := dagpb.Type.PBNode.NewBuilder()
   182  				if err := pbb.AssignNode(dest); err != nil {
   183  					return err
   184  				}
   185  				dest = pbb.Build()
   186  			}
   187  			pbnode := dest.(dagpb.PBNode)
   188  
   189  			// interpret dagpb 'data' as unixfs data and look at type.
   190  			ufsData, err := pbnode.LookupByString("Data")
   191  			if err != nil {
   192  				return err
   193  			}
   194  			ufsBytes, err := ufsData.AsBytes()
   195  			if err != nil {
   196  				return err
   197  			}
   198  			ufsNode, err := data.DecodeUnixFSData(ufsBytes)
   199  			if err != nil {
   200  				return err
   201  			}
   202  			if ufsNode.DataType.Int() == data.Data_Directory || ufsNode.DataType.Int() == data.Data_HAMTShard {
   203  				ufn, err := unixfsnode.Reify(ipld.LinkContext{}, pbnode, ls)
   204  				if err != nil {
   205  					return err
   206  				}
   207  
   208  				if err := extractDir(ctx, ls, ufn, outputRoot, path.Join(outputPath, ks)); err != nil {
   209  					return err
   210  				}
   211  			} else if ufsNode.DataType.Int() == data.Data_File || ufsNode.DataType.Int() == data.Data_Raw {
   212  				if err := extractFile(ctx, ls, pbnode, nextRes); err != nil {
   213  					return err
   214  				}
   215  			} else if ufsNode.DataType.Int() == data.Data_Symlink {
   216  				data := ufsNode.Data.Must().Bytes()
   217  				if err := os.Symlink(string(data), nextRes); err != nil {
   218  					return err
   219  				}
   220  			}
   221  		}
   222  		return nil
   223  	}
   224  	return ErrNotDir
   225  }
   226  
   227  func extractFile(ctx context.Context, ls *ipld.LinkSystem, n ipld.Node, outputName string) error {
   228  	node, err := file.NewUnixFSFile(ctx, n, ls)
   229  	if err != nil {
   230  		return err
   231  	}
   232  	nlr, err := node.AsLargeBytes()
   233  	if err != nil {
   234  		return err
   235  	}
   236  
   237  	f, err := os.Create(outputName)
   238  	if err != nil {
   239  		return err
   240  	}
   241  	defer f.Close()
   242  	_, err = io.Copy(f, nlr)
   243  
   244  	return err
   245  }