github.com/filecoin-project/bacalhau@v0.3.23-0.20230228154132-45c989550ace/pkg/test/devstack/extract_car_test.go (about) 1 //go:build integration 2 3 package devstack 4 5 import ( 6 "bytes" 7 "context" 8 "errors" 9 "fmt" 10 "io" 11 "os" 12 "path" 13 "path/filepath" 14 15 "github.com/ipfs/go-cid" 16 "github.com/ipfs/go-unixfsnode" 17 "github.com/ipfs/go-unixfsnode/data" 18 "github.com/ipfs/go-unixfsnode/file" 19 "github.com/ipld/go-car/v2/blockstore" 20 dagpb "github.com/ipld/go-codec-dagpb" 21 "github.com/ipld/go-ipld-prime" 22 cidlink "github.com/ipld/go-ipld-prime/linking/cid" 23 basicnode "github.com/ipld/go-ipld-prime/node/basic" 24 ) 25 26 // copied from https://github.com/ipld/go-car/blob/master/cmd/car/extract.go 27 28 var ErrNotDir = fmt.Errorf("not a directory") 29 30 // ExtractCar pulls files and directories out of a car 31 func ExtractCar(ctx context.Context, file string, outputDir string) error { 32 bs, err := blockstore.OpenReadOnly(file) 33 if err != nil { 34 return err 35 } 36 37 ls := cidlink.DefaultLinkSystem() 38 ls.TrustedStorage = true 39 ls.StorageReadOpener = func(_ ipld.LinkContext, l ipld.Link) (io.Reader, error) { 40 cl, ok := l.(cidlink.Link) 41 if !ok { 42 return nil, fmt.Errorf("not a cidlink") 43 } 44 blk, err := bs.Get(ctx, cl.Cid) 45 if err != nil { 46 return nil, err 47 } 48 return bytes.NewBuffer(blk.RawData()), nil 49 } 50 51 roots, err := bs.Roots() 52 if err != nil { 53 return err 54 } 55 56 for _, root := range roots { 57 if err := extractRoot(ctx, &ls, root, outputDir); err != nil { 58 return err 59 } 60 } 61 62 return nil 63 } 64 65 func extractRoot(ctx context.Context, ls *ipld.LinkSystem, root cid.Cid, outputDir string) error { 66 if root.Prefix().Codec == cid.Raw { 67 return nil 68 } 69 70 pbn, err := ls.Load(ipld.LinkContext{}, cidlink.Link{Cid: root}, dagpb.Type.PBNode) 71 if err != nil { 72 return err 73 } 74 pbnode := pbn.(dagpb.PBNode) 75 76 ufn, err := unixfsnode.Reify(ipld.LinkContext{}, pbnode, ls) 77 if err != nil { 78 return err 79 } 80 81 outputResolvedDir, err := filepath.EvalSymlinks(outputDir) 82 if err != nil { 83 return err 84 } 85 if _, err := os.Stat(outputResolvedDir); os.IsNotExist(err) { 86 if err := os.Mkdir(outputResolvedDir, 0755); err != nil { 87 return err 88 } 89 } 90 if err := extractDir(ctx, ls, ufn, outputResolvedDir, "/"); err != nil { 91 if !errors.Is(err, ErrNotDir) { 92 return fmt.Errorf("%s: %w", root, err) 93 } 94 ufsData, err := pbnode.LookupByString("Data") 95 if err != nil { 96 return err 97 } 98 ufsBytes, err := ufsData.AsBytes() 99 if err != nil { 100 return err 101 } 102 ufsNode, err := data.DecodeUnixFSData(ufsBytes) 103 if err != nil { 104 return err 105 } 106 if ufsNode.DataType.Int() == data.Data_File || ufsNode.DataType.Int() == data.Data_Raw { 107 if err := extractFile(ctx, ls, pbnode, filepath.Join(outputResolvedDir, "unknown")); err != nil { 108 return err 109 } 110 } 111 return nil 112 } 113 114 return nil 115 } 116 117 func resolvePath(root, pth string) (string, error) { 118 rp, err := filepath.Rel("/", pth) 119 if err != nil { 120 return "", fmt.Errorf("couldn't check relative-ness of %s: %w", pth, err) 121 } 122 joined := path.Join(root, rp) 123 124 basename := path.Dir(joined) 125 final, err := filepath.EvalSymlinks(basename) 126 if err != nil { 127 return "", fmt.Errorf("couldn't eval symlinks in %s: %w", basename, err) 128 } 129 if final != path.Clean(basename) { 130 return "", fmt.Errorf("path attempts to redirect through symlinks") 131 } 132 return joined, nil 133 } 134 135 func extractDir(ctx context.Context, ls *ipld.LinkSystem, n ipld.Node, outputRoot, outputPath string) error { 136 dirPath, err := resolvePath(outputRoot, outputPath) 137 if err != nil { 138 return err 139 } 140 // make the directory. 141 if err := os.MkdirAll(dirPath, 0755); err != nil { 142 return err 143 } 144 145 if n.Kind() == ipld.Kind_Map { 146 mi := n.MapIterator() 147 for !mi.Done() { 148 key, val, err := mi.Next() 149 if err != nil { 150 return err 151 } 152 ks, err := key.AsString() 153 if err != nil { 154 return err 155 } 156 nextRes, err := resolvePath(outputRoot, path.Join(outputPath, ks)) 157 if err != nil { 158 return err 159 } 160 161 if val.Kind() != ipld.Kind_Link { 162 return fmt.Errorf("unexpected map value for %s at %s", ks, outputPath) 163 } 164 // a directory may be represented as a map of name:<link> if unixADL is applied 165 vl, err := val.AsLink() 166 if err != nil { 167 return err 168 } 169 dest, err := ls.Load(ipld.LinkContext{}, vl, basicnode.Prototype.Any) 170 if err != nil { 171 return err 172 } 173 // degenerate files are handled here. 174 if dest.Kind() == ipld.Kind_Bytes { 175 if err := extractFile(ctx, ls, dest, nextRes); err != nil { 176 return err 177 } 178 continue 179 } else { 180 // dir / pbnode 181 pbb := dagpb.Type.PBNode.NewBuilder() 182 if err := pbb.AssignNode(dest); err != nil { 183 return err 184 } 185 dest = pbb.Build() 186 } 187 pbnode := dest.(dagpb.PBNode) 188 189 // interpret dagpb 'data' as unixfs data and look at type. 190 ufsData, err := pbnode.LookupByString("Data") 191 if err != nil { 192 return err 193 } 194 ufsBytes, err := ufsData.AsBytes() 195 if err != nil { 196 return err 197 } 198 ufsNode, err := data.DecodeUnixFSData(ufsBytes) 199 if err != nil { 200 return err 201 } 202 if ufsNode.DataType.Int() == data.Data_Directory || ufsNode.DataType.Int() == data.Data_HAMTShard { 203 ufn, err := unixfsnode.Reify(ipld.LinkContext{}, pbnode, ls) 204 if err != nil { 205 return err 206 } 207 208 if err := extractDir(ctx, ls, ufn, outputRoot, path.Join(outputPath, ks)); err != nil { 209 return err 210 } 211 } else if ufsNode.DataType.Int() == data.Data_File || ufsNode.DataType.Int() == data.Data_Raw { 212 if err := extractFile(ctx, ls, pbnode, nextRes); err != nil { 213 return err 214 } 215 } else if ufsNode.DataType.Int() == data.Data_Symlink { 216 data := ufsNode.Data.Must().Bytes() 217 if err := os.Symlink(string(data), nextRes); err != nil { 218 return err 219 } 220 } 221 } 222 return nil 223 } 224 return ErrNotDir 225 } 226 227 func extractFile(ctx context.Context, ls *ipld.LinkSystem, n ipld.Node, outputName string) error { 228 node, err := file.NewUnixFSFile(ctx, n, ls) 229 if err != nil { 230 return err 231 } 232 nlr, err := node.AsLargeBytes() 233 if err != nil { 234 return err 235 } 236 237 f, err := os.Create(outputName) 238 if err != nil { 239 return err 240 } 241 defer f.Close() 242 _, err = io.Copy(f, nlr) 243 244 return err 245 }