github.com/cayleygraph/cayley@v0.7.7/internal/load.go (about)

     1  package internal
     2  
     3  import (
     4  	"fmt"
     5  	"io"
     6  	"net/http"
     7  	"net/url"
     8  	"os"
     9  	"path/filepath"
    10  	"strings"
    11  
    12  	"github.com/cayleygraph/cayley/clog"
    13  	"github.com/cayleygraph/cayley/internal/decompressor"
    14  	"github.com/cayleygraph/quad"
    15  	"github.com/cayleygraph/quad/nquads"
    16  )
    17  
    18  // Load loads a graph from the given path and write it to qw.  See
    19  // DecompressAndLoad for more information.
    20  func Load(qw quad.WriteCloser, batch int, path, typ string) error {
    21  	return DecompressAndLoad(qw, batch, path, typ)
    22  }
    23  
    24  type readCloser struct {
    25  	quad.ReadCloser
    26  	close func() error
    27  }
    28  
    29  func (r readCloser) Close() error {
    30  	err := r.ReadCloser.Close()
    31  	if r.close != nil {
    32  		r.close()
    33  	}
    34  	return err
    35  }
    36  
    37  type nopCloser struct {
    38  	quad.Reader
    39  }
    40  
    41  func (r nopCloser) Close() error { return nil }
    42  
    43  func QuadReaderFor(path, typ string) (quad.ReadCloser, error) {
    44  	var (
    45  		r io.Reader
    46  		c io.Closer
    47  	)
    48  	if path == "-" {
    49  		r = os.Stdin
    50  	} else if u, err := url.Parse(path); err != nil || u.Scheme == "file" || u.Scheme == "" {
    51  		// Don't alter relative URL path or non-URL path parameter.
    52  		if u.Scheme != "" && err == nil {
    53  			// Recovery heuristic for mistyping "file://path/to/file".
    54  			path = filepath.Join(u.Host, u.Path)
    55  		}
    56  		f, err := os.Open(path)
    57  		if os.IsNotExist(err) {
    58  			return nil, err
    59  		} else if err != nil {
    60  			return nil, fmt.Errorf("could not open file %q: %v", path, err)
    61  		}
    62  		r, c = f, f
    63  	} else {
    64  		res, err := http.Get(path)
    65  		if err != nil {
    66  			return nil, fmt.Errorf("could not get resource <%s>: %v", u, err)
    67  		}
    68  		// TODO(dennwc): save content type for format auto-detection
    69  		r, c = res.Body, res.Body
    70  	}
    71  
    72  	r, err := decompressor.New(r)
    73  	if err != nil {
    74  		if c != nil {
    75  			c.Close()
    76  		}
    77  		if err == io.EOF {
    78  			return nopCloser{quad.NewReader(nil)}, nil
    79  		}
    80  		return nil, err
    81  	}
    82  
    83  	var qr quad.ReadCloser
    84  	switch typ {
    85  	case "cquad", "nquad": // legacy
    86  		qr = nquads.NewReader(r, false)
    87  	default:
    88  		var format *quad.Format
    89  		if typ == "" {
    90  			name := filepath.Base(path)
    91  			name = strings.TrimSuffix(name, ".gz")
    92  			name = strings.TrimSuffix(name, ".bz2")
    93  			format = quad.FormatByExt(filepath.Ext(name))
    94  			if format == nil {
    95  				typ = "nquads"
    96  			}
    97  		}
    98  		if format == nil {
    99  			format = quad.FormatByName(typ)
   100  		}
   101  		if format == nil {
   102  			err = fmt.Errorf("unknown quad format %q", typ)
   103  		} else if format.Reader == nil {
   104  			err = fmt.Errorf("decoding of %q is not supported", typ)
   105  		}
   106  		if err != nil {
   107  			if c != nil {
   108  				c.Close()
   109  			}
   110  			return nil, err
   111  		}
   112  		qr = format.Reader(r)
   113  	}
   114  	if c != nil {
   115  		return readCloser{ReadCloser: qr, close: c.Close}, nil
   116  	}
   117  	return qr, nil
   118  }
   119  
   120  // DecompressAndLoad will load or fetch a graph from the given path, decompress
   121  // it, and then call the given load function to process the decompressed graph.
   122  // If no loadFn is provided, db.Load is called.
   123  func DecompressAndLoad(qw quad.WriteCloser, batch int, path, typ string) error {
   124  	if path == "" {
   125  		return nil
   126  	}
   127  	qr, err := QuadReaderFor(path, typ)
   128  	if err != nil {
   129  		return err
   130  	}
   131  	defer qr.Close()
   132  
   133  	_, err = quad.CopyBatch(&batchLogger{w: qw}, qr, batch)
   134  	if err != nil {
   135  		return fmt.Errorf("db: failed to load data: %v", err)
   136  	}
   137  	return qw.Close()
   138  }
   139  
   140  type batchLogger struct {
   141  	cnt int
   142  	w   quad.Writer
   143  }
   144  
   145  func (w *batchLogger) WriteQuads(quads []quad.Quad) (int, error) {
   146  	n, err := w.w.WriteQuads(quads)
   147  	if clog.V(2) {
   148  		w.cnt += n
   149  		clog.Infof("Wrote %d quads.", w.cnt)
   150  	}
   151  	return n, err
   152  }