github.com/unigraph-dev/dgraph@v1.1.1-0.20200923154953-8b52b426f765/dgraph/cmd/conv/conv.go (about)

     1  /*
     2   * Copyright 2018 Dgraph Labs, Inc. and Contributors
     3   *
     4   * Licensed under the Apache License, Version 2.0 (the "License");
     5   * you may not use this file except in compliance with the License.
     6   * You may obtain a copy of the License at
     7   *
     8   *     http://www.apache.org/licenses/LICENSE-2.0
     9   *
    10   * Unless required by applicable law or agreed to in writing, software
    11   * distributed under the License is distributed on an "AS IS" BASIS,
    12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13   * See the License for the specific language governing permissions and
    14   * limitations under the License.
    15   */
    16  
    17  package conv
    18  
    19  import (
    20  	"bufio"
    21  	"compress/gzip"
    22  	"encoding/json"
    23  	"fmt"
    24  	"io"
    25  	"io/ioutil"
    26  	"os"
    27  	"path/filepath"
    28  	"strings"
    29  
    30  	"github.com/dgraph-io/dgraph/x"
    31  	"github.com/paulmach/go.geojson"
    32  )
    33  
    34  // TODO: Reconsider if we need this binary.
    35  func writeToFile(fpath string, ch chan []byte) error {
    36  	f, err := os.Create(fpath)
    37  	if err != nil {
    38  		return err
    39  	}
    40  
    41  	defer f.Close()
    42  	x.Check(err)
    43  	w := bufio.NewWriterSize(f, 1e6)
    44  	gw, err := gzip.NewWriterLevel(w, gzip.BestCompression)
    45  	if err != nil {
    46  		return err
    47  	}
    48  
    49  	for buf := range ch {
    50  		if _, err := gw.Write(buf); err != nil {
    51  			return err
    52  		}
    53  	}
    54  	if err := gw.Flush(); err != nil {
    55  		return err
    56  	}
    57  	if err := gw.Close(); err != nil {
    58  		return err
    59  	}
    60  	return w.Flush()
    61  }
    62  
    63  func convertGeoFile(input string, output string) error {
    64  	fmt.Printf("\nProcessing %s\n\n", input)
    65  	f, err := os.Open(input)
    66  	if err != nil {
    67  		return err
    68  	}
    69  	defer f.Close()
    70  
    71  	var gz io.Reader
    72  	if filepath.Ext(input) == ".gz" {
    73  		gz, err = gzip.NewReader(f)
    74  		if err != nil {
    75  			return err
    76  		}
    77  	} else {
    78  		gz = f
    79  	}
    80  
    81  	// TODO - This might not be a good idea for large files. Use json.Decode to read features.
    82  	b, err := ioutil.ReadAll(gz)
    83  	if err != nil {
    84  		return err
    85  	}
    86  	basename := filepath.Base(input)
    87  	name := strings.TrimSuffix(basename, filepath.Ext(basename))
    88  
    89  	che := make(chan error, 1)
    90  	chb := make(chan []byte, 1000)
    91  	go func() {
    92  		che <- writeToFile(output, chb)
    93  	}()
    94  
    95  	fc := geojson.NewFeatureCollection()
    96  	err = json.Unmarshal(b, fc)
    97  	if err != nil {
    98  		return err
    99  	}
   100  
   101  	count := 0
   102  	rdfCount := 0
   103  	for _, f := range fc.Features {
   104  		b, err := json.Marshal(f.Geometry)
   105  		if err != nil {
   106  			return err
   107  		}
   108  
   109  		geometry := strings.Replace(string(b), `"`, "'", -1)
   110  		bn := fmt.Sprintf("_:%s-%d", name, count)
   111  		rdf := fmt.Sprintf("%s <%s> \"%s\"^^<geo:geojson> .\n", bn, opt.geopred, geometry)
   112  		chb <- []byte(rdf)
   113  
   114  		for k := range f.Properties {
   115  			// TODO - Support other types later.
   116  			if str, err := f.PropertyString(k); err == nil {
   117  				rdfCount++
   118  				rdf = fmt.Sprintf("%s <%s> \"%s\" .\n", bn, k, str)
   119  				chb <- []byte(rdf)
   120  			}
   121  		}
   122  		count++
   123  		rdfCount++
   124  		if count%1000 == 0 {
   125  			fmt.Printf("%d features converted\r", count)
   126  		}
   127  	}
   128  	close(chb)
   129  	fmt.Printf("%d features converted. %d rdf's generated\n", count, rdfCount)
   130  	return <-che
   131  }