storj.io/minio@v0.0.0-20230509071714-0cbc90f649b1/pkg/s3select/internal/parquet-go/tools/parquet2json/parquet2json.go (about)

     1  /*
     2   * Minio Cloud Storage, (C) 2018 Minio, Inc.
     3   *
     4   * Licensed under the Apache License, Version 2.0 (the "License");
     5   * you may not use this file except in compliance with the License.
     6   * You may obtain a copy of the License at
     7   *
     8   *     http://www.apache.org/licenses/LICENSE-2.0
     9   *
    10   * Unless required by applicable law or agreed to in writing, software
    11   * distributed under the License is distributed on an "AS IS" BASIS,
    12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13   * See the License for the specific language governing permissions and
    14   * limitations under the License.
    15   */
    16  
    17  package main
    18  
    19  import (
    20  	"encoding/json"
    21  	"fmt"
    22  	"io"
    23  	"os"
    24  	"path"
    25  	"strings"
    26  
    27  	"github.com/minio/minio-go/v7/pkg/set"
    28  
    29  	parquet "storj.io/minio/pkg/s3select/internal/parquet-go"
    30  )
    31  
    32  func getReader(name string, offset int64, length int64) (io.ReadCloser, error) {
    33  	file, err := os.Open(name)
    34  	if err != nil {
    35  		return nil, err
    36  	}
    37  
    38  	fi, err := file.Stat()
    39  	if err != nil {
    40  		return nil, err
    41  	}
    42  
    43  	if offset < 0 {
    44  		offset = fi.Size() + offset
    45  	}
    46  
    47  	if _, err = file.Seek(offset, io.SeekStart); err != nil {
    48  		return nil, err
    49  	}
    50  
    51  	return file, nil
    52  }
    53  
    54  func printUsage() {
    55  	progName := path.Base(os.Args[0])
    56  	fmt.Printf("Usage: %v PARQUET-FILE [COLUMN...]\n", progName)
    57  	fmt.Println()
    58  	fmt.Printf("Examples:\n")
    59  	fmt.Printf("# Convert all columns to JSON\n")
    60  	fmt.Printf("$ %v example.parquet\n", progName)
    61  	fmt.Println()
    62  	fmt.Printf("# Convert specific columns to JSON\n")
    63  	fmt.Printf("$ %v example.par firstname dob\n", progName)
    64  	fmt.Println()
    65  }
    66  
    67  func main() {
    68  	if len(os.Args) < 2 {
    69  		printUsage()
    70  		os.Exit(-1)
    71  	}
    72  
    73  	name := os.Args[1]
    74  	ext := path.Ext(name)
    75  	jsonFilename := name + ".json"
    76  	if ext == ".parquet" || ext == ".par" {
    77  		jsonFilename = strings.TrimSuffix(name, ext) + ".json"
    78  	}
    79  
    80  	columns := set.CreateStringSet(os.Args[2:]...)
    81  	if len(columns) == 0 {
    82  		columns = nil
    83  	}
    84  
    85  	file, err := parquet.NewReader(
    86  		func(offset, length int64) (io.ReadCloser, error) {
    87  			return getReader(name, offset, length)
    88  		},
    89  		columns,
    90  	)
    91  	if err != nil {
    92  		fmt.Printf("%v: %v\n", name, err)
    93  		os.Exit(1)
    94  	}
    95  
    96  	defer file.Close()
    97  
    98  	jsonFile, err := os.OpenFile(jsonFilename, os.O_RDWR|os.O_CREATE, 0755)
    99  	if err != nil {
   100  		fmt.Printf("%v: %v\n", jsonFilename, err)
   101  		os.Exit(1)
   102  	}
   103  
   104  	defer jsonFile.Close()
   105  
   106  	for {
   107  		record, err := file.Read()
   108  		if err != nil {
   109  			if err != io.EOF {
   110  				fmt.Printf("%v: %v\n", name, err)
   111  				os.Exit(1)
   112  			}
   113  
   114  			break
   115  		}
   116  
   117  		data, err := json.Marshal(record)
   118  		if err != nil {
   119  			fmt.Printf("%v: %v\n", name, err)
   120  			os.Exit(1)
   121  		}
   122  		data = append(data, byte('\n'))
   123  
   124  		if _, err = jsonFile.Write(data); err != nil {
   125  			fmt.Printf("%v: %v\n", jsonFilename, err)
   126  			os.Exit(1)
   127  		}
   128  	}
   129  }