storj.io/minio@v0.0.0-20230509071714-0cbc90f649b1/pkg/s3select/internal/parquet-go/tools/parquet2json/parquet2json.go (about) 1 /* 2 * Minio Cloud Storage, (C) 2018 Minio, Inc. 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package main 18 19 import ( 20 "encoding/json" 21 "fmt" 22 "io" 23 "os" 24 "path" 25 "strings" 26 27 "github.com/minio/minio-go/v7/pkg/set" 28 29 parquet "storj.io/minio/pkg/s3select/internal/parquet-go" 30 ) 31 32 func getReader(name string, offset int64, length int64) (io.ReadCloser, error) { 33 file, err := os.Open(name) 34 if err != nil { 35 return nil, err 36 } 37 38 fi, err := file.Stat() 39 if err != nil { 40 return nil, err 41 } 42 43 if offset < 0 { 44 offset = fi.Size() + offset 45 } 46 47 if _, err = file.Seek(offset, io.SeekStart); err != nil { 48 return nil, err 49 } 50 51 return file, nil 52 } 53 54 func printUsage() { 55 progName := path.Base(os.Args[0]) 56 fmt.Printf("Usage: %v PARQUET-FILE [COLUMN...]\n", progName) 57 fmt.Println() 58 fmt.Printf("Examples:\n") 59 fmt.Printf("# Convert all columns to JSON\n") 60 fmt.Printf("$ %v example.parquet\n", progName) 61 fmt.Println() 62 fmt.Printf("# Convert specific columns to JSON\n") 63 fmt.Printf("$ %v example.par firstname dob\n", progName) 64 fmt.Println() 65 } 66 67 func main() { 68 if len(os.Args) < 2 { 69 printUsage() 70 os.Exit(-1) 71 } 72 73 name := os.Args[1] 74 ext := path.Ext(name) 75 jsonFilename := name + ".json" 76 if ext == ".parquet" || ext == ".par" { 77 jsonFilename = strings.TrimSuffix(name, ext) + ".json" 78 } 79 80 columns := set.CreateStringSet(os.Args[2:]...) 81 if len(columns) == 0 { 82 columns = nil 83 } 84 85 file, err := parquet.NewReader( 86 func(offset, length int64) (io.ReadCloser, error) { 87 return getReader(name, offset, length) 88 }, 89 columns, 90 ) 91 if err != nil { 92 fmt.Printf("%v: %v\n", name, err) 93 os.Exit(1) 94 } 95 96 defer file.Close() 97 98 jsonFile, err := os.OpenFile(jsonFilename, os.O_RDWR|os.O_CREATE, 0755) 99 if err != nil { 100 fmt.Printf("%v: %v\n", jsonFilename, err) 101 os.Exit(1) 102 } 103 104 defer jsonFile.Close() 105 106 for { 107 record, err := file.Read() 108 if err != nil { 109 if err != io.EOF { 110 fmt.Printf("%v: %v\n", name, err) 111 os.Exit(1) 112 } 113 114 break 115 } 116 117 data, err := json.Marshal(record) 118 if err != nil { 119 fmt.Printf("%v: %v\n", name, err) 120 os.Exit(1) 121 } 122 data = append(data, byte('\n')) 123 124 if _, err = jsonFile.Write(data); err != nil { 125 fmt.Printf("%v: %v\n", jsonFilename, err) 126 os.Exit(1) 127 } 128 } 129 }