storj.io/minio@v0.0.0-20230509071714-0cbc90f649b1/pkg/s3select/internal/parquet-go/tools/parquet2csv/parquet2csv.go (about) 1 /* 2 * Minio Cloud Storage, (C) 2018 Minio, Inc. 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package main 18 19 import ( 20 "encoding/csv" 21 "fmt" 22 "io" 23 "os" 24 "path" 25 "strings" 26 27 "github.com/minio/minio-go/v7/pkg/set" 28 29 parquet "storj.io/minio/pkg/s3select/internal/parquet-go" 30 ) 31 32 func getReader(name string, offset int64, length int64) (io.ReadCloser, error) { 33 file, err := os.Open(name) 34 if err != nil { 35 return nil, err 36 } 37 38 fi, err := file.Stat() 39 if err != nil { 40 return nil, err 41 } 42 43 if offset < 0 { 44 offset = fi.Size() + offset 45 } 46 47 if _, err = file.Seek(offset, io.SeekStart); err != nil { 48 return nil, err 49 } 50 51 return file, nil 52 } 53 54 func printUsage() { 55 progName := path.Base(os.Args[0]) 56 fmt.Printf("usage: %v PARQUET-FILE [COLUMN...]\n", progName) 57 fmt.Println() 58 fmt.Printf("examples:\n") 59 fmt.Printf("# Convert all columns to CSV\n") 60 fmt.Printf("$ %v example.parquet\n", progName) 61 fmt.Println() 62 fmt.Printf("# Convert specific columns to CSV\n") 63 fmt.Printf("$ %v example.par firstname dob\n", progName) 64 fmt.Println() 65 } 66 67 func main() { 68 if len(os.Args) < 2 { 69 printUsage() 70 os.Exit(-1) 71 } 72 73 name := os.Args[1] 74 ext := path.Ext(name) 75 csvFilename := name + ".csv" 76 if ext == ".parquet" || ext == ".par" { 77 csvFilename = strings.TrimSuffix(name, ext) + ".csv" 78 } 79 80 columns := set.CreateStringSet(os.Args[2:]...) 81 if len(columns) == 0 { 82 columns = nil 83 } 84 85 file, err := parquet.NewReader( 86 func(offset, length int64) (io.ReadCloser, error) { 87 return getReader(name, offset, length) 88 }, 89 columns, 90 ) 91 if err != nil { 92 fmt.Printf("%v: %v\n", name, err) 93 os.Exit(1) 94 } 95 96 defer file.Close() 97 98 csvFile, err := os.OpenFile(csvFilename, os.O_RDWR|os.O_CREATE, 0755) 99 if err != nil { 100 fmt.Printf("%v: %v\n", csvFilename, err) 101 os.Exit(1) 102 } 103 104 defer csvFile.Close() 105 106 csvWriter := csv.NewWriter(csvFile) 107 defer csvWriter.Flush() 108 109 headerWritten := false 110 for { 111 record, err := file.Read() 112 if err != nil { 113 if err != io.EOF { 114 fmt.Printf("%v: %v\n", name, err) 115 os.Exit(1) 116 } 117 118 break 119 } 120 121 if !headerWritten { 122 var csvRecord []string 123 record.Range(func(name string, value parquet.Value) bool { 124 csvRecord = append(csvRecord, name) 125 return true 126 }) 127 128 if err = csvWriter.Write(csvRecord); err != nil { 129 fmt.Printf("%v: %v\n", csvFilename, err) 130 os.Exit(1) 131 } 132 133 headerWritten = true 134 } 135 136 var csvRecord []string 137 record.Range(func(name string, value parquet.Value) bool { 138 csvRecord = append(csvRecord, fmt.Sprintf("%v", value.Value)) 139 return true 140 }) 141 142 if err = csvWriter.Write(csvRecord); err != nil { 143 fmt.Printf("%v: %v\n", csvFilename, err) 144 os.Exit(1) 145 } 146 } 147 }