github.com/segmentio/parquet-go@v0.0.0-20230712180008-5d42db8f0d47/print.go (about) 1 package parquet 2 3 import ( 4 "errors" 5 "fmt" 6 "io" 7 "strconv" 8 "strings" 9 10 "github.com/olekukonko/tablewriter" 11 ) 12 13 func PrintSchema(w io.Writer, name string, node Node) error { 14 return PrintSchemaIndent(w, name, node, "\t", "\n") 15 } 16 17 func PrintSchemaIndent(w io.Writer, name string, node Node, pattern, newline string) error { 18 pw := &printWriter{writer: w} 19 pi := &printIndent{} 20 21 if node.Leaf() { 22 printSchemaWithIndent(pw, "", node, pi) 23 } else { 24 pw.WriteString("message ") 25 26 if name == "" { 27 pw.WriteString("{") 28 } else { 29 pw.WriteString(name) 30 pw.WriteString(" {") 31 } 32 33 pi.pattern = pattern 34 pi.newline = newline 35 pi.repeat = 1 36 pi.writeNewLine(pw) 37 38 for _, field := range node.Fields() { 39 printSchemaWithIndent(pw, field.Name(), field, pi) 40 pi.writeNewLine(pw) 41 } 42 43 pw.WriteString("}") 44 } 45 46 return pw.err 47 } 48 49 func printSchemaWithIndent(w io.StringWriter, name string, node Node, indent *printIndent) { 50 indent.writeTo(w) 51 52 switch { 53 case node.Optional(): 54 w.WriteString("optional ") 55 case node.Repeated(): 56 w.WriteString("repeated ") 57 default: 58 w.WriteString("required ") 59 } 60 61 if node.Leaf() { 62 t := node.Type() 63 switch t.Kind() { 64 case Boolean: 65 w.WriteString("boolean") 66 case Int32: 67 w.WriteString("int32") 68 case Int64: 69 w.WriteString("int64") 70 case Int96: 71 w.WriteString("int96") 72 case Float: 73 w.WriteString("float") 74 case Double: 75 w.WriteString("double") 76 case ByteArray: 77 w.WriteString("binary") 78 case FixedLenByteArray: 79 w.WriteString("fixed_len_byte_array(") 80 w.WriteString(strconv.Itoa(t.Length())) 81 w.WriteString(")") 82 default: 83 w.WriteString("<?>") 84 } 85 86 if name != "" { 87 w.WriteString(" ") 88 w.WriteString(name) 89 } 90 91 if annotation := annotationOf(node); annotation != "" { 92 w.WriteString(" (") 93 w.WriteString(annotation) 94 w.WriteString(")") 95 } 96 97 w.WriteString(";") 98 } else { 99 w.WriteString("group") 100 101 if name != "" { 102 w.WriteString(" ") 103 w.WriteString(name) 104 } 105 106 if annotation := annotationOf(node); annotation != "" { 107 w.WriteString(" (") 108 w.WriteString(annotation) 109 w.WriteString(")") 110 } 111 112 w.WriteString(" {") 113 indent.writeNewLine(w) 114 indent.push() 115 116 for _, field := range node.Fields() { 117 printSchemaWithIndent(w, field.Name(), field, indent) 118 indent.writeNewLine(w) 119 } 120 121 indent.pop() 122 indent.writeTo(w) 123 w.WriteString("}") 124 } 125 } 126 127 func annotationOf(node Node) string { 128 if logicalType := node.Type().LogicalType(); logicalType != nil { 129 return logicalType.String() 130 } 131 return "" 132 } 133 134 type printIndent struct { 135 pattern string 136 newline string 137 repeat int 138 } 139 140 func (i *printIndent) push() { 141 i.repeat++ 142 } 143 144 func (i *printIndent) pop() { 145 i.repeat-- 146 } 147 148 func (i *printIndent) writeTo(w io.StringWriter) { 149 if i.pattern != "" { 150 for n := i.repeat; n > 0; n-- { 151 w.WriteString(i.pattern) 152 } 153 } 154 } 155 156 func (i *printIndent) writeNewLine(w io.StringWriter) { 157 if i.newline != "" { 158 w.WriteString(i.newline) 159 } 160 } 161 162 type printWriter struct { 163 writer io.Writer 164 err error 165 } 166 167 func (w *printWriter) Write(b []byte) (int, error) { 168 if w.err != nil { 169 return 0, w.err 170 } 171 n, err := w.writer.Write(b) 172 if err != nil { 173 w.err = err 174 } 175 return n, err 176 } 177 178 func (w *printWriter) WriteString(s string) (int, error) { 179 if w.err != nil { 180 return 0, w.err 181 } 182 n, err := io.WriteString(w.writer, s) 183 if err != nil { 184 w.err = err 185 } 186 return n, err 187 } 188 189 var ( 190 _ io.StringWriter = (*printWriter)(nil) 191 ) 192 193 func sprint(name string, node Node) string { 194 s := new(strings.Builder) 195 PrintSchema(s, name, node) 196 return s.String() 197 } 198 199 func PrintRowGroup(w io.Writer, rowGroup RowGroup) error { 200 schema := rowGroup.Schema() 201 pw := &printWriter{writer: w} 202 tw := tablewriter.NewWriter(pw) 203 204 columns := schema.Columns() 205 header := make([]string, len(columns)) 206 footer := make([]string, len(columns)) 207 alignment := make([]int, len(columns)) 208 209 for i, column := range columns { 210 leaf, _ := schema.Lookup(column...) 211 columnType := leaf.Node.Type() 212 213 header[i] = strings.Join(column, ".") 214 footer[i] = columnType.String() 215 216 switch columnType.Kind() { 217 case ByteArray: 218 alignment[i] = tablewriter.ALIGN_LEFT 219 default: 220 alignment[i] = tablewriter.ALIGN_RIGHT 221 } 222 } 223 224 rowbuf := make([]Row, defaultRowBufferSize) 225 cells := make([]string, 0, len(columns)) 226 rows := rowGroup.Rows() 227 defer rows.Close() 228 229 for { 230 n, err := rows.ReadRows(rowbuf) 231 232 for _, row := range rowbuf[:n] { 233 cells = cells[:0] 234 235 for _, value := range row { 236 columnIndex := value.Column() 237 238 for len(cells) <= columnIndex { 239 cells = append(cells, "") 240 } 241 242 if cells[columnIndex] == "" { 243 cells[columnIndex] = value.String() 244 } else { 245 cells[columnIndex] += "," + value.String() 246 alignment[columnIndex] = tablewriter.ALIGN_LEFT 247 } 248 } 249 250 tw.Append(cells) 251 } 252 253 if err != nil { 254 if errors.Is(err, io.EOF) { 255 break 256 } 257 return err 258 } 259 } 260 261 tw.SetAutoFormatHeaders(false) 262 tw.SetColumnAlignment(alignment) 263 tw.SetHeaderAlignment(tablewriter.ALIGN_LEFT) 264 tw.SetFooterAlignment(tablewriter.ALIGN_LEFT) 265 tw.SetHeader(header) 266 tw.SetFooter(footer) 267 tw.Render() 268 269 fmt.Fprintf(pw, "%d rows\n\n", rowGroup.NumRows()) 270 return pw.err 271 } 272 273 func PrintColumnChunk(w io.Writer, columnChunk ColumnChunk) error { 274 pw := &printWriter{writer: w} 275 pw.WriteString(columnChunk.Type().String()) 276 pw.WriteString("\n--------------------------------------------------------------------------------\n") 277 278 values := [42]Value{} 279 pages := columnChunk.Pages() 280 numPages, numValues := int64(0), int64(0) 281 282 defer pages.Close() 283 for { 284 p, err := pages.ReadPage() 285 if err != nil { 286 if !errors.Is(err, io.EOF) { 287 return err 288 } 289 break 290 } 291 292 numPages++ 293 n := p.NumValues() 294 if n == 0 { 295 fmt.Fprintf(pw, "*** page %d, no values ***\n", numPages) 296 } else { 297 fmt.Fprintf(pw, "*** page %d, values %d to %d ***\n", numPages, numValues+1, numValues+n) 298 printPage(w, p, values[:], numValues+1) 299 numValues += n 300 } 301 302 pw.WriteString("\n") 303 } 304 305 return pw.err 306 } 307 308 func PrintPage(w io.Writer, page Page) error { 309 return printPage(w, page, make([]Value, 42), 0) 310 } 311 312 func printPage(w io.Writer, page Page, values []Value, numValues int64) error { 313 r := page.Values() 314 for { 315 n, err := r.ReadValues(values[:]) 316 for i, v := range values[:n] { 317 _, err := fmt.Fprintf(w, "value %d: %+v\n", numValues+int64(i), v) 318 if err != nil { 319 return err 320 } 321 } 322 if err != nil { 323 if errors.Is(err, io.EOF) { 324 err = nil 325 } 326 return err 327 } 328 } 329 }