github.com/parquet-go/parquet-go@v0.21.1-0.20240501160520-b3c3a0c3ed6f/print.go (about) 1 package parquet 2 3 import ( 4 "errors" 5 "fmt" 6 "io" 7 "strconv" 8 "strings" 9 10 "github.com/olekukonko/tablewriter" 11 ) 12 13 func PrintSchema(w io.Writer, name string, node Node) error { 14 return PrintSchemaIndent(w, name, node, "\t", "\n") 15 } 16 17 func PrintSchemaIndent(w io.Writer, name string, node Node, pattern, newline string) error { 18 pw := &printWriter{writer: w} 19 pi := &printIndent{} 20 21 if node.Leaf() { 22 printSchemaWithIndent(pw, "", node, pi) 23 } else { 24 pw.WriteString("message ") 25 26 if name == "" { 27 pw.WriteString("{") 28 } else { 29 pw.WriteString(name) 30 pw.WriteString(" {") 31 } 32 33 pi.pattern = pattern 34 pi.newline = newline 35 pi.repeat = 1 36 pi.writeNewLine(pw) 37 38 for _, field := range node.Fields() { 39 printSchemaWithIndent(pw, field.Name(), field, pi) 40 pi.writeNewLine(pw) 41 } 42 43 pw.WriteString("}") 44 } 45 46 return pw.err 47 } 48 49 func printSchemaWithIndent(w io.StringWriter, name string, node Node, indent *printIndent) { 50 indent.writeTo(w) 51 52 switch { 53 case node.Optional(): 54 w.WriteString("optional ") 55 case node.Repeated(): 56 w.WriteString("repeated ") 57 default: 58 w.WriteString("required ") 59 } 60 61 if node.Leaf() { 62 t := node.Type() 63 switch t.Kind() { 64 case Boolean: 65 w.WriteString("boolean") 66 case Int32: 67 w.WriteString("int32") 68 case Int64: 69 w.WriteString("int64") 70 case Int96: 71 w.WriteString("int96") 72 case Float: 73 w.WriteString("float") 74 case Double: 75 w.WriteString("double") 76 case ByteArray: 77 w.WriteString("binary") 78 case FixedLenByteArray: 79 w.WriteString("fixed_len_byte_array(") 80 w.WriteString(strconv.Itoa(t.Length())) 81 w.WriteString(")") 82 default: 83 w.WriteString("<?>") 84 } 85 86 if name != "" { 87 w.WriteString(" ") 88 w.WriteString(name) 89 } 90 91 if annotation := annotationOf(node); annotation != "" { 92 w.WriteString(" (") 93 w.WriteString(annotation) 94 w.WriteString(")") 95 } 96 97 if id := node.ID(); id != 0 { 98 w.WriteString(" = ") 99 w.WriteString(strconv.Itoa(id)) 100 } 101 102 w.WriteString(";") 103 } else { 104 w.WriteString("group") 105 106 if name != "" { 107 w.WriteString(" ") 108 w.WriteString(name) 109 } 110 111 if annotation := annotationOf(node); annotation != "" { 112 w.WriteString(" (") 113 w.WriteString(annotation) 114 w.WriteString(")") 115 } 116 117 if id := node.ID(); id != 0 { 118 w.WriteString(" = ") 119 w.WriteString(strconv.Itoa(id)) 120 } 121 122 w.WriteString(" {") 123 indent.writeNewLine(w) 124 indent.push() 125 126 for _, field := range node.Fields() { 127 printSchemaWithIndent(w, field.Name(), field, indent) 128 indent.writeNewLine(w) 129 } 130 131 indent.pop() 132 indent.writeTo(w) 133 w.WriteString("}") 134 } 135 } 136 137 func annotationOf(node Node) string { 138 if logicalType := node.Type().LogicalType(); logicalType != nil { 139 return logicalType.String() 140 } 141 return "" 142 } 143 144 type printIndent struct { 145 pattern string 146 newline string 147 repeat int 148 } 149 150 func (i *printIndent) push() { 151 i.repeat++ 152 } 153 154 func (i *printIndent) pop() { 155 i.repeat-- 156 } 157 158 func (i *printIndent) writeTo(w io.StringWriter) { 159 if i.pattern != "" { 160 for n := i.repeat; n > 0; n-- { 161 w.WriteString(i.pattern) 162 } 163 } 164 } 165 166 func (i *printIndent) writeNewLine(w io.StringWriter) { 167 if i.newline != "" { 168 w.WriteString(i.newline) 169 } 170 } 171 172 type printWriter struct { 173 writer io.Writer 174 err error 175 } 176 177 func (w *printWriter) Write(b []byte) (int, error) { 178 if w.err != nil { 179 return 0, w.err 180 } 181 n, err := w.writer.Write(b) 182 if err != nil { 183 w.err = err 184 } 185 return n, err 186 } 187 188 func (w *printWriter) WriteString(s string) (int, error) { 189 if w.err != nil { 190 return 0, w.err 191 } 192 n, err := io.WriteString(w.writer, s) 193 if err != nil { 194 w.err = err 195 } 196 return n, err 197 } 198 199 var ( 200 _ io.StringWriter = (*printWriter)(nil) 201 ) 202 203 func sprint(name string, node Node) string { 204 s := new(strings.Builder) 205 PrintSchema(s, name, node) 206 return s.String() 207 } 208 209 func PrintRowGroup(w io.Writer, rowGroup RowGroup) error { 210 schema := rowGroup.Schema() 211 pw := &printWriter{writer: w} 212 tw := tablewriter.NewWriter(pw) 213 214 columns := schema.Columns() 215 header := make([]string, len(columns)) 216 footer := make([]string, len(columns)) 217 alignment := make([]int, len(columns)) 218 219 for i, column := range columns { 220 leaf, _ := schema.Lookup(column...) 221 columnType := leaf.Node.Type() 222 223 header[i] = strings.Join(column, ".") 224 footer[i] = columnType.String() 225 226 switch columnType.Kind() { 227 case ByteArray: 228 alignment[i] = tablewriter.ALIGN_LEFT 229 default: 230 alignment[i] = tablewriter.ALIGN_RIGHT 231 } 232 } 233 234 rowbuf := make([]Row, defaultRowBufferSize) 235 cells := make([]string, 0, len(columns)) 236 rows := rowGroup.Rows() 237 defer rows.Close() 238 239 for { 240 n, err := rows.ReadRows(rowbuf) 241 242 for _, row := range rowbuf[:n] { 243 cells = cells[:0] 244 245 for _, value := range row { 246 columnIndex := value.Column() 247 248 for len(cells) <= columnIndex { 249 cells = append(cells, "") 250 } 251 252 if cells[columnIndex] == "" { 253 cells[columnIndex] = value.String() 254 } else { 255 cells[columnIndex] += "," + value.String() 256 alignment[columnIndex] = tablewriter.ALIGN_LEFT 257 } 258 } 259 260 tw.Append(cells) 261 } 262 263 if err != nil { 264 if errors.Is(err, io.EOF) { 265 break 266 } 267 return err 268 } 269 } 270 271 tw.SetAutoFormatHeaders(false) 272 tw.SetColumnAlignment(alignment) 273 tw.SetHeaderAlignment(tablewriter.ALIGN_LEFT) 274 tw.SetFooterAlignment(tablewriter.ALIGN_LEFT) 275 tw.SetHeader(header) 276 tw.SetFooter(footer) 277 tw.Render() 278 279 fmt.Fprintf(pw, "%d rows\n\n", rowGroup.NumRows()) 280 return pw.err 281 } 282 283 func PrintColumnChunk(w io.Writer, columnChunk ColumnChunk) error { 284 pw := &printWriter{writer: w} 285 pw.WriteString(columnChunk.Type().String()) 286 pw.WriteString("\n--------------------------------------------------------------------------------\n") 287 288 values := [42]Value{} 289 pages := columnChunk.Pages() 290 numPages, numValues := int64(0), int64(0) 291 292 defer pages.Close() 293 for { 294 p, err := pages.ReadPage() 295 if err != nil { 296 if !errors.Is(err, io.EOF) { 297 return err 298 } 299 break 300 } 301 302 numPages++ 303 n := p.NumValues() 304 if n == 0 { 305 fmt.Fprintf(pw, "*** page %d, no values ***\n", numPages) 306 } else { 307 fmt.Fprintf(pw, "*** page %d, values %d to %d ***\n", numPages, numValues+1, numValues+n) 308 printPage(w, p, values[:], numValues+1) 309 numValues += n 310 } 311 312 pw.WriteString("\n") 313 } 314 315 return pw.err 316 } 317 318 func PrintPage(w io.Writer, page Page) error { 319 return printPage(w, page, make([]Value, 42), 0) 320 } 321 322 func printPage(w io.Writer, page Page, values []Value, numValues int64) error { 323 r := page.Values() 324 for { 325 n, err := r.ReadValues(values[:]) 326 for i, v := range values[:n] { 327 _, err := fmt.Fprintf(w, "value %d: %+v\n", numValues+int64(i), v) 328 if err != nil { 329 return err 330 } 331 } 332 if err != nil { 333 if errors.Is(err, io.EOF) { 334 err = nil 335 } 336 return err 337 } 338 } 339 }