github.com/parquet-go/parquet-go@v0.21.1-0.20240501160520-b3c3a0c3ed6f/print.go (about)

     1  package parquet
     2  
     3  import (
     4  	"errors"
     5  	"fmt"
     6  	"io"
     7  	"strconv"
     8  	"strings"
     9  
    10  	"github.com/olekukonko/tablewriter"
    11  )
    12  
    13  func PrintSchema(w io.Writer, name string, node Node) error {
    14  	return PrintSchemaIndent(w, name, node, "\t", "\n")
    15  }
    16  
    17  func PrintSchemaIndent(w io.Writer, name string, node Node, pattern, newline string) error {
    18  	pw := &printWriter{writer: w}
    19  	pi := &printIndent{}
    20  
    21  	if node.Leaf() {
    22  		printSchemaWithIndent(pw, "", node, pi)
    23  	} else {
    24  		pw.WriteString("message ")
    25  
    26  		if name == "" {
    27  			pw.WriteString("{")
    28  		} else {
    29  			pw.WriteString(name)
    30  			pw.WriteString(" {")
    31  		}
    32  
    33  		pi.pattern = pattern
    34  		pi.newline = newline
    35  		pi.repeat = 1
    36  		pi.writeNewLine(pw)
    37  
    38  		for _, field := range node.Fields() {
    39  			printSchemaWithIndent(pw, field.Name(), field, pi)
    40  			pi.writeNewLine(pw)
    41  		}
    42  
    43  		pw.WriteString("}")
    44  	}
    45  
    46  	return pw.err
    47  }
    48  
    49  func printSchemaWithIndent(w io.StringWriter, name string, node Node, indent *printIndent) {
    50  	indent.writeTo(w)
    51  
    52  	switch {
    53  	case node.Optional():
    54  		w.WriteString("optional ")
    55  	case node.Repeated():
    56  		w.WriteString("repeated ")
    57  	default:
    58  		w.WriteString("required ")
    59  	}
    60  
    61  	if node.Leaf() {
    62  		t := node.Type()
    63  		switch t.Kind() {
    64  		case Boolean:
    65  			w.WriteString("boolean")
    66  		case Int32:
    67  			w.WriteString("int32")
    68  		case Int64:
    69  			w.WriteString("int64")
    70  		case Int96:
    71  			w.WriteString("int96")
    72  		case Float:
    73  			w.WriteString("float")
    74  		case Double:
    75  			w.WriteString("double")
    76  		case ByteArray:
    77  			w.WriteString("binary")
    78  		case FixedLenByteArray:
    79  			w.WriteString("fixed_len_byte_array(")
    80  			w.WriteString(strconv.Itoa(t.Length()))
    81  			w.WriteString(")")
    82  		default:
    83  			w.WriteString("<?>")
    84  		}
    85  
    86  		if name != "" {
    87  			w.WriteString(" ")
    88  			w.WriteString(name)
    89  		}
    90  
    91  		if annotation := annotationOf(node); annotation != "" {
    92  			w.WriteString(" (")
    93  			w.WriteString(annotation)
    94  			w.WriteString(")")
    95  		}
    96  
    97  		if id := node.ID(); id != 0 {
    98  			w.WriteString(" = ")
    99  			w.WriteString(strconv.Itoa(id))
   100  		}
   101  
   102  		w.WriteString(";")
   103  	} else {
   104  		w.WriteString("group")
   105  
   106  		if name != "" {
   107  			w.WriteString(" ")
   108  			w.WriteString(name)
   109  		}
   110  
   111  		if annotation := annotationOf(node); annotation != "" {
   112  			w.WriteString(" (")
   113  			w.WriteString(annotation)
   114  			w.WriteString(")")
   115  		}
   116  
   117  		if id := node.ID(); id != 0 {
   118  			w.WriteString(" = ")
   119  			w.WriteString(strconv.Itoa(id))
   120  		}
   121  
   122  		w.WriteString(" {")
   123  		indent.writeNewLine(w)
   124  		indent.push()
   125  
   126  		for _, field := range node.Fields() {
   127  			printSchemaWithIndent(w, field.Name(), field, indent)
   128  			indent.writeNewLine(w)
   129  		}
   130  
   131  		indent.pop()
   132  		indent.writeTo(w)
   133  		w.WriteString("}")
   134  	}
   135  }
   136  
   137  func annotationOf(node Node) string {
   138  	if logicalType := node.Type().LogicalType(); logicalType != nil {
   139  		return logicalType.String()
   140  	}
   141  	return ""
   142  }
   143  
   144  type printIndent struct {
   145  	pattern string
   146  	newline string
   147  	repeat  int
   148  }
   149  
   150  func (i *printIndent) push() {
   151  	i.repeat++
   152  }
   153  
   154  func (i *printIndent) pop() {
   155  	i.repeat--
   156  }
   157  
   158  func (i *printIndent) writeTo(w io.StringWriter) {
   159  	if i.pattern != "" {
   160  		for n := i.repeat; n > 0; n-- {
   161  			w.WriteString(i.pattern)
   162  		}
   163  	}
   164  }
   165  
   166  func (i *printIndent) writeNewLine(w io.StringWriter) {
   167  	if i.newline != "" {
   168  		w.WriteString(i.newline)
   169  	}
   170  }
   171  
   172  type printWriter struct {
   173  	writer io.Writer
   174  	err    error
   175  }
   176  
   177  func (w *printWriter) Write(b []byte) (int, error) {
   178  	if w.err != nil {
   179  		return 0, w.err
   180  	}
   181  	n, err := w.writer.Write(b)
   182  	if err != nil {
   183  		w.err = err
   184  	}
   185  	return n, err
   186  }
   187  
   188  func (w *printWriter) WriteString(s string) (int, error) {
   189  	if w.err != nil {
   190  		return 0, w.err
   191  	}
   192  	n, err := io.WriteString(w.writer, s)
   193  	if err != nil {
   194  		w.err = err
   195  	}
   196  	return n, err
   197  }
   198  
   199  var (
   200  	_ io.StringWriter = (*printWriter)(nil)
   201  )
   202  
   203  func sprint(name string, node Node) string {
   204  	s := new(strings.Builder)
   205  	PrintSchema(s, name, node)
   206  	return s.String()
   207  }
   208  
   209  func PrintRowGroup(w io.Writer, rowGroup RowGroup) error {
   210  	schema := rowGroup.Schema()
   211  	pw := &printWriter{writer: w}
   212  	tw := tablewriter.NewWriter(pw)
   213  
   214  	columns := schema.Columns()
   215  	header := make([]string, len(columns))
   216  	footer := make([]string, len(columns))
   217  	alignment := make([]int, len(columns))
   218  
   219  	for i, column := range columns {
   220  		leaf, _ := schema.Lookup(column...)
   221  		columnType := leaf.Node.Type()
   222  
   223  		header[i] = strings.Join(column, ".")
   224  		footer[i] = columnType.String()
   225  
   226  		switch columnType.Kind() {
   227  		case ByteArray:
   228  			alignment[i] = tablewriter.ALIGN_LEFT
   229  		default:
   230  			alignment[i] = tablewriter.ALIGN_RIGHT
   231  		}
   232  	}
   233  
   234  	rowbuf := make([]Row, defaultRowBufferSize)
   235  	cells := make([]string, 0, len(columns))
   236  	rows := rowGroup.Rows()
   237  	defer rows.Close()
   238  
   239  	for {
   240  		n, err := rows.ReadRows(rowbuf)
   241  
   242  		for _, row := range rowbuf[:n] {
   243  			cells = cells[:0]
   244  
   245  			for _, value := range row {
   246  				columnIndex := value.Column()
   247  
   248  				for len(cells) <= columnIndex {
   249  					cells = append(cells, "")
   250  				}
   251  
   252  				if cells[columnIndex] == "" {
   253  					cells[columnIndex] = value.String()
   254  				} else {
   255  					cells[columnIndex] += "," + value.String()
   256  					alignment[columnIndex] = tablewriter.ALIGN_LEFT
   257  				}
   258  			}
   259  
   260  			tw.Append(cells)
   261  		}
   262  
   263  		if err != nil {
   264  			if errors.Is(err, io.EOF) {
   265  				break
   266  			}
   267  			return err
   268  		}
   269  	}
   270  
   271  	tw.SetAutoFormatHeaders(false)
   272  	tw.SetColumnAlignment(alignment)
   273  	tw.SetHeaderAlignment(tablewriter.ALIGN_LEFT)
   274  	tw.SetFooterAlignment(tablewriter.ALIGN_LEFT)
   275  	tw.SetHeader(header)
   276  	tw.SetFooter(footer)
   277  	tw.Render()
   278  
   279  	fmt.Fprintf(pw, "%d rows\n\n", rowGroup.NumRows())
   280  	return pw.err
   281  }
   282  
   283  func PrintColumnChunk(w io.Writer, columnChunk ColumnChunk) error {
   284  	pw := &printWriter{writer: w}
   285  	pw.WriteString(columnChunk.Type().String())
   286  	pw.WriteString("\n--------------------------------------------------------------------------------\n")
   287  
   288  	values := [42]Value{}
   289  	pages := columnChunk.Pages()
   290  	numPages, numValues := int64(0), int64(0)
   291  
   292  	defer pages.Close()
   293  	for {
   294  		p, err := pages.ReadPage()
   295  		if err != nil {
   296  			if !errors.Is(err, io.EOF) {
   297  				return err
   298  			}
   299  			break
   300  		}
   301  
   302  		numPages++
   303  		n := p.NumValues()
   304  		if n == 0 {
   305  			fmt.Fprintf(pw, "*** page %d, no values ***\n", numPages)
   306  		} else {
   307  			fmt.Fprintf(pw, "*** page %d, values %d to %d ***\n", numPages, numValues+1, numValues+n)
   308  			printPage(w, p, values[:], numValues+1)
   309  			numValues += n
   310  		}
   311  
   312  		pw.WriteString("\n")
   313  	}
   314  
   315  	return pw.err
   316  }
   317  
   318  func PrintPage(w io.Writer, page Page) error {
   319  	return printPage(w, page, make([]Value, 42), 0)
   320  }
   321  
   322  func printPage(w io.Writer, page Page, values []Value, numValues int64) error {
   323  	r := page.Values()
   324  	for {
   325  		n, err := r.ReadValues(values[:])
   326  		for i, v := range values[:n] {
   327  			_, err := fmt.Fprintf(w, "value %d: %+v\n", numValues+int64(i), v)
   328  			if err != nil {
   329  				return err
   330  			}
   331  		}
   332  		if err != nil {
   333  			if errors.Is(err, io.EOF) {
   334  				err = nil
   335  			}
   336  			return err
   337  		}
   338  	}
   339  }