github.com/segmentio/parquet-go@v0.0.0-20230712180008-5d42db8f0d47/print.go (about)

     1  package parquet
     2  
     3  import (
     4  	"errors"
     5  	"fmt"
     6  	"io"
     7  	"strconv"
     8  	"strings"
     9  
    10  	"github.com/olekukonko/tablewriter"
    11  )
    12  
    13  func PrintSchema(w io.Writer, name string, node Node) error {
    14  	return PrintSchemaIndent(w, name, node, "\t", "\n")
    15  }
    16  
    17  func PrintSchemaIndent(w io.Writer, name string, node Node, pattern, newline string) error {
    18  	pw := &printWriter{writer: w}
    19  	pi := &printIndent{}
    20  
    21  	if node.Leaf() {
    22  		printSchemaWithIndent(pw, "", node, pi)
    23  	} else {
    24  		pw.WriteString("message ")
    25  
    26  		if name == "" {
    27  			pw.WriteString("{")
    28  		} else {
    29  			pw.WriteString(name)
    30  			pw.WriteString(" {")
    31  		}
    32  
    33  		pi.pattern = pattern
    34  		pi.newline = newline
    35  		pi.repeat = 1
    36  		pi.writeNewLine(pw)
    37  
    38  		for _, field := range node.Fields() {
    39  			printSchemaWithIndent(pw, field.Name(), field, pi)
    40  			pi.writeNewLine(pw)
    41  		}
    42  
    43  		pw.WriteString("}")
    44  	}
    45  
    46  	return pw.err
    47  }
    48  
    49  func printSchemaWithIndent(w io.StringWriter, name string, node Node, indent *printIndent) {
    50  	indent.writeTo(w)
    51  
    52  	switch {
    53  	case node.Optional():
    54  		w.WriteString("optional ")
    55  	case node.Repeated():
    56  		w.WriteString("repeated ")
    57  	default:
    58  		w.WriteString("required ")
    59  	}
    60  
    61  	if node.Leaf() {
    62  		t := node.Type()
    63  		switch t.Kind() {
    64  		case Boolean:
    65  			w.WriteString("boolean")
    66  		case Int32:
    67  			w.WriteString("int32")
    68  		case Int64:
    69  			w.WriteString("int64")
    70  		case Int96:
    71  			w.WriteString("int96")
    72  		case Float:
    73  			w.WriteString("float")
    74  		case Double:
    75  			w.WriteString("double")
    76  		case ByteArray:
    77  			w.WriteString("binary")
    78  		case FixedLenByteArray:
    79  			w.WriteString("fixed_len_byte_array(")
    80  			w.WriteString(strconv.Itoa(t.Length()))
    81  			w.WriteString(")")
    82  		default:
    83  			w.WriteString("<?>")
    84  		}
    85  
    86  		if name != "" {
    87  			w.WriteString(" ")
    88  			w.WriteString(name)
    89  		}
    90  
    91  		if annotation := annotationOf(node); annotation != "" {
    92  			w.WriteString(" (")
    93  			w.WriteString(annotation)
    94  			w.WriteString(")")
    95  		}
    96  
    97  		w.WriteString(";")
    98  	} else {
    99  		w.WriteString("group")
   100  
   101  		if name != "" {
   102  			w.WriteString(" ")
   103  			w.WriteString(name)
   104  		}
   105  
   106  		if annotation := annotationOf(node); annotation != "" {
   107  			w.WriteString(" (")
   108  			w.WriteString(annotation)
   109  			w.WriteString(")")
   110  		}
   111  
   112  		w.WriteString(" {")
   113  		indent.writeNewLine(w)
   114  		indent.push()
   115  
   116  		for _, field := range node.Fields() {
   117  			printSchemaWithIndent(w, field.Name(), field, indent)
   118  			indent.writeNewLine(w)
   119  		}
   120  
   121  		indent.pop()
   122  		indent.writeTo(w)
   123  		w.WriteString("}")
   124  	}
   125  }
   126  
   127  func annotationOf(node Node) string {
   128  	if logicalType := node.Type().LogicalType(); logicalType != nil {
   129  		return logicalType.String()
   130  	}
   131  	return ""
   132  }
   133  
   134  type printIndent struct {
   135  	pattern string
   136  	newline string
   137  	repeat  int
   138  }
   139  
   140  func (i *printIndent) push() {
   141  	i.repeat++
   142  }
   143  
   144  func (i *printIndent) pop() {
   145  	i.repeat--
   146  }
   147  
   148  func (i *printIndent) writeTo(w io.StringWriter) {
   149  	if i.pattern != "" {
   150  		for n := i.repeat; n > 0; n-- {
   151  			w.WriteString(i.pattern)
   152  		}
   153  	}
   154  }
   155  
   156  func (i *printIndent) writeNewLine(w io.StringWriter) {
   157  	if i.newline != "" {
   158  		w.WriteString(i.newline)
   159  	}
   160  }
   161  
   162  type printWriter struct {
   163  	writer io.Writer
   164  	err    error
   165  }
   166  
   167  func (w *printWriter) Write(b []byte) (int, error) {
   168  	if w.err != nil {
   169  		return 0, w.err
   170  	}
   171  	n, err := w.writer.Write(b)
   172  	if err != nil {
   173  		w.err = err
   174  	}
   175  	return n, err
   176  }
   177  
   178  func (w *printWriter) WriteString(s string) (int, error) {
   179  	if w.err != nil {
   180  		return 0, w.err
   181  	}
   182  	n, err := io.WriteString(w.writer, s)
   183  	if err != nil {
   184  		w.err = err
   185  	}
   186  	return n, err
   187  }
   188  
   189  var (
   190  	_ io.StringWriter = (*printWriter)(nil)
   191  )
   192  
   193  func sprint(name string, node Node) string {
   194  	s := new(strings.Builder)
   195  	PrintSchema(s, name, node)
   196  	return s.String()
   197  }
   198  
   199  func PrintRowGroup(w io.Writer, rowGroup RowGroup) error {
   200  	schema := rowGroup.Schema()
   201  	pw := &printWriter{writer: w}
   202  	tw := tablewriter.NewWriter(pw)
   203  
   204  	columns := schema.Columns()
   205  	header := make([]string, len(columns))
   206  	footer := make([]string, len(columns))
   207  	alignment := make([]int, len(columns))
   208  
   209  	for i, column := range columns {
   210  		leaf, _ := schema.Lookup(column...)
   211  		columnType := leaf.Node.Type()
   212  
   213  		header[i] = strings.Join(column, ".")
   214  		footer[i] = columnType.String()
   215  
   216  		switch columnType.Kind() {
   217  		case ByteArray:
   218  			alignment[i] = tablewriter.ALIGN_LEFT
   219  		default:
   220  			alignment[i] = tablewriter.ALIGN_RIGHT
   221  		}
   222  	}
   223  
   224  	rowbuf := make([]Row, defaultRowBufferSize)
   225  	cells := make([]string, 0, len(columns))
   226  	rows := rowGroup.Rows()
   227  	defer rows.Close()
   228  
   229  	for {
   230  		n, err := rows.ReadRows(rowbuf)
   231  
   232  		for _, row := range rowbuf[:n] {
   233  			cells = cells[:0]
   234  
   235  			for _, value := range row {
   236  				columnIndex := value.Column()
   237  
   238  				for len(cells) <= columnIndex {
   239  					cells = append(cells, "")
   240  				}
   241  
   242  				if cells[columnIndex] == "" {
   243  					cells[columnIndex] = value.String()
   244  				} else {
   245  					cells[columnIndex] += "," + value.String()
   246  					alignment[columnIndex] = tablewriter.ALIGN_LEFT
   247  				}
   248  			}
   249  
   250  			tw.Append(cells)
   251  		}
   252  
   253  		if err != nil {
   254  			if errors.Is(err, io.EOF) {
   255  				break
   256  			}
   257  			return err
   258  		}
   259  	}
   260  
   261  	tw.SetAutoFormatHeaders(false)
   262  	tw.SetColumnAlignment(alignment)
   263  	tw.SetHeaderAlignment(tablewriter.ALIGN_LEFT)
   264  	tw.SetFooterAlignment(tablewriter.ALIGN_LEFT)
   265  	tw.SetHeader(header)
   266  	tw.SetFooter(footer)
   267  	tw.Render()
   268  
   269  	fmt.Fprintf(pw, "%d rows\n\n", rowGroup.NumRows())
   270  	return pw.err
   271  }
   272  
   273  func PrintColumnChunk(w io.Writer, columnChunk ColumnChunk) error {
   274  	pw := &printWriter{writer: w}
   275  	pw.WriteString(columnChunk.Type().String())
   276  	pw.WriteString("\n--------------------------------------------------------------------------------\n")
   277  
   278  	values := [42]Value{}
   279  	pages := columnChunk.Pages()
   280  	numPages, numValues := int64(0), int64(0)
   281  
   282  	defer pages.Close()
   283  	for {
   284  		p, err := pages.ReadPage()
   285  		if err != nil {
   286  			if !errors.Is(err, io.EOF) {
   287  				return err
   288  			}
   289  			break
   290  		}
   291  
   292  		numPages++
   293  		n := p.NumValues()
   294  		if n == 0 {
   295  			fmt.Fprintf(pw, "*** page %d, no values ***\n", numPages)
   296  		} else {
   297  			fmt.Fprintf(pw, "*** page %d, values %d to %d ***\n", numPages, numValues+1, numValues+n)
   298  			printPage(w, p, values[:], numValues+1)
   299  			numValues += n
   300  		}
   301  
   302  		pw.WriteString("\n")
   303  	}
   304  
   305  	return pw.err
   306  }
   307  
   308  func PrintPage(w io.Writer, page Page) error {
   309  	return printPage(w, page, make([]Value, 42), 0)
   310  }
   311  
   312  func printPage(w io.Writer, page Page, values []Value, numValues int64) error {
   313  	r := page.Values()
   314  	for {
   315  		n, err := r.ReadValues(values[:])
   316  		for i, v := range values[:n] {
   317  			_, err := fmt.Fprintf(w, "value %d: %+v\n", numValues+int64(i), v)
   318  			if err != nil {
   319  				return err
   320  			}
   321  		}
   322  		if err != nil {
   323  			if errors.Is(err, io.EOF) {
   324  				err = nil
   325  			}
   326  			return err
   327  		}
   328  	}
   329  }