github.com/apache/arrow/go/v14@v14.0.2/parquet/doc.go (about)

     1  // Licensed to the Apache Software Foundation (ASF) under one
     2  // or more contributor license agreements.  See the NOTICE file
     3  // distributed with this work for additional information
     4  // regarding copyright ownership.  The ASF licenses this file
     5  // to you under the Apache License, Version 2.0 (the
     6  // "License"); you may not use this file except in compliance
     7  // with the License.  You may obtain a copy of the License at
     8  //
     9  // http://www.apache.org/licenses/LICENSE-2.0
    10  //
    11  // Unless required by applicable law or agreed to in writing, software
    12  // distributed under the License is distributed on an "AS IS" BASIS,
    13  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    14  // See the License for the specific language governing permissions and
    15  // limitations under the License.
    16  
    17  // Package parquet provides an implementation of Apache Parquet for Go.
    18  //
    19  // Apache Parquet is an open-source columnar data storage format that uses the
    20  // record shredding and assembly algorithm to accommodate complex data structures
    21  // so that they can be stored efficiently.
    22  //
    23  // While the go.mod states go1.18, everything here should also be compatible
    24  // with Go versions 1.17 and 1.16.
    25  //
    26  // This is a native Go implementation for reading and writing the Parquet file
    27  // format.
    28  //
    29  // Install
    30  //
    31  // You can download the library and CLI utilities via:
    32  //   go get -u github.com/apache/arrow/go/v14/parquet
    33  //   go install github.com/apache/arrow/go/v14/parquet/cmd/parquet_reader@latest
    34  //   go install github.com/apache/arrow/go/v14/parquet/cmd/parquet_schema@latest
    35  //
    36  // Modules
    37  //
    38  // This top-level parquet package contains the basic common types and reader/writer
    39  // properties along with some utilities that are used throughout the other modules.
    40  //
    41  // The file module contains the functions for directly reading/writing parquet files
    42  // including Column Readers and Column Writers.
    43  //
    44  // The metadata module contains the types for managing the lower-level file/rowgroup/column
    45  // metadata inside of a Parquet file, including inspecting the statistics.
    46  //
    47  // The pqarrow module contains helper functions and types for converting directly
    48  // between Parquet and Apache Arrow formats.
    49  //
    50  // The schema module contains the types for manipulating / inspecting / creating
    51  // parquet file schemas.
    52  //
    53  // Primitive Types
    54  //
    55  // The Parquet Primitive Types and their corresponding Go types are Boolean (bool),
    56  // Int32 (int32), Int64 (int64), Int96 (parquet.Int96), Float (float32), Double (float64),
    57  // ByteArray (parquet.ByteArray) and FixedLenByteArray (parquet.FixedLenByteArray).
    58  //
    59  // Encodings
    60  //
    61  // The encoding types supported in this package are:
    62  // Plain, Plain/RLE Dictionary, Delta Binary Packed (only integer types), Delta Byte Array
    63  // (only ByteArray), and Delta Length Byte Array (only ByteArray).
    64  //
    65  // Tip: Some platforms don't necessarily support all kinds of encodings. If you're not
    66  // sure what to use, just use Plain and Dictionary encoding.
    67  package parquet
    68  
    69  //go:generate go run golang.org/x/tools/cmd/stringer -type=Version -linecomment
    70  //go:generate thrift -o internal -r --gen go ../../cpp/src/parquet/parquet.thrift