github.com/fraugster/parquet-go@v0.12.0/compatibility/data_model.go (about)

     1  // +build ignore
     2  
     3  package main
     4  
     5  import (
     6  	"bytes"
     7  	"encoding/json"
     8  	"io"
     9  	"os"
    10  	"os/exec"
    11  )
    12  
    13  type sampleData struct {
    14  	ID       string `json:"id"`
    15  	Index    int64  `json:"index"`
    16  	GUID     string `json:"guid"`
    17  	IsActive bool   `json:"is_active"`
    18  	Balance  string `json:"balance"`
    19  	Picture  string `json:"picture"`
    20  	Age      int32  `json:"age"`
    21  	EyeColor string `json:"eye_color"`
    22  	Name     struct {
    23  		First string `json:"first"`
    24  		Last  string `json:"last"`
    25  	} `json:"name"`
    26  	Company    string   `json:"company"`
    27  	Email      string   `json:"email"`
    28  	Phone      string   `json:"phone"`
    29  	Address    string   `json:"address"`
    30  	About      string   `json:"about"`
    31  	Registered string   `json:"registered"`
    32  	Latitude   float64  `json:"latitude"`
    33  	Longitude  float64  `json:"longitude"`
    34  	Tags       []string `json:"tags"`
    35  	Range      []int32  `json:"range"`
    36  	Friends    []struct {
    37  		ID   int32  `json:"id"`
    38  		Name string `json:"name"`
    39  	} `json:"friends"`
    40  	Greeting      string `json:"greeting"`
    41  	FavoriteFruit string `json:"favorite_fruit"`
    42  }
    43  
    44  // The json library converts all numbers to float64. this is not good, I need to translate them to
    45  // proper type
    46  func (m sampleData) toMap() map[string]interface{} {
    47  	ret := map[string]interface{}{
    48  		"id":        []byte(m.ID),
    49  		"index":     m.Index,
    50  		"guid":      []byte(m.GUID),
    51  		"is_active": m.IsActive,
    52  		"balance":   []byte(m.Balance),
    53  		"picture":   []byte(m.Picture),
    54  		"age":       m.Age,
    55  		"eye_color": []byte(m.EyeColor),
    56  		"name": map[string]interface{}{
    57  			"first": []byte(m.Name.First),
    58  			"last":  []byte(m.Name.Last),
    59  		},
    60  
    61  		"company":        []byte(m.Company),
    62  		"email":          []byte(m.Email),
    63  		"phone":          []byte(m.Phone),
    64  		"address":        []byte(m.Address),
    65  		"about":          []byte(m.About),
    66  		"registered":     []byte(m.Registered),
    67  		"latitude":       m.Latitude,
    68  		"longitude":      m.Longitude,
    69  		"greeting":       []byte(m.Greeting),
    70  		"favorite_fruit": []byte(m.FavoriteFruit),
    71  		"range":          m.Range,
    72  	}
    73  
    74  	var tags [][]byte
    75  	for i := range m.Tags {
    76  		tags = append(tags, []byte(m.Tags[i]))
    77  	}
    78  
    79  	ret["tags"] = tags
    80  
    81  	var friends []map[string]interface{}
    82  	for i := range m.Friends {
    83  		friends = append(friends, map[string]interface{}{
    84  			"id":   m.Friends[i].ID,
    85  			"name": []byte(m.Friends[i].Name),
    86  		})
    87  	}
    88  
    89  	ret["friends"] = friends
    90  	return ret
    91  }
    92  
// schema is the textual definition of the sample_data message. It declares
// one column per sampleData field, named after that field's json tag: "id" is
// the only required column, "tags", "range" and "friends" are repeated, and
// everything else is optional. "name" and "friends" are groups with their own
// nested columns.
//
// NOTE(review): presumably this text is parsed elsewhere as a Parquet schema
// definition; the consumer is not visible in this file.
var schema = `
message sample_data {
	required binary id (STRING);
	optional int64 index;
	optional binary guid (STRING);
	optional boolean is_active;
	optional binary balance (STRING);
	optional binary picture (STRING); 
	optional int32 age; 
	optional binary eye_color (STRING);
	optional group name {
		optional binary first (STRING);
		optional binary last (STRING);
	}
	optional binary company (STRING);
	optional binary email (STRING);
	optional binary phone (STRING);
	optional binary address (STRING);
	optional binary about (STRING);
	optional binary registered (STRING);
	optional double latitude;
	optional double longitude;
	repeated binary tags (STRING);
	repeated int32 range;
	repeated group friends {
		optional int32 id;
		optional binary name (STRING);
	}
	optional binary greeting (STRING);
	optional binary favorite_fruit (STRING); 
}
`
   125  
   126  func loadDataFromJson(fl string) ([]*sampleData, error) {
   127  	f, err := os.Open(fl)
   128  	if err != nil {
   129  		return nil, err
   130  	}
   131  	defer f.Close()
   132  
   133  	var data []*sampleData
   134  	if err := json.NewDecoder(f).Decode(&data); err != nil {
   135  		return nil, err
   136  	}
   137  
   138  	return data, nil
   139  }
   140  
   141  func loadDataFromParquet(file string) ([]*sampleData, error) {
   142  	cmd := exec.Command("java", "-jar", "/parquet-tools.jar", "cat", "--json", file)
   143  	var out bytes.Buffer
   144  	cmd.Stdout = &out
   145  	cmd.Stderr = os.Stderr
   146  	if err := cmd.Run(); err != nil {
   147  		return nil, err
   148  	}
   149  
   150  	dec := json.NewDecoder(&out)
   151  
   152  	var result []*sampleData
   153  	for {
   154  		var data sampleData
   155  		if err := dec.Decode(&data); err != nil {
   156  			if err == io.EOF {
   157  				return result, nil
   158  			}
   159  
   160  			return nil, err
   161  		}
   162  
   163  		result = append(result, &data)
   164  	}
   165  }