github.com/instill-ai/component@v0.16.0-beta/pkg/operator/text/v0/convert_test.go (about)

     1  package text
     2  
     3  import (
     4  	"fmt"
     5  	"os"
     6  	"testing"
     7  
     8  	"encoding/base64"
     9  
    10  	"code.sajari.com/docconv"
    11  	"google.golang.org/protobuf/types/known/structpb"
    12  )
    13  
    14  // TestConvertToText tests the convert to text task
    15  func TestConvertToText(t *testing.T) {
    16  
    17  	tests := []struct {
    18  		name     string
    19  		filepath string
    20  	}{
    21  		{
    22  			name:     "Convert pdf file",
    23  			filepath: "testdata/test.pdf",
    24  		},
    25  		{
    26  			name:     "Convert docx file",
    27  			filepath: "testdata/test.docx",
    28  		},
    29  		{
    30  			name:     "Convert html file",
    31  			filepath: "testdata/test.html",
    32  		},
    33  		{
    34  			name:     "Convert odt file",
    35  			filepath: "testdata/test.odt",
    36  		},
    37  		{
    38  			name:     "Convert rtf file",
    39  			filepath: "testdata/test.rtf",
    40  		},
    41  		{
    42  			name:     "Convert png file",
    43  			filepath: "testdata/test.png",
    44  		},
    45  		{
    46  			name:     "Convert jpg file",
    47  			filepath: "testdata/test.jpg",
    48  		},
    49  		{
    50  			name:     "Convert tiff file",
    51  			filepath: "testdata/test.tif",
    52  		},
    53  		{
    54  			name:     "Convert txt file",
    55  			filepath: "testdata/test.txt",
    56  		},
    57  	}
    58  
    59  	for _, test := range tests {
    60  		t.Run(test.name, func(t *testing.T) {
    61  			// Read the fileContent content
    62  			fileContent, err := os.ReadFile(test.filepath)
    63  			if err != nil {
    64  				t.Fatalf("error reading the file: %s\n", err)
    65  				return
    66  			}
    67  
    68  			base64DataURI := fmt.Sprintf("data:%s;base64,%s", docconv.MimeTypeByExtension(test.filepath), base64.StdEncoding.EncodeToString(fileContent))
    69  
    70  			input := &structpb.Struct{
    71  				Fields: map[string]*structpb.Value{
    72  					"doc": {Kind: &structpb.Value_StringValue{StringValue: base64DataURI}},
    73  				},
    74  			}
    75  			inputs := []*structpb.Struct{
    76  				input,
    77  			}
    78  
    79  			e := &execution{}
    80  			e.Task = "TASK_CONVERT_TO_TEXT"
    81  
    82  			if outputs, err := e.Execute(inputs); err != nil {
    83  				t.Fatalf("convertToText returned an error: %v", err)
    84  			} else if outputs[0].Fields["body"].GetStringValue() == "" {
    85  				t.Fatal("convertToText returned an empty body")
    86  			} else if outputs[0].Fields["meta"].GetStructValue() == nil {
    87  				t.Fatal("convertToText returned a nil meta")
    88  			}
    89  
    90  		})
    91  	}
    92  
    93  }