github.com/instill-ai/component@v0.16.0-beta/pkg/operator/image/v0/draw.go (about)

     1  package image
     2  
     3  import (
     4  	"bytes"
     5  	"encoding/base64"
     6  	"fmt"
     7  	"image"
     8  	"image/color"
     9  	"image/jpeg"
    10  	"math/rand"
    11  	"sort"
    12  	"strconv"
    13  	"strings"
    14  
    15  	"github.com/fogleman/gg"
    16  	"golang.org/x/image/font/opentype"
    17  	"google.golang.org/protobuf/types/known/structpb"
    18  )
    19  
    20  // BoundingBox holds the coordinates of a bounding box.
    21  type BoundingBox struct {
    22  	Top    int
    23  	Left   int
    24  	Width  int
    25  	Height int
    26  }
    27  
    28  // Size returns the area of the bounding box.
    29  func (b *BoundingBox) Size() int {
    30  	return b.Width * b.Height
    31  }
    32  
    33  func structpbToBoundingBox(s *structpb.Struct) *BoundingBox {
    34  	return &BoundingBox{
    35  		Top:    int(s.Fields["top"].GetNumberValue()),
    36  		Left:   int(s.Fields["left"].GetNumberValue()),
    37  		Width:  int(s.Fields["width"].GetNumberValue()),
    38  		Height: int(s.Fields["height"].GetNumberValue()),
    39  	}
    40  }
    41  
    42  // Keypoint holds the coordinates of a keypoint.
    43  type Keypoint struct {
    44  	x float64
    45  	y float64
    46  	v float64
    47  }
    48  
    49  func structpbToKeypoint(s *structpb.Struct) *Keypoint {
    50  	return &Keypoint{
    51  		x: s.Fields["x"].GetNumberValue(),
    52  		y: s.Fields["y"].GetNumberValue(),
    53  		v: s.Fields["v"].GetNumberValue(),
    54  	}
    55  }
    56  
    57  // Use the same color palette defined in yolov7: https://github.com/WongKinYiu/yolov7/blob/main/utils/plots.py#L449-L462
    58  var palette = []color.RGBA{
    59  	{255, 128, 0, 255},
    60  	{255, 153, 51, 255},
    61  	{255, 178, 102, 255},
    62  	{230, 230, 0, 255},
    63  	{255, 153, 255, 255},
    64  	{153, 204, 255, 255},
    65  	{255, 102, 255, 255},
    66  	{255, 51, 255, 255},
    67  	{102, 178, 255, 255},
    68  	{51, 153, 255, 255},
    69  	{255, 153, 153, 255},
    70  	{255, 102, 102, 255},
    71  	{255, 51, 51, 255},
    72  	{153, 255, 153, 255},
    73  	{102, 255, 102, 255},
    74  	{51, 255, 51, 255},
    75  	{0, 255, 0, 255},
    76  	{0, 0, 255, 255},
    77  	{255, 0, 0, 255},
    78  	{255, 255, 255, 255},
    79  }
    80  
    81  var skeleton = [][]int{{16, 14}, {14, 12}, {17, 15}, {15, 13}, {12, 13}, {6, 12},
    82  	{7, 13}, {6, 7}, {6, 8}, {7, 9}, {8, 10}, {9, 11}, {2, 3}, {1, 2}, {1, 3}, {2, 4}, {3, 5}, {4, 6}, {5, 7},
    83  }
    84  
    85  var keypointLimbColorIdx = []int{9, 9, 9, 9, 7, 7, 7, 0, 0, 0, 0, 0, 16, 16, 16, 16, 16, 16, 16}
    86  var keypointColorIdx = []int{16, 16, 16, 16, 16, 0, 0, 0, 0, 0, 0, 9, 9, 9, 9, 9, 9}
    87  
    88  func convertToBase64(img image.Image) ([]byte, error) {
    89  	var buf bytes.Buffer
    90  	err := jpeg.Encode(&buf, img, &jpeg.Options{Quality: 100})
    91  	if err != nil {
    92  		return nil, err
    93  	}
    94  	base64Str := base64.StdEncoding.EncodeToString(buf.Bytes())
    95  	base64Bytes := []byte(base64Str)
    96  	return base64Bytes, nil
    97  }
    98  
    99  func convertToRGBA(img image.Image) *image.RGBA {
   100  	bounds := img.Bounds()
   101  	rgba := image.NewRGBA(bounds)
   102  	for y := bounds.Min.Y; y < bounds.Max.Y; y++ {
   103  		for x := bounds.Min.X; x < bounds.Max.X; x++ {
   104  			originalColor := img.At(x, y)
   105  			rgba.Set(x, y, color.RGBAModel.Convert(originalColor))
   106  		}
   107  	}
   108  	return rgba
   109  }
   110  
   111  func indexUniqueCategories(objs []*structpb.Value) map[string]int {
   112  	catIdx := make(map[string]int)
   113  	for _, obj := range objs {
   114  		_, exist := catIdx[obj.GetStructValue().Fields["category"].GetStringValue()]
   115  		if !exist {
   116  			catIdx[obj.GetStructValue().Fields["category"].GetStringValue()] = len(catIdx)
   117  		}
   118  	}
   119  	return catIdx
   120  }
   121  
   122  // randomColor generates a random color with full opacity.
   123  func randomColor(seed int, alpha uint8) color.RGBA {
   124  	// Seed random number generator with current time
   125  	r := rand.New(rand.NewSource(int64(seed)))
   126  	return color.RGBA{
   127  		R: uint8(r.Intn(256)), // Random number between 0 and 255
   128  		G: uint8(r.Intn(256)),
   129  		B: uint8(r.Intn(256)),
   130  		A: alpha,
   131  	}
   132  }
   133  
   134  // blendColors blends two RGBA colors together.
   135  func blendColors(c1, c2 color.RGBA) color.RGBA {
   136  	// Simple alpha blending
   137  	alpha := float64(c2.A) / 255
   138  	return color.RGBA{
   139  		R: uint8((float64(c1.R)*(1-alpha) + float64(c2.R)*alpha)),
   140  		G: uint8((float64(c1.G)*(1-alpha) + float64(c2.G)*alpha)),
   141  		B: uint8((float64(c1.B)*(1-alpha) + float64(c2.B)*alpha)),
   142  		A: 255, // you might want to adjust this if you need transparency
   143  	}
   144  }
   145  
   146  // This function checks if a given point has at least one false neighbor.
   147  func hasFalseNeighbor(mask [][]bool, x, y int) bool {
   148  	// Check all eight neighbors
   149  	directions := []struct{ dx, dy int }{
   150  		{-1, 0}, {1, 0}, // Horizontal neighbors
   151  		{0, -1}, {0, 1}, // Vertical neighbors
   152  		{-1, -1}, {1, -1}, // Diagonal neighbors
   153  		{-1, 1}, {1, 1},
   154  	}
   155  
   156  	for _, dir := range directions {
   157  		newX, newY := x+dir.dx, y+dir.dy
   158  		// Check bounds
   159  		if newX < 0 || newX >= len(mask[0]) || newY < 0 || newY >= len(mask) {
   160  			return true // Outside bounds, so treat as a "false" neighbor
   161  		}
   162  		if !mask[newY][newX] {
   163  			return true // It has a false neighbor
   164  		}
   165  	}
   166  
   167  	return false
   168  }
   169  
   170  // This function finds the contour points of a boolean mask.
   171  func findContour(mask [][]bool) []image.Point {
   172  	var points []image.Point
   173  
   174  	for y := 0; y < len(mask); y++ {
   175  		for x := 0; x < len(mask[y]); x++ {
   176  			// Check if the current point is true, and if it has a false neighbor
   177  			if mask[y][x] && hasFalseNeighbor(mask, x, y) {
   178  				points = append(points, image.Point{X: x, Y: y})
   179  			}
   180  		}
   181  	}
   182  
   183  	return points
   184  }
   185  
   186  func rleDecode(rle []int, width, height int) [][]bool {
   187  	// Create a 2D slice to hold the mask.
   188  	mask := make([][]bool, height)
   189  	for i := range mask {
   190  		mask[i] = make([]bool, width)
   191  	}
   192  
   193  	x, y := 0, 0
   194  	fill := false
   195  
   196  	for _, val := range rle {
   197  		for v := 0; v < val; v++ {
   198  			mask[y][x] = fill
   199  			y++
   200  			if y >= height {
   201  				y = 0
   202  				x++
   203  			}
   204  		}
   205  		fill = !fill // Alternate between filling and skipping.
   206  	}
   207  	return mask
   208  }
   209  
   210  func drawSemanticMask(img *image.RGBA, rle string, colorSeed int) error {
   211  	// Split the string by commas to get the individual number strings.
   212  	numberStrings := strings.Split(rle, ",")
   213  
   214  	// Allocate an array of integers with the same length as the number of numberStrings.
   215  	rleInts := make([]int, len(numberStrings))
   216  
   217  	// Convert each number string to an integer.
   218  	for i, s := range numberStrings {
   219  		n, err := strconv.Atoi(strings.TrimSpace(s))
   220  		if err != nil {
   221  			return fmt.Errorf("failed to convert RLE string to int: %s, error: %v", s, err)
   222  		}
   223  		rleInts[i] = n
   224  	}
   225  
   226  	bound := img.Bounds()
   227  
   228  	// Decode the RLE mask for the full image size.
   229  	mask := rleDecode(rleInts, bound.Dx(), bound.Dy())
   230  
   231  	// Iterate over the bounding box and draw the mask onto the image.
   232  	for y := 0; y < bound.Dy(); y++ {
   233  		for x := 0; x < bound.Dx(); x++ {
   234  			if mask[y][x] {
   235  				// The mask is present for this pixel, so draw it on the image.
   236  				// Here you could set a specific color or just use the mask value.
   237  				// For example, let's paint the mask as a red semi-transparent overlay:
   238  				originalColor := img.At(x, y).(color.RGBA)
   239  				// Blend the original color with the mask color.
   240  				blendedColor := blendColors(originalColor, randomColor(colorSeed, 128))
   241  				img.Set(x, y, blendedColor)
   242  			}
   243  		}
   244  	}
   245  
   246  	dc := gg.NewContextForRGBA(img)
   247  	dc.SetColor(color.RGBA{255, 255, 255, 255})
   248  
   249  	// Find contour points
   250  	contourPoints := findContour(mask)
   251  
   252  	// Draw the contour
   253  	for _, pt := range contourPoints {
   254  		// Scale points as needed for your canvas size
   255  		dc.DrawPoint(float64(pt.X), float64(pt.Y), 0.5)
   256  		dc.Fill()
   257  	}
   258  
   259  	return nil
   260  }
   261  
   262  func drawInstanceMask(img *image.RGBA, bbox *BoundingBox, rle string, colorSeed int) error {
   263  
   264  	// Split the string by commas to get the individual number strings.
   265  	numberStrings := strings.Split(rle, ",")
   266  
   267  	// Allocate an array of integers with the same length as the number of numberStrings.
   268  	rleInts := make([]int, len(numberStrings))
   269  
   270  	// Convert each number string to an integer.
   271  	for i, s := range numberStrings {
   272  		n, err := strconv.Atoi(strings.TrimSpace(s))
   273  		if err != nil {
   274  			return fmt.Errorf("failed to convert RLE string to int: %s, error: %v", s, err)
   275  		}
   276  		rleInts[i] = n
   277  	}
   278  
   279  	// Decode the RLE mask for the full image size.
   280  	mask := rleDecode(rleInts, bbox.Width, bbox.Height)
   281  
   282  	// Iterate over the bounding box and draw the mask onto the image.
   283  	for y := 0; y < bbox.Height; y++ {
   284  		for x := 0; x < bbox.Width; x++ {
   285  			if mask[y][x] {
   286  				// The mask is present for this pixel, so draw it on the image.
   287  				// Here you could set a specific color or just use the mask value.
   288  				// For example, let's paint the mask as a red semi-transparent overlay:
   289  				originalColor := img.At(x+bbox.Left, y+bbox.Top).(color.RGBA)
   290  				// Blend the original color with the mask color.
   291  				blendedColor := blendColors(originalColor, randomColor(colorSeed, 156))
   292  				img.Set(x+bbox.Left, y+bbox.Top, blendedColor)
   293  			}
   294  		}
   295  	}
   296  
   297  	dc := gg.NewContextForRGBA(img)
   298  	dc.SetColor(randomColor(colorSeed, 255))
   299  	contourPoints := findContour(mask)
   300  	for _, pt := range contourPoints {
   301  		dc.DrawPoint(float64(pt.X+bbox.Left), float64(pt.Y+bbox.Top), 0.5)
   302  		dc.Fill()
   303  	}
   304  
   305  	return nil
   306  }
   307  
   308  func drawBoundingBox(img *image.RGBA, bbox *BoundingBox, colorSeed int) error {
   309  	dc := gg.NewContextForRGBA(img)
   310  	originalColor := img.At(bbox.Left, bbox.Top).(color.RGBA)
   311  	blendedColor := blendColors(originalColor, randomColor(colorSeed, 255))
   312  	dc.SetColor(blendedColor)
   313  	dc.SetLineWidth(3)
   314  	dc.DrawRoundedRectangle(float64(bbox.Left), float64(bbox.Top), float64(bbox.Width), float64(bbox.Height), 4)
   315  	dc.Stroke()
   316  	return nil
   317  }
   318  
   319  func drawSkeleton(img *image.RGBA, kpts []*Keypoint) error {
   320  	dc := gg.NewContextForRGBA(img)
   321  	for idx, kpt := range kpts {
   322  		if kpt.v > 0.5 {
   323  			dc.SetColor(palette[keypointColorIdx[idx]])
   324  			dc.DrawPoint(kpt.x, kpt.y, 2)
   325  			dc.Fill()
   326  		}
   327  	}
   328  	for idx, sk := range skeleton {
   329  		if kpts[sk[0]-1].v > 0.5 && kpts[sk[1]-1].v > 0.5 {
   330  			dc.SetColor(palette[keypointLimbColorIdx[idx]])
   331  			dc.SetLineWidth(2)
   332  			dc.DrawLine(kpts[sk[0]-1].x, kpts[sk[0]-1].y, kpts[sk[1]-1].x, kpts[sk[1]-1].y)
   333  			dc.Stroke()
   334  		}
   335  	}
   336  	return nil
   337  }
   338  
   339  func drawImageLabel(img *image.RGBA, category string, score float64) error {
   340  
   341  	dc := gg.NewContextForRGBA(img)
   342  
   343  	// Parse the font
   344  	font, err := opentype.Parse(IBMPlexSansRegular)
   345  	if err != nil {
   346  		return err
   347  	}
   348  
   349  	// Create a font face
   350  	face, err := opentype.NewFace(font, &opentype.FaceOptions{
   351  		Size: 20,
   352  		DPI:  72,
   353  	})
   354  	if err != nil {
   355  		return err
   356  	}
   357  
   358  	// Set the font face
   359  	dc.SetFontFace(face)
   360  
   361  	w, h := dc.MeasureString(category)
   362  
   363  	// Set the rectangle padding
   364  	padding := 2.0
   365  
   366  	x := padding
   367  	y := padding
   368  	w += 6 * padding
   369  	h += padding
   370  	dc.SetRGB(0, 0, 0)
   371  	dc.DrawRoundedRectangle(x, y, w, h, 4)
   372  	dc.Fill()
   373  	dc.SetColor(color.RGBA{255, 255, 255, 255})
   374  	dc.DrawString(category, 4*padding, 11*padding)
   375  	return nil
   376  }
   377  
   378  func drawObjectLabel(img *image.RGBA, bbox *BoundingBox, category string, maskAdjustment bool, colorSeed int) error {
   379  
   380  	dc := gg.NewContextForRGBA(img)
   381  
   382  	// Parse the font
   383  	font, err := opentype.Parse(IBMPlexSansRegular)
   384  	if err != nil {
   385  		return err
   386  	}
   387  
   388  	// Create a font face
   389  	face, err := opentype.NewFace(font, &opentype.FaceOptions{
   390  		Size: 20,
   391  		DPI:  72,
   392  	})
   393  	if err != nil {
   394  		return err
   395  	}
   396  
   397  	// Set the font face
   398  	dc.SetFontFace(face)
   399  
   400  	w, h := dc.MeasureString(category)
   401  
   402  	// Set the rectangle padding
   403  	padding := 2.0
   404  
   405  	if bbox.Size() > 10000 && maskAdjustment {
   406  		x := float64(bbox.Left) - 2*padding
   407  		y := float64(bbox.Top) + float64(bbox.Height)/2 - padding
   408  		w += 4 * padding
   409  		h += padding
   410  		dc.SetRGBA(0, 0, 0, 128)
   411  		dc.DrawRoundedRectangle(x, y, w, h, 4)
   412  		dc.Fill()
   413  		// Draw the text centered on the screen
   414  		originalColor := color.RGBA{255, 255, 255, 255}
   415  		// Blend the original color with the mask color.
   416  		blendedColor := blendColors(originalColor, randomColor(colorSeed, 64))
   417  		dc.SetColor(blendedColor)
   418  		dc.DrawString(category, float64(bbox.Left), float64(bbox.Top)+float64(bbox.Height)/2+8*padding)
   419  	} else {
   420  		x := float64(bbox.Left) - 2*padding
   421  		y := float64(bbox.Top) - 1.1*h - padding
   422  		w += 4 * padding
   423  		h += padding
   424  		dc.SetRGBA(0, 0, 0, 128)
   425  		dc.DrawRoundedRectangle(x, y, w, h, 4)
   426  		dc.Fill()
   427  		// Draw the text centered on the screen
   428  		originalColor := color.RGBA{255, 255, 255, 255}
   429  		// Blend the original color with the mask color.
   430  		blendedColor := blendColors(originalColor, randomColor(colorSeed, 64))
   431  		dc.SetColor(blendedColor)
   432  		dc.DrawString(category, float64(bbox.Left), float64(bbox.Top)-h/3-padding)
   433  	}
   434  
   435  	return nil
   436  }
   437  
   438  func draOCRLabel(img *image.RGBA, bbox *BoundingBox, text string) error {
   439  
   440  	dc := gg.NewContextForRGBA(img)
   441  
   442  	// Parse the font
   443  	font, err := opentype.Parse(IBMPlexSansRegular)
   444  	if err != nil {
   445  		return err
   446  	}
   447  
   448  	// Create a font face
   449  	face, err := opentype.NewFace(font, &opentype.FaceOptions{
   450  		Size: 20,
   451  		DPI:  72,
   452  	})
   453  	if err != nil {
   454  		return err
   455  	}
   456  
   457  	// Set the font face
   458  	dc.SetFontFace(face)
   459  
   460  	w, h := dc.MeasureString(text)
   461  
   462  	// Set the rectangle padding
   463  	padding := 2.0
   464  
   465  	x := float64(bbox.Left)
   466  	y := float64(bbox.Top)
   467  	w += 4 * padding
   468  	h += padding
   469  	dc.SetRGBA(0, 0, 0, 128)
   470  	dc.DrawRoundedRectangle(x, y, w, h, 4)
   471  	dc.Fill()
   472  	dc.SetColor(color.RGBA{255, 255, 255, 255})
   473  	dc.DrawString(text, float64(bbox.Left)+2*padding, float64(bbox.Top)+h-4*padding)
   474  
   475  	return nil
   476  }
   477  
   478  func drawClassification(srcImg image.Image, category string, score float64) ([]byte, error) {
   479  	img := convertToRGBA(srcImg)
   480  
   481  	if err := drawImageLabel(img, category, score); err != nil {
   482  		return nil, err
   483  	}
   484  
   485  	base64Img, err := convertToBase64(img)
   486  	if err != nil {
   487  		return nil, err
   488  	}
   489  	return base64Img, nil
   490  }
   491  
   492  func drawDetection(srcImg image.Image, objs []*structpb.Value) ([]byte, error) {
   493  	img := convertToRGBA(srcImg)
   494  
   495  	catIdx := indexUniqueCategories(objs)
   496  
   497  	for _, obj := range objs {
   498  		bbox := structpbToBoundingBox(obj.GetStructValue().Fields["bounding_box"].GetStructValue())
   499  		if err := drawBoundingBox(img, bbox, catIdx[obj.GetStructValue().Fields["category"].GetStringValue()]); err != nil {
   500  			return nil, err
   501  		}
   502  	}
   503  
   504  	for _, obj := range objs {
   505  		bbox := structpbToBoundingBox(obj.GetStructValue().Fields["bounding_box"].GetStructValue())
   506  		if err := drawObjectLabel(img, bbox, obj.GetStructValue().Fields["category"].GetStringValue(), false, catIdx[obj.GetStructValue().Fields["category"].GetStringValue()]); err != nil {
   507  			return nil, err
   508  		}
   509  	}
   510  
   511  	base64Img, err := convertToBase64(img)
   512  	if err != nil {
   513  		return nil, err
   514  	}
   515  	return base64Img, nil
   516  }
   517  
   518  func drawKeypoint(srcImg image.Image, objs []*structpb.Value) ([]byte, error) {
   519  	img := convertToRGBA(srcImg)
   520  	for _, obj := range objs {
   521  		kpts := make([]*Keypoint, len(obj.GetStructValue().Fields["keypoints"].GetListValue().Values))
   522  		for idx, kpt := range obj.GetStructValue().Fields["keypoints"].GetListValue().Values {
   523  			kpts[idx] = structpbToKeypoint(kpt.GetStructValue())
   524  		}
   525  		if err := drawSkeleton(img, kpts); err != nil {
   526  			return nil, err
   527  		}
   528  	}
   529  
   530  	base64Img, err := convertToBase64(img)
   531  	if err != nil {
   532  		return nil, err
   533  	}
   534  	return base64Img, nil
   535  }
   536  
   537  func drawOCR(srcImg image.Image, objs []*structpb.Value) ([]byte, error) {
   538  	img := convertToRGBA(srcImg)
   539  
   540  	for _, obj := range objs {
   541  		bbox := structpbToBoundingBox(obj.GetStructValue().Fields["bounding_box"].GetStructValue())
   542  		if err := draOCRLabel(img, bbox, obj.GetStructValue().Fields["text"].GetStringValue()); err != nil {
   543  			return nil, err
   544  		}
   545  	}
   546  
   547  	base64Img, err := convertToBase64(img)
   548  	if err != nil {
   549  		return nil, err
   550  	}
   551  	return base64Img, nil
   552  }
   553  
   554  func drawInstanceSegmentation(srcImg image.Image, objs []*structpb.Value) ([]byte, error) {
   555  
   556  	img := convertToRGBA(srcImg)
   557  
   558  	// Sort the objects by size.
   559  	sort.Slice(objs, func(i, j int) bool {
   560  		bbox1 := structpbToBoundingBox(objs[i].GetStructValue().Fields["bounding_box"].GetStructValue())
   561  		bbox2 := structpbToBoundingBox(objs[j].GetStructValue().Fields["bounding_box"].GetStructValue())
   562  		return bbox1.Size() > bbox2.Size()
   563  	})
   564  
   565  	for instIdx, obj := range objs {
   566  		bbox := structpbToBoundingBox(obj.GetStructValue().Fields["bounding_box"].GetStructValue())
   567  		if err := drawInstanceMask(img, bbox, obj.GetStructValue().Fields["rle"].GetStringValue(), instIdx); err != nil {
   568  			return nil, err
   569  		}
   570  	}
   571  
   572  	for instIdx, obj := range objs {
   573  		bbox := structpbToBoundingBox(obj.GetStructValue().Fields["bounding_box"].GetStructValue())
   574  		text := obj.GetStructValue().Fields["category"].GetStringValue()
   575  		if err := drawObjectLabel(img, bbox, text, true, instIdx); err != nil {
   576  			return nil, err
   577  		}
   578  	}
   579  
   580  	base64Img, err := convertToBase64(img)
   581  	if err != nil {
   582  		return nil, err
   583  	}
   584  	return base64Img, nil
   585  }
   586  
   587  func drawSemanticSegmentation(srcImg image.Image, stuffs []*structpb.Value) ([]byte, error) {
   588  	img := convertToRGBA(srcImg)
   589  
   590  	for idx, stuff := range stuffs {
   591  		if err := drawSemanticMask(img, stuff.GetStructValue().Fields["rle"].GetStringValue(), idx); err != nil {
   592  			return nil, err
   593  		}
   594  	}
   595  
   596  	base64Img, err := convertToBase64(img)
   597  	if err != nil {
   598  		return nil, err
   599  	}
   600  	return base64Img, nil
   601  }