github.com/instill-ai/component@v0.16.0-beta/pkg/operator/image/v0/draw.go (about) 1 package image 2 3 import ( 4 "bytes" 5 "encoding/base64" 6 "fmt" 7 "image" 8 "image/color" 9 "image/jpeg" 10 "math/rand" 11 "sort" 12 "strconv" 13 "strings" 14 15 "github.com/fogleman/gg" 16 "golang.org/x/image/font/opentype" 17 "google.golang.org/protobuf/types/known/structpb" 18 ) 19 20 // BoundingBox holds the coordinates of a bounding box. 21 type BoundingBox struct { 22 Top int 23 Left int 24 Width int 25 Height int 26 } 27 28 // Size returns the area of the bounding box. 29 func (b *BoundingBox) Size() int { 30 return b.Width * b.Height 31 } 32 33 func structpbToBoundingBox(s *structpb.Struct) *BoundingBox { 34 return &BoundingBox{ 35 Top: int(s.Fields["top"].GetNumberValue()), 36 Left: int(s.Fields["left"].GetNumberValue()), 37 Width: int(s.Fields["width"].GetNumberValue()), 38 Height: int(s.Fields["height"].GetNumberValue()), 39 } 40 } 41 42 // Keypoint holds the coordinates of a keypoint. 43 type Keypoint struct { 44 x float64 45 y float64 46 v float64 47 } 48 49 func structpbToKeypoint(s *structpb.Struct) *Keypoint { 50 return &Keypoint{ 51 x: s.Fields["x"].GetNumberValue(), 52 y: s.Fields["y"].GetNumberValue(), 53 v: s.Fields["v"].GetNumberValue(), 54 } 55 } 56 57 // Use the same color palette defined in yolov7: https://github.com/WongKinYiu/yolov7/blob/main/utils/plots.py#L449-L462 58 var palette = []color.RGBA{ 59 {255, 128, 0, 255}, 60 {255, 153, 51, 255}, 61 {255, 178, 102, 255}, 62 {230, 230, 0, 255}, 63 {255, 153, 255, 255}, 64 {153, 204, 255, 255}, 65 {255, 102, 255, 255}, 66 {255, 51, 255, 255}, 67 {102, 178, 255, 255}, 68 {51, 153, 255, 255}, 69 {255, 153, 153, 255}, 70 {255, 102, 102, 255}, 71 {255, 51, 51, 255}, 72 {153, 255, 153, 255}, 73 {102, 255, 102, 255}, 74 {51, 255, 51, 255}, 75 {0, 255, 0, 255}, 76 {0, 0, 255, 255}, 77 {255, 0, 0, 255}, 78 {255, 255, 255, 255}, 79 } 80 81 var skeleton = [][]int{{16, 14}, {14, 12}, {17, 15}, {15, 13}, {12, 13}, {6, 12}, 82 {7, 13}, {6, 7}, {6, 8}, {7, 9}, {8, 10}, {9, 11}, {2, 3}, {1, 2}, {1, 3}, {2, 4}, {3, 5}, {4, 6}, {5, 7}, 83 } 84 85 var keypointLimbColorIdx = []int{9, 9, 9, 9, 7, 7, 7, 0, 0, 0, 0, 0, 16, 16, 16, 16, 16, 16, 16} 86 var keypointColorIdx = []int{16, 16, 16, 16, 16, 0, 0, 0, 0, 0, 0, 9, 9, 9, 9, 9, 9} 87 88 func convertToBase64(img image.Image) ([]byte, error) { 89 var buf bytes.Buffer 90 err := jpeg.Encode(&buf, img, &jpeg.Options{Quality: 100}) 91 if err != nil { 92 return nil, err 93 } 94 base64Str := base64.StdEncoding.EncodeToString(buf.Bytes()) 95 base64Bytes := []byte(base64Str) 96 return base64Bytes, nil 97 } 98 99 func convertToRGBA(img image.Image) *image.RGBA { 100 bounds := img.Bounds() 101 rgba := image.NewRGBA(bounds) 102 for y := bounds.Min.Y; y < bounds.Max.Y; y++ { 103 for x := bounds.Min.X; x < bounds.Max.X; x++ { 104 originalColor := img.At(x, y) 105 rgba.Set(x, y, color.RGBAModel.Convert(originalColor)) 106 } 107 } 108 return rgba 109 } 110 111 func indexUniqueCategories(objs []*structpb.Value) map[string]int { 112 catIdx := make(map[string]int) 113 for _, obj := range objs { 114 _, exist := catIdx[obj.GetStructValue().Fields["category"].GetStringValue()] 115 if !exist { 116 catIdx[obj.GetStructValue().Fields["category"].GetStringValue()] = len(catIdx) 117 } 118 } 119 return catIdx 120 } 121 122 // randomColor generates a random color with full opacity. 123 func randomColor(seed int, alpha uint8) color.RGBA { 124 // Seed random number generator with current time 125 r := rand.New(rand.NewSource(int64(seed))) 126 return color.RGBA{ 127 R: uint8(r.Intn(256)), // Random number between 0 and 255 128 G: uint8(r.Intn(256)), 129 B: uint8(r.Intn(256)), 130 A: alpha, 131 } 132 } 133 134 // blendColors blends two RGBA colors together. 135 func blendColors(c1, c2 color.RGBA) color.RGBA { 136 // Simple alpha blending 137 alpha := float64(c2.A) / 255 138 return color.RGBA{ 139 R: uint8((float64(c1.R)*(1-alpha) + float64(c2.R)*alpha)), 140 G: uint8((float64(c1.G)*(1-alpha) + float64(c2.G)*alpha)), 141 B: uint8((float64(c1.B)*(1-alpha) + float64(c2.B)*alpha)), 142 A: 255, // you might want to adjust this if you need transparency 143 } 144 } 145 146 // This function checks if a given point has at least one false neighbor. 147 func hasFalseNeighbor(mask [][]bool, x, y int) bool { 148 // Check all eight neighbors 149 directions := []struct{ dx, dy int }{ 150 {-1, 0}, {1, 0}, // Horizontal neighbors 151 {0, -1}, {0, 1}, // Vertical neighbors 152 {-1, -1}, {1, -1}, // Diagonal neighbors 153 {-1, 1}, {1, 1}, 154 } 155 156 for _, dir := range directions { 157 newX, newY := x+dir.dx, y+dir.dy 158 // Check bounds 159 if newX < 0 || newX >= len(mask[0]) || newY < 0 || newY >= len(mask) { 160 return true // Outside bounds, so treat as a "false" neighbor 161 } 162 if !mask[newY][newX] { 163 return true // It has a false neighbor 164 } 165 } 166 167 return false 168 } 169 170 // This function finds the contour points of a boolean mask. 171 func findContour(mask [][]bool) []image.Point { 172 var points []image.Point 173 174 for y := 0; y < len(mask); y++ { 175 for x := 0; x < len(mask[y]); x++ { 176 // Check if the current point is true, and if it has a false neighbor 177 if mask[y][x] && hasFalseNeighbor(mask, x, y) { 178 points = append(points, image.Point{X: x, Y: y}) 179 } 180 } 181 } 182 183 return points 184 } 185 186 func rleDecode(rle []int, width, height int) [][]bool { 187 // Create a 2D slice to hold the mask. 188 mask := make([][]bool, height) 189 for i := range mask { 190 mask[i] = make([]bool, width) 191 } 192 193 x, y := 0, 0 194 fill := false 195 196 for _, val := range rle { 197 for v := 0; v < val; v++ { 198 mask[y][x] = fill 199 y++ 200 if y >= height { 201 y = 0 202 x++ 203 } 204 } 205 fill = !fill // Alternate between filling and skipping. 206 } 207 return mask 208 } 209 210 func drawSemanticMask(img *image.RGBA, rle string, colorSeed int) error { 211 // Split the string by commas to get the individual number strings. 212 numberStrings := strings.Split(rle, ",") 213 214 // Allocate an array of integers with the same length as the number of numberStrings. 215 rleInts := make([]int, len(numberStrings)) 216 217 // Convert each number string to an integer. 218 for i, s := range numberStrings { 219 n, err := strconv.Atoi(strings.TrimSpace(s)) 220 if err != nil { 221 return fmt.Errorf("failed to convert RLE string to int: %s, error: %v", s, err) 222 } 223 rleInts[i] = n 224 } 225 226 bound := img.Bounds() 227 228 // Decode the RLE mask for the full image size. 229 mask := rleDecode(rleInts, bound.Dx(), bound.Dy()) 230 231 // Iterate over the bounding box and draw the mask onto the image. 232 for y := 0; y < bound.Dy(); y++ { 233 for x := 0; x < bound.Dx(); x++ { 234 if mask[y][x] { 235 // The mask is present for this pixel, so draw it on the image. 236 // Here you could set a specific color or just use the mask value. 237 // For example, let's paint the mask as a red semi-transparent overlay: 238 originalColor := img.At(x, y).(color.RGBA) 239 // Blend the original color with the mask color. 240 blendedColor := blendColors(originalColor, randomColor(colorSeed, 128)) 241 img.Set(x, y, blendedColor) 242 } 243 } 244 } 245 246 dc := gg.NewContextForRGBA(img) 247 dc.SetColor(color.RGBA{255, 255, 255, 255}) 248 249 // Find contour points 250 contourPoints := findContour(mask) 251 252 // Draw the contour 253 for _, pt := range contourPoints { 254 // Scale points as needed for your canvas size 255 dc.DrawPoint(float64(pt.X), float64(pt.Y), 0.5) 256 dc.Fill() 257 } 258 259 return nil 260 } 261 262 func drawInstanceMask(img *image.RGBA, bbox *BoundingBox, rle string, colorSeed int) error { 263 264 // Split the string by commas to get the individual number strings. 265 numberStrings := strings.Split(rle, ",") 266 267 // Allocate an array of integers with the same length as the number of numberStrings. 268 rleInts := make([]int, len(numberStrings)) 269 270 // Convert each number string to an integer. 271 for i, s := range numberStrings { 272 n, err := strconv.Atoi(strings.TrimSpace(s)) 273 if err != nil { 274 return fmt.Errorf("failed to convert RLE string to int: %s, error: %v", s, err) 275 } 276 rleInts[i] = n 277 } 278 279 // Decode the RLE mask for the full image size. 280 mask := rleDecode(rleInts, bbox.Width, bbox.Height) 281 282 // Iterate over the bounding box and draw the mask onto the image. 283 for y := 0; y < bbox.Height; y++ { 284 for x := 0; x < bbox.Width; x++ { 285 if mask[y][x] { 286 // The mask is present for this pixel, so draw it on the image. 287 // Here you could set a specific color or just use the mask value. 288 // For example, let's paint the mask as a red semi-transparent overlay: 289 originalColor := img.At(x+bbox.Left, y+bbox.Top).(color.RGBA) 290 // Blend the original color with the mask color. 291 blendedColor := blendColors(originalColor, randomColor(colorSeed, 156)) 292 img.Set(x+bbox.Left, y+bbox.Top, blendedColor) 293 } 294 } 295 } 296 297 dc := gg.NewContextForRGBA(img) 298 dc.SetColor(randomColor(colorSeed, 255)) 299 contourPoints := findContour(mask) 300 for _, pt := range contourPoints { 301 dc.DrawPoint(float64(pt.X+bbox.Left), float64(pt.Y+bbox.Top), 0.5) 302 dc.Fill() 303 } 304 305 return nil 306 } 307 308 func drawBoundingBox(img *image.RGBA, bbox *BoundingBox, colorSeed int) error { 309 dc := gg.NewContextForRGBA(img) 310 originalColor := img.At(bbox.Left, bbox.Top).(color.RGBA) 311 blendedColor := blendColors(originalColor, randomColor(colorSeed, 255)) 312 dc.SetColor(blendedColor) 313 dc.SetLineWidth(3) 314 dc.DrawRoundedRectangle(float64(bbox.Left), float64(bbox.Top), float64(bbox.Width), float64(bbox.Height), 4) 315 dc.Stroke() 316 return nil 317 } 318 319 func drawSkeleton(img *image.RGBA, kpts []*Keypoint) error { 320 dc := gg.NewContextForRGBA(img) 321 for idx, kpt := range kpts { 322 if kpt.v > 0.5 { 323 dc.SetColor(palette[keypointColorIdx[idx]]) 324 dc.DrawPoint(kpt.x, kpt.y, 2) 325 dc.Fill() 326 } 327 } 328 for idx, sk := range skeleton { 329 if kpts[sk[0]-1].v > 0.5 && kpts[sk[1]-1].v > 0.5 { 330 dc.SetColor(palette[keypointLimbColorIdx[idx]]) 331 dc.SetLineWidth(2) 332 dc.DrawLine(kpts[sk[0]-1].x, kpts[sk[0]-1].y, kpts[sk[1]-1].x, kpts[sk[1]-1].y) 333 dc.Stroke() 334 } 335 } 336 return nil 337 } 338 339 func drawImageLabel(img *image.RGBA, category string, score float64) error { 340 341 dc := gg.NewContextForRGBA(img) 342 343 // Parse the font 344 font, err := opentype.Parse(IBMPlexSansRegular) 345 if err != nil { 346 return err 347 } 348 349 // Create a font face 350 face, err := opentype.NewFace(font, &opentype.FaceOptions{ 351 Size: 20, 352 DPI: 72, 353 }) 354 if err != nil { 355 return err 356 } 357 358 // Set the font face 359 dc.SetFontFace(face) 360 361 w, h := dc.MeasureString(category) 362 363 // Set the rectangle padding 364 padding := 2.0 365 366 x := padding 367 y := padding 368 w += 6 * padding 369 h += padding 370 dc.SetRGB(0, 0, 0) 371 dc.DrawRoundedRectangle(x, y, w, h, 4) 372 dc.Fill() 373 dc.SetColor(color.RGBA{255, 255, 255, 255}) 374 dc.DrawString(category, 4*padding, 11*padding) 375 return nil 376 } 377 378 func drawObjectLabel(img *image.RGBA, bbox *BoundingBox, category string, maskAdjustment bool, colorSeed int) error { 379 380 dc := gg.NewContextForRGBA(img) 381 382 // Parse the font 383 font, err := opentype.Parse(IBMPlexSansRegular) 384 if err != nil { 385 return err 386 } 387 388 // Create a font face 389 face, err := opentype.NewFace(font, &opentype.FaceOptions{ 390 Size: 20, 391 DPI: 72, 392 }) 393 if err != nil { 394 return err 395 } 396 397 // Set the font face 398 dc.SetFontFace(face) 399 400 w, h := dc.MeasureString(category) 401 402 // Set the rectangle padding 403 padding := 2.0 404 405 if bbox.Size() > 10000 && maskAdjustment { 406 x := float64(bbox.Left) - 2*padding 407 y := float64(bbox.Top) + float64(bbox.Height)/2 - padding 408 w += 4 * padding 409 h += padding 410 dc.SetRGBA(0, 0, 0, 128) 411 dc.DrawRoundedRectangle(x, y, w, h, 4) 412 dc.Fill() 413 // Draw the text centered on the screen 414 originalColor := color.RGBA{255, 255, 255, 255} 415 // Blend the original color with the mask color. 416 blendedColor := blendColors(originalColor, randomColor(colorSeed, 64)) 417 dc.SetColor(blendedColor) 418 dc.DrawString(category, float64(bbox.Left), float64(bbox.Top)+float64(bbox.Height)/2+8*padding) 419 } else { 420 x := float64(bbox.Left) - 2*padding 421 y := float64(bbox.Top) - 1.1*h - padding 422 w += 4 * padding 423 h += padding 424 dc.SetRGBA(0, 0, 0, 128) 425 dc.DrawRoundedRectangle(x, y, w, h, 4) 426 dc.Fill() 427 // Draw the text centered on the screen 428 originalColor := color.RGBA{255, 255, 255, 255} 429 // Blend the original color with the mask color. 430 blendedColor := blendColors(originalColor, randomColor(colorSeed, 64)) 431 dc.SetColor(blendedColor) 432 dc.DrawString(category, float64(bbox.Left), float64(bbox.Top)-h/3-padding) 433 } 434 435 return nil 436 } 437 438 func draOCRLabel(img *image.RGBA, bbox *BoundingBox, text string) error { 439 440 dc := gg.NewContextForRGBA(img) 441 442 // Parse the font 443 font, err := opentype.Parse(IBMPlexSansRegular) 444 if err != nil { 445 return err 446 } 447 448 // Create a font face 449 face, err := opentype.NewFace(font, &opentype.FaceOptions{ 450 Size: 20, 451 DPI: 72, 452 }) 453 if err != nil { 454 return err 455 } 456 457 // Set the font face 458 dc.SetFontFace(face) 459 460 w, h := dc.MeasureString(text) 461 462 // Set the rectangle padding 463 padding := 2.0 464 465 x := float64(bbox.Left) 466 y := float64(bbox.Top) 467 w += 4 * padding 468 h += padding 469 dc.SetRGBA(0, 0, 0, 128) 470 dc.DrawRoundedRectangle(x, y, w, h, 4) 471 dc.Fill() 472 dc.SetColor(color.RGBA{255, 255, 255, 255}) 473 dc.DrawString(text, float64(bbox.Left)+2*padding, float64(bbox.Top)+h-4*padding) 474 475 return nil 476 } 477 478 func drawClassification(srcImg image.Image, category string, score float64) ([]byte, error) { 479 img := convertToRGBA(srcImg) 480 481 if err := drawImageLabel(img, category, score); err != nil { 482 return nil, err 483 } 484 485 base64Img, err := convertToBase64(img) 486 if err != nil { 487 return nil, err 488 } 489 return base64Img, nil 490 } 491 492 func drawDetection(srcImg image.Image, objs []*structpb.Value) ([]byte, error) { 493 img := convertToRGBA(srcImg) 494 495 catIdx := indexUniqueCategories(objs) 496 497 for _, obj := range objs { 498 bbox := structpbToBoundingBox(obj.GetStructValue().Fields["bounding_box"].GetStructValue()) 499 if err := drawBoundingBox(img, bbox, catIdx[obj.GetStructValue().Fields["category"].GetStringValue()]); err != nil { 500 return nil, err 501 } 502 } 503 504 for _, obj := range objs { 505 bbox := structpbToBoundingBox(obj.GetStructValue().Fields["bounding_box"].GetStructValue()) 506 if err := drawObjectLabel(img, bbox, obj.GetStructValue().Fields["category"].GetStringValue(), false, catIdx[obj.GetStructValue().Fields["category"].GetStringValue()]); err != nil { 507 return nil, err 508 } 509 } 510 511 base64Img, err := convertToBase64(img) 512 if err != nil { 513 return nil, err 514 } 515 return base64Img, nil 516 } 517 518 func drawKeypoint(srcImg image.Image, objs []*structpb.Value) ([]byte, error) { 519 img := convertToRGBA(srcImg) 520 for _, obj := range objs { 521 kpts := make([]*Keypoint, len(obj.GetStructValue().Fields["keypoints"].GetListValue().Values)) 522 for idx, kpt := range obj.GetStructValue().Fields["keypoints"].GetListValue().Values { 523 kpts[idx] = structpbToKeypoint(kpt.GetStructValue()) 524 } 525 if err := drawSkeleton(img, kpts); err != nil { 526 return nil, err 527 } 528 } 529 530 base64Img, err := convertToBase64(img) 531 if err != nil { 532 return nil, err 533 } 534 return base64Img, nil 535 } 536 537 func drawOCR(srcImg image.Image, objs []*structpb.Value) ([]byte, error) { 538 img := convertToRGBA(srcImg) 539 540 for _, obj := range objs { 541 bbox := structpbToBoundingBox(obj.GetStructValue().Fields["bounding_box"].GetStructValue()) 542 if err := draOCRLabel(img, bbox, obj.GetStructValue().Fields["text"].GetStringValue()); err != nil { 543 return nil, err 544 } 545 } 546 547 base64Img, err := convertToBase64(img) 548 if err != nil { 549 return nil, err 550 } 551 return base64Img, nil 552 } 553 554 func drawInstanceSegmentation(srcImg image.Image, objs []*structpb.Value) ([]byte, error) { 555 556 img := convertToRGBA(srcImg) 557 558 // Sort the objects by size. 559 sort.Slice(objs, func(i, j int) bool { 560 bbox1 := structpbToBoundingBox(objs[i].GetStructValue().Fields["bounding_box"].GetStructValue()) 561 bbox2 := structpbToBoundingBox(objs[j].GetStructValue().Fields["bounding_box"].GetStructValue()) 562 return bbox1.Size() > bbox2.Size() 563 }) 564 565 for instIdx, obj := range objs { 566 bbox := structpbToBoundingBox(obj.GetStructValue().Fields["bounding_box"].GetStructValue()) 567 if err := drawInstanceMask(img, bbox, obj.GetStructValue().Fields["rle"].GetStringValue(), instIdx); err != nil { 568 return nil, err 569 } 570 } 571 572 for instIdx, obj := range objs { 573 bbox := structpbToBoundingBox(obj.GetStructValue().Fields["bounding_box"].GetStructValue()) 574 text := obj.GetStructValue().Fields["category"].GetStringValue() 575 if err := drawObjectLabel(img, bbox, text, true, instIdx); err != nil { 576 return nil, err 577 } 578 } 579 580 base64Img, err := convertToBase64(img) 581 if err != nil { 582 return nil, err 583 } 584 return base64Img, nil 585 } 586 587 func drawSemanticSegmentation(srcImg image.Image, stuffs []*structpb.Value) ([]byte, error) { 588 img := convertToRGBA(srcImg) 589 590 for idx, stuff := range stuffs { 591 if err := drawSemanticMask(img, stuff.GetStructValue().Fields["rle"].GetStringValue(), idx); err != nil { 592 return nil, err 593 } 594 } 595 596 base64Img, err := convertToBase64(img) 597 if err != nil { 598 return nil, err 599 } 600 return base64Img, nil 601 }