github.com/cybriq/giocore@v0.0.7-0.20210703034601-cfb9cb5f3900/gpu/shaders/backdrop.comp (about)

     1  // SPDX-License-Identifier: Apache-2.0 OR MIT OR Unlicense
     2  
     3  // Propagation of tile backdrop for filling.
     4  //
     5  // Each thread reads one path element and calculates the number of spanned tiles
     6  // based on the bounding box.
     7  // In a further compaction step, the workgroup loops over the corresponding tile rows per element in parallel.
     8  // For each row the per tile backdrop will be read, as calculated in the previous coarse path segment kernel,
     9  // and propagated from the left to the right (prefix summed).
    10  //
    11  // Output state:
    12  //  - Each path element has an array of tiles covering the whole path based on bounding box
    13  //  - Each tile per path element contains the 'backdrop' and a list of subdivided path segments
    14  
    15  #version 450
    16  #extension GL_GOOGLE_include_directive : enable
    17  
    18  #include "mem.h"
    19  #include "setup.h"
    20  
    21  #define LG_BACKDROP_WG (7 + LG_WG_FACTOR)
        // LG_BACKDROP_WG (above) is the base-2 logarithm of the workgroup size and is
        // also the iteration count of the scan and binary-search loops in main().
        // LG_WG_FACTOR is not defined in this file (presumably it comes from setup.h).
        // BACKDROP_WG (below) is the workgroup size itself, always a power of two.
    22  #define BACKDROP_WG (1 << LG_BACKDROP_WG)
    23  
        // One-dimensional workgroup of BACKDROP_WG invocations.
    24  layout(local_size_x = BACKDROP_WG, local_size_y = 1) in;
    25  
        // Read-only configuration block. main() reads conf.n_elements,
        // conf.anno_alloc and conf.tile_alloc from it.
    26  layout(set = 0, binding = 1) readonly buffer ConfigBuf {
    27      Config conf;
    28  };
    29  
    30  #include "annotated.h"
    31  #include "tile.h"
    32  
        // Workgroup-shared scratch arrays, one slot per invocation / element:
        //  - sh_row_count: first the number of rows to process for the element,
        //    then overwritten in place by the inclusive prefix sum of those counts.
        //  - sh_row_alloc: the Alloc covering the element's tile array.
        //  - sh_row_width: the element's bounding-box width (bbox.z - bbox.x).
        // sh_row_alloc and sh_row_width are only written for elements that reach
        // the path-reading code in main(); other slots are left unwritten.
    33  shared uint sh_row_count[BACKDROP_WG];
    34  shared Alloc sh_row_alloc[BACKDROP_WG];
    35  shared uint sh_row_width[BACKDROP_WG];
    36  
    37  void main() {
            // Shader entry point. Runs in two phases per workgroup:
            //  1. each invocation inspects one annotated element and records how
            //     many rows of its tile array need backdrop propagation;
            //  2. after a workgroup-wide prefix sum of those counts, the invocations
            //     share out the rows and prefix-sum the per-tile backdrop values
            //     from left to right along each row.
    38      uint th_ix = gl_LocalInvocationID.x;
    39      uint element_ix = gl_GlobalInvocationID.x;
            // Reference to this invocation's element inside conf.anno_alloc.
    40      AnnotatedRef ref = AnnotatedRef(conf.anno_alloc.offset + element_ix * Annotated_size);
    41  
    42      // Work assignment: 1 thread : 1 path element
    43      uint row_count = 0;
            // Error state sampled once up front; it is passed to new_alloc() and gates
            // the row processing below. mem_error and NO_ERROR are not declared in this
            // file (presumably they come from mem.h).
    44      bool mem_ok = mem_error == NO_ERROR;
            // Invocations past the last element keep row_count == 0 but still take part
            // in the barriers and the scan below.
    45      if (element_ix < conf.n_elements) {
    46          AnnotatedTag tag = Annotated_tag(conf.anno_alloc, ref);
                // Only these three tags are handled; any other tag leaves row_count at 0.
    47          switch (tag.tag) {
    48          case Annotated_Image:
    49          case Annotated_BeginClip:
    50          case Annotated_Color:
                    // Elements whose fill mode is not MODE_NONZERO are skipped
                    // (row_count stays 0).
    51              if (fill_mode_from_flags(tag.flags) != MODE_NONZERO) {
    52                  break;
    53              }
    54              // Fall through.
                    // (Not a jump into another case label: execution simply continues
                    // with the rest of this shared case body.)
                    // The Path record is looked up by the same element index, in
                    // conf.tile_alloc.
    55              PathRef path_ref = PathRef(conf.tile_alloc.offset + element_ix * Path_size);
    56              Path path = Path_read(conf.tile_alloc, path_ref);
                    // Width and height of the bounding box; the file header describes
                    // them as counts of spanned tiles.
    57              sh_row_width[th_ix] = path.bbox.z - path.bbox.x;
    58              row_count = path.bbox.w - path.bbox.y;
    59              // Paths that don't cross tile top edges don't have backdrops.
    60              // Don't apply the optimization to paths that may cross the y = 0
    61              // top edge, but clipped to 1 row.
    62              if (row_count == 1 && path.bbox.y > 0) {
    63                  // Note: this can probably be expanded to width = 2 as
    64                  // long as it doesn't cross the left edge.
    65                  row_count = 0;
    66              }
                    // The Alloc is sized from the full bounding box (width * height *
                    // Tile_size), not from the possibly zeroed row_count above.
    67              Alloc path_alloc = new_alloc(path.tiles.offset, (path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y) * Tile_size, mem_ok);
    68              sh_row_alloc[th_ix] = path_alloc;
    69          }
    70      }
    71  
    72      sh_row_count[th_ix] = row_count;
    73      // Prefix sum of sh_row_count
            // Inclusive scan in LG_BACKDROP_WG steps: in step i every invocation adds
            // the value held 2^i slots to its left. Afterwards sh_row_count[k] is the
            // total row count of elements 0..k of this workgroup.
    74      for (uint i = 0; i < LG_BACKDROP_WG; i++) {
                // First barrier: the stores from the previous step (or the initial
                // store above) must be complete before any slot is read.
    75          barrier();
    76          if (th_ix >= (1 << i)) {
    77              row_count += sh_row_count[th_ix - (1 << i)];
    78          }
                // Second barrier: all reads of this step must finish before any slot
                // is overwritten.
    79          barrier();
    80          sh_row_count[th_ix] = row_count;
    81      }
            // Make the final scan results (and the sh_row_width / sh_row_alloc stores
            // from phase 1) available to every invocation before phase 2.
    82      barrier();
    83      // Work assignment: 1 thread : 1 path element row
            // The last scan entry is the number of rows to process in this workgroup.
    84      uint total_rows = sh_row_count[BACKDROP_WG - 1];
            // Rows are dealt out round-robin: invocation t handles rows t,
            // t + BACKDROP_WG, t + 2 * BACKDROP_WG, ...
    85      for (uint row = th_ix; row < total_rows; row += BACKDROP_WG) {
    86          // Binary search to find element
                // Finds the largest el_ix with sh_row_count[el_ix - 1] <= row, i.e. the
                // element whose range of rows contains `row`. The step halves from
                // BACKDROP_WG / 2 down to 1, so probe stays in 1..BACKDROP_WG - 1 and
                // probe - 1 never underflows. Because row < total_rows, the selected
                // element has a non-zero row count, so its sh_row_width and
                // sh_row_alloc slots were written in phase 1.
    87          uint el_ix = 0;
    88          for (uint i = 0; i < LG_BACKDROP_WG; i++) {
    89              uint probe = el_ix + ((BACKDROP_WG / 2) >> i);
    90              if (row >= sh_row_count[probe - 1]) {
    91                  el_ix = probe;
    92              }
    93          }
    94          uint width = sh_row_width[el_ix];
                // Nothing to do for zero-width rows, or when a memory error was
                // already flagged at the start of this invocation.
    95          if (width > 0 && mem_ok) {
    96              // Process one row sequentially
    97              // Read backdrop value per tile and prefix sum it
    98              Alloc tiles_alloc = sh_row_alloc[el_ix];
                    // Index of this row within its element: subtract the rows owned
                    // by all preceding elements.
    99              uint seq_ix = row - (el_ix > 0 ? sh_row_count[el_ix - 1] : 0);
                    // Word index of the first tile's backdrop in this row. Tiles are
                    // addressed 2 words apart and the value read is the second word
                    // of each tile.
                    // NOTE(review): this assumes tiles_alloc.offset is in bytes (hence
                    // >> 2) and that a Tile is two 32-bit words with the backdrop
                    // second - confirm against mem.h / tile.h.
   100              uint tile_el_ix = (tiles_alloc.offset >> 2) + 1 + seq_ix * 2 * width;
                    // The leftmost tile is only read: its prefix sum equals its own
                    // value, so it needs no write-back.
   101              uint sum = read_mem(tiles_alloc, tile_el_ix);
   102              for (uint x = 1; x < width; x++) {
   103                  tile_el_ix += 2;
   104                  sum += read_mem(tiles_alloc, tile_el_ix);
   105                  write_mem(tiles_alloc, tile_el_ix, sum);
   106              }
   107          }
   108      }
   109  }