github.com/cybriq/giocore@v0.0.7-0.20210703034601-cfb9cb5f3900/gpu/shaders/tile_alloc.comp (about)

     1  // SPDX-License-Identifier: Apache-2.0 OR MIT OR Unlicense
     2  
     3  // Allocation and initialization of tiles for paths.
     4  
     5  #version 450
     6  #extension GL_GOOGLE_include_directive : enable
     7  
     8  #include "mem.h"
     9  #include "setup.h"
    10  
    11  #define LG_TILE_ALLOC_WG (7 + LG_WG_FACTOR)
        // LG_TILE_ALLOC_WG (above) is log2 of the workgroup size; it is also the
        // number of steps main() runs in its prefix-sum loop. LG_WG_FACTOR is
        // not defined in this file (presumably it comes from an included header).
        // TILE_ALLOC_WG is the resulting number of invocations per workgroup.
    12  #define TILE_ALLOC_WG (1 << LG_TILE_ALLOC_WG)
    13  
        // One-dimensional workgroup of TILE_ALLOC_WG invocations; main() maps
        // each invocation to one element via gl_GlobalInvocationID.x.
    14  layout(local_size_x = TILE_ALLOC_WG, local_size_y = 1) in;
    15  
    16  layout(set = 0, binding = 1) readonly buffer ConfigBuf {
            // Read-only configuration. This shader reads conf.n_elements,
            // conf.anno_alloc, conf.tile_alloc, conf.width_in_tiles and
            // conf.height_in_tiles. The Config type is declared outside this
            // file (presumably in setup.h -- confirm).
    17      Config conf;
    18  };
    19  
    20  #include "annotated.h"
    21  #include "tile.h"
    22  
    23  // Scale factors for converting pixel coordinates to tile coordinates:
        // multiplying an x (resp. y) pixel coordinate by SX (resp. SY) divides it
        // by the tile width (resp. height) in pixels.
    24  #define SX (1.0 / float(TILE_WIDTH_PX))
    25  #define SY (1.0 / float(TILE_HEIGHT_PX))
    26  
        // Per-invocation tile counts, one slot per invocation of the workgroup.
        // main() overwrites this array in place with the running (inclusive)
        // sum of the counts, so slot k ends up holding the total for
        // invocations 0..k.
    27  shared uint sh_tile_count[TILE_ALLOC_WG];
        // Result of the single malloc() issued per workgroup. Written by the
        // last invocation only and read by every invocation after a barrier.
    28  shared MallocResult sh_tile_alloc;
    29  
    30  void main() {
            // Entry point; one invocation handles one annotated element.
            //
            // Steps:
            //   1. Read the element's tag and, for drawable/clip elements, its
            //      bounding box; convert the box to clamped tile coordinates.
            //   2. Compute a running sum of the per-element tile counts across
            //      the workgroup in shared memory.
            //   3. Have the last invocation allocate tile storage for the whole
            //      workgroup with a single malloc().
            //   4. Write each element's Path record (bbox + reference to its
            //      slice of the allocation).
            //   5. Zero the whole allocation cooperatively.
    31      uint th_ix = gl_LocalInvocationID.x;
    32      uint element_ix = gl_GlobalInvocationID.x;
            // The Path record and the annotated element are both addressed by
            // element_ix within their respective allocations.
    33      PathRef path_ref = PathRef(conf.tile_alloc.offset + element_ix * Path_size);
    34      AnnotatedRef ref = AnnotatedRef(conf.anno_alloc.offset + element_ix * Annotated_size);
    35  
            // Invocations past the end of the element list keep the Nop tag:
            // they contribute an empty bbox and zero tiles, but still take part
            // in every barrier and in the zeroing loop at the end.
    36      uint tag = Annotated_Nop;
    37      if (element_ix < conf.n_elements) {
    38          tag = Annotated_tag(conf.anno_alloc, ref).tag;
    39      }
            // Tile-space bounding box; stays (0, 0, 0, 0) for any tag not
            // listed in the switch below.
    40      int x0 = 0, y0 = 0, x1 = 0, y1 = 0;
    41      switch (tag) {
    42      case Annotated_Color:
    43      case Annotated_Image:
    44      case Annotated_BeginClip:
    45      case Annotated_EndClip:
    46          // Note: we take advantage of the fact that fills, strokes, and
    47          // clips have compatible layout.
    48          AnnoEndClip clip = Annotated_EndClip_read(conf.anno_alloc, ref);
                // Round outwards (floor the min corner, ceil the max corner) so
                // the tile box covers every tile the bbox touches.
    49          x0 = int(floor(clip.bbox.x * SX));
    50          y0 = int(floor(clip.bbox.y * SY));
    51          x1 = int(ceil(clip.bbox.z * SX));
    52          y1 = int(ceil(clip.bbox.w * SY));
    53          break;
    54      }
            // Clamp to the tile grid. The upper limit is the grid size itself,
            // so (x1, y1) acts as an exclusive corner: the tile count below is
            // (x1 - x0) * (y1 - y0).
    55      x0 = clamp(x0, 0, int(conf.width_in_tiles));
    56      y0 = clamp(y0, 0, int(conf.height_in_tiles));
    57      x1 = clamp(x1, 0, int(conf.width_in_tiles));
    58      y1 = clamp(y1, 0, int(conf.height_in_tiles));
    59  
    60      Path path;
    61      path.bbox = uvec4(x0, y0, x1, y1);
            // NOTE(review): the product is computed in int and then implicitly
            // converted to uint. If a stored bbox ever had z < x or w < y, one
            // extent would be negative and the count would become a very large
            // uint. Presumably earlier stages always write ordered bboxes --
            // TODO confirm.
    62      uint tile_count = (x1 - x0) * (y1 - y0);
    63      if (tag == Annotated_EndClip) {
    64          // Don't actually allocate tiles for an end clip, but we do want
    65          // the path structure (especially bbox) allocated for it.
    66          tile_count = 0;
    67      }
    68  
    69      sh_tile_count[th_ix] = tile_count;
    70      uint total_tile_count = tile_count;
    71      // Prefix sum of sh_tile_count
            // Step i adds the partial sum held by the invocation 2^i slots
            // earlier. After LG_TILE_ALLOC_WG steps, sh_tile_count[th_ix] holds
            // the sum of the tile counts of invocations 0..th_ix (inclusive).
    72      for (uint i = 0; i < LG_TILE_ALLOC_WG; i++) {
                // First barrier: the previous step's writes (or the initial
                // store above) must be complete before anyone reads.
    73          barrier();
    74          if (th_ix >= (1 << i)) {
    75              total_tile_count += sh_tile_count[th_ix - (1 << i)];
    76          }
                // Second barrier: all reads of this step must be complete
                // before any slot is overwritten.
    77          barrier();
    78          sh_tile_count[th_ix] = total_tile_count;
    79      }
            // The last invocation's running sum is the workgroup total, so it
            // performs the one allocation for the whole workgroup.
    80      if (th_ix == TILE_ALLOC_WG - 1) {
    81          sh_tile_alloc = malloc(total_tile_count * Tile_size);
    82      }
            // Publishes sh_tile_alloc and the final sh_tile_count values to
            // every invocation. This is the last barrier in the shader, so the
            // early return below cannot leave other invocations waiting.
    83      barrier();
    84      MallocResult alloc_start = sh_tile_alloc;
            // Give up if the allocation failed or an error is already recorded
            // in mem_error (mem_error and NO_ERROR are defined outside this
            // file).
    85      if (alloc_start.failed || mem_error != NO_ERROR) {
    86          return;
    87      }
    88  
    89      if (element_ix < conf.n_elements) {
                // Number of tiles owned by earlier invocations of this
                // workgroup: the inclusive sum of the previous slot, or 0 for
                // the first invocation.
    90          uint tile_subix = th_ix > 0 ? sh_tile_count[th_ix - 1] : 0;
                // This element's slice of the workgroup allocation. For an
                // EndClip element tile_count is 0, so the slice has zero length.
    91          Alloc tiles_alloc = slice_mem(alloc_start.alloc, Tile_size * tile_subix, Tile_size * tile_count);
    92          path.tiles = TileRef(tiles_alloc.offset);
    93          Path_write(conf.tile_alloc, path_ref, path);
    94      }
    95  
    96      // Zero out allocated tiles efficiently
            // total_count is the workgroup's tile total times (Tile_size / 4);
            // start_ix is the allocation offset shifted right by 2. Presumably
            // Tile_size and the offset are in bytes and write_mem indexes
            // 32-bit words -- confirm against mem.h.
    97      uint total_count = sh_tile_count[TILE_ALLOC_WG - 1] * (Tile_size / 4);
    98      uint start_ix = alloc_start.alloc.offset >> 2;
            // Every invocation (including those past n_elements) starts at its
            // own th_ix and advances by the workgroup size, so neighbouring
            // invocations write neighbouring indices on each pass.
    99      for (uint i = th_ix; i < total_count; i += TILE_ALLOC_WG) {
   100          // Note: this interleaving is faster than using Tile_write
   101          // by a significant amount.
   102          write_mem(alloc_start.alloc, start_ix + i, 0);
   103      }
   104  }
   103      }
   104  }