// SPDX-License-Identifier: Apache-2.0 OR MIT OR Unlicense

// Origin: github.com/cybriq/giocore@v0.0.7-0.20210703034601-cfb9cb5f3900/gpu/shaders/backdrop.comp

// Propagation of tile backdrop for filling.
//
// Phase 1: every invocation reads one path element and derives, from the
// element's bounding box, how many tile rows the element spans.
// Phase 2: after a workgroup-wide compaction step (a prefix sum over those row
// counts), the invocations of the workgroup share out the tile rows of all
// elements. For each row, the per-tile backdrop computed by the preceding
// coarse path segment kernel is read and propagated from left to right
// (prefix summed).
//
// Output state:
//  - Each path element has an array of tiles covering the whole path, based on
//    its bounding box.
//  - Each tile of a path element contains the 'backdrop' and a list of
//    subdivided path segments.

#version 450
#extension GL_GOOGLE_include_directive : enable

#include "mem.h"
#include "setup.h"

#define LG_BACKDROP_WG (7 + LG_WG_FACTOR)
#define BACKDROP_WG (1 << LG_BACKDROP_WG)

layout(local_size_x = BACKDROP_WG, local_size_y = 1) in;

layout(set = 0, binding = 1) readonly buffer ConfigBuf {
    Config conf;
};

#include "annotated.h"
#include "tile.h"

// One slot per invocation of the workgroup.
// sh_row_count: starts as the element's row count and is turned, in place,
//               into the inclusive prefix sum of the row counts.
// sh_row_alloc: allocation spanning the element's tile array.
// sh_row_width: bounding box width of the element.
// The last two are only written by invocations whose element takes part in
// backdrop propagation; phase 2 only ever selects elements with a non-zero
// row count, all of which have written their slots.
shared uint sh_row_count[BACKDROP_WG];
shared Alloc sh_row_alloc[BACKDROP_WG];
shared uint sh_row_width[BACKDROP_WG];

void main() {
    uint lane = gl_LocalInvocationID.x;
    uint elem = gl_GlobalInvocationID.x;

    // ---- Phase 1: one invocation per path element ----
    uint rows = 0;
    bool mem_ok = mem_error == NO_ERROR;
    if (elem < conf.n_elements) {
        AnnotatedRef ref = AnnotatedRef(conf.anno_alloc.offset + elem * Annotated_size);
        AnnotatedTag tag = Annotated_tag(conf.anno_alloc, ref);

        // Only image, begin-clip and color elements filled with the non-zero
        // rule get their backdrop propagated; everything else keeps rows == 0.
        bool fillable = tag.tag == Annotated_Image
            || tag.tag == Annotated_BeginClip
            || tag.tag == Annotated_Color;
        if (fillable && fill_mode_from_flags(tag.flags) == MODE_NONZERO) {
            PathRef path_ref = PathRef(conf.tile_alloc.offset + elem * Path_size);
            Path path = Path_read(conf.tile_alloc, path_ref);

            uint bbox_w = path.bbox.z - path.bbox.x;
            uint bbox_h = path.bbox.w - path.bbox.y;

            sh_row_width[lane] = bbox_w;
            // The allocation always covers the full bbox_w x bbox_h tile
            // array, even when the row count is dropped to zero below.
            sh_row_alloc[lane] = new_alloc(path.tiles.offset, bbox_w * bbox_h * Tile_size, mem_ok);

            // Paths that don't cross tile top edges don't have backdrops, so a
            // single-row path can be skipped. The optimization is not applied
            // to paths that may cross the y = 0 top edge but were clipped to
            // 1 row.
            // Note: this can probably be expanded to width = 2 as long as the
            // path doesn't cross the left edge.
            bool skip_single_row = bbox_h == 1 && path.bbox.y > 0;
            if (!skip_single_row) {
                rows = bbox_h;
            }
        }
    }

    // ---- Inclusive prefix sum of the row counts across the workgroup ----
    sh_row_count[lane] = rows;
    for (uint level = 0; level < LG_BACKDROP_WG; level++) {
        uint stride = 1u << level;
        // All slots must hold the previous level's values before any is read.
        barrier();
        if (lane >= stride) {
            rows += sh_row_count[lane - stride];
        }
        // All reads of this level must be done before any slot is overwritten.
        barrier();
        sh_row_count[lane] = rows;
    }
    barrier();

    // ---- Phase 2: one invocation per path element row ----
    uint total_rows = sh_row_count[BACKDROP_WG - 1];
    for (uint row = lane; row < total_rows; row += BACKDROP_WG) {
        // Binary search over the prefix sums for the element owning this row:
        // the span halves from BACKDROP_WG / 2 down to 1.
        uint owner = 0;
        for (uint span = BACKDROP_WG / 2; span > 0; span >>= 1) {
            uint probe = owner + span;
            if (row >= sh_row_count[probe - 1]) {
                owner = probe;
            }
        }

        uint width = sh_row_width[owner];
        if (width > 0 && mem_ok) {
            // Walk one tile row sequentially, reading each tile's backdrop and
            // replacing it with the running sum.
            Alloc tiles_alloc = sh_row_alloc[owner];

            // Index of this row within its owning element.
            uint first_row = 0;
            if (owner > 0) {
                first_row = sh_row_count[owner - 1];
            }
            uint local_row = row - first_row;

            // Word index of the first tile's backdrop in this row.
            // NOTE(review): the "+ 1" and the stride of 2 presumably reflect a
            // two-word tile whose second word is the backdrop -- confirm
            // against tile.h.
            uint word_ix = (tiles_alloc.offset >> 2) + 1 + local_row * 2 * width;
            uint running = read_mem(tiles_alloc, word_ix);
            for (uint left = width - 1; left > 0; left--) {
                word_ix += 2;
                running += read_mem(tiles_alloc, word_ix);
                write_mem(tiles_alloc, word_ix, running);
            }
        }
    }
}