// SPDX-License-Identifier: Apache-2.0 OR MIT OR Unlicense

// Allocation and initialization of tiles for paths.
//
// Each invocation handles one annotated element: it converts the element's
// bounding box to tile coordinates, reserves one Tile per covered tile, writes
// the resulting Path record, and then helps zero the workgroup's allocation.

#version 450
#extension GL_GOOGLE_include_directive : enable

#include "mem.h"
#include "setup.h"

// Workgroup size is 2^LG_TILE_ALLOC_WG invocations. LG_WG_FACTOR is not
// defined in this file (presumably it comes from setup.h).
#define LG_TILE_ALLOC_WG (7 + LG_WG_FACTOR)
#define TILE_ALLOC_WG (1 << LG_TILE_ALLOC_WG)

layout(local_size_x = TILE_ALLOC_WG, local_size_y = 1) in;

layout(set = 0, binding = 1) readonly buffer ConfigBuf {
    Config conf;
};

#include "annotated.h"
#include "tile.h"

// scale factors useful for converting coordinates to tiles
#define SX (1.0 / float(TILE_WIDTH_PX))
#define SY (1.0 / float(TILE_HEIGHT_PX))

// Per-invocation tile counts; after the scan in main() each slot holds the
// inclusive prefix sum of the counts up to and including that invocation.
shared uint sh_tile_count[TILE_ALLOC_WG];
// Result of the single allocation made on behalf of the whole workgroup.
shared MallocResult sh_tile_alloc;

void main() {
    uint th_ix = gl_LocalInvocationID.x;
    uint element_ix = gl_GlobalInvocationID.x;
    PathRef path_ref = PathRef(conf.tile_alloc.offset + element_ix * Path_size);
    AnnotatedRef ref = AnnotatedRef(conf.anno_alloc.offset + element_ix * Annotated_size);

    // Invocations past the end of the element list behave like a Nop element.
    uint tag = Annotated_Nop;
    if (element_ix < conf.n_elements) {
        tag = Annotated_tag(conf.anno_alloc, ref).tag;
    }

    // Bounding box in tile coordinates; stays empty (all zero) for any tag
    // that is not handled by the switch below.
    int x0 = 0, y0 = 0, x1 = 0, y1 = 0;
    switch (tag) {
    case Annotated_Color:
    case Annotated_Image:
    case Annotated_BeginClip:
    case Annotated_EndClip:
        // Note: we take advantage of the fact that fills, strokes, and
        // clips have compatible layout.
        AnnoEndClip clip = Annotated_EndClip_read(conf.anno_alloc, ref);
        x0 = int(floor(clip.bbox.x * SX));
        y0 = int(floor(clip.bbox.y * SY));
        x1 = int(ceil(clip.bbox.z * SX));
        y1 = int(ceil(clip.bbox.w * SY));
        break;
    }
    x0 = clamp(x0, 0, int(conf.width_in_tiles));
    y0 = clamp(y0, 0, int(conf.height_in_tiles));
    x1 = clamp(x1, 0, int(conf.width_in_tiles));
    y1 = clamp(y1, 0, int(conf.height_in_tiles));
    // Collapse an inverted box to an empty one. Without this, a negative
    // extent would wrap when converted to uint and inflate both the prefix
    // sum and the allocation request; the stored bbox would also have
    // max < min on that axis.
    x1 = max(x1, x0);
    y1 = max(y1, y0);

    Path path;
    path.bbox = uvec4(x0, y0, x1, y1);
    // Both extents are non-negative here, so the unsigned product is exact.
    uint tile_count = uint(x1 - x0) * uint(y1 - y0);
    if (tag == Annotated_EndClip) {
        // Don't actually allocate tiles for an end clip, but we do want
        // the path structure (especially bbox) allocated for it.
        tile_count = 0;
    }

    sh_tile_count[th_ix] = tile_count;
    uint total_tile_count = tile_count;
    // Prefix sum of sh_tile_count: at step i every invocation adds the
    // partial sum held 2^i slots to its left.
    for (uint i = 0; i < LG_TILE_ALLOC_WG; i++) {
        uint stride = 1u << i;
        // Make the previous step's writes visible before reading them.
        barrier();
        if (th_ix >= stride) {
            total_tile_count += sh_tile_count[th_ix - stride];
        }
        // All reads of this step must finish before any slot is overwritten.
        barrier();
        sh_tile_count[th_ix] = total_tile_count;
    }

    // The last invocation holds the workgroup total, so it performs the one
    // allocation that every invocation then slices from.
    if (th_ix == TILE_ALLOC_WG - 1) {
        sh_tile_alloc = malloc(total_tile_count * Tile_size);
    }
    barrier();
    MallocResult alloc_start = sh_tile_alloc;
    // Bail out if the allocation failed or mem_error is already set.
    if (alloc_start.failed || mem_error != NO_ERROR) {
        return;
    }

    if (element_ix < conf.n_elements) {
        // Exclusive prefix sum: number of tiles claimed by earlier invocations.
        uint tile_subix = th_ix > 0 ? sh_tile_count[th_ix - 1] : 0;
        Alloc tiles_alloc = slice_mem(alloc_start.alloc, Tile_size * tile_subix, Tile_size * tile_count);
        path.tiles = TileRef(tiles_alloc.offset);
        Path_write(conf.tile_alloc, path_ref, path);
    }

    // Zero out allocated tiles efficiently
    // NOTE(review): Tile_size / 4 and offset >> 2 suggest write_mem is indexed
    // in 32-bit words -- confirm against mem.h.
    uint total_count = sh_tile_count[TILE_ALLOC_WG - 1] * (Tile_size / 4);
    uint start_ix = alloc_start.alloc.offset >> 2;
    for (uint i = th_ix; i < total_count; i += TILE_ALLOC_WG) {
        // Note: this interleaving is faster than using Tile_write
        // by a significant amount.
        write_mem(alloc_start.alloc, start_ix + i, 0);
    }
}