@@ -405,6 +405,86 @@ void SimpleRenderer::TriangleTileBinning(
405405 SPDLOG_INFO (" Screen dimensions: {}x{}, Tile size: {}, Tiles: {}x{}" ,
406406 width_, height_, tile_size, tiles_x, tiles_y);
407407
408+ // 第一遍:仅统计每个 tile 的三角形数量以便预分配,避免 push_back 扩容
409+ std::vector<size_t > tile_counts (tiles_x * tiles_y, 0 );
410+ for (size_t tri_idx = 0 ; tri_idx < model.GetFaces ().size (); tri_idx++) {
411+ const auto &f = model.GetFaces ()[tri_idx];
412+ auto v0 = screenVertices[f.GetIndex (0 )];
413+ auto v1 = screenVertices[f.GetIndex (1 )];
414+ auto v2 = screenVertices[f.GetIndex (2 )];
415+
416+ if (v0.HasClipPosition ()) {
417+ Vector4f c0 = v0.GetClipPosition ();
418+ Vector4f c1 = v1.GetClipPosition ();
419+ Vector4f c2 = v2.GetClipPosition ();
420+ bool frustum_cull =
421+ (c0.x > c0.w && c1.x > c1.w && c2.x > c2.w ) ||
422+ (c0.x < -c0.w && c1.x < -c0.w && c2.x < -c0.w ) ||
423+ (c0.y > c0.w && c1.y > c1.w && c2.y > c2.w ) ||
424+ (c0.y < -c0.w && c1.y < -c0.w && c2.y < -c0.w ) ||
425+ (c0.z > c0.w && c1.z > c1.w && c2.z > c2.w ) ||
426+ (c0.z < -c0.w && c1.z < -c0.w && c2.z < -c0.w );
427+ if (frustum_cull) {
428+ continue ;
429+ }
430+ }
431+
432+ Vector4f pos0 = v0.GetPosition ();
433+ Vector4f pos1 = v1.GetPosition ();
434+ Vector4f pos2 = v2.GetPosition ();
435+
436+ Vector2f screen0 (pos0.x , pos0.y );
437+ Vector2f screen1 (pos1.x , pos1.y );
438+ Vector2f screen2 (pos2.x , pos2.y );
439+ Vector2f edge1 = screen1 - screen0;
440+ Vector2f edge2 = screen2 - screen0;
441+ float cross_product = edge1.x * edge2.y - edge1.y * edge2.x ;
442+ if (cross_product > 0 .0f ) {
443+ continue ;
444+ }
445+
446+ bool has_clipped_vertex = (pos0.x == -1000 .0f || pos1.x == -1000 .0f || pos2.x == -1000 .0f );
447+ if (has_clipped_vertex) {
448+ continue ;
449+ }
450+
451+ float screen_x0 = pos0.x ;
452+ float screen_y0 = pos0.y ;
453+ float screen_x1 = pos1.x ;
454+ float screen_y1 = pos1.y ;
455+ float screen_x2 = pos2.x ;
456+ float screen_y2 = pos2.y ;
457+
458+ float min_x = std::min ({screen_x0, screen_x1, screen_x2});
459+ float max_x = std::max ({screen_x0, screen_x1, screen_x2});
460+ float min_y = std::min ({screen_y0, screen_y1, screen_y2});
461+ float max_y = std::max ({screen_y0, screen_y1, screen_y2});
462+
463+ int start_tile_x = std::max (0 , static_cast <int >(min_x) / static_cast <int >(tile_size));
464+ int end_tile_x = std::min (static_cast <int >(tiles_x - 1 ),
465+ static_cast <int >(max_x) / static_cast <int >(tile_size));
466+ int start_tile_y = std::max (0 , static_cast <int >(min_y) / static_cast <int >(tile_size));
467+ int end_tile_y = std::min (static_cast <int >(tiles_y - 1 ),
468+ static_cast <int >(max_y) / static_cast <int >(tile_size));
469+
470+ if (start_tile_x > end_tile_x || start_tile_y > end_tile_y) {
471+ continue ;
472+ }
473+
474+ for (int ty = start_tile_y; ty <= end_tile_y; ++ty) {
475+ for (int tx = start_tile_x; tx <= end_tile_x; ++tx) {
476+ size_t tile_id = ty * tiles_x + tx;
477+ tile_counts[tile_id]++;
478+ }
479+ }
480+ }
481+
482+ // 依据统计结果进行容量预留
483+ for (size_t tile_id = 0 ; tile_id < tile_triangles.size (); ++tile_id) {
484+ if (tile_counts[tile_id] > 0 ) {
485+ tile_triangles[tile_id].reserve (tile_counts[tile_id]);
486+ }
487+ }
408488 for (size_t tri_idx = 0 ; tri_idx < model.GetFaces ().size (); tri_idx++) {
409489 const auto &f = model.GetFaces ()[tri_idx];
410490 auto v0 = screenVertices[f.GetIndex (0 )];
@@ -522,7 +602,8 @@ void SimpleRenderer::RasterizeTile(
522602 float * tile_depth_buffer, uint32_t * tile_color_buffer,
523603 std::unique_ptr<float []> &global_depth_buffer,
524604 std::unique_ptr<uint32_t []> &global_color_buffer,
525- bool use_early_z) {
605+ bool use_early_z,
606+ std::vector<Fragment>* scratch_fragments) {
526607 // 计算tile在屏幕空间的范围
527608 size_t tile_x = tile_id % tiles_x;
528609 size_t tile_y = tile_id / tiles_x;
@@ -539,38 +620,69 @@ void SimpleRenderer::RasterizeTile(
539620 std::fill_n (tile_color_buffer, tile_width * tile_height, 0 );
540621
541622 // 在tile内光栅化所有三角形
623+ (void )tiles_y; // 避免未使用参数告警
542624 for (const auto &triangle : triangles) {
543- auto fragments = rasterizer_->Rasterize (triangle.v0 , triangle.v1 , triangle.v2 );
544-
545- for (auto &fragment : fragments) {
546- fragment.material = triangle.material ;
547-
548- size_t screen_x = fragment.screen_coord [0 ];
549- size_t screen_y = fragment.screen_coord [1 ];
550-
551- // 检查fragment是否在当前tile内
552- if (screen_x >= screen_x_start && screen_x < screen_x_end &&
553- screen_y >= screen_y_start && screen_y < screen_y_end) {
554-
555- size_t tile_local_x = screen_x - screen_x_start;
556- size_t tile_local_y = screen_y - screen_y_start;
557- size_t tile_index = tile_local_x + tile_local_y * tile_width;
558-
559- // tile内深度测试
560- if (use_early_z) { // Early-Z模式:深度测试在Fragment Shader之前
561- if (fragment.depth < tile_depth_buffer[tile_index]) {
625+ // 复用线程本地 scratch 容器,限制在 tile 边界内栅格化
626+ if (scratch_fragments) { // 提供scratch容器
627+ scratch_fragments->clear ();
628+ if (scratch_fragments->capacity () < tile_width * tile_height) { // 二次确认,为日后可能的可变tile进行设计
629+ scratch_fragments->reserve (tile_width * tile_height);
630+ }
631+ rasterizer_->RasterizeTo (triangle.v0 , triangle.v1 , triangle.v2 ,
632+ static_cast <int >(screen_x_start), static_cast <int >(screen_y_start),
633+ static_cast <int >(screen_x_end), static_cast <int >(screen_y_end),
634+ *scratch_fragments);
635+
636+ for (auto &fragment : *scratch_fragments) {
637+ fragment.material = triangle.material ;
638+ size_t screen_x = fragment.screen_coord [0 ];
639+ size_t screen_y = fragment.screen_coord [1 ];
640+ if (screen_x >= screen_x_start && screen_x < screen_x_end &&
641+ screen_y >= screen_y_start && screen_y < screen_y_end) {
642+ size_t tile_local_x = screen_x - screen_x_start;
643+ size_t tile_local_y = screen_y - screen_y_start;
644+ size_t tile_index = tile_local_x + tile_local_y * tile_width;
645+ if (use_early_z) {
646+ if (fragment.depth < tile_depth_buffer[tile_index]) {
647+ auto color = shader_->FragmentShader (fragment);
648+ tile_depth_buffer[tile_index] = fragment.depth ;
649+ tile_color_buffer[tile_index] = uint32_t (color);
650+ }
651+ } else {
562652 auto color = shader_->FragmentShader (fragment);
563- tile_depth_buffer[tile_index] = fragment.depth ;
564- tile_color_buffer[tile_index] = uint32_t (color);
565- }
566- } else { // Late-Z模式:Fragment Shader在深度测试之前
567- auto color = shader_->FragmentShader (fragment);
568- if (fragment.depth < tile_depth_buffer[tile_index]) {
569- tile_depth_buffer[tile_index] = fragment.depth ;
570- tile_color_buffer[tile_index] = uint32_t (color);
653+ if (fragment.depth < tile_depth_buffer[tile_index]) {
654+ tile_depth_buffer[tile_index] = fragment.depth ;
655+ tile_color_buffer[tile_index] = uint32_t (color);
656+ }
571657 }
572658 }
659+ }
660+ } else { // 不提供scratch容器的版本
661+ auto fragments = rasterizer_->Rasterize (triangle.v0 , triangle.v1 , triangle.v2 );
662+ for (auto &fragment : fragments) {
663+ fragment.material = triangle.material ;
664+ size_t screen_x = fragment.screen_coord [0 ];
665+ size_t screen_y = fragment.screen_coord [1 ];
666+ if (screen_x >= screen_x_start && screen_x < screen_x_end &&
667+ screen_y >= screen_y_start && screen_y < screen_y_end) {
668+ size_t tile_local_x = screen_x - screen_x_start;
669+ size_t tile_local_y = screen_y - screen_y_start;
670+ size_t tile_index = tile_local_x + tile_local_y * tile_width;
671+ if (use_early_z) {
672+ if (fragment.depth < tile_depth_buffer[tile_index]) {
673+ auto color = shader_->FragmentShader (fragment);
674+ tile_depth_buffer[tile_index] = fragment.depth ;
675+ tile_color_buffer[tile_index] = uint32_t (color);
676+ }
677+ } else {
678+ auto color = shader_->FragmentShader (fragment);
679+ if (fragment.depth < tile_depth_buffer[tile_index]) {
680+ tile_depth_buffer[tile_index] = fragment.depth ;
681+ tile_color_buffer[tile_index] = uint32_t (color);
682+ }
573683 }
684+ }
685+ }
574686 }
575687 }
576688
@@ -785,14 +897,18 @@ SimpleRenderer::TileRenderStats SimpleRenderer::ExecuteTileBasedPipeline(
785897 std::unique_ptr<uint32_t []> tile_color_buffer =
786898 std::make_unique<uint32_t []>(TILE_SIZE * TILE_SIZE);
787899
900+ // 线程本地片段 scratch 容器(复用),容量按单 tile 上限预估
901+ std::vector<Fragment> scratch_fragments;
902+ scratch_fragments.reserve (TILE_SIZE * TILE_SIZE);
903+
788904#pragma omp for
789905 for (size_t tile_id = 0 ; tile_id < total_tiles; tile_id++) {
790- // 按照tile进行光栅化
906+ // 按照tile进行光栅化,每个Tile进行区域限制+scratch复用,区域限制避免了可能的数据竞争
791907 RasterizeTile (tile_id, tile_triangles[tile_id],
792908 tiles_x, tiles_y, TILE_SIZE,
793909 tile_depth_buffer.get (), tile_color_buffer.get (),
794910 depthBuffer_per_thread, colorBuffer_per_thread,
795- early_z_enabled_);
911+ early_z_enabled_, &scratch_fragments );
796912 }
797913 }
798914 auto rasterization_end_time = std::chrono::high_resolution_clock::now ();
0 commit comments