From 6ff20dc6a77d3f42c8fcaa9f1d574d5ac87bd16d Mon Sep 17 00:00:00 2001 From: James Rowe Date: Mon, 2 Jul 2018 20:31:21 -0600 Subject: [PATCH 1/8] Add qt windowsvistastyle dll to the build --- CMakeModules/CopyYuzuQt5Deps.cmake | 3 +++ appveyor.yml | 4 ++++ 2 files changed, 7 insertions(+) diff --git a/CMakeModules/CopyYuzuQt5Deps.cmake b/CMakeModules/CopyYuzuQt5Deps.cmake index ed24c742c8..e4a9796c81 100644 --- a/CMakeModules/CopyYuzuQt5Deps.cmake +++ b/CMakeModules/CopyYuzuQt5Deps.cmake @@ -3,7 +3,9 @@ function(copy_yuzu_Qt5_deps target_dir) set(DLL_DEST "${CMAKE_BINARY_DIR}/bin/$/") set(Qt5_DLL_DIR "${Qt5_DIR}/../../../bin") set(Qt5_PLATFORMS_DIR "${Qt5_DIR}/../../../plugins/platforms/") + set(Qt5_STYLES_DIR "${Qt5_DIR}/../../../plugins/styles/") set(PLATFORMS ${DLL_DEST}platforms/) + set(STYLES ${DLL_DEST}styles/) windows_copy_files(${target_dir} ${Qt5_DLL_DIR} ${DLL_DEST} icudt*.dll icuin*.dll @@ -14,4 +16,5 @@ function(copy_yuzu_Qt5_deps target_dir) Qt5Widgets$<$:d>.* ) windows_copy_files(yuzu ${Qt5_PLATFORMS_DIR} ${PLATFORMS} qwindows$<$:d>.*) + windows_copy_files(yuzu ${Qt5_STYLES_DIR} ${STYLES} qwindowsvistastyle$<$:d>.*) endfunction(copy_yuzu_Qt5_deps) diff --git a/appveyor.yml b/appveyor.yml index 72cda26a7a..17d1b5fee6 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -116,6 +116,7 @@ after_build: mkdir $RELEASE_DIST mkdir $RELEASE_DIST/platforms + mkdir $RELEASE_DIST/styles # copy the compiled binaries and other release files to the release folder Get-ChildItem "$CMAKE_BINARY_DIR" -Filter "yuzu*.exe" | Copy-Item -destination $RELEASE_DIST @@ -136,6 +137,9 @@ after_build: # copy the qt windows plugin dll to platforms Copy-Item -path "C:/msys64/mingw64/share/qt5/plugins/platforms/qwindows.dll" -force -destination "$RELEASE_DIST/platforms" + # copy the qt windows vista style dll to styles + Copy-Item -path "C:/msys64/mingw64/share/qt5/plugins/styles/qwindowsvistastyle.dll" -force -destination "$RELEASE_DIST/styles" + 7z a -tzip $MINGW_BUILD_ZIP 
$RELEASE_DIST\* 7z a $MINGW_SEVENZIP $RELEASE_DIST } From 9da1552417440ca2294f9e06d33be42d933db117 Mon Sep 17 00:00:00 2001 From: bunnei Date: Tue, 3 Jul 2018 14:05:13 -0400 Subject: [PATCH 2/8] gl_rasterizer_cache: Implement PixelFormat S8Z24. --- .../renderer_opengl/gl_rasterizer_cache.cpp | 84 ++++++++++++++++--- .../renderer_opengl/gl_rasterizer_cache.h | 8 ++ src/video_core/textures/decoders.cpp | 2 + 3 files changed, 83 insertions(+), 11 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index f9b4a4b877..3a00d93831 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -88,6 +88,8 @@ static constexpr std::array tex_form // DepthStencil formats {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, ComponentType::UNorm, false}, // Z24S8 + {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, ComponentType::UNorm, + false}, // S8Z24 }}; static const FormatTuple& GetFormatTuple(PixelFormat pixel_format, ComponentType component_type) { @@ -131,13 +133,6 @@ MathUtil::Rectangle SurfaceParams::GetRect() const { return {0, actual_height, width, 0}; } -static void ConvertASTCToRGBA8(std::vector& data, PixelFormat format, u32 width, u32 height) { - u32 block_width{}; - u32 block_height{}; - std::tie(block_width, block_height) = GetASTCBlockSize(format); - data = Tegra::Texture::ASTC::Decompress(data, width, height, block_width, block_height); -} - template void MortonCopy(u32 stride, u32 block_height, u32 height, u8* gl_buffer, Tegra::GPUVAddr addr) { constexpr u32 bytes_per_pixel = SurfaceParams::GetFormatBpp(format) / CHAR_BIT; @@ -177,6 +172,7 @@ static constexpr std::array, MortonCopy, MortonCopy, MortonCopy, MortonCopy, MortonCopy, + MortonCopy, }; static constexpr std::array, MortonCopy, + MortonCopy, }; // Allocate an uninitialized texture of appropriate size and format for the surface 
@@ -234,6 +231,71 @@ CachedSurface::CachedSurface(const SurfaceParams& params) : params(params) {
                            rect.GetWidth(), rect.GetHeight());
 }
 
+/// Converts S8Z24 pixels (Z in bits 0-23, S in bits 24-31) to the Z24S8 layout, in place.
+static void ConvertS8Z24ToZ24S8(std::vector<u8>& data, u32 width, u32 height) {
+    union S8Z24 {
+        BitField<0, 24, u32> z24;
+        BitField<24, 8, u32> s8;
+    };
+    static_assert(sizeof(S8Z24) == 4, "S8Z24 is incorrect size");
+
+    union Z24S8 {
+        BitField<0, 8, u32> s8;
+        BitField<8, 24, u32> z24;
+    };
+    static_assert(sizeof(Z24S8) == 4, "Z24S8 is incorrect size");
+
+    // data is a byte buffer, so advance by whole pixels in bytes rather than by pixel index.
+    const size_t size_in_bytes{sizeof(S8Z24) * width * height};
+    for (size_t offset = 0; offset < size_in_bytes; offset += sizeof(S8Z24)) {
+        S8Z24 input_pixel{};
+        Z24S8 output_pixel{};
+        std::memcpy(&input_pixel, &data[offset], sizeof(S8Z24));
+        output_pixel.s8.Assign(input_pixel.s8);
+        output_pixel.z24.Assign(input_pixel.z24);
+        std::memcpy(&data[offset], &output_pixel, sizeof(Z24S8));
+    }
+}
+/**
+ * Helper function to perform software conversion (as needed) when loading a buffer from Switch
+ * memory. This is for Maxwell pixel formats that cannot be represented as-is in OpenGL or with
+ * typical desktop GPUs.
+ */
+static void ConvertFormatAsNeeded_LoadGLBuffer(std::vector<u8>& data, PixelFormat pixel_format,
+                                               u32 width, u32 height) {
+    switch (pixel_format) {
+    case PixelFormat::ASTC_2D_4X4: {
+        // Convert ASTC pixel formats to RGBA8, as most desktop GPUs do not support ASTC.
+        u32 block_width{};
+        u32 block_height{};
+        std::tie(block_width, block_height) = GetASTCBlockSize(pixel_format);
+        data = Tegra::Texture::ASTC::Decompress(data, width, height, block_width, block_height);
+        break;
+    }
+    case PixelFormat::S8Z24:
+        // Convert the S8Z24 depth format to Z24S8, as OpenGL does not support S8Z24.
+        ConvertS8Z24ToZ24S8(data, width, height);
+        break;
+    }
+}
+
+/**
+ * Helper function to perform software conversion (as needed) when flushing a buffer to Switch
+ * memory. This is for Maxwell pixel formats that cannot be represented as-is in OpenGL or with
+ * typical desktop GPUs.
+ */ +static void ConvertFormatAsNeeded_FlushGLBuffer(std::vector& /*data*/, PixelFormat pixel_format, + u32 /*width*/, u32 /*height*/) { + switch (pixel_format) { + case PixelFormat::ASTC_2D_4X4: + case PixelFormat::S8Z24: + LOG_CRITICAL(Render_OpenGL, "Unimplemented pixel_format={}", + static_cast(pixel_format)); + UNREACHABLE(); + break; + } +} + MICROPROFILE_DEFINE(OpenGL_SurfaceLoad, "OpenGL", "Surface Load", MP_RGB(128, 64, 192)); void CachedSurface::LoadGLBuffer() { ASSERT(params.type != SurfaceType::Fill); @@ -256,10 +318,7 @@ void CachedSurface::LoadGLBuffer() { params.width, params.block_height, params.height, gl_buffer.data(), params.addr); } - if (IsPixelFormatASTC(params.pixel_format)) { - // ASTC formats are converted to RGBA8 in software, as most PC GPUs do not support this - ConvertASTCToRGBA8(gl_buffer, params.pixel_format, params.width, params.height); - } + ConvertFormatAsNeeded_LoadGLBuffer(gl_buffer, params.pixel_format, params.width, params.height); } MICROPROFILE_DEFINE(OpenGL_SurfaceFlush, "OpenGL", "Surface Flush", MP_RGB(128, 192, 64)); @@ -272,6 +331,9 @@ void CachedSurface::FlushGLBuffer() { MICROPROFILE_SCOPE(OpenGL_SurfaceFlush); + ConvertFormatAsNeeded_FlushGLBuffer(gl_buffer, params.pixel_format, params.width, + params.height); + if (!params.is_tiled) { std::memcpy(dst_buffer, gl_buffer.data(), params.size_in_bytes); } else { diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h index 459abbdc2b..7aaf371bda 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h @@ -41,6 +41,7 @@ struct SurfaceParams { // DepthStencil formats Z24S8 = 13, + S8Z24 = 14, MaxDepthStencilFormat, @@ -92,6 +93,7 @@ struct SurfaceParams { 4, // DXN1 4, // ASTC_2D_4X4 1, // Z24S8 + 1, // S8Z24 }}; ASSERT(static_cast(format) < compression_factor_table.size()); @@ -117,6 +119,7 @@ struct SurfaceParams { 64, // DXN1 32, // 
ASTC_2D_4X4 32, // Z24S8 + 32, // S8Z24 }}; ASSERT(static_cast(format) < bpp_table.size()); @@ -128,6 +131,8 @@ struct SurfaceParams { static PixelFormat PixelFormatFromDepthFormat(Tegra::DepthFormat format) { switch (format) { + case Tegra::DepthFormat::S8_Z24_UNORM: + return PixelFormat::S8Z24; case Tegra::DepthFormat::Z24_S8_UNORM: return PixelFormat::Z24S8; default: @@ -226,6 +231,8 @@ struct SurfaceParams { static Tegra::DepthFormat DepthFormatFromPixelFormat(PixelFormat format) { switch (format) { + case PixelFormat::S8Z24: + return Tegra::DepthFormat::S8_Z24_UNORM; case PixelFormat::Z24S8: return Tegra::DepthFormat::Z24_S8_UNORM; default: @@ -274,6 +281,7 @@ struct SurfaceParams { static ComponentType ComponentTypeFromDepthFormat(Tegra::DepthFormat format) { switch (format) { + case Tegra::DepthFormat::S8_Z24_UNORM: case Tegra::DepthFormat::Z24_S8_UNORM: return ComponentType::UNorm; default: diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp index 680f22ddb8..7b06fea3e2 100644 --- a/src/video_core/textures/decoders.cpp +++ b/src/video_core/textures/decoders.cpp @@ -76,6 +76,7 @@ u32 BytesPerPixel(TextureFormat format) { static u32 DepthBytesPerPixel(DepthFormat format) { switch (format) { + case DepthFormat::S8_Z24_UNORM: case DepthFormat::Z24_S8_UNORM: return 4; default: @@ -129,6 +130,7 @@ std::vector UnswizzleDepthTexture(VAddr address, DepthFormat format, u32 wid std::vector unswizzled_data(width * height * bytes_per_pixel); switch (format) { + case DepthFormat::S8_Z24_UNORM: case DepthFormat::Z24_S8_UNORM: CopySwizzledData(width, height, bytes_per_pixel, bytes_per_pixel, data, unswizzled_data.data(), true, block_height); From 827bb08c91086e0e24edb82df79c9ed860baf836 Mon Sep 17 00:00:00 2001 From: Subv Date: Wed, 6 Jun 2018 23:53:43 -0500 Subject: [PATCH 3/8] GPU: Added registers for the CLEAR_BUFFERS and CLEAR_COLOR methods. 
--- src/video_core/engines/maxwell_3d.h | 29 +++++++++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index ff67f2a58b..e264dd4d84 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -436,7 +436,12 @@ public: u32 count; } vertex_buffer; - INSERT_PADDING_WORDS(0x99); + INSERT_PADDING_WORDS(1); + + float clear_color[4]; + float clear_depth; + + INSERT_PADDING_WORDS(0x93); struct { u32 address_high; @@ -584,7 +589,21 @@ public: Cull cull; - INSERT_PADDING_WORDS(0x77); + INSERT_PADDING_WORDS(0x2B); + + union { + u32 raw; + BitField<0, 1, u32> Z; + BitField<1, 1, u32> S; + BitField<2, 1, u32> R; + BitField<3, 1, u32> G; + BitField<4, 1, u32> B; + BitField<5, 1, u32> A; + BitField<6, 4, u32> RT; + BitField<10, 11, u32> layer; + } clear_buffers; + + INSERT_PADDING_WORDS(0x4B); struct { u32 query_address_high; @@ -766,6 +785,9 @@ private: /// Handles writes to the macro uploading registers. void ProcessMacroUpload(u32 data); + /// Handles a write to the CLEAR_BUFFERS register. + void ProcessClearBuffers(); + /// Handles a write to the QUERY_GET register. 
void ProcessQueryGet(); @@ -788,6 +810,8 @@ ASSERT_REG_POSITION(rt, 0x200); ASSERT_REG_POSITION(viewport_transform[0], 0x280); ASSERT_REG_POSITION(viewport, 0x300); ASSERT_REG_POSITION(vertex_buffer, 0x35D); +ASSERT_REG_POSITION(clear_color[0], 0x360); +ASSERT_REG_POSITION(clear_depth, 0x364); ASSERT_REG_POSITION(zeta, 0x3F8); ASSERT_REG_POSITION(vertex_attrib_format[0], 0x458); ASSERT_REG_POSITION(rt_control, 0x487); @@ -803,6 +827,7 @@ ASSERT_REG_POSITION(code_address, 0x582); ASSERT_REG_POSITION(draw, 0x585); ASSERT_REG_POSITION(index_array, 0x5F2); ASSERT_REG_POSITION(cull, 0x646); +ASSERT_REG_POSITION(clear_buffers, 0x674); ASSERT_REG_POSITION(query, 0x6C0); ASSERT_REG_POSITION(vertex_array[0], 0x700); ASSERT_REG_POSITION(independent_blend, 0x780); From be51120d237cb551fae90fbfaebda41669c40403 Mon Sep 17 00:00:00 2001 From: Subv Date: Wed, 6 Jun 2018 23:54:25 -0500 Subject: [PATCH 4/8] GPU: Bind and clear the render target when the CLEAR_BUFFERS register is written to. --- src/video_core/engines/maxwell_3d.cpp | 11 +++ src/video_core/rasterizer_interface.h | 3 + .../renderer_opengl/gl_rasterizer.cpp | 71 +++++++++++++++++++ .../renderer_opengl/gl_rasterizer.h | 1 + 4 files changed, 86 insertions(+) diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 93c43c8cbc..78f1c0ea72 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -126,6 +126,10 @@ void Maxwell3D::WriteReg(u32 method, u32 value, u32 remaining_params) { DrawArrays(); break; } + case MAXWELL3D_REG_INDEX(clear_buffers): { + ProcessClearBuffers(); + break; + } case MAXWELL3D_REG_INDEX(query.query_get): { ProcessQueryGet(); break; @@ -415,5 +419,12 @@ bool Maxwell3D::IsShaderStageEnabled(Regs::ShaderStage stage) const { UNREACHABLE(); } +void Maxwell3D::ProcessClearBuffers() { + ASSERT(regs.clear_buffers.R && regs.clear_buffers.G && regs.clear_buffers.B && + regs.clear_buffers.A); + + 
VideoCore::g_renderer->Rasterizer()->Clear(); +} + } // namespace Engines } // namespace Tegra diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index 145e583344..499e84b892 100644 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h @@ -19,6 +19,9 @@ public: /// Draw the current batch of vertex arrays virtual void DrawArrays() = 0; + /// Clear the current framebuffer + virtual void Clear() = 0; + /// Notify rasterizer that the specified Maxwell register has been changed virtual void NotifyMaxwellRegisterChanged(u32 method) = 0; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index ca3814cfc4..8e11711610 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -297,6 +297,77 @@ bool RasterizerOpenGL::AccelerateDrawBatch(bool is_indexed) { return true; } +void RasterizerOpenGL::Clear() { + const auto& regs = Core::System().GetInstance().GPU().Maxwell3D().regs; + + // TODO(bunnei): Implement these + const bool has_stencil = false; + const bool using_color_fb = true; + const bool using_depth_fb = regs.zeta.Address() != 0; + const MathUtil::Rectangle viewport_rect{regs.viewport_transform[0].GetRect()}; + + const bool write_color_fb = + state.color_mask.red_enabled == GL_TRUE || state.color_mask.green_enabled == GL_TRUE || + state.color_mask.blue_enabled == GL_TRUE || state.color_mask.alpha_enabled == GL_TRUE; + + const bool write_depth_fb = + (state.depth.test_enabled && state.depth.write_mask == GL_TRUE) || + (has_stencil && state.stencil.test_enabled && state.stencil.write_mask != 0); + + Surface color_surface; + Surface depth_surface; + MathUtil::Rectangle surfaces_rect; + std::tie(color_surface, depth_surface, surfaces_rect) = + res_cache.GetFramebufferSurfaces(using_color_fb, using_depth_fb, viewport_rect); + + MathUtil::Rectangle draw_rect{ + 
static_cast(std::clamp(static_cast(surfaces_rect.left) + viewport_rect.left, + surfaces_rect.left, surfaces_rect.right)), // Left + static_cast(std::clamp(static_cast(surfaces_rect.bottom) + viewport_rect.top, + surfaces_rect.bottom, surfaces_rect.top)), // Top + static_cast(std::clamp(static_cast(surfaces_rect.left) + viewport_rect.right, + surfaces_rect.left, surfaces_rect.right)), // Right + static_cast( + std::clamp(static_cast(surfaces_rect.bottom) + viewport_rect.bottom, + surfaces_rect.bottom, surfaces_rect.top))}; // Bottom + + // Bind the framebuffer surfaces + BindFramebufferSurfaces(color_surface, depth_surface, has_stencil); + + // Sync the viewport + SyncViewport(surfaces_rect); + + // TODO(bunnei): Sync scissorbox uniform(s) here + + // Viewport can have negative offsets or larger dimensions than our framebuffer sub-rect. Enable + // scissor test to prevent drawing outside of the framebuffer region + state.scissor.enabled = true; + state.scissor.x = draw_rect.left; + state.scissor.y = draw_rect.bottom; + state.scissor.width = draw_rect.GetWidth(); + state.scissor.height = draw_rect.GetHeight(); + state.Apply(); + + // TODO(Subv): Support clearing only partial colors. 
+ glClearColor(regs.clear_color[0], regs.clear_color[1], regs.clear_color[2], + regs.clear_color[3]); + glClearDepth(regs.clear_depth); + + GLbitfield clear_mask = GL_COLOR_BUFFER_BIT; + if (regs.clear_buffers.Z) + clear_mask |= GL_DEPTH_BUFFER_BIT; + + glClear(clear_mask); + + // Mark framebuffer surfaces as dirty + if (color_surface != nullptr && write_color_fb) { + res_cache.MarkSurfaceAsDirty(color_surface); + } + if (depth_surface != nullptr && write_depth_fb) { + res_cache.MarkSurfaceAsDirty(depth_surface); + } +} + void RasterizerOpenGL::DrawArrays() { if (accelerate_draw == AccelDraw::Disabled) return; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 493aa39e5a..0b1e139b0b 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -28,6 +28,7 @@ public: ~RasterizerOpenGL() override; void DrawArrays() override; + void Clear() override; void NotifyMaxwellRegisterChanged(u32 method) override; void FlushAll() override; void FlushRegion(Tegra::GPUVAddr addr, u64 size) override; From c1811ed3d1805e0d0ab536762692e7c0e80f14dc Mon Sep 17 00:00:00 2001 From: Subv Date: Mon, 2 Jul 2018 19:09:03 -0500 Subject: [PATCH 5/8] GPU: Support clears that don't clear the color buffer. 
--- src/video_core/engines/maxwell_3d.cpp | 5 +++-- .../renderer_opengl/gl_rasterizer.cpp | 18 ++++++++++++++---- 2 files changed, 17 insertions(+), 6 deletions(-) diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 78f1c0ea72..4d1a79c559 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -420,8 +420,9 @@ bool Maxwell3D::IsShaderStageEnabled(Regs::ShaderStage stage) const { } void Maxwell3D::ProcessClearBuffers() { - ASSERT(regs.clear_buffers.R && regs.clear_buffers.G && regs.clear_buffers.B && - regs.clear_buffers.A); + ASSERT(regs.clear_buffers.R == regs.clear_buffers.G && + regs.clear_buffers.R == regs.clear_buffers.B && + regs.clear_buffers.R == regs.clear_buffers.A); VideoCore::g_renderer->Rasterizer()->Clear(); } diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 8e11711610..08c4df5dcc 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -300,6 +300,20 @@ bool RasterizerOpenGL::AccelerateDrawBatch(bool is_indexed) { void RasterizerOpenGL::Clear() { const auto& regs = Core::System().GetInstance().GPU().Maxwell3D().regs; + GLbitfield clear_mask = 0; + if (regs.clear_buffers.R && regs.clear_buffers.G && regs.clear_buffers.B && + regs.clear_buffers.A) { + clear_mask |= GL_COLOR_BUFFER_BIT; + } + if (regs.clear_buffers.Z) + clear_mask |= GL_DEPTH_BUFFER_BIT; + + if (clear_mask == 0) + return; + + // Sync the depth test state before configuring the framebuffer surfaces. 
+ SyncDepthTestState(); + // TODO(bunnei): Implement these const bool has_stencil = false; const bool using_color_fb = true; @@ -353,10 +367,6 @@ void RasterizerOpenGL::Clear() { regs.clear_color[3]); glClearDepth(regs.clear_depth); - GLbitfield clear_mask = GL_COLOR_BUFFER_BIT; - if (regs.clear_buffers.Z) - clear_mask |= GL_DEPTH_BUFFER_BIT; - glClear(clear_mask); // Mark framebuffer surfaces as dirty From 78443a7f2929dece8d2509d50642df9478aeb166 Mon Sep 17 00:00:00 2001 From: Subv Date: Tue, 3 Jul 2018 16:55:44 -0500 Subject: [PATCH 6/8] GPU: Factor out the framebuffer configuration code for both Clear and Draw commands. --- .../renderer_opengl/gl_rasterizer.cpp | 152 +++++++----------- .../renderer_opengl/gl_rasterizer.h | 5 + 2 files changed, 62 insertions(+), 95 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 08c4df5dcc..32906be7ef 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -297,92 +297,7 @@ bool RasterizerOpenGL::AccelerateDrawBatch(bool is_indexed) { return true; } -void RasterizerOpenGL::Clear() { - const auto& regs = Core::System().GetInstance().GPU().Maxwell3D().regs; - - GLbitfield clear_mask = 0; - if (regs.clear_buffers.R && regs.clear_buffers.G && regs.clear_buffers.B && - regs.clear_buffers.A) { - clear_mask |= GL_COLOR_BUFFER_BIT; - } - if (regs.clear_buffers.Z) - clear_mask |= GL_DEPTH_BUFFER_BIT; - - if (clear_mask == 0) - return; - - // Sync the depth test state before configuring the framebuffer surfaces. 
- SyncDepthTestState(); - - // TODO(bunnei): Implement these - const bool has_stencil = false; - const bool using_color_fb = true; - const bool using_depth_fb = regs.zeta.Address() != 0; - const MathUtil::Rectangle viewport_rect{regs.viewport_transform[0].GetRect()}; - - const bool write_color_fb = - state.color_mask.red_enabled == GL_TRUE || state.color_mask.green_enabled == GL_TRUE || - state.color_mask.blue_enabled == GL_TRUE || state.color_mask.alpha_enabled == GL_TRUE; - - const bool write_depth_fb = - (state.depth.test_enabled && state.depth.write_mask == GL_TRUE) || - (has_stencil && state.stencil.test_enabled && state.stencil.write_mask != 0); - - Surface color_surface; - Surface depth_surface; - MathUtil::Rectangle surfaces_rect; - std::tie(color_surface, depth_surface, surfaces_rect) = - res_cache.GetFramebufferSurfaces(using_color_fb, using_depth_fb, viewport_rect); - - MathUtil::Rectangle draw_rect{ - static_cast(std::clamp(static_cast(surfaces_rect.left) + viewport_rect.left, - surfaces_rect.left, surfaces_rect.right)), // Left - static_cast(std::clamp(static_cast(surfaces_rect.bottom) + viewport_rect.top, - surfaces_rect.bottom, surfaces_rect.top)), // Top - static_cast(std::clamp(static_cast(surfaces_rect.left) + viewport_rect.right, - surfaces_rect.left, surfaces_rect.right)), // Right - static_cast( - std::clamp(static_cast(surfaces_rect.bottom) + viewport_rect.bottom, - surfaces_rect.bottom, surfaces_rect.top))}; // Bottom - - // Bind the framebuffer surfaces - BindFramebufferSurfaces(color_surface, depth_surface, has_stencil); - - // Sync the viewport - SyncViewport(surfaces_rect); - - // TODO(bunnei): Sync scissorbox uniform(s) here - - // Viewport can have negative offsets or larger dimensions than our framebuffer sub-rect. 
Enable - // scissor test to prevent drawing outside of the framebuffer region - state.scissor.enabled = true; - state.scissor.x = draw_rect.left; - state.scissor.y = draw_rect.bottom; - state.scissor.width = draw_rect.GetWidth(); - state.scissor.height = draw_rect.GetHeight(); - state.Apply(); - - // TODO(Subv): Support clearing only partial colors. - glClearColor(regs.clear_color[0], regs.clear_color[1], regs.clear_color[2], - regs.clear_color[3]); - glClearDepth(regs.clear_depth); - - glClear(clear_mask); - - // Mark framebuffer surfaces as dirty - if (color_surface != nullptr && write_color_fb) { - res_cache.MarkSurfaceAsDirty(color_surface); - } - if (depth_surface != nullptr && write_depth_fb) { - res_cache.MarkSurfaceAsDirty(depth_surface); - } -} - -void RasterizerOpenGL::DrawArrays() { - if (accelerate_draw == AccelDraw::Disabled) - return; - - MICROPROFILE_SCOPE(OpenGL_Drawing); +std::pair RasterizerOpenGL::ConfigureFramebuffers() { const auto& regs = Core::System().GetInstance().GPU().Maxwell3D().regs; // Sync the depth test state before configuring the framebuffer surfaces. @@ -425,11 +340,6 @@ void RasterizerOpenGL::DrawArrays() { BindFramebufferSurfaces(color_surface, depth_surface, has_stencil); SyncViewport(surfaces_rect); - SyncBlendState(); - SyncCullMode(); - - // TODO(bunnei): Sync framebuffer_scale uniform here - // TODO(bunnei): Sync scissorbox uniform(s) here // Viewport can have negative offsets or larger dimensions than our framebuffer sub-rect. Enable // scissor test to prevent drawing outside of the framebuffer region @@ -440,6 +350,58 @@ void RasterizerOpenGL::DrawArrays() { state.scissor.height = draw_rect.GetHeight(); state.Apply(); + // Only return the surface to be marked as dirty if writing to it is enabled. + return std::make_pair(write_color_fb ? color_surface : nullptr, + write_depth_fb ? 
depth_surface : nullptr); +} + +void RasterizerOpenGL::Clear() { + const auto& regs = Core::System().GetInstance().GPU().Maxwell3D().regs; + + GLbitfield clear_mask = 0; + if (regs.clear_buffers.R && regs.clear_buffers.G && regs.clear_buffers.B && + regs.clear_buffers.A) { + clear_mask |= GL_COLOR_BUFFER_BIT; + } + if (regs.clear_buffers.Z) + clear_mask |= GL_DEPTH_BUFFER_BIT; + + if (clear_mask == 0) + return; + + auto [dirty_color_surface, dirty_depth_surface] = ConfigureFramebuffers(); + + // TODO(Subv): Support clearing only partial colors. + glClearColor(regs.clear_color[0], regs.clear_color[1], regs.clear_color[2], + regs.clear_color[3]); + glClearDepth(regs.clear_depth); + + glClear(clear_mask); + + // Mark framebuffer surfaces as dirty + if (dirty_color_surface != nullptr) { + res_cache.MarkSurfaceAsDirty(dirty_color_surface); + } + if (dirty_depth_surface != nullptr) { + res_cache.MarkSurfaceAsDirty(dirty_depth_surface); + } +} + +void RasterizerOpenGL::DrawArrays() { + if (accelerate_draw == AccelDraw::Disabled) + return; + + MICROPROFILE_SCOPE(OpenGL_Drawing); + const auto& regs = Core::System().GetInstance().GPU().Maxwell3D().regs; + + auto [dirty_color_surface, dirty_depth_surface] = ConfigureFramebuffers(); + + SyncBlendState(); + SyncCullMode(); + + // TODO(bunnei): Sync framebuffer_scale uniform here + // TODO(bunnei): Sync scissorbox uniform(s) here + // Draw the vertex batch const bool is_indexed = accelerate_draw == AccelDraw::Indexed; const u64 index_buffer_size{regs.index_array.count * regs.index_array.FormatSizeInBytes()}; @@ -520,11 +482,11 @@ void RasterizerOpenGL::DrawArrays() { state.Apply(); // Mark framebuffer surfaces as dirty - if (color_surface != nullptr && write_color_fb) { - res_cache.MarkSurfaceAsDirty(color_surface); + if (dirty_color_surface != nullptr) { + res_cache.MarkSurfaceAsDirty(dirty_color_surface); } - if (depth_surface != nullptr && write_depth_fb) { - res_cache.MarkSurfaceAsDirty(depth_surface); + if 
(dirty_depth_surface != nullptr) { + res_cache.MarkSurfaceAsDirty(dirty_depth_surface); } } diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 0b1e139b0b..7738f40b18 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include "common/common_types.h" @@ -82,6 +83,10 @@ private: u32 border_color_a; }; + /// Configures the color and depth framebuffer states and returns the dirty + /// surfaces if writing was enabled. + std::pair ConfigureFramebuffers(); + /// Binds the framebuffer color and depth surface void BindFramebufferSurfaces(const Surface& color_surface, const Surface& depth_surface, bool has_stencil); From 5a9df3c6753e66519acaa13685abb89231e45ade Mon Sep 17 00:00:00 2001 From: Subv Date: Tue, 3 Jul 2018 22:32:59 -0500 Subject: [PATCH 7/8] GPU: Only configure the used framebuffers during clear. Don't try to configure the color buffer if it is not being cleared, it may not be completely valid at this point. 
--- .../renderer_opengl/gl_rasterizer.cpp | 20 +++++++---- .../renderer_opengl/gl_rasterizer.h | 2 +- .../renderer_opengl/gl_rasterizer_cache.cpp | 34 +++++++++++++++---- .../renderer_opengl/gl_rasterizer_cache.h | 9 +++-- 4 files changed, 48 insertions(+), 17 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 43dbf4da92..e516eb1adf 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -297,7 +297,8 @@ bool RasterizerOpenGL::AccelerateDrawBatch(bool is_indexed) { return true; } -std::pair RasterizerOpenGL::ConfigureFramebuffers() { +std::pair RasterizerOpenGL::ConfigureFramebuffers(bool using_color_fb, + bool using_depth_fb) { const auto& regs = Core::System().GetInstance().GPU().Maxwell3D().regs; // Sync the depth test state before configuring the framebuffer surfaces. @@ -306,9 +307,6 @@ std::pair RasterizerOpenGL::ConfigureFramebuffers() { // TODO(bunnei): Implement this const bool has_stencil = false; - const bool using_color_fb = true; - const bool using_depth_fb = regs.zeta.Address() != 0; - const MathUtil::Rectangle viewport_rect{regs.viewport_transform[0].GetRect()}; const bool write_color_fb = @@ -358,18 +356,25 @@ std::pair RasterizerOpenGL::ConfigureFramebuffers() { void RasterizerOpenGL::Clear() { const auto& regs = Core::System().GetInstance().GPU().Maxwell3D().regs; + bool use_color_fb = false; + bool use_depth_fb = false; + GLbitfield clear_mask = 0; if (regs.clear_buffers.R && regs.clear_buffers.G && regs.clear_buffers.B && regs.clear_buffers.A) { clear_mask |= GL_COLOR_BUFFER_BIT; + use_color_fb = true; } - if (regs.clear_buffers.Z) + if (regs.clear_buffers.Z) { clear_mask |= GL_DEPTH_BUFFER_BIT; + use_depth_fb = true; + } if (clear_mask == 0) return; - auto [dirty_color_surface, dirty_depth_surface] = ConfigureFramebuffers(); + auto [dirty_color_surface, dirty_depth_surface] = + 
ConfigureFramebuffers(use_color_fb, use_depth_fb); // TODO(Subv): Support clearing only partial colors. glClearColor(regs.clear_color[0], regs.clear_color[1], regs.clear_color[2], @@ -394,7 +399,8 @@ void RasterizerOpenGL::DrawArrays() { MICROPROFILE_SCOPE(OpenGL_Drawing); const auto& regs = Core::System().GetInstance().GPU().Maxwell3D().regs; - auto [dirty_color_surface, dirty_depth_surface] = ConfigureFramebuffers(); + auto [dirty_color_surface, dirty_depth_surface] = + ConfigureFramebuffers(true, regs.zeta.Address() != 0); SyncBlendState(); SyncCullMode(); diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 7738f40b18..c406142e49 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -85,7 +85,7 @@ private: /// Configures the color and depth framebuffer states and returns the dirty /// surfaces if writing was enabled. - std::pair ConfigureFramebuffers(); + std::pair ConfigureFramebuffers(bool using_color_fb, bool using_depth_fb); /// Binds the framebuffer color and depth surface void BindFramebufferSurfaces(const Surface& color_surface, const Surface& depth_surface, diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index 3a00d93831..50469c05cc 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -65,6 +65,25 @@ struct FormatTuple { return params; } +/*static*/ SurfaceParams SurfaceParams::CreateForDepthBuffer( + const Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig& config, Tegra::GPUVAddr zeta_address, + Tegra::DepthFormat format) { + + SurfaceParams params{}; + params.addr = zeta_address; + params.is_tiled = true; + params.block_height = Tegra::Texture::TICEntry::DefaultBlockHeight; + params.pixel_format = PixelFormatFromDepthFormat(format); + params.component_type = 
ComponentTypeFromDepthFormat(format); + params.type = GetFormatType(params.pixel_format); + params.size_in_bytes = params.SizeInBytes(); + params.width = config.width; + params.height = config.height; + params.unaligned_height = config.height; + params.size_in_bytes = params.SizeInBytes(); + return params; +} + static constexpr std::array tex_format_tuples = {{ {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, ComponentType::UNorm, false}, // ABGR8 {GL_RGB, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV, ComponentType::UNorm, false}, // B5G6R5 @@ -461,15 +480,16 @@ SurfaceSurfaceRect_Tuple RasterizerCacheOpenGL::GetFramebufferSurfaces( LOG_WARNING(Render_OpenGL, "hard-coded for render target 0!"); // get color and depth surfaces - const SurfaceParams color_params{SurfaceParams::CreateForFramebuffer(regs.rt[0])}; - SurfaceParams depth_params{color_params}; + SurfaceParams color_params{}; + SurfaceParams depth_params{}; + + if (using_color_fb) { + color_params = SurfaceParams::CreateForFramebuffer(regs.rt[0]); + } if (using_depth_fb) { - depth_params.addr = regs.zeta.Address(); - depth_params.pixel_format = SurfaceParams::PixelFormatFromDepthFormat(regs.zeta.format); - depth_params.component_type = SurfaceParams::ComponentTypeFromDepthFormat(regs.zeta.format); - depth_params.type = SurfaceParams::GetFormatType(depth_params.pixel_format); - depth_params.size_in_bytes = depth_params.SizeInBytes(); + depth_params = + SurfaceParams::CreateForDepthBuffer(regs.rt[0], regs.zeta.Address(), regs.zeta.format); } MathUtil::Rectangle color_rect{}; diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h index 7aaf371bda..8005a81b83 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h @@ -326,13 +326,18 @@ struct SurfaceParams { return addr <= (region_addr + region_size) && region_addr <= (addr + size_in_bytes); } - /// Creates SurfaceParams from a texture 
configation + /// Creates SurfaceParams from a texture configuration static SurfaceParams CreateForTexture(const Tegra::Texture::FullTextureInfo& config); - /// Creates SurfaceParams from a framebuffer configation + /// Creates SurfaceParams from a framebuffer configuration static SurfaceParams CreateForFramebuffer( const Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig& config); + /// Creates SurfaceParams for a depth buffer configuration + static SurfaceParams CreateForDepthBuffer( + const Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig& config, + Tegra::GPUVAddr zeta_address, Tegra::DepthFormat format); + Tegra::GPUVAddr addr; bool is_tiled; u32 block_height; From c1bebdef5e11558d27dd9aa60525b47c20598491 Mon Sep 17 00:00:00 2001 From: Subv Date: Wed, 4 Jul 2018 10:26:46 -0500 Subject: [PATCH 8/8] GPU: Flip the triangle front face winding if the GPU is configured to not flip the triangles. OpenGL's default behavior is already correct when the GPU is configured to flip the triangles. This fixes 1-2 Switch's splash screen. 
--- src/video_core/engines/maxwell_3d.h | 22 ++++++++++++++++--- .../renderer_opengl/gl_rasterizer.cpp | 10 +++++++++ 2 files changed, 29 insertions(+), 3 deletions(-) diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 988a6433e0..cc1f90de63 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -478,7 +478,9 @@ public: u32 depth_write_enabled; - INSERT_PADDING_WORDS(0x8); + INSERT_PADDING_WORDS(0x7); + + u32 d3d_cull_mode; BitField<0, 3, ComparisonOp> depth_test_func; @@ -498,7 +500,13 @@ public: u32 enable[NumRenderTargets]; } blend; - INSERT_PADDING_WORDS(0x2D); + INSERT_PADDING_WORDS(0xB); + + union { + BitField<4, 1, u32> triangle_rast_flip; + } screen_y_control; + + INSERT_PADDING_WORDS(0x21); u32 vb_element_base; @@ -528,7 +536,12 @@ public: } } tic; - INSERT_PADDING_WORDS(0x22); + INSERT_PADDING_WORDS(0x21); + + union { + BitField<2, 1, u32> coord_origin; + BitField<3, 10, u32> enable; + } point_coord_replace; struct { u32 code_address_high; @@ -818,11 +831,14 @@ ASSERT_REG_POSITION(rt_control, 0x487); ASSERT_REG_POSITION(depth_test_enable, 0x4B3); ASSERT_REG_POSITION(independent_blend_enable, 0x4B9); ASSERT_REG_POSITION(depth_write_enabled, 0x4BA); +ASSERT_REG_POSITION(d3d_cull_mode, 0x4C2); ASSERT_REG_POSITION(depth_test_func, 0x4C3); ASSERT_REG_POSITION(blend, 0x4CF); +ASSERT_REG_POSITION(screen_y_control, 0x4EB); ASSERT_REG_POSITION(vb_element_base, 0x50D); ASSERT_REG_POSITION(tsc, 0x557); ASSERT_REG_POSITION(tic, 0x55D); +ASSERT_REG_POSITION(point_coord_replace, 0x581); ASSERT_REG_POSITION(code_address, 0x582); ASSERT_REG_POSITION(draw, 0x585); ASSERT_REG_POSITION(index_array, 0x5F2); diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index e516eb1adf..3c3657d9d3 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -771,6 +771,16 @@ void 
RasterizerOpenGL::SyncCullMode() { if (state.cull.enabled) { state.cull.front_face = MaxwellToGL::FrontFace(regs.cull.front_face); state.cull.mode = MaxwellToGL::CullFace(regs.cull.cull_face); + + // If the GPU is configured to not flip the rasterized triangles, then we need to swap the + // notion of front and back. Note: We swap the winding when the value of the register is 0 + // because OpenGL's default behavior already matches the flipped configuration. + if (regs.screen_y_control.triangle_rast_flip == 0) { + if (state.cull.front_face == GL_CCW) + state.cull.front_face = GL_CW; + else if (state.cull.front_face == GL_CW) + state.cull.front_face = GL_CCW; + } } }