From c96da97630e6c9746bd0a3ef62d8e54364bf1281 Mon Sep 17 00:00:00 2001 From: bunnei Date: Sat, 30 Jun 2018 03:00:39 -0400 Subject: [PATCH 1/4] gl_shader_decompiler: Implement predicate NotEqualWithNan. --- src/video_core/engines/shader_bytecode.h | 1 + .../renderer_opengl/gl_shader_decompiler.cpp | 40 +++++++++++-------- 2 files changed, 24 insertions(+), 17 deletions(-) diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index cb4db06792..0527fc376b 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h @@ -142,6 +142,7 @@ enum class PredCondition : u64 { GreaterThan = 4, NotEqual = 5, GreaterEqual = 6, + NotEqualWithNan = 13, // TODO(Subv): Other condition types }; diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 46eaad0217..3ef79a5e7a 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -719,21 +719,31 @@ private: /** * Returns the comparison string to use to compare two values in the 'set' family of * instructions. - * @params condition The condition used in the 'set'-family instruction. + * @param condition The condition used in the 'set'-family instruction. + * @param op_a First operand to use for the comparison. + * @param op_b Second operand to use for the comparison. * @returns String corresponding to the GLSL operator that matches the desired comparison. 
*/ - std::string GetPredicateComparison(Tegra::Shader::PredCondition condition) const { + std::string GetPredicateComparison(Tegra::Shader::PredCondition condition, + const std::string& op_a, const std::string& op_b) const { using Tegra::Shader::PredCondition; static const std::unordered_map<PredCondition, const char*> PredicateComparisonStrings = { - {PredCondition::LessThan, "<"}, {PredCondition::Equal, "=="}, - {PredCondition::LessEqual, "<="}, {PredCondition::GreaterThan, ">"}, - {PredCondition::NotEqual, "!="}, {PredCondition::GreaterEqual, ">="}, + {PredCondition::LessThan, "<"}, {PredCondition::Equal, "=="}, + {PredCondition::LessEqual, "<="}, {PredCondition::GreaterThan, ">"}, + {PredCondition::NotEqual, "!="}, {PredCondition::GreaterEqual, ">="}, + {PredCondition::NotEqualWithNan, "!="}, }; - auto comparison = PredicateComparisonStrings.find(condition); + const auto& comparison{PredicateComparisonStrings.find(condition)}; ASSERT_MSG(comparison != PredicateComparisonStrings.end(), "Unknown predicate comparison operation"); - return comparison->second; + + std::string predicate{'(' + op_a + ") " + comparison->second + " (" + op_b + ')'}; + if (condition == PredCondition::NotEqualWithNan) { + predicate += " || isnan(" + op_a + ") || isnan(" + op_b + ')'; + } + + return predicate; } /** @@ -1415,10 +1425,9 @@ private: std::string second_pred = GetPredicateCondition(instr.fsetp.pred39, instr.fsetp.neg_pred != 0); - std::string comparator = GetPredicateComparison(instr.fsetp.cond); std::string combiner = GetPredicateCombiner(instr.fsetp.op); - std::string predicate = '(' + op_a + ") " + comparator + " (" + op_b + ')'; + std::string predicate = GetPredicateComparison(instr.fsetp.cond, op_a, op_b); // Set the primary predicate to the result of Predicate OP SecondPredicate SetPredicate(instr.fsetp.pred3, '(' + predicate + ") " + combiner + " (" + second_pred + ')'); @@ -1453,10 +1462,9 @@ private: std::string second_pred = GetPredicateCondition(instr.isetp.pred39, instr.isetp.neg_pred != 0); 
- std::string comparator = GetPredicateComparison(instr.isetp.cond); std::string combiner = GetPredicateCombiner(instr.isetp.op); - std::string predicate = '(' + op_a + ") " + comparator + " (" + op_b + ')'; + std::string predicate = GetPredicateComparison(instr.isetp.cond, op_a, op_b); // Set the primary predicate to the result of Predicate OP SecondPredicate SetPredicate(instr.isetp.pred3, '(' + predicate + ") " + combiner + " (" + second_pred + ')'); @@ -1503,11 +1511,10 @@ private: std::string second_pred = GetPredicateCondition(instr.fset.pred39, instr.fset.neg_pred != 0); - std::string comparator = GetPredicateComparison(instr.fset.cond); std::string combiner = GetPredicateCombiner(instr.fset.op); - std::string predicate = "(((" + op_a + ") " + comparator + " (" + op_b + ")) " + - combiner + " (" + second_pred + "))"; + std::string predicate = "((" + GetPredicateComparison(instr.fset.cond, op_a, op_b) + + ") " + combiner + " (" + second_pred + "))"; if (instr.fset.bf) { regs.SetRegisterToFloat(instr.gpr0, 0, predicate + " ? 1.0 : 0.0", 1, 1); @@ -1538,11 +1545,10 @@ private: std::string second_pred = GetPredicateCondition(instr.iset.pred39, instr.iset.neg_pred != 0); - std::string comparator = GetPredicateComparison(instr.iset.cond); std::string combiner = GetPredicateCombiner(instr.iset.op); - std::string predicate = "(((" + op_a + ") " + comparator + " (" + op_b + ")) " + - combiner + " (" + second_pred + "))"; + std::string predicate = "((" + GetPredicateComparison(instr.iset.cond, op_a, op_b) + + ") " + combiner + " (" + second_pred + "))"; if (instr.iset.bf) { regs.SetRegisterToFloat(instr.gpr0, 0, predicate + " ? 1.0 : 0.0", 1, 1); From b11072d54a4e324fc6ad380f232c1de8bcf9ab63 Mon Sep 17 00:00:00 2001 From: Subv Date: Sat, 30 Jun 2018 14:08:51 -0500 Subject: [PATCH 2/4] GLCache: Specify the component type along the texture type in the format tuple. 
--- .../renderer_opengl/gl_rasterizer_cache.cpp | 38 ++++++++++--------- 1 file changed, 21 insertions(+), 17 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index 63f5999eae..2864a7c8e4 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -27,6 +27,7 @@ struct FormatTuple { GLint internal_format; GLenum format; GLenum type; + ComponentType component_type; bool compressed; }; @@ -65,29 +66,32 @@ struct FormatTuple { } static constexpr std::array tex_format_tuples = {{ - {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, false}, // ABGR8 - {GL_RGB, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV, false}, // B5G6R5 - {GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV, false}, // A2B10G10R10 - {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV, false}, // A1B5G5R5 - {GL_R8, GL_RED, GL_UNSIGNED_BYTE, false}, // R8 - {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT, false}, // RGBA16F - {GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV, false}, // R11FG11FB10F - {GL_COMPRESSED_RGB_S3TC_DXT1_EXT, GL_RGB, GL_UNSIGNED_INT_8_8_8_8, true}, // DXT1 - {GL_COMPRESSED_RGBA_S3TC_DXT3_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true}, // DXT23 - {GL_COMPRESSED_RGBA_S3TC_DXT5_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true}, // DXT45 - {GL_COMPRESSED_RED_RGTC1, GL_RED, GL_UNSIGNED_INT_8_8_8_8, true}, // DXN1 - {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_4X4 + {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, ComponentType::UNorm, false}, // ABGR8 + {GL_RGB, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV, ComponentType::UNorm, false}, // B5G6R5 + {GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV, ComponentType::UNorm, + false}, // A2B10G10R10 + {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV, ComponentType::UNorm, false}, // A1B5G5R5 + {GL_R8, GL_RED, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // R8 + {GL_RGBA16F, GL_RGBA, 
GL_HALF_FLOAT, ComponentType::Float, false}, // RGBA16F + {GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV, ComponentType::Float, + false}, // R11FG11FB10F + {GL_COMPRESSED_RGB_S3TC_DXT1_EXT, GL_RGB, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, + true}, // DXT1 + {GL_COMPRESSED_RGBA_S3TC_DXT3_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, + true}, // DXT23 + {GL_COMPRESSED_RGBA_S3TC_DXT5_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, + true}, // DXT45 + {GL_COMPRESSED_RED_RGTC1, GL_RED, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, true}, // DXN1 + {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_4X4 }}; static const FormatTuple& GetFormatTuple(PixelFormat pixel_format, ComponentType component_type) { const SurfaceType type = SurfaceParams::GetFormatType(pixel_format); if (type == SurfaceType::ColorTexture) { ASSERT(static_cast<size_t>(pixel_format) < tex_format_tuples.size()); - // For now only UNORM components are supported, or either R11FG11FB10F or RGBA16F which - // are type FLOAT - ASSERT(component_type == ComponentType::UNorm || pixel_format == PixelFormat::RGBA16F || - pixel_format == PixelFormat::R11FG11FB10F); - return tex_format_tuples[static_cast<size_t>(pixel_format)]; + auto& format = tex_format_tuples[static_cast<size_t>(pixel_format)]; + ASSERT(component_type == format.component_type); + return format; } else if (type == SurfaceType::Depth || type == SurfaceType::DepthStencil) { // TODO(Subv): Implement depth formats ASSERT_MSG(false, "Unimplemented"); From c0e2d5275814e6d3fe15d1b8abb0f057c0e5d155 Mon Sep 17 00:00:00 2001 From: Subv Date: Sat, 30 Jun 2018 14:23:13 -0500 Subject: [PATCH 3/4] GPU: Implemented the RGBA32_UINT rendertarget format. 
--- src/video_core/gpu.h | 1 + .../renderer_opengl/gl_rasterizer_cache.cpp | 11 ++++++---- .../renderer_opengl/gl_rasterizer_cache.h | 21 ++++++++++++++----- src/video_core/textures/decoders.cpp | 4 ++++ 4 files changed, 28 insertions(+), 9 deletions(-) diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index 7b4e9b8423..d0a4ac2671 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h @@ -16,6 +16,7 @@ namespace Tegra { enum class RenderTargetFormat : u32 { NONE = 0x0, RGBA32_FLOAT = 0xC0, + RGBA32_UINT = 0xC2, RGBA16_FLOAT = 0xCA, RGB10_A2_UNORM = 0xD1, RGBA8_UNORM = 0xD5, diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index 2864a7c8e4..ae48378f31 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -74,7 +74,8 @@ static constexpr std::array tex_form {GL_R8, GL_RED, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // R8 {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT, ComponentType::Float, false}, // RGBA16F {GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV, ComponentType::Float, - false}, // R11FG11FB10F + false}, // R11FG11FB10F + {GL_RGBA32UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT, ComponentType::UInt, false}, // RGBA32UI {GL_COMPRESSED_RGB_S3TC_DXT1_EXT, GL_RGB, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, true}, // DXT1 {GL_COMPRESSED_RGBA_S3TC_DXT3_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, @@ -170,9 +171,10 @@ static constexpr std::array, MortonCopy, MortonCopy, MortonCopy, MortonCopy, MortonCopy, - MortonCopy, MortonCopy, - MortonCopy, MortonCopy, - MortonCopy, MortonCopy, + MortonCopy, MortonCopy, + MortonCopy, MortonCopy, + MortonCopy, MortonCopy, + MortonCopy, }; static constexpr std::array, MortonCopy, MortonCopy, + MortonCopy, // TODO(Subv): Swizzling the DXT1/DXT23/DXT45/DXN1 formats is not yet supported nullptr, nullptr, diff --git 
a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h index 85e7c88889..99be250b44 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h @@ -30,11 +30,12 @@ struct SurfaceParams { R8 = 4, RGBA16F = 5, R11FG11FB10F = 6, - DXT1 = 7, - DXT23 = 8, - DXT45 = 9, - DXN1 = 10, // This is also known as BC4 - ASTC_2D_4X4 = 11, + RGBA32UI = 7, + DXT1 = 8, + DXT23 = 9, + DXT45 = 10, + DXN1 = 11, // This is also known as BC4 + ASTC_2D_4X4 = 12, Max, Invalid = 255, @@ -77,6 +78,7 @@ struct SurfaceParams { 1, // R8 1, // RGBA16F 1, // R11FG11FB10F + 1, // RGBA32UI 4, // DXT1 4, // DXT23 4, // DXT45 @@ -100,6 +102,7 @@ struct SurfaceParams { 8, // R8 64, // RGBA16F 32, // R11FG11FB10F + 128, // RGBA32UI 64, // DXT1 128, // DXT23 128, // DXT45 @@ -125,6 +128,8 @@ struct SurfaceParams { return PixelFormat::RGBA16F; case Tegra::RenderTargetFormat::R11G11B10_FLOAT: return PixelFormat::R11FG11FB10F; + case Tegra::RenderTargetFormat::RGBA32_UINT: + return PixelFormat::RGBA32UI; default: NGLOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast(format)); UNREACHABLE(); @@ -148,6 +153,8 @@ struct SurfaceParams { return PixelFormat::RGBA16F; case Tegra::Texture::TextureFormat::BF10GF11RF11: return PixelFormat::R11FG11FB10F; + case Tegra::Texture::TextureFormat::R32_G32_B32_A32: + return PixelFormat::RGBA32UI; case Tegra::Texture::TextureFormat::DXT1: return PixelFormat::DXT1; case Tegra::Texture::TextureFormat::DXT23: @@ -181,6 +188,8 @@ struct SurfaceParams { return Tegra::Texture::TextureFormat::R16_G16_B16_A16; case PixelFormat::R11FG11FB10F: return Tegra::Texture::TextureFormat::BF10GF11RF11; + case PixelFormat::RGBA32UI: + return Tegra::Texture::TextureFormat::R32_G32_B32_A32; case PixelFormat::DXT1: return Tegra::Texture::TextureFormat::DXT1; case PixelFormat::DXT23: @@ -217,6 +226,8 @@ struct SurfaceParams { case Tegra::RenderTargetFormat::RGBA16_FLOAT: 
case Tegra::RenderTargetFormat::R11G11B10_FLOAT: return ComponentType::Float; + case Tegra::RenderTargetFormat::RGBA32_UINT: + return ComponentType::UInt; default: NGLOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast(format)); UNREACHABLE(); diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp index 0db4367f16..eaf15da328 100644 --- a/src/video_core/textures/decoders.cpp +++ b/src/video_core/textures/decoders.cpp @@ -65,6 +65,8 @@ u32 BytesPerPixel(TextureFormat format) { return 1; case TextureFormat::R16_G16_B16_A16: return 8; + case TextureFormat::R32_G32_B32_A32: + return 16; default: UNIMPLEMENTED_MSG("Format not implemented"); break; @@ -94,6 +96,7 @@ std::vector UnswizzleTexture(VAddr address, TextureFormat format, u32 width, case TextureFormat::B5G6R5: case TextureFormat::R8: case TextureFormat::R16_G16_B16_A16: + case TextureFormat::R32_G32_B32_A32: case TextureFormat::BF10GF11RF11: case TextureFormat::ASTC_2D_4X4: CopySwizzledData(width, height, bytes_per_pixel, bytes_per_pixel, data, @@ -124,6 +127,7 @@ std::vector DecodeTexture(const std::vector& texture_data, TextureFormat case TextureFormat::B5G6R5: case TextureFormat::R8: case TextureFormat::BF10GF11RF11: + case TextureFormat::R32_G32_B32_A32: // TODO(Subv): For the time being just forward the same data without any decoding. rgba_data = texture_data; break; From f33e406ff28724cd64a945d97b1f2df051fe4881 Mon Sep 17 00:00:00 2001 From: Subv Date: Sat, 30 Jun 2018 14:48:25 -0500 Subject: [PATCH 4/4] GPU: Corrected the size of the MUFU subop field, and removed incorrect "min" operation. 
--- src/video_core/engines/shader_bytecode.h | 3 +-- src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 4 ---- 2 files changed, 1 insertion(+), 6 deletions(-) diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index cb4db06792..fcc0d32d94 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h @@ -165,7 +165,6 @@ enum class SubOp : u64 { Lg2 = 0x3, Rcp = 0x4, Rsq = 0x5, - Min = 0x8, }; enum class F2iRoundingOp : u64 { @@ -209,7 +208,7 @@ union Instruction { } pred; BitField<19, 1, u64> negate_pred; BitField<20, 8, Register> gpr20; - BitField<20, 7, SubOp> sub_op; + BitField<20, 4, SubOp> sub_op; BitField<28, 8, Register> gpr28; BitField<39, 8, Register> gpr39; BitField<48, 16, u64> opcode; diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 46eaad0217..5a43d8e24a 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -907,10 +907,6 @@ private: regs.SetRegisterToFloat(instr.gpr0, 0, "inversesqrt(" + op_a + ')', 1, 1, instr.alu.saturate_d); break; - case SubOp::Min: - regs.SetRegisterToFloat(instr.gpr0, 0, "min(" + op_a + "," + op_b + ')', 1, 1, - instr.alu.saturate_d); - break; default: NGLOG_CRITICAL(HW_GPU, "Unhandled MUFU sub op: {0:x}", static_cast(instr.sub_op.Value()));