CMakeLists: Make -Wreorder a compile-time error

Member initializers listed out of declaration order can result in silent logic bugs, and given the number of times these kinds of warnings are triggered, they should be flagged at compile time so that no new code is submitted with them.
Merge pull request #3662 from ReinUsesLisp/constant-attrs
2020-04-15 14:14:41 -04:00 · 2020-04-15 11:54:50 -04:00 · 2020-04-15 11:43:52 -04:00 · 2020-04-15 03:17:46 -04:00 · 2020-04-15 03:17:18 -04:00 · 2020-04-15 03:16:56 -04:00
31 changed files with 684 additions and 216 deletions
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -53,6 +53,7 @@ if (MSVC)
 else()
    add_compile_options(
        -Wall
+        -Werror=reorder
        -Wno-attributes
    )

--- a/src/core/file_sys/patch_manager.cpp
+++ b/src/core/file_sys/patch_manager.cpp
@@ -348,6 +348,12 @@ static void ApplyLayeredFS(VirtualFile& romfs, u64 title_id, ContentRecordType t
        if (ext_dir != nullptr)
            layers_ext.push_back(std::move(ext_dir));
    }
+
+    // When there are no layers to apply, return early as there is no need to rebuild the RomFS
+    if (layers.empty() && layers_ext.empty()) {
+        return;
+    }
+
    layers.push_back(std::move(extracted));

    auto layered = LayeredVfsDirectory::MakeLayeredDirectory(std::move(layers));
--- a/src/core/hle/kernel/kernel.cpp
+++ b/src/core/hle/kernel/kernel.cpp
@@ -103,7 +103,7 @@ static void ThreadWakeupCallback(u64 thread_handle, [[maybe_unused]] s64 cycles_

 struct KernelCore::Impl {
    explicit Impl(Core::System& system, KernelCore& kernel)
-        : system{system}, global_scheduler{kernel}, synchronization{system}, time_manager{system} {}
+        : global_scheduler{kernel}, synchronization{system}, time_manager{system}, system{system} {}

    void Initialize(KernelCore& kernel) {
        Shutdown();
--- a/src/core/hle/service/friend/friend.cpp
+++ b/src/core/hle/service/friend/friend.cpp
@@ -27,7 +27,7 @@ public:
            {10110, nullptr, "GetFriendProfileImage"},
            {10200, nullptr, "SendFriendRequestForApplication"},
            {10211, nullptr, "AddFacedFriendRequestForApplication"},
-            {10400, nullptr, "GetBlockedUserListIds"},
+            {10400, &IFriendService::GetBlockedUserListIds, "GetBlockedUserListIds"},
            {10500, nullptr, "GetProfileList"},
            {10600, nullptr, "DeclareOpenOnlinePlaySession"},
            {10601, &IFriendService::DeclareCloseOnlinePlaySession, "DeclareCloseOnlinePlaySession"},
@@ -121,6 +121,15 @@ private:
    };
    static_assert(sizeof(SizedFriendFilter) == 0x10, "SizedFriendFilter is an invalid size");

+    void GetBlockedUserListIds(Kernel::HLERequestContext& ctx) {
+        // This is safe to stub, as there should be no adverse consequences from reporting no
+        // blocked users.
+        LOG_WARNING(Service_ACC, "(STUBBED) called");
+        IPC::ResponseBuilder rb{ctx, 3};
+        rb.Push(RESULT_SUCCESS);
+        rb.Push<u32>(0); // Indicates there are no blocked users
+    }
+
    void DeclareCloseOnlinePlaySession(Kernel::HLERequestContext& ctx) {
        // Stub used by Splatoon 2
        LOG_WARNING(Service_ACC, "(STUBBED) called");
--- a/src/core/hle/service/nvflinger/buffer_queue.cpp
+++ b/src/core/hle/service/nvflinger/buffer_queue.cpp
@@ -28,6 +28,7 @@ void BufferQueue::SetPreallocatedBuffer(u32 slot, const IGBPBuffer& igbp_buffer)
    buffer.slot = slot;
    buffer.igbp_buffer = igbp_buffer;
    buffer.status = Buffer::Status::Free;
+    free_buffers.push_back(slot);

    queue.emplace_back(buffer);
    buffer_wait_event.writable->Signal();
@@ -35,16 +36,37 @@ void BufferQueue::SetPreallocatedBuffer(u32 slot, const IGBPBuffer& igbp_buffer)

 std::optional<std::pair<u32, Service::Nvidia::MultiFence*>> BufferQueue::DequeueBuffer(u32 width,
                                                                                       u32 height) {
-    auto itr = std::find_if(queue.begin(), queue.end(), [&](const Buffer& buffer) {
-        // Only consider free buffers. Buffers become free once again after they've been Acquired
-        // and Released by the compositor, see the NVFlinger::Compose method.
-        if (buffer.status != Buffer::Status::Free) {
-            return false;
-        }

-        // Make sure that the parameters match.
-        return buffer.igbp_buffer.width == width && buffer.igbp_buffer.height == height;
-    });
+    if (free_buffers.empty()) {
+        return {};
+    }
+
+    auto f_itr = free_buffers.begin();
+    auto itr = queue.end();
+
+    while (f_itr != free_buffers.end()) {
+        auto slot = *f_itr;
+        itr = std::find_if(queue.begin(), queue.end(), [&](const Buffer& buffer) {
+            // Only consider free buffers. Buffers become free once again after they've been
+            // Acquired and Released by the compositor, see the NVFlinger::Compose method.
+            if (buffer.status != Buffer::Status::Free) {
+                return false;
+            }
+
+            if (buffer.slot != slot) {
+                return false;
+            }
+
+            // Make sure that the parameters match.
+            return buffer.igbp_buffer.width == width && buffer.igbp_buffer.height == height;
+        });
+
+        if (itr != queue.end()) {
+            free_buffers.erase(f_itr);
+            break;
+        }
+        ++f_itr;
+    }

    if (itr == queue.end()) {
        return {};
@@ -99,10 +121,18 @@ void BufferQueue::ReleaseBuffer(u32 slot) {
    ASSERT(itr != queue.end());
    ASSERT(itr->status == Buffer::Status::Acquired);
    itr->status = Buffer::Status::Free;
+    free_buffers.push_back(slot);

    buffer_wait_event.writable->Signal();
 }

+void BufferQueue::Disconnect() {
+    queue.clear();
+    queue_sequence.clear();
+    id = 1;
+    layer_id = 1;
+}
+
 u32 BufferQueue::Query(QueryType type) {
    LOG_WARNING(Service, "(STUBBED) called type={}", static_cast<u32>(type));

--- a/src/core/hle/service/nvflinger/buffer_queue.h
+++ b/src/core/hle/service/nvflinger/buffer_queue.h
@@ -87,6 +87,7 @@ public:
                     Service::Nvidia::MultiFence& multi_fence);
    std::optional<std::reference_wrapper<const Buffer>> AcquireBuffer();
    void ReleaseBuffer(u32 slot);
+    void Disconnect();
    u32 Query(QueryType type);

    u32 GetId() const {
@@ -101,6 +102,7 @@ private:
    u32 id;
    u64 layer_id;

+    std::list<u32> free_buffers;
    std::vector<Buffer> queue;
    std::list<u32> queue_sequence;
    Kernel::EventPair buffer_wait_event;
--- a/src/core/hle/service/vi/vi.cpp
+++ b/src/core/hle/service/vi/vi.cpp
@@ -513,7 +513,8 @@ private:

        auto& buffer_queue = nv_flinger->FindBufferQueue(id);

-        if (transaction == TransactionId::Connect) {
+        switch (transaction) {
+        case TransactionId::Connect: {
            IGBPConnectRequestParcel request{ctx.ReadBuffer()};
            IGBPConnectResponseParcel response{
                static_cast<u32>(static_cast<u32>(DisplayResolution::UndockedWidth) *
@@ -521,14 +522,18 @@ private:
                static_cast<u32>(static_cast<u32>(DisplayResolution::UndockedHeight) *
                                 Settings::values.resolution_factor)};
            ctx.WriteBuffer(response.Serialize());
-        } else if (transaction == TransactionId::SetPreallocatedBuffer) {
+            break;
+        }
+        case TransactionId::SetPreallocatedBuffer: {
            IGBPSetPreallocatedBufferRequestParcel request{ctx.ReadBuffer()};

            buffer_queue.SetPreallocatedBuffer(request.data.slot, request.buffer);

            IGBPSetPreallocatedBufferResponseParcel response{};
            ctx.WriteBuffer(response.Serialize());
-        } else if (transaction == TransactionId::DequeueBuffer) {
+            break;
+        }
+        case TransactionId::DequeueBuffer: {
            IGBPDequeueBufferRequestParcel request{ctx.ReadBuffer()};
            const u32 width{request.data.width};
            const u32 height{request.data.height};
@@ -556,14 +561,18 @@ private:
                    },
                    buffer_queue.GetWritableBufferWaitEvent());
            }
-        } else if (transaction == TransactionId::RequestBuffer) {
+            break;
+        }
+        case TransactionId::RequestBuffer: {
            IGBPRequestBufferRequestParcel request{ctx.ReadBuffer()};

            auto& buffer = buffer_queue.RequestBuffer(request.slot);

            IGBPRequestBufferResponseParcel response{buffer};
            ctx.WriteBuffer(response.Serialize());
-        } else if (transaction == TransactionId::QueueBuffer) {
+            break;
+        }
+        case TransactionId::QueueBuffer: {
            IGBPQueueBufferRequestParcel request{ctx.ReadBuffer()};

            buffer_queue.QueueBuffer(request.data.slot, request.data.transform,
@@ -572,7 +581,9 @@ private:

            IGBPQueueBufferResponseParcel response{1280, 720};
            ctx.WriteBuffer(response.Serialize());
-        } else if (transaction == TransactionId::Query) {
+            break;
+        }
+        case TransactionId::Query: {
            IGBPQueryRequestParcel request{ctx.ReadBuffer()};

            const u32 value =
@@ -580,15 +591,30 @@ private:

            IGBPQueryResponseParcel response{value};
            ctx.WriteBuffer(response.Serialize());
-        } else if (transaction == TransactionId::CancelBuffer) {
+            break;
+        }
+        case TransactionId::CancelBuffer: {
            LOG_CRITICAL(Service_VI, "(STUBBED) called, transaction=CancelBuffer");
-        } else if (transaction == TransactionId::Disconnect ||
-                   transaction == TransactionId::DetachBuffer) {
+            break;
+        }
+        case TransactionId::Disconnect: {
+            LOG_WARNING(Service_VI, "(STUBBED) called, transaction=Disconnect");
+            const auto buffer = ctx.ReadBuffer();
+
+            buffer_queue.Disconnect();
+
+            IGBPEmptyResponseParcel response{};
+            ctx.WriteBuffer(response.Serialize());
+            break;
+        }
+        case TransactionId::DetachBuffer: {
            const auto buffer = ctx.ReadBuffer();

            IGBPEmptyResponseParcel response{};
            ctx.WriteBuffer(response.Serialize());
-        } else {
+            break;
+        }
+        default:
            ASSERT_MSG(false, "Unimplemented");
        }

--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -303,6 +303,10 @@ public:
                return (type == Type::SignedNorm) || (type == Type::UnsignedNorm);
            }

+            bool IsConstant() const {
+                return constant;
+            }
+
            bool IsValid() const {
                return size != Size::Invalid;
            }
@@ -312,6 +316,35 @@ public:
            }
        };

+        struct MsaaSampleLocation {
+            union {
+                BitField<0, 4, u32> x0;
+                BitField<4, 4, u32> y0;
+                BitField<8, 4, u32> x1;
+                BitField<12, 4, u32> y1;
+                BitField<16, 4, u32> x2;
+                BitField<20, 4, u32> y2;
+                BitField<24, 4, u32> x3;
+                BitField<28, 4, u32> y3;
+            };
+
+            constexpr std::pair<u32, u32> Location(int index) const {
+                switch (index) {
+                case 0:
+                    return {x0, y0};
+                case 1:
+                    return {x1, y1};
+                case 2:
+                    return {x2, y2};
+                case 3:
+                    return {x3, y3};
+                default:
+                    UNREACHABLE();
+                    return {0, 0};
+                }
+            }
+        };
+
        enum class DepthMode : u32 {
            MinusOneToOne = 0,
            ZeroToOne = 1,
@@ -793,7 +826,13 @@ public:

                u32 rt_separate_frag_data;

-                INSERT_UNION_PADDING_WORDS(0xC);
+                INSERT_UNION_PADDING_WORDS(0x1);
+
+                u32 multisample_raster_enable;
+                u32 multisample_raster_samples;
+                std::array<u32, 4> multisample_sample_mask;
+
+                INSERT_UNION_PADDING_WORDS(0x5);

                struct {
                    u32 address_high;
@@ -830,7 +869,16 @@ public:

                std::array<VertexAttribute, NumVertexAttributes> vertex_attrib_format;

-                INSERT_UNION_PADDING_WORDS(0xF);
+                std::array<MsaaSampleLocation, 4> multisample_sample_locations;
+
+                INSERT_UNION_PADDING_WORDS(0x2);
+
+                union {
+                    BitField<0, 1, u32> enable;
+                    BitField<4, 3, u32> target;
+                } multisample_coverage_to_color;
+
+                INSERT_UNION_PADDING_WORDS(0x8);

                struct {
                    union {
@@ -922,7 +970,10 @@ public:
                    BitField<4, 1, u32> triangle_rast_flip;
                } screen_y_control;

-                INSERT_UNION_PADDING_WORDS(0x21);
+                float line_width_smooth;
+                float line_width_aliased;
+
+                INSERT_UNION_PADDING_WORDS(0x1F);

                u32 vb_element_base;
                u32 vb_base_instance;
@@ -943,7 +994,7 @@ public:

                CounterReset counter_reset;

-                INSERT_UNION_PADDING_WORDS(0x1);
+                u32 multisample_enable;

                u32 zeta_enable;

@@ -980,7 +1031,7 @@ public:

                float polygon_offset_factor;

-                INSERT_UNION_PADDING_WORDS(0x1);
+                u32 line_smooth_enable;

                struct {
                    u32 tic_address_high;
@@ -1007,7 +1058,11 @@ public:

                float polygon_offset_units;

-                INSERT_UNION_PADDING_WORDS(0x11);
+                INSERT_UNION_PADDING_WORDS(0x4);
+
+                Tegra::Texture::MsaaMode multisample_mode;
+
+                INSERT_UNION_PADDING_WORDS(0xC);

                union {
                    BitField<2, 1, u32> coord_origin;
@@ -1507,12 +1562,17 @@ ASSERT_REG_POSITION(stencil_back_func_ref, 0x3D5);
 ASSERT_REG_POSITION(stencil_back_mask, 0x3D6);
 ASSERT_REG_POSITION(stencil_back_func_mask, 0x3D7);
 ASSERT_REG_POSITION(color_mask_common, 0x3E4);
-ASSERT_REG_POSITION(rt_separate_frag_data, 0x3EB);
 ASSERT_REG_POSITION(depth_bounds, 0x3E7);
+ASSERT_REG_POSITION(rt_separate_frag_data, 0x3EB);
+ASSERT_REG_POSITION(multisample_raster_enable, 0x3ED);
+ASSERT_REG_POSITION(multisample_raster_samples, 0x3EE);
+ASSERT_REG_POSITION(multisample_sample_mask, 0x3EF);
 ASSERT_REG_POSITION(zeta, 0x3F8);
 ASSERT_REG_POSITION(clear_flags, 0x43E);
 ASSERT_REG_POSITION(fill_rectangle, 0x44F);
 ASSERT_REG_POSITION(vertex_attrib_format, 0x458);
+ASSERT_REG_POSITION(multisample_sample_locations, 0x478);
+ASSERT_REG_POSITION(multisample_coverage_to_color, 0x47E);
 ASSERT_REG_POSITION(rt_control, 0x487);
 ASSERT_REG_POSITION(zeta_width, 0x48a);
 ASSERT_REG_POSITION(zeta_height, 0x48b);
@@ -1538,6 +1598,8 @@ ASSERT_REG_POSITION(stencil_front_func_mask, 0x4E6);
 ASSERT_REG_POSITION(stencil_front_mask, 0x4E7);
 ASSERT_REG_POSITION(frag_color_clamp, 0x4EA);
 ASSERT_REG_POSITION(screen_y_control, 0x4EB);
+ASSERT_REG_POSITION(line_width_smooth, 0x4EC);
+ASSERT_REG_POSITION(line_width_aliased, 0x4ED);
 ASSERT_REG_POSITION(vb_element_base, 0x50D);
 ASSERT_REG_POSITION(vb_base_instance, 0x50E);
 ASSERT_REG_POSITION(clip_distance_enabled, 0x544);
@@ -1545,11 +1607,13 @@ ASSERT_REG_POSITION(samplecnt_enable, 0x545);
 ASSERT_REG_POSITION(point_size, 0x546);
 ASSERT_REG_POSITION(point_sprite_enable, 0x548);
 ASSERT_REG_POSITION(counter_reset, 0x54C);
+ASSERT_REG_POSITION(multisample_enable, 0x54D);
 ASSERT_REG_POSITION(zeta_enable, 0x54E);
 ASSERT_REG_POSITION(multisample_control, 0x54F);
 ASSERT_REG_POSITION(condition, 0x554);
 ASSERT_REG_POSITION(tsc, 0x557);
-ASSERT_REG_POSITION(polygon_offset_factor, 0x55b);
+ASSERT_REG_POSITION(polygon_offset_factor, 0x55B);
+ASSERT_REG_POSITION(line_smooth_enable, 0x55C);
 ASSERT_REG_POSITION(tic, 0x55D);
 ASSERT_REG_POSITION(stencil_two_side_enable, 0x565);
 ASSERT_REG_POSITION(stencil_back_op_fail, 0x566);
@@ -1558,6 +1622,7 @@ ASSERT_REG_POSITION(stencil_back_op_zpass, 0x568);
 ASSERT_REG_POSITION(stencil_back_func_func, 0x569);
 ASSERT_REG_POSITION(framebuffer_srgb, 0x56E);
 ASSERT_REG_POSITION(polygon_offset_units, 0x56F);
+ASSERT_REG_POSITION(multisample_mode, 0x574);
 ASSERT_REG_POSITION(point_coord_replace, 0x581);
 ASSERT_REG_POSITION(code_address, 0x582);
 ASSERT_REG_POSITION(draw, 0x585);
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -290,6 +290,23 @@ enum class VmadShr : u64 {
    Shr15 = 2,
 };

+enum class VmnmxType : u64 {
+    Bits8,
+    Bits16,
+    Bits32,
+};
+
+enum class VmnmxOperation : u64 {
+    Mrg_16H = 0,
+    Mrg_16L = 1,
+    Mrg_8B0 = 2,
+    Mrg_8B2 = 3,
+    Acc = 4,
+    Min = 5,
+    Max = 6,
+    Nop = 7,
+};
+
 enum class XmadMode : u64 {
    None = 0,
    CLo = 1,
@@ -1650,6 +1667,42 @@ union Instruction {
        BitField<47, 1, u64> cc;
    } vmad;

+    union {
+        BitField<54, 1, u64> is_dest_signed;
+        BitField<48, 1, u64> is_src_a_signed;
+        BitField<49, 1, u64> is_src_b_signed;
+        BitField<37, 2, u64> src_format_a;
+        BitField<29, 2, u64> src_format_b;
+        BitField<56, 1, u64> mx;
+        BitField<55, 1, u64> sat;
+        BitField<36, 2, u64> selector_a;
+        BitField<28, 2, u64> selector_b;
+        BitField<50, 1, u64> is_op_b_register;
+        BitField<51, 3, VmnmxOperation> operation;
+
+        VmnmxType SourceFormatA() const {
+            switch (src_format_a) {
+            case 0b11:
+                return VmnmxType::Bits32;
+            case 0b10:
+                return VmnmxType::Bits16;
+            default:
+                return VmnmxType::Bits8;
+            }
+        }
+
+        VmnmxType SourceFormatB() const {
+            switch (src_format_b) {
+            case 0b11:
+                return VmnmxType::Bits32;
+            case 0b10:
+                return VmnmxType::Bits16;
+            default:
+                return VmnmxType::Bits8;
+            }
+        }
+    } vmnmx;
+
    union {
        BitField<20, 16, u64> imm20_16;
        BitField<35, 1, u64> high_b_rr; // used on RR
@@ -1763,6 +1816,7 @@ public:
        MEMBAR,
        VMAD,
        VSETP,
+        VMNMX,
        FFMA_IMM, // Fused Multiply and Add
        FFMA_CR,
        FFMA_RC,
@@ -1817,7 +1871,8 @@ public:
        ICMP_R,
        ICMP_CR,
        ICMP_IMM,
-        FCMP_R,
+        FCMP_RR,
+        FCMP_RC,
        MUFU,  // Multi-Function Operator
        RRO_C, // Range Reduction Operator
        RRO_R,
@@ -2070,6 +2125,7 @@ private:
            INST("1110111110011---", Id::MEMBAR, Type::Trivial, "MEMBAR"),
            INST("01011111--------", Id::VMAD, Type::Video, "VMAD"),
            INST("0101000011110---", Id::VSETP, Type::Video, "VSETP"),
+            INST("0011101---------", Id::VMNMX, Type::Video, "VMNMX"),
            INST("0011001-1-------", Id::FFMA_IMM, Type::Ffma, "FFMA_IMM"),
            INST("010010011-------", Id::FFMA_CR, Type::Ffma, "FFMA_CR"),
            INST("010100011-------", Id::FFMA_RC, Type::Ffma, "FFMA_RC"),
@@ -2124,7 +2180,8 @@ private:
            INST("0101110100100---", Id::HSETP2_R, Type::HalfSetPredicate, "HSETP2_R"),
            INST("0111111-0-------", Id::HSETP2_IMM, Type::HalfSetPredicate, "HSETP2_IMM"),
            INST("0101110100011---", Id::HSET2_R, Type::HalfSet, "HSET2_R"),
-            INST("010110111010----", Id::FCMP_R, Type::Arithmetic, "FCMP_R"),
+            INST("010110111010----", Id::FCMP_RR, Type::Arithmetic, "FCMP_RR"),
+            INST("010010111010----", Id::FCMP_RC, Type::Arithmetic, "FCMP_RC"),
            INST("0101000010000---", Id::MUFU, Type::Arithmetic, "MUFU"),
            INST("0100110010010---", Id::RRO_C, Type::Arithmetic, "RRO_C"),
            INST("0101110010010---", Id::RRO_R, Type::Arithmetic, "RRO_R"),
@@ -2170,7 +2227,7 @@ private:
            INST("0011011-11111---", Id::SHF_LEFT_IMM, Type::Shift, "SHF_LEFT_IMM"),
            INST("0100110011100---", Id::I2I_C, Type::Conversion, "I2I_C"),
            INST("0101110011100---", Id::I2I_R, Type::Conversion, "I2I_R"),
-            INST("0011101-11100---", Id::I2I_IMM, Type::Conversion, "I2I_IMM"),
+            INST("0011100-11100---", Id::I2I_IMM, Type::Conversion, "I2I_IMM"),
            INST("0100110010111---", Id::I2F_C, Type::Conversion, "I2F_C"),
            INST("0101110010111---", Id::I2F_R, Type::Conversion, "I2F_R"),
            INST("0011100-10111---", Id::I2F_IMM, Type::Conversion, "I2F_IMM"),
--- a/src/video_core/gpu_asynch.cpp
+++ b/src/video_core/gpu_asynch.cpp
@@ -12,8 +12,9 @@ namespace VideoCommon {

 GPUAsynch::GPUAsynch(Core::System& system, std::unique_ptr<VideoCore::RendererBase>&& renderer_,
                     std::unique_ptr<Core::Frontend::GraphicsContext>&& context)
-    : GPU(system, std::move(renderer_), true), gpu_thread{system}, gpu_context(std::move(context)),
-      cpu_context(renderer->GetRenderWindow().CreateSharedContext()) {}
+    : GPU(system, std::move(renderer_), true), gpu_thread{system},
+      cpu_context(renderer->GetRenderWindow().CreateSharedContext()),
+      gpu_context(std::move(context)) {}

 GPUAsynch::~GPUAsynch() = default;

--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -140,8 +140,8 @@ void RasterizerOpenGL::SetupVertexFormat() {
        const auto attrib = gpu.regs.vertex_attrib_format[index];
        const auto gl_index = static_cast<GLuint>(index);

-        // Ignore invalid attributes.
-        if (!attrib.IsValid()) {
+        // Disable constant attributes.
+        if (attrib.IsConstant()) {
            glDisableVertexAttribArray(gl_index);
            continue;
        }
@@ -345,7 +345,7 @@ void RasterizerOpenGL::ConfigureFramebuffers() {

    texture_cache.GuardRenderTargets(true);

-    View depth_surface = texture_cache.GetDepthBufferSurface(true);
+    View depth_surface = texture_cache.GetDepthBufferSurface();

    const auto& regs = gpu.regs;
    UNIMPLEMENTED_IF(regs.rt_separate_frag_data == 0);
@@ -354,7 +354,7 @@ void RasterizerOpenGL::ConfigureFramebuffers() {
    FramebufferCacheKey key;
    const auto colors_count = static_cast<std::size_t>(regs.rt_control.count);
    for (std::size_t index = 0; index < colors_count; ++index) {
-        View color_surface{texture_cache.GetColorBufferSurface(index, true)};
+        View color_surface{texture_cache.GetColorBufferSurface(index)};
        if (!color_surface) {
            continue;
        }
@@ -387,12 +387,12 @@ void RasterizerOpenGL::ConfigureClearFramebuffer(bool using_color_fb, bool using
    View color_surface;
    if (using_color_fb) {
        const std::size_t index = regs.clear_buffers.RT;
-        color_surface = texture_cache.GetColorBufferSurface(index, true);
+        color_surface = texture_cache.GetColorBufferSurface(index);
        texture_cache.MarkColorBufferInUse(index);
    }
    View depth_surface;
    if (using_depth_fb || using_stencil_fb) {
-        depth_surface = texture_cache.GetDepthBufferSurface(true);
+        depth_surface = texture_cache.GetDepthBufferSurface();
        texture_cache.MarkDepthBufferInUse();
    }
    texture_cache.GuardRenderTargets(false);
@@ -496,6 +496,7 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
    SyncPrimitiveRestart();
    SyncScissorTest();
    SyncPointState();
+    SyncLineState();
    SyncPolygonOffset();
    SyncAlphaTest();
    SyncFramebufferSRGB();
@@ -1311,6 +1312,19 @@ void RasterizerOpenGL::SyncPointState() {
    glDisable(GL_PROGRAM_POINT_SIZE);
 }

+void RasterizerOpenGL::SyncLineState() {
+    auto& gpu = system.GPU().Maxwell3D();
+    auto& flags = gpu.dirty.flags;
+    if (!flags[Dirty::LineWidth]) {
+        return;
+    }
+    flags[Dirty::LineWidth] = false;
+
+    const auto& regs = gpu.regs;
+    oglEnable(GL_LINE_SMOOTH, regs.line_smooth_enable);
+    glLineWidth(regs.line_smooth_enable ? regs.line_width_smooth : regs.line_width_aliased);
+}
+
 void RasterizerOpenGL::SyncPolygonOffset() {
    auto& gpu = system.GPU().Maxwell3D();
    auto& flags = gpu.dirty.flags;
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -171,6 +171,9 @@ private:
    /// Syncs the point state to match the guest state
    void SyncPointState();

+    /// Syncs the line state to match the guest state
+    void SyncLineState();
+
    /// Syncs the rasterizer enable state to match the guest state
    void SyncRasterizeEnable();

--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -34,6 +34,8 @@
 namespace OpenGL {

 using Tegra::Engines::ShaderType;
+using VideoCommon::Shader::CompileDepth;
+using VideoCommon::Shader::CompilerSettings;
 using VideoCommon::Shader::ProgramCode;
 using VideoCommon::Shader::Registry;
 using VideoCommon::Shader::ShaderIR;
@@ -43,7 +45,7 @@ namespace {
 constexpr u32 STAGE_MAIN_OFFSET = 10;
 constexpr u32 KERNEL_MAIN_OFFSET = 0;

-constexpr VideoCommon::Shader::CompilerSettings COMPILER_SETTINGS{};
+constexpr CompilerSettings COMPILER_SETTINGS{CompileDepth::FullDecompile};

 /// Gets the address for the specified shader stage program
 GPUVAddr GetShaderAddress(Core::System& system, Maxwell::ShaderProgram program) {
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -1819,15 +1819,17 @@ private:
    }

    Expression HMergeH0(Operation operation) {
-        std::string dest = VisitOperand(operation, 0).AsUint();
-        std::string src = VisitOperand(operation, 1).AsUint();
-        return {fmt::format("(({} & 0x0000FFFFU) | ({} & 0xFFFF0000U))", src, dest), Type::Uint};
+        const std::string dest = VisitOperand(operation, 0).AsUint();
+        const std::string src = VisitOperand(operation, 1).AsUint();
+        return {fmt::format("vec2(unpackHalf2x16({}).x, unpackHalf2x16({}).y)", src, dest),
+                Type::HalfFloat};
    }

    Expression HMergeH1(Operation operation) {
-        std::string dest = VisitOperand(operation, 0).AsUint();
-        std::string src = VisitOperand(operation, 1).AsUint();
-        return {fmt::format("(({} & 0x0000FFFFU) | ({} & 0xFFFF0000U))", dest, src), Type::Uint};
+        const std::string dest = VisitOperand(operation, 0).AsUint();
+        const std::string src = VisitOperand(operation, 1).AsUint();
+        return {fmt::format("vec2(unpackHalf2x16({}).x, unpackHalf2x16({}).y)", dest, src),
+                Type::HalfFloat};
    }

    Expression HPack2(Operation operation) {
--- a/src/video_core/renderer_opengl/gl_state_tracker.cpp
+++ b/src/video_core/renderer_opengl/gl_state_tracker.cpp
@@ -185,6 +185,12 @@ void SetupDirtyPointSize(Tables& tables) {
    tables[0][OFF(point_sprite_enable)] = PointSize;
 }

+void SetupDirtyLineWidth(Tables& tables) {
+    tables[0][OFF(line_width_smooth)] = LineWidth;
+    tables[0][OFF(line_width_aliased)] = LineWidth;
+    tables[0][OFF(line_smooth_enable)] = LineWidth;
+}
+
 void SetupDirtyClipControl(Tables& tables) {
    auto& table = tables[0];
    table[OFF(screen_y_control)] = ClipControl;
@@ -233,6 +239,7 @@ void StateTracker::Initialize() {
    SetupDirtyLogicOp(tables);
    SetupDirtyFragmentClampColor(tables);
    SetupDirtyPointSize(tables);
+    SetupDirtyLineWidth(tables);
    SetupDirtyClipControl(tables);
    SetupDirtyDepthClampEnabled(tables);
    SetupDirtyMisc(tables);
--- a/src/video_core/renderer_opengl/gl_state_tracker.h
+++ b/src/video_core/renderer_opengl/gl_state_tracker.h
@@ -78,6 +78,7 @@ enum : u8 {
    LogicOp,
    FragmentClampColor,
    PointSize,
+    LineWidth,
    ClipControl,
    DepthClampEnabled,

--- a/src/video_core/renderer_opengl/gl_texture_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp
@@ -411,14 +411,13 @@ CachedSurfaceView::~CachedSurfaceView() = default;
 void CachedSurfaceView::Attach(GLenum attachment, GLenum target) const {
    ASSERT(params.num_levels == 1);

-    const GLuint texture = surface.GetTexture();
    if (params.num_layers > 1) {
        // Layered framebuffer attachments
        UNIMPLEMENTED_IF(params.base_layer != 0);

        switch (params.target) {
        case SurfaceTarget::Texture2DArray:
-            glFramebufferTexture(target, attachment, texture, params.base_level);
+            glFramebufferTexture(target, attachment, GetTexture(), 0);
            break;
        default:
            UNIMPLEMENTED();
@@ -427,6 +426,7 @@ void CachedSurfaceView::Attach(GLenum attachment, GLenum target) const {
    }

    const GLenum view_target = surface.GetTarget();
+    const GLuint texture = surface.GetTexture();
    switch (surface.GetSurfaceParams().target) {
    case SurfaceTarget::Texture1D:
        glFramebufferTexture1D(target, attachment, view_target, texture, params.base_level);
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -315,8 +315,8 @@ public:

 RendererOpenGL::RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::System& system,
                               Core::Frontend::GraphicsContext& context)
-    : VideoCore::RendererBase{emu_window}, emu_window{emu_window}, system{system},
-      frame_mailbox{}, context{context}, has_debug_tool{HasDebugTool()} {}
+    : RendererBase{emu_window}, emu_window{emu_window}, system{system}, context{context},
+      has_debug_tool{HasDebugTool()} {}

 RendererOpenGL::~RendererOpenGL() = default;

--- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
+++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
@@ -360,6 +360,7 @@ VkFormat VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttrib
        default:
            break;
        }
+        break;
    case Maxwell::VertexAttribute::Type::UnsignedInt:
        switch (size) {
        case Maxwell::VertexAttribute::Size::Size_8:
@@ -370,6 +371,14 @@ VkFormat VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttrib
            return VK_FORMAT_R8G8B8_UINT;
        case Maxwell::VertexAttribute::Size::Size_8_8_8_8:
            return VK_FORMAT_R8G8B8A8_UINT;
+        case Maxwell::VertexAttribute::Size::Size_16:
+            return VK_FORMAT_R16_UINT;
+        case Maxwell::VertexAttribute::Size::Size_16_16:
+            return VK_FORMAT_R16G16_UINT;
+        case Maxwell::VertexAttribute::Size::Size_16_16_16:
+            return VK_FORMAT_R16G16B16_UINT;
+        case Maxwell::VertexAttribute::Size::Size_16_16_16_16:
+            return VK_FORMAT_R16G16B16A16_UINT;
        case Maxwell::VertexAttribute::Size::Size_32:
            return VK_FORMAT_R32_UINT;
        case Maxwell::VertexAttribute::Size::Size_32_32:
@@ -381,6 +390,7 @@ VkFormat VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttrib
        default:
            break;
        }
+        break;
    case Maxwell::VertexAttribute::Type::UnsignedScaled:
        switch (size) {
        case Maxwell::VertexAttribute::Size::Size_8:
--- a/src/video_core/renderer_vulkan/vk_blit_screen.cpp
+++ b/src/video_core/renderer_vulkan/vk_blit_screen.cpp
@@ -535,7 +535,9 @@ void VKBlitScreen::CreateGraphicsPipeline() {
    viewport_state_ci.pNext = nullptr;
    viewport_state_ci.flags = 0;
    viewport_state_ci.viewportCount = 1;
+    viewport_state_ci.pViewports = nullptr;
    viewport_state_ci.scissorCount = 1;
+    viewport_state_ci.pScissors = nullptr;

    VkPipelineRasterizationStateCreateInfo rasterization_ci;
    rasterization_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO;
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -62,13 +62,16 @@ constexpr auto ComputeShaderIndex = static_cast<std::size_t>(Tegra::Engines::Sha

 VkViewport GetViewportState(const VKDevice& device, const Maxwell& regs, std::size_t index) {
    const auto& src = regs.viewport_transform[index];
+    const float width = src.scale_x * 2.0f;
+    const float height = src.scale_y * 2.0f;
+
    VkViewport viewport;
    viewport.x = src.translate_x - src.scale_x;
    viewport.y = src.translate_y - src.scale_y;
-    viewport.width = src.scale_x * 2.0f;
-    viewport.height = src.scale_y * 2.0f;
+    viewport.width = width != 0.0f ? width : 1.0f;
+    viewport.height = height != 0.0f ? height : 1.0f;

-    const float reduce_z = regs.depth_mode == Maxwell::DepthMode::MinusOneToOne;
+    const float reduce_z = regs.depth_mode == Maxwell::DepthMode::MinusOneToOne ? 1.0f : 0.0f;
    viewport.minDepth = src.translate_z - src.scale_z * reduce_z;
    viewport.maxDepth = src.translate_z + src.scale_z;
    if (!device.IsExtDepthRangeUnrestrictedSupported()) {
@@ -609,7 +612,7 @@ RasterizerVulkan::Texceptions RasterizerVulkan::UpdateAttachments() {
    Texceptions texceptions;
    for (std::size_t rt = 0; rt < Maxwell::NumRenderTargets; ++rt) {
        if (update_rendertargets) {
-            color_attachments[rt] = texture_cache.GetColorBufferSurface(rt, true);
+            color_attachments[rt] = texture_cache.GetColorBufferSurface(rt);
        }
        if (color_attachments[rt] && WalkAttachmentOverlaps(*color_attachments[rt])) {
            texceptions[rt] = true;
@@ -617,7 +620,7 @@ RasterizerVulkan::Texceptions RasterizerVulkan::UpdateAttachments() {
    }

    if (update_rendertargets) {
-        zeta_attachment = texture_cache.GetDepthBufferSurface(true);
+        zeta_attachment = texture_cache.GetDepthBufferSurface();
    }
    if (zeta_attachment && WalkAttachmentOverlaps(*zeta_attachment)) {
        texceptions[ZETA_TEXCEPTION_INDEX] = true;
--- a/src/video_core/shader/decode/arithmetic.cpp
+++ b/src/video_core/shader/decode/arithmetic.cpp
@@ -136,7 +136,8 @@ u32 ShaderIR::DecodeArithmetic(NodeBlock& bb, u32 pc) {
        SetRegister(bb, instr.gpr0, value);
        break;
    }
-    case OpCode::Id::FCMP_R: {
+    case OpCode::Id::FCMP_RR:
+    case OpCode::Id::FCMP_RC: {
        UNIMPLEMENTED_IF(instr.fcmp.ftz == 0);
        Node op_c = GetRegister(instr.gpr39);
        Node comp = GetPredicateComparisonFloat(instr.fcmp.cond, std::move(op_c), Immediate(0.0f));
--- a/src/video_core/shader/decode/conversion.cpp
+++ b/src/video_core/shader/decode/conversion.cpp
@@ -2,6 +2,10 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.

+#include <limits>
+#include <optional>
+#include <utility>
+
 #include "common/assert.h"
 #include "common/common_types.h"
 #include "video_core/engines/shader_bytecode.h"
@@ -15,9 +19,49 @@ using Tegra::Shader::OpCode;
 using Tegra::Shader::Register;

 namespace {
+
 constexpr OperationCode GetFloatSelector(u64 selector) {
    return selector == 0 ? OperationCode::FCastHalf0 : OperationCode::FCastHalf1;
 }
+
+constexpr u32 SizeInBits(Register::Size size) {
+    // Operand width in bits; any encoding outside the four known sizes reports 0
+    if (size == Register::Size::Byte) {
+        return 8;
+    }
+    if (size == Register::Size::Short) {
+        return 16;
+    }
+    if (size == Register::Size::Word) {
+        return 32;
+    }
+    return size == Register::Size::Long ? 64 : 0;
+}
+
+// Bounds [min, max] used to clamp a saturating integer conversion, or nullopt when no clamp
+// is needed. A Long destination is clamped like a Word one - NOTE(review): confirm.
+constexpr std::optional<std::pair<s32, s32>> IntegerSaturateBounds(Register::Size src_size,
+                                                                   Register::Size dst_size,
+                                                                   bool src_signed,
+                                                                   bool dst_signed) {
+    const u32 dst_bits = SizeInBits(dst_size);
+    if (src_size == Register::Size::Word && dst_size == Register::Size::Word) {
+        if (src_signed == dst_signed) {
+            return std::nullopt;
+        }
+        return std::make_pair(0, std::numeric_limits<s32>::max());
+    }
+    if (dst_bits >= 32) {
+        // Full-register range; `1 << 31` overflows s32. Unsigned max is all ones, -1 as s32
+        return std::make_pair(dst_signed ? std::numeric_limits<s32>::min() : 0,
+                              dst_signed ? std::numeric_limits<s32>::max() : -1);
+    }
+    // Narrow destination: [-128, 127] for a signed byte, [0, 255] for an unsigned one
+    const s32 sign_bit = 1 << (dst_bits - 1);
+    return dst_signed ? std::make_pair(-sign_bit, sign_bit - 1)
+                      : std::make_pair(0, 2 * sign_bit - 1);
+}
+
 } // Anonymous namespace

 u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) {
@@ -28,14 +72,13 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) {
    case OpCode::Id::I2I_R:
    case OpCode::Id::I2I_C:
    case OpCode::Id::I2I_IMM: {
-        UNIMPLEMENTED_IF(instr.conversion.int_src.selector != 0);
-        UNIMPLEMENTED_IF(instr.conversion.dst_size != Register::Size::Word);
-        UNIMPLEMENTED_IF(instr.alu.saturate_d);
+        const bool src_signed = instr.conversion.is_input_signed;
+        const bool dst_signed = instr.conversion.is_output_signed;
+        const Register::Size src_size = instr.conversion.src_size;
+        const Register::Size dst_size = instr.conversion.dst_size;
+        const u32 selector = static_cast<u32>(instr.conversion.int_src.selector);

-        const bool input_signed = instr.conversion.is_input_signed;
-        const bool output_signed = instr.conversion.is_output_signed;
-
-        Node value = [&]() {
+        Node value = [this, instr, opcode] {
            switch (opcode->get().GetId()) {
            case OpCode::Id::I2I_R:
                return GetRegister(instr.gpr20);
@@ -48,16 +91,60 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) {
                return Immediate(0);
            }
        }();
-        value = ConvertIntegerSize(value, instr.conversion.src_size, input_signed);

-        value = GetOperandAbsNegInteger(value, instr.conversion.abs_a, instr.conversion.negate_a,
-                                        input_signed);
-        if (input_signed != output_signed) {
-            value = SignedOperation(OperationCode::ICastUnsigned, output_signed, NO_PRECISE, value);
+        // Ensure the source selector is valid
+        switch (instr.conversion.src_size) {
+        case Register::Size::Byte:
+            break;
+        case Register::Size::Short:
+            ASSERT(selector == 0 || selector == 2);
+            break;
+        default:
+            ASSERT(selector == 0);
+            break;
+        }
+
+        if (src_size != Register::Size::Word || selector != 0) {
+            value = SignedOperation(OperationCode::IBitfieldExtract, src_signed, std::move(value),
+                                    Immediate(selector * 8), Immediate(SizeInBits(src_size)));
+        }
+
+        value = GetOperandAbsNegInteger(std::move(value), instr.conversion.abs_a,
+                                        instr.conversion.negate_a, src_signed);
+
+        if (instr.alu.saturate_d) {
+            if (src_signed && !dst_signed) {
+                Node is_negative = Operation(OperationCode::LogicalUGreaterEqual, value,
+                                             Immediate(1 << (SizeInBits(src_size) - 1)));
+                value = Operation(OperationCode::Select, std::move(is_negative), Immediate(0),
+                                  std::move(value));
+
+                // Simplify generated expressions, this can be removed without semantic impact
+                SetTemporary(bb, 0, std::move(value));
+                value = GetTemporary(0);
+
+                if (dst_size != Register::Size::Word) {
+                    const Node limit = Immediate((1 << SizeInBits(dst_size)) - 1);
+                    // Copy value here: it is still needed as the Select fallback below
+                    Node is_large = Operation(OperationCode::LogicalUGreaterThan, value, limit);
+                    value = Operation(OperationCode::Select, std::move(is_large), limit,
+                                      std::move(value));
+                }
+            } else if (const std::optional bounds =
+                           IntegerSaturateBounds(src_size, dst_size, src_signed, dst_signed)) {
+                value = SignedOperation(OperationCode::IMax, src_signed, std::move(value),
+                                        Immediate(bounds->first));
+                value = SignedOperation(OperationCode::IMin, src_signed, std::move(value),
+                                        Immediate(bounds->second));
+            }
+        } else if (dst_size != Register::Size::Word) {
+            // No saturation, we only have to mask the result
+            Node mask = Immediate((1 << SizeInBits(dst_size)) - 1);
+            value = Operation(OperationCode::UBitwiseAnd, std::move(value), std::move(mask));
        }

        SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
-        SetRegister(bb, instr.gpr0, value);
+        SetRegister(bb, instr.gpr0, std::move(value));
        break;
    }
    case OpCode::Id::I2F_R:
--- a/src/video_core/shader/decode/texture.cpp
+++ b/src/video_core/shader/decode/texture.cpp
@@ -780,20 +780,6 @@ Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is
    // When lod is used always is in gpr20
    const Node lod = lod_enabled ? GetRegister(instr.gpr20) : Immediate(0);

-    // Fill empty entries from the guest sampler
-    const std::size_t entry_coord_count = GetCoordCount(sampler.GetType());
-    if (type_coord_count != entry_coord_count) {
-        LOG_WARNING(HW_GPU, "Bound and built texture types mismatch");
-
-        // When the size is higher we insert zeroes
-        for (std::size_t i = type_coord_count; i < entry_coord_count; ++i) {
-            coords.push_back(GetRegister(Register::ZeroIndex));
-        }
-
-        // Then we ensure the size matches the number of entries (dropping unused values)
-        coords.resize(entry_coord_count);
-    }
-
    Node4 values;
    for (u32 element = 0; element < values.size(); ++element) {
        auto coords_copy = coords;
--- a/src/video_core/shader/decode/video.cpp
+++ b/src/video_core/shader/decode/video.cpp
@@ -10,16 +10,24 @@

 namespace VideoCommon::Shader {

+using std::move;
 using Tegra::Shader::Instruction;
 using Tegra::Shader::OpCode;
 using Tegra::Shader::Pred;
 using Tegra::Shader::VideoType;
 using Tegra::Shader::VmadShr;
+using Tegra::Shader::VmnmxOperation;
+using Tegra::Shader::VmnmxType;

 u32 ShaderIR::DecodeVideo(NodeBlock& bb, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);

+    if (opcode->get().GetId() == OpCode::Id::VMNMX) {
+        DecodeVMNMX(bb, instr);
+        return pc;
+    }
+
    const Node op_a =
        GetVideoOperand(GetRegister(instr.gpr8), instr.video.is_byte_chunk_a, instr.video.signed_a,
                        instr.video.type_a, instr.video.byte_height_a);
@@ -109,4 +117,54 @@ Node ShaderIR::GetVideoOperand(Node op, bool is_chunk, bool is_signed,
    }
 }

+void ShaderIR::DecodeVMNMX(NodeBlock& bb, Tegra::Shader::Instruction instr) {
+    UNIMPLEMENTED_IF(!instr.vmnmx.is_op_b_register);
+    UNIMPLEMENTED_IF(instr.vmnmx.SourceFormatA() != VmnmxType::Bits32);
+    UNIMPLEMENTED_IF(instr.vmnmx.SourceFormatB() != VmnmxType::Bits32);
+    UNIMPLEMENTED_IF(instr.vmnmx.is_src_a_signed != instr.vmnmx.is_src_b_signed);
+    UNIMPLEMENTED_IF(instr.vmnmx.sat);
+    UNIMPLEMENTED_IF(instr.generates_cc);
+
+    Node op_a = GetRegister(instr.gpr8);  // first input of the min/max
+    Node op_b = GetRegister(instr.gpr20); // second input of the min/max
+    Node op_c = GetRegister(instr.gpr39); // combined with the min/max result in the switch
+
+    const bool is_oper1_signed = instr.vmnmx.is_src_a_signed; // Stubbed
+    const bool is_oper2_signed = instr.vmnmx.is_dest_signed; // only read by the Min/Max cases
+
+    const auto operation_a = instr.vmnmx.mx ? OperationCode::IMax : OperationCode::IMin;
+    Node value = SignedOperation(operation_a, is_oper1_signed, move(op_a), move(op_b));
+
+    switch (instr.vmnmx.operation) { // selects how the min/max result meets op_c
+    case VmnmxOperation::Mrg_16H: // presumably value goes into op_c bits [16, 32) - confirm
+        value = BitfieldInsert(move(op_c), move(value), 16, 16);
+        break;
+    case VmnmxOperation::Mrg_16L:
+        value = BitfieldInsert(move(op_c), move(value), 0, 16);
+        break;
+    case VmnmxOperation::Mrg_8B0:
+        value = BitfieldInsert(move(op_c), move(value), 0, 8);
+        break;
+    case VmnmxOperation::Mrg_8B2:
+        value = BitfieldInsert(move(op_c), move(value), 16, 8);
+        break;
+    case VmnmxOperation::Acc: // value + op_c
+        value = Operation(OperationCode::IAdd, move(value), move(op_c));
+        break;
+    case VmnmxOperation::Min:
+        value = SignedOperation(OperationCode::IMin, is_oper2_signed, move(value), move(op_c));
+        break;
+    case VmnmxOperation::Max:
+        value = SignedOperation(OperationCode::IMax, is_oper2_signed, move(value), move(op_c));
+        break;
+    case VmnmxOperation::Nop: // op_c is ignored, the min/max result is stored unchanged
+        break;
+    default:
+        UNREACHABLE();
+        break;
+    }
+
+    SetRegister(bb, instr.gpr0, move(value));
+}
+
 } // namespace VideoCommon::Shader
--- a/src/video_core/shader/shader_ir.h
+++ b/src/video_core/shader/shader_ir.h
@@ -354,6 +354,9 @@ private:
    /// Marks the usage of a input or output attribute.
    void MarkAttributeUsage(Tegra::Shader::Attribute::Index index, u64 element);

+    /// Decodes VMNMX instruction and inserts its code into the passed basic block.
+    void DecodeVMNMX(NodeBlock& bb, Tegra::Shader::Instruction instr);
+
    void WriteTexInstructionFloat(NodeBlock& bb, Tegra::Shader::Instruction instr,
                                  const Node4& components);

--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -108,7 +108,7 @@ public:
        }

        const auto params{SurfaceParams::CreateForTexture(format_lookup_table, tic, entry)};
-        const auto [surface, view] = GetSurface(gpu_addr, *cpu_addr, params, true, false);
+        const auto [surface, view] = GetSurface(gpu_addr, *cpu_addr, params, false);
        if (guard_samplers) {
            sampled_textures.push_back(surface);
        }
@@ -128,7 +128,7 @@ public:
            return GetNullSurface(SurfaceParams::ExpectedTarget(entry));
        }
        const auto params{SurfaceParams::CreateForImage(format_lookup_table, tic, entry)};
-        const auto [surface, view] = GetSurface(gpu_addr, *cpu_addr, params, true, false);
+        const auto [surface, view] = GetSurface(gpu_addr, *cpu_addr, params, false);
        if (guard_samplers) {
            sampled_textures.push_back(surface);
        }
@@ -143,7 +143,7 @@ public:
        return any_rt;
    }

-    TView GetDepthBufferSurface(bool preserve_contents) {
+    TView GetDepthBufferSurface() {
        std::lock_guard lock{mutex};
        auto& maxwell3d = system.GPU().Maxwell3D();
        if (!maxwell3d.dirty.flags[VideoCommon::Dirty::ZetaBuffer]) {
@@ -164,7 +164,7 @@ public:
            return {};
        }
        const auto depth_params{SurfaceParams::CreateForDepthBuffer(system)};
-        auto surface_view = GetSurface(gpu_addr, *cpu_addr, depth_params, preserve_contents, true);
+        auto surface_view = GetSurface(gpu_addr, *cpu_addr, depth_params, true);
        if (depth_buffer.target)
            depth_buffer.target->MarkAsRenderTarget(false, NO_RT);
        depth_buffer.target = surface_view.first;
@@ -174,7 +174,7 @@ public:
        return surface_view.second;
    }

-    TView GetColorBufferSurface(std::size_t index, bool preserve_contents) {
+    TView GetColorBufferSurface(std::size_t index) {
        std::lock_guard lock{mutex};
        ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets);
        auto& maxwell3d = system.GPU().Maxwell3D();
@@ -204,9 +204,8 @@ public:
            return {};
        }

-        auto surface_view =
-            GetSurface(gpu_addr, *cpu_addr, SurfaceParams::CreateForFramebuffer(system, index),
-                       preserve_contents, true);
+        auto surface_view = GetSurface(gpu_addr, *cpu_addr,
+                                       SurfaceParams::CreateForFramebuffer(system, index), true);
        if (render_targets[index].target)
            render_targets[index].target->MarkAsRenderTarget(false, NO_RT);
        render_targets[index].target = surface_view.first;
@@ -260,9 +259,9 @@ public:
        const std::optional<VAddr> src_cpu_addr =
            system.GPU().MemoryManager().GpuToCpuAddress(src_gpu_addr);
        std::pair<TSurface, TView> dst_surface =
-            GetSurface(dst_gpu_addr, *dst_cpu_addr, dst_params, true, false);
+            GetSurface(dst_gpu_addr, *dst_cpu_addr, dst_params, false);
        std::pair<TSurface, TView> src_surface =
-            GetSurface(src_gpu_addr, *src_cpu_addr, src_params, true, false);
+            GetSurface(src_gpu_addr, *src_cpu_addr, src_params, false);
        ImageBlit(src_surface.second, dst_surface.second, copy_config);
        dst_surface.first->MarkAsModified(true, Tick());
    }
@@ -451,22 +450,18 @@ private:
     * @param overlaps          The overlapping surfaces registered in the cache.
     * @param params            The parameters for the new surface.
     * @param gpu_addr          The starting address of the new surface.
-     * @param preserve_contents Indicates that the new surface should be loaded from memory or left
-     *                          blank.
     * @param untopological     Indicates to the recycler that the texture has no way to match the
     *                          overlaps due to topological reasons.
     **/
    std::pair<TSurface, TView> RecycleSurface(std::vector<TSurface>& overlaps,
                                              const SurfaceParams& params, const GPUVAddr gpu_addr,
-                                              const bool preserve_contents,
                                              const MatchTopologyResult untopological) {
-        const bool do_load = preserve_contents && Settings::values.use_accurate_gpu_emulation;
        for (auto& surface : overlaps) {
            Unregister(surface);
        }
        switch (PickStrategy(overlaps, params, gpu_addr, untopological)) {
        case RecycleStrategy::Ignore: {
-            return InitializeSurface(gpu_addr, params, do_load);
+            return InitializeSurface(gpu_addr, params, Settings::values.use_accurate_gpu_emulation);
        }
        case RecycleStrategy::Flush: {
            std::sort(overlaps.begin(), overlaps.end(),
@@ -476,7 +471,7 @@ private:
            for (auto& surface : overlaps) {
                FlushSurface(surface);
            }
-            return InitializeSurface(gpu_addr, params, preserve_contents);
+            return InitializeSurface(gpu_addr, params);
        }
        case RecycleStrategy::BufferCopy: {
            auto new_surface = GetUncachedSurface(gpu_addr, params);
@@ -485,7 +480,7 @@ private:
        }
        default: {
            UNIMPLEMENTED_MSG("Unimplemented Texture Cache Recycling Strategy!");
-            return InitializeSurface(gpu_addr, params, do_load);
+            return InitializeSurface(gpu_addr, params);
        }
        }
    }
@@ -514,7 +509,9 @@ private:
        }
        const auto& final_params = new_surface->GetSurfaceParams();
        if (cr_params.type != final_params.type) {
-            BufferCopy(current_surface, new_surface);
+            if (Settings::values.use_accurate_gpu_emulation) {
+                BufferCopy(current_surface, new_surface);
+            }
        } else {
            std::vector<CopyParams> bricks = current_surface->BreakDown(final_params);
            for (auto& brick : bricks) {
@@ -621,14 +618,11 @@ private:
     * @param params            The parameters on the new surface.
     * @param gpu_addr          The starting address of the new surface.
     * @param cache_addr        The starting address of the new surface on physical memory.
-     * @param preserve_contents Indicates that the new surface should be loaded from memory or
-     *                          left blank.
     */
    std::optional<std::pair<TSurface, TView>> Manage3DSurfaces(std::vector<TSurface>& overlaps,
                                                               const SurfaceParams& params,
                                                               const GPUVAddr gpu_addr,
-                                                               const VAddr cpu_addr,
-                                                               bool preserve_contents) {
+                                                               const VAddr cpu_addr) {
        if (params.target == SurfaceTarget::Texture3D) {
            bool failed = false;
            if (params.num_levels > 1) {
@@ -677,7 +671,7 @@ private:
                            return std::nullopt;
                        }
                        Unregister(surface);
-                        return InitializeSurface(gpu_addr, params, preserve_contents);
+                        return InitializeSurface(gpu_addr, params);
                    }
                    return std::nullopt;
                }
@@ -688,7 +682,7 @@ private:
                    return {{surface, surface->GetMainView()}};
                }
            }
-            return InitializeSurface(gpu_addr, params, preserve_contents);
+            return InitializeSurface(gpu_addr, params);
        }
    }

@@ -711,13 +705,10 @@ private:
     *
     * @param gpu_addr          The starting address of the candidate surface.
     * @param params            The parameters on the candidate surface.
-     * @param preserve_contents Indicates that the new surface should be loaded from memory or
-     *                          left blank.
     * @param is_render         Whether or not the surface is a render target.
     **/
    std::pair<TSurface, TView> GetSurface(const GPUVAddr gpu_addr, const VAddr cpu_addr,
-                                          const SurfaceParams& params, bool preserve_contents,
-                                          bool is_render) {
+                                          const SurfaceParams& params, bool is_render) {
        // Step 1
        // Check Level 1 Cache for a fast structural match. If candidate surface
        // matches at certain level we are pretty much done.
@@ -726,8 +717,7 @@ private:
            const auto topological_result = current_surface->MatchesTopology(params);
            if (topological_result != MatchTopologyResult::FullMatch) {
                std::vector<TSurface> overlaps{current_surface};
-                return RecycleSurface(overlaps, params, gpu_addr, preserve_contents,
-                                      topological_result);
+                return RecycleSurface(overlaps, params, gpu_addr, topological_result);
            }

            const auto struct_result = current_surface->MatchesStructure(params);
@@ -752,7 +742,7 @@ private:

        // If none are found, we are done. we just load the surface and create it.
        if (overlaps.empty()) {
-            return InitializeSurface(gpu_addr, params, preserve_contents);
+            return InitializeSurface(gpu_addr, params);
        }

        // Step 3
@@ -762,15 +752,13 @@ private:
        for (const auto& surface : overlaps) {
            const auto topological_result = surface->MatchesTopology(params);
            if (topological_result != MatchTopologyResult::FullMatch) {
-                return RecycleSurface(overlaps, params, gpu_addr, preserve_contents,
-                                      topological_result);
+                return RecycleSurface(overlaps, params, gpu_addr, topological_result);
            }
        }

        // Check if it's a 3D texture
        if (params.block_depth > 0) {
-            auto surface =
-                Manage3DSurfaces(overlaps, params, gpu_addr, cpu_addr, preserve_contents);
+            auto surface = Manage3DSurfaces(overlaps, params, gpu_addr, cpu_addr);
            if (surface) {
                return *surface;
            }
@@ -790,8 +778,7 @@ private:
                        return *view;
                    }
                }
-                return RecycleSurface(overlaps, params, gpu_addr, preserve_contents,
-                                      MatchTopologyResult::FullMatch);
+                return RecycleSurface(overlaps, params, gpu_addr, MatchTopologyResult::FullMatch);
            }
            // Now we check if the candidate is a mipmap/layer of the overlap
            std::optional<TView> view =
@@ -815,7 +802,7 @@ private:
                        pair.first->EmplaceView(params, gpu_addr, candidate_size);
                    if (mirage_view)
                        return {pair.first, *mirage_view};
-                    return RecycleSurface(overlaps, params, gpu_addr, preserve_contents,
+                    return RecycleSurface(overlaps, params, gpu_addr,
                                          MatchTopologyResult::FullMatch);
                }
                return {current_surface, *view};
@@ -831,8 +818,7 @@ private:
            }
        }
        // We failed all the tests, recycle the overlaps into a new texture.
-        return RecycleSurface(overlaps, params, gpu_addr, preserve_contents,
-                              MatchTopologyResult::FullMatch);
+        return RecycleSurface(overlaps, params, gpu_addr, MatchTopologyResult::FullMatch);
    }

    /**
@@ -990,10 +976,10 @@ private:
    }

    std::pair<TSurface, TView> InitializeSurface(GPUVAddr gpu_addr, const SurfaceParams& params,
-                                                 bool preserve_contents) {
+                                                 bool do_load = true) {
        auto new_surface{GetUncachedSurface(gpu_addr, params)};
        Register(new_surface);
-        if (preserve_contents) {
+        if (do_load) {
            LoadSurface(new_surface);
        }
        return {new_surface, new_surface->GetMainView()};
--- a/src/video_core/textures/astc.cpp
+++ b/src/video_core/textures/astc.cpp
@@ -20,6 +20,8 @@
 #include <cstring>
 #include <vector>

+#include <boost/container/static_vector.hpp>
+
 #include "common/common_types.h"

 #include "video_core/textures/astc.h"
@@ -39,25 +41,25 @@ constexpr u32 Popcnt(u32 n) {

 class InputBitStream {
 public:
-    explicit InputBitStream(const u8* ptr, std::size_t start_offset = 0)
-        : m_CurByte(ptr), m_NextBit(start_offset % 8) {}
+    constexpr explicit InputBitStream(const u8* ptr, std::size_t start_offset = 0)
+        : cur_byte{ptr}, next_bit{start_offset % 8} {}

-    std::size_t GetBitsRead() const {
-        return m_BitsRead;
+    constexpr std::size_t GetBitsRead() const {
+        return bits_read;
    }

-    u32 ReadBit() {
-        u32 bit = *m_CurByte >> m_NextBit++;
-        while (m_NextBit >= 8) {
-            m_NextBit -= 8;
-            m_CurByte++;
+    constexpr bool ReadBit() {
+        const bool bit = (*cur_byte >> next_bit++) & 1;
+        while (next_bit >= 8) {
+            next_bit -= 8;
+            cur_byte++;
        }

-        m_BitsRead++;
-        return bit & 1;
+        bits_read++;
+        return bit;
    }

-    u32 ReadBits(std::size_t nBits) {
+    constexpr u32 ReadBits(std::size_t nBits) {
        u32 ret = 0;
        for (std::size_t i = 0; i < nBits; ++i) {
            ret |= (ReadBit() & 1) << i;
@@ -66,7 +68,7 @@ public:
    }

    template <std::size_t nBits>
-    u32 ReadBits() {
+    constexpr u32 ReadBits() {
        u32 ret = 0;
        for (std::size_t i = 0; i < nBits; ++i) {
            ret |= (ReadBit() & 1) << i;
@@ -75,64 +77,58 @@ public:
    }

 private:
-    const u8* m_CurByte;
-    std::size_t m_NextBit = 0;
-    std::size_t m_BitsRead = 0;
+    const u8* cur_byte;
+    std::size_t next_bit = 0;
+    std::size_t bits_read = 0;
 };

 class OutputBitStream {
 public:
-    explicit OutputBitStream(u8* ptr, s32 nBits = 0, s32 start_offset = 0)
-        : m_NumBits(nBits), m_CurByte(ptr), m_NextBit(start_offset % 8) {}
+    constexpr explicit OutputBitStream(u8* ptr, std::size_t bits = 0, std::size_t start_offset = 0)
+        : cur_byte{ptr}, num_bits{bits}, next_bit{start_offset % 8} {}

-    ~OutputBitStream() = default;
-
-    s32 GetBitsWritten() const {
-        return m_BitsWritten;
+    constexpr std::size_t GetBitsWritten() const {
+        return bits_written;
    }

-    void WriteBitsR(u32 val, u32 nBits) {
+    constexpr void WriteBitsR(u32 val, u32 nBits) {
        for (u32 i = 0; i < nBits; i++) {
            WriteBit((val >> (nBits - i - 1)) & 1);
        }
    }

-    void WriteBits(u32 val, u32 nBits) {
+    constexpr void WriteBits(u32 val, u32 nBits) {
        for (u32 i = 0; i < nBits; i++) {
            WriteBit((val >> i) & 1);
        }
    }

 private:
-    void WriteBit(s32 b) {
-
-        if (done)
+    constexpr void WriteBit(bool b) {
+        if (bits_written >= num_bits) {
            return;
+        }
-
-        const u32 mask = 1 << m_NextBit++;
+        const u32 mask = 1 << next_bit++;

        // clear the bit
-        *m_CurByte &= static_cast<u8>(~mask);
+        *cur_byte &= static_cast<u8>(~mask);

        // Write the bit, if necessary
        if (b)
-            *m_CurByte |= static_cast<u8>(mask);
+            *cur_byte |= static_cast<u8>(mask);

        // Next byte?
-        if (m_NextBit >= 8) {
-            m_CurByte += 1;
-            m_NextBit = 0;
+        if (next_bit >= 8) {
+            cur_byte += 1;
+            next_bit = 0;
        }
-
-        done = done || ++m_BitsWritten >= m_NumBits;
+        ++bits_written; // advance the count the guard above and GetBitsWritten() rely on
    }

-    s32 m_BitsWritten = 0;
-    const s32 m_NumBits;
-    u8* m_CurByte;
-    s32 m_NextBit = 0;
-
-    bool done = false;
+    u8* cur_byte;
+    std::size_t num_bits;
+    std::size_t bits_written = 0;
+    std::size_t next_bit = 0;
 };

 template <typename IntType>
@@ -195,9 +191,13 @@ struct IntegerEncodedValue {
        u32 trit_value;
    };
 };
+using IntegerEncodedVector = boost::container::static_vector<
+    IntegerEncodedValue, 64, // NOTE(review): confirm no decoder pushes more than 64 values
+    boost::container::static_vector_options<
+        boost::container::inplace_alignment<alignof(IntegerEncodedValue)>,
+        boost::container::throw_on_overflow<false>>::type>; // overflow does not throw

-static void DecodeTritBlock(InputBitStream& bits, std::vector<IntegerEncodedValue>& result,
-                            u32 nBitsPerValue) {
+static void DecodeTritBlock(InputBitStream& bits, IntegerEncodedVector& result, u32 nBitsPerValue) {
    // Implement the algorithm in section C.2.12
    u32 m[5];
    u32 t[5];
@@ -255,7 +255,7 @@ static void DecodeTritBlock(InputBitStream& bits, std::vector<IntegerEncodedValu
    }
 }

-static void DecodeQus32Block(InputBitStream& bits, std::vector<IntegerEncodedValue>& result,
+static void DecodeQus32Block(InputBitStream& bits, IntegerEncodedVector& result,
                             u32 nBitsPerValue) {
    // Implement the algorithm in section C.2.12
    u32 m[3];
@@ -343,8 +343,8 @@ static constexpr std::array EncodingsValues = MakeEncodedValues();
 // Fills result with the values that are encoded in the given
 // bitstream. We must know beforehand what the maximum possible
 // value is, and how many values we're decoding.
-static void DecodeIntegerSequence(std::vector<IntegerEncodedValue>& result, InputBitStream& bits,
-                                  u32 maxRange, u32 nValues) {
+static void DecodeIntegerSequence(IntegerEncodedVector& result, InputBitStream& bits, u32 maxRange,
+                                  u32 nValues) {
    // Determine encoding parameters
    IntegerEncodedValue val = EncodingsValues[maxRange];

@@ -634,12 +634,14 @@ static void FillError(u32* outBuf, u32 blockWidth, u32 blockHeight) {
 // Replicates low numBits such that [(toBit - 1):(toBit - 1 - fromBit)]
 // is the same as [(numBits - 1):0] and repeats all the way down.
 template <typename IntType>
-static IntType Replicate(IntType val, u32 numBits, u32 toBit) {
-    if (numBits == 0)
+static constexpr IntType Replicate(IntType val, u32 numBits, u32 toBit) {
+    if (numBits == 0) {
        return 0;
-    if (toBit == 0)
+    }
+    if (toBit == 0) {
        return 0;
-    IntType v = val & static_cast<IntType>((1 << numBits) - 1);
+    }
+    const IntType v = val & static_cast<IntType>((1 << numBits) - 1);
    IntType res = v;
    u32 reslen = numBits;
    while (reslen < toBit) {
@@ -656,6 +658,89 @@ static IntType Replicate(IntType val, u32 numBits, u32 toBit) {
    return res;
 }

+static constexpr std::size_t NumReplicateEntries(u32 num_bits) {
+    return std::size_t(1) << num_bits;
+}
+
+template <typename IntType, u32 num_bits, u32 to_bit>
+static constexpr auto MakeReplicateTable() {
+    std::array<IntType, NumReplicateEntries(num_bits)> table{};
+    for (IntType value = 0; value < static_cast<IntType>(std::size(table)); ++value) {
+        table[value] = Replicate(value, num_bits, to_bit);
+    }
+    return table;
+}
+
+static constexpr auto REPLICATE_BYTE_TO_16_TABLE = MakeReplicateTable<u32, 8, 16>();
+static constexpr u32 ReplicateByteTo16(std::size_t value) {
+    return REPLICATE_BYTE_TO_16_TABLE[value];
+}
+
+static constexpr auto REPLICATE_BIT_TO_7_TABLE = MakeReplicateTable<u32, 1, 7>();
+static constexpr u32 ReplicateBitTo7(std::size_t value) {
+    return REPLICATE_BIT_TO_7_TABLE[value];
+}
+
+static constexpr auto REPLICATE_BIT_TO_9_TABLE = MakeReplicateTable<u32, 1, 9>();
+static constexpr u32 ReplicateBitTo9(std::size_t value) {
+    return REPLICATE_BIT_TO_9_TABLE[value];
+}
+
+static constexpr auto REPLICATE_1_BIT_TO_8_TABLE = MakeReplicateTable<u32, 1, 8>();
+static constexpr auto REPLICATE_2_BIT_TO_8_TABLE = MakeReplicateTable<u32, 2, 8>();
+static constexpr auto REPLICATE_3_BIT_TO_8_TABLE = MakeReplicateTable<u32, 3, 8>();
+static constexpr auto REPLICATE_4_BIT_TO_8_TABLE = MakeReplicateTable<u32, 4, 8>();
+static constexpr auto REPLICATE_5_BIT_TO_8_TABLE = MakeReplicateTable<u32, 5, 8>();
+static constexpr auto REPLICATE_6_BIT_TO_8_TABLE = MakeReplicateTable<u32, 6, 8>();
+static constexpr auto REPLICATE_7_BIT_TO_8_TABLE = MakeReplicateTable<u32, 7, 8>();
+static constexpr auto REPLICATE_8_BIT_TO_8_TABLE = MakeReplicateTable<u32, 8, 8>();
+/// Use a precompiled table with the most common usages, if it's not in the expected range, fallback
+/// to the runtime implementation
+static constexpr u32 FastReplicateTo8(u32 value, u32 num_bits) {
+    switch (num_bits) {
+    case 1:
+        return REPLICATE_1_BIT_TO_8_TABLE[value];
+    case 2:
+        return REPLICATE_2_BIT_TO_8_TABLE[value];
+    case 3:
+        return REPLICATE_3_BIT_TO_8_TABLE[value];
+    case 4:
+        return REPLICATE_4_BIT_TO_8_TABLE[value];
+    case 5:
+        return REPLICATE_5_BIT_TO_8_TABLE[value];
+    case 6:
+        return REPLICATE_6_BIT_TO_8_TABLE[value];
+    case 7:
+        return REPLICATE_7_BIT_TO_8_TABLE[value];
+    case 8:
+        return REPLICATE_8_BIT_TO_8_TABLE[value];
+    default:
+        return Replicate(value, num_bits, 8);
+    }
+}
+
+static constexpr auto REPLICATE_1_BIT_TO_6_TABLE = MakeReplicateTable<u32, 1, 6>();
+static constexpr auto REPLICATE_2_BIT_TO_6_TABLE = MakeReplicateTable<u32, 2, 6>();
+static constexpr auto REPLICATE_3_BIT_TO_6_TABLE = MakeReplicateTable<u32, 3, 6>();
+static constexpr auto REPLICATE_4_BIT_TO_6_TABLE = MakeReplicateTable<u32, 4, 6>();
+static constexpr auto REPLICATE_5_BIT_TO_6_TABLE = MakeReplicateTable<u32, 5, 6>();
+static constexpr u32 FastReplicateTo6(u32 value, u32 num_bits) {
+    switch (num_bits) {
+    case 1:
+        return REPLICATE_1_BIT_TO_6_TABLE[value];
+    case 2:
+        return REPLICATE_2_BIT_TO_6_TABLE[value];
+    case 3:
+        return REPLICATE_3_BIT_TO_6_TABLE[value];
+    case 4:
+        return REPLICATE_4_BIT_TO_6_TABLE[value];
+    case 5:
+        return REPLICATE_5_BIT_TO_6_TABLE[value];
+    default:
+        return Replicate(value, num_bits, 6);
+    }
+}
+
 class Pixel {
 protected:
    using ChannelType = s16;
@@ -674,10 +759,10 @@ public:
    // significant bits when going from larger to smaller bit depth
    // or by repeating the most significant bits when going from
    // smaller to larger bit depths.
-    void ChangeBitDepth(const u8 (&depth)[4]) {
+    void ChangeBitDepth() {
        for (u32 i = 0; i < 4; i++) {
-            Component(i) = ChangeBitDepth(Component(i), m_BitDepth[i], depth[i]);
-            m_BitDepth[i] = depth[i];
+            Component(i) = ChangeBitDepth(Component(i), m_BitDepth[i]);
+            m_BitDepth[i] = 8;
        }
    }

@@ -689,28 +774,23 @@ public:

    // Changes the bit depth of a single component. See the comment
    // above for how we do this.
-    static ChannelType ChangeBitDepth(Pixel::ChannelType val, u8 oldDepth, u8 newDepth) {
-        assert(newDepth <= 8);
+    static ChannelType ChangeBitDepth(Pixel::ChannelType val, u8 oldDepth) {
        assert(oldDepth <= 8);

-        if (oldDepth == newDepth) {
+        if (oldDepth == 8) {
            // Do nothing
            return val;
-        } else if (oldDepth == 0 && newDepth != 0) {
-            return static_cast<ChannelType>((1 << newDepth) - 1);
-        } else if (newDepth > oldDepth) {
-            return Replicate(val, oldDepth, newDepth);
+        } else if (oldDepth == 0) {
+            return static_cast<ChannelType>((1 << 8) - 1);
+        } else if (8 > oldDepth) {
+            return static_cast<ChannelType>(FastReplicateTo8(static_cast<u32>(val), oldDepth));
        } else {
            // oldDepth > newDepth
-            if (newDepth == 0) {
-                return 0xFF;
-            } else {
-                u8 bitsWasted = static_cast<u8>(oldDepth - newDepth);
-                u16 v = static_cast<u16>(val);
-                v = static_cast<u16>((v + (1 << (bitsWasted - 1))) >> bitsWasted);
-                v = ::std::min<u16>(::std::max<u16>(0, v), static_cast<u16>((1 << newDepth) - 1));
-                return static_cast<u8>(v);
-            }
+            const u8 bitsWasted = static_cast<u8>(oldDepth - 8);
+            u16 v = static_cast<u16>(val);
+            v = static_cast<u16>((v + (1 << (bitsWasted - 1))) >> bitsWasted);
+            v = ::std::min<u16>(::std::max<u16>(0, v), static_cast<u16>((1 << 8) - 1));
+            return static_cast<u8>(v);
        }

        assert(false && "We shouldn't get here.");
@@ -760,8 +840,7 @@ public:
    // up in the most-significant byte.
    u32 Pack() const {
        Pixel eightBit(*this);
-        const u8 eightBitDepth[4] = {8, 8, 8, 8};
-        eightBit.ChangeBitDepth(eightBitDepth);
+        eightBit.ChangeBitDepth();

        u32 r = 0;
        r |= eightBit.A();
@@ -816,8 +895,7 @@ static void DecodeColorValues(u32* out, u8* data, const u32* modes, const u32 nP
    }

    // We now have enough to decode our integer sequence.
-    std::vector<IntegerEncodedValue> decodedColorValues;
-    decodedColorValues.reserve(32);
+    IntegerEncodedVector decodedColorValues;

    InputBitStream colorStream(data);
    DecodeIntegerSequence(decodedColorValues, colorStream, range, nValues);
@@ -839,12 +917,12 @@ static void DecodeColorValues(u32* out, u8* data, const u32* modes, const u32 nP

        u32 A = 0, B = 0, C = 0, D = 0;
        // A is just the lsb replicated 9 times.
-        A = Replicate(bitval & 1, 1, 9);
+        A = ReplicateBitTo9(bitval & 1);

        switch (val.encoding) {
        // Replicate bits
        case IntegerEncoding::JustBits:
-            out[outIdx++] = Replicate(bitval, bitlen, 8);
+            out[outIdx++] = FastReplicateTo8(bitval, bitlen);
            break;

        // Use algorithm in C.2.13
@@ -962,13 +1040,13 @@ static u32 UnquantizeTexelWeight(const IntegerEncodedValue& val) {
    u32 bitval = val.bit_value;
    u32 bitlen = val.num_bits;

-    u32 A = Replicate(bitval & 1, 1, 7);
+    u32 A = ReplicateBitTo7(bitval & 1);
    u32 B = 0, C = 0, D = 0;

    u32 result = 0;
    switch (val.encoding) {
    case IntegerEncoding::JustBits:
-        result = Replicate(bitval, bitlen, 6);
+        result = FastReplicateTo6(bitval, bitlen);
        break;

    case IntegerEncoding::Trit: {
@@ -1047,7 +1125,7 @@ static u32 UnquantizeTexelWeight(const IntegerEncodedValue& val) {
    return result;
 }

-static void UnquantizeTexelWeights(u32 out[2][144], const std::vector<IntegerEncodedValue>& weights,
+static void UnquantizeTexelWeights(u32 out[2][144], const IntegerEncodedVector& weights,
                                   const TexelWeightParams& params, const u32 blockWidth,
                                   const u32 blockHeight) {
    u32 weightIdx = 0;
@@ -1545,8 +1623,7 @@ static void DecompressBlock(const u8 inBuf[16], const u32 blockWidth, const u32
        static_cast<u8>((1 << (weightParams.GetPackedBitSize() % 8)) - 1);
    memset(texelWeightData + clearByteStart, 0, 16 - clearByteStart);

-    std::vector<IntegerEncodedValue> texelWeightValues;
-    texelWeightValues.reserve(64);
+    IntegerEncodedVector texelWeightValues;

    InputBitStream weightStream(texelWeightData);

@@ -1568,9 +1645,9 @@ static void DecompressBlock(const u8 inBuf[16], const u32 blockWidth, const u32
            Pixel p;
            for (u32 c = 0; c < 4; c++) {
                u32 C0 = endpos32s[partition][0].Component(c);
-                C0 = Replicate(C0, 8, 16);
+                C0 = ReplicateByteTo16(C0);
                u32 C1 = endpos32s[partition][1].Component(c);
-                C1 = Replicate(C1, 8, 16);
+                C1 = ReplicateByteTo16(C1);

                u32 plane = 0;
                if (weightParams.m_bDualPlane && (((planeIdx + 1) & 3) == c)) {
--- a/src/video_core/textures/texture.h
+++ b/src/video_core/textures/texture.h
@@ -131,6 +131,20 @@ enum class SwizzleSource : u32 {
    OneFloat = 7,
 };

+enum class MsaaMode : u32 {
+    Msaa1x1 = 0,
+    Msaa2x1 = 1,
+    Msaa2x2 = 2,
+    Msaa4x2 = 3,
+    Msaa4x2_D3D = 4,
+    Msaa2x1_D3D = 5,
+    Msaa4x4 = 6,
+    Msaa2x2_VC4 = 8,
+    Msaa2x2_VC12 = 9,
+    Msaa4x2_VC8 = 10,
+    Msaa4x2_VC24 = 11,
+};
+
 union TextureHandle {
    TextureHandle(u32 raw) : raw{raw} {}

@@ -197,6 +211,7 @@ struct TICEntry {
    union {
        BitField<0, 4, u32> res_min_mip_level;
        BitField<4, 4, u32> res_max_mip_level;
+        BitField<8, 4, MsaaMode> msaa_mode;
        BitField<12, 12, u32> min_lod_clamp;
    };

--- a/src/yuzu/game_list.cpp
+++ b/src/yuzu/game_list.cpp
@@ -315,7 +315,7 @@ GameList::GameList(FileSys::VirtualFilesystem vfs, FileSys::ManualContentProvide
        item_model->setHeaderData(COLUMN_FILE_TYPE - 1, Qt::Horizontal, tr("File type"));
        item_model->setHeaderData(COLUMN_SIZE - 1, Qt::Horizontal, tr("Size"));
    }
-    item_model->setSortRole(GameListItemPath::TitleRole);
+    item_model->setSortRole(GameListItemPath::SortRole);

    connect(main_window, &GMainWindow::UpdateThemedIcons, this, &GameList::onUpdateThemedIcons);
    connect(tree_view, &QTreeView::activated, this, &GameList::ValidateEntry);
@@ -441,6 +441,8 @@ void GameList::DonePopulating(QStringList watch_list) {
    if (children_total > 0) {
        search_field->setFocus();
    }
+    item_model->sort(tree_view->header()->sortIndicatorSection(),
+                     tree_view->header()->sortIndicatorOrder());
 }

 void GameList::PopupContextMenu(const QPoint& menu_location) {
@@ -666,8 +668,6 @@ void GameList::LoadInterfaceLayout() {
        // so make it as large as possible as default.
        header->resizeSection(COLUMN_NAME, header->width());
    }
-
-    item_model->sort(header->sortIndicatorSection(), header->sortIndicatorOrder());
 }

 const QStringList GameList::supported_file_extensions = {
--- a/src/yuzu/game_list_p.h
+++ b/src/yuzu/game_list_p.h
@@ -65,10 +65,10 @@ public:
 */
 class GameListItemPath : public GameListItem {
 public:
-    static const int TitleRole = SortRole;
-    static const int FullPathRole = SortRole + 1;
-    static const int ProgramIdRole = SortRole + 2;
-    static const int FileTypeRole = SortRole + 3;
+    static const int TitleRole = SortRole + 1;
+    static const int FullPathRole = SortRole + 2;
+    static const int ProgramIdRole = SortRole + 3;
+    static const int FileTypeRole = SortRole + 4;

    GameListItemPath() = default;
    GameListItemPath(const QString& game_path, const std::vector<u8>& picture_data,
@@ -95,7 +95,7 @@ public:
    }

    QVariant data(int role) const override {
-        if (role == Qt::DisplayRole) {
+        if (role == Qt::DisplayRole || role == SortRole) {
            std::string filename;
            Common::SplitPath(data(FullPathRole).toString().toStdString(), nullptr, &filename,
                              nullptr);
@@ -110,6 +110,9 @@ public:
            const auto& row1 = row_data.at(UISettings::values.row_1_text_id);
            const int row2_id = UISettings::values.row_2_text_id;

+            if (role == SortRole)
+                return row1.toLower();
+
            if (row2_id == 4) // None
                return row1;

@@ -123,6 +126,13 @@ public:

        return GameListItem::data(role);
    }
+
+    /**
+     * Override to prevent automatic sorting.
+     */
+    bool operator<(const QStandardItem& other) const override {
+        return false;
+    }
 };

 class GameListItemCompat : public GameListItem {
@@ -289,6 +299,10 @@ public:
    int type() const override {
        return static_cast<int>(GameListItemType::AddDir);
    }
+
+    bool operator<(const QStandardItem& other) const override {
+        return false;
+    }
 };

 class GameList;
Author	SHA1	Message	Date
Lioncash	213fff67bc	CMakeLists: Make -Wreorder a compile-time error This can result in silent logic bugs within code, and given the amount of times these kind of warnings are caused, they should be flagged at compile-time so no new code is submitted with them.	2020-04-15 14:14:41 -04:00
Mat M	64b5985f0a	Merge pull request #3662 from ReinUsesLisp/constant-attrs gl_rasterizer: Implement constant vertex attributes	2020-04-15 11:54:50 -04:00
Mat M	9208d555b7	Merge pull request #3668 from ReinUsesLisp/vtx-format-16ui maxwell_to_vk: Add uint16 vertex formats	2020-04-15 11:43:52 -04:00
Mat M	ab72696beb	Merge pull request #3656 from ReinUsesLisp/glsl-full-decompile gl_shader_cache: Use CompileDepth::FullDecompile on GLSL	2020-04-15 03:17:46 -04:00
Mat M	4878d6bb49	Merge pull request #3654 from ReinUsesLisp/fix-fb-attach gl_texture_cache: Fix layered texture attachment base level	2020-04-15 03:17:18 -04:00
Mat M	50c0a92db8	Merge pull request #3663 from ReinUsesLisp/fcmp-rc shader/arithmetic: Add FCMP_CR variant	2020-04-15 03:16:56 -04:00
Mat M	13331a3a32	Merge pull request #3664 from ReinUsesLisp/fe3h-black-squares Revert "gl_shader_decompiler: Implement merges with bitfieldInsert"	2020-04-15 03:14:28 -04:00
Mat M	3a759d2352	Merge pull request #3667 from ReinUsesLisp/viewport-trash vk_blit_screen: Initialize all members in VkPipelineViewportStateCreateInfo	2020-04-15 03:10:34 -04:00
ReinUsesLisp	3036067047	maxwell_to_vk: Add uint16 vertex formats	2020-04-15 04:06:30 -03:00
ReinUsesLisp	b4e43c64c8	maxwell_to_vk: Add missing breaks Avoid invalid fallbacks.	2020-04-15 04:05:33 -03:00
ReinUsesLisp	0ca456830f	vk_blit_screen: Initialize all members in VkPipelineViewportStateCreateInfo When the dynamic state is specified, pViewports and pScissors are ignored, quoting the specification: pViewports is a pointer to an array of VkViewport structures, defining the viewport transforms. If the viewport state is dynamic, this member is ignored. That said, AMD's proprietary driver itself seem to read it regardless of what the specification says.	2020-04-15 03:30:08 -03:00
Rodrigo Locatti	0b132e8cc1	Merge pull request #3657 from ReinUsesLisp/viewport-zero vk_rasterizer: Default to 1 viewports with a size of 0	2020-04-15 01:51:17 -03:00
Fernando Sahmkow	daddbeffd1	Texture Cache: Only do buffer copies on accurate GPU. (#3634 ) This is a simple optimization as Buffer Copies are mostly used for texture recycling. They are, however, useful when games abuse undefined behavior but most 3D APIs forbid it.	2020-04-14 23:21:00 -04:00
ReinUsesLisp	fd6371eba7	Revert "gl_shader_decompiler: Implement merges with bitfieldInsert" This reverts commit `05cf270836`. Apparently the first approach using floats instead of bitfieldInsert worked better for Fire Emblem: Three Houses. Reverting to get that behavior back.	2020-04-14 21:24:33 -03:00
ReinUsesLisp	fefe7f18f9	shader/arithmetic: Add FCMP_CR variant Adds another variant of FCMP.	2020-04-14 19:11:04 -03:00
Zach Hilman	e366b4ee1f	Merge pull request #3660 from bunnei/friend-blocked-users service: friend: Stub IFriendService::GetBlockedUserListIds.	2020-04-14 16:59:46 -04:00
Zach Hilman	8040f6d544	Merge pull request #3661 from bunnei/patch-manager-fix file_sys: patch_manager: Return early when there are no layers to apply.	2020-04-14 16:59:25 -04:00
ReinUsesLisp	6dfcabc800	gl_rasterizer: Implement constant vertex attributes Credits go to gdkchan from Ryujinx for finding constant attributes are used in retail games.	2020-04-14 17:58:53 -03:00
bunnei	fc35803f91	file_sys: patch_manager: Return early when there are no layers to apply.	2020-04-14 16:25:55 -04:00
bunnei	598740f1dd	service: friend: Stub IFriendService::GetBlockedUserListIds. - This is safe to stub, as there should be no adverse consequences from reporting no blocked users.	2020-04-14 16:20:51 -04:00
ReinUsesLisp	37e5c4fa7c	vk_rasterizer: Default to 1 viewports with a size of 0 Silence validation layer errors.	2020-04-14 04:44:34 -03:00
ReinUsesLisp	453d7419d9	gl_shader_cache: Use CompileDepth::FullDecompile on GLSL From my testing on a Splatoon 2 shader that takes 3800ms on average to compile changing to FullDecompile reduces it to 900ms on average. The shader decoder will automatically fallback to a more naive method if it can't use full decompile.	2020-04-14 01:34:20 -03:00
ReinUsesLisp	21dc842171	gl_texture_cache: Fix layered texture attachment base level The base level is already included in the texture view. If we specify the base level in the texture again, this will end up in the incorrect level and potentially out of bounds.	2020-04-13 18:24:56 -03:00
Rodrigo Locatti	7e4a132a77	Merge pull request #3636 from ReinUsesLisp/drop-vk-hpp renderer_vulkan: Drop Vulkan-Hpp	2020-04-13 17:08:04 -03:00
Mat M	fbf13d3f48	Merge pull request #3651 from ReinUsesLisp/line-widths gl_rasterizer: Implement line widths and smooth lines	2020-04-13 10:19:59 -04:00
Mat M	08266d70ba	Merge pull request #3638 from ReinUsesLisp/remove-preserve-contents texture_cache: Remove preserve_contents	2020-04-13 10:19:01 -04:00
Mat M	c4001225f6	Merge pull request #3631 from ReinUsesLisp/more-astc texture/astc: More small ASTC optimizations	2020-04-13 10:17:32 -04:00
Mat M	7b62212461	Merge pull request #3619 from ReinUsesLisp/i2i shader/conversion: Implement I2I sign extension, saturation and selection	2020-04-13 10:17:07 -04:00
Mat M	3351e1e94f	Merge pull request #3627 from ReinUsesLisp/layered-view gl_texture_cache: Attach view instead of base texture for layered attachments	2020-04-13 10:16:18 -04:00
Mat M	d37d899431	Merge pull request #3646 from ReinUsesLisp/fix-glsl-turing gl_shader_decompiler: Improve generated code in HMergeH*	2020-04-13 10:15:12 -04:00
Mat M	47036859eb	Merge pull request #3633 from ReinUsesLisp/clean-texdec shader/texture: Remove type mismatches management from shader decoder	2020-04-13 10:13:05 -04:00
ReinUsesLisp	76615b9f34	gl_rasterizer: Implement line widths and smooth lines Implements "legacy" features from OpenGL present on hardware such as smooth lines and line width.	2020-04-13 01:30:34 -03:00
ReinUsesLisp	05cf270836	gl_shader_decompiler: Implement merges with bitfieldInsert This also fixes Turing issues but it avoids doing more bitcasts. This should improve the generated code while also avoiding more points where compilers can flush floats.	2020-04-12 22:39:59 -03:00
bunnei	a9f866264d	Merge pull request #3606 from ReinUsesLisp/nvflinger service/vi: Partially implement BufferQueue disconnect	2020-04-12 11:44:48 -04:00
Fernando Sahmkow	3d91dbb21d	Merge pull request #3578 from ReinUsesLisp/vmnmx shader/video: Partially implement VMNMX	2020-04-12 10:44:03 -04:00
Mat M	4aec01b850	Merge pull request #3644 from ReinUsesLisp/msaa video_core: Add MSAA registers in 3D engine and TIC	2020-04-12 09:11:44 -04:00
ReinUsesLisp	75eb953575	gl_shader_decompiler: Improve generated code in HMergeH* Avoiding bitwise expressions, this fixes Turing issues in shaders using half float merges that affected several games.	2020-04-12 05:06:55 -03:00
ReinUsesLisp	76f178ba6e	shader/video: Partially implement VMNMX Implements the common usages for VMNMX. Inputs with a different size than 32 bits are not supported and sign mismatches aren't supported either. VMNMX works as follows: It grabs Ra and Rb and applies a maximum/minimum on them (this is defined by .MX), having in mind the input sign. This result can then be saturated. After the intermediate result is calculated, it applies another operation on it using Rc. These operations are merges, accumulations or another min/max pass. This instruction allows to implement with a more flexible approach GCN's min3 and max3 instructions (for instance).	2020-04-12 00:34:42 -03:00
ReinUsesLisp	a7baf6fee4	video_core: Add MSAA registers in 3D engine and TIC This adds the registers used for multisampling. It doesn't implement anything for now.	2020-04-12 00:21:27 -03:00
Rodrigo Locatti	75e39f7f88	Merge pull request #3635 from FernandoS27/buffer-free Buffer queue: Correct behavior of free buffer.	2020-04-11 17:58:15 -03:00
bunnei	8938f9941c	Merge pull request #3611 from FearlessTobi/port-4956 Port citra-emu/citra#4956: "Fixes to game list sorting"	2020-04-11 12:44:36 -04:00
ReinUsesLisp	94b0e2e5da	texture_cache: Remove preserve_contents preserve_contents was always true. We can't assume we don't have to preserve clears because scissored and color masked clears exist. This removes preserve_contents and assumes it as true at all times.	2020-04-11 01:51:02 -03:00
Fernando Sahmkow	486a42c45a	Buffer queue: Correct behavior of free buffer. This corrects the behavior of free buffer after witnessing it in an unrelated hardware test. I haven't found any games affected by it but in name of better accuracy we'll correct such behavior.	2020-04-10 16:44:28 -04:00
ReinUsesLisp	8c0ba9c6fe	service/vi: Partially implement BufferQueue disconnect	2020-04-10 01:00:50 -03:00
ReinUsesLisp	a87b16da9a	shader/texture: Remove type mismatches management from shader decoder Since commit `e22816a5bb` we handle type mismatches from the CPU. We don't need to hack our shader decoder due to game bugs anymore. Removed in this commit.	2020-04-10 00:57:32 -03:00
ReinUsesLisp	6bf5d2b011	astc: Hard code bit depth changes to 8 and use fast replicate	2020-04-09 18:37:12 -03:00
ReinUsesLisp	bd2c1ab8a0	astc: Use boost's static_vector to avoid heap allocations	2020-04-09 05:27:57 -03:00
ReinUsesLisp	5de130beea	astc: Implement a fast precompiled alternative for Replicate	2020-04-09 03:58:25 -03:00
ReinUsesLisp	6b4d4473be	astc: Move Replicate to a constexpr LUT when possible	2020-04-09 03:35:07 -03:00
ReinUsesLisp	d22a689250	astc: Make InputBitStream constexpr	2020-04-09 02:54:05 -03:00
ReinUsesLisp	0efc230381	astc: OutputBitStream style changes and make it constexpr	2020-04-09 02:37:51 -03:00
ReinUsesLisp	6c8f9f40d7	gl_texture_cache: Attach view instead of base texture for layered attachments This way we are not ignoring the base layer of the current texture.	2020-04-08 22:20:25 -03:00
ReinUsesLisp	da706cad25	shader/conversion: Implement I2I sign extension, saturation and selection Reimplements I2I adding sign extension, saturation (clamp source value to the destination), selection and destination sizes that are not 32 bits wide. It doesn't implement CC yet.	2020-04-07 02:19:44 -03:00
FearlessTobi	8d0fb33ac4	yuzu: Fixes to game list sorting Should fix citra-emu/citra#4593. As the issue might not be entirely clear, I'll offer a short explanation from what I understood from it and found from experimentation. Currently yuzu offers the user the option to change the text that's displayed in the "Name" column in the game list. Generally, it is expected that the items are sorted based on the displayed text, but yuzu would sort them by title instead. Made it so that an access to SortRole returns the same as DisplayRole. There shouldn't be any UI changes, only change in behaviour. Also fixes a bug with directory sorting, where having the directories out of order would enable you to try to "move up" to the addDirectory button, which would crash the emulator. Co-Authored-By: Vitor K <vitor-k@users.noreply.github.com>	2020-04-06 03:12:17 +02:00
ReinUsesLisp	08470d261d	shader_bytecode: Fix I2I_IMM encoding	2020-03-28 18:49:07 -03:00