Shader_Ir: Implement F16 Variants of F2F, F2I, I2F.

This commit implements the F16 variants of the conversion instructions: half-float sources are cast to float before conversion (F2F, F2I), and results are cast to half-float when the destination size is Short (I2F, F2F).
Merge pull request #2693 from ReinUsesLisp/hsetp2
2019-07-20 17:38:25 -04:00 · 2019-07-20 17:25:08 -04:00 · 2019-07-20 19:24:24 +00:00 · 2019-07-20 19:22:30 +00:00 · 2019-07-20 15:20:53 -04:00 · 2019-07-20 15:19:25 -04:00
24 changed files with 287 additions and 327 deletions
--- a/.ci/templates/build-standard.yml
+++ b/.ci/templates/build-standard.yml
@@ -3,7 +3,7 @@ jobs:
  displayName: 'standard'
  pool:
    vmImage: ubuntu-latest
-  strategy: 
+  strategy:
    maxParallel: 10
    matrix:
      windows:
--- a/.ci/templates/build-testing.yml
+++ b/.ci/templates/build-testing.yml
@@ -3,7 +3,7 @@ jobs:
  displayName: 'testing'
  pool:
    vmImage: ubuntu-latest
-  strategy: 
+  strategy:
    maxParallel: 10
    matrix:
      windows:
--- a/.ci/templates/release.yml
+++ b/.ci/templates/release.yml
@@ -1,29 +0,0 @@
-steps:
-  - task: DownloadPipelineArtifact@2
-    displayName: 'Download Windows Release'
-    inputs:
-      artifactName: 'yuzu-$(BuildName)-windows-mingw'
-      buildType: 'current'
-      targetPath: '$(Build.ArtifactStagingDirectory)'
-  - task: DownloadPipelineArtifact@2
-    displayName: 'Download Linux Release'
-    inputs:
-      artifactName: 'yuzu-$(BuildName)-linux'
-      buildType: 'current'
-      targetPath: '$(Build.ArtifactStagingDirectory)'
-  - task: DownloadPipelineArtifact@2
-    displayName: 'Download Release Point'
-    inputs:
-      artifactName: 'yuzu-$(BuildName)-release-point'
-      buildType: 'current'
-      targetPath: '$(Build.ArtifactStagingDirectory)'
-  - script: echo '##vso[task.setvariable variable=tagcommit]' && cat $(Build.ArtifactStagingDirectory)/tag-commit.sha
-    displayName: 'Calculate Release Point'
-  - task: GitHubRelease@0
-    inputs:
-      gitHubConnection: $(GitHubReleaseConnectionName)
-      repositoryName: '$(GitHubReleaseRepoName)'
-      action: 'create'
-      target: $(variables.tagcommit)
-      title: 'yuzu $(BuildName) #$(Build.BuildId)'
-      assets: '$(Build.ArtifactStagingDirectory)/*'
--- a/README.md
+++ b/README.md
@@ -2,6 +2,7 @@ yuzu emulator
 =============
 [![Travis CI Build Status](https://travis-ci.org/yuzu-emu/yuzu.svg?branch=master)](https://travis-ci.org/yuzu-emu/yuzu)
 [![AppVeyor CI Build Status](https://ci.appveyor.com/api/projects/status/77k97svb2usreu68?svg=true)](https://ci.appveyor.com/project/bunnei/yuzu)
+[![Azure Mainline CI Build Status](https://dev.azure.com/yuzu-emu/yuzu/_apis/build/status/yuzu%20mainline?branchName=master)](https://dev.azure.com/yuzu-emu/yuzu/)

 yuzu is an experimental open-source emulator for the Nintendo Switch from the creators of [Citra](https://citra-emu.org/).

--- a/src/core/hle/kernel/process.cpp
+++ b/src/core/hle/kernel/process.cpp
@@ -184,19 +184,11 @@ ResultCode Process::LoadFromMetadata(const FileSys::ProgramMetadata& metadata) {
 }

 void Process::Run(s32 main_thread_priority, u64 stack_size) {
-    // The kernel always ensures that the given stack size is page aligned.
-    main_thread_stack_size = Common::AlignUp(stack_size, Memory::PAGE_SIZE);
-
-    // Allocate and map the main thread stack
-    // TODO(bunnei): This is heap area that should be allocated by the kernel and not mapped as part
-    // of the user address space.
-    const VAddr mapping_address = vm_manager.GetTLSIORegionEndAddress() - main_thread_stack_size;
-    vm_manager
-        .MapMemoryBlock(mapping_address, std::make_shared<std::vector<u8>>(main_thread_stack_size),
-                        0, main_thread_stack_size, MemoryState::Stack)
-        .Unwrap();
+    AllocateMainThreadStack(stack_size);
+    tls_region_address = CreateTLSRegion();

    vm_manager.LogLayout();
+
    ChangeStatus(ProcessStatus::Running);

    SetupMainThread(*this, kernel, main_thread_priority);
@@ -226,6 +218,9 @@ void Process::PrepareForTermination() {
    stop_threads(system.Scheduler(2).GetThreadList());
    stop_threads(system.Scheduler(3).GetThreadList());

+    FreeTLSRegion(tls_region_address);
+    tls_region_address = 0;
+
    ChangeStatus(ProcessStatus::Exited);
 }

@@ -325,4 +320,16 @@ void Process::ChangeStatus(ProcessStatus new_status) {
    WakeupAllWaitingThreads();
 }

+void Process::AllocateMainThreadStack(u64 stack_size) {
+    // The kernel always ensures that the given stack size is page aligned.
+    main_thread_stack_size = Common::AlignUp(stack_size, Memory::PAGE_SIZE);
+
+    // Allocate and map the main thread stack
+    const VAddr mapping_address = vm_manager.GetTLSIORegionEndAddress() - main_thread_stack_size;
+    vm_manager
+        .MapMemoryBlock(mapping_address, std::make_shared<std::vector<u8>>(main_thread_stack_size),
+                        0, main_thread_stack_size, MemoryState::Stack)
+        .Unwrap();
+}
+
 } // namespace Kernel
--- a/src/core/hle/kernel/process.h
+++ b/src/core/hle/kernel/process.h
@@ -135,6 +135,11 @@ public:
        return mutex;
    }

+    /// Gets the address to the process' dedicated TLS region.
+    VAddr GetTLSRegionAddress() const {
+        return tls_region_address;
+    }
+
    /// Gets the current status of the process
    ProcessStatus GetStatus() const {
        return status;
@@ -296,6 +301,9 @@ private:
    /// a process signal.
    void ChangeStatus(ProcessStatus new_status);

+    /// Allocates the main thread stack for the process, given the stack size in bytes.
+    void AllocateMainThreadStack(u64 stack_size);
+
    /// Memory manager for this process.
    Kernel::VMManager vm_manager;

@@ -358,6 +366,9 @@ private:
    /// variable related facilities.
    Mutex mutex;

+    /// Address indicating the location of the process' dedicated TLS region.
+    VAddr tls_region_address = 0;
+
    /// Random values for svcGetInfo RandomEntropy
    std::array<u64, RANDOM_ENTROPY_SIZE> random_entropy{};

--- a/src/core/hle/kernel/svc.cpp
+++ b/src/core/hle/kernel/svc.cpp
@@ -843,9 +843,7 @@ static ResultCode GetInfo(Core::System& system, u64* result, u64 info_id, u64 ha
            return RESULT_SUCCESS;

        case GetInfoType::UserExceptionContextAddr:
-            LOG_WARNING(Kernel_SVC,
-                        "(STUBBED) Attempted to query user exception context address, returned 0");
-            *result = 0;
+            *result = process->GetTLSRegionAddress();
            return RESULT_SUCCESS;

        case GetInfoType::TotalPhysicalMemoryAvailableWithoutSystemResource:
@@ -1739,8 +1737,8 @@ static ResultCode SignalProcessWideKey(Core::System& system, VAddr condition_var
 // Wait for an address (via Address Arbiter)
 static ResultCode WaitForAddress(Core::System& system, VAddr address, u32 type, s32 value,
                                 s64 timeout) {
-    LOG_WARNING(Kernel_SVC, "called, address=0x{:X}, type=0x{:X}, value=0x{:X}, timeout={}",
-                address, type, value, timeout);
+    LOG_TRACE(Kernel_SVC, "called, address=0x{:X}, type=0x{:X}, value=0x{:X}, timeout={}", address,
+              type, value, timeout);

    // If the passed address is a kernel virtual address, return invalid memory state.
    if (Memory::IsKernelVirtualAddress(address)) {
@@ -1762,8 +1760,8 @@ static ResultCode WaitForAddress(Core::System& system, VAddr address, u32 type,
 // Signals to an address (via Address Arbiter)
 static ResultCode SignalToAddress(Core::System& system, VAddr address, u32 type, s32 value,
                                  s32 num_to_wake) {
-    LOG_WARNING(Kernel_SVC, "called, address=0x{:X}, type=0x{:X}, value=0x{:X}, num_to_wake=0x{:X}",
-                address, type, value, num_to_wake);
+    LOG_TRACE(Kernel_SVC, "called, address=0x{:X}, type=0x{:X}, value=0x{:X}, num_to_wake=0x{:X}",
+              address, type, value, num_to_wake);

    // If the passed address is a kernel virtual address, return invalid memory state.
    if (Memory::IsKernelVirtualAddress(address)) {
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -249,10 +249,6 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
        ProcessQueryGet();
        break;
    }
-    case MAXWELL3D_REG_INDEX(condition.mode): {
-        ProcessQueryCondition();
-        break;
-    }
    case MAXWELL3D_REG_INDEX(sync_info): {
        ProcessSyncPoint();
        break;
@@ -306,7 +302,6 @@ void Maxwell3D::ProcessQueryGet() {
        result = regs.query.query_sequence;
        break;
    default:
-        result = 1;
        UNIMPLEMENTED_MSG("Unimplemented query select type {}",
                          static_cast<u32>(regs.query.query_get.select.Value()));
    }
@@ -347,45 +342,6 @@ void Maxwell3D::ProcessQueryGet() {
    }
 }

-void Maxwell3D::ProcessQueryCondition() {
-    const GPUVAddr condition_address{regs.condition.Address()};
-    switch (regs.condition.mode) {
-    case Regs::ConditionMode::Always: {
-        execute_on = true;
-        break;
-    }
-    case Regs::ConditionMode::Never: {
-        execute_on = false;
-        break;
-    }
-    case Regs::ConditionMode::ResNonZero: {
-        Regs::QueryCompare cmp;
-        memory_manager.ReadBlockUnsafe(condition_address, &cmp, sizeof(cmp));
-        execute_on = cmp.initial_sequence != 0U && cmp.initial_mode != 0U;
-        break;
-    }
-    case Regs::ConditionMode::Equal: {
-        Regs::QueryCompare cmp;
-        memory_manager.ReadBlockUnsafe(condition_address, &cmp, sizeof(cmp));
-        execute_on =
-            cmp.initial_sequence == cmp.current_sequence && cmp.initial_mode == cmp.current_mode;
-        break;
-    }
-    case Regs::ConditionMode::NotEqual: {
-        Regs::QueryCompare cmp;
-        memory_manager.ReadBlockUnsafe(condition_address, &cmp, sizeof(cmp));
-        execute_on =
-            cmp.initial_sequence != cmp.current_sequence || cmp.initial_mode != cmp.current_mode;
-        break;
-    }
-    default: {
-        UNIMPLEMENTED_MSG("Uninplemented Condition Mode!");
-        execute_on = true;
-        break;
-    }
-    }
-}
-
 void Maxwell3D::ProcessSyncPoint() {
    const u32 sync_point = regs.sync_info.sync_point.Value();
    const u32 increment = regs.sync_info.increment.Value();
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -90,20 +90,6 @@ public:

        enum class QuerySelect : u32 {
            Zero = 0,
-            TimeElapsed = 2,
-            TransformFeedbackPrimitivesGenerated = 11,
-            PrimitivesGenerated = 18,
-            SamplesPassed = 21,
-            TransformFeedbackUnknown = 26,
-        };
-
-        struct QueryCompare {
-            u32 initial_sequence;
-            u32 initial_mode;
-            u32 unknown1;
-            u32 unknown2;
-            u32 current_sequence;
-            u32 current_mode;
        };

        enum class QuerySyncCondition : u32 {
@@ -111,14 +97,6 @@ public:
            GreaterThan = 1,
        };

-        enum class ConditionMode : u32 {
-            Never = 0,
-            Always = 1,
-            ResNonZero = 2,
-            Equal = 3,
-            NotEqual = 4,
-        };
-
        enum class ShaderProgram : u32 {
            VertexA = 0,
            VertexB = 1,
@@ -837,18 +815,7 @@ public:
                    BitField<4, 1, u32> alpha_to_one;
                } multisample_control;

-                INSERT_PADDING_WORDS(0x4);
-
-                struct {
-                    u32 address_high;
-                    u32 address_low;
-                    ConditionMode mode;
-
-                    GPUVAddr Address() const {
-                        return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
-                                                     address_low);
-                    }
-                } condition;
+                INSERT_PADDING_WORDS(0x7);

                struct {
                    u32 tsc_address_high;
@@ -1202,10 +1169,6 @@ public:
        return macro_memory;
    }

-    bool ShouldExecute() const {
-        return execute_on;
-    }
-
 private:
    void InitializeRegisterDefaults();

@@ -1231,8 +1194,6 @@ private:

    Upload::State upload_state;

-    bool execute_on{true};
-
    /// Retrieves information about a specific TIC entry from the TIC buffer.
    Texture::TICEntry GetTICEntry(u32 tic_index) const;

@@ -1258,9 +1219,6 @@ private:
    /// Handles a write to the QUERY_GET register.
    void ProcessQueryGet();

-    // Handles Conditional Rendering
-    void ProcessQueryCondition();
-
    /// Handles writes to syncing register.
    void ProcessSyncPoint();

@@ -1332,7 +1290,6 @@ ASSERT_REG_POSITION(clip_distance_enabled, 0x544);
 ASSERT_REG_POSITION(point_size, 0x546);
 ASSERT_REG_POSITION(zeta_enable, 0x54E);
 ASSERT_REG_POSITION(multisample_control, 0x54F);
-ASSERT_REG_POSITION(condition, 0x554);
 ASSERT_REG_POSITION(tsc, 0x557);
 ASSERT_REG_POSITION(polygon_offset_factor, 0x55b);
 ASSERT_REG_POSITION(tic, 0x55D);
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -931,8 +931,6 @@ union Instruction {
    } csetp;

    union {
-        BitField<35, 4, PredCondition> cond;
-        BitField<49, 1, u64> h_and;
        BitField<6, 1, u64> ftz;
        BitField<45, 2, PredOperation> op;
        BitField<3, 3, u64> pred3;
@@ -940,9 +938,21 @@ union Instruction {
        BitField<43, 1, u64> negate_a;
        BitField<44, 1, u64> abs_a;
        BitField<47, 2, HalfType> type_a;
-        BitField<31, 1, u64> negate_b;
-        BitField<30, 1, u64> abs_b;
-        BitField<28, 2, HalfType> type_b;
+        union {
+            BitField<35, 4, PredCondition> cond;
+            BitField<49, 1, u64> h_and;
+            BitField<31, 1, u64> negate_b;
+            BitField<30, 1, u64> abs_b;
+            BitField<28, 2, HalfType> type_b;
+        } reg;
+        union {
+            BitField<56, 1, u64> negate_b;
+            BitField<54, 1, u64> abs_b;
+        } cbuf;
+        union {
+            BitField<49, 4, PredCondition> cond;
+            BitField<53, 1, u64> h_and;
+        } cbuf_and_imm;
        BitField<42, 1, u64> neg_pred;
        BitField<39, 3, u64> pred39;
    } hsetp2;
@@ -1008,8 +1018,6 @@ union Instruction {
        } f2i;

        union {
-            BitField<8, 2, Register::Size> src_size;
-            BitField<10, 2, Register::Size> dst_size;
            BitField<39, 4, u64> rounding;
            // H0, H1 extract for F16 missing
            BitField<41, 1, u64> selector; // Guessed as some games set it, TODO: reverse this value
@@ -1548,7 +1556,9 @@ public:
        HFMA2_RC,
        HFMA2_RR,
        HFMA2_IMM_R,
+        HSETP2_C,
        HSETP2_R,
+        HSETP2_IMM,
        HSET2_R,
        POPC_C,
        POPC_R,
@@ -1831,7 +1841,9 @@ private:
            INST("01100---1-------", Id::HFMA2_RC, Type::Hfma2, "HFMA2_RC"),
            INST("0101110100000---", Id::HFMA2_RR, Type::Hfma2, "HFMA2_RR"),
            INST("01110---0-------", Id::HFMA2_IMM_R, Type::Hfma2, "HFMA2_R_IMM"),
-            INST("0101110100100---", Id::HSETP2_R, Type::HalfSetPredicate, "HSETP_R"),
+            INST("0111111-1-------", Id::HSETP2_C, Type::HalfSetPredicate, "HSETP2_C"),
+            INST("0101110100100---", Id::HSETP2_R, Type::HalfSetPredicate, "HSETP2_R"),
+            INST("0111111-0-------", Id::HSETP2_IMM, Type::HalfSetPredicate, "HSETP2_IMM"),
            INST("0101110100011---", Id::HSET2_R, Type::HalfSet, "HSET2_R"),
            INST("0101000010000---", Id::MUFU, Type::Arithmetic, "MUFU"),
            INST("0100110010010---", Id::RRO_C, Type::Arithmetic, "RRO_C"),
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -520,13 +520,7 @@ std::pair<bool, bool> RasterizerOpenGL::ConfigureFramebuffers(
 }

 void RasterizerOpenGL::Clear() {
-    const auto& maxwell3d = system.GPU().Maxwell3D();
-
-    if (!maxwell3d.ShouldExecute()) {
-        return;
-    }
-
-    const auto& regs = maxwell3d.regs;
+    const auto& regs = system.GPU().Maxwell3D().regs;
    bool use_color{};
    bool use_depth{};
    bool use_stencil{};
@@ -622,11 +616,6 @@ void RasterizerOpenGL::DrawArrays() {

    MICROPROFILE_SCOPE(OpenGL_Drawing);
    auto& gpu = system.GPU().Maxwell3D();
-
-    if (!gpu.ShouldExecute()) {
-        return;
-    }
-
    const auto& regs = gpu.regs;

    SyncColorMask();
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -254,10 +254,6 @@ public:
    }

 private:
-    using OperationDecompilerFn = std::string (GLSLDecompiler::*)(Operation);
-    using OperationDecompilersArray =
-        std::array<OperationDecompilerFn, static_cast<std::size_t>(OperationCode::Amount)>;
-
    void DeclareVertex() {
        if (stage != ShaderStage::Vertex)
            return;
@@ -1126,6 +1122,16 @@ private:
                               Type::Float);
    }

+    std::string FCastHalf0(Operation operation) {
+        const std::string op_a = VisitOperand(operation, 0, Type::HalfFloat);
+        return fmt::format("({})[0]", op_a);
+    }
+
+    std::string FCastHalf1(Operation operation) {
+        const std::string op_a = VisitOperand(operation, 0, Type::HalfFloat);
+        return fmt::format("({})[1]", op_a);
+    }
+
    template <Type type>
    std::string Min(Operation operation) {
        return GenerateBinaryCall(operation, "min", type, type, type);
@@ -1282,6 +1288,11 @@ private:
        return ApplyPrecise(operation, BitwiseCastResult(clamped, Type::HalfFloat));
    }

+    std::string HCastFloat(Operation operation) {
+        const std::string op_a = VisitOperand(operation, 0, Type::Float);
+        return fmt::format("fromHalf2(vec2({}, 0.0f))", op_a);
+    }
+
    std::string HUnpack(Operation operation) {
        const std::string operand{VisitOperand(operation, 0, Type::HalfFloat)};
        const auto value = [&]() -> std::string {
@@ -1400,14 +1411,10 @@ private:
        return fmt::format("{}[{}]", pair, VisitOperand(operation, 1, Type::Uint));
    }

-    std::string LogicalAll2(Operation operation) {
+    std::string LogicalAnd2(Operation operation) {
        return GenerateUnary(operation, "all", Type::Bool, Type::Bool2);
    }

-    std::string LogicalAny2(Operation operation) {
-        return GenerateUnary(operation, "any", Type::Bool, Type::Bool2);
-    }
-
    template <bool with_nan>
    std::string GenerateHalfComparison(Operation operation, const std::string& compare_op) {
        const std::string comparison{GenerateBinaryCall(operation, compare_op, Type::Bool2,
@@ -1714,7 +1721,7 @@ private:
        return "utof(gl_WorkGroupID"s + GetSwizzle(element) + ')';
    }

-    static constexpr OperationDecompilersArray operation_decompilers = {
+    static constexpr std::array operation_decompilers = {
        &GLSLDecompiler::Assign,

        &GLSLDecompiler::Select,
@@ -1726,6 +1733,8 @@ private:
        &GLSLDecompiler::Negate<Type::Float>,
        &GLSLDecompiler::Absolute<Type::Float>,
        &GLSLDecompiler::FClamp,
+        &GLSLDecompiler::FCastHalf0,
+        &GLSLDecompiler::FCastHalf1,
        &GLSLDecompiler::Min<Type::Float>,
        &GLSLDecompiler::Max<Type::Float>,
        &GLSLDecompiler::FCos,
@@ -1786,6 +1795,7 @@ private:
        &GLSLDecompiler::Absolute<Type::HalfFloat>,
        &GLSLDecompiler::HNegate,
        &GLSLDecompiler::HClamp,
+        &GLSLDecompiler::HCastFloat,
        &GLSLDecompiler::HUnpack,
        &GLSLDecompiler::HMergeF32,
        &GLSLDecompiler::HMergeH0,
@@ -1798,8 +1808,7 @@ private:
        &GLSLDecompiler::LogicalXor,
        &GLSLDecompiler::LogicalNegate,
        &GLSLDecompiler::LogicalPick2,
-        &GLSLDecompiler::LogicalAll2,
-        &GLSLDecompiler::LogicalAny2,
+        &GLSLDecompiler::LogicalAnd2,

        &GLSLDecompiler::LogicalLessThan<Type::Float>,
        &GLSLDecompiler::LogicalEqual<Type::Float>,
@@ -1863,6 +1872,7 @@ private:
        &GLSLDecompiler::WorkGroupId<1>,
        &GLSLDecompiler::WorkGroupId<2>,
    };
+    static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount));

    std::string GetRegister(u32 index) const {
        return GetDeclarationWithSuffix(index, "gpr");
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
@@ -205,10 +205,6 @@ public:
    }

 private:
-    using OperationDecompilerFn = Id (SPIRVDecompiler::*)(Operation);
-    using OperationDecompilersArray =
-        std::array<OperationDecompilerFn, static_cast<std::size_t>(OperationCode::Amount)>;
-
    static constexpr auto INTERNAL_FLAGS_COUNT = static_cast<std::size_t>(InternalFlag::Amount);

    void AllocateBindings() {
@@ -739,6 +735,16 @@ private:
        return {};
    }

+    Id FCastHalf0(Operation operation) {
+        UNIMPLEMENTED();
+        return {};
+    }
+
+    Id FCastHalf1(Operation operation) {
+        UNIMPLEMENTED();
+        return {};
+    }
+
    Id HNegate(Operation operation) {
        UNIMPLEMENTED();
        return {};
@@ -749,6 +755,11 @@ private:
        return {};
    }

+    Id HCastFloat(Operation operation) {
+        UNIMPLEMENTED();
+        return {};
+    }
+
    Id HUnpack(Operation operation) {
        UNIMPLEMENTED();
        return {};
@@ -804,12 +815,7 @@ private:
        return {};
    }

-    Id LogicalAll2(Operation operation) {
-        UNIMPLEMENTED();
-        return {};
-    }
-
-    Id LogicalAny2(Operation operation) {
+    Id LogicalAnd2(Operation operation) {
        UNIMPLEMENTED();
        return {};
    }
@@ -1206,7 +1212,7 @@ private:
        return {};
    }

-    static constexpr OperationDecompilersArray operation_decompilers = {
+    static constexpr std::array operation_decompilers = {
        &SPIRVDecompiler::Assign,

        &SPIRVDecompiler::Ternary<&Module::OpSelect, Type::Float, Type::Bool, Type::Float,
@@ -1219,6 +1225,8 @@ private:
        &SPIRVDecompiler::Unary<&Module::OpFNegate, Type::Float>,
        &SPIRVDecompiler::Unary<&Module::OpFAbs, Type::Float>,
        &SPIRVDecompiler::Ternary<&Module::OpFClamp, Type::Float>,
+        &SPIRVDecompiler::FCastHalf0,
+        &SPIRVDecompiler::FCastHalf1,
        &SPIRVDecompiler::Binary<&Module::OpFMin, Type::Float>,
        &SPIRVDecompiler::Binary<&Module::OpFMax, Type::Float>,
        &SPIRVDecompiler::Unary<&Module::OpCos, Type::Float>,
@@ -1279,6 +1287,7 @@ private:
        &SPIRVDecompiler::Unary<&Module::OpFAbs, Type::HalfFloat>,
        &SPIRVDecompiler::HNegate,
        &SPIRVDecompiler::HClamp,
+        &SPIRVDecompiler::HCastFloat,
        &SPIRVDecompiler::HUnpack,
        &SPIRVDecompiler::HMergeF32,
        &SPIRVDecompiler::HMergeH0,
@@ -1291,8 +1300,7 @@ private:
        &SPIRVDecompiler::Binary<&Module::OpLogicalNotEqual, Type::Bool>,
        &SPIRVDecompiler::Unary<&Module::OpLogicalNot, Type::Bool>,
        &SPIRVDecompiler::LogicalPick2,
-        &SPIRVDecompiler::LogicalAll2,
-        &SPIRVDecompiler::LogicalAny2,
+        &SPIRVDecompiler::LogicalAnd2,

        &SPIRVDecompiler::Binary<&Module::OpFOrdLessThan, Type::Bool, Type::Float>,
        &SPIRVDecompiler::Binary<&Module::OpFOrdEqual, Type::Bool, Type::Float>,
@@ -1357,6 +1365,7 @@ private:
        &SPIRVDecompiler::WorkGroupId<1>,
        &SPIRVDecompiler::WorkGroupId<2>,
    };
+    static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount));

    const VKDevice& device;
    const ShaderIR& ir;
--- a/src/video_core/shader/decode.cpp
+++ b/src/video_core/shader/decode.cpp
@@ -46,12 +46,12 @@ void ShaderIR::Decode() {
        coverage_end = shader_info.end;
        if (shader_info.decompilable) {
            disable_flow_stack = true;
-            const auto insert_block = ([this](NodeBlock& nodes, u32 label) {
+            const auto insert_block = [this](NodeBlock& nodes, u32 label) {
                if (label == exit_branch) {
                    return;
                }
                basic_blocks.insert({label, nodes});
-            });
+            };
            const auto& blocks = shader_info.blocks;
            NodeBlock current_block;
            u32 current_label = exit_branch;
@@ -103,7 +103,7 @@ void ShaderIR::DecodeRangeInner(NodeBlock& bb, u32 begin, u32 end) {
 }

 void ShaderIR::InsertControlFlow(NodeBlock& bb, const ShaderBlock& block) {
-    const auto apply_conditions = ([&](const Condition& cond, Node n) -> Node {
+    const auto apply_conditions = [&](const Condition& cond, Node n) -> Node {
        Node result = n;
        if (cond.cc != ConditionCode::T) {
            result = Conditional(GetConditionCode(cond.cc), {result});
@@ -117,7 +117,7 @@ void ShaderIR::InsertControlFlow(NodeBlock& bb, const ShaderBlock& block) {
            result = Conditional(GetPredicate(pred, is_neg), {result});
        }
        return result;
-    });
+    };
    if (block.branch.address < 0) {
        if (block.branch.kills) {
            Node n = Operation(OperationCode::Discard);
--- a/src/video_core/shader/decode/conversion.cpp
+++ b/src/video_core/shader/decode/conversion.cpp
@@ -57,7 +57,7 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) {
    case OpCode::Id::I2F_R:
    case OpCode::Id::I2F_C:
    case OpCode::Id::I2F_IMM: {
-        UNIMPLEMENTED_IF(instr.conversion.dst_size != Register::Size::Word);
+        UNIMPLEMENTED_IF(instr.conversion.dst_size == Register::Size::Long);
        UNIMPLEMENTED_IF(instr.conversion.selector);
        UNIMPLEMENTED_IF_MSG(instr.generates_cc,
                             "Condition codes generation in I2F is not implemented");
@@ -82,14 +82,19 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) {
        value = GetOperandAbsNegFloat(value, false, instr.conversion.negate_a);

        SetInternalFlagsFromFloat(bb, value, instr.generates_cc);
+
+        if (instr.conversion.dst_size == Register::Size::Short) {
+            value = Operation(OperationCode::HCastFloat, PRECISE, value);
+        }
+
        SetRegister(bb, instr.gpr0, value);
        break;
    }
    case OpCode::Id::F2F_R:
    case OpCode::Id::F2F_C:
    case OpCode::Id::F2F_IMM: {
-        UNIMPLEMENTED_IF(instr.conversion.f2f.dst_size != Register::Size::Word);
-        UNIMPLEMENTED_IF(instr.conversion.f2f.src_size != Register::Size::Word);
+        UNIMPLEMENTED_IF(instr.conversion.dst_size == Register::Size::Long);
+        UNIMPLEMENTED_IF(instr.conversion.src_size == Register::Size::Long);
        UNIMPLEMENTED_IF_MSG(instr.generates_cc,
                             "Condition codes generation in F2F is not implemented");

@@ -107,6 +112,11 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) {
            }
        }();

+        if (instr.conversion.src_size == Register::Size::Short) {
+            // TODO: figure out where the extract is set in the encoding
+            value = Operation(OperationCode::FCastHalf0, PRECISE, value);
+        }
+
        value = GetOperandAbsNegFloat(value, instr.conversion.abs_a, instr.conversion.negate_a);

        value = [&]() {
@@ -124,19 +134,24 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) {
            default:
                UNIMPLEMENTED_MSG("Unimplemented F2F rounding mode {}",
                                  static_cast<u32>(instr.conversion.f2f.rounding.Value()));
-                return Immediate(0);
+                return value;
            }
        }();
        value = GetSaturatedFloat(value, instr.alu.saturate_d);

        SetInternalFlagsFromFloat(bb, value, instr.generates_cc);
+
+        if (instr.conversion.dst_size == Register::Size::Short) {
+            value = Operation(OperationCode::HCastFloat, PRECISE, value);
+        }
+
        SetRegister(bb, instr.gpr0, value);
        break;
    }
    case OpCode::Id::F2I_R:
    case OpCode::Id::F2I_C:
    case OpCode::Id::F2I_IMM: {
-        UNIMPLEMENTED_IF(instr.conversion.src_size != Register::Size::Word);
+        UNIMPLEMENTED_IF(instr.conversion.src_size == Register::Size::Long);
        UNIMPLEMENTED_IF_MSG(instr.generates_cc,
                             "Condition codes generation in F2I is not implemented");
        Node value = [&]() {
@@ -153,6 +168,11 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) {
            }
        }();

+        if (instr.conversion.src_size == Register::Size::Short) {
+            // TODO: figure out where the extract is set in the encoding
+            value = Operation(OperationCode::FCastHalf0, PRECISE, value);
+        }
+
        value = GetOperandAbsNegFloat(value, instr.conversion.abs_a, instr.conversion.negate_a);

        value = [&]() {
--- a/src/video_core/shader/decode/half_set_predicate.cpp
+++ b/src/video_core/shader/decode/half_set_predicate.cpp
@@ -23,38 +23,51 @@ u32 ShaderIR::DecodeHalfSetPredicate(NodeBlock& bb, u32 pc) {
    Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hsetp2.type_a);
    op_a = GetOperandAbsNegHalf(op_a, instr.hsetp2.abs_a, instr.hsetp2.negate_a);

-    Node op_b = [&]() {
-        switch (opcode->get().GetId()) {
-        case OpCode::Id::HSETP2_R:
-            return GetOperandAbsNegHalf(GetRegister(instr.gpr20), instr.hsetp2.abs_a,
-                                        instr.hsetp2.negate_b);
-        default:
-            UNREACHABLE();
-            return Immediate(0);
-        }
-    }();
-    op_b = UnpackHalfFloat(op_b, instr.hsetp2.type_b);
-
-    // We can't use the constant predicate as destination.
-    ASSERT(instr.hsetp2.pred3 != static_cast<u64>(Pred::UnusedIndex));
-
-    const Node second_pred = GetPredicate(instr.hsetp2.pred39, instr.hsetp2.neg_pred != 0);
+    Tegra::Shader::PredCondition cond{};
+    bool h_and{};
+    Node op_b{};
+    switch (opcode->get().GetId()) {
+    case OpCode::Id::HSETP2_C:
+        cond = instr.hsetp2.cbuf_and_imm.cond;
+        h_and = instr.hsetp2.cbuf_and_imm.h_and;
+        op_b = GetOperandAbsNegHalf(GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset),
+                                    instr.hsetp2.cbuf.abs_b, instr.hsetp2.cbuf.negate_b);
+        break;
+    case OpCode::Id::HSETP2_IMM:
+        cond = instr.hsetp2.cbuf_and_imm.cond;
+        h_and = instr.hsetp2.cbuf_and_imm.h_and;
+        op_b = UnpackHalfImmediate(instr, true);
+        break;
+    case OpCode::Id::HSETP2_R:
+        cond = instr.hsetp2.reg.cond;
+        h_and = instr.hsetp2.reg.h_and;
+        op_b =
+            UnpackHalfFloat(GetOperandAbsNegHalf(GetRegister(instr.gpr20), instr.hsetp2.reg.abs_b,
+                                                 instr.hsetp2.reg.negate_b),
+                            instr.hsetp2.reg.type_b);
+        break;
+    default:
+        UNREACHABLE();
+        op_b = Immediate(0);
+    }

    const OperationCode combiner = GetPredicateCombiner(instr.hsetp2.op);
-    const OperationCode pair_combiner =
-        instr.hsetp2.h_and ? OperationCode::LogicalAll2 : OperationCode::LogicalAny2;
+    const Node pred39 = GetPredicate(instr.hsetp2.pred39, instr.hsetp2.neg_pred);

-    const Node comparison = GetPredicateComparisonHalf(instr.hsetp2.cond, op_a, op_b);
-    const Node first_pred = Operation(pair_combiner, comparison);
+    const auto Write = [&](u64 dest, Node src) {
+        SetPredicate(bb, dest, Operation(combiner, std::move(src), pred39));
+    };

-    // Set the primary predicate to the result of Predicate OP SecondPredicate
-    const Node value = Operation(combiner, first_pred, second_pred);
-    SetPredicate(bb, instr.hsetp2.pred3, value);
-
-    if (instr.hsetp2.pred0 != static_cast<u64>(Pred::UnusedIndex)) {
-        // Set the secondary predicate to the result of !Predicate OP SecondPredicate, if enabled
-        const Node negated_pred = Operation(OperationCode::LogicalNegate, first_pred);
-        SetPredicate(bb, instr.hsetp2.pred0, Operation(combiner, negated_pred, second_pred));
+    const Node comparison = GetPredicateComparisonHalf(cond, op_a, op_b);
+    const u64 first = instr.hsetp2.pred0;
+    const u64 second = instr.hsetp2.pred3;
+    if (h_and) {
+        const Node joined = Operation(OperationCode::LogicalAnd2, comparison);
+        Write(first, joined);
+        Write(second, Operation(OperationCode::LogicalNegate, joined));
+    } else {
+        Write(first, Operation(OperationCode::LogicalPick2, comparison, Immediate(0u)));
+        Write(second, Operation(OperationCode::LogicalPick2, comparison, Immediate(1u)));
    }

    return pc;
--- a/src/video_core/shader/decode/memory.cpp
+++ b/src/video_core/shader/decode/memory.cpp
@@ -95,10 +95,10 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
            const Node op_b =
                GetConstBufferIndirect(instr.cbuf36.index, instr.cbuf36.GetOffset() + 4, index);

-            SetTemporal(bb, 0, op_a);
-            SetTemporal(bb, 1, op_b);
-            SetRegister(bb, instr.gpr0, GetTemporal(0));
-            SetRegister(bb, instr.gpr0.Value() + 1, GetTemporal(1));
+            SetTemporary(bb, 0, op_a);
+            SetTemporary(bb, 1, op_b);
+            SetRegister(bb, instr.gpr0, GetTemporary(0));
+            SetRegister(bb, instr.gpr0.Value() + 1, GetTemporary(1));
            break;
        }
        default:
@@ -136,9 +136,9 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
                }
            }();
            for (u32 i = 0; i < count; ++i)
-                SetTemporal(bb, i, GetLmem(i * 4));
+                SetTemporary(bb, i, GetLmem(i * 4));
            for (u32 i = 0; i < count; ++i)
-                SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i));
+                SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
            break;
        }
        default:
@@ -172,10 +172,10 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
                Operation(OperationCode::UAdd, NO_PRECISE, real_address_base, it_offset);
            const Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor);

-            SetTemporal(bb, i, gmem);
+            SetTemporary(bb, i, gmem);
        }
        for (u32 i = 0; i < count; ++i) {
-            SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i));
+            SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
        }
        break;
    }
@@ -253,11 +253,11 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
            TrackAndGetGlobalMemory(bb, instr, true);

        // Encode in temporary registers like this: real_base_address, {registers_to_be_written...}
-        SetTemporal(bb, 0, real_address_base);
+        SetTemporary(bb, 0, real_address_base);

        const u32 count = GetUniformTypeElementsCount(type);
        for (u32 i = 0; i < count; ++i) {
-            SetTemporal(bb, i + 1, GetRegister(instr.gpr0.Value() + i));
+            SetTemporary(bb, i + 1, GetRegister(instr.gpr0.Value() + i));
        }
        for (u32 i = 0; i < count; ++i) {
            const Node it_offset = Immediate(i * 4);
@@ -265,7 +265,7 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
                Operation(OperationCode::UAdd, NO_PRECISE, real_address_base, it_offset);
            const Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor);

-            bb.push_back(Operation(OperationCode::Assign, gmem, GetTemporal(i + 1)));
+            bb.push_back(Operation(OperationCode::Assign, gmem, GetTemporary(i + 1)));
        }
        break;
    }
--- a/src/video_core/shader/decode/texture.cpp
+++ b/src/video_core/shader/decode/texture.cpp
@@ -181,10 +181,10 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
                const Node value =
                    Operation(OperationCode::TextureQueryDimensions, meta,
                              GetRegister(instr.gpr8.Value() + (is_bindless ? 1 : 0)));
-                SetTemporal(bb, indexer++, value);
+                SetTemporary(bb, indexer++, value);
            }
            for (u32 i = 0; i < indexer; ++i) {
-                SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i));
+                SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
            }
            break;
        }
@@ -238,10 +238,10 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
            auto params = coords;
            MetaTexture meta{sampler, {}, {}, {}, {}, {}, {}, element};
            const Node value = Operation(OperationCode::TextureQueryLod, meta, std::move(params));
-            SetTemporal(bb, indexer++, value);
+            SetTemporary(bb, indexer++, value);
        }
        for (u32 i = 0; i < indexer; ++i) {
-            SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i));
+            SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
        }
        break;
    }
@@ -336,11 +336,11 @@ void ShaderIR::WriteTexInstructionFloat(NodeBlock& bb, Instruction instr, const
            // Skip disabled components
            continue;
        }
-        SetTemporal(bb, dest_elem++, components[elem]);
+        SetTemporary(bb, dest_elem++, components[elem]);
    }
    // After writing values in temporals, move them to the real registers
    for (u32 i = 0; i < dest_elem; ++i) {
-        SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i));
+        SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
    }
 }

@@ -353,17 +353,17 @@ void ShaderIR::WriteTexsInstructionFloat(NodeBlock& bb, Instruction instr,
    for (u32 component = 0; component < 4; ++component) {
        if (!instr.texs.IsComponentEnabled(component))
            continue;
-        SetTemporal(bb, dest_elem++, components[component]);
+        SetTemporary(bb, dest_elem++, components[component]);
    }

    for (u32 i = 0; i < dest_elem; ++i) {
        if (i < 2) {
            // Write the first two swizzle components to gpr0 and gpr0+1
-            SetRegister(bb, instr.gpr0.Value() + i % 2, GetTemporal(i));
+            SetRegister(bb, instr.gpr0.Value() + i % 2, GetTemporary(i));
        } else {
            ASSERT(instr.texs.HasTwoDestinations());
            // Write the rest of the swizzle components to gpr28 and gpr28+1
-            SetRegister(bb, instr.gpr28.Value() + i % 2, GetTemporal(i));
+            SetRegister(bb, instr.gpr28.Value() + i % 2, GetTemporary(i));
        }
    }
 }
@@ -391,11 +391,11 @@ void ShaderIR::WriteTexsInstructionHalfFloat(NodeBlock& bb, Instruction instr,
        return;
    }

-    SetTemporal(bb, 0, first_value);
-    SetTemporal(bb, 1, Operation(OperationCode::HPack2, values[2], values[3]));
+    SetTemporary(bb, 0, first_value);
+    SetTemporary(bb, 1, Operation(OperationCode::HPack2, values[2], values[3]));

-    SetRegister(bb, instr.gpr0, GetTemporal(0));
-    SetRegister(bb, instr.gpr28, GetTemporal(1));
+    SetRegister(bb, instr.gpr0, GetTemporary(0));
+    SetRegister(bb, instr.gpr28, GetTemporary(1));
 }

 Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
--- a/src/video_core/shader/decode/xmad.cpp
+++ b/src/video_core/shader/decode/xmad.cpp
@@ -73,8 +73,8 @@ u32 ShaderIR::DecodeXmad(NodeBlock& bb, u32 pc) {
    if (is_psl) {
        product = Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, product, Immediate(16));
    }
-    SetTemporal(bb, 0, product);
-    product = GetTemporal(0);
+    SetTemporary(bb, 0, product);
+    product = GetTemporary(0);

    const Node original_c = op_c;
    const Tegra::Shader::XmadMode set_mode = mode; // Workaround to clang compile error
@@ -98,13 +98,13 @@ u32 ShaderIR::DecodeXmad(NodeBlock& bb, u32 pc) {
        }
    }();

-    SetTemporal(bb, 1, op_c);
-    op_c = GetTemporal(1);
+    SetTemporary(bb, 1, op_c);
+    op_c = GetTemporary(1);

    // TODO(Rodrigo): Use an appropiate sign for this operation
    Node sum = Operation(OperationCode::IAdd, product, op_c);
-    SetTemporal(bb, 2, sum);
-    sum = GetTemporal(2);
+    SetTemporary(bb, 2, sum);
+    sum = GetTemporary(2);
    if (is_merge) {
        const Node a = BitfieldExtract(sum, 0, 16);
        const Node b =
--- a/src/video_core/shader/node.h
+++ b/src/video_core/shader/node.h
@@ -30,6 +30,8 @@ enum class OperationCode {
    FNegate,       /// (MetaArithmetic, float a) -> float
    FAbsolute,     /// (MetaArithmetic, float a) -> float
    FClamp,        /// (MetaArithmetic, float value, float min, float max) -> float
+    FCastHalf0,    /// (MetaArithmetic, f16vec2 a) -> float
+    FCastHalf1,    /// (MetaArithmetic, f16vec2 a) -> float
    FMin,          /// (MetaArithmetic, float a, float b) -> float
    FMax,          /// (MetaArithmetic, float a, float b) -> float
    FCos,          /// (MetaArithmetic, float a) -> float
@@ -83,17 +85,18 @@ enum class OperationCode {
    UBitfieldExtract, /// (MetaArithmetic, uint value, int offset, int offset) -> uint
    UBitCount,        /// (MetaArithmetic, uint) -> uint

-    HAdd,      /// (MetaArithmetic, f16vec2 a, f16vec2 b) -> f16vec2
-    HMul,      /// (MetaArithmetic, f16vec2 a, f16vec2 b) -> f16vec2
-    HFma,      /// (MetaArithmetic, f16vec2 a, f16vec2 b, f16vec2 c) -> f16vec2
-    HAbsolute, /// (f16vec2 a) -> f16vec2
-    HNegate,   /// (f16vec2 a, bool first, bool second) -> f16vec2
-    HClamp,    /// (f16vec2 src, float min, float max) -> f16vec2
-    HUnpack,   /// (Tegra::Shader::HalfType, T value) -> f16vec2
-    HMergeF32, /// (f16vec2 src) -> float
-    HMergeH0,  /// (f16vec2 dest, f16vec2 src) -> f16vec2
-    HMergeH1,  /// (f16vec2 dest, f16vec2 src) -> f16vec2
-    HPack2,    /// (float a, float b) -> f16vec2
+    HAdd,       /// (MetaArithmetic, f16vec2 a, f16vec2 b) -> f16vec2
+    HMul,       /// (MetaArithmetic, f16vec2 a, f16vec2 b) -> f16vec2
+    HFma,       /// (MetaArithmetic, f16vec2 a, f16vec2 b, f16vec2 c) -> f16vec2
+    HAbsolute,  /// (f16vec2 a) -> f16vec2
+    HNegate,    /// (f16vec2 a, bool first, bool second) -> f16vec2
+    HClamp,     /// (f16vec2 src, float min, float max) -> f16vec2
+    HCastFloat, /// (MetaArithmetic, float a) -> f16vec2
+    HUnpack,    /// (Tegra::Shader::HalfType, T value) -> f16vec2
+    HMergeF32,  /// (f16vec2 src) -> float
+    HMergeH0,   /// (f16vec2 dest, f16vec2 src) -> f16vec2
+    HMergeH1,   /// (f16vec2 dest, f16vec2 src) -> f16vec2
+    HPack2,     /// (float a, float b) -> f16vec2

    LogicalAssign, /// (bool& dst, bool src) -> void
    LogicalAnd,    /// (bool a, bool b) -> bool
@@ -101,8 +104,7 @@ enum class OperationCode {
    LogicalXor,    /// (bool a, bool b) -> bool
    LogicalNegate, /// (bool a) -> bool
    LogicalPick2,  /// (bool2 pair, uint index) -> bool
-    LogicalAll2,   /// (bool2 a) -> bool
-    LogicalAny2,   /// (bool2 a) -> bool
+    LogicalAnd2,   /// (bool2 a) -> bool

    LogicalFLessThan,     /// (float a, float b) -> bool
    LogicalFEqual,        /// (float a, float b) -> bool
--- a/src/video_core/shader/node_helper.cpp
+++ b/src/video_core/shader/node_helper.cpp
@@ -12,7 +12,7 @@
 namespace VideoCommon::Shader {

 Node Conditional(Node condition, std::vector<Node> code) {
-    return MakeNode<ConditionalNode>(condition, std::move(code));
+    return MakeNode<ConditionalNode>(std::move(condition), std::move(code));
 }

 Node Comment(std::string text) {
--- a/src/video_core/shader/shader_ir.cpp
+++ b/src/video_core/shader/shader_ir.cpp
@@ -61,7 +61,7 @@ Node ShaderIR::GetConstBufferIndirect(u64 index_, u64 offset_, Node node) {
    const auto [entry, is_new] = used_cbufs.try_emplace(index);
    entry->second.MarkAsUsedIndirect();

-    const Node final_offset = [&]() {
+    Node final_offset = [&] {
        // Attempt to inline constant buffer without a variable offset. This is done to allow
        // tracking LDC calls.
        if (const auto gpr = std::get_if<GprNode>(&*node)) {
@@ -69,9 +69,9 @@ Node ShaderIR::GetConstBufferIndirect(u64 index_, u64 offset_, Node node) {
                return Immediate(offset);
            }
        }
-        return Operation(OperationCode::UAdd, NO_PRECISE, node, Immediate(offset));
+        return Operation(OperationCode::UAdd, NO_PRECISE, std::move(node), Immediate(offset));
    }();
-    return MakeNode<CbufNode>(index, final_offset);
+    return MakeNode<CbufNode>(index, std::move(final_offset));
 }

 Node ShaderIR::GetPredicate(u64 pred_, bool negated) {
@@ -89,7 +89,7 @@ Node ShaderIR::GetPredicate(bool immediate) {

 Node ShaderIR::GetInputAttribute(Attribute::Index index, u64 element, Node buffer) {
    used_input_attributes.emplace(index);
-    return MakeNode<AbufNode>(index, static_cast<u32>(element), buffer);
+    return MakeNode<AbufNode>(index, static_cast<u32>(element), std::move(buffer));
 }

 Node ShaderIR::GetPhysicalInputAttribute(Tegra::Shader::Register physical_address, Node buffer) {
@@ -122,7 +122,7 @@ Node ShaderIR::GetOutputAttribute(Attribute::Index index, u64 element, Node buff
    }
    used_output_attributes.insert(index);

-    return MakeNode<AbufNode>(index, static_cast<u32>(element), buffer);
+    return MakeNode<AbufNode>(index, static_cast<u32>(element), std::move(buffer));
 }

 Node ShaderIR::GetInternalFlag(InternalFlag flag, bool negated) {
@@ -134,19 +134,19 @@ Node ShaderIR::GetInternalFlag(InternalFlag flag, bool negated) {
 }

 Node ShaderIR::GetLocalMemory(Node address) {
-    return MakeNode<LmemNode>(address);
+    return MakeNode<LmemNode>(std::move(address));
 }

-Node ShaderIR::GetTemporal(u32 id) {
+Node ShaderIR::GetTemporary(u32 id) {
    return GetRegister(Register::ZeroIndex + 1 + id);
 }

 Node ShaderIR::GetOperandAbsNegFloat(Node value, bool absolute, bool negate) {
    if (absolute) {
-        value = Operation(OperationCode::FAbsolute, NO_PRECISE, value);
+        value = Operation(OperationCode::FAbsolute, NO_PRECISE, std::move(value));
    }
    if (negate) {
-        value = Operation(OperationCode::FNegate, NO_PRECISE, value);
+        value = Operation(OperationCode::FNegate, NO_PRECISE, std::move(value));
    }
    return value;
 }
@@ -155,24 +155,26 @@ Node ShaderIR::GetSaturatedFloat(Node value, bool saturate) {
    if (!saturate) {
        return value;
    }
-    const Node positive_zero = Immediate(std::copysignf(0, 1));
-    const Node positive_one = Immediate(1.0f);
-    return Operation(OperationCode::FClamp, NO_PRECISE, value, positive_zero, positive_one);
+
+    Node positive_zero = Immediate(std::copysignf(0, 1));
+    Node positive_one = Immediate(1.0f);
+    return Operation(OperationCode::FClamp, NO_PRECISE, std::move(value), std::move(positive_zero),
+                     std::move(positive_one));
 }

-Node ShaderIR::ConvertIntegerSize(Node value, Tegra::Shader::Register::Size size, bool is_signed) {
+Node ShaderIR::ConvertIntegerSize(Node value, Register::Size size, bool is_signed) {
    switch (size) {
    case Register::Size::Byte:
-        value = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed, NO_PRECISE, value,
-                                Immediate(24));
-        value = SignedOperation(OperationCode::IArithmeticShiftRight, is_signed, NO_PRECISE, value,
-                                Immediate(24));
+        value = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed, NO_PRECISE,
+                                std::move(value), Immediate(24));
+        value = SignedOperation(OperationCode::IArithmeticShiftRight, is_signed, NO_PRECISE,
+                                std::move(value), Immediate(24));
        return value;
    case Register::Size::Short:
-        value = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed, NO_PRECISE, value,
-                                Immediate(16));
-        value = SignedOperation(OperationCode::IArithmeticShiftRight, is_signed, NO_PRECISE, value,
-                                Immediate(16));
+        value = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed, NO_PRECISE,
+                                std::move(value), Immediate(16));
+        value = SignedOperation(OperationCode::IArithmeticShiftRight, is_signed, NO_PRECISE,
+                                std::move(value), Immediate(16));
    case Register::Size::Word:
        // Default - do nothing
        return value;
@@ -188,27 +190,29 @@ Node ShaderIR::GetOperandAbsNegInteger(Node value, bool absolute, bool negate, b
        return value;
    }
    if (absolute) {
-        value = Operation(OperationCode::IAbsolute, NO_PRECISE, value);
+        value = Operation(OperationCode::IAbsolute, NO_PRECISE, std::move(value));
    }
    if (negate) {
-        value = Operation(OperationCode::INegate, NO_PRECISE, value);
+        value = Operation(OperationCode::INegate, NO_PRECISE, std::move(value));
    }
    return value;
 }

 Node ShaderIR::UnpackHalfImmediate(Instruction instr, bool has_negation) {
-    const Node value = Immediate(instr.half_imm.PackImmediates());
+    Node value = Immediate(instr.half_imm.PackImmediates());
    if (!has_negation) {
        return value;
    }
-    const Node first_negate = GetPredicate(instr.half_imm.first_negate != 0);
-    const Node second_negate = GetPredicate(instr.half_imm.second_negate != 0);

-    return Operation(OperationCode::HNegate, NO_PRECISE, value, first_negate, second_negate);
+    Node first_negate = GetPredicate(instr.half_imm.first_negate != 0);
+    Node second_negate = GetPredicate(instr.half_imm.second_negate != 0);
+
+    return Operation(OperationCode::HNegate, NO_PRECISE, std::move(value), std::move(first_negate),
+                     std::move(second_negate));
 }

 Node ShaderIR::UnpackHalfFloat(Node value, Tegra::Shader::HalfType type) {
-    return Operation(OperationCode::HUnpack, type, value);
+    return Operation(OperationCode::HUnpack, type, std::move(value));
 }

 Node ShaderIR::HalfMerge(Node dest, Node src, Tegra::Shader::HalfMerge merge) {
@@ -216,11 +220,11 @@ Node ShaderIR::HalfMerge(Node dest, Node src, Tegra::Shader::HalfMerge merge) {
    case Tegra::Shader::HalfMerge::H0_H1:
        return src;
    case Tegra::Shader::HalfMerge::F32:
-        return Operation(OperationCode::HMergeF32, src);
+        return Operation(OperationCode::HMergeF32, std::move(src));
    case Tegra::Shader::HalfMerge::Mrg_H0:
-        return Operation(OperationCode::HMergeH0, dest, src);
+        return Operation(OperationCode::HMergeH0, std::move(dest), std::move(src));
    case Tegra::Shader::HalfMerge::Mrg_H1:
-        return Operation(OperationCode::HMergeH1, dest, src);
+        return Operation(OperationCode::HMergeH1, std::move(dest), std::move(src));
    }
    UNREACHABLE();
    return src;
@@ -228,10 +232,10 @@ Node ShaderIR::HalfMerge(Node dest, Node src, Tegra::Shader::HalfMerge merge) {

 Node ShaderIR::GetOperandAbsNegHalf(Node value, bool absolute, bool negate) {
    if (absolute) {
-        value = Operation(OperationCode::HAbsolute, NO_PRECISE, value);
+        value = Operation(OperationCode::HAbsolute, NO_PRECISE, std::move(value));
    }
    if (negate) {
-        value = Operation(OperationCode::HNegate, NO_PRECISE, value, GetPredicate(true),
+        value = Operation(OperationCode::HNegate, NO_PRECISE, std::move(value), GetPredicate(true),
                          GetPredicate(true));
    }
    return value;
@@ -241,9 +245,11 @@ Node ShaderIR::GetSaturatedHalfFloat(Node value, bool saturate) {
    if (!saturate) {
        return value;
    }
-    const Node positive_zero = Immediate(std::copysignf(0, 1));
-    const Node positive_one = Immediate(1.0f);
-    return Operation(OperationCode::HClamp, NO_PRECISE, value, positive_zero, positive_one);
+
+    Node positive_zero = Immediate(std::copysignf(0, 1));
+    Node positive_one = Immediate(1.0f);
+    return Operation(OperationCode::HClamp, NO_PRECISE, std::move(value), std::move(positive_zero),
+                     std::move(positive_one));
 }

 Node ShaderIR::GetPredicateComparisonFloat(PredCondition condition, Node op_a, Node op_b) {
@@ -271,7 +277,6 @@ Node ShaderIR::GetPredicateComparisonFloat(PredCondition condition, Node op_a, N
        condition == PredCondition::LessEqualWithNan ||
        condition == PredCondition::GreaterThanWithNan ||
        condition == PredCondition::GreaterEqualWithNan) {
-
        predicate = Operation(OperationCode::LogicalOr, predicate,
                              Operation(OperationCode::LogicalFIsNan, op_a));
        predicate = Operation(OperationCode::LogicalOr, predicate,
@@ -300,7 +305,8 @@ Node ShaderIR::GetPredicateComparisonInteger(PredCondition condition, bool is_si
    UNIMPLEMENTED_IF_MSG(comparison == PredicateComparisonTable.end(),
                         "Unknown predicate comparison operation");

-    Node predicate = SignedOperation(comparison->second, is_signed, NO_PRECISE, op_a, op_b);
+    Node predicate = SignedOperation(comparison->second, is_signed, NO_PRECISE, std::move(op_a),
+                                     std::move(op_b));

    UNIMPLEMENTED_IF_MSG(condition == PredCondition::LessThanWithNan ||
                             condition == PredCondition::NotEqualWithNan ||
@@ -330,9 +336,7 @@ Node ShaderIR::GetPredicateComparisonHalf(Tegra::Shader::PredCondition condition
    UNIMPLEMENTED_IF_MSG(comparison == PredicateComparisonTable.end(),
                         "Unknown predicate comparison operation");

-    const Node predicate = Operation(comparison->second, NO_PRECISE, op_a, op_b);
-
-    return predicate;
+    return Operation(comparison->second, NO_PRECISE, std::move(op_a), std::move(op_b));
 }

 OperationCode ShaderIR::GetPredicateCombiner(PredOperation operation) {
@@ -358,31 +362,32 @@ Node ShaderIR::GetConditionCode(Tegra::Shader::ConditionCode cc) {
 }

 void ShaderIR::SetRegister(NodeBlock& bb, Register dest, Node src) {
-    bb.push_back(Operation(OperationCode::Assign, GetRegister(dest), src));
+    bb.push_back(Operation(OperationCode::Assign, GetRegister(dest), std::move(src)));
 }

 void ShaderIR::SetPredicate(NodeBlock& bb, u64 dest, Node src) {
-    bb.push_back(Operation(OperationCode::LogicalAssign, GetPredicate(dest), src));
+    bb.push_back(Operation(OperationCode::LogicalAssign, GetPredicate(dest), std::move(src)));
 }

 void ShaderIR::SetInternalFlag(NodeBlock& bb, InternalFlag flag, Node value) {
-    bb.push_back(Operation(OperationCode::LogicalAssign, GetInternalFlag(flag), value));
+    bb.push_back(Operation(OperationCode::LogicalAssign, GetInternalFlag(flag), std::move(value)));
 }

 void ShaderIR::SetLocalMemory(NodeBlock& bb, Node address, Node value) {
-    bb.push_back(Operation(OperationCode::Assign, GetLocalMemory(address), value));
+    bb.push_back(
+        Operation(OperationCode::Assign, GetLocalMemory(std::move(address)), std::move(value)));
 }

-void ShaderIR::SetTemporal(NodeBlock& bb, u32 id, Node value) {
-    SetRegister(bb, Register::ZeroIndex + 1 + id, value);
+void ShaderIR::SetTemporary(NodeBlock& bb, u32 id, Node value) {
+    SetRegister(bb, Register::ZeroIndex + 1 + id, std::move(value));
 }

 void ShaderIR::SetInternalFlagsFromFloat(NodeBlock& bb, Node value, bool sets_cc) {
    if (!sets_cc) {
        return;
    }
-    const Node zerop = Operation(OperationCode::LogicalFEqual, value, Immediate(0.0f));
-    SetInternalFlag(bb, InternalFlag::Zero, zerop);
+    Node zerop = Operation(OperationCode::LogicalFEqual, std::move(value), Immediate(0.0f));
+    SetInternalFlag(bb, InternalFlag::Zero, std::move(zerop));
    LOG_WARNING(HW_GPU, "Condition codes implementation is incomplete");
 }

@@ -390,14 +395,14 @@ void ShaderIR::SetInternalFlagsFromInteger(NodeBlock& bb, Node value, bool sets_
    if (!sets_cc) {
        return;
    }
-    const Node zerop = Operation(OperationCode::LogicalIEqual, value, Immediate(0));
-    SetInternalFlag(bb, InternalFlag::Zero, zerop);
+    Node zerop = Operation(OperationCode::LogicalIEqual, std::move(value), Immediate(0));
+    SetInternalFlag(bb, InternalFlag::Zero, std::move(zerop));
    LOG_WARNING(HW_GPU, "Condition codes implementation is incomplete");
 }

 Node ShaderIR::BitfieldExtract(Node value, u32 offset, u32 bits) {
-    return Operation(OperationCode::UBitfieldExtract, NO_PRECISE, value, Immediate(offset),
-                     Immediate(bits));
+    return Operation(OperationCode::UBitfieldExtract, NO_PRECISE, std::move(value),
+                     Immediate(offset), Immediate(bits));
 }

 } // namespace VideoCommon::Shader
--- a/src/video_core/shader/shader_ir.h
+++ b/src/video_core/shader/shader_ir.h
@@ -5,13 +5,10 @@
 #pragma once

 #include <array>
-#include <cstring>
 #include <map>
 #include <optional>
 #include <set>
-#include <string>
 #include <tuple>
-#include <variant>
 #include <vector>

 #include "common/common_types.h"
@@ -210,8 +207,8 @@ private:
    Node GetInternalFlag(InternalFlag flag, bool negated = false);
    /// Generates a node representing a local memory address
    Node GetLocalMemory(Node address);
-    /// Generates a temporal, internally it uses a post-RZ register
-    Node GetTemporal(u32 id);
+    /// Generates a temporary, internally it uses a post-RZ register
+    Node GetTemporary(u32 id);

    /// Sets a register. src value must be a number-evaluated node.
    void SetRegister(NodeBlock& bb, Tegra::Shader::Register dest, Node src);
@@ -221,8 +218,8 @@ private:
    void SetInternalFlag(NodeBlock& bb, InternalFlag flag, Node value);
    /// Sets a local memory address. address and value must be a number-evaluated node
    void SetLocalMemory(NodeBlock& bb, Node address, Node value);
-    /// Sets a temporal. Internally it uses a post-RZ register
-    void SetTemporal(NodeBlock& bb, u32 id, Node value);
+    /// Sets a temporary. Internally it uses a post-RZ register
+    void SetTemporary(NodeBlock& bb, u32 id, Node value);

    /// Sets internal flags from a float
    void SetInternalFlagsFromFloat(NodeBlock& bb, Node value, bool sets_cc = true);
--- a/src/video_core/shader/track.cpp
+++ b/src/video_core/shader/track.cpp
@@ -15,18 +15,20 @@ namespace {
 std::pair<Node, s64> FindOperation(const NodeBlock& code, s64 cursor,
                                   OperationCode operation_code) {
    for (; cursor >= 0; --cursor) {
-        const Node node = code.at(cursor);
+        Node node = code.at(cursor);
+
        if (const auto operation = std::get_if<OperationNode>(&*node)) {
            if (operation->GetCode() == operation_code) {
-                return {node, cursor};
+                return {std::move(node), cursor};
            }
        }
+
        if (const auto conditional = std::get_if<ConditionalNode>(&*node)) {
            const auto& conditional_code = conditional->GetCode();
-            const auto [found, internal_cursor] = FindOperation(
+            auto [found, internal_cursor] = FindOperation(
                conditional_code, static_cast<s64>(conditional_code.size() - 1), operation_code);
            if (found) {
-                return {found, cursor};
+                return {std::move(found), cursor};
            }
        }
    }
Author	SHA1	Message	Date
Fernando Sahmkow	11f4e739bd	Shader_Ir: Implement F16 Variants of F2F, F2I, I2F. This commit takes care of implementing the F16 Variants of the conversion instructions and makes sure conversions are done.	2019-07-20 17:38:25 -04:00
Fernando Sahmkow	0a67416971	Merge pull request #2693 from ReinUsesLisp/hsetp2 shader/half_set_predicate: Implement missing HSETP2 variants	2019-07-20 17:25:08 -04:00
Flame Sage	369be67039	Update README.md	2019-07-20 19:24:24 +00:00
Flame Sage	aa599ac709	Update README.md	2019-07-20 19:22:30 +00:00
Flame Sage	a2edb27158	Merge pull request #2752 from DarkLordZach/master azure: Fix clang-format and releases	2019-07-20 15:20:53 -04:00
Zach Hilman	f470bcb826	azure: Fix clang-format and releases	2019-07-20 15:19:25 -04:00
ReinUsesLisp	45c162444d	shader/half_set_predicate: Fix HSETP2 implementation	2019-07-19 22:21:22 -03:00
ReinUsesLisp	6c4985edc9	shader/half_set_predicate: Implement missing HSETP2 variants	2019-07-19 22:20:47 -03:00
bunnei	5d369112d9	Merge pull request #2687 from lioncash/tls-process kernel/process: Allocate the process' TLS region during initialization	2019-07-18 13:53:04 -04:00
bunnei	63bda67a34	Merge pull request #2738 from lioncash/shader-ir shader-ir: Minor cleanup-related changes	2019-07-18 13:52:01 -04:00
David	d4b95bfc25	Merge pull request #2741 from FernandoS27/trace-log Kernel: Downgrade WaitForAddress and SignalToAddress messages to Trace.	2019-07-18 13:58:29 +10:00
Fernando Sahmkow	5e457bf258	Kernel: Downgrade WaitForAddress and SignalToAddress messages to Trace. These messages were originally set as warnings since few games used these svcs and they were needed for debugging. This is no longer the case.	2019-07-17 22:05:47 -04:00
Lioncash	bebbdc2067	shader_ir: std::move Node instance where applicable These are std::shared_ptr instances underneath the hood, which means copying them isn't as cheap as a regular pointer. Particularly so on weakly-ordered systems. This avoids atomic reference count increments and decrements where they aren't necessary for the core set of operations.	2019-07-16 19:49:23 -04:00
Lioncash	60926ac16b	shader_ir: Rename Get/SetTemporal to Get/SetTemporary This is more accurate in terms of describing what the functions are actually doing. Temporal relates to time, not the setting of a temporary itself.	2019-07-16 19:47:43 -04:00
Lioncash	44d87ff641	shader_ir: Remove unused includes Removes unnecessary header dependencies.	2019-07-16 19:47:42 -04:00
Lioncash	56c7912159	kernel/process: Allocate the process' TLS region during initialization Prior to execution within a process beginning, the process establishes its own TLS region for uses (as far as I can tell) related to exception handling. Now that TLS creation was decoupled from threads themselves, we can add this behavior to our Process class. This is also good, as it allows us to remove a stub within svcGetInfo, namely querying the address of that region.	2019-07-07 14:08:28 -04:00
Lioncash	eb6f55d880	kernel/process: Move main thread stack allocation to its own function Keeps this particular set of behavior isolated to its own function.	2019-07-07 14:08:25 -04:00