vk_shader_decompiler: Implement UAtomicAdd (ATOMS) on SPIR-V

Also updates sirit to include atomic instructions.
Merge pull request #3317 from ReinUsesLisp/gl-decomp-cc-decomp
2020-01-19 16:40:31 -03:00 · 2020-01-18 19:56:55 -04:00 · 2020-01-18 17:37:05 -05:00 · 2020-01-18 19:10:34 -03:00 · 2020-01-18 13:07:13 -05:00 · 2020-01-18 03:03:48 -05:00
41 changed files with 1008 additions and 275 deletions
--- a/CMakeModules/GenerateSCMRev.cmake
+++ b/CMakeModules/GenerateSCMRev.cmake
@@ -5,6 +5,10 @@ function(get_timestamp _var)
 endfunction()

 list(APPEND CMAKE_MODULE_PATH "${SRC_DIR}/externals/cmake-modules")
+
+# Find the package here with the known path so that the GetGit commands can find it as well
+find_package(Git QUIET PATHS "${GIT_EXECUTABLE}")
+
 # generate git/build information
 include(GetGitRevisionDescription)
 get_git_head_revision(GIT_REF_SPEC GIT_REV)
--- a/externals/sirit
+++ b/externals/sirit
--- a/src/common/CMakeLists.txt
+++ b/src/common/CMakeLists.txt
@@ -15,6 +15,10 @@ endif ()
 if (DEFINED ENV{DISPLAYVERSION})
  set(DISPLAY_VERSION $ENV{DISPLAYVERSION})
 endif ()
+
+# Locate git so that its path can be passed to GenerateSCMRev.cmake as well
+find_package(Git QUIET)
+
 add_custom_command(OUTPUT scm_rev.cpp
    COMMAND ${CMAKE_COMMAND}
      -DSRC_DIR="${CMAKE_SOURCE_DIR}"
@@ -23,6 +27,7 @@ add_custom_command(OUTPUT scm_rev.cpp
      -DTITLE_BAR_FORMAT_RUNNING="${TITLE_BAR_FORMAT_RUNNING}"
      -DBUILD_TAG="${BUILD_TAG}"
      -DBUILD_ID="${DISPLAY_VERSION}"
+      -DGIT_EXECUTABLE="${GIT_EXECUTABLE}"
      -P "${CMAKE_SOURCE_DIR}/CMakeModules/GenerateSCMRev.cmake"
    DEPENDS
      # WARNING! It was too much work to try and make a common location for this list,
--- a/src/common/telemetry.cpp
+++ b/src/common/telemetry.cpp
@@ -44,20 +44,6 @@ template class Field<std::string>;
 template class Field<const char*>;
 template class Field<std::chrono::microseconds>;

-#ifdef ARCHITECTURE_x86_64
-static const char* CpuVendorToStr(Common::CPUVendor vendor) {
-    switch (vendor) {
-    case Common::CPUVendor::INTEL:
-        return "Intel";
-    case Common::CPUVendor::AMD:
-        return "Amd";
-    case Common::CPUVendor::OTHER:
-        return "Other";
-    }
-    UNREACHABLE();
-}
-#endif
-
 void AppendBuildInfo(FieldCollection& fc) {
    const bool is_git_dirty{std::strstr(Common::g_scm_desc, "dirty") != nullptr};
    fc.AddField(FieldType::App, "Git_IsDirty", is_git_dirty);
@@ -71,7 +57,6 @@ void AppendCPUInfo(FieldCollection& fc) {
 #ifdef ARCHITECTURE_x86_64
    fc.AddField(FieldType::UserSystem, "CPU_Model", Common::GetCPUCaps().cpu_string);
    fc.AddField(FieldType::UserSystem, "CPU_BrandString", Common::GetCPUCaps().brand_string);
-    fc.AddField(FieldType::UserSystem, "CPU_Vendor", CpuVendorToStr(Common::GetCPUCaps().vendor));
    fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_AES", Common::GetCPUCaps().aes);
    fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_AVX", Common::GetCPUCaps().avx);
    fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_AVX2", Common::GetCPUCaps().avx2);
--- a/src/common/x64/cpu_detect.cpp
+++ b/src/common/x64/cpu_detect.cpp
@@ -3,8 +3,6 @@
 // Refer to the license.txt file included.

 #include <cstring>
-#include <string>
-#include <thread>
 #include "common/common_types.h"
 #include "common/x64/cpu_detect.h"

@@ -51,8 +49,6 @@ namespace Common {
 static CPUCaps Detect() {
    CPUCaps caps = {};

-    caps.num_cores = std::thread::hardware_concurrency();
-
    // Assumes the CPU supports the CPUID instruction. Those that don't would likely not support
    // yuzu at all anyway

@@ -70,12 +66,6 @@ static CPUCaps Detect() {
    __cpuid(cpu_id, 0x80000000);

    u32 max_ex_fn = cpu_id[0];
-    if (!strcmp(caps.brand_string, "GenuineIntel"))
-        caps.vendor = CPUVendor::INTEL;
-    else if (!strcmp(caps.brand_string, "AuthenticAMD"))
-        caps.vendor = CPUVendor::AMD;
-    else
-        caps.vendor = CPUVendor::OTHER;

    // Set reasonable default brand string even if brand string not available
    strcpy(caps.cpu_string, caps.brand_string);
@@ -96,15 +86,9 @@ static CPUCaps Detect() {
            caps.sse4_1 = true;
        if ((cpu_id[2] >> 20) & 1)
            caps.sse4_2 = true;
-        if ((cpu_id[2] >> 22) & 1)
-            caps.movbe = true;
        if ((cpu_id[2] >> 25) & 1)
            caps.aes = true;

-        if ((cpu_id[3] >> 24) & 1) {
-            caps.fxsave_fxrstor = true;
-        }
-
        // AVX support requires 3 separate checks:
        //  - Is the AVX bit set in CPUID?
        //  - Is the XSAVE bit set in CPUID?
@@ -129,8 +113,6 @@ static CPUCaps Detect() {
        }
    }

-    caps.flush_to_zero = caps.sse;
-
    if (max_ex_fn >= 0x80000004) {
        // Extract CPU model string
        __cpuid(cpu_id, 0x80000002);
@@ -144,14 +126,8 @@ static CPUCaps Detect() {
    if (max_ex_fn >= 0x80000001) {
        // Check for more features
        __cpuid(cpu_id, 0x80000001);
-        if (cpu_id[2] & 1)
-            caps.lahf_sahf_64 = true;
-        if ((cpu_id[2] >> 5) & 1)
-            caps.lzcnt = true;
        if ((cpu_id[2] >> 16) & 1)
            caps.fma4 = true;
-        if ((cpu_id[3] >> 29) & 1)
-            caps.long_mode = true;
    }

    return caps;
@@ -162,48 +138,4 @@ const CPUCaps& GetCPUCaps() {
    return caps;
 }

-std::string GetCPUCapsString() {
-    auto caps = GetCPUCaps();
-
-    std::string sum(caps.cpu_string);
-    sum += " (";
-    sum += caps.brand_string;
-    sum += ")";
-
-    if (caps.sse)
-        sum += ", SSE";
-    if (caps.sse2) {
-        sum += ", SSE2";
-        if (!caps.flush_to_zero)
-            sum += " (without DAZ)";
-    }
-
-    if (caps.sse3)
-        sum += ", SSE3";
-    if (caps.ssse3)
-        sum += ", SSSE3";
-    if (caps.sse4_1)
-        sum += ", SSE4.1";
-    if (caps.sse4_2)
-        sum += ", SSE4.2";
-    if (caps.avx)
-        sum += ", AVX";
-    if (caps.avx2)
-        sum += ", AVX2";
-    if (caps.bmi1)
-        sum += ", BMI1";
-    if (caps.bmi2)
-        sum += ", BMI2";
-    if (caps.fma)
-        sum += ", FMA";
-    if (caps.aes)
-        sum += ", AES";
-    if (caps.movbe)
-        sum += ", MOVBE";
-    if (caps.long_mode)
-        sum += ", 64-bit support";
-
-    return sum;
-}
-
 } // namespace Common
--- a/src/common/x64/cpu_detect.h
+++ b/src/common/x64/cpu_detect.h
@@ -4,23 +4,12 @@

 #pragma once

-#include <string>
-
 namespace Common {

-/// x86/x64 CPU vendors that may be detected by this module
-enum class CPUVendor {
-    INTEL,
-    AMD,
-    OTHER,
-};
-
 /// x86/x64 CPU capabilities that may be detected by this module
 struct CPUCaps {
-    CPUVendor vendor;
    char cpu_string[0x21];
    char brand_string[0x41];
-    int num_cores;
    bool sse;
    bool sse2;
    bool sse3;
@@ -35,20 +24,6 @@ struct CPUCaps {
    bool fma;
    bool fma4;
    bool aes;
-
-    // Support for the FXSAVE and FXRSTOR instructions
-    bool fxsave_fxrstor;
-
-    bool movbe;
-
-    // This flag indicates that the hardware supports some mode in which denormal inputs and outputs
-    // are automatically set to (signed) zero.
-    bool flush_to_zero;
-
-    // Support for LAHF and SAHF instructions in 64-bit mode
-    bool lahf_sahf_64;
-
-    bool long_mode;
 };

 /**
@@ -57,10 +32,4 @@ struct CPUCaps {
 */
 const CPUCaps& GetCPUCaps();

-/**
- * Gets a string summary of the name and supported capabilities of the host CPU
- * @return String summary
- */
-std::string GetCPUCapsString();
-
 } // namespace Common
--- a/src/core/hle/kernel/physical_memory.h
+++ b/src/core/hle/kernel/physical_memory.h
@@ -14,6 +14,9 @@ namespace Kernel {
 // - Second to ensure all host backing memory used is aligned to 256 bytes due
 // to strict alignment restrictions on GPU memory.

-using PhysicalMemory = std::vector<u8, Common::AlignmentAllocator<u8, 256>>;
+using PhysicalMemoryVector = std::vector<u8, Common::AlignmentAllocator<u8, 256>>;
+class PhysicalMemory final : public PhysicalMemoryVector {
+    using PhysicalMemoryVector::PhysicalMemoryVector;
+};

 } // namespace Kernel
--- a/src/core/hle/kernel/process.cpp
+++ b/src/core/hle/kernel/process.cpp
@@ -317,6 +317,8 @@ void Process::FreeTLSRegion(VAddr tls_address) {
 }

 void Process::LoadModule(CodeSet module_, VAddr base_addr) {
+    code_memory_size += module_.memory.size();
+
    const auto memory = std::make_shared<PhysicalMemory>(std::move(module_.memory));

    const auto MapSegment = [&](const CodeSet::Segment& segment, VMAPermission permissions,
@@ -332,8 +334,6 @@ void Process::LoadModule(CodeSet module_, VAddr base_addr) {
    MapSegment(module_.CodeSegment(), VMAPermission::ReadExecute, MemoryState::Code);
    MapSegment(module_.RODataSegment(), VMAPermission::Read, MemoryState::CodeData);
    MapSegment(module_.DataSegment(), VMAPermission::ReadWrite, MemoryState::CodeData);
-
-    code_memory_size += module_.memory.size();
 }

 Process::Process(Core::System& system)
--- a/src/core/hle/kernel/vm_manager.cpp
+++ b/src/core/hle/kernel/vm_manager.cpp
@@ -3,6 +3,7 @@
 // Refer to the license.txt file included.

 #include <algorithm>
+#include <cstring>
 #include <iterator>
 #include <utility>
 #include "common/alignment.h"
@@ -269,18 +270,9 @@ ResultVal<VAddr> VMManager::SetHeapSize(u64 size) {
    // If necessary, expand backing vector to cover new heap extents in
    // the case of allocating. Otherwise, shrink the backing memory,
    // if a smaller heap has been requested.
-    const u64 old_heap_size = GetCurrentHeapSize();
-    if (size > old_heap_size) {
-        const u64 alloc_size = size - old_heap_size;
-
-        heap_memory->insert(heap_memory->end(), alloc_size, 0);
-        RefreshMemoryBlockMappings(heap_memory.get());
-    } else if (size < old_heap_size) {
-        heap_memory->resize(size);
-        heap_memory->shrink_to_fit();
-
-        RefreshMemoryBlockMappings(heap_memory.get());
-    }
+    heap_memory->resize(size);
+    heap_memory->shrink_to_fit();
+    RefreshMemoryBlockMappings(heap_memory.get());

    heap_end = heap_region_base + size;
    ASSERT(GetCurrentHeapSize() == heap_memory->size());
@@ -752,24 +744,20 @@ void VMManager::MergeAdjacentVMA(VirtualMemoryArea& left, const VirtualMemoryAre
    // Always merge allocated memory blocks, even when they don't share the same backing block.
    if (left.type == VMAType::AllocatedMemoryBlock &&
        (left.backing_block != right.backing_block || left.offset + left.size != right.offset)) {
-        const auto right_begin = right.backing_block->begin() + right.offset;
-        const auto right_end = right_begin + right.size;

        // Check if we can save work.
        if (left.offset == 0 && left.size == left.backing_block->size()) {
            // Fast case: left is an entire backing block.
-            left.backing_block->insert(left.backing_block->end(), right_begin, right_end);
+            left.backing_block->resize(left.size + right.size);
+            std::memcpy(left.backing_block->data() + left.size,
+                        right.backing_block->data() + right.offset, right.size);
        } else {
            // Slow case: make a new memory block for left and right.
-            const auto left_begin = left.backing_block->begin() + left.offset;
-            const auto left_end = left_begin + left.size;
-            const auto left_size = static_cast<std::size_t>(std::distance(left_begin, left_end));
-            const auto right_size = static_cast<std::size_t>(std::distance(right_begin, right_end));
-
            auto new_memory = std::make_shared<PhysicalMemory>();
-            new_memory->reserve(left_size + right_size);
-            new_memory->insert(new_memory->end(), left_begin, left_end);
-            new_memory->insert(new_memory->end(), right_begin, right_end);
+            new_memory->resize(left.size + right.size);
+            std::memcpy(new_memory->data(), left.backing_block->data() + left.offset, left.size);
+            std::memcpy(new_memory->data() + left.size, right.backing_block->data() + right.offset,
+                        right.size);

            left.backing_block = std::move(new_memory);
            left.offset = 0;
@@ -792,8 +780,7 @@ void VMManager::UpdatePageTableForVMA(const VirtualMemoryArea& vma) {
        memory.UnmapRegion(page_table, vma.base, vma.size);
        break;
    case VMAType::AllocatedMemoryBlock:
-        memory.MapMemoryRegion(page_table, vma.base, vma.size,
-                               vma.backing_block->data() + vma.offset);
+        memory.MapMemoryRegion(page_table, vma.base, vma.size, *vma.backing_block, vma.offset);
        break;
    case VMAType::BackingMemory:
        memory.MapMemoryRegion(page_table, vma.base, vma.size, vma.backing_memory);
--- a/src/core/loader/elf.cpp
+++ b/src/core/loader/elf.cpp
@@ -335,7 +335,8 @@ Kernel::CodeSet ElfReader::LoadInto(VAddr vaddr) {
            codeset_segment->addr = segment_addr;
            codeset_segment->size = aligned_size;

-            memcpy(&program_image[current_image_position], GetSegmentPtr(i), p->p_filesz);
+            std::memcpy(program_image.data() + current_image_position, GetSegmentPtr(i),
+                        p->p_filesz);
            current_image_position += aligned_size;
        }
    }
--- a/src/core/loader/kip.cpp
+++ b/src/core/loader/kip.cpp
@@ -2,6 +2,7 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.

+#include <cstring>
 #include "core/file_sys/kernel_executable.h"
 #include "core/file_sys/program_metadata.h"
 #include "core/gdbstub/gdbstub.h"
@@ -76,8 +77,8 @@ AppLoader::LoadResult AppLoader_KIP::Load(Kernel::Process& process) {
        segment.addr = offset;
        segment.offset = offset;
        segment.size = PageAlignSize(static_cast<u32>(data.size()));
-        program_image.resize(offset);
-        program_image.insert(program_image.end(), data.begin(), data.end());
+        program_image.resize(offset + data.size());
+        std::memcpy(program_image.data() + offset, data.data(), data.size());
    };

    load_segment(codeset.CodeSegment(), kip->GetTextSection(), kip->GetTextOffset());
--- a/src/core/loader/nso.cpp
+++ b/src/core/loader/nso.cpp
@@ -3,6 +3,7 @@
 // Refer to the license.txt file included.

 #include <cinttypes>
+#include <cstring>
 #include <vector>

 #include "common/common_funcs.h"
@@ -96,8 +97,9 @@ std::optional<VAddr> AppLoader_NSO::LoadModule(Kernel::Process& process,
        if (nso_header.IsSegmentCompressed(i)) {
            data = DecompressSegment(data, nso_header.segments[i]);
        }
-        program_image.resize(nso_header.segments[i].location);
-        program_image.insert(program_image.end(), data.begin(), data.end());
+        program_image.resize(nso_header.segments[i].location + data.size());
+        std::memcpy(program_image.data() + nso_header.segments[i].location, data.data(),
+                    data.size());
        codeset.segments[i].addr = nso_header.segments[i].location;
        codeset.segments[i].offset = nso_header.segments[i].location;
        codeset.segments[i].size = PageAlignSize(static_cast<u32>(data.size()));
@@ -139,12 +141,12 @@ std::optional<VAddr> AppLoader_NSO::LoadModule(Kernel::Process& process,
        std::vector<u8> pi_header;
        pi_header.insert(pi_header.begin(), reinterpret_cast<u8*>(&nso_header),
                         reinterpret_cast<u8*>(&nso_header) + sizeof(NSOHeader));
-        pi_header.insert(pi_header.begin() + sizeof(NSOHeader), program_image.begin(),
-                         program_image.end());
+        pi_header.insert(pi_header.begin() + sizeof(NSOHeader), program_image.data(),
+                         program_image.data() + program_image.size());

        pi_header = pm->PatchNSO(pi_header, file.GetName());

-        std::copy(pi_header.begin() + sizeof(NSOHeader), pi_header.end(), program_image.begin());
+        std::copy(pi_header.begin() + sizeof(NSOHeader), pi_header.end(), program_image.data());
    }

    // Apply cheats if they exist and the program has a valid title ID
--- a/src/core/memory.cpp
+++ b/src/core/memory.cpp
@@ -14,6 +14,7 @@
 #include "common/swap.h"
 #include "core/arm/arm_interface.h"
 #include "core/core.h"
+#include "core/hle/kernel/physical_memory.h"
 #include "core/hle/kernel/process.h"
 #include "core/hle/kernel/vm_manager.h"
 #include "core/memory.h"
@@ -38,6 +39,11 @@ struct Memory::Impl {
        system.ArmInterface(3).PageTableChanged(*current_page_table, address_space_width);
    }

+    void MapMemoryRegion(Common::PageTable& page_table, VAddr base, u64 size,
+                         Kernel::PhysicalMemory& memory, VAddr offset) {
+        MapMemoryRegion(page_table, base, size, memory.data() + offset);
+    }
+
    void MapMemoryRegion(Common::PageTable& page_table, VAddr base, u64 size, u8* target) {
        ASSERT_MSG((size & PAGE_MASK) == 0, "non-page aligned size: {:016X}", size);
        ASSERT_MSG((base & PAGE_MASK) == 0, "non-page aligned base: {:016X}", base);
@@ -601,6 +607,11 @@ void Memory::SetCurrentPageTable(Kernel::Process& process) {
    impl->SetCurrentPageTable(process);
 }

+void Memory::MapMemoryRegion(Common::PageTable& page_table, VAddr base, u64 size,
+                             Kernel::PhysicalMemory& memory, VAddr offset) {
+    impl->MapMemoryRegion(page_table, base, size, memory, offset);
+}
+
 void Memory::MapMemoryRegion(Common::PageTable& page_table, VAddr base, u64 size, u8* target) {
    impl->MapMemoryRegion(page_table, base, size, target);
 }
--- a/src/core/memory.h
+++ b/src/core/memory.h
@@ -19,8 +19,9 @@ class System;
 }

 namespace Kernel {
+class PhysicalMemory;
 class Process;
-}
+} // namespace Kernel

 namespace Memory {

@@ -65,6 +66,19 @@ public:
     */
    void SetCurrentPageTable(Kernel::Process& process);

+    /**
+     * Maps a physical buffer onto a region of the emulated process address space.
+     *
+     * @param page_table The page table of the emulated process.
+     * @param base       The address to start mapping at. Must be page-aligned.
+     * @param size       The amount of bytes to map. Must be page-aligned.
+     * @param memory     Physical buffer with the memory backing the mapping. Must be of length
+     *                   at least `size + offset`.
+     * @param offset     The offset within the physical memory. Must be page-aligned.
+     */
+    void MapMemoryRegion(Common::PageTable& page_table, VAddr base, u64 size,
+                         Kernel::PhysicalMemory& memory, VAddr offset);
+
    /**
     * Maps an allocated buffer onto a region of the emulated process address space.
     *
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -190,8 +190,11 @@ if (ENABLE_VULKAN)
        renderer_vulkan/vk_stream_buffer.h
        renderer_vulkan/vk_swapchain.cpp
        renderer_vulkan/vk_swapchain.h
+        renderer_vulkan/vk_texture_cache.cpp
+        renderer_vulkan/vk_texture_cache.h
        renderer_vulkan/vk_update_descriptor.cpp
-        renderer_vulkan/vk_update_descriptor.h)
+        renderer_vulkan/vk_update_descriptor.h
+    )

    target_include_directories(video_core PRIVATE sirit ../../externals/Vulkan-Headers/include)
    target_compile_definitions(video_core PRIVATE HAS_VULKAN)
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -1018,7 +1018,14 @@ public:
                    }
                } instanced_arrays;

-                INSERT_UNION_PADDING_WORDS(0x6);
+                INSERT_UNION_PADDING_WORDS(0x4);
+
+                union {
+                    BitField<0, 1, u32> enable;
+                    BitField<4, 8, u32> unk4;
+                } vp_point_size;
+
+                INSERT_UNION_PADDING_WORDS(1);

                Cull cull;

@@ -1271,8 +1278,6 @@ public:

    } dirty{};

-    std::array<u8, Regs::NUM_REGS> dirty_pointers{};
-
    /// Reads a register value located at the input method address
    u32 GetRegisterValue(u32 method) const;

@@ -1367,6 +1372,8 @@ private:

    bool execute_on{true};

+    std::array<u8, Regs::NUM_REGS> dirty_pointers{};
+
    /// Retrieves information about a specific TIC entry from the TIC buffer.
    Texture::TICEntry GetTICEntry(u32 tic_index) const;

@@ -1503,6 +1510,7 @@ ASSERT_REG_POSITION(primitive_restart, 0x591);
 ASSERT_REG_POSITION(index_array, 0x5F2);
 ASSERT_REG_POSITION(polygon_offset_clamp, 0x61F);
 ASSERT_REG_POSITION(instanced_arrays, 0x620);
+ASSERT_REG_POSITION(vp_point_size, 0x644);
 ASSERT_REG_POSITION(cull, 0x646);
 ASSERT_REG_POSITION(pixel_center_integer, 0x649);
 ASSERT_REG_POSITION(viewport_transform_enabled, 0x64B);
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -215,6 +215,18 @@ enum class F2fRoundingOp : u64 {
    Trunc = 11,
 };

+enum class AtomicOp : u64 {
+    Add = 0,
+    Min = 1,
+    Max = 2,
+    Inc = 3,
+    Dec = 4,
+    And = 5,
+    Or = 6,
+    Xor = 7,
+    Exch = 8,
+};
+
 enum class UniformType : u64 {
    UnsignedByte = 0,
    SignedByte = 1,
@@ -236,6 +248,13 @@ enum class StoreType : u64 {
    Bits128 = 6,
 };

+enum class AtomicType : u64 {
+    U32 = 0,
+    S32 = 1,
+    U64 = 2,
+    S64 = 3,
+};
+
 enum class IMinMaxExchange : u64 {
    None = 0,
    XLo = 1,
@@ -938,6 +957,16 @@ union Instruction {
        BitField<46, 2, u64> cache_mode;
    } stg;

+    union {
+        BitField<52, 4, AtomicOp> operation;
+        BitField<28, 2, AtomicType> type;
+        BitField<30, 22, s64> offset;
+
+        s32 GetImmediateOffset() const {
+            return static_cast<s32>(offset << 2);
+        }
+    } atoms;
+
    union {
        BitField<32, 1, PhysicalAttributeDirection> direction;
        BitField<47, 3, AttributeSize> size;
@@ -1659,9 +1688,10 @@ public:
        ST_A,
        ST_L,
        ST_S,
-        ST,   // Store in generic memory
-        STG,  // Store in global memory
-        AL2P, // Transforms attribute memory into physical memory
+        ST,    // Store in generic memory
+        STG,   // Store in global memory
+        ATOMS, // Atomic operation on shared memory
+        AL2P,  // Transforms attribute memory into physical memory
        TEX,
        TEX_B,  // Texture Load Bindless
        TXQ,    // Texture Query
@@ -1964,6 +1994,7 @@ private:
            INST("1110111101010---", Id::ST_L, Type::Memory, "ST_L"),
            INST("101-------------", Id::ST, Type::Memory, "ST"),
            INST("1110111011011---", Id::STG, Type::Memory, "STG"),
+            INST("11101100--------", Id::ATOMS, Type::Memory, "ATOMS"),
            INST("1110111110100---", Id::AL2P, Type::Memory, "AL2P"),
            INST("110000----111---", Id::TEX, Type::Texture, "TEX"),
            INST("1101111010111---", Id::TEX_B, Type::Texture, "TEX_B"),
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -1272,6 +1272,7 @@ void RasterizerOpenGL::SyncPointState() {
    const auto& regs = system.GPU().Maxwell3D().regs;
    // Limit the point size to 1 since nouveau sometimes sets a point size of 0 (and that's invalid
    // in OpenGL).
+    state.point.program_control = regs.vp_point_size.enable != 0;
    state.point.size = std::max(1.0f, regs.point_size);
 }

--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -34,9 +34,6 @@ using VideoCommon::Shader::ShaderIR;

 namespace {

-// One UBO is always reserved for emulation values on staged shaders
-constexpr u32 STAGE_RESERVED_UBOS = 1;
-
 constexpr u32 STAGE_MAIN_OFFSET = 10;
 constexpr u32 KERNEL_MAIN_OFFSET = 0;

@@ -243,7 +240,6 @@ CachedProgram BuildShader(const Device& device, u64 unique_identifier, ShaderTyp
    if (!code_b.empty()) {
        ir_b.emplace(code_b, main_offset, COMPILER_SETTINGS, locker);
    }
-    const auto entries = GLShader::GetEntries(ir);

    std::string source = fmt::format(R"(// {}
 #version 430 core
@@ -314,9 +310,10 @@ std::unordered_set<GLenum> GetSupportedFormats() {

 CachedShader::CachedShader(const ShaderParameters& params, ShaderType shader_type,
                           GLShader::ShaderEntries entries, ProgramCode code, ProgramCode code_b)
-    : RasterizerCacheObject{params.host_ptr}, system{params.system}, disk_cache{params.disk_cache},
-      device{params.device}, cpu_addr{params.cpu_addr}, unique_identifier{params.unique_identifier},
-      shader_type{shader_type}, entries{entries}, code{std::move(code)}, code_b{std::move(code_b)} {
+    : RasterizerCacheObject{params.host_ptr}, system{params.system},
+      disk_cache{params.disk_cache}, device{params.device}, cpu_addr{params.cpu_addr},
+      unique_identifier{params.unique_identifier}, shader_type{shader_type},
+      entries{std::move(entries)}, code{std::move(code)}, code_b{std::move(code_b)} {
    if (!params.precompiled_variants) {
        return;
    }
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -1856,6 +1856,16 @@ private:
                Type::Uint};
    }

+    template <const std::string_view& opname, Type type>
+    Expression Atomic(Operation operation) {
+        ASSERT(stage == ShaderType::Compute);
+        auto& smem = std::get<SmemNode>(*operation[0]);
+
+        return {fmt::format("atomic{}(smem[{} >> 2], {})", opname, Visit(smem.GetAddress()).AsInt(),
+                            Visit(operation[1]).As(type)),
+                type};
+    }
+
    Expression Branch(Operation operation) {
        const auto target = std::get_if<ImmediateNode>(&*operation[0]);
        UNIMPLEMENTED_IF(!target);
@@ -2194,6 +2204,8 @@ private:
        &GLSLDecompiler::AtomicImage<Func::Xor>,
        &GLSLDecompiler::AtomicImage<Func::Exchange>,

+        &GLSLDecompiler::Atomic<Func::Add, Type::Uint>,
+
        &GLSLDecompiler::Branch,
        &GLSLDecompiler::BranchIndirect,
        &GLSLDecompiler::PushFlowStack,
@@ -2313,7 +2325,7 @@ public:
    explicit ExprDecompiler(GLSLDecompiler& decomp) : decomp{decomp} {}

    void operator()(const ExprAnd& expr) {
-        inner += "( ";
+        inner += '(';
        std::visit(*this, *expr.operand1);
        inner += " && ";
        std::visit(*this, *expr.operand2);
@@ -2321,7 +2333,7 @@ public:
    }

    void operator()(const ExprOr& expr) {
-        inner += "( ";
+        inner += '(';
        std::visit(*this, *expr.operand1);
        inner += " || ";
        std::visit(*this, *expr.operand2);
@@ -2339,28 +2351,7 @@ public:
    }

    void operator()(const ExprCondCode& expr) {
-        const Node cc = decomp.ir.GetConditionCode(expr.cc);
-        std::string target;
-
-        if (const auto pred = std::get_if<PredicateNode>(&*cc)) {
-            const auto index = pred->GetIndex();
-            switch (index) {
-            case Tegra::Shader::Pred::NeverExecute:
-                target = "false";
-                break;
-            case Tegra::Shader::Pred::UnusedIndex:
-                target = "true";
-                break;
-            default:
-                target = decomp.GetPredicate(index);
-                break;
-            }
-        } else if (const auto flag = std::get_if<InternalFlagNode>(&*cc)) {
-            target = decomp.GetInternalFlag(flag->GetFlag());
-        } else {
-            UNREACHABLE();
-        }
-        inner += target;
+        inner += decomp.Visit(decomp.ir.GetConditionCode(expr.cc)).AsBool();
    }

    void operator()(const ExprVar& expr) {
@@ -2372,8 +2363,7 @@ public:
    }

    void operator()(VideoCommon::Shader::ExprGprEqual& expr) {
-        inner +=
-            "( ftou(" + decomp.GetRegister(expr.gpr) + ") == " + std::to_string(expr.value) + ')';
+        inner += fmt::format("(ftou({}) == {})", decomp.GetRegister(expr.gpr), expr.value);
    }

    const std::string& GetResult() const {
@@ -2381,8 +2371,8 @@ public:
    }

 private:
-    std::string inner;
    GLSLDecompiler& decomp;
+    std::string inner;
 };

 class ASTDecompiler {
--- a/src/video_core/renderer_opengl/gl_state.cpp
+++ b/src/video_core/renderer_opengl/gl_state.cpp
@@ -127,6 +127,7 @@ void OpenGLState::ApplyClipDistances() {
 }

 void OpenGLState::ApplyPointSize() {
+    Enable(GL_PROGRAM_POINT_SIZE, cur_state.point.program_control, point.program_control);
    if (UpdateValue(cur_state.point.size, point.size)) {
        glPointSize(point.size);
    }
--- a/src/video_core/renderer_opengl/gl_state.h
+++ b/src/video_core/renderer_opengl/gl_state.h
@@ -131,7 +131,8 @@ public:
    std::array<Viewport, Tegra::Engines::Maxwell3D::Regs::NumViewports> viewports;

    struct {
-        float size = 1.0f; // GL_POINT_SIZE
+        bool program_control = false; // GL_PROGRAM_POINT_SIZE
+        GLfloat size = 1.0f;          // GL_POINT_SIZE
    } point;

    struct {
--- a/src/video_core/renderer_opengl/gl_texture_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp
@@ -44,7 +44,7 @@ struct FormatTuple {

 constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> tex_format_tuples = {{
    {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, false},                        // ABGR8U
-    {GL_RGBA8, GL_RGBA, GL_BYTE, false},                                            // ABGR8S
+    {GL_RGBA8_SNORM, GL_RGBA, GL_BYTE, false},                                      // ABGR8S
    {GL_RGBA8UI, GL_RGBA_INTEGER, GL_UNSIGNED_BYTE, false},                         // ABGR8UI
    {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV, false},                        // B5G6R5U
    {GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV, false},                  // A2B10G10R10U
@@ -83,9 +83,9 @@ constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> tex_format
    {GL_RGB32F, GL_RGB, GL_FLOAT, false},                                           // RGB32F
    {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, false},                 // RGBA8_SRGB
    {GL_RG8, GL_RG, GL_UNSIGNED_BYTE, false},                                       // RG8U
-    {GL_RG8, GL_RG, GL_BYTE, false},                                                // RG8S
+    {GL_RG8_SNORM, GL_RG, GL_BYTE, false},                                          // RG8S
    {GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT, false},                             // RG32UI
-    {GL_RGB16F, GL_RGBA16, GL_HALF_FLOAT, false},                                   // RGBX16F
+    {GL_RGB16F, GL_RGBA, GL_HALF_FLOAT, false},                                     // RGBX16F
    {GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT, false},                             // R32UI
    {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false},                                   // ASTC_2D_8X8
    {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false},                                   // ASTC_2D_8X5
@@ -253,14 +253,12 @@ void CachedSurface::DownloadTexture(std::vector<u8>& staging_buffer) {
        glPixelStorei(GL_PACK_ALIGNMENT, std::min(8U, params.GetRowAlignment(level)));
        glPixelStorei(GL_PACK_ROW_LENGTH, static_cast<GLint>(params.GetMipWidth(level)));
        const std::size_t mip_offset = params.GetHostMipmapLevelOffset(level);
+        u8* const mip_data = staging_buffer.data() + mip_offset;
+        const GLsizei size = static_cast<GLsizei>(params.GetHostMipmapSize(level));
        if (is_compressed) {
-            glGetCompressedTextureImage(texture.handle, level,
-                                        static_cast<GLsizei>(params.GetHostMipmapSize(level)),
-                                        staging_buffer.data() + mip_offset);
+            glGetCompressedTextureImage(texture.handle, level, size, mip_data);
        } else {
-            glGetTextureImage(texture.handle, level, format, type,
-                              static_cast<GLsizei>(params.GetHostMipmapSize(level)),
-                              staging_buffer.data() + mip_offset);
+            glGetTextureImage(texture.handle, level, format, type, size, mip_data);
        }
    }
 }
--- a/src/video_core/renderer_opengl/utils.cpp
+++ b/src/video_core/renderer_opengl/utils.cpp
@@ -6,16 +6,20 @@
 #include <vector>

 #include <fmt/format.h>
-
 #include <glad/glad.h>

-#include "common/assert.h"
 #include "common/common_types.h"
-#include "common/scope_exit.h"
 #include "video_core/renderer_opengl/utils.h"

 namespace OpenGL {

+/// Record type used by VertexArrayPushBuffer. Every field starts zeroed / null.
+/// NOTE(review): the field names suggest one vertex buffer binding per entry - confirm in Bind().
+struct VertexArrayPushBuffer::Entry {
+    GLuint binding_index = 0;
+    const GLuint* buffer = nullptr;
+    GLintptr offset = 0;
+    GLsizei stride = 0;
+};
+
 VertexArrayPushBuffer::VertexArrayPushBuffer() = default;

 VertexArrayPushBuffer::~VertexArrayPushBuffer() = default;
@@ -47,6 +51,13 @@ void VertexArrayPushBuffer::Bind() {
    }
 }

+/// Record type stored in BindBuffersRangePushBuffer::entries.
+/// Members are value-initialized, matching VertexArrayPushBuffer::Entry, so an Entry is never
+/// read with indeterminate fields. The struct stays an aggregate in C++14 and later.
+struct BindBuffersRangePushBuffer::Entry {
+    GLuint binding{};
+    const GLuint* buffer{};
+    GLintptr offset{};
+    GLsizeiptr size{};
+};
+
 BindBuffersRangePushBuffer::BindBuffersRangePushBuffer(GLenum target) : target{target} {}

 BindBuffersRangePushBuffer::~BindBuffersRangePushBuffer() = default;
--- a/src/video_core/renderer_opengl/utils.h
+++ b/src/video_core/renderer_opengl/utils.h
@@ -26,12 +26,7 @@ public:
    void Bind();

 private:
-    struct Entry {
-        GLuint binding_index{};
-        const GLuint* buffer{};
-        GLintptr offset{};
-        GLsizei stride{};
-    };
+    struct Entry;

    GLuint vao{};
    const GLuint* index_buffer{};
@@ -50,12 +45,7 @@ public:
    void Bind();

 private:
-    struct Entry {
-        GLuint binding;
-        const GLuint* buffer;
-        GLintptr offset;
-        GLsizeiptr size;
-    };
+    struct Entry;

    GLenum target;
    std::vector<Entry> entries;
--- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
+++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
@@ -44,7 +44,7 @@ vk::SamplerMipmapMode MipmapMode(Tegra::Texture::TextureMipmapFilter mipmap_filt
    return {};
 }

-vk::SamplerAddressMode WrapMode(Tegra::Texture::WrapMode wrap_mode,
+vk::SamplerAddressMode WrapMode(const VKDevice& device, Tegra::Texture::WrapMode wrap_mode,
                                Tegra::Texture::TextureFilter filter) {
    switch (wrap_mode) {
    case Tegra::Texture::WrapMode::Wrap:
@@ -56,7 +56,12 @@ vk::SamplerAddressMode WrapMode(Tegra::Texture::WrapMode wrap_mode,
    case Tegra::Texture::WrapMode::Border:
        return vk::SamplerAddressMode::eClampToBorder;
    case Tegra::Texture::WrapMode::Clamp:
-        // TODO(Rodrigo): Emulate GL_CLAMP properly
+        if (device.GetDriverID() == vk::DriverIdKHR::eNvidiaProprietary) {
+            // Nvidia's Vulkan driver defaults to GL_CLAMP on invalid enumerations, we can hack this
+            // by sending an invalid enumeration.
+            return static_cast<vk::SamplerAddressMode>(0xcafe);
+        }
+        // TODO(Rodrigo): Emulate GL_CLAMP properly on other vendors
        switch (filter) {
        case Tegra::Texture::TextureFilter::Nearest:
            return vk::SamplerAddressMode::eClampToEdge;
--- a/src/video_core/renderer_vulkan/maxwell_to_vk.h
+++ b/src/video_core/renderer_vulkan/maxwell_to_vk.h
@@ -22,7 +22,7 @@ vk::Filter Filter(Tegra::Texture::TextureFilter filter);

 vk::SamplerMipmapMode MipmapMode(Tegra::Texture::TextureMipmapFilter mipmap_filter);

-vk::SamplerAddressMode WrapMode(Tegra::Texture::WrapMode wrap_mode,
+vk::SamplerAddressMode WrapMode(const VKDevice& device, Tegra::Texture::WrapMode wrap_mode,
                                Tegra::Texture::TextureFilter filter);

 vk::CompareOp DepthCompareFunction(Tegra::Texture::DepthCompareFunc depth_compare_func);
--- a/src/video_core/renderer_vulkan/vk_sampler_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_sampler_cache.cpp
@@ -46,9 +46,9 @@ UniqueSampler VKSamplerCache::CreateSampler(const Tegra::Texture::TSCEntry& tsc)
        {}, MaxwellToVK::Sampler::Filter(tsc.mag_filter),
        MaxwellToVK::Sampler::Filter(tsc.min_filter),
        MaxwellToVK::Sampler::MipmapMode(tsc.mipmap_filter),
-        MaxwellToVK::Sampler::WrapMode(tsc.wrap_u, tsc.mag_filter),
-        MaxwellToVK::Sampler::WrapMode(tsc.wrap_v, tsc.mag_filter),
-        MaxwellToVK::Sampler::WrapMode(tsc.wrap_p, tsc.mag_filter), tsc.GetLodBias(),
+        MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_u, tsc.mag_filter),
+        MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_v, tsc.mag_filter),
+        MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_p, tsc.mag_filter), tsc.GetLodBias(),
        has_anisotropy, max_anisotropy, tsc.depth_compare_enabled,
        MaxwellToVK::Sampler::DepthCompareFunction(tsc.depth_compare_func), tsc.GetMinLod(),
        tsc.GetMaxLod(), vk_border_color.value_or(vk::BorderColor::eFloatTransparentBlack),
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
@@ -1796,6 +1796,19 @@ private:
        return {};
    }

+    /// Emits an unsigned atomic add on shared memory.
+    /// operation[0] is expected to hold an SmemNode giving the address; operation[1] is the
+    /// value to add. The result of OpAtomicIAdd is returned tagged as Type::Uint.
+    Expression UAtomicAdd(Operation operation) {
+        const auto& shared_node = std::get<SmemNode>(*operation[0]);
+
+        // The visited address is shifted right by two before it indexes shared_memory.
+        const Id raw_address = AsUint(Visit(shared_node.GetAddress()));
+        const Id element_index = OpShiftRightLogical(t_uint, raw_address, Constant(t_uint, 2U));
+        const Id pointer = OpAccessChain(t_smem_uint, shared_memory, element_index);
+
+        const Id scope = Constant(t_uint, static_cast<u32>(spv::Scope::Device));
+        const Id semantics = Constant(t_uint, 0U);
+        const Id value = AsUint(Visit(operation[1]));
+
+        const Id result = OpAtomicIAdd(t_uint, pointer, scope, semantics, value);
+        return {result, Type::Uint};
+    }
+
    Expression Branch(Operation operation) {
        const auto& target = std::get<ImmediateNode>(*operation[0]);
        OpStore(jmp_to, Constant(t_uint, target.GetValue()));
@@ -2373,6 +2386,8 @@ private:
        &SPIRVDecompiler::AtomicImageXor,
        &SPIRVDecompiler::AtomicImageExchange,

+        &SPIRVDecompiler::UAtomicAdd,
+
        &SPIRVDecompiler::Branch,
        &SPIRVDecompiler::BranchIndirect,
        &SPIRVDecompiler::PushFlowStack,
--- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h
+++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h
@@ -13,6 +13,7 @@

 #include "video_core/renderer_vulkan/declarations.h"
 #include "video_core/renderer_vulkan/vk_memory_manager.h"
+#include "video_core/renderer_vulkan/vk_resource_manager.h"

 namespace Vulkan {

--- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
@@ -0,0 +1,475 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <array>
+#include <cstddef>
+#include <cstring>
+#include <memory>
+#include <variant>
+#include <vector>
+
+#include "common/alignment.h"
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "core/core.h"
+#include "core/memory.h"
+#include "video_core/engines/maxwell_3d.h"
+#include "video_core/morton.h"
+#include "video_core/renderer_vulkan/declarations.h"
+#include "video_core/renderer_vulkan/maxwell_to_vk.h"
+#include "video_core/renderer_vulkan/vk_device.h"
+#include "video_core/renderer_vulkan/vk_memory_manager.h"
+#include "video_core/renderer_vulkan/vk_rasterizer.h"
+#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
+#include "video_core/renderer_vulkan/vk_texture_cache.h"
+#include "video_core/surface.h"
+#include "video_core/textures/convert.h"
+
+namespace Vulkan {
+
+using VideoCore::MortonSwizzle;
+using VideoCore::MortonSwizzleMode;
+
+using Tegra::Texture::SwizzleSource;
+using VideoCore::Surface::PixelFormat;
+using VideoCore::Surface::SurfaceCompression;
+using VideoCore::Surface::SurfaceTarget;
+
+namespace {
+
+/// Translates a surface target into the vk::ImageType of its backing image.
+/// Array and cube targets use the image type of their base dimension. A target without a case
+/// below reaches UNREACHABLE_MSG, after which a default-constructed value is returned.
+vk::ImageType SurfaceTargetToImage(SurfaceTarget target) {
+    switch (target) {
+    case SurfaceTarget::Texture3D:
+        return vk::ImageType::e3D;
+    case SurfaceTarget::TextureCubeArray:
+    case SurfaceTarget::TextureCubemap:
+    case SurfaceTarget::Texture2DArray:
+    case SurfaceTarget::Texture2D:
+        return vk::ImageType::e2D;
+    case SurfaceTarget::Texture1DArray:
+    case SurfaceTarget::Texture1D:
+        return vk::ImageType::e1D;
+    }
+    UNREACHABLE_MSG("Unknown texture target={}", static_cast<u32>(target));
+    return {};
+}
+
+/// Picks the image aspect flags for a pixel format by comparing it against the Max*Format
+/// sentinels in ascending order: color, then depth, then depth + stencil.
+/// A format at or past MaxDepthStencilFormat hits UNREACHABLE_MSG and falls back to color.
+vk::ImageAspectFlags PixelFormatToImageAspect(PixelFormat pixel_format) {
+    if (pixel_format < PixelFormat::MaxColorFormat) {
+        return vk::ImageAspectFlagBits::eColor;
+    }
+    if (pixel_format < PixelFormat::MaxDepthFormat) {
+        return vk::ImageAspectFlagBits::eDepth;
+    }
+    if (pixel_format < PixelFormat::MaxDepthStencilFormat) {
+        return vk::ImageAspectFlagBits::eDepth | vk::ImageAspectFlagBits::eStencil;
+    }
+
+    UNREACHABLE_MSG("Invalid pixel format={}", static_cast<u32>(pixel_format));
+    return vk::ImageAspectFlagBits::eColor;
+}
+
+/// Maps a surface target to the matching vk::ImageViewType.
+/// TextureBuffer has no image view type: it leaves the switch, hits UNREACHABLE, and a
+/// default-constructed value is returned.
+vk::ImageViewType GetImageViewType(SurfaceTarget target) {
+    switch (target) {
+    case SurfaceTarget::TextureBuffer:
+        break;
+    case SurfaceTarget::Texture1D:
+        return vk::ImageViewType::e1D;
+    case SurfaceTarget::Texture1DArray:
+        return vk::ImageViewType::e1DArray;
+    case SurfaceTarget::Texture2D:
+        return vk::ImageViewType::e2D;
+    case SurfaceTarget::Texture2DArray:
+        return vk::ImageViewType::e2DArray;
+    case SurfaceTarget::Texture3D:
+        return vk::ImageViewType::e3D;
+    case SurfaceTarget::TextureCubemap:
+        return vk::ImageViewType::eCube;
+    case SurfaceTarget::TextureCubeArray:
+        return vk::ImageViewType::eCubeArray;
+    }
+    UNREACHABLE();
+    return {};
+}
+
+/// Creates the exclusive-sharing buffer that backs a buffer surface.
+/// The buffer is params.GetHostSizeInBytes() bytes long and is usable as a uniform texel buffer
+/// and as a transfer source and destination.
+UniqueBuffer CreateBuffer(const VKDevice& device, const SurfaceParams& params) {
+    // TODO(Rodrigo): Move texture buffer creation to the buffer cache
+    const vk::BufferUsageFlags usage = vk::BufferUsageFlagBits::eUniformTexelBuffer |
+                                       vk::BufferUsageFlagBits::eTransferSrc |
+                                       vk::BufferUsageFlagBits::eTransferDst;
+    const vk::BufferCreateInfo create_info({}, params.GetHostSizeInBytes(), usage,
+                                           vk::SharingMode::eExclusive, 0, nullptr);
+
+    const auto logical = device.GetLogical();
+    return logical.createBufferUnique(create_info, nullptr, device.GetDispatchLoader());
+}
+
+/// Builds the create info for a view covering the whole buffer (offset 0, host size in bytes).
+/// The format is looked up through MaxwellToVK::SurfaceFormat with FormatType::Buffer.
+/// Asserts that params describes a buffer surface.
+vk::BufferViewCreateInfo GenerateBufferViewCreateInfo(const VKDevice& device,
+                                                      const SurfaceParams& params,
+                                                      vk::Buffer buffer) {
+    ASSERT(params.IsBuffer());
+
+    const auto format_info =
+        MaxwellToVK::SurfaceFormat(device, FormatType::Buffer, params.pixel_format);
+    const auto view_format = format_info.format;
+    return vk::BufferViewCreateInfo({}, buffer, view_format, 0, params.GetHostSizeInBytes());
+}
+
+/// Builds the vk::ImageCreateInfo for a non-buffer surface.
+/// The image is always sampled and usable as a transfer source and destination. Attachment
+/// usage (depth-stencil or color) and storage usage are added when MaxwellToVK::SurfaceFormat
+/// reports the format as attachable / storage. Cube targets get the cube-compatible flag, and
+/// only Texture3D uses params.depth for the extent depth. Asserts that params is not a buffer.
+vk::ImageCreateInfo GenerateImageCreateInfo(const VKDevice& device, const SurfaceParams& params) {
+    ASSERT(!params.IsBuffer());
+
+    const auto [format, attachable, storage] =
+        MaxwellToVK::SurfaceFormat(device, FormatType::Optimal, params.pixel_format);
+
+    vk::ImageUsageFlags usage = vk::ImageUsageFlagBits::eSampled |
+                                vk::ImageUsageFlagBits::eTransferDst |
+                                vk::ImageUsageFlagBits::eTransferSrc;
+    if (attachable) {
+        if (params.IsPixelFormatZeta()) {
+            usage |= vk::ImageUsageFlagBits::eDepthStencilAttachment;
+        } else {
+            usage |= vk::ImageUsageFlagBits::eColorAttachment;
+        }
+    }
+    if (storage) {
+        usage |= vk::ImageUsageFlagBits::eStorage;
+    }
+
+    vk::ImageCreateFlags create_flags;
+    vk::Extent3D image_extent;
+    switch (params.target) {
+    case SurfaceTarget::Texture3D:
+        image_extent = vk::Extent3D(params.width, params.height, params.depth);
+        break;
+    case SurfaceTarget::TextureCubemap:
+    case SurfaceTarget::TextureCubeArray:
+        create_flags |= vk::ImageCreateFlagBits::eCubeCompatible;
+        [[fallthrough]];
+    case SurfaceTarget::Texture1D:
+    case SurfaceTarget::Texture1DArray:
+    case SurfaceTarget::Texture2D:
+    case SurfaceTarget::Texture2DArray:
+        image_extent = vk::Extent3D(params.width, params.height, 1);
+        break;
+    case SurfaceTarget::TextureBuffer:
+        UNREACHABLE();
+    }
+
+    const auto image_type = SurfaceTargetToImage(params.target);
+    const auto array_layers = static_cast<u32>(params.GetNumLayers());
+    return vk::ImageCreateInfo(create_flags, image_type, format, image_extent, params.num_levels,
+                               array_layers, vk::SampleCountFlagBits::e1, vk::ImageTiling::eOptimal,
+                               usage, vk::SharingMode::eExclusive, 0, nullptr,
+                               vk::ImageLayout::eUndefined);
+}
+
+} // Anonymous namespace
+
+/// Creates the Vulkan resources backing a surface.
+/// Buffer surfaces get a buffer, a memory commit, and a buffer view. Every other surface gets
+/// an image and a memory commit. In both cases `format` is taken from the create info, and a
+/// main view spanning all layers and levels is created last.
+CachedSurface::CachedSurface(Core::System& system, const VKDevice& device,
+                             VKResourceManager& resource_manager, VKMemoryManager& memory_manager,
+                             VKScheduler& scheduler, VKStagingBufferPool& staging_pool,
+                             GPUVAddr gpu_addr, const SurfaceParams& params)
+    : SurfaceBase<View>{gpu_addr, params}, system{system}, device{device},
+      resource_manager{resource_manager}, memory_manager{memory_manager}, scheduler{scheduler},
+      staging_pool{staging_pool} {
+    if (!params.IsBuffer()) {
+        const auto image_ci = GenerateImageCreateInfo(device, params);
+        format = image_ci.format;
+
+        const auto aspect = PixelFormatToImageAspect(params.pixel_format);
+        image.emplace(device, scheduler, image_ci, aspect);
+        commit = memory_manager.Commit(image->GetHandle(), false);
+    } else {
+        buffer = CreateBuffer(device, params);
+        commit = memory_manager.Commit(*buffer, false);
+
+        const auto view_ci = GenerateBufferViewCreateInfo(device, params, *buffer);
+        format = view_ci.format;
+
+        const auto logical = device.GetLogical();
+        buffer_view = logical.createBufferViewUnique(view_ci, nullptr, device.GetDispatchLoader());
+    }
+
+    // TODO(Rodrigo): Move this to a virtual function.
+    const auto layer_count = static_cast<u32>(params.GetNumLayers());
+    const ViewParams main_view_params(params.target, 0, layer_count, 0, params.num_levels);
+    main_view = CreateViewInner(main_view_params, true);
+}
+
+CachedSurface::~CachedSurface() = default;
+
+/// Uploads staging data to the surface, dispatching on whether it is a buffer or an image.
+void CachedSurface::UploadTexture(const std::vector<u8>& staging_buffer) {
+    // To upload data we have to be outside of a renderpass
+    scheduler.RequestOutsideRenderPassOperationContext();
+
+    if (!params.IsBuffer()) {
+        UploadImage(staging_buffer);
+        return;
+    }
+    UploadBuffer(staging_buffer);
+}
+
+/// Reads the image back into staging_buffer.
+/// One image-to-buffer copy is recorded per mip level into a pooled staging buffer. The
+/// scheduler is then finished before host_memory_size bytes are copied out of the mapping.
+/// Buffer surfaces are not implemented.
+void CachedSurface::DownloadTexture(std::vector<u8>& staging_buffer) {
+    UNIMPLEMENTED_IF(params.IsBuffer());
+
+    if (params.pixel_format == VideoCore::Surface::PixelFormat::A1B5G5R5U) {
+        LOG_WARNING(Render_Vulkan, "A1B5G5R5 flushing is stubbed");
+    }
+
+    // We can't copy images to buffers inside a renderpass
+    scheduler.RequestOutsideRenderPassOperationContext();
+
+    FullTransition(vk::PipelineStageFlagBits::eTransfer, vk::AccessFlagBits::eTransferRead,
+                   vk::ImageLayout::eTransferSrcOptimal);
+
+    const auto& download = staging_pool.GetUnusedBuffer(host_memory_size, true);
+    // TODO(Rodrigo): Do this in a single copy
+    for (u32 level = 0; level < params.num_levels; ++level) {
+        const auto src_image = image->GetHandle();
+        const auto dst_buffer = *download.handle;
+        const auto region = GetBufferImageCopy(level);
+        scheduler.Record([src_image, dst_buffer, region](auto cmdbuf, auto& dld) {
+            cmdbuf.copyImageToBuffer(src_image, vk::ImageLayout::eTransferSrcOptimal, dst_buffer,
+                                     {region}, dld);
+        });
+    }
+    scheduler.Finish();
+
+    // TODO(Rodrigo): Use an internal buffer for staging buffers and avoid this unnecessary memcpy.
+    std::memcpy(staging_buffer.data(), download.commit->Map(host_memory_size), host_memory_size);
+}
+
+// Intentionally empty for now: no debug name is attached to the surface.
+void CachedSurface::DecorateSurfaceName() {
+    // TODO(Rodrigo): Add name decorations
+}
+
+/// Creates a non-proxy view of this surface for the given view parameters.
+View CachedSurface::CreateView(const ViewParams& params) {
+    constexpr bool is_proxy = false;
+    return CreateViewInner(params, is_proxy);
+}
+
+/// Creates a view, stores it in `views` under its parameters (replacing any previous entry for
+/// the same parameters) and returns it.
+View CachedSurface::CreateViewInner(const ViewParams& params, bool is_proxy) {
+    // TODO(Rodrigo): Add name decorations
+    View created = std::make_shared<CachedSurfaceView>(device, *this, params, is_proxy);
+    views[params] = created;
+    return created;
+}
+
+/// Copies host_memory_size bytes of staging data into a pooled staging buffer, then records a
+/// buffer-to-buffer copy into the surface buffer followed by a transfer-write to shader-read
+/// buffer barrier (transfer stage to vertex shader stage).
+void CachedSurface::UploadBuffer(const std::vector<u8>& staging_buffer) {
+    const auto& upload = staging_pool.GetUnusedBuffer(host_memory_size, true);
+    std::memcpy(upload.commit->Map(host_memory_size), staging_buffer.data(), host_memory_size);
+
+    const auto source = *upload.handle;
+    const auto destination = *buffer;
+    const auto copy_size = params.GetHostSizeInBytes();
+    scheduler.Record([source, destination, copy_size](auto cmdbuf, auto& dld) {
+        const vk::BufferCopy region(0, 0, copy_size);
+        cmdbuf.copyBuffer(source, destination, {region}, dld);
+
+        const vk::BufferMemoryBarrier barrier(vk::AccessFlagBits::eTransferWrite,
+                                              vk::AccessFlagBits::eShaderRead, 0, 0, destination,
+                                              0, copy_size);
+        cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer,
+                               vk::PipelineStageFlagBits::eVertexShader, {}, {}, {barrier}, {},
+                               dld);
+    });
+}
+
+/// Uploads staging data into the image, one buffer-to-image copy per mip level.
+/// The data is first copied into a pooled staging buffer and the whole image is transitioned to
+/// eTransferDstOptimal. Combined depth-stencil images record two regions per level, one per
+/// aspect, each derived from the same GetBufferImageCopy() result. All other images record a
+/// single region.
+void CachedSurface::UploadImage(const std::vector<u8>& staging_buffer) {
+    const auto& src_buffer = staging_pool.GetUnusedBuffer(host_memory_size, true);
+    std::memcpy(src_buffer.commit->Map(host_memory_size), staging_buffer.data(), host_memory_size);
+
+    FullTransition(vk::PipelineStageFlagBits::eTransfer, vk::AccessFlagBits::eTransferWrite,
+                   vk::ImageLayout::eTransferDstOptimal);
+
+    // The aspect mask does not change between levels, so evaluate the check once.
+    const bool is_depth_stencil =
+        image->GetAspectMask() ==
+        (vk::ImageAspectFlagBits::eDepth | vk::ImageAspectFlagBits::eStencil);
+
+    for (u32 level = 0; level < params.num_levels; ++level) {
+        const vk::BufferImageCopy copy = GetBufferImageCopy(level);
+        if (is_depth_stencil) {
+            // A copy region may only name a single aspect, so split the combined one.
+            vk::BufferImageCopy depth = copy;
+            vk::BufferImageCopy stencil = copy;
+            depth.imageSubresource.aspectMask = vk::ImageAspectFlagBits::eDepth;
+            stencil.imageSubresource.aspectMask = vk::ImageAspectFlagBits::eStencil;
+            scheduler.Record([buffer = *src_buffer.handle, image = image->GetHandle(), depth,
+                              stencil](auto cmdbuf, auto& dld) {
+                cmdbuf.copyBufferToImage(buffer, image, vk::ImageLayout::eTransferDstOptimal,
+                                         {depth, stencil}, dld);
+            });
+        } else {
+            scheduler.Record([buffer = *src_buffer.handle, image = image->GetHandle(),
+                              copy](auto cmdbuf, auto& dld) {
+                cmdbuf.copyBufferToImage(buffer, image, vk::ImageLayout::eTransferDstOptimal,
+                                         {copy}, dld);
+            });
+        }
+    }
+}
+
+/// Builds the buffer/image copy region for one mip level.
+/// The buffer offset is the converted mipmap offset when the surface's compression type is
+/// SurfaceCompression::Converted, and the host mipmap level offset otherwise. The region covers
+/// every layer of the level. Only Texture3D surfaces use the mip depth; all others use 1.
+vk::BufferImageCopy CachedSurface::GetBufferImageCopy(u32 level) const {
+    const bool is_converted = params.GetCompressionType() == SurfaceCompression::Converted;
+    const std::size_t buffer_offset = is_converted ? params.GetConvertedMipmapOffset(level)
+                                                   : params.GetHostMipmapLevelOffset(level);
+
+    const bool is_3d = params.target == SurfaceTarget::Texture3D;
+    const u32 extent_depth = is_3d ? params.GetMipDepth(level) : 1;
+    const auto layer_count = static_cast<u32>(params.GetNumLayers());
+
+    return vk::BufferImageCopy(
+        buffer_offset, 0, 0, {image->GetAspectMask(), level, 0, layer_count}, {0, 0, 0},
+        {params.GetMipWidth(level), params.GetMipHeight(level), extent_depth});
+}
+
+/// Returns the subresource range covering every mip level and every layer of the image.
+vk::ImageSubresourceRange CachedSurface::GetImageSubresourceRange() const {
+    const auto layer_count = static_cast<u32>(params.GetNumLayers());
+    return vk::ImageSubresourceRange(image->GetAspectMask(), 0, params.num_levels, 0,
+                                     layer_count);
+}
+
+// Builds a view over `surface` restricted to the layer / level window in `params`.
+//
+// Name lookup inside the initializer expressions: the constructor parameter `params`
+// (ViewParams) hides the member of the same name. So the member `params` is set from
+// surface.GetSurfaceParams(), while params.base_layer, params.num_layers, params.base_level,
+// params.num_levels and params.target are all read from the ViewParams argument.
+//
+// image_view_type tests the member `image`. When it is null the view type is left
+// default-constructed and GetImageViewType() is not called.
+// NOTE(review): this relies on `image` being declared before `image_view_type` in the class;
+// confirm against the header.
+CachedSurfaceView::CachedSurfaceView(const VKDevice& device, CachedSurface& surface,
+                                     const ViewParams& params, bool is_proxy)
+    : VideoCommon::ViewBase{params}, params{surface.GetSurfaceParams()},
+      image{surface.GetImageHandle()}, buffer_view{surface.GetBufferViewHandle()},
+      aspect_mask{surface.GetAspectMask()}, device{device}, surface{surface},
+      base_layer{params.base_layer}, num_layers{params.num_layers}, base_level{params.base_level},
+      num_levels{params.num_levels}, image_view_type{image ? GetImageViewType(params.target)
+                                                           : vk::ImageViewType{}} {}
+
+CachedSurfaceView::~CachedSurfaceView() = default;
+
+/// Returns an image view of the surface with the requested component swizzle.
+/// Views are cached per encoded swizzle in view_cache, and the most recently returned view is
+/// kept in last_image_view as a fast path. A1B5G5R5U swaps the X and Z swizzles. Combined
+/// depth-stencil images pick a single aspect from x_source and use an identity swizzle.
+vk::ImageView CachedSurfaceView::GetHandle(SwizzleSource x_source, SwizzleSource y_source,
+                                           SwizzleSource z_source, SwizzleSource w_source) {
+    const u32 encoded_swizzle = EncodeSwizzle(x_source, y_source, z_source, w_source);
+
+    // Fast path: same swizzle as the previous request and a view is already available.
+    if (last_image_view && last_swizzle == encoded_swizzle) {
+        return last_image_view;
+    }
+    last_swizzle = encoded_swizzle;
+
+    const auto [cache_entry, was_inserted] = view_cache.try_emplace(encoded_swizzle);
+    auto& cached_view = cache_entry->second;
+    if (!was_inserted) {
+        last_image_view = *cached_view;
+        return last_image_view;
+    }
+
+    auto swizzle_x = MaxwellToVK::SwizzleSource(x_source);
+    auto swizzle_y = MaxwellToVK::SwizzleSource(y_source);
+    auto swizzle_z = MaxwellToVK::SwizzleSource(z_source);
+    auto swizzle_w = MaxwellToVK::SwizzleSource(w_source);
+
+    const bool is_a1b5g5r5 = params.pixel_format == VideoCore::Surface::PixelFormat::A1B5G5R5U;
+    if (is_a1b5g5r5) {
+        // A1B5G5R5 is implemented as A1R5G5B5, we have to change the swizzle here.
+        std::swap(swizzle_x, swizzle_z);
+    }
+
+    // Games can sample depth or stencil values on textures. This is decided by the swizzle value on
+    // hardware. To emulate this on Vulkan we specify it in the aspect.
+    vk::ImageAspectFlags view_aspect = aspect_mask;
+    const vk::ImageAspectFlags depth_stencil =
+        vk::ImageAspectFlagBits::eDepth | vk::ImageAspectFlagBits::eStencil;
+    if (view_aspect == depth_stencil) {
+        UNIMPLEMENTED_IF(x_source != SwizzleSource::R && x_source != SwizzleSource::G);
+        const bool wants_first_component = x_source == SwizzleSource::R;
+        switch (params.pixel_format) {
+        case VideoCore::Surface::PixelFormat::Z24S8:
+        case VideoCore::Surface::PixelFormat::Z32FS8:
+            if (wants_first_component) {
+                view_aspect = vk::ImageAspectFlagBits::eDepth;
+            } else {
+                view_aspect = vk::ImageAspectFlagBits::eStencil;
+            }
+            break;
+        case VideoCore::Surface::PixelFormat::S8Z24:
+            if (wants_first_component) {
+                view_aspect = vk::ImageAspectFlagBits::eStencil;
+            } else {
+                view_aspect = vk::ImageAspectFlagBits::eDepth;
+            }
+            break;
+        default:
+            view_aspect = vk::ImageAspectFlagBits::eDepth;
+            UNIMPLEMENTED();
+        }
+
+        // Vulkan doesn't seem to understand swizzling of a depth stencil image, use identity
+        swizzle_x = vk::ComponentSwizzle::eR;
+        swizzle_y = vk::ComponentSwizzle::eG;
+        swizzle_z = vk::ComponentSwizzle::eB;
+        swizzle_w = vk::ComponentSwizzle::eA;
+    }
+
+    const vk::ComponentMapping components(swizzle_x, swizzle_y, swizzle_z, swizzle_w);
+    const vk::ImageSubresourceRange range(view_aspect, base_level, num_levels, base_layer,
+                                          num_layers);
+    const vk::ImageViewCreateInfo view_ci({}, surface.GetImageHandle(), image_view_type,
+                                          surface.GetImage().GetFormat(), components, range);
+
+    const auto logical = device.GetLogical();
+    cached_view = logical.createImageViewUnique(view_ci, nullptr, device.GetDispatchLoader());
+    last_image_view = *cached_view;
+    return last_image_view;
+}
+
+// Forwards `system` and `rasterizer` to the TextureCache base and stores the Vulkan-side
+// collaborators (device, resource manager, memory manager, scheduler, staging pool) that
+// CreateSurface() later passes to every CachedSurface. The constructor does no other work.
+VKTextureCache::VKTextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
+                               const VKDevice& device, VKResourceManager& resource_manager,
+                               VKMemoryManager& memory_manager, VKScheduler& scheduler,
+                               VKStagingBufferPool& staging_pool)
+    : TextureCache(system, rasterizer), device{device}, resource_manager{resource_manager},
+      memory_manager{memory_manager}, scheduler{scheduler}, staging_pool{staging_pool} {}
+
+VKTextureCache::~VKTextureCache() = default;
+
+/// Allocates a new CachedSurface for the given GPU address and surface parameters, handing it
+/// the cache's own system, device, managers, scheduler and staging pool.
+Surface VKTextureCache::CreateSurface(GPUVAddr gpu_addr, const SurfaceParams& params) {
+    Surface created = std::make_shared<CachedSurface>(
+        system, device, resource_manager, memory_manager, scheduler, staging_pool, gpu_addr, params);
+    return created;
+}
+
+/// Records an image-to-image copy between two surfaces.
+/// 3D sources are not implemented. For a 3D destination, copy_params.dest_z becomes the Z offset
+/// and copy_params.depth the extent depth, with a single layer. Otherwise dest_z is the base
+/// layer and depth the layer count. Both surfaces are transitioned to the transfer layouts
+/// before the copy is recorded.
+void VKTextureCache::ImageCopy(Surface& src_surface, Surface& dst_surface,
+                               const VideoCommon::CopyParams& copy_params) {
+    const bool src_3d = src_surface->GetSurfaceParams().target == SurfaceTarget::Texture3D;
+    const bool dst_3d = dst_surface->GetSurfaceParams().target == SurfaceTarget::Texture3D;
+    UNIMPLEMENTED_IF(src_3d);
+
+    // The texture cache handles depth in OpenGL terms, we have to handle it as subresource and
+    // dimension respectively.
+    const u32 dst_base_layer = dst_3d ? 0 : copy_params.dest_z;
+    const u32 dst_offset_z = dst_3d ? copy_params.dest_z : 0;
+
+    const u32 extent_z = dst_3d ? copy_params.depth : 1;
+    const u32 num_layers = dst_3d ? 1 : copy_params.depth;
+
+    // We can't copy inside a renderpass
+    scheduler.RequestOutsideRenderPassOperationContext();
+
+    src_surface->Transition(copy_params.source_z, copy_params.depth, copy_params.source_level, 1,
+                            vk::PipelineStageFlagBits::eTransfer, vk::AccessFlagBits::eTransferRead,
+                            vk::ImageLayout::eTransferSrcOptimal);
+    dst_surface->Transition(
+        dst_base_layer, num_layers, copy_params.dest_level, 1, vk::PipelineStageFlagBits::eTransfer,
+        vk::AccessFlagBits::eTransferWrite, vk::ImageLayout::eTransferDstOptimal);
+
+    const vk::ImageSubresourceLayers src_subresource(
+        src_surface->GetAspectMask(), copy_params.source_level, copy_params.source_z, num_layers);
+    const vk::ImageSubresourceLayers dst_subresource(
+        dst_surface->GetAspectMask(), copy_params.dest_level, dst_base_layer, num_layers);
+    const vk::Offset3D src_offset(copy_params.source_x, copy_params.source_y, 0);
+    const vk::Offset3D dst_offset(copy_params.dest_x, copy_params.dest_y, dst_offset_z);
+    const vk::Extent3D extent(copy_params.width, copy_params.height, extent_z);
+    const vk::ImageCopy copy(src_subresource, src_offset, dst_subresource, dst_offset, extent);
+    const vk::Image src_image = src_surface->GetImageHandle();
+    const vk::Image dst_image = dst_surface->GetImageHandle();
+    // The dispatch loader is supplied by the scheduler as the lambda's second argument.
+    scheduler.Record([src_image, dst_image, copy](auto cmdbuf, auto& dld) {
+        cmdbuf.copyImage(src_image, vk::ImageLayout::eTransferSrcOptimal, dst_image,
+                         vk::ImageLayout::eTransferDstOptimal, {copy}, dld);
+    });
+}
+
+/// Records a blit from src_view to dst_view using the rectangles in copy_config.
+/// Both views are transitioned to the transfer layouts first. The blit uses linear filtering
+/// when copy_config.filter is Fermi2D::Filter::Linear and nearest filtering otherwise.
+void VKTextureCache::ImageBlit(View& src_view, View& dst_view,
+                               const Tegra::Engines::Fermi2D::Config& copy_config) {
+    // We can't blit inside a renderpass
+    scheduler.RequestOutsideRenderPassOperationContext();
+
+    src_view->Transition(vk::ImageLayout::eTransferSrcOptimal, vk::PipelineStageFlagBits::eTransfer,
+                         vk::AccessFlagBits::eTransferRead);
+    dst_view->Transition(vk::ImageLayout::eTransferDstOptimal, vk::PipelineStageFlagBits::eTransfer,
+                         vk::AccessFlagBits::eTransferWrite);
+
+    const auto& src_rect = copy_config.src_rect;
+    const auto& dst_rect = copy_config.dst_rect;
+    // Each rectangle becomes a pair of corner offsets spanning z = [0, 1).
+    const auto src_top_left = vk::Offset3D(src_rect.left, src_rect.top, 0);
+    const auto src_bot_right = vk::Offset3D(src_rect.right, src_rect.bottom, 1);
+    const auto dst_top_left = vk::Offset3D(dst_rect.left, dst_rect.top, 0);
+    const auto dst_bot_right = vk::Offset3D(dst_rect.right, dst_rect.bottom, 1);
+    const vk::ImageBlit blit(src_view->GetImageSubresourceLayers(), {src_top_left, src_bot_right},
+                             dst_view->GetImageSubresourceLayers(), {dst_top_left, dst_bot_right});
+    const bool is_linear = copy_config.filter == Tegra::Engines::Fermi2D::Filter::Linear;
+
+    // The dispatch loader is supplied by the scheduler as the lambda's second argument.
+    scheduler.Record([src_image = src_view->GetImage(), dst_image = dst_view->GetImage(), blit,
+                      is_linear](auto cmdbuf, auto& dld) {
+        cmdbuf.blitImage(src_image, vk::ImageLayout::eTransferSrcOptimal, dst_image,
+                         vk::ImageLayout::eTransferDstOptimal, {blit},
+                         is_linear ? vk::Filter::eLinear : vk::Filter::eNearest, dld);
+    });
+}
+
+// Stub: performs no copy and only logs a warning. Both surface arguments are ignored.
+void VKTextureCache::BufferCopy(Surface& src_surface, Surface& dst_surface) {
+    // Currently unimplemented. PBO copies should be dropped and we should use a render pass to
+    // convert from color to depth and vice versa.
+    LOG_WARNING(Render_Vulkan, "Unimplemented");
+}
+
+} // namespace Vulkan
--- a/src/video_core/renderer_vulkan/vk_texture_cache.h
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.h
@@ -0,0 +1,239 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <memory>
+#include <unordered_map>
+
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "common/logging/log.h"
+#include "common/math_util.h"
+#include "video_core/gpu.h"
+#include "video_core/rasterizer_cache.h"
+#include "video_core/renderer_vulkan/declarations.h"
+#include "video_core/renderer_vulkan/vk_image.h"
+#include "video_core/renderer_vulkan/vk_memory_manager.h"
+#include "video_core/renderer_vulkan/vk_scheduler.h"
+#include "video_core/texture_cache/surface_base.h"
+#include "video_core/texture_cache/texture_cache.h"
+#include "video_core/textures/decoders.h"
+
+namespace Core {
+class System;
+}
+
+namespace VideoCore {
+class RasterizerInterface;
+}
+
+namespace Vulkan {
+
+class RasterizerVulkan;
+class VKDevice;
+class VKResourceManager;
+class VKScheduler;
+class VKStagingBufferPool;
+
+class CachedSurfaceView;
+class CachedSurface;
+
+using Surface = std::shared_ptr<CachedSurface>;
+using View = std::shared_ptr<CachedSurfaceView>;
+using TextureCacheBase = VideoCommon::TextureCache<Surface, View>;
+
+using VideoCommon::SurfaceParams;
+using VideoCommon::ViewParams;
+
+class CachedSurface final : public VideoCommon::SurfaceBase<View> { // Vulkan surface
+    friend CachedSurfaceView;
+
+public:
+    explicit CachedSurface(Core::System& system, const VKDevice& device,
+                           VKResourceManager& resource_manager, VKMemoryManager& memory_manager,
+                           VKScheduler& scheduler, VKStagingBufferPool& staging_pool,
+                           GPUVAddr gpu_addr, const SurfaceParams& params);
+    ~CachedSurface();
+
+    void UploadTexture(const std::vector<u8>& staging_buffer) override;
+    void DownloadTexture(std::vector<u8>& staging_buffer) override;
+
+    void FullTransition(vk::PipelineStageFlags new_stage_mask, vk::AccessFlags new_access,
+                        vk::ImageLayout new_layout) { // Covers all layers and mip levels
+        image->Transition(0, static_cast<u32>(params.GetNumLayers()), 0, params.num_levels,
+                          new_stage_mask, new_access, new_layout);
+    }
+
+    void Transition(u32 base_layer, u32 num_layers, u32 base_level, u32 num_levels,
+                    vk::PipelineStageFlags new_stage_mask, vk::AccessFlags new_access,
+                    vk::ImageLayout new_layout) { // Forwards the given subrange to the image
+        image->Transition(base_layer, num_layers, base_level, num_levels, new_stage_mask,
+                          new_access, new_layout);
+    }
+
+    VKImage& GetImage() {
+        return *image; // Unchecked: the optional must hold an image
+    }
+
+    const VKImage& GetImage() const {
+        return *image; // Unchecked: the optional must hold an image
+    }
+
+    vk::Image GetImageHandle() const {
+        return image->GetHandle();
+    }
+
+    vk::ImageAspectFlags GetAspectMask() const {
+        return image->GetAspectMask();
+    }
+
+    vk::BufferView GetBufferViewHandle() const {
+        return *buffer_view;
+    }
+
+protected:
+    void DecorateSurfaceName();
+
+    View CreateView(const ViewParams& params) override;
+    View CreateViewInner(const ViewParams& params, bool is_proxy);
+
+private:
+    void UploadBuffer(const std::vector<u8>& staging_buffer);
+
+    void UploadImage(const std::vector<u8>& staging_buffer);
+
+    vk::BufferImageCopy GetBufferImageCopy(u32 level) const;
+
+    vk::ImageSubresourceRange GetImageSubresourceRange() const;
+
+    Core::System& system;
+    const VKDevice& device;
+    VKResourceManager& resource_manager;
+    VKMemoryManager& memory_manager;
+    VKScheduler& scheduler;
+    VKStagingBufferPool& staging_pool;
+
+    std::optional<VKImage> image; // NOTE(review): <optional> is not included directly here
+    UniqueBuffer buffer;
+    UniqueBufferView buffer_view;
+    VKMemoryCommit commit;
+
+    vk::Format format;
+};
+
+class CachedSurfaceView final : public VideoCommon::ViewBase { // Layer/level window of a surface
+public:
+    explicit CachedSurfaceView(const VKDevice& device, CachedSurface& surface,
+                               const ViewParams& params, bool is_proxy);
+    ~CachedSurfaceView();
+
+    vk::ImageView GetHandle(Tegra::Texture::SwizzleSource x_source,
+                            Tegra::Texture::SwizzleSource y_source,
+                            Tegra::Texture::SwizzleSource z_source,
+                            Tegra::Texture::SwizzleSource w_source);
+
+    bool IsSameSurface(const CachedSurfaceView& rhs) const {
+        return &surface == &rhs.surface; // Compares object identity, not contents
+    }
+
+    vk::ImageView GetHandle() { // Identity swizzle (R, G, B, A)
+        return GetHandle(Tegra::Texture::SwizzleSource::R, Tegra::Texture::SwizzleSource::G,
+                         Tegra::Texture::SwizzleSource::B, Tegra::Texture::SwizzleSource::A);
+    }
+
+    u32 GetWidth() const {
+        return params.GetMipWidth(base_level); // Width at the view's base mip level
+    }
+
+    u32 GetHeight() const {
+        return params.GetMipHeight(base_level); // Height at the view's base mip level
+    }
+
+    bool IsBufferView() const {
+        return buffer_view; // NOTE(review): presumably true for a non-null handle
+    }
+
+    vk::Image GetImage() const {
+        return image;
+    }
+
+    vk::BufferView GetBufferView() const {
+        return buffer_view;
+    }
+
+    vk::ImageSubresourceRange GetImageSubresourceRange() const { // Uses the cached aspect_mask
+        return {aspect_mask, base_level, num_levels, base_layer, num_layers};
+    }
+
+    vk::ImageSubresourceLayers GetImageSubresourceLayers() const { // Base level; mask from surface
+        return {surface.GetAspectMask(), base_level, base_layer, num_layers};
+    }
+
+    void Transition(vk::ImageLayout new_layout, vk::PipelineStageFlags new_stage_mask,
+                    vk::AccessFlags new_access) const { // Surface takes new_layout last
+        surface.Transition(base_layer, num_layers, base_level, num_levels, new_stage_mask,
+                           new_access, new_layout);
+    }
+
+    void MarkAsModified(u64 tick) { // Forwards to the owning surface
+        surface.MarkAsModified(true, tick);
+    }
+
+private:
+    static u32 EncodeSwizzle(Tegra::Texture::SwizzleSource x_source,
+                             Tegra::Texture::SwizzleSource y_source,
+                             Tegra::Texture::SwizzleSource z_source,
+                             Tegra::Texture::SwizzleSource w_source) { // x on top, w lowest
+        return (static_cast<u32>(x_source) << 24) | (static_cast<u32>(y_source) << 16) |
+               (static_cast<u32>(z_source) << 8) | static_cast<u32>(w_source);
+    }
+
+    // Store a copy of these values to avoid double dereference when reading them
+    const SurfaceParams params;
+    const vk::Image image;
+    const vk::BufferView buffer_view;
+    const vk::ImageAspectFlags aspect_mask;
+
+    const VKDevice& device;
+    CachedSurface& surface;
+    const u32 base_layer;
+    const u32 num_layers;
+    const u32 base_level;
+    const u32 num_levels;
+    const vk::ImageViewType image_view_type;
+
+    vk::ImageView last_image_view; // NOTE(review): presumably the last GetHandle() result
+    u32 last_swizzle{};            // NOTE(review): presumably its EncodeSwizzle() key
+
+    std::unordered_map<u32, UniqueImageView> view_cache; // Presumably keyed by EncodeSwizzle()
+};
+
+class VKTextureCache final : public TextureCacheBase { // Vulkan texture cache backend
+public:
+    explicit VKTextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
+                            const VKDevice& device, VKResourceManager& resource_manager,
+                            VKMemoryManager& memory_manager, VKScheduler& scheduler,
+                            VKStagingBufferPool& staging_pool);
+    ~VKTextureCache();
+
+private: // Backend hooks overriding TextureCacheBase
+    Surface CreateSurface(GPUVAddr gpu_addr, const SurfaceParams& params) override;
+
+    void ImageCopy(Surface& src_surface, Surface& dst_surface,
+                   const VideoCommon::CopyParams& copy_params) override;
+
+    void ImageBlit(View& src_view, View& dst_view,
+                   const Tegra::Engines::Fermi2D::Config& copy_config) override;
+
+    void BufferCopy(Surface& src_surface, Surface& dst_surface) override;
+
+    const VKDevice& device;
+    VKResourceManager& resource_manager;
+    VKMemoryManager& memory_manager;
+    VKScheduler& scheduler;
+    VKStagingBufferPool& staging_pool;
+};
+
+} // namespace Vulkan
--- a/src/video_core/shader/control_flow.cpp
+++ b/src/video_core/shader/control_flow.cpp
@@ -65,7 +65,7 @@ struct BlockInfo {

 struct CFGRebuildState {
    explicit CFGRebuildState(const ProgramCode& program_code, u32 start, ConstBufferLocker& locker)
-        : program_code{program_code}, start{start}, locker{locker} {}
+        : program_code{program_code}, locker{locker}, start{start} {}

    const ProgramCode& program_code;
    ConstBufferLocker& locker;
--- a/src/video_core/shader/decode/memory.cpp
+++ b/src/video_core/shader/decode/memory.cpp
@@ -6,6 +6,7 @@
 #include <vector>
 #include <fmt/format.h>

+#include "common/alignment.h"
 #include "common/assert.h"
 #include "common/common_types.h"
 #include "common/logging/log.h"
@@ -15,6 +16,8 @@

 namespace VideoCommon::Shader {

+using Tegra::Shader::AtomicOp;
+using Tegra::Shader::AtomicType;
 using Tegra::Shader::Attribute;
 using Tegra::Shader::Instruction;
 using Tegra::Shader::OpCode;
@@ -22,34 +25,39 @@ using Tegra::Shader::Register;

 namespace {

-u32 GetLdgMemorySize(Tegra::Shader::UniformType uniform_type) {
+bool IsUnaligned(Tegra::Shader::UniformType uniform_type) {
+    return uniform_type == Tegra::Shader::UniformType::UnsignedByte ||
+           uniform_type == Tegra::Shader::UniformType::UnsignedShort;
+}
+
+u32 GetUnalignedMask(Tegra::Shader::UniformType uniform_type) {
    switch (uniform_type) {
    case Tegra::Shader::UniformType::UnsignedByte:
-    case Tegra::Shader::UniformType::Single:
-        return 1;
-    case Tegra::Shader::UniformType::Double:
-        return 2;
-    case Tegra::Shader::UniformType::Quad:
-    case Tegra::Shader::UniformType::UnsignedQuad:
-        return 4;
+        return 0b11;
+    case Tegra::Shader::UniformType::UnsignedShort:
+        return 0b10;
    default:
-        UNIMPLEMENTED_MSG("Unimplemented size={}!", static_cast<u32>(uniform_type));
-        return 1;
+        UNREACHABLE();
+        return 0;
    }
 }

-u32 GetStgMemorySize(Tegra::Shader::UniformType uniform_type) {
+u32 GetMemorySize(Tegra::Shader::UniformType uniform_type) {
    switch (uniform_type) {
+    case Tegra::Shader::UniformType::UnsignedByte:
+        return 8;
+    case Tegra::Shader::UniformType::UnsignedShort:
+        return 16;
    case Tegra::Shader::UniformType::Single:
-        return 1;
+        return 32;
    case Tegra::Shader::UniformType::Double:
-        return 2;
+        return 64;
    case Tegra::Shader::UniformType::Quad:
    case Tegra::Shader::UniformType::UnsignedQuad:
-        return 4;
+        return 128;
    default:
        UNIMPLEMENTED_MSG("Unimplemented size={}!", static_cast<u32>(uniform_type));
-        return 1;
+        return 32;
    }
 }

@@ -184,9 +192,10 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
        }();

        const auto [real_address_base, base_address, descriptor] =
-            TrackGlobalMemory(bb, instr, false);
+            TrackGlobalMemory(bb, instr, true, false);

-        const u32 count = GetLdgMemorySize(type);
+        const u32 size = GetMemorySize(type);
+        const u32 count = Common::AlignUp(size, 32) / 32;
        if (!real_address_base || !base_address) {
            // Tracking failed, load zeroes.
            for (u32 i = 0; i < count; ++i) {
@@ -200,14 +209,15 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
            const Node real_address = Operation(OperationCode::UAdd, real_address_base, it_offset);
            Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor);

-            if (type == Tegra::Shader::UniformType::UnsignedByte) {
-                // To handle unaligned loads get the byte used to dereferenced global memory
-                // and extract that byte from the loaded uint32.
-                Node byte = Operation(OperationCode::UBitwiseAnd, real_address, Immediate(3));
-                byte = Operation(OperationCode::ULogicalShiftLeft, std::move(byte), Immediate(3));
+            // To handle unaligned loads get the bytes used to dereference global memory and extract
+            // those bytes from the loaded u32.
+            if (IsUnaligned(type)) {
+                Node mask = Immediate(GetUnalignedMask(type));
+                Node offset = Operation(OperationCode::UBitwiseAnd, real_address, std::move(mask));
+                offset = Operation(OperationCode::ULogicalShiftLeft, offset, Immediate(3));

-                gmem = Operation(OperationCode::UBitfieldExtract, std::move(gmem), std::move(byte),
-                                 Immediate(8));
+                gmem = Operation(OperationCode::UBitfieldExtract, std::move(gmem),
+                                 std::move(offset), Immediate(size));
            }

            SetTemporary(bb, i, gmem);
@@ -295,23 +305,53 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
            }
        }();

+        // Unaligned stores are read-modify-write, so the memory has to be read too.
+        const bool is_read = IsUnaligned(type);
        const auto [real_address_base, base_address, descriptor] =
-            TrackGlobalMemory(bb, instr, true);
+            TrackGlobalMemory(bb, instr, is_read, true);
        if (!real_address_base || !base_address) {
            // Tracking failed, skip the store.
            break;
        }

-        const u32 count = GetStgMemorySize(type);
+        const u32 size = GetMemorySize(type);
+        const u32 count = Common::AlignUp(size, 32) / 32;
        for (u32 i = 0; i < count; ++i) {
            const Node it_offset = Immediate(i * 4);
            const Node real_address = Operation(OperationCode::UAdd, real_address_base, it_offset);
            const Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor);
-            const Node value = GetRegister(instr.gpr0.Value() + i);
+            Node value = GetRegister(instr.gpr0.Value() + i);
+
+            if (IsUnaligned(type)) {
+                Node mask = Immediate(GetUnalignedMask(type));
+                Node offset = Operation(OperationCode::UBitwiseAnd, real_address, std::move(mask));
+                offset = Operation(OperationCode::ULogicalShiftLeft, offset, Immediate(3));
+
+                value = Operation(OperationCode::UBitfieldInsert, gmem, std::move(value), offset,
+                                  Immediate(size));
+            }
+
            bb.push_back(Operation(OperationCode::Assign, gmem, value));
        }
        break;
    }
+    case OpCode::Id::ATOMS: {
+        UNIMPLEMENTED_IF_MSG(instr.atoms.operation != AtomicOp::Add, "operation={}",
+                             static_cast<int>(instr.atoms.operation.Value()));
+        UNIMPLEMENTED_IF_MSG(instr.atoms.type != AtomicType::U32, "type={}",
+                             static_cast<int>(instr.atoms.type.Value()));
+
+        const s32 offset = instr.atoms.GetImmediateOffset();
+        Node address = GetRegister(instr.gpr8);
+        address = Operation(OperationCode::IAdd, std::move(address), Immediate(offset));
+
+        Node memory = GetSharedMemory(std::move(address));
+        Node data = GetRegister(instr.gpr20);
+
+        Node value = Operation(OperationCode::UAtomicAdd, std::move(memory), std::move(data));
+        SetRegister(bb, instr.gpr0, std::move(value));
+        break;
+    }
    case OpCode::Id::AL2P: {
        // Ignore al2p.direction since we don't care about it.

@@ -336,7 +376,7 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {

 std::tuple<Node, Node, GlobalMemoryBase> ShaderIR::TrackGlobalMemory(NodeBlock& bb,
                                                                     Instruction instr,
-                                                                     bool is_write) {
+                                                                     bool is_read, bool is_write) {
    const auto addr_register{GetRegister(instr.gmem.gpr)};
    const auto immediate_offset{static_cast<u32>(instr.gmem.offset)};

@@ -351,11 +391,8 @@ std::tuple<Node, Node, GlobalMemoryBase> ShaderIR::TrackGlobalMemory(NodeBlock&
    const GlobalMemoryBase descriptor{index, offset};
    const auto& [entry, is_new] = used_global_memory.try_emplace(descriptor);
    auto& usage = entry->second;
-    if (is_write) {
-        usage.is_written = true;
-    } else {
-        usage.is_read = true;
-    }
+    usage.is_written |= is_write;
+    usage.is_read |= is_read;

    const auto real_address =
        Operation(OperationCode::UAdd, NO_PRECISE, Immediate(immediate_offset), addr_register);
--- a/src/video_core/shader/decode/texture.cpp
+++ b/src/video_core/shader/decode/texture.cpp
@@ -794,14 +794,10 @@ std::tuple<std::size_t, std::size_t> ShaderIR::ValidateAndGetCoordinateElement(

 std::vector<Node> ShaderIR::GetAoffiCoordinates(Node aoffi_reg, std::size_t coord_count,
                                                bool is_tld4) {
-    const auto [coord_offsets, size, wrap_value,
-                diff_value] = [is_tld4]() -> std::tuple<std::array<u32, 3>, u32, s32, s32> {
-        if (is_tld4) {
-            return {{0, 8, 16}, 6, 32, 64};
-        } else {
-            return {{0, 4, 8}, 4, 8, 16};
-        }
-    }();
+    const std::array coord_offsets = is_tld4 ? std::array{0U, 8U, 16U} : std::array{0U, 4U, 8U};
+    const u32 size = is_tld4 ? 6 : 4;
+    const s32 wrap_value = is_tld4 ? 32 : 8;
+    const s32 diff_value = is_tld4 ? 64 : 16;
    const u32 mask = (1U << size) - 1;

    std::vector<Node> aoffi;
@@ -814,7 +810,7 @@ std::vector<Node> ShaderIR::GetAoffiCoordinates(Node aoffi_reg, std::size_t coor
        LOG_WARNING(HW_GPU,
                    "AOFFI constant folding failed, some hardware might have graphical issues");
        for (std::size_t coord = 0; coord < coord_count; ++coord) {
-            const Node value = BitfieldExtract(aoffi_reg, coord_offsets.at(coord), size);
+            const Node value = BitfieldExtract(aoffi_reg, coord_offsets[coord], size);
            const Node condition =
                Operation(OperationCode::LogicalIGreaterEqual, value, Immediate(wrap_value));
            const Node negative = Operation(OperationCode::IAdd, value, Immediate(-diff_value));
@@ -824,7 +820,7 @@ std::vector<Node> ShaderIR::GetAoffiCoordinates(Node aoffi_reg, std::size_t coor
    }

    for (std::size_t coord = 0; coord < coord_count; ++coord) {
-        s32 value = (*aoffi_immediate >> coord_offsets.at(coord)) & mask;
+        s32 value = (*aoffi_immediate >> coord_offsets[coord]) & mask;
        if (value >= wrap_value) {
            value -= diff_value;
        }
--- a/src/video_core/shader/node.h
+++ b/src/video_core/shader/node.h
@@ -162,6 +162,8 @@ enum class OperationCode {
    AtomicImageXor,      /// (MetaImage, int[N] coords) -> void
    AtomicImageExchange, /// (MetaImage, int[N] coords) -> void

+    UAtomicAdd, /// (smem, uint) -> uint
+
    Branch,         /// (uint branch_target) -> void
    BranchIndirect, /// (uint branch_target) -> void
    PushFlowStack,  /// (uint branch_target) -> void
--- a/src/video_core/shader/shader_ir.h
+++ b/src/video_core/shader/shader_ir.h
@@ -394,7 +394,7 @@ private:

    std::tuple<Node, Node, GlobalMemoryBase> TrackGlobalMemory(NodeBlock& bb,
                                                               Tegra::Shader::Instruction instr,
-                                                               bool is_write);
+                                                               bool is_read, bool is_write);

    /// Register new amending code and obtain the reference id.
    std::size_t DeclareAmend(Node new_amend);
--- a/src/video_core/texture_cache/format_lookup_table.cpp
+++ b/src/video_core/texture_cache/format_lookup_table.cpp
@@ -95,7 +95,7 @@ constexpr std::array<Table, 74> DefinitionTable = {{
    {TextureFormat::ZF32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::Z32F},
    {TextureFormat::Z16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::Z16},
    {TextureFormat::S8Z24, C, UINT, UNORM, UNORM, UNORM, PixelFormat::S8Z24},
-    {TextureFormat::ZF32_X24S8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::Z32FS8},
+    {TextureFormat::ZF32_X24S8, C, FLOAT, UINT, UNORM, UNORM, PixelFormat::Z32FS8},

    {TextureFormat::DXT1, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::DXT1},
    {TextureFormat::DXT1, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::DXT1_SRGB},
--- a/src/video_core/texture_cache/surface_params.h
+++ b/src/video_core/texture_cache/surface_params.h
@@ -209,6 +209,11 @@ public:
        return target == VideoCore::Surface::SurfaceTarget::TextureBuffer;
    }

+    /// Returns the number of layers in the surface.
+    std::size_t GetNumLayers() const {
+        return is_layered ? depth : 1;
+    }
+
    /// Returns the debug name of the texture for use in graphic debuggers.
    std::string TargetName() const;

@@ -287,10 +292,6 @@ private:
    /// Returns the size of a layer
    std::size_t GetLayerSize(bool as_host_size, bool uncompressed) const;

-    std::size_t GetNumLayers() const {
-        return is_layered ? depth : 1;
-    }
-
    /// Returns true if these parameters are from a layered surface.
    bool IsLayered() const;
 };
--- a/src/yuzu/main.cpp
+++ b/src/yuzu/main.cpp
@@ -526,19 +526,30 @@ void GMainWindow::InitializeHotkeys() {

    const QString main_window = QStringLiteral("Main Window");
    const QString load_file = QStringLiteral("Load File");
+    const QString load_amiibo = QStringLiteral("Load Amiibo");
    const QString exit_yuzu = QStringLiteral("Exit yuzu");
+    const QString restart_emulation = QStringLiteral("Restart Emulation");
    const QString stop_emulation = QStringLiteral("Stop Emulation");
    const QString toggle_filter_bar = QStringLiteral("Toggle Filter Bar");
    const QString toggle_status_bar = QStringLiteral("Toggle Status Bar");
    const QString fullscreen = QStringLiteral("Fullscreen");
+    const QString capture_screenshot = QStringLiteral("Capture Screenshot");

    ui.action_Load_File->setShortcut(hotkey_registry.GetKeySequence(main_window, load_file));
    ui.action_Load_File->setShortcutContext(
        hotkey_registry.GetShortcutContext(main_window, load_file));

+    ui.action_Load_Amiibo->setShortcut(hotkey_registry.GetKeySequence(main_window, load_amiibo));
+    ui.action_Load_Amiibo->setShortcutContext(
+        hotkey_registry.GetShortcutContext(main_window, load_amiibo));
+
    ui.action_Exit->setShortcut(hotkey_registry.GetKeySequence(main_window, exit_yuzu));
    ui.action_Exit->setShortcutContext(hotkey_registry.GetShortcutContext(main_window, exit_yuzu));

+    ui.action_Restart->setShortcut(hotkey_registry.GetKeySequence(main_window, restart_emulation));
+    ui.action_Restart->setShortcutContext(
+        hotkey_registry.GetShortcutContext(main_window, restart_emulation));
+
    ui.action_Stop->setShortcut(hotkey_registry.GetKeySequence(main_window, stop_emulation));
    ui.action_Stop->setShortcutContext(
        hotkey_registry.GetShortcutContext(main_window, stop_emulation));
@@ -553,6 +564,11 @@ void GMainWindow::InitializeHotkeys() {
    ui.action_Show_Status_Bar->setShortcutContext(
        hotkey_registry.GetShortcutContext(main_window, toggle_status_bar));

+    ui.action_Capture_Screenshot->setShortcut(
+        hotkey_registry.GetKeySequence(main_window, capture_screenshot));
+    ui.action_Capture_Screenshot->setShortcutContext(
+        hotkey_registry.GetShortcutContext(main_window, capture_screenshot));
+
    connect(hotkey_registry.GetHotkey(main_window, QStringLiteral("Load File"), this),
            &QShortcut::activated, this, &GMainWindow::OnMenuLoadFile);
    connect(
--- a/src/yuzu/main.ui
+++ b/src/yuzu/main.ui
@@ -15,7 +15,7 @@
  </property>
  <property name="windowIcon">
   <iconset>
-    <normaloff>src/pcafe/res/icon3_64x64.ico</normaloff>src/pcafe/res/icon3_64x64.ico</iconset>
+    <normaloff>../dist/yuzu.ico</normaloff>../dist/yuzu.ico</iconset>
  </property>
  <property name="tabShape">
   <enum>QTabWidget::Rounded</enum>
@@ -98,6 +98,7 @@
    <addaction name="action_Display_Dock_Widget_Headers"/>
    <addaction name="action_Show_Filter_Bar"/>
    <addaction name="action_Show_Status_Bar"/>
+    <addaction name="separator"/>
    <addaction name="menu_View_Debugging"/>
   </widget>
   <widget class="QMenu" name="menu_Tools">
Author	SHA1	Message	Date
ReinUsesLisp	b2c976ad0e	vk_shader_decompiler: Implement UAtomicAdd (ATOMS) on SPIR-V Also updates sirit to include atomic instructions.	2020-01-19 16:40:31 -03:00
Fernando Sahmkow	51c8aea979	Merge pull request #3317 from ReinUsesLisp/gl-decomp-cc-decomp gl_shader_decompiler: Fix decompilation of condition codes	2020-01-18 19:56:55 -04:00
bunnei	94c41ab1d1	Merge pull request #3323 from ReinUsesLisp/fix-template-res gl_state: Use bool instead of GLboolean	2020-01-18 17:37:05 -05:00
ReinUsesLisp	d110a371bb	gl_state: Use bool instead of GLboolean This fixes template resolution considering GLboolean an integer instead of a bool.	2020-01-18 19:10:34 -03:00
bunnei	e972016456	Merge pull request #3298 from Simek/missing_hotkeys GUI: add few missing hotkeys to main menu	2020-01-18 13:07:13 -05:00
bunnei	278264b9e5	Merge pull request #3314 from degasus/physical_mem core/hle/kernel: Simplify PhysicalMemory usages.	2020-01-18 03:03:48 -05:00
Markus Wick	56672b8c98	core/memory: Create a special MapMemoryRegion for physical memory. This allows us to create a fastmem arena within the memory.cpp helpers.	2020-01-18 08:38:47 +01:00
Markus Wick	55103da066	core/hle: Simplify PhysicalMemory usage in vm_manager.	2020-01-18 08:29:19 +01:00
Markus Wick	7e94e544f4	core/loaders: Simplify PhysicalMemory usage. It is currently a std::vector, however we might want to replace it with a more fancy allocator. So we can't use the C++ iterators any more.	2020-01-18 08:29:19 +01:00
bunnei	9bf4850f74	Merge pull request #3305 from ReinUsesLisp/point-size-program gl_state: Implement PROGRAM_POINT_SIZE	2020-01-18 01:56:32 -05:00
bunnei	15163edaaa	Merge pull request #3312 from ReinUsesLisp/atoms-u32 shader/memory: Implement ATOMS.ADD.U32	2020-01-18 00:54:07 -05:00
bunnei	3cce5056ff	Merge pull request #3318 from jroweboy/remove-cpu-vendor Remove unused CPU Vendor string and telemetry field	2020-01-17 22:24:52 -05:00
James Rowe	4512a6bbfc	Remove unused CPU Vendor string and telemetry field The information is duplicated in the brand string and the telemetry field is unused	2020-01-17 18:41:18 -07:00
ReinUsesLisp	f34e519da3	gl_shader_decompiler: Fix decompilation of condition codes Use Visit instead of reimplementing it. Fixes unimplemented negations for condition codes.	2020-01-17 21:23:01 -03:00
bunnei	530a761e7a	Merge pull request #3316 from TotalCaesar659/linux-headbar-icon Add headbar icon on Linux	2020-01-17 19:04:10 -05:00
TotalCaesar659	dd74fd014b	Add headbar icon on Linux	2020-01-18 02:46:07 +03:00
bunnei	48863afb65	Merge pull request #3306 from ReinUsesLisp/gl-texture gl_texture_cache: Minor fixes and style changes	2020-01-17 15:44:02 -05:00
bunnei	657b3a366e	Merge pull request #3311 from ReinUsesLisp/z32fx24s8 format_lookup_table: Fix ZF32_X24S8 component types	2020-01-17 08:22:32 -05:00
bunnei	e041f33569	Merge pull request #3300 from ReinUsesLisp/vk-texture-cache vk_texture_cache: Implement generic texture cache on Vulkan	2020-01-16 19:19:26 -05:00
ReinUsesLisp	f09cd52980	vk_texture_cache: Address feedback	2020-01-16 18:23:10 -03:00
ReinUsesLisp	63ba41a26d	shader/memory: Implement ATOMS.ADD.U32	2020-01-16 17:30:55 -03:00
ReinUsesLisp	0caab54b5d	format_lookup_table: Fix ZF32_X24S8 component types Component types for ZF32_X24S8 were using UNORM. Drivers will set FLOAT, UINT, UNORM, UNORM; causing a format mismatch. This commit addresses that.	2020-01-16 17:29:13 -03:00
Rodrigo Locatti	82e1285c1e	vk_texture_cache: Fix typo in commentary Co-Authored-By: MysticExile <30736337+MysticExile@users.noreply.github.com>	2020-01-16 16:59:46 -03:00
bunnei	30faf6a964	Merge pull request #3308 from lioncash/private maxwell_3d: Make dirty_pointers private	2020-01-16 13:26:35 -05:00
bunnei	d23869811d	Merge pull request #3304 from lioncash/fwd-decl renderer_opengl/utils: Forward declare private structs	2020-01-16 11:21:18 -05:00
bunnei	a43ac8c79e	Merge pull request #3307 from jroweboy/fix-git Fix git version in scm_rev.cpp	2020-01-16 10:00:43 -05:00
Lioncash	9e874898f5	maxwell_3d: Make dirty_pointers private This isn't used outside of the class itself, so we can make it private for the time being.	2020-01-16 04:07:15 -05:00
James Rowe	b429095b61	Fix git version in scm_rev.cpp	2020-01-16 00:12:50 -07:00
ReinUsesLisp	c375d735e6	gl_state: Implement PROGRAM_POINT_SIZE For gl_PointSize to have effect we have to activate GL_PROGRAM_POINT_SIZE.	2020-01-15 16:14:17 -03:00
Lioncash	7af56dfa76	renderer_opengl/utils: Remove unused header inclusions Nothing from these headers is used, so they can be removed.	2020-01-15 06:31:23 -05:00
Lioncash	06d30fbcca	renderer_opengl/utils: Forward declare private structs Keeps the definitions hidden and allows changes to the structs without needing to recompile all users of classes containing said structs.	2020-01-15 06:30:01 -05:00
ReinUsesLisp	66a1c777c9	gl_texture_cache: Use local variables to simplify DownloadTexture	2020-01-14 17:39:48 -03:00
ReinUsesLisp	cdb00546f0	gl_texture_cache: Fix format for RGBX16F	2020-01-14 17:38:33 -03:00
ReinUsesLisp	2d09467f6f	gl_texture_cache: Use Snorm internal format for RG8S	2020-01-14 17:37:58 -03:00
ReinUsesLisp	02624c35ec	gl_texture_cache: Use Snorm internal format for ABGR8S	2020-01-14 17:37:23 -03:00
Rodrigo Locatti	64cd46579b	Merge pull request #3303 from lioncash/reorder control_flow: Silence -Wreorder warning for CFGRebuildState	2020-01-14 16:15:18 -03:00
Rodrigo Locatti	81e9e229fa	Merge pull request #3302 from lioncash/unused-var gl_shader_cache: Remove unused variables	2020-01-14 16:14:47 -03:00
Lioncash	a1eee1749e	control_flow: Silence -Wreorder warning for CFGRebuildState Organizes the initializer list in the same order that the variables would actually be initialized in.	2020-01-14 13:28:48 -05:00
bunnei	a83e28b237	Merge pull request #3296 from Simek/hotkeys_resize GUI/configure: resize hotkeys action column to fit content	2020-01-14 13:17:16 -05:00
Lioncash	f10ea944e0	gl_shader_cache: Remove unused STAGE_RESERVED_UBOS constant Given this isn't used, this can be removed entirely.	2020-01-14 13:16:52 -05:00
Lioncash	4cd5ad90f3	gl_shader_cache: std::move entries in CachedShader constructor Avoids several reallocations of std::vector instances where applicable.	2020-01-14 13:14:16 -05:00
Lioncash	15a6840e7a	gl_shader_cache: Remove unused entries variable in BuildShader() Eliminates a few unnecessary constructions of std::vectors.	2020-01-14 13:11:49 -05:00
bunnei	55f95e7f26	Merge pull request #3287 from ReinUsesLisp/ldg-stg-16 shader_ir/memory: Implement u16 and u8 for STG and LDG	2020-01-14 09:57:08 -05:00
bunnei	15788ffcde	Merge pull request #3288 from ReinUsesLisp/uncurse-aoffi shader_ir/texture: Simplify AOFFI code	2020-01-13 23:52:12 -05:00
bunnei	6985eea519	Merge pull request #3290 from ReinUsesLisp/gl-clamp maxwell_to_vk: Implement GL_CLAMP hacking Nvidia's driver	2020-01-13 19:16:06 -05:00
bunnei	e749f17257	Merge pull request #3292 from degasus/heap_space_fix core/kernel: Fix GetTotalPhysicalMemoryUsed.	2020-01-13 19:15:43 -05:00
ReinUsesLisp	09e17fbb0f	vk_texture_cache: Implement generic texture cache on Vulkan It currently ignores PBO linearizations since these should be dropped as soon as possible on OpenGL.	2020-01-13 20:37:50 -03:00
ReinUsesLisp	2b2712fa95	texture_cache/surface_params: Make GetNumLayers public	2020-01-13 20:35:43 -03:00
Bartosz Kaszubowski	da3049aa74	GUI: add few missing hotkeys to main menu	2020-01-13 00:49:44 +01:00
Markus Wick	c76ffa5019	core/kernel: Fix GetTotalPhysicalMemoryUsed. module._memory was already moved over to a new shared_ptr. So code_memory_size was not increased at all. This lowers the heap space and so saves a bit of memory, usually between 50 to 100 MB. This fixes a regression of `c0a01f3adc`	2020-01-11 14:04:44 +01:00
ReinUsesLisp	3d46709b7f	maxwell_to_vk: Implement GL_CLAMP hacking Nvidia's driver Nvidia's driver defaults invalid enumerations to GL_CLAMP. Vulkan doesn't expose GL_CLAMP through its API, but we can hack it on Nvidia's driver using the internal driver defaults.	2020-01-10 17:12:50 -03:00
ReinUsesLisp	13021b534c	shader_ir/texture: Simplify AOFFI code	2020-01-09 03:50:37 -03:00
ReinUsesLisp	e2a2a556b9	shader_ir/memory: Implement u16 and u8 for STG and LDG Using the same technique we used for u8 on LDG, implement u16. In the case of STG, load memory and insert the value we want to set into it with bitfieldInsert. Then set that value.	2020-01-09 02:12:29 -03:00