Texture Cache: Improve documentation

Texture Cache: Address Feedback
Texture Cache: Add HLE methods for building 3D textures within the GPU in certain scenarios.
2019-12-22 12:29:23 -04:00 · 2019-12-22 12:24:34 -04:00 · 2019-12-22 12:24:34 -04:00 · 2019-12-22 11:23:09 -04:00 · 2019-12-22 11:20:55 -04:00 · 2019-12-21 22:50:28 -05:00
17 changed files with 637 additions and 295 deletions
--- a/.appveyor/UtilityFunctions.ps1
+++ b/.appveyor/UtilityFunctions.ps1
@@ -1,39 +0,0 @@
-# Set-up Visual Studio Command Prompt environment for PowerShell
-pushd "C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\Common7\Tools\"
-cmd /c "VsDevCmd.bat -arch=x64 & set" | foreach {
-    if ($_ -match "=") {
-        $v = $_.split("="); Set-Item -Force -Path "ENV:\$($v[0])" -Value "$($v[1])"
-    }
-}
-popd
-
-function Which ($search_path, $name) {
-    ($search_path).Split(";") | Get-ChildItem -Filter $name | Select -First 1 -Exp FullName
-}
-
-function GetDeps ($search_path, $binary) {
-    ((dumpbin /dependents $binary).Where({ $_ -match "dependencies:"}, "SkipUntil") | Select-String "[^ ]*\.dll").Matches | foreach {
-        Which $search_path $_.Value
-    }
-}
-
-function RecursivelyGetDeps ($search_path, $binary) {
-    $final_deps = @()
-    $deps_to_process = GetDeps $search_path $binary
-    while ($deps_to_process.Count -gt 0) {
-        $current, $deps_to_process = $deps_to_process
-        if ($final_deps -contains $current) { continue }
-
-        # Is this a system dll file?
-        # We use the same algorithm that cmake uses to determine this.
-        if ($current -match "$([regex]::Escape($env:SystemRoot))\\sys") { continue }
-        if ($current -match "$([regex]::Escape($env:WinDir))\\sys") { continue }
-        if ($current -match "\\msvc[^\\]+dll") { continue }
-        if ($current -match "\\api-ms-win-[^\\]+dll") { continue }
-
-        $final_deps += $current
-        $new_deps = GetDeps $search_path $current
-        $deps_to_process += ($new_deps | ?{-not ($final_deps -contains $_)})
-    }
-    return $final_deps
-}
--- a/appveyor.yml
+++ b/appveyor.yml
@@ -1,178 +0,0 @@
-# shallow clone
-clone_depth: 10
-
-cache:
-  - C:\ProgramData\chocolatey\bin -> appveyor.yml
-  - C:\ProgramData\chocolatey\lib -> appveyor.yml
-
-os: Visual Studio 2017
-
-environment:
-  # Tell msys2 to add mingw64 to the path
-  MSYSTEM: MINGW64
-  # Tell msys2 to inherit the current directory when starting the shell
-  CHERE_INVOKING: 1
-  matrix:
-    - BUILD_TYPE: msvc
-    - BUILD_TYPE: mingw
-
-platform:
-  - x64
-
-configuration:
-  - Release
-
-install:
-  - git submodule update --init --recursive
-  - ps: |
-        if ($env:BUILD_TYPE -eq 'mingw') {
-          $dependencies = "mingw64/mingw-w64-x86_64-cmake",
-                          "mingw64/mingw-w64-x86_64-qt5",
-                          "mingw64/mingw-w64-x86_64-SDL2"
-          # redirect err to null to prevent warnings from becoming errors
-          # workaround to prevent pacman from failing due to cyclical dependencies
-          C:\msys64\usr\bin\bash -lc "pacman --noconfirm -S mingw64/mingw-w64-x86_64-freetype mingw64/mingw-w64-x86_64-fontconfig" 2> $null
-          C:\msys64\usr\bin\bash -lc "pacman --noconfirm -S $dependencies" 2> $null
-        }
-
-before_build:
-  - mkdir %BUILD_TYPE%_build
-  - cd %BUILD_TYPE%_build
-  - ps: |
-        $COMPAT = if ($env:ENABLE_COMPATIBILITY_REPORTING -eq $null) {0} else {$env:ENABLE_COMPATIBILITY_REPORTING}
-        if ($env:BUILD_TYPE -eq 'msvc') {
-          # redirect stderr and change the exit code to prevent powershell from cancelling the build if cmake prints a warning
-          cmd /C 'cmake -G "Visual Studio 15 2017 Win64" -DYUZU_USE_BUNDLED_QT=1 -DYUZU_USE_BUNDLED_SDL2=1 -DYUZU_USE_BUNDLED_UNICORN=1 -DYUZU_USE_QT_WEB_ENGINE=ON -DENABLE_COMPATIBILITY_LIST_DOWNLOAD=ON -DYUZU_ENABLE_COMPATIBILITY_REPORTING=${COMPAT} -DUSE_DISCORD_PRESENCE=ON .. 2>&1 && exit 0'
-        } else {
-          C:\msys64\usr\bin\bash.exe -lc "cmake -G 'MSYS Makefiles' -DYUZU_BUILD_UNICORN=1 -DCMAKE_BUILD_TYPE=Release -DENABLE_COMPATIBILITY_LIST_DOWNLOAD=ON -DYUZU_ENABLE_COMPATIBILITY_REPORTING=${COMPAT} -DUSE_DISCORD_PRESENCE=ON .. 2>&1"
-        }
-  - cd ..
-
-build_script:
-  - ps: |
-        if ($env:BUILD_TYPE -eq 'msvc') {
-          # https://www.appveyor.com/docs/build-phase
-          msbuild msvc_build/yuzu.sln /maxcpucount /logger:"C:\Program Files\AppVeyor\BuildAgent\Appveyor.MSBuildLogger.dll"
-        } else {
-          C:\msys64\usr\bin\bash.exe -lc 'mingw32-make -C mingw_build/ 2>&1'
-        }
-
-after_build:
-  - ps: |
-        $GITDATE = $(git show -s --date=short --format='%ad') -replace "-",""
-        $GITREV = $(git show -s --format='%h')
-
-        # Find out which kind of release we are producing by tag name
-        if ($env:APPVEYOR_REPO_TAG_NAME) {
-          $RELEASE_DIST, $RELEASE_VERSION = $env:APPVEYOR_REPO_TAG_NAME.split('-')
-        } else {
-          # There is no repo tag - make assumptions
-          $RELEASE_DIST = "head"
-        }
-
-        if ($env:BUILD_TYPE -eq 'msvc') {
-          # Where are these spaces coming from? Regardless, let's remove them
-          $MSVC_BUILD_ZIP = "yuzu-windows-msvc-$GITDATE-$GITREV.zip" -replace " ", ""
-          $MSVC_BUILD_PDB = "yuzu-windows-msvc-$GITDATE-$GITREV-debugsymbols.zip" -replace " ", ""
-          $MSVC_SEVENZIP = "yuzu-windows-msvc-$GITDATE-$GITREV.7z" -replace " ", ""
-
-          # set the build names as env vars so the artifacts can upload them
-          $env:BUILD_ZIP = $MSVC_BUILD_ZIP
-          $env:BUILD_SYMBOLS = $MSVC_BUILD_PDB
-          $env:BUILD_UPDATE = $MSVC_SEVENZIP
-
-          $BUILD_DIR = ".\msvc_build\bin\Release"
-
-          # Make a debug symbol upload
-          mkdir pdb
-          Get-ChildItem "$BUILD_DIR\" -Recurse -Filter "*.pdb" | Copy-Item -destination .\pdb
-          7z a -tzip $MSVC_BUILD_PDB .\pdb\*.pdb
-          rm "$BUILD_DIR\*.pdb"
-
-          mkdir $RELEASE_DIST
-          # get rid of extra exes by copying everything over, then deleting all the exes, then copying just the exes we want
-          Copy-Item "$BUILD_DIR\*" -Destination $RELEASE_DIST -Recurse
-          rm "$RELEASE_DIST\*.exe"
-          Get-ChildItem "$BUILD_DIR" -Recurse -Filter "yuzu*.exe" | Copy-Item -destination $RELEASE_DIST
-          Get-ChildItem "$BUILD_DIR" -Recurse -Filter "QtWebEngineProcess*.exe" | Copy-Item -destination $RELEASE_DIST
-          Copy-Item .\license.txt -Destination $RELEASE_DIST
-          Copy-Item .\README.md -Destination $RELEASE_DIST
-          7z a -tzip $MSVC_BUILD_ZIP $RELEASE_DIST\*
-          7z a $MSVC_SEVENZIP $RELEASE_DIST
-        } else {
-          $MINGW_BUILD_ZIP = "yuzu-windows-mingw-$GITDATE-$GITREV.zip" -replace " ", ""
-          $MINGW_SEVENZIP = "yuzu-windows-mingw-$GITDATE-$GITREV.7z" -replace " ", ""
-          # not going to bother adding separate debug symbols for mingw, so just upload a README for it
-          # if someone wants to add them, change mingw to compile with -g and use objdump and strip to separate the symbols from the binary
-          $MINGW_NO_DEBUG_SYMBOLS = "README_No_Debug_Symbols.txt"
-          Set-Content -Path $MINGW_NO_DEBUG_SYMBOLS -Value "This is a workaround for Appveyor since msvc has debug symbols but mingw doesnt" -Force
-
-          # store the build information in env vars so we can use them as artifacts
-          $env:BUILD_ZIP = $MINGW_BUILD_ZIP
-          $env:BUILD_SYMBOLS = $MINGW_NO_DEBUG_SYMBOLS
-          $env:BUILD_UPDATE = $MINGW_SEVENZIP
-
-          $CMAKE_SOURCE_DIR = "$env:APPVEYOR_BUILD_FOLDER"
-          $CMAKE_BINARY_DIR = "$CMAKE_SOURCE_DIR/mingw_build/bin"
-          $RELEASE_DIST = $RELEASE_DIST + "-mingw"
-
-          mkdir $RELEASE_DIST
-          mkdir $RELEASE_DIST/platforms
-          mkdir $RELEASE_DIST/styles
-          mkdir $RELEASE_DIST/imageformats
-
-          # copy the compiled binaries and other release files to the release folder
-          Get-ChildItem "$CMAKE_BINARY_DIR" -Filter "yuzu*.exe" | Copy-Item -destination $RELEASE_DIST
-          Copy-Item -path "$CMAKE_SOURCE_DIR/license.txt" -destination $RELEASE_DIST
-          Copy-Item -path "$CMAKE_SOURCE_DIR/README.md" -destination $RELEASE_DIST
-
-          # copy the qt windows plugin dll to platforms
-          Copy-Item -path "C:/msys64/mingw64/share/qt5/plugins/platforms/qwindows.dll" -force -destination "$RELEASE_DIST/platforms"
-
-          # copy the qt windows vista style dll to platforms
-          Copy-Item -path "C:/msys64/mingw64/share/qt5/plugins/styles/qwindowsvistastyle.dll" -force -destination "$RELEASE_DIST/styles"
-
-          # copy the qt jpeg imageformat dll to platforms
-          Copy-Item -path "C:/msys64/mingw64/share/qt5/plugins/imageformats/qjpeg.dll" -force -destination "$RELEASE_DIST/imageformats"
-
-          # copy all the dll dependencies to the release folder
-          . "./.appveyor/UtilityFunctions.ps1"
-          $DLLSearchPath = "C:\msys64\mingw64\bin;$env:PATH"
-          $MingwDLLs = RecursivelyGetDeps $DLLSearchPath "$RELEASE_DIST\yuzu.exe"
-          $MingwDLLs += RecursivelyGetDeps $DLLSearchPath  "$RELEASE_DIST\yuzu_cmd.exe"
-          $MingwDLLs += RecursivelyGetDeps $DLLSearchPath  "$RELEASE_DIST\imageformats\qjpeg.dll"
-          Write-Host "Detected the following dependencies:"
-          Write-Host $MingwDLLs
-          foreach ($file in $MingwDLLs) {
-            Copy-Item -path "$file" -force -destination "$RELEASE_DIST"
-          }
-
-          7z a -tzip $MINGW_BUILD_ZIP $RELEASE_DIST\*
-          7z a $MINGW_SEVENZIP $RELEASE_DIST
-        }
-
-test_script:
-  - cd %BUILD_TYPE%_build
-  - ps: |
-        if ($env:BUILD_TYPE -eq 'msvc') {
-          ctest -VV -C Release
-        } else {
-          C:\msys64\usr\bin\bash.exe -lc "ctest -VV -C Release"
-        }
-  - cd ..
-
-artifacts:
-  - path: $(BUILD_ZIP)
-    name: build
-    type: zip
-
-deploy:
-  provider: GitHub
-  release: $(appveyor_repo_tag_name)
-  auth_token:
-    secure: QqePPnXbkzmXct5c8hZ2X5AbsthbI6cS1Sr+VBzcD8oUOIjfWJJKXVAQGUbQAbb0
-  artifact: update,build
-  draft: false
-  prerelease: false
-  on:
-    appveyor_repo_tag: true
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -1973,7 +1973,7 @@ private:
            INST("1101-01---------", Id::TLDS, Type::Texture, "TLDS"),
            INST("110010----111---", Id::TLD4, Type::Texture, "TLD4"),
            INST("1101111011111---", Id::TLD4_B, Type::Texture, "TLD4_B"),
-            INST("11011111--00----", Id::TLD4S, Type::Texture, "TLD4S"),
+            INST("11011111-0------", Id::TLD4S, Type::Texture, "TLD4S"),
            INST("110111110110----", Id::TMML_B, Type::Texture, "TMML_B"),
            INST("1101111101011---", Id::TMML, Type::Texture, "TMML"),
            INST("11011110011110--", Id::TXD_B, Type::Texture, "TXD_B"),
--- a/src/video_core/renderer_vulkan/shaders/blit.frag
+++ b/src/video_core/renderer_vulkan/shaders/blit.frag
@@ -0,0 +1,24 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+/*
+ * Build instructions:
+ * $ glslangValidator -V $THIS_FILE -o output.spv
+ * $ spirv-opt -O --strip-debug output.spv -o optimized.spv
+ * $ xxd -i optimized.spv
+ *
+ * Then copy that bytecode to the C++ file
+ */
+
+#version 460 core
+
+layout (location = 0) in vec2 frag_tex_coord;
+
+layout (location = 0) out vec4 color;
+
+layout (binding = 1) uniform sampler2D color_texture;
+
+void main() {
+    color = texture(color_texture, frag_tex_coord);
+}
--- a/src/video_core/renderer_vulkan/shaders/blit.vert
+++ b/src/video_core/renderer_vulkan/shaders/blit.vert
@@ -0,0 +1,28 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+/*
+ * Build instructions:
+ * $ glslangValidator -V $THIS_FILE -o output.spv
+ * $ spirv-opt -O --strip-debug output.spv -o optimized.spv
+ * $ xxd -i optimized.spv
+ *
+ * Then copy that bytecode to the C++ file
+ */
+
+#version 460 core
+
+layout (location = 0) in vec2 vert_position;
+layout (location = 1) in vec2 vert_tex_coord;
+
+layout (location = 0) out vec2 frag_tex_coord;
+
+layout (set = 0, binding = 0) uniform MatrixBlock {
+    mat4 modelview_matrix;
+};
+
+void main() {
+    gl_Position = modelview_matrix * vec4(vert_position, 0.0, 1.0);
+    frag_tex_coord = vert_tex_coord;
+}
--- a/src/video_core/renderer_vulkan/shaders/quad_array.comp
+++ b/src/video_core/renderer_vulkan/shaders/quad_array.comp
@@ -0,0 +1,37 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+/*
+ * Build instructions:
+ * $ glslangValidator -V $THIS_FILE -o output.spv
+ * $ spirv-opt -O --strip-debug output.spv -o optimized.spv
+ * $ xxd -i optimized.spv
+ *
+ * Then copy that bytecode to the C++ file
+ */
+
+#version 460 core
+
+layout (local_size_x = 1024) in;
+
+layout (std430, set = 0, binding = 0) buffer OutputBuffer {
+    uint output_indexes[];
+};
+
+layout (push_constant) uniform PushConstants {
+    uint first;
+};
+
+void main() {
+    uint primitive = gl_GlobalInvocationID.x;
+    if (primitive * 6 >= output_indexes.length()) {
+        return;
+    }
+
+    const uint quad_map[6] = uint[](0, 1, 2, 0, 2, 3);
+    for (uint vertex = 0; vertex < 6; ++vertex) {
+        uint index = first + primitive * 4 + quad_map[vertex];
+        output_indexes[primitive * 6 + vertex] = index;
+    }
+}
--- a/src/video_core/renderer_vulkan/shaders/uint8.comp
+++ b/src/video_core/renderer_vulkan/shaders/uint8.comp
@@ -0,0 +1,33 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+/*
+ * Build instructions:
+ * $ glslangValidator -V $THIS_FILE -o output.spv
+ * $ spirv-opt -O --strip-debug output.spv -o optimized.spv
+ * $ xxd -i optimized.spv
+ *
+ * Then copy that bytecode to the C++ file
+ */
+
+#version 460 core
+#extension GL_EXT_shader_16bit_storage : require
+#extension GL_EXT_shader_8bit_storage : require
+
+layout (local_size_x = 1024) in;
+
+layout (std430, set = 0, binding = 0) readonly buffer InputBuffer {
+    uint8_t input_indexes[];
+};
+
+layout (std430, set = 0, binding = 1) writeonly buffer OutputBuffer {
+    uint16_t output_indexes[];
+};
+
+void main() {
+    uint id = gl_GlobalInvocationID.x;
+    if (id < input_indexes.length()) {
+        output_indexes[id] = uint16_t(input_indexes[id]);
+    }
+}
--- a/src/video_core/renderer_vulkan/vk_scheduler.cpp
+++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp
@@ -3,7 +3,7 @@
 // Refer to the license.txt file included.

 #include "common/assert.h"
-#include "common/logging/log.h"
+#include "common/microprofile.h"
 #include "video_core/renderer_vulkan/declarations.h"
 #include "video_core/renderer_vulkan/vk_device.h"
 #include "video_core/renderer_vulkan/vk_resource_manager.h"
@@ -11,46 +11,172 @@

 namespace Vulkan {

-VKScheduler::VKScheduler(const VKDevice& device, VKResourceManager& resource_manager)
-    : device{device}, resource_manager{resource_manager} {
-    next_fence = &resource_manager.CommitFence();
-    AllocateNewContext();
+MICROPROFILE_DECLARE(Vulkan_WaitForWorker);
+
+void VKScheduler::CommandChunk::ExecuteAll(vk::CommandBuffer cmdbuf,
+                                           const vk::DispatchLoaderDynamic& dld) {
+    auto command = first;
+    while (command != nullptr) {
+        auto next = command->GetNext();
+        command->Execute(cmdbuf, dld);
+        command->~Command();
+        command = next;
+    }
+
+    command_offset = 0;
+    first = nullptr;
+    last = nullptr;
 }

-VKScheduler::~VKScheduler() = default;
+VKScheduler::VKScheduler(const VKDevice& device, VKResourceManager& resource_manager)
+    : device{device}, resource_manager{resource_manager}, next_fence{
+                                                              &resource_manager.CommitFence()} {
+    AcquireNewChunk();
+    AllocateNewContext();
+    worker_thread = std::thread(&VKScheduler::WorkerThread, this);
+}
+
+VKScheduler::~VKScheduler() {
+    { std::unique_lock lock{mutex}; quit = true; } // Set under the mutex: no lost wakeup.
+    cv.notify_all(); // Wake the worker so it observes quit and leaves its loop.
+    worker_thread.join();
+}

 void VKScheduler::Flush(bool release_fence, vk::Semaphore semaphore) {
    SubmitExecution(semaphore);
-    if (release_fence)
+    if (release_fence) {
        current_fence->Release();
+    }
    AllocateNewContext();
 }

 void VKScheduler::Finish(bool release_fence, vk::Semaphore semaphore) {
    SubmitExecution(semaphore);
    current_fence->Wait();
-    if (release_fence)
+    if (release_fence) {
        current_fence->Release();
+    }
    AllocateNewContext();
 }

+void VKScheduler::WaitWorker() {
+    MICROPROFILE_SCOPE(Vulkan_WaitForWorker);
+    DispatchWork();
+
+    bool finished = false;
+    do {
+        cv.notify_all();
+        std::unique_lock lock{mutex};
+        finished = chunk_queue.Empty();
+    } while (!finished);
+}
+
+void VKScheduler::DispatchWork() {
+    if (chunk->Empty()) {
+        return;
+    }
+    chunk_queue.Push(std::move(chunk));
+    cv.notify_all();
+    AcquireNewChunk();
+}
+
+void VKScheduler::RequestRenderpass(const vk::RenderPassBeginInfo& renderpass_bi) {
+    if (state.renderpass && renderpass_bi == *state.renderpass) {
+        return;
+    }
+    const bool end_renderpass = state.renderpass.has_value();
+    state.renderpass = renderpass_bi;
+    Record([renderpass_bi, end_renderpass](auto cmdbuf, auto& dld) {
+        if (end_renderpass) {
+            cmdbuf.endRenderPass(dld);
+        }
+        cmdbuf.beginRenderPass(renderpass_bi, vk::SubpassContents::eInline, dld);
+    });
+}
+
+void VKScheduler::RequestOutsideRenderPassOperationContext() {
+    EndRenderPass();
+}
+
+void VKScheduler::BindGraphicsPipeline(vk::Pipeline pipeline) {
+    if (state.graphics_pipeline == pipeline) {
+        return;
+    }
+    state.graphics_pipeline = pipeline;
+    Record([pipeline](auto cmdbuf, auto& dld) {
+        cmdbuf.bindPipeline(vk::PipelineBindPoint::eGraphics, pipeline, dld);
+    });
+}
+
+void VKScheduler::WorkerThread() {
+    std::unique_lock lock{mutex};
+    do {
+        cv.wait(lock, [this] { return !chunk_queue.Empty() || quit; });
+        if (quit) {
+            continue;
+        }
+        auto extracted_chunk = std::move(chunk_queue.Front());
+        chunk_queue.Pop();
+        extracted_chunk->ExecuteAll(current_cmdbuf, device.GetDispatchLoader());
+        chunk_reserve.Push(std::move(extracted_chunk));
+    } while (!quit);
+}
+
 void VKScheduler::SubmitExecution(vk::Semaphore semaphore) {
+    EndPendingOperations();
+    InvalidateState();
+    WaitWorker();
+
+    std::unique_lock lock{mutex};
+
+    const auto queue = device.GetGraphicsQueue();
    const auto& dld = device.GetDispatchLoader();
    current_cmdbuf.end(dld);

-    const auto queue = device.GetGraphicsQueue();
-    const vk::SubmitInfo submit_info(0, nullptr, nullptr, 1, &current_cmdbuf, semaphore ? 1u : 0u,
+    const vk::SubmitInfo submit_info(0, nullptr, nullptr, 1, &current_cmdbuf, semaphore ? 1U : 0U,
                                     &semaphore);
-    queue.submit({submit_info}, *current_fence, dld);
+    queue.submit({submit_info}, static_cast<vk::Fence>(*current_fence), dld);
 }

 void VKScheduler::AllocateNewContext() {
+    std::unique_lock lock{mutex};
    current_fence = next_fence;
-    current_cmdbuf = resource_manager.CommitCommandBuffer(*current_fence);
    next_fence = &resource_manager.CommitFence();

-    const auto& dld = device.GetDispatchLoader();
-    current_cmdbuf.begin({vk::CommandBufferUsageFlagBits::eOneTimeSubmit}, dld);
+    current_cmdbuf = resource_manager.CommitCommandBuffer(*current_fence);
+    current_cmdbuf.begin({vk::CommandBufferUsageFlagBits::eOneTimeSubmit},
+                         device.GetDispatchLoader());
+}
+
+void VKScheduler::InvalidateState() {
+    state.graphics_pipeline = nullptr;
+    state.viewports = false;
+    state.scissors = false;
+    state.depth_bias = false;
+    state.blend_constants = false;
+    state.depth_bounds = false;
+    state.stencil_values = false;
+}
+
+void VKScheduler::EndPendingOperations() {
+    EndRenderPass();
+}
+
+void VKScheduler::EndRenderPass() {
+    if (!state.renderpass) {
+        return;
+    }
+    state.renderpass = std::nullopt;
+    Record([](auto cmdbuf, auto& dld) { cmdbuf.endRenderPass(dld); });
+}
+
+void VKScheduler::AcquireNewChunk() {
+    if (chunk_reserve.Empty()) {
+        chunk = std::make_unique<CommandChunk>();
+        return;
+    }
+    chunk = std::move(chunk_reserve.Front());
+    chunk_reserve.Pop();
 }

 } // namespace Vulkan
--- a/src/video_core/renderer_vulkan/vk_scheduler.h
+++ b/src/video_core/renderer_vulkan/vk_scheduler.h
@@ -4,7 +4,14 @@

 #pragma once

+#include <condition_variable>
+#include <memory>
+#include <optional>
+#include <stack>
+#include <thread>
+#include <utility>
 #include "common/common_types.h"
+#include "common/threadsafe_queue.h"
 #include "video_core/renderer_vulkan/declarations.h"

 namespace Vulkan {
@@ -30,23 +37,6 @@ private:
    VKFence* const& fence;
 };

-class VKCommandBufferView {
-public:
-    VKCommandBufferView() = default;
-    VKCommandBufferView(const vk::CommandBuffer& cmdbuf) : cmdbuf{cmdbuf} {}
-
-    const vk::CommandBuffer* operator->() const noexcept {
-        return &cmdbuf;
-    }
-
-    operator vk::CommandBuffer() const noexcept {
-        return cmdbuf;
-    }
-
-private:
-    const vk::CommandBuffer& cmdbuf;
-};
-
 /// The scheduler abstracts command buffer and fence management with an interface that's able to do
 /// OpenGL-like operations on Vulkan command buffers.
 class VKScheduler {
@@ -54,32 +44,190 @@ public:
    explicit VKScheduler(const VKDevice& device, VKResourceManager& resource_manager);
    ~VKScheduler();

-    /// Gets a reference to the current fence.
-    VKFenceView GetFence() const {
-        return current_fence;
-    }
-
-    /// Gets a reference to the current command buffer.
-    VKCommandBufferView GetCommandBuffer() const {
-        return current_cmdbuf;
-    }
-
    /// Sends the current execution context to the GPU.
    void Flush(bool release_fence = true, vk::Semaphore semaphore = nullptr);

    /// Sends the current execution context to the GPU and waits for it to complete.
    void Finish(bool release_fence = true, vk::Semaphore semaphore = nullptr);

+    /// Waits for the worker thread to finish executing everything. After this function returns it's
+    /// safe to touch worker resources.
+    void WaitWorker();
+
+    /// Sends currently recorded work to the worker thread.
+    void DispatchWork();
+
+    /// Requests to begin a renderpass.
+    void RequestRenderpass(const vk::RenderPassBeginInfo& renderpass_bi);
+
+    /// Requests the current execution context to be able to execute operations only allowed outside
+    /// of a renderpass.
+    void RequestOutsideRenderPassOperationContext();
+
+    /// Binds a pipeline to the current execution context.
+    void BindGraphicsPipeline(vk::Pipeline pipeline);
+
+    /// Returns true when viewports have been set in the current command buffer.
+    bool TouchViewports() {
+        return std::exchange(state.viewports, true);
+    }
+
+    /// Returns true when scissors have been set in the current command buffer.
+    bool TouchScissors() {
+        return std::exchange(state.scissors, true);
+    }
+
+    /// Returns true when depth bias has been set in the current command buffer.
+    bool TouchDepthBias() {
+        return std::exchange(state.depth_bias, true);
+    }
+
+    /// Returns true when blend constants have been set in the current command buffer.
+    bool TouchBlendConstants() {
+        return std::exchange(state.blend_constants, true);
+    }
+
+    /// Returns true when depth bounds have been set in the current command buffer.
+    bool TouchDepthBounds() {
+        return std::exchange(state.depth_bounds, true);
+    }
+
+    /// Returns true when stencil values have been set in the current command buffer.
+    bool TouchStencilValues() {
+        return std::exchange(state.stencil_values, true);
+    }
+
+    /// Send work to a separate thread.
+    template <typename T>
+    void Record(T&& command) {
+        if (chunk->Record(command)) {
+            return;
+        }
+        DispatchWork();
+        (void)chunk->Record(command);
+    }
+
+    /// Gets a reference to the current fence.
+    VKFenceView GetFence() const {
+        return current_fence;
+    }
+
 private:
+    class Command {
+    public:
+        virtual ~Command() = default;
+
+        virtual void Execute(vk::CommandBuffer cmdbuf,
+                             const vk::DispatchLoaderDynamic& dld) const = 0;
+
+        Command* GetNext() const {
+            return next;
+        }
+
+        void SetNext(Command* next_) {
+            next = next_;
+        }
+
+    private:
+        Command* next = nullptr;
+    };
+
+    template <typename T>
+    class TypedCommand final : public Command {
+    public:
+        explicit TypedCommand(T&& command) : command{std::move(command)} {}
+        ~TypedCommand() override = default;
+
+        TypedCommand(TypedCommand&&) = delete;
+        TypedCommand& operator=(TypedCommand&&) = delete;
+
+        void Execute(vk::CommandBuffer cmdbuf,
+                     const vk::DispatchLoaderDynamic& dld) const override {
+            command(cmdbuf, dld);
+        }
+
+    private:
+        T command;
+    };
+
+    class CommandChunk final {
+    public:
+        void ExecuteAll(vk::CommandBuffer cmdbuf, const vk::DispatchLoaderDynamic& dld);
+
+        template <typename T>
+        bool Record(T& command) {
+            using FuncType = TypedCommand<T>;
+            static_assert(sizeof(FuncType) < sizeof(data), "Lambda is too large");
+
+            if (command_offset > sizeof(data) - sizeof(FuncType)) {
+                return false;
+            }
+
+            Command* current_last = last;
+
+            last = new (data.data() + command_offset) FuncType(std::move(command));
+
+            if (current_last) {
+                current_last->SetNext(last);
+            } else {
+                first = last;
+            }
+
+            command_offset += sizeof(FuncType);
+            return true;
+        }
+
+        bool Empty() const {
+            return command_offset == 0;
+        }
+
+    private:
+        Command* first = nullptr;
+        Command* last = nullptr;
+
+        std::size_t command_offset = 0;
+        std::array<u8, 0x8000> data{};
+    };
+
+    void WorkerThread();
+
    void SubmitExecution(vk::Semaphore semaphore);

    void AllocateNewContext();

+    void InvalidateState();
+
+    void EndPendingOperations();
+
+    void EndRenderPass();
+
+    void AcquireNewChunk();
+
    const VKDevice& device;
    VKResourceManager& resource_manager;
    vk::CommandBuffer current_cmdbuf;
    VKFence* current_fence = nullptr;
    VKFence* next_fence = nullptr;
+
+    struct State {
+        std::optional<vk::RenderPassBeginInfo> renderpass;
+        vk::Pipeline graphics_pipeline;
+        bool viewports = false;
+        bool scissors = false;
+        bool depth_bias = false;
+        bool blend_constants = false;
+        bool depth_bounds = false;
+        bool stencil_values = false;
+    } state;
+
+    std::unique_ptr<CommandChunk> chunk;
+    std::thread worker_thread;
+
+    Common::SPSCQueue<std::unique_ptr<CommandChunk>> chunk_queue;
+    Common::SPSCQueue<std::unique_ptr<CommandChunk>> chunk_reserve;
+    std::mutex mutex;
+    std::condition_variable cv;
+    bool quit = false;
 };

 } // namespace Vulkan
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
@@ -2552,29 +2552,7 @@ public:
    }

    Id operator()(const ExprCondCode& expr) {
-        const Node cc = decomp.ir.GetConditionCode(expr.cc);
-        Id target;
-
-        if (const auto pred = std::get_if<PredicateNode>(&*cc)) {
-            const auto index = pred->GetIndex();
-            switch (index) {
-            case Tegra::Shader::Pred::NeverExecute:
-                target = decomp.v_false;
-                break;
-            case Tegra::Shader::Pred::UnusedIndex:
-                target = decomp.v_true;
-                break;
-            default:
-                target = decomp.predicates.at(index);
-                break;
-            }
-        } else if (const auto flag = std::get_if<InternalFlagNode>(&*cc)) {
-            target = decomp.internal_flags.at(static_cast<u32>(flag->GetFlag()));
-        } else {
-            UNREACHABLE();
-        }
-
-        return decomp.OpLoad(decomp.t_bool, target);
+        return decomp.AsBool(decomp.Visit(decomp.ir.GetConditionCode(expr.cc)));
    }

    Id operator()(const ExprVar& expr) {
--- a/src/video_core/shader/decode/conversion.cpp
+++ b/src/video_core/shader/decode/conversion.cpp
@@ -63,12 +63,11 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) {
    case OpCode::Id::I2F_R:
    case OpCode::Id::I2F_C:
    case OpCode::Id::I2F_IMM: {
-        UNIMPLEMENTED_IF(instr.conversion.int_src.selector != 0);
        UNIMPLEMENTED_IF(instr.conversion.dst_size == Register::Size::Long);
        UNIMPLEMENTED_IF_MSG(instr.generates_cc,
                             "Condition codes generation in I2F is not implemented");

-        Node value = [&]() {
+        Node value = [&] {
            switch (opcode->get().GetId()) {
            case OpCode::Id::I2F_R:
                return GetRegister(instr.gpr20);
@@ -81,7 +80,19 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) {
                return Immediate(0);
            }
        }();
+
        const bool input_signed = instr.conversion.is_input_signed;
+
+        if (instr.conversion.src_size == Register::Size::Byte) {
+            const u32 offset = static_cast<u32>(instr.conversion.int_src.selector) * 8;
+            if (offset > 0) {
+                value = SignedOperation(OperationCode::ILogicalShiftRight, input_signed,
+                                        std::move(value), Immediate(offset));
+            }
+        } else {
+            UNIMPLEMENTED_IF(instr.conversion.int_src.selector != 0);
+        }
+
        value = ConvertIntegerSize(value, instr.conversion.src_size, input_signed);
        value = GetOperandAbsNegInteger(value, instr.conversion.abs_a, false, input_signed);
        value = SignedOperation(OperationCode::FCastInteger, input_signed, PRECISE, value);
--- a/src/video_core/shader/decode/memory.cpp
+++ b/src/video_core/shader/decode/memory.cpp
@@ -22,7 +22,23 @@ using Tegra::Shader::Register;

 namespace {

-u32 GetUniformTypeElementsCount(Tegra::Shader::UniformType uniform_type) {
+u32 GetLdgMemorySize(Tegra::Shader::UniformType uniform_type) {
+    switch (uniform_type) {
+    case Tegra::Shader::UniformType::UnsignedByte:
+    case Tegra::Shader::UniformType::Single:
+        return 1;
+    case Tegra::Shader::UniformType::Double:
+        return 2;
+    case Tegra::Shader::UniformType::Quad:
+    case Tegra::Shader::UniformType::UnsignedQuad:
+        return 4;
+    default:
+        UNIMPLEMENTED_MSG("Unimplemented size={}!", static_cast<u32>(uniform_type));
+        return 1;
+    }
+}
+
+u32 GetStgMemorySize(Tegra::Shader::UniformType uniform_type) {
    switch (uniform_type) {
    case Tegra::Shader::UniformType::Single:
        return 1;
@@ -170,7 +186,7 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
        const auto [real_address_base, base_address, descriptor] =
            TrackGlobalMemory(bb, instr, false);

-        const u32 count = GetUniformTypeElementsCount(type);
+        const u32 count = GetLdgMemorySize(type);
        if (!real_address_base || !base_address) {
            // Tracking failed, load zeroes.
            for (u32 i = 0; i < count; ++i) {
@@ -181,12 +197,22 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {

        for (u32 i = 0; i < count; ++i) {
            const Node it_offset = Immediate(i * 4);
-            const Node real_address =
-                Operation(OperationCode::UAdd, NO_PRECISE, real_address_base, it_offset);
-            const Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor);
+            const Node real_address = Operation(OperationCode::UAdd, real_address_base, it_offset);
+            Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor);
+
+            if (type == Tegra::Shader::UniformType::UnsignedByte) {
+                // To handle unaligned loads, get the byte used to dereference global memory
+                // and extract that byte from the loaded uint32.
+                Node byte = Operation(OperationCode::UBitwiseAnd, real_address, Immediate(3));
+                byte = Operation(OperationCode::ULogicalShiftLeft, std::move(byte), Immediate(3));
+
+                gmem = Operation(OperationCode::UBitfieldExtract, std::move(gmem), std::move(byte),
+                                 Immediate(8));
+            }

            SetTemporary(bb, i, gmem);
        }
+
        for (u32 i = 0; i < count; ++i) {
            SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
        }
@@ -276,7 +302,7 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
            break;
        }

-        const u32 count = GetUniformTypeElementsCount(type);
+        const u32 count = GetStgMemorySize(type);
        for (u32 i = 0; i < count; ++i) {
            const Node it_offset = Immediate(i * 4);
            const Node real_address = Operation(OperationCode::UAdd, real_address_base, it_offset);
--- a/src/video_core/shader/decode/texture.cpp
+++ b/src/video_core/shader/decode/texture.cpp
@@ -743,13 +743,18 @@ Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is
    // When lod is used always is in gpr20
    const Node lod = lod_enabled ? GetRegister(instr.gpr20) : Immediate(0);

-    // Fill empty entries from the guest sampler.
+    // Fill empty entries from the guest sampler
    const std::size_t entry_coord_count = GetCoordCount(sampler.GetType());
    if (type_coord_count != entry_coord_count) {
        LOG_WARNING(HW_GPU, "Bound and built texture types mismatch");
-    }
-    for (std::size_t i = type_coord_count; i < entry_coord_count; ++i) {
-        coords.push_back(GetRegister(Register::ZeroIndex));
+
+        // When the size is higher we insert zeroes
+        for (std::size_t i = type_coord_count; i < entry_coord_count; ++i) {
+            coords.push_back(GetRegister(Register::ZeroIndex));
+        }
+
+        // Then we ensure the size matches the number of entries (dropping unused values)
+        coords.resize(entry_coord_count);
    }

    Node4 values;
--- a/src/video_core/texture_cache/surface_params.cpp
+++ b/src/video_core/texture_cache/surface_params.cpp
@@ -392,4 +392,42 @@ std::string SurfaceParams::TargetName() const {
    }
 }

+u32 SurfaceParams::GetBlockSize() const {
+    const u32 x = 64U << block_width;
+    const u32 y = 8U << block_height;
+    const u32 z = 1U << block_depth;
+    return x * y * z;
+}
+
+std::pair<u32, u32> SurfaceParams::GetBlockXY() const {
+    const u32 x_pixels = 64U / GetBytesPerPixel();
+    const u32 x = x_pixels << block_width;
+    const u32 y = 8U << block_height;
+    return {x, y};
+}
+
+std::tuple<u32, u32, u32> SurfaceParams::GetBlockOffsetXYZ(u32 offset) const {
+    const auto div_ceil = [](const u32 x, const u32 y) { return ((x + y - 1) / y); };
+    const u32 block_size = GetBlockSize();
+    const u32 block_index = offset / block_size;
+    const u32 gob_offset = offset % block_size;
+    const u32 gob_index = gob_offset / static_cast<u32>(Tegra::Texture::GetGOBSize());
+    const u32 x_gob_pixels = 64U / GetBytesPerPixel();
+    const u32 x_block_pixels = x_gob_pixels << block_width;
+    const u32 y_block_pixels = 8U << block_height;
+    const u32 z_block_pixels = 1U << block_depth;
+    const u32 x_blocks = div_ceil(width, x_block_pixels);
+    const u32 y_blocks = div_ceil(height, y_block_pixels);
+    const u32 z_blocks = div_ceil(depth, z_block_pixels);
+    const u32 base_x = block_index % x_blocks;
+    const u32 base_y = (block_index / x_blocks) % y_blocks;
+    const u32 base_z = (block_index / (x_blocks * y_blocks)) % z_blocks;
+    u32 x = base_x * x_block_pixels;
+    u32 y = base_y * y_block_pixels;
+    u32 z = base_z * z_block_pixels;
+    z += gob_index >> block_height;
+    y += (gob_index * 8U) % y_block_pixels;
+    return {x, y, z};
+}
+
 } // namespace VideoCommon
--- a/src/video_core/texture_cache/surface_params.h
+++ b/src/video_core/texture_cache/surface_params.h
@@ -4,6 +4,8 @@

 #pragma once

+#include <utility>
+
 #include "common/alignment.h"
 #include "common/bit_util.h"
 #include "common/cityhash.h"
@@ -136,6 +138,15 @@ public:

    std::size_t GetConvertedMipmapSize(u32 level) const;

+    /// Get the size of this texture's Tegra block in the guest memory layout
+    u32 GetBlockSize() const;
+
+    /// Get the maximum X and Y sizes of a single block.
+    std::pair<u32, u32> GetBlockXY() const;
+
+    /// Get the offset in x, y, z coordinates from a memory offset
+    std::tuple<u32, u32, u32> GetBlockOffsetXYZ(u32 offset) const;
+
    /// Returns the size of a layer in bytes in guest memory.
    std::size_t GetGuestLayerSize() const {
        return GetLayerSize(false, false);
@@ -269,7 +280,8 @@ private:

    /// Returns the size of all mipmap levels and aligns as needed.
    std::size_t GetInnerMemorySize(bool as_host_size, bool layer_only, bool uncompressed) const {
-        return GetLayerSize(as_host_size, uncompressed) * (layer_only ? 1U : depth);
+        return GetLayerSize(as_host_size, uncompressed) *
+               (layer_only ? 1U : (is_layered ? depth : 1U));
    }

    /// Returns the size of a layer
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -615,6 +615,86 @@ private:
        return {{new_surface, new_surface->GetMainView()}};
    }

+    /**
+     * Takes care of managing 3D textures and their slices. Uses HLE methods to reconstruct the 3D
+     * textures within the GPU if possible. Falls back to LLE when it isn't possible to use any of
+     * the HLE methods.
+     *
+     * @param overlaps          The overlapping surfaces registered in the cache.
+     * @param params            The parameters on the new surface.
+     * @param gpu_addr          The starting address of the new surface.
+     * @param cache_addr        The starting address of the new surface on physical memory.
+     * @param preserve_contents Indicates whether the new surface should be loaded from memory or
+     *                          left blank.
+     */
+    std::optional<std::pair<TSurface, TView>> Manage3DSurfaces(std::vector<TSurface>& overlaps,
+                                                               const SurfaceParams& params,
+                                                               const GPUVAddr gpu_addr,
+                                                               const CacheAddr cache_addr,
+                                                               bool preserve_contents) {
+        if (params.target == SurfaceTarget::Texture3D) {
+            bool failed = false;
+            if (params.num_levels > 1) {
+                // We can't handle mipmaps in 3D textures yet, so fall back to the LLE approach
+                return std::nullopt;
+            }
+            TSurface new_surface = GetUncachedSurface(gpu_addr, params);
+            bool modified = false;
+            for (auto& surface : overlaps) {
+                const SurfaceParams& src_params = surface->GetSurfaceParams();
+                if (src_params.target != SurfaceTarget::Texture2D) {
+                    failed = true;
+                    break;
+                }
+                if (src_params.height != params.height) {
+                    failed = true;
+                    break;
+                }
+                if (src_params.block_depth != params.block_depth ||
+                    src_params.block_height != params.block_height) {
+                    failed = true;
+                    break;
+                }
+                const u32 offset = static_cast<u32>(surface->GetCacheAddr() - cache_addr);
+                const auto [x, y, z] = params.GetBlockOffsetXYZ(offset);
+                modified |= surface->IsModified();
+                const CopyParams copy_params(0, 0, 0, 0, 0, z, 0, 0, params.width, params.height,
+                                             1);
+                ImageCopy(surface, new_surface, copy_params);
+            }
+            if (failed) {
+                return std::nullopt;
+            }
+            for (const auto& surface : overlaps) {
+                Unregister(surface);
+            }
+            new_surface->MarkAsModified(modified, Tick());
+            Register(new_surface);
+            auto view = new_surface->GetMainView();
+            return {{std::move(new_surface), view}};
+        } else {
+            for (const auto& surface : overlaps) {
+                if (!surface->MatchTarget(params.target)) {
+                    if (overlaps.size() == 1 && surface->GetCacheAddr() == cache_addr) {
+                        if (Settings::values.use_accurate_gpu_emulation) {
+                            return std::nullopt;
+                        }
+                        Unregister(surface);
+                        return InitializeSurface(gpu_addr, params, preserve_contents);
+                    }
+                    return std::nullopt;
+                }
+                if (surface->GetCacheAddr() != cache_addr) {
+                    continue;
+                }
+                if (surface->MatchesStructure(params) == MatchStructureResult::FullMatch) {
+                    return {{surface, surface->GetMainView()}};
+                }
+            }
+            return InitializeSurface(gpu_addr, params, preserve_contents);
+        }
+    }
+
    /**
     * Gets the starting address and parameters of a candidate surface and tries
     * to find a matching surface within the cache. This is done in 3 big steps:
@@ -687,6 +767,15 @@ private:
            }
        }

+        // Check if it's a 3D texture
+        if (params.block_depth > 0) {
+            auto surface =
+                Manage3DSurfaces(overlaps, params, gpu_addr, cache_addr, preserve_contents);
+            if (surface) {
+                return *surface;
+            }
+        }
+
        // Split cases between 1 overlap or many.
        if (overlaps.size() == 1) {
            TSurface current_surface = overlaps[0];
--- a/src/video_core/textures/decoders.h
+++ b/src/video_core/textures/decoders.h
@@ -12,6 +12,10 @@ namespace Tegra::Texture {

 // GOBSize constant. Calculated by 64 bytes in x multiplied by 8 y coords, represents
 // an small rect of (64/bytes_per_pixel)X8.
+inline std::size_t GetGOBSize() {
+    return 512;
+}
+
 inline std::size_t GetGOBSizeShift() {
    return 9;
 }
Author	SHA1	Message	Date
Fernando Sahmkow	218ee18417	Texture Cache: Improve documentation	2019-12-22 12:29:23 -04:00
Fernando Sahmkow	a3916588b6	Texture Cache: Address Feedback	2019-12-22 12:24:34 -04:00
Fernando Sahmkow	51c9e98677	Texture Cache: Add HLE methods for building 3D textures within the GPU in certain scenarios. This commit adds a series of HLE methods for handling 3D textures in general. This helps games that generate 3D textures on every frame and may reduce loading times for certain games.	2019-12-22 12:24:34 -04:00
Fernando Sahmkow	aea978e037	Merge pull request #3230 from ReinUsesLisp/vk-emu-shaders renderer_vulkan/shader: Add helper GLSL shaders	2019-12-22 11:23:09 -04:00
Fernando Sahmkow	27efcc15e9	Merge pull request #3240 from ReinUsesLisp/decomp-cond-code vk_shader_decompiler: Use Visit instead of reimplementing it	2019-12-22 11:20:55 -04:00
bunnei	16dcfacbfc	Merge pull request #3235 from ReinUsesLisp/ldg-u8 shader/memory: Implement LDG.U8 and unaligned U8 loads	2019-12-21 22:50:28 -05:00
ReinUsesLisp	af93909c9c	vk_shader_decompiler: Use Visit instead of reimplementing it ExprCondCode visit implements the generic Visit. Use this instead of that one. As an intended side effect this fixes unwritten memory usages in cases when a negation of a condition code is used.	2019-12-20 21:36:25 -03:00
bunnei	7be65c6a68	Merge pull request #3234 from ReinUsesLisp/i2f-u8-selector shader/conversion: Implement byte selector in I2F	2019-12-19 22:36:26 -05:00
bunnei	6d55b14cc0	Merge pull request #3233 from ReinUsesLisp/mismatch-sizes shader/texture: Properly shrink unused entries in size mismatches	2019-12-19 20:40:27 -05:00
bunnei	1eb4a95d2b	Merge pull request #3232 from ReinUsesLisp/gl-decompiler-images gl_shader_decompiler: Add missing DeclareImages	2019-12-19 11:32:47 -05:00
bunnei	253aa52351	Merge pull request #3231 from ReinUsesLisp/tld4s-encoding shader_bytecode: Fix TLD4S encoding	2019-12-19 11:32:25 -05:00
bunnei	d53cf05513	Merge pull request #3221 from ReinUsesLisp/vk-scheduler vk_scheduler: Delegate commands to a worker thread and state track	2019-12-18 22:04:08 -05:00
ReinUsesLisp	ae8d4b6c0c	shader/memory: Implement LDG.U8 and unaligned U8 loads LDG can load single bytes instead of full integers or packs of integers. These have the advantage of loading bytes that are not aligned to 4 bytes. To emulate these this commit gets the byte being referenced (by doing "address & 3" and then using that to extract the byte from the loaded integer: result = bitfieldExtract(loaded_integer, (address % 4) * 8, 8)	2019-12-18 01:21:46 -03:00
ReinUsesLisp	a7d6bd1ef1	shader/conversion: Implement byte selector in I2F I2F's byte selector is used to choose what bytes to convert to float. e.g. if the input is 0xaabbccdd and the selector is ".B3" it will convert 0xaa. The default (when it's not shown in nvdisasm) is ".B0", in that example the default would convert 0xdd to float.	2019-12-18 00:41:22 -03:00
bunnei	c053269017	Merge pull request #3227 from amilajack/patch-1 delete appveyor config	2019-12-17 21:49:22 -05:00
ReinUsesLisp	15a753b9a5	shader/texture: Properly shrink unused entries in size mismatches When a image format mismatches we were inserting zeroes to the texture itself. This was not handling cases were the mismatch uses less coordinates than the guest shader code. Address that by resizing the vector.	2019-12-17 23:38:10 -03:00
ReinUsesLisp	8b26b4228b	shader_bytecode: Fix TLD4S encoding	2019-12-17 23:32:10 -03:00
Amila Welihinda	8a23c32cf0	delete .appeveyor dir	2019-12-17 00:20:34 -08:00
ReinUsesLisp	b52297767e	renderer_vulkan/shader: Add helper GLSL shaders These shaders are used to specify code that is not dynamically generated in the Vulkan backend. Instead of packing it inside the build system, it's manually built and copied to the C++ file to avoid adding unnecessary build time dependencies. quad_array should be dropped in the future since it can be emulated with a memory pool generated from the CPU.	2019-12-16 17:59:08 -03:00
Amila Welihinda	0471eb6dc7	delete appveyor config	2019-12-15 11:16:39 -08:00
ReinUsesLisp	2df9a2dcaf	vk_scheduler: Delegate commands to a worker thread and state track Introduce a worker thread approach for delegating Vulkan work derived from dxvk's approach. https://github.com/doitsujin/dxvk Now that the scheduler is what handles all Vulkan work related to command streaming, store state tracking in itself. This way we can know when to reupload Vulkan dynamic state to the queue (since this one is invalidated between command buffers unlike NVN). We can also store the renderpass state and graphics pipeline bound to avoid redundant binds and renderpass begins/ends.	2019-12-13 02:24:48 -03:00