Compare commits

...

30 Commits

Author SHA1 Message Date
winappsx
a015901fbc Liste des jeux Vulkan testé ! Par la communauté !
En Français !
2019-12-10 17:37:03 +01:00
Fernando Sahmkow
6edadef96d Merge pull request #3208 from ReinUsesLisp/vk-shader-decompiler
vk_shader_decompiler: Add tessellation and misc changes
2019-12-10 08:01:41 -04:00
ReinUsesLisp
233ed96a5c vk_shader_decompiler: Fix build issues on old gcc versions 2019-12-10 01:55:38 -03:00
ReinUsesLisp
d30cf51d7d vk_shader_decompiler: Reduce YNegate's severity 2019-12-09 23:52:28 -03:00
ReinUsesLisp
0b5b93053d shader_ir/other: Implement S2R InvocationId 2019-12-09 23:52:28 -03:00
ReinUsesLisp
ecbfa416f0 vk_shader_decompiler: Misc changes
Update Sirit and its usage in vk_shader_decompiler. Highlights:
- Implement tessellation shaders
- Implement geometry shaders
- Implement some missing features
- Use native half float instructions when available.
2019-12-09 23:51:57 -03:00
ReinUsesLisp
9ad6327fbd shader: Keep track of shaders using warp instructions 2019-12-09 23:40:41 -03:00
ReinUsesLisp
6233b1db08 shader_ir/memory: Implement patch stores 2019-12-09 23:25:21 -03:00
Fernando Sahmkow
f2458106e6 Merge pull request #3205 from ReinUsesLisp/vk-device
vk_device: Misc changes
2019-12-09 20:02:58 -04:00
ReinUsesLisp
19ce0d4f1a vk_device: Misc changes
- Setup more features and requirements.
- Improve logging for missing features.
- Collect telemetry parameters.
- Add queries for more image formats.
- Query push constants limits.
- Optionally enable some extensions.
2019-12-09 01:04:48 -03:00
bunnei
faf5ae6a50 Merge pull request #3198 from ReinUsesLisp/tessellation-maxwell
maxwell_3d: Add tessellation state entries
2019-12-08 22:28:25 -05:00
bunnei
116a940dbb Merge pull request #3204 from ReinUsesLisp/vulkan-headers
externals: Update Vulkan-Headers
2019-12-08 22:28:02 -05:00
ReinUsesLisp
7ea362e134 externals: Update Vulkan-Headers 2019-12-08 22:08:19 -03:00
Rodrigo Locatti
e54699565a Merge pull request #3199 from ReinUsesLisp/vk-swapchain
vk_swapchain: Add support for swapping sRGB
2019-12-08 21:53:22 -03:00
Rodrigo Locatti
f73e569ba8 Merge pull request #3202 from lioncash/kernel-include
kernel: Remove unnecessary includes
2019-12-08 01:23:58 -03:00
Lioncash
c3e43c7e81 kernel: Remove unnecessary includes
Over the course of the changes to the kernel code, a few includes are no
longer necessary, particularly with the change over to std::shared_ptr
from Boost's intrusive_ptr.
2019-12-07 22:37:05 -05:00
ReinUsesLisp
f632d00eb1 vk_swapchain: Add support for swapping sRGB
We don't know until the game is running if it's using an sRGB color
space or not. Add support for hot-swapping swapchain surface formats.
2019-12-06 22:42:08 -03:00
ReinUsesLisp
36651f215a maxwell_3d: Add tessellation tess level registers 2019-12-06 22:08:22 -03:00
ReinUsesLisp
707bf41c6f maxwell_3d: Add tessellation mode register 2019-12-06 22:07:31 -03:00
ReinUsesLisp
d2b50c5ebd maxwell_3d: Add patch vertices register 2019-12-06 22:06:53 -03:00
bunnei
4bbb22a477 Merge pull request #3195 from FernandoS27/clear-exclusive
CpuCore: Clear exclusive state after doing a run in dynarmic.
2019-12-06 20:00:23 -05:00
bunnei
d49ed4a421 Merge pull request #3197 from ReinUsesLisp/shader-char
shader_bytecode: Remove corrupted character
2019-12-06 19:05:40 -05:00
ReinUsesLisp
74f515e8b6 shader_bytecode: Remove corrupted character 2019-12-06 20:31:56 -03:00
bunnei
e36814d6d5 Merge pull request #3109 from FernandoS27/new-instr
Implement FLO & TXD Instructions on GPU Shaders
2019-12-06 18:18:16 -05:00
bunnei
ef2b6733d0 Merge pull request #3196 from jmerdich/fix-ea-source-build
[EA source]: Ignore git-related files in cmake for early access tarballs
2019-12-06 13:19:04 -05:00
Fernando Sahmkow
40cd4df584 CpuCore: Clear exclusive state after doing a run in dynarmic.
This commit corrects an error in which a Core could remain with an
exclusive state after running, leaving space for possible race
conditions between changing cores.
2019-12-05 18:08:59 -04:00
Fernando Sahmkow
c8473f399e Shader_IR: Address Feedback 2019-11-18 07:34:34 -04:00
Fernando Sahmkow
cd0f5dfc17 Shader_IR: Implement TXD instruction. 2019-11-14 11:15:27 -04:00
Fernando Sahmkow
f3d1b370aa Shader_IR: Implement FLO instruction. 2019-11-14 11:15:27 -04:00
Fernando Sahmkow
95137a04e1 Shader_Bytecode: Add encodings for FLO, SHF and TXD 2019-11-14 11:15:26 -04:00
38 changed files with 2294 additions and 911 deletions

13
FR-Vulkan-Supported-Games Normal file
View File

@@ -0,0 +1,13 @@
FR : Liste des jeux qui fonctionne avec "Vulkan"
// Uniquement les jeux testé //
1. Zelda link's awakening = Fonctionne tres bien ! (mieux comparé a OpenGL)
2. Super Mario Party = Fonctionne bien !
3. Super Mario Odyssey = Fonctionne bien (mieux comparé a OpenGL)
Jeux en cours de test actuellement !
-----------------------------------------------------------------
Contibué pour l'instant par : FlyZox,

View File

@@ -96,6 +96,8 @@ void Cpu::RunLoop(bool tight_loop) {
} else {
arm_interface->Step();
}
// We are stopping a run, exclusive state must be cleared
arm_interface->ClearExclusiveState();
}
core_timing.Advance();

View File

@@ -11,8 +11,6 @@
#include "core/core_cpu.h"
#include "core/hle/kernel/address_arbiter.h"
#include "core/hle/kernel/errors.h"
#include "core/hle/kernel/object.h"
#include "core/hle/kernel/process.h"
#include "core/hle/kernel/scheduler.h"
#include "core/hle/kernel/thread.h"
#include "core/hle/result.h"

View File

@@ -4,10 +4,10 @@
#pragma once
#include <memory>
#include <vector>
#include "common/common_types.h"
#include "core/hle/kernel/object.h"
union ResultCode;

View File

@@ -8,7 +8,6 @@
#include "core/hle/kernel/hle_ipc.h"
#include "core/hle/kernel/object.h"
#include "core/hle/kernel/server_port.h"
#include "core/hle/kernel/server_session.h"
#include "core/hle/kernel/session.h"
namespace Kernel {

View File

@@ -4,7 +4,9 @@
#pragma once
#include <memory>
#include <string>
#include "common/common_types.h"
#include "core/hle/kernel/object.h"
#include "core/hle/result.h"

View File

@@ -6,6 +6,8 @@
#include <array>
#include <cstddef>
#include <memory>
#include "common/common_types.h"
#include "core/hle/kernel/object.h"
#include "core/hle/result.h"

View File

@@ -13,7 +13,6 @@
#include "core/core.h"
#include "core/core_timing.h"
#include "core/core_timing_util.h"
#include "core/hle/kernel/address_arbiter.h"
#include "core/hle/kernel/client_port.h"
#include "core/hle/kernel/errors.h"
#include "core/hle/kernel/handle_table.h"

View File

@@ -4,6 +4,7 @@
#pragma once
#include <memory>
#include <string>
#include <unordered_map>
#include <vector>

View File

@@ -2,6 +2,7 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <memory>
#include <utility>
#include <vector>

View File

@@ -5,6 +5,8 @@
#pragma once
#include <array>
#include <memory>
#include "common/common_types.h"
#include "core/hle/kernel/object.h"

View File

@@ -458,7 +458,6 @@ void Scheduler::SwitchContext() {
cpu_core.LoadContext(new_thread->GetContext());
cpu_core.SetTlsAddress(new_thread->GetTLSAddress());
cpu_core.SetTPIDR_EL0(new_thread->GetTPIDR_EL0());
cpu_core.ClearExclusiveState();
} else {
current_thread = nullptr;
// Note: We do not reset the current process and current page table when idling because

View File

@@ -4,11 +4,12 @@
#pragma once
#include <mutex>
#include <atomic>
#include <memory>
#include <vector>
#include "common/common_types.h"
#include "common/multi_level_queue.h"
#include "core/hle/kernel/object.h"
#include "core/hle/kernel/thread.h"
namespace Core {

View File

@@ -6,9 +6,9 @@
#include <memory>
#include <string>
#include <utility>
#include "core/hle/kernel/wait_object.h"
#include "core/hle/result.h"
namespace Kernel {

View File

@@ -6,7 +6,6 @@
#include <memory>
#include <string>
#include <vector>
#include "common/common_types.h"
#include "core/hle/kernel/object.h"

View File

@@ -5,7 +5,6 @@
#pragma once
#include <memory>
#include <vector>
#include "core/hle/kernel/object.h"
#include "core/hle/kernel/physical_memory.h"

View File

@@ -4,8 +4,9 @@
#pragma once
#include <memory>
#include <vector>
#include <boost/smart_ptr/intrusive_ptr.hpp>
#include "core/hle/kernel/object.h"
namespace Kernel {

View File

@@ -4,6 +4,8 @@
#pragma once
#include <memory>
#include "core/hle/kernel/object.h"
namespace Kernel {

View File

@@ -491,6 +491,23 @@ public:
INSERT_UNION_PADDING_WORDS(1);
};
enum class DepthMode : u32 {
MinusOneToOne = 0,
ZeroToOne = 1,
};
enum class TessellationPrimitive : u32 {
Isolines = 0,
Triangles = 1,
Quads = 2,
};
enum class TessellationSpacing : u32 {
Equal = 0,
FractionalOdd = 1,
FractionalEven = 2,
};
struct RenderTargetConfig {
u32 address_high;
u32 address_low;
@@ -628,7 +645,19 @@ public:
};
} sync_info;
INSERT_UNION_PADDING_WORDS(0x11E);
INSERT_UNION_PADDING_WORDS(0x15);
union {
BitField<0, 2, TessellationPrimitive> prim;
BitField<4, 2, TessellationSpacing> spacing;
BitField<8, 1, u32> cw;
BitField<9, 1, u32> connected;
} tess_mode;
std::array<f32, 4> tess_level_outer;
std::array<f32, 2> tess_level_inner;
INSERT_UNION_PADDING_WORDS(0x102);
u32 tfb_enabled;
@@ -662,7 +691,9 @@ public:
u32 polygon_offset_line_enable;
u32 polygon_offset_fill_enable;
INSERT_UNION_PADDING_WORDS(0xD);
u32 patch_vertices;
INSERT_UNION_PADDING_WORDS(0xC);
std::array<ScissorTest, NumViewports> scissor_test;
@@ -1386,6 +1417,9 @@ ASSERT_REG_POSITION(upload, 0x60);
ASSERT_REG_POSITION(exec_upload, 0x6C);
ASSERT_REG_POSITION(data_upload, 0x6D);
ASSERT_REG_POSITION(sync_info, 0xB2);
ASSERT_REG_POSITION(tess_mode, 0xC8);
ASSERT_REG_POSITION(tess_level_outer, 0xC9);
ASSERT_REG_POSITION(tess_level_inner, 0xCD);
ASSERT_REG_POSITION(tfb_enabled, 0x1D1);
ASSERT_REG_POSITION(rt, 0x200);
ASSERT_REG_POSITION(viewport_transform, 0x280);
@@ -1397,6 +1431,7 @@ ASSERT_REG_POSITION(clear_stencil, 0x368);
ASSERT_REG_POSITION(polygon_offset_point_enable, 0x370);
ASSERT_REG_POSITION(polygon_offset_line_enable, 0x371);
ASSERT_REG_POSITION(polygon_offset_fill_enable, 0x372);
ASSERT_REG_POSITION(patch_vertices, 0x373);
ASSERT_REG_POSITION(scissor_test, 0x380);
ASSERT_REG_POSITION(stencil_back_func_ref, 0x3D5);
ASSERT_REG_POSITION(stencil_back_func_mask, 0x3D6);

View File

@@ -98,10 +98,11 @@ union Attribute {
BitField<20, 10, u64> immediate;
BitField<22, 2, u64> element;
BitField<24, 6, Index> index;
BitField<31, 1, u64> patch;
BitField<47, 3, AttributeSize> size;
bool IsPhysical() const {
return element == 0 && static_cast<u64>(index.Value()) == 0;
return patch == 0 && element == 0 && static_cast<u64>(index.Value()) == 0;
}
} fmt20;
@@ -799,6 +800,12 @@ union Instruction {
BitField<40, 1, u64> invert;
} popc;
union {
BitField<41, 1, u64> sh;
BitField<40, 1, u64> invert;
BitField<48, 1, u64> is_signed;
} flo;
union {
BitField<39, 3, u64> pred;
BitField<42, 1, u64> neg_pred;
@@ -1439,6 +1446,26 @@ union Instruction {
}
} tlds;
union {
BitField<28, 1, u64> is_array;
BitField<29, 2, TextureType> texture_type;
BitField<35, 1, u64> aoffi_flag;
BitField<49, 1, u64> nodep_flag;
bool UsesMiscMode(TextureMiscMode mode) const {
switch (mode) {
case TextureMiscMode::AOFFI:
return aoffi_flag != 0;
case TextureMiscMode::NODEP:
return nodep_flag != 0;
default:
break;
}
return false;
}
} txd;
union {
BitField<24, 2, StoreCacheManagement> cache_management;
BitField<33, 3, ImageType> image_type;
@@ -1632,6 +1659,8 @@ public:
TLD4S, // Texture Load 4 with scalar / non - vec4 source / destinations
TMML_B, // Texture Mip Map Level
TMML, // Texture Mip Map Level
TXD, // Texture Gradient/Load with Derivates
TXD_B, // Texture Gradient/Load with Derivates Bindless
SUST, // Surface Store
SULD, // Surface Load
SUATOM, // Surface Atomic Operation
@@ -1664,6 +1693,9 @@ public:
ISCADD_C, // Scale and Add
ISCADD_R,
ISCADD_IMM,
FLO_R,
FLO_C,
FLO_IMM,
LEA_R1,
LEA_R2,
LEA_RZ,
@@ -1727,6 +1759,10 @@ public:
SHR_C,
SHR_R,
SHR_IMM,
SHF_RIGHT_R,
SHF_RIGHT_IMM,
SHF_LEFT_R,
SHF_LEFT_IMM,
FMNMX_C,
FMNMX_R,
FMNMX_IMM,
@@ -1924,6 +1960,8 @@ private:
INST("1101111100------", Id::TLD4S, Type::Texture, "TLD4S"),
INST("110111110110----", Id::TMML_B, Type::Texture, "TMML_B"),
INST("1101111101011---", Id::TMML, Type::Texture, "TMML"),
INST("11011110011110--", Id::TXD_B, Type::Texture, "TXD_B"),
INST("11011110001110--", Id::TXD, Type::Texture, "TXD"),
INST("11101011001-----", Id::SUST, Type::Image, "SUST"),
INST("11101011000-----", Id::SULD, Type::Image, "SULD"),
INST("1110101000------", Id::SUATOM, Type::Image, "SUATOM_D"),
@@ -1965,6 +2003,9 @@ private:
INST("010110110100----", Id::ICMP_R, Type::ArithmeticInteger, "ICMP_R"),
INST("010010110100----", Id::ICMP_CR, Type::ArithmeticInteger, "ICMP_CR"),
INST("0011011-0100----", Id::ICMP_IMM, Type::ArithmeticInteger, "ICMP_IMM"),
INST("0101110000110---", Id::FLO_R, Type::ArithmeticInteger, "FLO_R"),
INST("0100110000110---", Id::FLO_C, Type::ArithmeticInteger, "FLO_C"),
INST("0011100-00110---", Id::FLO_IMM, Type::ArithmeticInteger, "FLO_IMM"),
INST("0101101111011---", Id::LEA_R2, Type::ArithmeticInteger, "LEA_R2"),
INST("0101101111010---", Id::LEA_R1, Type::ArithmeticInteger, "LEA_R1"),
INST("001101101101----", Id::LEA_IMM, Type::ArithmeticInteger, "LEA_IMM"),
@@ -2022,6 +2063,10 @@ private:
INST("0100110000101---", Id::SHR_C, Type::Shift, "SHR_C"),
INST("0101110000101---", Id::SHR_R, Type::Shift, "SHR_R"),
INST("0011100-00101---", Id::SHR_IMM, Type::Shift, "SHR_IMM"),
INST("0101110011111---", Id::SHF_RIGHT_R, Type::Shift, "SHF_RIGHT_R"),
INST("0011100-11111---", Id::SHF_RIGHT_IMM, Type::Shift, "SHF_RIGHT_IMM"),
INST("0101101111111---", Id::SHF_LEFT_R, Type::Shift, "SHF_LEFT_R"),
INST("0011011-11111---", Id::SHF_LEFT_IMM, Type::Shift, "SHF_LEFT_IMM"),
INST("0100110011100---", Id::I2I_C, Type::Conversion, "I2I_C"),
INST("0101110011100---", Id::I2I_R, Type::Conversion, "I2I_R"),
INST("0011101-11100---", Id::I2I_IMM, Type::Conversion, "I2I_IMM"),

View File

@@ -49,8 +49,9 @@ class ExprDecompiler;
enum class Type { Void, Bool, Bool2, Float, Int, Uint, HalfFloat };
struct TextureAoffi {};
struct TextureDerivates {};
using TextureArgument = std::pair<Type, Node>;
using TextureIR = std::variant<TextureAoffi, TextureArgument>;
using TextureIR = std::variant<TextureAoffi, TextureDerivates, TextureArgument>;
constexpr u32 MAX_CONSTBUFFER_ELEMENTS =
static_cast<u32>(Maxwell::MaxConstBufferSize) / (4 * sizeof(float));
@@ -1112,6 +1113,8 @@ private:
expr += GenerateTextureArgument(*argument);
} else if (std::holds_alternative<TextureAoffi>(variant)) {
expr += GenerateTextureAoffi(meta->aoffi);
} else if (std::holds_alternative<TextureDerivates>(variant)) {
expr += GenerateTextureDerivates(meta->derivates);
} else {
UNREACHABLE();
}
@@ -1181,6 +1184,36 @@ private:
return expr;
}
std::string GenerateTextureDerivates(const std::vector<Node>& derivates) {
if (derivates.empty()) {
return {};
}
constexpr std::array coord_constructors = {"float", "vec2", "vec3"};
std::string expr = ", ";
const std::size_t components = derivates.size() / 2;
std::string dx = coord_constructors.at(components - 1);
std::string dy = coord_constructors.at(components - 1);
dx += '(';
dy += '(';
for (std::size_t index = 0; index < components; ++index) {
const auto operand_x{derivates.at(index * 2)};
const auto operand_y{derivates.at(index * 2 + 1)};
dx += Visit(operand_x).AsFloat();
dy += Visit(operand_y).AsFloat();
if (index + 1 < components) {
dx += ", ";
dy += ", ";
}
}
dx += ')';
dy += ')';
expr += dx + ", " + dy;
return expr;
}
std::string BuildIntegerCoordinates(Operation operation) {
constexpr std::array constructors{"int(", "ivec2(", "ivec3(", "ivec4("};
const std::size_t coords_count{operation.GetOperandsCount()};
@@ -1450,6 +1483,11 @@ private:
return GenerateUnary(operation, "bitCount", type, type);
}
template <Type type>
Expression BitMSB(Operation operation) {
return GenerateUnary(operation, "findMSB", type, type);
}
Expression HNegate(Operation operation) {
const auto GetNegate = [&](std::size_t index) {
return VisitOperand(operation, index).AsBool() + " ? -1 : 1";
@@ -1738,6 +1776,14 @@ private:
return {std::move(expr), Type::Float};
}
Expression TextureGradient(Operation operation) {
const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
ASSERT(meta);
std::string expr = GenerateTexture(operation, "Grad", {TextureDerivates{}, TextureAoffi{}});
return {std::move(expr) + GetSwizzle(meta->element), Type::Float};
}
Expression ImageLoad(Operation operation) {
if (!device.HasImageLoadFormatted()) {
LOG_ERROR(Render_OpenGL,
@@ -1869,6 +1915,10 @@ private:
return {};
}
Expression InvocationId(Operation operation) {
return {"gl_InvocationID", Type::Int};
}
Expression YNegate(Operation operation) {
return {"y_direction", Type::Float};
}
@@ -2003,6 +2053,7 @@ private:
&GLSLDecompiler::BitfieldInsert<Type::Int>,
&GLSLDecompiler::BitfieldExtract<Type::Int>,
&GLSLDecompiler::BitCount<Type::Int>,
&GLSLDecompiler::BitMSB<Type::Int>,
&GLSLDecompiler::Add<Type::Uint>,
&GLSLDecompiler::Mul<Type::Uint>,
@@ -2021,6 +2072,7 @@ private:
&GLSLDecompiler::BitfieldInsert<Type::Uint>,
&GLSLDecompiler::BitfieldExtract<Type::Uint>,
&GLSLDecompiler::BitCount<Type::Uint>,
&GLSLDecompiler::BitMSB<Type::Uint>,
&GLSLDecompiler::Add<Type::HalfFloat>,
&GLSLDecompiler::Mul<Type::HalfFloat>,
@@ -2084,6 +2136,7 @@ private:
&GLSLDecompiler::TextureQueryDimensions,
&GLSLDecompiler::TextureQueryLod,
&GLSLDecompiler::TexelFetch,
&GLSLDecompiler::TextureGradient,
&GLSLDecompiler::ImageLoad,
&GLSLDecompiler::ImageStore,
@@ -2104,6 +2157,7 @@ private:
&GLSLDecompiler::EmitVertex,
&GLSLDecompiler::EndPrimitive,
&GLSLDecompiler::InvocationId,
&GLSLDecompiler::YNegate,
&GLSLDecompiler::LocalInvocationId<0>,
&GLSLDecompiler::LocalInvocationId<1>,

View File

@@ -4,6 +4,17 @@
#pragma once
namespace vk {
class DispatchLoaderDynamic;
}
namespace Vulkan {
constexpr vk::DispatchLoaderDynamic* dont_use_me_dld = nullptr;
}
#define VULKAN_HPP_DEFAULT_DISPATCHER (*::Vulkan::dont_use_me_dld)
#define VULKAN_HPP_ENABLE_DYNAMIC_LOADER_TOOL 0
#define VULKAN_HPP_DISPATCH_LOADER_DYNAMIC 1
#include <vulkan/vulkan.hpp>
namespace Vulkan {
@@ -41,5 +52,7 @@ using UniqueSemaphore = UniqueHandle<vk::Semaphore>;
using UniqueShaderModule = UniqueHandle<vk::ShaderModule>;
using UniqueSwapchainKHR = UniqueHandle<vk::SwapchainKHR>;
using UniqueValidationCacheEXT = UniqueHandle<vk::ValidationCacheEXT>;
using UniqueDebugReportCallbackEXT = UniqueHandle<vk::DebugReportCallbackEXT>;
using UniqueDebugUtilsMessengerEXT = UniqueHandle<vk::DebugUtilsMessengerEXT>;
} // namespace Vulkan

View File

@@ -3,6 +3,7 @@
// Refer to the license.txt file included.
#include <bitset>
#include <cstdlib>
#include <optional>
#include <set>
#include <string_view>
@@ -15,6 +16,15 @@ namespace Vulkan {
namespace {
namespace Alternatives {
constexpr std::array Depth24UnormS8Uint = {vk::Format::eD32SfloatS8Uint,
vk::Format::eD16UnormS8Uint, vk::Format{}};
constexpr std::array Depth16UnormS8Uint = {vk::Format::eD24UnormS8Uint,
vk::Format::eD32SfloatS8Uint, vk::Format{}};
} // namespace Alternatives
template <typename T>
void SetNext(void**& next, T& data) {
*next = &data;
@@ -22,7 +32,7 @@ void SetNext(void**& next, T& data) {
}
template <typename T>
T GetFeatures(vk::PhysicalDevice physical, vk::DispatchLoaderDynamic dldi) {
T GetFeatures(vk::PhysicalDevice physical, const vk::DispatchLoaderDynamic& dldi) {
vk::PhysicalDeviceFeatures2 features;
T extension_features;
features.pNext = &extension_features;
@@ -30,17 +40,14 @@ T GetFeatures(vk::PhysicalDevice physical, vk::DispatchLoaderDynamic dldi) {
return extension_features;
}
} // Anonymous namespace
namespace Alternatives {
constexpr std::array Depth24UnormS8Uint = {vk::Format::eD32SfloatS8Uint,
vk::Format::eD16UnormS8Uint, vk::Format{}};
constexpr std::array Depth16UnormS8Uint = {vk::Format::eD24UnormS8Uint,
vk::Format::eD32SfloatS8Uint, vk::Format{}};
constexpr std::array Astc = {vk::Format::eA8B8G8R8UnormPack32, vk::Format{}};
} // namespace Alternatives
template <typename T>
T GetProperties(vk::PhysicalDevice physical, const vk::DispatchLoaderDynamic& dldi) {
vk::PhysicalDeviceProperties2 properties;
T extension_properties;
properties.pNext = &extension_properties;
physical.getProperties2(&properties, dldi);
return extension_properties;
}
constexpr const vk::Format* GetFormatAlternatives(vk::Format format) {
switch (format) {
@@ -53,8 +60,7 @@ constexpr const vk::Format* GetFormatAlternatives(vk::Format format) {
}
}
constexpr vk::FormatFeatureFlags GetFormatFeatures(vk::FormatProperties properties,
FormatType format_type) {
vk::FormatFeatureFlags GetFormatFeatures(vk::FormatProperties properties, FormatType format_type) {
switch (format_type) {
case FormatType::Linear:
return properties.linearTilingFeatures;
@@ -67,11 +73,13 @@ constexpr vk::FormatFeatureFlags GetFormatFeatures(vk::FormatProperties properti
}
}
} // Anonymous namespace
VKDevice::VKDevice(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical,
vk::SurfaceKHR surface)
: physical{physical}, format_properties{GetFormatProperties(dldi, physical)} {
: physical{physical}, properties{physical.getProperties(dldi)},
format_properties{GetFormatProperties(dldi, physical)} {
SetupFamilies(dldi, surface);
SetupProperties(dldi);
SetupFeatures(dldi);
}
@@ -89,12 +97,22 @@ bool VKDevice::Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instan
features.depthClamp = true;
features.samplerAnisotropy = true;
features.largePoints = true;
features.multiViewport = true;
features.depthBiasClamp = true;
features.geometryShader = true;
features.tessellationShader = true;
features.fragmentStoresAndAtomics = true;
features.shaderImageGatherExtended = true;
features.shaderStorageImageWriteWithoutFormat = true;
features.textureCompressionASTC_LDR = is_optimal_astc_supported;
vk::PhysicalDeviceVertexAttributeDivisorFeaturesEXT vertex_divisor;
vertex_divisor.vertexAttributeInstanceRateDivisor = true;
vertex_divisor.vertexAttributeInstanceRateZeroDivisor = true;
SetNext(next, vertex_divisor);
vk::PhysicalDevice16BitStorageFeaturesKHR bit16_storage;
bit16_storage.uniformAndStorageBuffer16BitAccess = true;
SetNext(next, bit16_storage);
vk::PhysicalDevice8BitStorageFeaturesKHR bit8_storage;
bit8_storage.uniformAndStorageBuffer8BitAccess = true;
SetNext(next, bit8_storage);
vk::PhysicalDeviceFloat16Int8FeaturesKHR float16_int8;
if (is_float16_supported) {
@@ -120,6 +138,10 @@ bool VKDevice::Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instan
LOG_INFO(Render_Vulkan, "Device doesn't support uint8 indexes");
}
if (!ext_depth_range_unrestricted) {
LOG_INFO(Render_Vulkan, "Device doesn't support depth range unrestricted");
}
vk::DeviceCreateInfo device_ci({}, static_cast<u32>(queue_cis.size()), queue_cis.data(), 0,
nullptr, static_cast<u32>(extensions.size()), extensions.data(),
nullptr);
@@ -135,16 +157,7 @@ bool VKDevice::Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instan
logical = UniqueDevice(
dummy_logical, vk::ObjectDestroy<vk::NoParent, vk::DispatchLoaderDynamic>(nullptr, dld));
if (khr_driver_properties) {
vk::PhysicalDeviceDriverPropertiesKHR driver;
vk::PhysicalDeviceProperties2 properties;
properties.pNext = &driver;
physical.getProperties2(&properties, dld);
driver_id = driver.driverID;
LOG_INFO(Render_Vulkan, "Driver: {} {}", driver.driverName, driver.driverInfo);
} else {
LOG_INFO(Render_Vulkan, "Driver: Unknown");
}
CollectTelemetryParameters();
graphics_queue = logical->getQueue(graphics_family, 0, dld);
present_queue = logical->getQueue(present_family, 0, dld);
@@ -190,6 +203,18 @@ vk::Format VKDevice::GetSupportedFormat(vk::Format wanted_format,
bool VKDevice::IsOptimalAstcSupported(const vk::PhysicalDeviceFeatures& features,
const vk::DispatchLoaderDynamic& dldi) const {
// Disable for now to avoid converting ASTC twice.
return false;
static constexpr std::array astc_formats = {
vk::Format::eAstc4x4SrgbBlock, vk::Format::eAstc8x8SrgbBlock,
vk::Format::eAstc8x5SrgbBlock, vk::Format::eAstc5x4SrgbBlock,
vk::Format::eAstc5x5UnormBlock, vk::Format::eAstc5x5SrgbBlock,
vk::Format::eAstc10x8UnormBlock, vk::Format::eAstc10x8SrgbBlock,
vk::Format::eAstc6x6UnormBlock, vk::Format::eAstc6x6SrgbBlock,
vk::Format::eAstc10x10UnormBlock, vk::Format::eAstc10x10SrgbBlock,
vk::Format::eAstc12x12UnormBlock, vk::Format::eAstc12x12SrgbBlock,
vk::Format::eAstc8x6UnormBlock, vk::Format::eAstc8x6SrgbBlock,
vk::Format::eAstc6x5UnormBlock, vk::Format::eAstc6x5SrgbBlock};
if (!features.textureCompressionASTC_LDR) {
return false;
}
@@ -197,12 +222,6 @@ bool VKDevice::IsOptimalAstcSupported(const vk::PhysicalDeviceFeatures& features
vk::FormatFeatureFlagBits::eSampledImage | vk::FormatFeatureFlagBits::eBlitSrc |
vk::FormatFeatureFlagBits::eBlitDst | vk::FormatFeatureFlagBits::eTransferSrc |
vk::FormatFeatureFlagBits::eTransferDst};
constexpr std::array astc_formats = {
vk::Format::eAstc4x4UnormBlock, vk::Format::eAstc4x4SrgbBlock,
vk::Format::eAstc8x8SrgbBlock, vk::Format::eAstc8x6SrgbBlock,
vk::Format::eAstc5x4SrgbBlock, vk::Format::eAstc5x5UnormBlock,
vk::Format::eAstc5x5SrgbBlock, vk::Format::eAstc10x8UnormBlock,
vk::Format::eAstc10x8SrgbBlock};
for (const auto format : astc_formats) {
const auto format_properties{physical.getFormatProperties(format, dldi)};
if (!(format_properties.optimalTilingFeatures & format_feature_usage)) {
@@ -225,11 +244,17 @@ bool VKDevice::IsFormatSupported(vk::Format wanted_format, vk::FormatFeatureFlag
bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical,
vk::SurfaceKHR surface) {
LOG_INFO(Render_Vulkan, "{}", physical.getProperties(dldi).deviceName);
bool is_suitable = true;
constexpr std::array required_extensions = {VK_KHR_SWAPCHAIN_EXTENSION_NAME,
VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME};
constexpr std::array required_extensions = {
VK_KHR_SWAPCHAIN_EXTENSION_NAME,
VK_KHR_16BIT_STORAGE_EXTENSION_NAME,
VK_KHR_8BIT_STORAGE_EXTENSION_NAME,
VK_KHR_DRIVER_PROPERTIES_EXTENSION_NAME,
VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME,
VK_EXT_SHADER_SUBGROUP_BALLOT_EXTENSION_NAME,
VK_EXT_SHADER_SUBGROUP_VOTE_EXTENSION_NAME,
};
std::bitset<required_extensions.size()> available_extensions{};
for (const auto& prop : physical.enumerateDeviceExtensionProperties(nullptr, dldi)) {
@@ -246,7 +271,7 @@ bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDev
if (available_extensions[i]) {
continue;
}
LOG_INFO(Render_Vulkan, "Missing required extension: {}", required_extensions[i]);
LOG_ERROR(Render_Vulkan, "Missing required extension: {}", required_extensions[i]);
is_suitable = false;
}
}
@@ -263,7 +288,7 @@ bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDev
has_present |= physical.getSurfaceSupportKHR(i, surface, dldi) != 0;
}
if (!has_graphics || !has_present) {
LOG_INFO(Render_Vulkan, "Device lacks a graphics and present queue");
LOG_ERROR(Render_Vulkan, "Device lacks a graphics and present queue");
is_suitable = false;
}
@@ -273,8 +298,15 @@ bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDev
constexpr u32 required_ubo_size = 65536;
if (limits.maxUniformBufferRange < required_ubo_size) {
LOG_INFO(Render_Vulkan, "Device UBO size {} is too small, {} is required)",
limits.maxUniformBufferRange, required_ubo_size);
LOG_ERROR(Render_Vulkan, "Device UBO size {} is too small, {} is required",
limits.maxUniformBufferRange, required_ubo_size);
is_suitable = false;
}
constexpr u32 required_num_viewports = 16;
if (limits.maxViewports < required_num_viewports) {
LOG_INFO(Render_Vulkan, "Device number of viewports {} is too small, {} is required",
limits.maxViewports, required_num_viewports);
is_suitable = false;
}
@@ -285,24 +317,32 @@ bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDev
std::make_pair(features.depthClamp, "depthClamp"),
std::make_pair(features.samplerAnisotropy, "samplerAnisotropy"),
std::make_pair(features.largePoints, "largePoints"),
std::make_pair(features.multiViewport, "multiViewport"),
std::make_pair(features.depthBiasClamp, "depthBiasClamp"),
std::make_pair(features.geometryShader, "geometryShader"),
std::make_pair(features.tessellationShader, "tessellationShader"),
std::make_pair(features.fragmentStoresAndAtomics, "fragmentStoresAndAtomics"),
std::make_pair(features.shaderImageGatherExtended, "shaderImageGatherExtended"),
std::make_pair(features.shaderStorageImageWriteWithoutFormat,
"shaderStorageImageWriteWithoutFormat"),
};
for (const auto& [supported, name] : feature_report) {
if (supported) {
continue;
}
LOG_INFO(Render_Vulkan, "Missing required feature: {}", name);
LOG_ERROR(Render_Vulkan, "Missing required feature: {}", name);
is_suitable = false;
}
if (!is_suitable) {
LOG_ERROR(Render_Vulkan, "{} is not suitable", properties.deviceName);
}
return is_suitable;
}
std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynamic& dldi) {
std::vector<const char*> extensions;
extensions.reserve(7);
extensions.push_back(VK_KHR_SWAPCHAIN_EXTENSION_NAME);
extensions.push_back(VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME);
const auto Test = [&](const vk::ExtensionProperties& extension,
std::optional<std::reference_wrapper<bool>> status, const char* name,
bool push) {
@@ -317,13 +357,30 @@ std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynami
}
};
extensions.reserve(13);
extensions.push_back(VK_KHR_SWAPCHAIN_EXTENSION_NAME);
extensions.push_back(VK_KHR_16BIT_STORAGE_EXTENSION_NAME);
extensions.push_back(VK_KHR_8BIT_STORAGE_EXTENSION_NAME);
extensions.push_back(VK_KHR_DRIVER_PROPERTIES_EXTENSION_NAME);
extensions.push_back(VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME);
extensions.push_back(VK_EXT_SHADER_SUBGROUP_BALLOT_EXTENSION_NAME);
extensions.push_back(VK_EXT_SHADER_SUBGROUP_VOTE_EXTENSION_NAME);
[[maybe_unused]] const bool nsight =
std::getenv("NVTX_INJECTION64_PATH") || std::getenv("NSIGHT_LAUNCHED");
bool khr_shader_float16_int8{};
bool ext_subgroup_size_control{};
for (const auto& extension : physical.enumerateDeviceExtensionProperties(nullptr, dldi)) {
Test(extension, khr_uniform_buffer_standard_layout,
VK_KHR_UNIFORM_BUFFER_STANDARD_LAYOUT_EXTENSION_NAME, true);
Test(extension, ext_index_type_uint8, VK_EXT_INDEX_TYPE_UINT8_EXTENSION_NAME, true);
Test(extension, khr_driver_properties, VK_KHR_DRIVER_PROPERTIES_EXTENSION_NAME, true);
Test(extension, khr_shader_float16_int8, VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME, false);
Test(extension, ext_depth_range_unrestricted,
VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME, true);
Test(extension, ext_index_type_uint8, VK_EXT_INDEX_TYPE_UINT8_EXTENSION_NAME, true);
Test(extension, ext_shader_viewport_index_layer,
VK_EXT_SHADER_VIEWPORT_INDEX_LAYER_EXTENSION_NAME, true);
Test(extension, ext_subgroup_size_control, VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME,
false);
}
if (khr_shader_float16_int8) {
@@ -332,6 +389,23 @@ std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynami
extensions.push_back(VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME);
}
if (ext_subgroup_size_control) {
const auto features =
GetFeatures<vk::PhysicalDeviceSubgroupSizeControlFeaturesEXT>(physical, dldi);
const auto properties =
GetProperties<vk::PhysicalDeviceSubgroupSizeControlPropertiesEXT>(physical, dldi);
is_warp_potentially_bigger = properties.maxSubgroupSize > GuestWarpSize;
if (features.subgroupSizeControl && properties.minSubgroupSize <= GuestWarpSize &&
properties.maxSubgroupSize >= GuestWarpSize) {
extensions.push_back(VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME);
guest_warp_stages = properties.requiredSubgroupSizeStages;
}
} else {
is_warp_potentially_bigger = true;
}
return extensions;
}
@@ -358,19 +432,23 @@ void VKDevice::SetupFamilies(const vk::DispatchLoaderDynamic& dldi, vk::SurfaceK
present_family = *present_family_;
}
void VKDevice::SetupProperties(const vk::DispatchLoaderDynamic& dldi) {
const auto props = physical.getProperties(dldi);
device_type = props.deviceType;
uniform_buffer_alignment = static_cast<u64>(props.limits.minUniformBufferOffsetAlignment);
storage_buffer_alignment = static_cast<u64>(props.limits.minStorageBufferOffsetAlignment);
max_storage_buffer_range = static_cast<u64>(props.limits.maxStorageBufferRange);
}
void VKDevice::SetupFeatures(const vk::DispatchLoaderDynamic& dldi) {
const auto supported_features{physical.getFeatures(dldi)};
is_optimal_astc_supported = IsOptimalAstcSupported(supported_features, dldi);
}
void VKDevice::CollectTelemetryParameters() {
const auto driver = GetProperties<vk::PhysicalDeviceDriverPropertiesKHR>(physical, dld);
driver_id = driver.driverID;
vendor_name = driver.driverName;
const auto extensions = physical.enumerateDeviceExtensionProperties(nullptr, dld);
reported_extensions.reserve(std::size(extensions));
for (const auto& extension : extensions) {
reported_extensions.push_back(extension.extensionName);
}
}
std::vector<vk::DeviceQueueCreateInfo> VKDevice::GetDeviceQueueCreateInfos() const {
static const float QUEUE_PRIORITY = 1.0f;
@@ -385,50 +463,70 @@ std::vector<vk::DeviceQueueCreateInfo> VKDevice::GetDeviceQueueCreateInfos() con
std::unordered_map<vk::Format, vk::FormatProperties> VKDevice::GetFormatProperties(
const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical) {
constexpr std::array formats{vk::Format::eA8B8G8R8UnormPack32,
vk::Format::eA8B8G8R8SnormPack32,
vk::Format::eA8B8G8R8SrgbPack32,
vk::Format::eB5G6R5UnormPack16,
vk::Format::eA2B10G10R10UnormPack32,
vk::Format::eR32G32B32A32Sfloat,
vk::Format::eR16G16B16A16Uint,
vk::Format::eR16G16Unorm,
vk::Format::eR16G16Snorm,
vk::Format::eR16G16Sfloat,
vk::Format::eR16Unorm,
vk::Format::eR8G8B8A8Srgb,
vk::Format::eR8G8Unorm,
vk::Format::eR8G8Snorm,
vk::Format::eR8Unorm,
vk::Format::eB10G11R11UfloatPack32,
vk::Format::eR32Sfloat,
vk::Format::eR16Sfloat,
vk::Format::eR16G16B16A16Sfloat,
vk::Format::eB8G8R8A8Unorm,
vk::Format::eD32Sfloat,
vk::Format::eD16Unorm,
vk::Format::eD16UnormS8Uint,
vk::Format::eD24UnormS8Uint,
vk::Format::eD32SfloatS8Uint,
vk::Format::eBc1RgbaUnormBlock,
vk::Format::eBc2UnormBlock,
vk::Format::eBc3UnormBlock,
vk::Format::eBc4UnormBlock,
vk::Format::eBc5UnormBlock,
vk::Format::eBc5SnormBlock,
vk::Format::eBc7UnormBlock,
vk::Format::eBc1RgbaSrgbBlock,
vk::Format::eBc3SrgbBlock,
vk::Format::eBc7SrgbBlock,
vk::Format::eAstc4x4UnormBlock,
vk::Format::eAstc4x4SrgbBlock,
vk::Format::eAstc8x8SrgbBlock,
vk::Format::eAstc8x6SrgbBlock,
vk::Format::eAstc5x4SrgbBlock,
vk::Format::eAstc5x5UnormBlock,
vk::Format::eAstc5x5SrgbBlock,
vk::Format::eAstc10x8UnormBlock,
vk::Format::eAstc10x8SrgbBlock};
static constexpr std::array formats{vk::Format::eA8B8G8R8UnormPack32,
vk::Format::eA8B8G8R8SnormPack32,
vk::Format::eA8B8G8R8SrgbPack32,
vk::Format::eB5G6R5UnormPack16,
vk::Format::eA2B10G10R10UnormPack32,
vk::Format::eA1R5G5B5UnormPack16,
vk::Format::eR32G32B32A32Sfloat,
vk::Format::eR32G32B32A32Uint,
vk::Format::eR32G32Sfloat,
vk::Format::eR32G32Uint,
vk::Format::eR16G16B16A16Uint,
vk::Format::eR16G16B16A16Unorm,
vk::Format::eR16G16Unorm,
vk::Format::eR16G16Snorm,
vk::Format::eR16G16Sfloat,
vk::Format::eR16Unorm,
vk::Format::eR8G8B8A8Srgb,
vk::Format::eR8G8Unorm,
vk::Format::eR8G8Snorm,
vk::Format::eR8Unorm,
vk::Format::eR8Uint,
vk::Format::eB10G11R11UfloatPack32,
vk::Format::eR32Sfloat,
vk::Format::eR32Uint,
vk::Format::eR16Sfloat,
vk::Format::eR16G16B16A16Sfloat,
vk::Format::eB8G8R8A8Unorm,
vk::Format::eR4G4B4A4UnormPack16,
vk::Format::eD32Sfloat,
vk::Format::eD16Unorm,
vk::Format::eD16UnormS8Uint,
vk::Format::eD24UnormS8Uint,
vk::Format::eD32SfloatS8Uint,
vk::Format::eBc1RgbaUnormBlock,
vk::Format::eBc2UnormBlock,
vk::Format::eBc3UnormBlock,
vk::Format::eBc4UnormBlock,
vk::Format::eBc5UnormBlock,
vk::Format::eBc5SnormBlock,
vk::Format::eBc7UnormBlock,
vk::Format::eBc6HUfloatBlock,
vk::Format::eBc6HSfloatBlock,
vk::Format::eBc1RgbaSrgbBlock,
vk::Format::eBc3SrgbBlock,
vk::Format::eBc7SrgbBlock,
vk::Format::eAstc4x4SrgbBlock,
vk::Format::eAstc8x8SrgbBlock,
vk::Format::eAstc8x5SrgbBlock,
vk::Format::eAstc5x4SrgbBlock,
vk::Format::eAstc5x5UnormBlock,
vk::Format::eAstc5x5SrgbBlock,
vk::Format::eAstc10x8UnormBlock,
vk::Format::eAstc10x8SrgbBlock,
vk::Format::eAstc6x6UnormBlock,
vk::Format::eAstc6x6SrgbBlock,
vk::Format::eAstc10x10UnormBlock,
vk::Format::eAstc10x10SrgbBlock,
vk::Format::eAstc12x12UnormBlock,
vk::Format::eAstc12x12SrgbBlock,
vk::Format::eAstc8x6UnormBlock,
vk::Format::eAstc8x6SrgbBlock,
vk::Format::eAstc6x5UnormBlock,
vk::Format::eAstc6x5SrgbBlock,
vk::Format::eE5B9G9R9UfloatPack32};
std::unordered_map<vk::Format, vk::FormatProperties> format_properties;
for (const auto format : formats) {
format_properties.emplace(format, physical.getFormatProperties(format, dldi));

View File

@@ -4,6 +4,8 @@
#pragma once
#include <string>
#include <string_view>
#include <unordered_map>
#include <vector>
#include "common/common_types.h"
@@ -14,6 +16,9 @@ namespace Vulkan {
/// Format usage descriptor.
enum class FormatType { Linear, Optimal, Buffer };
/// Subgroup size of the guest emulated hardware (Nvidia has 32 threads per subgroup).
const u32 GuestWarpSize = 32;
/// Handles data specific to a physical device.
class VKDevice final {
public:
@@ -71,7 +76,22 @@ public:
/// Returns true if the device is integrated with the host CPU.
bool IsIntegrated() const {
return device_type == vk::PhysicalDeviceType::eIntegratedGpu;
return properties.deviceType == vk::PhysicalDeviceType::eIntegratedGpu;
}
/// Returns the current Vulkan API version provided in Vulkan-formatted version numbers.
u32 GetApiVersion() const {
return properties.apiVersion;
}
/// Returns the current driver version provided in Vulkan-formatted version numbers.
u32 GetDriverVersion() const {
return properties.driverVersion;
}
/// Returns the device name.
std::string_view GetModelName() const {
return properties.deviceName;
}
/// Returns the driver ID.
@@ -80,18 +100,23 @@ public:
}
/// Returns uniform buffer alignment requeriment.
u64 GetUniformBufferAlignment() const {
return uniform_buffer_alignment;
vk::DeviceSize GetUniformBufferAlignment() const {
return properties.limits.minUniformBufferOffsetAlignment;
}
/// Returns storage alignment requeriment.
u64 GetStorageBufferAlignment() const {
return storage_buffer_alignment;
vk::DeviceSize GetStorageBufferAlignment() const {
return properties.limits.minStorageBufferOffsetAlignment;
}
/// Returns the maximum range for storage buffers.
u64 GetMaxStorageBufferRange() const {
return max_storage_buffer_range;
vk::DeviceSize GetMaxStorageBufferRange() const {
return properties.limits.maxStorageBufferRange;
}
/// Returns the maximum size for push constants.
vk::DeviceSize GetMaxPushConstantsSize() const {
return properties.limits.maxPushConstantsSize;
}
/// Returns true if ASTC is natively supported.
@@ -104,6 +129,16 @@ public:
return is_float16_supported;
}
/// Returns true if the device warp size can potentially be bigger than guest's warp size.
bool IsWarpSizePotentiallyBiggerThanGuest() const {
return is_warp_potentially_bigger;
}
/// Returns true if the device can be forced to use the guest warp size.
bool IsGuestWarpSizeSupported(vk::ShaderStageFlagBits stage) const {
return (guest_warp_stages & stage) != vk::ShaderStageFlags{};
}
/// Returns true if the device supports VK_EXT_scalar_block_layout.
bool IsKhrUniformBufferStandardLayoutSupported() const {
return khr_uniform_buffer_standard_layout;
@@ -114,6 +149,26 @@ public:
return ext_index_type_uint8;
}
/// Returns true if the device supports VK_EXT_depth_range_unrestricted.
bool IsExtDepthRangeUnrestrictedSupported() const {
return ext_depth_range_unrestricted;
}
/// Returns true if the device supports VK_EXT_shader_viewport_index_layer.
bool IsExtShaderViewportIndexLayerSupported() const {
return ext_shader_viewport_index_layer;
}
/// Returns the vendor name reported from Vulkan.
std::string_view GetVendorName() const {
return vendor_name;
}
/// Returns the list of available extensions.
const std::vector<std::string>& GetAvailableExtensions() const {
return reported_extensions;
}
/// Checks if the physical device is suitable.
static bool IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical,
vk::SurfaceKHR surface);
@@ -125,12 +180,12 @@ private:
/// Sets up queue families.
void SetupFamilies(const vk::DispatchLoaderDynamic& dldi, vk::SurfaceKHR surface);
/// Sets up device properties.
void SetupProperties(const vk::DispatchLoaderDynamic& dldi);
/// Sets up device features.
void SetupFeatures(const vk::DispatchLoaderDynamic& dldi);
/// Collects telemetry information from the device.
void CollectTelemetryParameters();
/// Returns a list of queue initialization descriptors.
std::vector<vk::DeviceQueueCreateInfo> GetDeviceQueueCreateInfos() const;
@@ -148,23 +203,28 @@ private:
const vk::PhysicalDevice physical; ///< Physical device.
vk::DispatchLoaderDynamic dld; ///< Device function pointers.
vk::PhysicalDeviceProperties properties; ///< Device properties.
UniqueDevice logical; ///< Logical device.
vk::Queue graphics_queue; ///< Main graphics queue.
vk::Queue present_queue; ///< Main present queue.
u32 graphics_family{}; ///< Main graphics queue family index.
u32 present_family{}; ///< Main present queue family index.
vk::PhysicalDeviceType device_type; ///< Physical device type.
vk::DriverIdKHR driver_id{}; ///< Driver ID.
u64 uniform_buffer_alignment{}; ///< Uniform buffer alignment requeriment.
u64 storage_buffer_alignment{}; ///< Storage buffer alignment requeriment.
u64 max_storage_buffer_range{}; ///< Max storage buffer size.
vk::ShaderStageFlags guest_warp_stages{}; ///< Stages where the guest warp size can be forced.
bool is_optimal_astc_supported{}; ///< Support for native ASTC.
bool is_float16_supported{}; ///< Support for float16 arithmetics.
bool is_warp_potentially_bigger{}; ///< Host warp size can be bigger than guest.
bool khr_uniform_buffer_standard_layout{}; ///< Support for std430 on UBOs.
bool ext_index_type_uint8{}; ///< Support for VK_EXT_index_type_uint8.
bool khr_driver_properties{}; ///< Support for VK_KHR_driver_properties.
std::unordered_map<vk::Format, vk::FormatProperties>
format_properties; ///< Format properties dictionary.
bool ext_depth_range_unrestricted{}; ///< Support for VK_EXT_depth_range_unrestricted.
bool ext_shader_viewport_index_layer{}; ///< Support for VK_EXT_shader_viewport_index_layer.
// Telemetry parameters
std::string vendor_name; ///< Device's driver name.
std::vector<std::string> reported_extensions; ///< Reported Vulkan extensions.
/// Format properties dictionary.
std::unordered_map<vk::Format, vk::FormatProperties> format_properties;
};
} // namespace Vulkan

File diff suppressed because it is too large Load Diff

View File

@@ -5,29 +5,28 @@
#pragma once
#include <array>
#include <bitset>
#include <memory>
#include <set>
#include <type_traits>
#include <utility>
#include <vector>
#include <sirit/sirit.h>
#include "common/common_types.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/engines/shader_type.h"
#include "video_core/shader/shader_ir.h"
namespace VideoCommon::Shader {
class ShaderIR;
}
namespace Vulkan {
class VKDevice;
}
namespace Vulkan::VKShader {
namespace Vulkan {
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
using TexelBufferEntry = VideoCommon::Shader::Sampler;
using SamplerEntry = VideoCommon::Shader::Sampler;
using ImageEntry = VideoCommon::Shader::Image;
constexpr u32 DESCRIPTOR_SET = 0;
@@ -46,39 +45,74 @@ private:
class GlobalBufferEntry {
public:
explicit GlobalBufferEntry(u32 cbuf_index, u32 cbuf_offset)
: cbuf_index{cbuf_index}, cbuf_offset{cbuf_offset} {}
constexpr explicit GlobalBufferEntry(u32 cbuf_index, u32 cbuf_offset, bool is_written)
: cbuf_index{cbuf_index}, cbuf_offset{cbuf_offset}, is_written{is_written} {}
u32 GetCbufIndex() const {
constexpr u32 GetCbufIndex() const {
return cbuf_index;
}
u32 GetCbufOffset() const {
constexpr u32 GetCbufOffset() const {
return cbuf_offset;
}
constexpr bool IsWritten() const {
return is_written;
}
private:
u32 cbuf_index{};
u32 cbuf_offset{};
bool is_written{};
};
struct ShaderEntries {
u32 const_buffers_base_binding{};
u32 global_buffers_base_binding{};
u32 samplers_base_binding{};
u32 NumBindings() const {
return static_cast<u32>(const_buffers.size() + global_buffers.size() +
texel_buffers.size() + samplers.size() + images.size());
}
std::vector<ConstBufferEntry> const_buffers;
std::vector<GlobalBufferEntry> global_buffers;
std::vector<TexelBufferEntry> texel_buffers;
std::vector<SamplerEntry> samplers;
std::vector<ImageEntry> images;
std::set<u32> attributes;
std::array<bool, Maxwell::NumClipDistances> clip_distances{};
std::size_t shader_length{};
Sirit::Id entry_function{};
std::vector<Sirit::Id> interfaces;
bool uses_warps{};
};
using DecompilerResult = std::pair<std::unique_ptr<Sirit::Module>, ShaderEntries>;
struct Specialization final {
u32 base_binding{};
DecompilerResult Decompile(const VKDevice& device, const VideoCommon::Shader::ShaderIR& ir,
Tegra::Engines::ShaderType stage);
// Compute specific
std::array<u32, 3> workgroup_size{};
u32 shared_memory_size{};
} // namespace Vulkan::VKShader
// Graphics specific
Maxwell::PrimitiveTopology primitive_topology{};
std::optional<float> point_size{};
std::array<Maxwell::VertexAttribute::Type, Maxwell::NumVertexAttributes> attribute_types{};
// Tessellation specific
struct {
Maxwell::TessellationPrimitive primitive{};
Maxwell::TessellationSpacing spacing{};
bool clockwise{};
} tessellation;
};
// Old gcc versions don't consider this trivially copyable.
// static_assert(std::is_trivially_copyable_v<Specialization>);
struct SPIRVShader {
std::vector<u32> code;
ShaderEntries entries;
};
ShaderEntries GenerateShaderEntries(const VideoCommon::Shader::ShaderIR& ir);
std::vector<u32> Decompile(const VKDevice& device, const VideoCommon::Shader::ShaderIR& ir,
Tegra::Engines::ShaderType stage, const Specialization& specialization);
} // namespace Vulkan

View File

@@ -19,12 +19,18 @@
namespace Vulkan {
namespace {
vk::SurfaceFormatKHR ChooseSwapSurfaceFormat(const std::vector<vk::SurfaceFormatKHR>& formats) {
vk::SurfaceFormatKHR ChooseSwapSurfaceFormat(const std::vector<vk::SurfaceFormatKHR>& formats,
bool srgb) {
if (formats.size() == 1 && formats[0].format == vk::Format::eUndefined) {
return {vk::Format::eB8G8R8A8Unorm, vk::ColorSpaceKHR::eSrgbNonlinear};
vk::SurfaceFormatKHR format;
format.format = vk::Format::eB8G8R8A8Unorm;
format.colorSpace = vk::ColorSpaceKHR::eSrgbNonlinear;
return format;
}
const auto& found = std::find_if(formats.begin(), formats.end(), [](const auto& format) {
return format.format == vk::Format::eB8G8R8A8Unorm &&
const auto& found = std::find_if(formats.begin(), formats.end(), [srgb](const auto& format) {
const auto request_format = srgb ? vk::Format::eB8G8R8A8Srgb : vk::Format::eB8G8R8A8Unorm;
return format.format == request_format &&
format.colorSpace == vk::ColorSpaceKHR::eSrgbNonlinear;
});
return found != formats.end() ? *found : formats[0];
@@ -51,28 +57,26 @@ vk::Extent2D ChooseSwapExtent(const vk::SurfaceCapabilitiesKHR& capabilities, u3
std::min(capabilities.maxImageExtent.height, extent.height));
return extent;
}
} // namespace
} // Anonymous namespace
VKSwapchain::VKSwapchain(vk::SurfaceKHR surface, const VKDevice& device)
: surface{surface}, device{device} {}
VKSwapchain::~VKSwapchain() = default;
void VKSwapchain::Create(u32 width, u32 height) {
const auto dev = device.GetLogical();
void VKSwapchain::Create(u32 width, u32 height, bool srgb) {
const auto& dld = device.GetDispatchLoader();
const auto physical_device = device.GetPhysical();
const vk::SurfaceCapabilitiesKHR capabilities{
physical_device.getSurfaceCapabilitiesKHR(surface, dld)};
const auto capabilities{physical_device.getSurfaceCapabilitiesKHR(surface, dld)};
if (capabilities.maxImageExtent.width == 0 || capabilities.maxImageExtent.height == 0) {
return;
}
dev.waitIdle(dld);
device.GetLogical().waitIdle(dld);
Destroy();
CreateSwapchain(capabilities, width, height);
CreateSwapchain(capabilities, width, height, srgb);
CreateSemaphores();
CreateImageViews();
@@ -107,7 +111,7 @@ bool VKSwapchain::Present(vk::Semaphore render_semaphore, VKFence& fence) {
break;
case vk::Result::eErrorOutOfDateKHR:
if (current_width > 0 && current_height > 0) {
Create(current_width, current_height);
Create(current_width, current_height, current_srgb);
recreated = true;
}
break;
@@ -129,23 +133,19 @@ bool VKSwapchain::HasFramebufferChanged(const Layout::FramebufferLayout& framebu
}
void VKSwapchain::CreateSwapchain(const vk::SurfaceCapabilitiesKHR& capabilities, u32 width,
u32 height) {
const auto dev{device.GetLogical()};
u32 height, bool srgb) {
const auto& dld{device.GetDispatchLoader()};
const auto physical_device{device.GetPhysical()};
const auto formats{physical_device.getSurfaceFormatsKHR(surface, dld)};
const auto present_modes{physical_device.getSurfacePresentModesKHR(surface, dld)};
const std::vector<vk::SurfaceFormatKHR> formats{
physical_device.getSurfaceFormatsKHR(surface, dld)};
const std::vector<vk::PresentModeKHR> present_modes{
physical_device.getSurfacePresentModesKHR(surface, dld)};
const vk::SurfaceFormatKHR surface_format{ChooseSwapSurfaceFormat(formats)};
const vk::SurfaceFormatKHR surface_format{ChooseSwapSurfaceFormat(formats, srgb)};
const vk::PresentModeKHR present_mode{ChooseSwapPresentMode(present_modes)};
extent = ChooseSwapExtent(capabilities, width, height);
current_width = extent.width;
current_height = extent.height;
current_srgb = srgb;
u32 requested_image_count{capabilities.minImageCount + 1};
if (capabilities.maxImageCount > 0 && requested_image_count > capabilities.maxImageCount) {
@@ -169,6 +169,7 @@ void VKSwapchain::CreateSwapchain(const vk::SurfaceCapabilitiesKHR& capabilities
swapchain_ci.imageSharingMode = vk::SharingMode::eExclusive;
}
const auto dev{device.GetLogical()};
swapchain = dev.createSwapchainKHRUnique(swapchain_ci, nullptr, dld);
images = dev.getSwapchainImagesKHR(*swapchain, dld);

View File

@@ -24,7 +24,7 @@ public:
~VKSwapchain();
/// Creates (or recreates) the swapchain with a given size.
void Create(u32 width, u32 height);
void Create(u32 width, u32 height, bool srgb);
/// Acquires the next image in the swapchain, waits as needed.
void AcquireNextImage();
@@ -60,8 +60,13 @@ public:
return image_format;
}
bool GetSrgbState() const {
return current_srgb;
}
private:
void CreateSwapchain(const vk::SurfaceCapabilitiesKHR& capabilities, u32 width, u32 height);
void CreateSwapchain(const vk::SurfaceCapabilitiesKHR& capabilities, u32 width, u32 height,
bool srgb);
void CreateSemaphores();
void CreateImageViews();
@@ -87,6 +92,7 @@ private:
u32 current_width{};
u32 current_height{};
bool current_srgb{};
};
} // namespace Vulkan

View File

@@ -130,6 +130,25 @@ u32 ShaderIR::DecodeArithmeticInteger(NodeBlock& bb, u32 pc) {
SetRegister(bb, instr.gpr0, value);
break;
}
case OpCode::Id::FLO_R:
case OpCode::Id::FLO_C:
case OpCode::Id::FLO_IMM: {
Node value;
if (instr.flo.invert) {
op_b = Operation(OperationCode::IBitwiseNot, NO_PRECISE, std::move(op_b));
}
if (instr.flo.is_signed) {
value = Operation(OperationCode::IBitMSB, NO_PRECISE, std::move(op_b));
} else {
value = Operation(OperationCode::UBitMSB, NO_PRECISE, std::move(op_b));
}
if (instr.flo.sh) {
value =
Operation(OperationCode::UBitwiseXor, NO_PRECISE, std::move(value), Immediate(31));
}
SetRegister(bb, instr.gpr0, std::move(value));
break;
}
case OpCode::Id::SEL_C:
case OpCode::Id::SEL_R:
case OpCode::Id::SEL_IMM: {

View File

@@ -21,6 +21,7 @@ using Tegra::Shader::OpCode;
using Tegra::Shader::Register;
namespace {
u32 GetUniformTypeElementsCount(Tegra::Shader::UniformType uniform_type) {
switch (uniform_type) {
case Tegra::Shader::UniformType::Single:
@@ -35,6 +36,7 @@ u32 GetUniformTypeElementsCount(Tegra::Shader::UniformType uniform_type) {
return 1;
}
}
} // Anonymous namespace
u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
@@ -196,28 +198,28 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
UNIMPLEMENTED_IF_MSG((instr.attribute.fmt20.immediate.Value() % sizeof(u32)) != 0,
"Unaligned attribute loads are not supported");
u64 next_element = instr.attribute.fmt20.element;
auto next_index = static_cast<u64>(instr.attribute.fmt20.index.Value());
u64 element = instr.attribute.fmt20.element;
auto index = static_cast<u64>(instr.attribute.fmt20.index.Value());
const auto StoreNextElement = [&](u32 reg_offset) {
const auto dest = GetOutputAttribute(static_cast<Attribute::Index>(next_index),
next_element, GetRegister(instr.gpr39));
const u32 num_words = static_cast<u32>(instr.attribute.fmt20.size.Value()) + 1;
for (u32 reg_offset = 0; reg_offset < num_words; ++reg_offset) {
Node dest;
if (instr.attribute.fmt20.patch) {
const u32 offset = static_cast<u32>(index) * 4 + static_cast<u32>(element);
dest = MakeNode<PatchNode>(offset);
} else {
dest = GetOutputAttribute(static_cast<Attribute::Index>(index), element,
GetRegister(instr.gpr39));
}
const auto src = GetRegister(instr.gpr0.Value() + reg_offset);
bb.push_back(Operation(OperationCode::Assign, dest, src));
// Load the next attribute element into the following register. If the element
// to load goes beyond the vec4 size, load the first element of the next
// attribute.
next_element = (next_element + 1) % 4;
next_index = next_index + (next_element == 0 ? 1 : 0);
};
const u32 num_words = static_cast<u32>(instr.attribute.fmt20.size.Value()) + 1;
for (u32 reg_offset = 0; reg_offset < num_words; ++reg_offset) {
StoreNextElement(reg_offset);
// Load the next attribute element into the following register. If the element to load
// goes beyond the vec4 size, load the first element of the next attribute.
element = (element + 1) % 4;
index = index + (element == 0 ? 1 : 0);
}
break;
}
case OpCode::Id::ST_L:

View File

@@ -69,6 +69,8 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
case OpCode::Id::MOV_SYS: {
const Node value = [this, instr] {
switch (instr.sys20) {
case SystemVariable::InvocationId:
return Operation(OperationCode::InvocationId);
case SystemVariable::Ydirection:
return Operation(OperationCode::YNegate);
case SystemVariable::InvocationInfo:

View File

@@ -134,13 +134,52 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
Node4 values;
for (u32 element = 0; element < values.size(); ++element) {
auto coords_copy = coords;
MetaTexture meta{sampler, {}, {}, {}, {}, {}, component, element};
MetaTexture meta{sampler, {}, {}, {}, {}, {}, {}, component, element};
values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy));
}
WriteTexsInstructionFloat(bb, instr, values, true);
break;
}
case OpCode::Id::TXD_B:
is_bindless = true;
[[fallthrough]];
case OpCode::Id::TXD: {
UNIMPLEMENTED_IF_MSG(instr.txd.UsesMiscMode(TextureMiscMode::AOFFI),
"AOFFI is not implemented");
UNIMPLEMENTED_IF_MSG(instr.txd.is_array != 0, "TXD Array is not implemented");
u64 base_reg = instr.gpr8.Value();
const auto derivate_reg = instr.gpr20.Value();
const auto texture_type = instr.txd.texture_type.Value();
const auto coord_count = GetCoordCount(texture_type);
const auto& sampler = is_bindless
? GetBindlessSampler(base_reg, {{texture_type, false, false}})
: GetSampler(instr.sampler, {{texture_type, false, false}});
if (is_bindless) {
base_reg++;
}
std::vector<Node> coords;
std::vector<Node> derivates;
for (std::size_t i = 0; i < coord_count; ++i) {
coords.push_back(GetRegister(base_reg + i));
const std::size_t derivate = i * 2;
derivates.push_back(GetRegister(derivate_reg + derivate));
derivates.push_back(GetRegister(derivate_reg + derivate + 1));
}
Node4 values;
for (u32 element = 0; element < values.size(); ++element) {
MetaTexture meta{sampler, {}, {}, {}, derivates, {}, {}, {}, element};
values[element] = Operation(OperationCode::TextureGradient, std::move(meta), coords);
}
WriteTexInstructionFloat(bb, instr, values);
break;
}
case OpCode::Id::TXQ_B:
is_bindless = true;
[[fallthrough]];
@@ -158,7 +197,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
if (!instr.txq.IsComponentEnabled(element)) {
continue;
}
MetaTexture meta{sampler, {}, {}, {}, {}, {}, {}, element};
MetaTexture meta{sampler, {}, {}, {}, {}, {}, {}, {}, element};
const Node value =
Operation(OperationCode::TextureQueryDimensions, meta,
GetRegister(instr.gpr8.Value() + (is_bindless ? 1 : 0)));
@@ -212,7 +251,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
continue;
}
auto params = coords;
MetaTexture meta{sampler, {}, {}, {}, {}, {}, {}, element};
MetaTexture meta{sampler, {}, {}, {}, {}, {}, {}, {}, element};
const Node value = Operation(OperationCode::TextureQueryLod, meta, std::move(params));
SetTemporary(bb, indexer++, value);
}
@@ -442,7 +481,7 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
Node4 values;
for (u32 element = 0; element < values.size(); ++element) {
auto copy_coords = coords;
MetaTexture meta{sampler, array, depth_compare, aoffi, bias, lod, {}, element};
MetaTexture meta{sampler, array, depth_compare, aoffi, {}, bias, lod, {}, element};
values[element] = Operation(read_method, meta, std::move(copy_coords));
}
@@ -574,7 +613,7 @@ Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool de
Node4 values;
for (u32 element = 0; element < values.size(); ++element) {
auto coords_copy = coords;
MetaTexture meta{sampler, GetRegister(array_register), dc, aoffi, {}, {}, component,
MetaTexture meta{sampler, GetRegister(array_register), dc, aoffi, {}, {}, {}, component,
element};
values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy));
}
@@ -608,7 +647,7 @@ Node4 ShaderIR::GetTldCode(Tegra::Shader::Instruction instr) {
Node4 values;
for (u32 element = 0; element < values.size(); ++element) {
auto coords_copy = coords;
MetaTexture meta{sampler, array_register, {}, {}, {}, lod, {}, element};
MetaTexture meta{sampler, array_register, {}, {}, {}, {}, lod, {}, element};
values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy));
}
@@ -653,7 +692,7 @@ Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is
Node4 values;
for (u32 element = 0; element < values.size(); ++element) {
auto coords_copy = coords;
MetaTexture meta{sampler, array, {}, {}, {}, lod, {}, element};
MetaTexture meta{sampler, array, {}, {}, {}, {}, lod, {}, element};
values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy));
}
return values;

View File

@@ -38,6 +38,9 @@ u32 ShaderIR::DecodeWarp(NodeBlock& bb, u32 pc) {
const Instruction instr = {program_code[pc]};
const auto opcode = OpCode::Decode(instr);
// Signal the backend that this shader uses warp instructions.
uses_warps = true;
switch (opcode->get().GetId()) {
case OpCode::Id::VOTE: {
const Node value = GetPredicate(instr.vote.value, instr.vote.negate_value != 0);

View File

@@ -68,6 +68,7 @@ enum class OperationCode {
IBitfieldInsert, /// (MetaArithmetic, int base, int insert, int offset, int bits) -> int
IBitfieldExtract, /// (MetaArithmetic, int value, int offset, int offset) -> int
IBitCount, /// (MetaArithmetic, int) -> int
IBitMSB, /// (MetaArithmetic, int) -> int
UAdd, /// (MetaArithmetic, uint a, uint b) -> uint
UMul, /// (MetaArithmetic, uint a, uint b) -> uint
@@ -86,6 +87,7 @@ enum class OperationCode {
UBitfieldInsert, /// (MetaArithmetic, uint base, uint insert, int offset, int bits) -> uint
UBitfieldExtract, /// (MetaArithmetic, uint value, int offset, int offset) -> uint
UBitCount, /// (MetaArithmetic, uint) -> uint
UBitMSB, /// (MetaArithmetic, uint) -> uint
HAdd, /// (MetaArithmetic, f16vec2 a, f16vec2 b) -> f16vec2
HMul, /// (MetaArithmetic, f16vec2 a, f16vec2 b) -> f16vec2
@@ -149,6 +151,7 @@ enum class OperationCode {
TextureQueryDimensions, /// (MetaTexture, float a) -> float4
TextureQueryLod, /// (MetaTexture, float[N] coords) -> float4
TexelFetch, /// (MetaTexture, int[N], int) -> float4
TextureGradient, /// (MetaTexture, float[N] coords, float[N*2] derivates) -> float4
ImageLoad, /// (MetaImage, int[N] coords) -> void
ImageStore, /// (MetaImage, int[N] coords) -> void
@@ -169,6 +172,7 @@ enum class OperationCode {
EmitVertex, /// () -> void
EndPrimitive, /// () -> void
InvocationId, /// () -> int
YNegate, /// () -> float
LocalInvocationIdX, /// () -> uint
LocalInvocationIdY, /// () -> uint
@@ -210,13 +214,14 @@ class PredicateNode;
class AbufNode;
class CbufNode;
class LmemNode;
class PatchNode;
class SmemNode;
class GmemNode;
class CommentNode;
using NodeData =
std::variant<OperationNode, ConditionalNode, GprNode, ImmediateNode, InternalFlagNode,
PredicateNode, AbufNode, CbufNode, LmemNode, SmemNode, GmemNode, CommentNode>;
using NodeData = std::variant<OperationNode, ConditionalNode, GprNode, ImmediateNode,
InternalFlagNode, PredicateNode, AbufNode, PatchNode, CbufNode,
LmemNode, SmemNode, GmemNode, CommentNode>;
using Node = std::shared_ptr<NodeData>;
using Node4 = std::array<Node, 4>;
using NodeBlock = std::vector<Node>;
@@ -367,6 +372,7 @@ struct MetaTexture {
Node array;
Node depth_compare;
std::vector<Node> aoffi;
std::vector<Node> derivates;
Node bias;
Node lod;
Node component{};
@@ -538,6 +544,19 @@ private:
u32 element{};
};
/// Patch memory (used to communicate tessellation stages).
class PatchNode final {
public:
explicit PatchNode(u32 offset) : offset{offset} {}
u32 GetOffset() const {
return offset;
}
private:
u32 offset{};
};
/// Constant buffer node, usually mapped to uniform buffers in GLSL
class CbufNode final {
public:

View File

@@ -137,6 +137,10 @@ public:
return uses_vertex_id;
}
bool UsesWarps() const {
return uses_warps;
}
bool HasPhysicalAttributes() const {
return uses_physical_attributes;
}
@@ -415,6 +419,7 @@ private:
bool uses_physical_attributes{}; // Shader uses AL2P or physical attribute read/writes
bool uses_instance_id{};
bool uses_vertex_id{};
bool uses_warps{};
Tegra::Shader::Header header;
};

View File

@@ -7,6 +7,7 @@
#include <variant>
#include "common/common_types.h"
#include "video_core/shader/node.h"
#include "video_core/shader/shader_ir.h"
namespace VideoCommon::Shader {