Compare commits

...

19 Commits

Author SHA1 Message Date
winappsx
a015901fbc Liste des jeux Vulkan testé ! Par la communauté !
En Français !
2019-12-10 17:37:03 +01:00
Fernando Sahmkow
6edadef96d Merge pull request #3208 from ReinUsesLisp/vk-shader-decompiler
vk_shader_decompiler: Add tessellation and misc changes
2019-12-10 08:01:41 -04:00
ReinUsesLisp
233ed96a5c vk_shader_decompiler: Fix build issues on old gcc versions 2019-12-10 01:55:38 -03:00
ReinUsesLisp
d30cf51d7d vk_shader_decompiler: Reduce YNegate's severity 2019-12-09 23:52:28 -03:00
ReinUsesLisp
0b5b93053d shader_ir/other: Implement S2R InvocationId 2019-12-09 23:52:28 -03:00
ReinUsesLisp
ecbfa416f0 vk_shader_decompiler: Misc changes
Update Sirit and its usage in vk_shader_decompiler. Highlights:
- Implement tessellation shaders
- Implement geometry shaders
- Implement some missing features
- Use native half float instructions when available.
2019-12-09 23:51:57 -03:00
ReinUsesLisp
9ad6327fbd shader: Keep track of shaders using warp instructions 2019-12-09 23:40:41 -03:00
ReinUsesLisp
6233b1db08 shader_ir/memory: Implement patch stores 2019-12-09 23:25:21 -03:00
Fernando Sahmkow
f2458106e6 Merge pull request #3205 from ReinUsesLisp/vk-device
vk_device: Misc changes
2019-12-09 20:02:58 -04:00
ReinUsesLisp
19ce0d4f1a vk_device: Misc changes
- Setup more features and requirements.
- Improve logging for missing features.
- Collect telemetry parameters.
- Add queries for more image formats.
- Query push constants limits.
- Optionally enable some extensions.
2019-12-09 01:04:48 -03:00
bunnei
faf5ae6a50 Merge pull request #3198 from ReinUsesLisp/tessellation-maxwell
maxwell_3d: Add tessellation state entries
2019-12-08 22:28:25 -05:00
bunnei
116a940dbb Merge pull request #3204 from ReinUsesLisp/vulkan-headers
externals: Update Vulkan-Headers
2019-12-08 22:28:02 -05:00
ReinUsesLisp
7ea362e134 externals: Update Vulkan-Headers 2019-12-08 22:08:19 -03:00
Rodrigo Locatti
e54699565a Merge pull request #3199 from ReinUsesLisp/vk-swapchain
vk_swapchain: Add support for swapping sRGB
2019-12-08 21:53:22 -03:00
Rodrigo Locatti
f73e569ba8 Merge pull request #3202 from lioncash/kernel-include
kernel: Remove unnecessary includes
2019-12-08 01:23:58 -03:00
ReinUsesLisp
f632d00eb1 vk_swapchain: Add support for swapping sRGB
We don't know until the game is running if it's using an sRGB color
space or not. Add support for hot-swapping swapchain surface formats.
2019-12-06 22:42:08 -03:00
ReinUsesLisp
36651f215a maxwell_3d: Add tessellation tess level registers 2019-12-06 22:08:22 -03:00
ReinUsesLisp
707bf41c6f maxwell_3d: Add tessellation mode register 2019-12-06 22:07:31 -03:00
ReinUsesLisp
d2b50c5ebd maxwell_3d: Add patch vertices register 2019-12-06 22:06:53 -03:00
19 changed files with 2109 additions and 896 deletions

13
FR-Vulkan-Supported-Games Normal file
View File

@@ -0,0 +1,13 @@
FR : Liste des jeux qui fonctionne avec "Vulkan"
// Uniquement les jeux testé //
1. Zelda link's awakening = Fonctionne tres bien ! (mieux comparé a OpenGL)
2. Super Mario Party = Fonctionne bien !
3. Super Mario Odyssey = Fonctionne bien (mieux comparé a OpenGL)
Jeux en cours de test actuellement !
-----------------------------------------------------------------
Contibué pour l'instant par : FlyZox,

View File

@@ -491,6 +491,23 @@ public:
INSERT_UNION_PADDING_WORDS(1);
};
enum class DepthMode : u32 {
MinusOneToOne = 0,
ZeroToOne = 1,
};
enum class TessellationPrimitive : u32 {
Isolines = 0,
Triangles = 1,
Quads = 2,
};
enum class TessellationSpacing : u32 {
Equal = 0,
FractionalOdd = 1,
FractionalEven = 2,
};
struct RenderTargetConfig {
u32 address_high;
u32 address_low;
@@ -628,7 +645,19 @@ public:
};
} sync_info;
INSERT_UNION_PADDING_WORDS(0x11E);
INSERT_UNION_PADDING_WORDS(0x15);
union {
BitField<0, 2, TessellationPrimitive> prim;
BitField<4, 2, TessellationSpacing> spacing;
BitField<8, 1, u32> cw;
BitField<9, 1, u32> connected;
} tess_mode;
std::array<f32, 4> tess_level_outer;
std::array<f32, 2> tess_level_inner;
INSERT_UNION_PADDING_WORDS(0x102);
u32 tfb_enabled;
@@ -662,7 +691,9 @@ public:
u32 polygon_offset_line_enable;
u32 polygon_offset_fill_enable;
INSERT_UNION_PADDING_WORDS(0xD);
u32 patch_vertices;
INSERT_UNION_PADDING_WORDS(0xC);
std::array<ScissorTest, NumViewports> scissor_test;
@@ -1386,6 +1417,9 @@ ASSERT_REG_POSITION(upload, 0x60);
ASSERT_REG_POSITION(exec_upload, 0x6C);
ASSERT_REG_POSITION(data_upload, 0x6D);
ASSERT_REG_POSITION(sync_info, 0xB2);
ASSERT_REG_POSITION(tess_mode, 0xC8);
ASSERT_REG_POSITION(tess_level_outer, 0xC9);
ASSERT_REG_POSITION(tess_level_inner, 0xCD);
ASSERT_REG_POSITION(tfb_enabled, 0x1D1);
ASSERT_REG_POSITION(rt, 0x200);
ASSERT_REG_POSITION(viewport_transform, 0x280);
@@ -1397,6 +1431,7 @@ ASSERT_REG_POSITION(clear_stencil, 0x368);
ASSERT_REG_POSITION(polygon_offset_point_enable, 0x370);
ASSERT_REG_POSITION(polygon_offset_line_enable, 0x371);
ASSERT_REG_POSITION(polygon_offset_fill_enable, 0x372);
ASSERT_REG_POSITION(patch_vertices, 0x373);
ASSERT_REG_POSITION(scissor_test, 0x380);
ASSERT_REG_POSITION(stencil_back_func_ref, 0x3D5);
ASSERT_REG_POSITION(stencil_back_func_mask, 0x3D6);

View File

@@ -98,10 +98,11 @@ union Attribute {
BitField<20, 10, u64> immediate;
BitField<22, 2, u64> element;
BitField<24, 6, Index> index;
BitField<31, 1, u64> patch;
BitField<47, 3, AttributeSize> size;
bool IsPhysical() const {
return element == 0 && static_cast<u64>(index.Value()) == 0;
return patch == 0 && element == 0 && static_cast<u64>(index.Value()) == 0;
}
} fmt20;

View File

@@ -1915,6 +1915,10 @@ private:
return {};
}
Expression InvocationId(Operation operation) {
return {"gl_InvocationID", Type::Int};
}
Expression YNegate(Operation operation) {
return {"y_direction", Type::Float};
}
@@ -2153,6 +2157,7 @@ private:
&GLSLDecompiler::EmitVertex,
&GLSLDecompiler::EndPrimitive,
&GLSLDecompiler::InvocationId,
&GLSLDecompiler::YNegate,
&GLSLDecompiler::LocalInvocationId<0>,
&GLSLDecompiler::LocalInvocationId<1>,

View File

@@ -4,6 +4,17 @@
#pragma once
namespace vk {
class DispatchLoaderDynamic;
}
namespace Vulkan {
constexpr vk::DispatchLoaderDynamic* dont_use_me_dld = nullptr;
}
#define VULKAN_HPP_DEFAULT_DISPATCHER (*::Vulkan::dont_use_me_dld)
#define VULKAN_HPP_ENABLE_DYNAMIC_LOADER_TOOL 0
#define VULKAN_HPP_DISPATCH_LOADER_DYNAMIC 1
#include <vulkan/vulkan.hpp>
namespace Vulkan {
@@ -41,5 +52,7 @@ using UniqueSemaphore = UniqueHandle<vk::Semaphore>;
using UniqueShaderModule = UniqueHandle<vk::ShaderModule>;
using UniqueSwapchainKHR = UniqueHandle<vk::SwapchainKHR>;
using UniqueValidationCacheEXT = UniqueHandle<vk::ValidationCacheEXT>;
using UniqueDebugReportCallbackEXT = UniqueHandle<vk::DebugReportCallbackEXT>;
using UniqueDebugUtilsMessengerEXT = UniqueHandle<vk::DebugUtilsMessengerEXT>;
} // namespace Vulkan

View File

@@ -3,6 +3,7 @@
// Refer to the license.txt file included.
#include <bitset>
#include <cstdlib>
#include <optional>
#include <set>
#include <string_view>
@@ -15,6 +16,15 @@ namespace Vulkan {
namespace {
namespace Alternatives {
constexpr std::array Depth24UnormS8Uint = {vk::Format::eD32SfloatS8Uint,
vk::Format::eD16UnormS8Uint, vk::Format{}};
constexpr std::array Depth16UnormS8Uint = {vk::Format::eD24UnormS8Uint,
vk::Format::eD32SfloatS8Uint, vk::Format{}};
} // namespace Alternatives
template <typename T>
void SetNext(void**& next, T& data) {
*next = &data;
@@ -22,7 +32,7 @@ void SetNext(void**& next, T& data) {
}
template <typename T>
T GetFeatures(vk::PhysicalDevice physical, vk::DispatchLoaderDynamic dldi) {
T GetFeatures(vk::PhysicalDevice physical, const vk::DispatchLoaderDynamic& dldi) {
vk::PhysicalDeviceFeatures2 features;
T extension_features;
features.pNext = &extension_features;
@@ -30,17 +40,14 @@ T GetFeatures(vk::PhysicalDevice physical, vk::DispatchLoaderDynamic dldi) {
return extension_features;
}
} // Anonymous namespace
namespace Alternatives {
constexpr std::array Depth24UnormS8Uint = {vk::Format::eD32SfloatS8Uint,
vk::Format::eD16UnormS8Uint, vk::Format{}};
constexpr std::array Depth16UnormS8Uint = {vk::Format::eD24UnormS8Uint,
vk::Format::eD32SfloatS8Uint, vk::Format{}};
constexpr std::array Astc = {vk::Format::eA8B8G8R8UnormPack32, vk::Format{}};
} // namespace Alternatives
template <typename T>
T GetProperties(vk::PhysicalDevice physical, const vk::DispatchLoaderDynamic& dldi) {
vk::PhysicalDeviceProperties2 properties;
T extension_properties;
properties.pNext = &extension_properties;
physical.getProperties2(&properties, dldi);
return extension_properties;
}
constexpr const vk::Format* GetFormatAlternatives(vk::Format format) {
switch (format) {
@@ -53,8 +60,7 @@ constexpr const vk::Format* GetFormatAlternatives(vk::Format format) {
}
}
constexpr vk::FormatFeatureFlags GetFormatFeatures(vk::FormatProperties properties,
FormatType format_type) {
vk::FormatFeatureFlags GetFormatFeatures(vk::FormatProperties properties, FormatType format_type) {
switch (format_type) {
case FormatType::Linear:
return properties.linearTilingFeatures;
@@ -67,11 +73,13 @@ constexpr vk::FormatFeatureFlags GetFormatFeatures(vk::FormatProperties properti
}
}
} // Anonymous namespace
VKDevice::VKDevice(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical,
vk::SurfaceKHR surface)
: physical{physical}, format_properties{GetFormatProperties(dldi, physical)} {
: physical{physical}, properties{physical.getProperties(dldi)},
format_properties{GetFormatProperties(dldi, physical)} {
SetupFamilies(dldi, surface);
SetupProperties(dldi);
SetupFeatures(dldi);
}
@@ -89,12 +97,22 @@ bool VKDevice::Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instan
features.depthClamp = true;
features.samplerAnisotropy = true;
features.largePoints = true;
features.multiViewport = true;
features.depthBiasClamp = true;
features.geometryShader = true;
features.tessellationShader = true;
features.fragmentStoresAndAtomics = true;
features.shaderImageGatherExtended = true;
features.shaderStorageImageWriteWithoutFormat = true;
features.textureCompressionASTC_LDR = is_optimal_astc_supported;
vk::PhysicalDeviceVertexAttributeDivisorFeaturesEXT vertex_divisor;
vertex_divisor.vertexAttributeInstanceRateDivisor = true;
vertex_divisor.vertexAttributeInstanceRateZeroDivisor = true;
SetNext(next, vertex_divisor);
vk::PhysicalDevice16BitStorageFeaturesKHR bit16_storage;
bit16_storage.uniformAndStorageBuffer16BitAccess = true;
SetNext(next, bit16_storage);
vk::PhysicalDevice8BitStorageFeaturesKHR bit8_storage;
bit8_storage.uniformAndStorageBuffer8BitAccess = true;
SetNext(next, bit8_storage);
vk::PhysicalDeviceFloat16Int8FeaturesKHR float16_int8;
if (is_float16_supported) {
@@ -120,6 +138,10 @@ bool VKDevice::Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instan
LOG_INFO(Render_Vulkan, "Device doesn't support uint8 indexes");
}
if (!ext_depth_range_unrestricted) {
LOG_INFO(Render_Vulkan, "Device doesn't support depth range unrestricted");
}
vk::DeviceCreateInfo device_ci({}, static_cast<u32>(queue_cis.size()), queue_cis.data(), 0,
nullptr, static_cast<u32>(extensions.size()), extensions.data(),
nullptr);
@@ -135,16 +157,7 @@ bool VKDevice::Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instan
logical = UniqueDevice(
dummy_logical, vk::ObjectDestroy<vk::NoParent, vk::DispatchLoaderDynamic>(nullptr, dld));
if (khr_driver_properties) {
vk::PhysicalDeviceDriverPropertiesKHR driver;
vk::PhysicalDeviceProperties2 properties;
properties.pNext = &driver;
physical.getProperties2(&properties, dld);
driver_id = driver.driverID;
LOG_INFO(Render_Vulkan, "Driver: {} {}", driver.driverName, driver.driverInfo);
} else {
LOG_INFO(Render_Vulkan, "Driver: Unknown");
}
CollectTelemetryParameters();
graphics_queue = logical->getQueue(graphics_family, 0, dld);
present_queue = logical->getQueue(present_family, 0, dld);
@@ -190,6 +203,18 @@ vk::Format VKDevice::GetSupportedFormat(vk::Format wanted_format,
bool VKDevice::IsOptimalAstcSupported(const vk::PhysicalDeviceFeatures& features,
const vk::DispatchLoaderDynamic& dldi) const {
// Disable for now to avoid converting ASTC twice.
return false;
static constexpr std::array astc_formats = {
vk::Format::eAstc4x4SrgbBlock, vk::Format::eAstc8x8SrgbBlock,
vk::Format::eAstc8x5SrgbBlock, vk::Format::eAstc5x4SrgbBlock,
vk::Format::eAstc5x5UnormBlock, vk::Format::eAstc5x5SrgbBlock,
vk::Format::eAstc10x8UnormBlock, vk::Format::eAstc10x8SrgbBlock,
vk::Format::eAstc6x6UnormBlock, vk::Format::eAstc6x6SrgbBlock,
vk::Format::eAstc10x10UnormBlock, vk::Format::eAstc10x10SrgbBlock,
vk::Format::eAstc12x12UnormBlock, vk::Format::eAstc12x12SrgbBlock,
vk::Format::eAstc8x6UnormBlock, vk::Format::eAstc8x6SrgbBlock,
vk::Format::eAstc6x5UnormBlock, vk::Format::eAstc6x5SrgbBlock};
if (!features.textureCompressionASTC_LDR) {
return false;
}
@@ -197,12 +222,6 @@ bool VKDevice::IsOptimalAstcSupported(const vk::PhysicalDeviceFeatures& features
vk::FormatFeatureFlagBits::eSampledImage | vk::FormatFeatureFlagBits::eBlitSrc |
vk::FormatFeatureFlagBits::eBlitDst | vk::FormatFeatureFlagBits::eTransferSrc |
vk::FormatFeatureFlagBits::eTransferDst};
constexpr std::array astc_formats = {
vk::Format::eAstc4x4UnormBlock, vk::Format::eAstc4x4SrgbBlock,
vk::Format::eAstc8x8SrgbBlock, vk::Format::eAstc8x6SrgbBlock,
vk::Format::eAstc5x4SrgbBlock, vk::Format::eAstc5x5UnormBlock,
vk::Format::eAstc5x5SrgbBlock, vk::Format::eAstc10x8UnormBlock,
vk::Format::eAstc10x8SrgbBlock};
for (const auto format : astc_formats) {
const auto format_properties{physical.getFormatProperties(format, dldi)};
if (!(format_properties.optimalTilingFeatures & format_feature_usage)) {
@@ -225,11 +244,17 @@ bool VKDevice::IsFormatSupported(vk::Format wanted_format, vk::FormatFeatureFlag
bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical,
vk::SurfaceKHR surface) {
LOG_INFO(Render_Vulkan, "{}", physical.getProperties(dldi).deviceName);
bool is_suitable = true;
constexpr std::array required_extensions = {VK_KHR_SWAPCHAIN_EXTENSION_NAME,
VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME};
constexpr std::array required_extensions = {
VK_KHR_SWAPCHAIN_EXTENSION_NAME,
VK_KHR_16BIT_STORAGE_EXTENSION_NAME,
VK_KHR_8BIT_STORAGE_EXTENSION_NAME,
VK_KHR_DRIVER_PROPERTIES_EXTENSION_NAME,
VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME,
VK_EXT_SHADER_SUBGROUP_BALLOT_EXTENSION_NAME,
VK_EXT_SHADER_SUBGROUP_VOTE_EXTENSION_NAME,
};
std::bitset<required_extensions.size()> available_extensions{};
for (const auto& prop : physical.enumerateDeviceExtensionProperties(nullptr, dldi)) {
@@ -246,7 +271,7 @@ bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDev
if (available_extensions[i]) {
continue;
}
LOG_INFO(Render_Vulkan, "Missing required extension: {}", required_extensions[i]);
LOG_ERROR(Render_Vulkan, "Missing required extension: {}", required_extensions[i]);
is_suitable = false;
}
}
@@ -263,7 +288,7 @@ bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDev
has_present |= physical.getSurfaceSupportKHR(i, surface, dldi) != 0;
}
if (!has_graphics || !has_present) {
LOG_INFO(Render_Vulkan, "Device lacks a graphics and present queue");
LOG_ERROR(Render_Vulkan, "Device lacks a graphics and present queue");
is_suitable = false;
}
@@ -273,8 +298,15 @@ bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDev
constexpr u32 required_ubo_size = 65536;
if (limits.maxUniformBufferRange < required_ubo_size) {
LOG_INFO(Render_Vulkan, "Device UBO size {} is too small, {} is required)",
limits.maxUniformBufferRange, required_ubo_size);
LOG_ERROR(Render_Vulkan, "Device UBO size {} is too small, {} is required",
limits.maxUniformBufferRange, required_ubo_size);
is_suitable = false;
}
constexpr u32 required_num_viewports = 16;
if (limits.maxViewports < required_num_viewports) {
LOG_INFO(Render_Vulkan, "Device number of viewports {} is too small, {} is required",
limits.maxViewports, required_num_viewports);
is_suitable = false;
}
@@ -285,24 +317,32 @@ bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDev
std::make_pair(features.depthClamp, "depthClamp"),
std::make_pair(features.samplerAnisotropy, "samplerAnisotropy"),
std::make_pair(features.largePoints, "largePoints"),
std::make_pair(features.multiViewport, "multiViewport"),
std::make_pair(features.depthBiasClamp, "depthBiasClamp"),
std::make_pair(features.geometryShader, "geometryShader"),
std::make_pair(features.tessellationShader, "tessellationShader"),
std::make_pair(features.fragmentStoresAndAtomics, "fragmentStoresAndAtomics"),
std::make_pair(features.shaderImageGatherExtended, "shaderImageGatherExtended"),
std::make_pair(features.shaderStorageImageWriteWithoutFormat,
"shaderStorageImageWriteWithoutFormat"),
};
for (const auto& [supported, name] : feature_report) {
if (supported) {
continue;
}
LOG_INFO(Render_Vulkan, "Missing required feature: {}", name);
LOG_ERROR(Render_Vulkan, "Missing required feature: {}", name);
is_suitable = false;
}
if (!is_suitable) {
LOG_ERROR(Render_Vulkan, "{} is not suitable", properties.deviceName);
}
return is_suitable;
}
std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynamic& dldi) {
std::vector<const char*> extensions;
extensions.reserve(7);
extensions.push_back(VK_KHR_SWAPCHAIN_EXTENSION_NAME);
extensions.push_back(VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME);
const auto Test = [&](const vk::ExtensionProperties& extension,
std::optional<std::reference_wrapper<bool>> status, const char* name,
bool push) {
@@ -317,13 +357,30 @@ std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynami
}
};
extensions.reserve(13);
extensions.push_back(VK_KHR_SWAPCHAIN_EXTENSION_NAME);
extensions.push_back(VK_KHR_16BIT_STORAGE_EXTENSION_NAME);
extensions.push_back(VK_KHR_8BIT_STORAGE_EXTENSION_NAME);
extensions.push_back(VK_KHR_DRIVER_PROPERTIES_EXTENSION_NAME);
extensions.push_back(VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME);
extensions.push_back(VK_EXT_SHADER_SUBGROUP_BALLOT_EXTENSION_NAME);
extensions.push_back(VK_EXT_SHADER_SUBGROUP_VOTE_EXTENSION_NAME);
[[maybe_unused]] const bool nsight =
std::getenv("NVTX_INJECTION64_PATH") || std::getenv("NSIGHT_LAUNCHED");
bool khr_shader_float16_int8{};
bool ext_subgroup_size_control{};
for (const auto& extension : physical.enumerateDeviceExtensionProperties(nullptr, dldi)) {
Test(extension, khr_uniform_buffer_standard_layout,
VK_KHR_UNIFORM_BUFFER_STANDARD_LAYOUT_EXTENSION_NAME, true);
Test(extension, ext_index_type_uint8, VK_EXT_INDEX_TYPE_UINT8_EXTENSION_NAME, true);
Test(extension, khr_driver_properties, VK_KHR_DRIVER_PROPERTIES_EXTENSION_NAME, true);
Test(extension, khr_shader_float16_int8, VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME, false);
Test(extension, ext_depth_range_unrestricted,
VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME, true);
Test(extension, ext_index_type_uint8, VK_EXT_INDEX_TYPE_UINT8_EXTENSION_NAME, true);
Test(extension, ext_shader_viewport_index_layer,
VK_EXT_SHADER_VIEWPORT_INDEX_LAYER_EXTENSION_NAME, true);
Test(extension, ext_subgroup_size_control, VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME,
false);
}
if (khr_shader_float16_int8) {
@@ -332,6 +389,23 @@ std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynami
extensions.push_back(VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME);
}
if (ext_subgroup_size_control) {
const auto features =
GetFeatures<vk::PhysicalDeviceSubgroupSizeControlFeaturesEXT>(physical, dldi);
const auto properties =
GetProperties<vk::PhysicalDeviceSubgroupSizeControlPropertiesEXT>(physical, dldi);
is_warp_potentially_bigger = properties.maxSubgroupSize > GuestWarpSize;
if (features.subgroupSizeControl && properties.minSubgroupSize <= GuestWarpSize &&
properties.maxSubgroupSize >= GuestWarpSize) {
extensions.push_back(VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME);
guest_warp_stages = properties.requiredSubgroupSizeStages;
}
} else {
is_warp_potentially_bigger = true;
}
return extensions;
}
@@ -358,19 +432,23 @@ void VKDevice::SetupFamilies(const vk::DispatchLoaderDynamic& dldi, vk::SurfaceK
present_family = *present_family_;
}
void VKDevice::SetupProperties(const vk::DispatchLoaderDynamic& dldi) {
const auto props = physical.getProperties(dldi);
device_type = props.deviceType;
uniform_buffer_alignment = static_cast<u64>(props.limits.minUniformBufferOffsetAlignment);
storage_buffer_alignment = static_cast<u64>(props.limits.minStorageBufferOffsetAlignment);
max_storage_buffer_range = static_cast<u64>(props.limits.maxStorageBufferRange);
}
void VKDevice::SetupFeatures(const vk::DispatchLoaderDynamic& dldi) {
const auto supported_features{physical.getFeatures(dldi)};
is_optimal_astc_supported = IsOptimalAstcSupported(supported_features, dldi);
}
void VKDevice::CollectTelemetryParameters() {
const auto driver = GetProperties<vk::PhysicalDeviceDriverPropertiesKHR>(physical, dld);
driver_id = driver.driverID;
vendor_name = driver.driverName;
const auto extensions = physical.enumerateDeviceExtensionProperties(nullptr, dld);
reported_extensions.reserve(std::size(extensions));
for (const auto& extension : extensions) {
reported_extensions.push_back(extension.extensionName);
}
}
std::vector<vk::DeviceQueueCreateInfo> VKDevice::GetDeviceQueueCreateInfos() const {
static const float QUEUE_PRIORITY = 1.0f;
@@ -385,50 +463,70 @@ std::vector<vk::DeviceQueueCreateInfo> VKDevice::GetDeviceQueueCreateInfos() con
std::unordered_map<vk::Format, vk::FormatProperties> VKDevice::GetFormatProperties(
const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical) {
constexpr std::array formats{vk::Format::eA8B8G8R8UnormPack32,
vk::Format::eA8B8G8R8SnormPack32,
vk::Format::eA8B8G8R8SrgbPack32,
vk::Format::eB5G6R5UnormPack16,
vk::Format::eA2B10G10R10UnormPack32,
vk::Format::eR32G32B32A32Sfloat,
vk::Format::eR16G16B16A16Uint,
vk::Format::eR16G16Unorm,
vk::Format::eR16G16Snorm,
vk::Format::eR16G16Sfloat,
vk::Format::eR16Unorm,
vk::Format::eR8G8B8A8Srgb,
vk::Format::eR8G8Unorm,
vk::Format::eR8G8Snorm,
vk::Format::eR8Unorm,
vk::Format::eB10G11R11UfloatPack32,
vk::Format::eR32Sfloat,
vk::Format::eR16Sfloat,
vk::Format::eR16G16B16A16Sfloat,
vk::Format::eB8G8R8A8Unorm,
vk::Format::eD32Sfloat,
vk::Format::eD16Unorm,
vk::Format::eD16UnormS8Uint,
vk::Format::eD24UnormS8Uint,
vk::Format::eD32SfloatS8Uint,
vk::Format::eBc1RgbaUnormBlock,
vk::Format::eBc2UnormBlock,
vk::Format::eBc3UnormBlock,
vk::Format::eBc4UnormBlock,
vk::Format::eBc5UnormBlock,
vk::Format::eBc5SnormBlock,
vk::Format::eBc7UnormBlock,
vk::Format::eBc1RgbaSrgbBlock,
vk::Format::eBc3SrgbBlock,
vk::Format::eBc7SrgbBlock,
vk::Format::eAstc4x4UnormBlock,
vk::Format::eAstc4x4SrgbBlock,
vk::Format::eAstc8x8SrgbBlock,
vk::Format::eAstc8x6SrgbBlock,
vk::Format::eAstc5x4SrgbBlock,
vk::Format::eAstc5x5UnormBlock,
vk::Format::eAstc5x5SrgbBlock,
vk::Format::eAstc10x8UnormBlock,
vk::Format::eAstc10x8SrgbBlock};
static constexpr std::array formats{vk::Format::eA8B8G8R8UnormPack32,
vk::Format::eA8B8G8R8SnormPack32,
vk::Format::eA8B8G8R8SrgbPack32,
vk::Format::eB5G6R5UnormPack16,
vk::Format::eA2B10G10R10UnormPack32,
vk::Format::eA1R5G5B5UnormPack16,
vk::Format::eR32G32B32A32Sfloat,
vk::Format::eR32G32B32A32Uint,
vk::Format::eR32G32Sfloat,
vk::Format::eR32G32Uint,
vk::Format::eR16G16B16A16Uint,
vk::Format::eR16G16B16A16Unorm,
vk::Format::eR16G16Unorm,
vk::Format::eR16G16Snorm,
vk::Format::eR16G16Sfloat,
vk::Format::eR16Unorm,
vk::Format::eR8G8B8A8Srgb,
vk::Format::eR8G8Unorm,
vk::Format::eR8G8Snorm,
vk::Format::eR8Unorm,
vk::Format::eR8Uint,
vk::Format::eB10G11R11UfloatPack32,
vk::Format::eR32Sfloat,
vk::Format::eR32Uint,
vk::Format::eR16Sfloat,
vk::Format::eR16G16B16A16Sfloat,
vk::Format::eB8G8R8A8Unorm,
vk::Format::eR4G4B4A4UnormPack16,
vk::Format::eD32Sfloat,
vk::Format::eD16Unorm,
vk::Format::eD16UnormS8Uint,
vk::Format::eD24UnormS8Uint,
vk::Format::eD32SfloatS8Uint,
vk::Format::eBc1RgbaUnormBlock,
vk::Format::eBc2UnormBlock,
vk::Format::eBc3UnormBlock,
vk::Format::eBc4UnormBlock,
vk::Format::eBc5UnormBlock,
vk::Format::eBc5SnormBlock,
vk::Format::eBc7UnormBlock,
vk::Format::eBc6HUfloatBlock,
vk::Format::eBc6HSfloatBlock,
vk::Format::eBc1RgbaSrgbBlock,
vk::Format::eBc3SrgbBlock,
vk::Format::eBc7SrgbBlock,
vk::Format::eAstc4x4SrgbBlock,
vk::Format::eAstc8x8SrgbBlock,
vk::Format::eAstc8x5SrgbBlock,
vk::Format::eAstc5x4SrgbBlock,
vk::Format::eAstc5x5UnormBlock,
vk::Format::eAstc5x5SrgbBlock,
vk::Format::eAstc10x8UnormBlock,
vk::Format::eAstc10x8SrgbBlock,
vk::Format::eAstc6x6UnormBlock,
vk::Format::eAstc6x6SrgbBlock,
vk::Format::eAstc10x10UnormBlock,
vk::Format::eAstc10x10SrgbBlock,
vk::Format::eAstc12x12UnormBlock,
vk::Format::eAstc12x12SrgbBlock,
vk::Format::eAstc8x6UnormBlock,
vk::Format::eAstc8x6SrgbBlock,
vk::Format::eAstc6x5UnormBlock,
vk::Format::eAstc6x5SrgbBlock,
vk::Format::eE5B9G9R9UfloatPack32};
std::unordered_map<vk::Format, vk::FormatProperties> format_properties;
for (const auto format : formats) {
format_properties.emplace(format, physical.getFormatProperties(format, dldi));

View File

@@ -4,6 +4,8 @@
#pragma once
#include <string>
#include <string_view>
#include <unordered_map>
#include <vector>
#include "common/common_types.h"
@@ -14,6 +16,9 @@ namespace Vulkan {
/// Format usage descriptor.
enum class FormatType { Linear, Optimal, Buffer };
/// Subgroup size of the guest emulated hardware (Nvidia has 32 threads per subgroup).
const u32 GuestWarpSize = 32;
/// Handles data specific to a physical device.
class VKDevice final {
public:
@@ -71,7 +76,22 @@ public:
/// Returns true if the device is integrated with the host CPU.
bool IsIntegrated() const {
return device_type == vk::PhysicalDeviceType::eIntegratedGpu;
return properties.deviceType == vk::PhysicalDeviceType::eIntegratedGpu;
}
/// Returns the current Vulkan API version provided in Vulkan-formatted version numbers.
u32 GetApiVersion() const {
return properties.apiVersion;
}
/// Returns the current driver version provided in Vulkan-formatted version numbers.
u32 GetDriverVersion() const {
return properties.driverVersion;
}
/// Returns the device name.
std::string_view GetModelName() const {
return properties.deviceName;
}
/// Returns the driver ID.
@@ -80,18 +100,23 @@ public:
}
/// Returns uniform buffer alignment requeriment.
u64 GetUniformBufferAlignment() const {
return uniform_buffer_alignment;
vk::DeviceSize GetUniformBufferAlignment() const {
return properties.limits.minUniformBufferOffsetAlignment;
}
/// Returns storage alignment requeriment.
u64 GetStorageBufferAlignment() const {
return storage_buffer_alignment;
vk::DeviceSize GetStorageBufferAlignment() const {
return properties.limits.minStorageBufferOffsetAlignment;
}
/// Returns the maximum range for storage buffers.
u64 GetMaxStorageBufferRange() const {
return max_storage_buffer_range;
vk::DeviceSize GetMaxStorageBufferRange() const {
return properties.limits.maxStorageBufferRange;
}
/// Returns the maximum size for push constants.
vk::DeviceSize GetMaxPushConstantsSize() const {
return properties.limits.maxPushConstantsSize;
}
/// Returns true if ASTC is natively supported.
@@ -104,6 +129,16 @@ public:
return is_float16_supported;
}
/// Returns true if the device warp size can potentially be bigger than guest's warp size.
bool IsWarpSizePotentiallyBiggerThanGuest() const {
return is_warp_potentially_bigger;
}
/// Returns true if the device can be forced to use the guest warp size.
bool IsGuestWarpSizeSupported(vk::ShaderStageFlagBits stage) const {
return (guest_warp_stages & stage) != vk::ShaderStageFlags{};
}
/// Returns true if the device supports VK_EXT_scalar_block_layout.
bool IsKhrUniformBufferStandardLayoutSupported() const {
return khr_uniform_buffer_standard_layout;
@@ -114,6 +149,26 @@ public:
return ext_index_type_uint8;
}
/// Returns true if the device supports VK_EXT_depth_range_unrestricted.
bool IsExtDepthRangeUnrestrictedSupported() const {
return ext_depth_range_unrestricted;
}
/// Returns true if the device supports VK_EXT_shader_viewport_index_layer.
bool IsExtShaderViewportIndexLayerSupported() const {
return ext_shader_viewport_index_layer;
}
/// Returns the vendor name reported from Vulkan.
std::string_view GetVendorName() const {
return vendor_name;
}
/// Returns the list of available extensions.
const std::vector<std::string>& GetAvailableExtensions() const {
return reported_extensions;
}
/// Checks if the physical device is suitable.
static bool IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical,
vk::SurfaceKHR surface);
@@ -125,12 +180,12 @@ private:
/// Sets up queue families.
void SetupFamilies(const vk::DispatchLoaderDynamic& dldi, vk::SurfaceKHR surface);
/// Sets up device properties.
void SetupProperties(const vk::DispatchLoaderDynamic& dldi);
/// Sets up device features.
void SetupFeatures(const vk::DispatchLoaderDynamic& dldi);
/// Collects telemetry information from the device.
void CollectTelemetryParameters();
/// Returns a list of queue initialization descriptors.
std::vector<vk::DeviceQueueCreateInfo> GetDeviceQueueCreateInfos() const;
@@ -148,23 +203,28 @@ private:
const vk::PhysicalDevice physical; ///< Physical device.
vk::DispatchLoaderDynamic dld; ///< Device function pointers.
vk::PhysicalDeviceProperties properties; ///< Device properties.
UniqueDevice logical; ///< Logical device.
vk::Queue graphics_queue; ///< Main graphics queue.
vk::Queue present_queue; ///< Main present queue.
u32 graphics_family{}; ///< Main graphics queue family index.
u32 present_family{}; ///< Main present queue family index.
vk::PhysicalDeviceType device_type; ///< Physical device type.
vk::DriverIdKHR driver_id{}; ///< Driver ID.
u64 uniform_buffer_alignment{}; ///< Uniform buffer alignment requeriment.
u64 storage_buffer_alignment{}; ///< Storage buffer alignment requeriment.
u64 max_storage_buffer_range{}; ///< Max storage buffer size.
vk::ShaderStageFlags guest_warp_stages{}; ///< Stages where the guest warp size can be forced.
bool is_optimal_astc_supported{}; ///< Support for native ASTC.
bool is_float16_supported{}; ///< Support for float16 arithmetics.
bool is_warp_potentially_bigger{}; ///< Host warp size can be bigger than guest.
bool khr_uniform_buffer_standard_layout{}; ///< Support for std430 on UBOs.
bool ext_index_type_uint8{}; ///< Support for VK_EXT_index_type_uint8.
bool khr_driver_properties{}; ///< Support for VK_KHR_driver_properties.
std::unordered_map<vk::Format, vk::FormatProperties>
format_properties; ///< Format properties dictionary.
bool ext_depth_range_unrestricted{}; ///< Support for VK_EXT_depth_range_unrestricted.
bool ext_shader_viewport_index_layer{}; ///< Support for VK_EXT_shader_viewport_index_layer.
// Telemetry parameters
std::string vendor_name; ///< Device's driver name.
std::vector<std::string> reported_extensions; ///< Reported Vulkan extensions.
/// Format properties dictionary.
std::unordered_map<vk::Format, vk::FormatProperties> format_properties;
};
} // namespace Vulkan

File diff suppressed because it is too large Load Diff

View File

@@ -5,29 +5,28 @@
#pragma once
#include <array>
#include <bitset>
#include <memory>
#include <set>
#include <type_traits>
#include <utility>
#include <vector>
#include <sirit/sirit.h>
#include "common/common_types.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/engines/shader_type.h"
#include "video_core/shader/shader_ir.h"
namespace VideoCommon::Shader {
class ShaderIR;
}
namespace Vulkan {
class VKDevice;
}
namespace Vulkan::VKShader {
namespace Vulkan {
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
using TexelBufferEntry = VideoCommon::Shader::Sampler;
using SamplerEntry = VideoCommon::Shader::Sampler;
using ImageEntry = VideoCommon::Shader::Image;
constexpr u32 DESCRIPTOR_SET = 0;
@@ -46,39 +45,74 @@ private:
class GlobalBufferEntry {
public:
explicit GlobalBufferEntry(u32 cbuf_index, u32 cbuf_offset)
: cbuf_index{cbuf_index}, cbuf_offset{cbuf_offset} {}
constexpr explicit GlobalBufferEntry(u32 cbuf_index, u32 cbuf_offset, bool is_written)
: cbuf_index{cbuf_index}, cbuf_offset{cbuf_offset}, is_written{is_written} {}
u32 GetCbufIndex() const {
constexpr u32 GetCbufIndex() const {
return cbuf_index;
}
u32 GetCbufOffset() const {
constexpr u32 GetCbufOffset() const {
return cbuf_offset;
}
constexpr bool IsWritten() const {
return is_written;
}
private:
u32 cbuf_index{};
u32 cbuf_offset{};
bool is_written{};
};
struct ShaderEntries {
u32 const_buffers_base_binding{};
u32 global_buffers_base_binding{};
u32 samplers_base_binding{};
u32 NumBindings() const {
return static_cast<u32>(const_buffers.size() + global_buffers.size() +
texel_buffers.size() + samplers.size() + images.size());
}
std::vector<ConstBufferEntry> const_buffers;
std::vector<GlobalBufferEntry> global_buffers;
std::vector<TexelBufferEntry> texel_buffers;
std::vector<SamplerEntry> samplers;
std::vector<ImageEntry> images;
std::set<u32> attributes;
std::array<bool, Maxwell::NumClipDistances> clip_distances{};
std::size_t shader_length{};
Sirit::Id entry_function{};
std::vector<Sirit::Id> interfaces;
bool uses_warps{};
};
using DecompilerResult = std::pair<std::unique_ptr<Sirit::Module>, ShaderEntries>;
struct Specialization final {
u32 base_binding{};
DecompilerResult Decompile(const VKDevice& device, const VideoCommon::Shader::ShaderIR& ir,
Tegra::Engines::ShaderType stage);
// Compute specific
std::array<u32, 3> workgroup_size{};
u32 shared_memory_size{};
} // namespace Vulkan::VKShader
// Graphics specific
Maxwell::PrimitiveTopology primitive_topology{};
std::optional<float> point_size{};
std::array<Maxwell::VertexAttribute::Type, Maxwell::NumVertexAttributes> attribute_types{};
// Tessellation specific
struct {
Maxwell::TessellationPrimitive primitive{};
Maxwell::TessellationSpacing spacing{};
bool clockwise{};
} tessellation;
};
// Old gcc versions don't consider this trivially copyable.
// static_assert(std::is_trivially_copyable_v<Specialization>);
struct SPIRVShader {
std::vector<u32> code;
ShaderEntries entries;
};
ShaderEntries GenerateShaderEntries(const VideoCommon::Shader::ShaderIR& ir);
std::vector<u32> Decompile(const VKDevice& device, const VideoCommon::Shader::ShaderIR& ir,
Tegra::Engines::ShaderType stage, const Specialization& specialization);
} // namespace Vulkan

View File

@@ -19,12 +19,18 @@
namespace Vulkan {
namespace {
vk::SurfaceFormatKHR ChooseSwapSurfaceFormat(const std::vector<vk::SurfaceFormatKHR>& formats) {
vk::SurfaceFormatKHR ChooseSwapSurfaceFormat(const std::vector<vk::SurfaceFormatKHR>& formats,
bool srgb) {
if (formats.size() == 1 && formats[0].format == vk::Format::eUndefined) {
return {vk::Format::eB8G8R8A8Unorm, vk::ColorSpaceKHR::eSrgbNonlinear};
vk::SurfaceFormatKHR format;
format.format = vk::Format::eB8G8R8A8Unorm;
format.colorSpace = vk::ColorSpaceKHR::eSrgbNonlinear;
return format;
}
const auto& found = std::find_if(formats.begin(), formats.end(), [](const auto& format) {
return format.format == vk::Format::eB8G8R8A8Unorm &&
const auto& found = std::find_if(formats.begin(), formats.end(), [srgb](const auto& format) {
const auto request_format = srgb ? vk::Format::eB8G8R8A8Srgb : vk::Format::eB8G8R8A8Unorm;
return format.format == request_format &&
format.colorSpace == vk::ColorSpaceKHR::eSrgbNonlinear;
});
return found != formats.end() ? *found : formats[0];
@@ -51,28 +57,26 @@ vk::Extent2D ChooseSwapExtent(const vk::SurfaceCapabilitiesKHR& capabilities, u3
std::min(capabilities.maxImageExtent.height, extent.height));
return extent;
}
} // namespace
} // Anonymous namespace
VKSwapchain::VKSwapchain(vk::SurfaceKHR surface, const VKDevice& device)
: surface{surface}, device{device} {}
VKSwapchain::~VKSwapchain() = default;
void VKSwapchain::Create(u32 width, u32 height) {
const auto dev = device.GetLogical();
void VKSwapchain::Create(u32 width, u32 height, bool srgb) {
const auto& dld = device.GetDispatchLoader();
const auto physical_device = device.GetPhysical();
const vk::SurfaceCapabilitiesKHR capabilities{
physical_device.getSurfaceCapabilitiesKHR(surface, dld)};
const auto capabilities{physical_device.getSurfaceCapabilitiesKHR(surface, dld)};
if (capabilities.maxImageExtent.width == 0 || capabilities.maxImageExtent.height == 0) {
return;
}
dev.waitIdle(dld);
device.GetLogical().waitIdle(dld);
Destroy();
CreateSwapchain(capabilities, width, height);
CreateSwapchain(capabilities, width, height, srgb);
CreateSemaphores();
CreateImageViews();
@@ -107,7 +111,7 @@ bool VKSwapchain::Present(vk::Semaphore render_semaphore, VKFence& fence) {
break;
case vk::Result::eErrorOutOfDateKHR:
if (current_width > 0 && current_height > 0) {
Create(current_width, current_height);
Create(current_width, current_height, current_srgb);
recreated = true;
}
break;
@@ -129,23 +133,19 @@ bool VKSwapchain::HasFramebufferChanged(const Layout::FramebufferLayout& framebu
}
void VKSwapchain::CreateSwapchain(const vk::SurfaceCapabilitiesKHR& capabilities, u32 width,
u32 height) {
const auto dev{device.GetLogical()};
u32 height, bool srgb) {
const auto& dld{device.GetDispatchLoader()};
const auto physical_device{device.GetPhysical()};
const auto formats{physical_device.getSurfaceFormatsKHR(surface, dld)};
const auto present_modes{physical_device.getSurfacePresentModesKHR(surface, dld)};
const std::vector<vk::SurfaceFormatKHR> formats{
physical_device.getSurfaceFormatsKHR(surface, dld)};
const std::vector<vk::PresentModeKHR> present_modes{
physical_device.getSurfacePresentModesKHR(surface, dld)};
const vk::SurfaceFormatKHR surface_format{ChooseSwapSurfaceFormat(formats)};
const vk::SurfaceFormatKHR surface_format{ChooseSwapSurfaceFormat(formats, srgb)};
const vk::PresentModeKHR present_mode{ChooseSwapPresentMode(present_modes)};
extent = ChooseSwapExtent(capabilities, width, height);
current_width = extent.width;
current_height = extent.height;
current_srgb = srgb;
u32 requested_image_count{capabilities.minImageCount + 1};
if (capabilities.maxImageCount > 0 && requested_image_count > capabilities.maxImageCount) {
@@ -169,6 +169,7 @@ void VKSwapchain::CreateSwapchain(const vk::SurfaceCapabilitiesKHR& capabilities
swapchain_ci.imageSharingMode = vk::SharingMode::eExclusive;
}
const auto dev{device.GetLogical()};
swapchain = dev.createSwapchainKHRUnique(swapchain_ci, nullptr, dld);
images = dev.getSwapchainImagesKHR(*swapchain, dld);

View File

@@ -24,7 +24,7 @@ public:
~VKSwapchain();
/// Creates (or recreates) the swapchain with a given size.
void Create(u32 width, u32 height);
void Create(u32 width, u32 height, bool srgb);
/// Acquires the next image in the swapchain, waits as needed.
void AcquireNextImage();
@@ -60,8 +60,13 @@ public:
return image_format;
}
bool GetSrgbState() const {
return current_srgb;
}
private:
void CreateSwapchain(const vk::SurfaceCapabilitiesKHR& capabilities, u32 width, u32 height);
void CreateSwapchain(const vk::SurfaceCapabilitiesKHR& capabilities, u32 width, u32 height,
bool srgb);
void CreateSemaphores();
void CreateImageViews();
@@ -87,6 +92,7 @@ private:
u32 current_width{};
u32 current_height{};
bool current_srgb{};
};
} // namespace Vulkan

View File

@@ -21,6 +21,7 @@ using Tegra::Shader::OpCode;
using Tegra::Shader::Register;
namespace {
u32 GetUniformTypeElementsCount(Tegra::Shader::UniformType uniform_type) {
switch (uniform_type) {
case Tegra::Shader::UniformType::Single:
@@ -35,6 +36,7 @@ u32 GetUniformTypeElementsCount(Tegra::Shader::UniformType uniform_type) {
return 1;
}
}
} // Anonymous namespace
u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
@@ -196,28 +198,28 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
UNIMPLEMENTED_IF_MSG((instr.attribute.fmt20.immediate.Value() % sizeof(u32)) != 0,
"Unaligned attribute loads are not supported");
u64 next_element = instr.attribute.fmt20.element;
auto next_index = static_cast<u64>(instr.attribute.fmt20.index.Value());
u64 element = instr.attribute.fmt20.element;
auto index = static_cast<u64>(instr.attribute.fmt20.index.Value());
const auto StoreNextElement = [&](u32 reg_offset) {
const auto dest = GetOutputAttribute(static_cast<Attribute::Index>(next_index),
next_element, GetRegister(instr.gpr39));
const u32 num_words = static_cast<u32>(instr.attribute.fmt20.size.Value()) + 1;
for (u32 reg_offset = 0; reg_offset < num_words; ++reg_offset) {
Node dest;
if (instr.attribute.fmt20.patch) {
const u32 offset = static_cast<u32>(index) * 4 + static_cast<u32>(element);
dest = MakeNode<PatchNode>(offset);
} else {
dest = GetOutputAttribute(static_cast<Attribute::Index>(index), element,
GetRegister(instr.gpr39));
}
const auto src = GetRegister(instr.gpr0.Value() + reg_offset);
bb.push_back(Operation(OperationCode::Assign, dest, src));
// Load the next attribute element into the following register. If the element
// to load goes beyond the vec4 size, load the first element of the next
// attribute.
next_element = (next_element + 1) % 4;
next_index = next_index + (next_element == 0 ? 1 : 0);
};
const u32 num_words = static_cast<u32>(instr.attribute.fmt20.size.Value()) + 1;
for (u32 reg_offset = 0; reg_offset < num_words; ++reg_offset) {
StoreNextElement(reg_offset);
// Load the next attribute element into the following register. If the element to load
// goes beyond the vec4 size, load the first element of the next attribute.
element = (element + 1) % 4;
index = index + (element == 0 ? 1 : 0);
}
break;
}
case OpCode::Id::ST_L:

View File

@@ -69,6 +69,8 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
case OpCode::Id::MOV_SYS: {
const Node value = [this, instr] {
switch (instr.sys20) {
case SystemVariable::InvocationId:
return Operation(OperationCode::InvocationId);
case SystemVariable::Ydirection:
return Operation(OperationCode::YNegate);
case SystemVariable::InvocationInfo:

View File

@@ -38,6 +38,9 @@ u32 ShaderIR::DecodeWarp(NodeBlock& bb, u32 pc) {
const Instruction instr = {program_code[pc]};
const auto opcode = OpCode::Decode(instr);
// Signal the backend that this shader uses warp instructions.
uses_warps = true;
switch (opcode->get().GetId()) {
case OpCode::Id::VOTE: {
const Node value = GetPredicate(instr.vote.value, instr.vote.negate_value != 0);

View File

@@ -172,6 +172,7 @@ enum class OperationCode {
EmitVertex, /// () -> void
EndPrimitive, /// () -> void
InvocationId, /// () -> int
YNegate, /// () -> float
LocalInvocationIdX, /// () -> uint
LocalInvocationIdY, /// () -> uint
@@ -213,13 +214,14 @@ class PredicateNode;
class AbufNode;
class CbufNode;
class LmemNode;
class PatchNode;
class SmemNode;
class GmemNode;
class CommentNode;
using NodeData =
std::variant<OperationNode, ConditionalNode, GprNode, ImmediateNode, InternalFlagNode,
PredicateNode, AbufNode, CbufNode, LmemNode, SmemNode, GmemNode, CommentNode>;
using NodeData = std::variant<OperationNode, ConditionalNode, GprNode, ImmediateNode,
InternalFlagNode, PredicateNode, AbufNode, PatchNode, CbufNode,
LmemNode, SmemNode, GmemNode, CommentNode>;
using Node = std::shared_ptr<NodeData>;
using Node4 = std::array<Node, 4>;
using NodeBlock = std::vector<Node>;
@@ -542,6 +544,19 @@ private:
u32 element{};
};
/// Patch memory (used to communicate tessellation stages).
class PatchNode final {
public:
explicit PatchNode(u32 offset) : offset{offset} {}
u32 GetOffset() const {
return offset;
}
private:
u32 offset{};
};
/// Constant buffer node, usually mapped to uniform buffers in GLSL
class CbufNode final {
public:

View File

@@ -137,6 +137,10 @@ public:
return uses_vertex_id;
}
bool UsesWarps() const {
return uses_warps;
}
bool HasPhysicalAttributes() const {
return uses_physical_attributes;
}
@@ -415,6 +419,7 @@ private:
bool uses_physical_attributes{}; // Shader uses AL2P or physical attribute read/writes
bool uses_instance_id{};
bool uses_vertex_id{};
bool uses_warps{};
Tegra::Shader::Header header;
};

View File

@@ -7,6 +7,7 @@
#include <variant>
#include "common/common_types.h"
#include "video_core/shader/node.h"
#include "video_core/shader/shader_ir.h"
namespace VideoCommon::Shader {