Compare commits

..

27 Commits

Author SHA1 Message Date
Liam
3009d0bd7d Address review comments 2022-03-17 14:48:18 -04:00
Liam
e228a40db8 shader_recompiler: Use functions for indirect const buffer accesses 2022-03-17 13:30:21 -04:00
Liam
3ac522ba41 Address review comments 2022-03-17 09:30:41 -04:00
Liam
1415542f73 shader_recompiler: Implement LDC.IS address mode 2022-03-16 11:05:04 -04:00
Liam
52895fab67 shader: add support for const buffer indirect addressing 2022-03-14 19:43:32 -04:00
Fernando S
cd07a43724 Merge pull request #8008 from ameerj/rescale-offsets-array
rescaling_pass: Fix rescaling Color2DArray ImageFetch offsets
2022-03-15 00:08:22 +01:00
Fernando S
f9e1f559b1 Merge pull request #8000 from liamwhite/hagi
Initial support for Wii Hagi emulator
2022-03-15 00:08:05 +01:00
bunnei
cc285b9924 Merge pull request #8015 from FernandoS27/fix-global-mem
Shader decompiler: Fix storage tracking in deko3d.
2022-03-14 16:03:23 -07:00
byte[]
be0e6a2bb4 Maxwell3D: Link to override constant definition in nouveau 2022-03-14 11:06:25 -04:00
Fernando S
0331b8d799 Merge pull request #8016 from merryhime/kill-mem-use
dynarmic: Reduce size of code caches
2022-03-14 16:04:46 +01:00
byte[]
364c67e49b Maxwell3D: restore original topology when topology overrides are disabled 2022-03-14 11:00:08 -04:00
Liam
37aa472269 Maxwell3D: Use override constants from nouveau
This fixes some incorrect rendering in Sunshine
2022-03-14 10:11:58 -04:00
Merry
220674d0d6 dynarmic: Reduce size of code caches 2022-03-13 22:17:14 +00:00
Fernando Sahmkow
185fc03c3c Shader decompiler: do constant propagation before texture pass. 2022-03-13 21:49:40 +01:00
Fernando Sahmkow
ec9f0f064e Shader decompiler: Fix storage tracking in deko3d. 2022-03-13 17:41:16 +01:00
bunnei
8decc8d1a5 Merge pull request #8007 from ameerj/vs-2022-errors
emit_spirv, vk_compute_pass: Resolve VS2022 compiler errors
2022-03-13 03:43:06 -07:00
merry
1f6bbb6257 Merge pull request #8009 from ameerj/dynarmic-exclusives-config
config: Write dynarmic exclusive memory configs
2022-03-13 07:42:38 +00:00
ameerj
6b164a80a1 config: Write dynarmic exclusive memory configs
Ensures the configs are written and saved between boots
2022-03-12 03:42:50 -05:00
ameerj
f87f8d4610 rescaling_pass: Fix rescaling Color2DArray ImageFetch offsets
ImageFetch offsets for 2D array coordinates have a different composite size than the coordinates. The rescaling pass was not taking this into account.

Fixes broken shaders when scaling is enabled in Astral Chain, and likely other titles.
2022-03-12 03:31:56 -05:00
ameerj
e8c50e709e emit_spirv, vk_compute_pass: Resolve VS2022 compiler errors 2022-03-12 02:54:33 -05:00
bunnei
27cc7b6a73 Merge pull request #7997 from Wunkolo/cpu_detect_more
cpu_detect: Add additional x86 flags and telemetry
2022-03-11 17:26:41 -08:00
Liam
56c646d82c Maxwell3D: Restrict topology override effect to after the register is set 2022-03-11 19:42:12 -05:00
bunnei
5c74dd6462 Merge pull request #8003 from yuzu-emu/revert-7982-fix_cmake_missing_qt5_dbus
Revert "build(cmake): fix missing Qt5::DBus target on linux"
2022-03-11 15:22:30 -08:00
bunnei
15fdc2cd09 Revert "build(cmake): fix missing Qt5::DBus target on linux" 2022-03-11 15:22:24 -08:00
Liam
70e632f153 Maxwell3D: mark index buffers as dirty after updating counts 2022-03-11 08:51:22 -05:00
Liam
82c3042c0f TextureCacheRuntime: allow converting D24S8 to ABGR8
I can't see how this would be useful, but Galaxy uses it.
2022-03-10 20:25:34 -05:00
Liam
f1521183f8 Maxwell3D: read small-index draw and primitive topology override registers
This allows Galaxy and Sunshine to render for the first time.
2022-03-10 19:21:04 -05:00
18 changed files with 281 additions and 80 deletions

View File

@@ -363,7 +363,7 @@ if(ENABLE_QT)
set(YUZU_QT_NO_CMAKE_SYSTEM_PATH "NO_CMAKE_SYSTEM_PATH")
endif()
find_package(Qt5 ${QT_VERSION} REQUIRED COMPONENTS Widgets DBus ${QT_PREFIX_HINT} ${YUZU_QT_NO_CMAKE_SYSTEM_PATH})
find_package(Qt5 ${QT_VERSION} REQUIRED COMPONENTS Widgets ${QT_PREFIX_HINT} ${YUZU_QT_NO_CMAKE_SYSTEM_PATH})
if (YUZU_USE_QT_WEB_ENGINE)
find_package(Qt5 COMPONENTS WebEngineCore WebEngineWidgets)
endif()

View File

@@ -148,8 +148,8 @@ std::shared_ptr<Dynarmic::A32::Jit> ARM_Dynarmic_32::MakeJit(Common::PageTable*
config.wall_clock_cntpct = uses_wall_clock;
// Code cache size
config.code_cache_size = 512_MiB;
config.far_code_offset = 400_MiB;
config.code_cache_size = 128_MiB;
config.far_code_offset = 100_MiB;
// Safe optimizations
if (Settings::values.cpu_debug_mode) {

View File

@@ -208,8 +208,8 @@ std::shared_ptr<Dynarmic::A64::Jit> ARM_Dynarmic_64::MakeJit(Common::PageTable*
config.wall_clock_cntpct = uses_wall_clock;
// Code cache size
config.code_cache_size = 512_MiB;
config.far_code_offset = 400_MiB;
config.code_cache_size = 128_MiB;
config.far_code_offset = 100_MiB;
// Safe optimizations
if (Settings::values.cpu_debug_mode) {

View File

@@ -22,7 +22,7 @@ constexpr u32 NUM_TEXTURE_AND_IMAGE_SCALING_WORDS =
struct RescalingLayout {
alignas(16) std::array<u32, NUM_TEXTURE_SCALING_WORDS> rescaling_textures;
alignas(16) std::array<u32, NUM_IMAGE_SCALING_WORDS> rescaling_images;
alignas(16) u32 down_factor;
u32 down_factor;
};
constexpr u32 RESCALING_LAYOUT_WORDS_OFFSET = offsetof(RescalingLayout, rescaling_textures);
constexpr u32 RESCALING_LAYOUT_DOWN_FACTOR_OFFSET = offsetof(RescalingLayout, down_factor);

View File

@@ -123,34 +123,36 @@ std::optional<OutAttr> OutputAttrPointer(EmitContext& ctx, IR::Attribute attr) {
}
Id GetCbuf(EmitContext& ctx, Id result_type, Id UniformDefinitions::*member_ptr, u32 element_size,
const IR::Value& binding, const IR::Value& offset) {
const IR::Value& binding, const IR::Value& offset, const Id indirect_func) {
Id buffer_offset;
const Id uniform_type{ctx.uniform_types.*member_ptr};
if (offset.IsImmediate()) {
// Hardware been proved to read the aligned offset (e.g. LDC.U32 at 6 will read offset 4)
const Id imm_offset{ctx.Const(offset.U32() / element_size)};
buffer_offset = imm_offset;
} else if (element_size > 1) {
const u32 log2_element_size{static_cast<u32>(std::countr_zero(element_size))};
const Id shift{ctx.Const(log2_element_size)};
buffer_offset = ctx.OpShiftRightArithmetic(ctx.U32[1], ctx.Def(offset), shift);
} else {
buffer_offset = ctx.Def(offset);
}
if (!binding.IsImmediate()) {
throw NotImplementedException("Constant buffer indexing");
return ctx.OpFunctionCall(result_type, indirect_func, ctx.Def(binding), buffer_offset);
}
const Id cbuf{ctx.cbufs[binding.U32()].*member_ptr};
const Id uniform_type{ctx.uniform_types.*member_ptr};
if (!offset.IsImmediate()) {
Id index{ctx.Def(offset)};
if (element_size > 1) {
const u32 log2_element_size{static_cast<u32>(std::countr_zero(element_size))};
const Id shift{ctx.Const(log2_element_size)};
index = ctx.OpShiftRightArithmetic(ctx.U32[1], ctx.Def(offset), shift);
}
const Id access_chain{ctx.OpAccessChain(uniform_type, cbuf, ctx.u32_zero_value, index)};
return ctx.OpLoad(result_type, access_chain);
}
// Hardware been proved to read the aligned offset (e.g. LDC.U32 at 6 will read offset 4)
const Id imm_offset{ctx.Const(offset.U32() / element_size)};
const Id access_chain{ctx.OpAccessChain(uniform_type, cbuf, ctx.u32_zero_value, imm_offset)};
const Id access_chain{ctx.OpAccessChain(uniform_type, cbuf, ctx.u32_zero_value, buffer_offset)};
return ctx.OpLoad(result_type, access_chain);
}
Id GetCbufU32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) {
return GetCbuf(ctx, ctx.U32[1], &UniformDefinitions::U32, sizeof(u32), binding, offset);
return GetCbuf(ctx, ctx.U32[1], &UniformDefinitions::U32, sizeof(u32), binding, offset,
ctx.load_const_func_u32);
}
Id GetCbufU32x4(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) {
return GetCbuf(ctx, ctx.U32[4], &UniformDefinitions::U32x4, sizeof(u32[4]), binding, offset);
return GetCbuf(ctx, ctx.U32[4], &UniformDefinitions::U32x4, sizeof(u32[4]), binding, offset,
ctx.load_const_func_u32x4);
}
Id GetCbufElement(EmitContext& ctx, Id vector, const IR::Value& offset, u32 index_offset) {
@@ -201,7 +203,8 @@ void EmitGetIndirectBranchVariable(EmitContext&) {
Id EmitGetCbufU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) {
if (ctx.profile.support_descriptor_aliasing && ctx.profile.support_int8) {
const Id load{GetCbuf(ctx, ctx.U8, &UniformDefinitions::U8, sizeof(u8), binding, offset)};
const Id load{GetCbuf(ctx, ctx.U8, &UniformDefinitions::U8, sizeof(u8), binding, offset,
ctx.load_const_func_u8)};
return ctx.OpUConvert(ctx.U32[1], load);
}
Id element{};
@@ -217,7 +220,8 @@ Id EmitGetCbufU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& of
Id EmitGetCbufS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) {
if (ctx.profile.support_descriptor_aliasing && ctx.profile.support_int8) {
const Id load{GetCbuf(ctx, ctx.S8, &UniformDefinitions::S8, sizeof(s8), binding, offset)};
const Id load{GetCbuf(ctx, ctx.S8, &UniformDefinitions::S8, sizeof(s8), binding, offset,
ctx.load_const_func_u8)};
return ctx.OpSConvert(ctx.U32[1], load);
}
Id element{};
@@ -233,8 +237,8 @@ Id EmitGetCbufS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& of
Id EmitGetCbufU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) {
if (ctx.profile.support_descriptor_aliasing && ctx.profile.support_int16) {
const Id load{
GetCbuf(ctx, ctx.U16, &UniformDefinitions::U16, sizeof(u16), binding, offset)};
const Id load{GetCbuf(ctx, ctx.U16, &UniformDefinitions::U16, sizeof(u16), binding, offset,
ctx.load_const_func_u16)};
return ctx.OpUConvert(ctx.U32[1], load);
}
Id element{};
@@ -250,8 +254,8 @@ Id EmitGetCbufU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& o
Id EmitGetCbufS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) {
if (ctx.profile.support_descriptor_aliasing && ctx.profile.support_int16) {
const Id load{
GetCbuf(ctx, ctx.S16, &UniformDefinitions::S16, sizeof(s16), binding, offset)};
const Id load{GetCbuf(ctx, ctx.S16, &UniformDefinitions::S16, sizeof(s16), binding, offset,
ctx.load_const_func_u16)};
return ctx.OpSConvert(ctx.U32[1], load);
}
Id element{};
@@ -276,7 +280,8 @@ Id EmitGetCbufU32(EmitContext& ctx, const IR::Value& binding, const IR::Value& o
Id EmitGetCbufF32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) {
if (ctx.profile.support_descriptor_aliasing) {
return GetCbuf(ctx, ctx.F32[1], &UniformDefinitions::F32, sizeof(f32), binding, offset);
return GetCbuf(ctx, ctx.F32[1], &UniformDefinitions::F32, sizeof(f32), binding, offset,
ctx.load_const_func_f32);
} else {
const Id vector{GetCbufU32x4(ctx, binding, offset)};
return ctx.OpBitcast(ctx.F32[1], GetCbufElement(ctx, vector, offset, 0u));
@@ -285,8 +290,8 @@ Id EmitGetCbufF32(EmitContext& ctx, const IR::Value& binding, const IR::Value& o
Id EmitGetCbufU32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) {
if (ctx.profile.support_descriptor_aliasing) {
return GetCbuf(ctx, ctx.U32[2], &UniformDefinitions::U32x2, sizeof(u32[2]), binding,
offset);
return GetCbuf(ctx, ctx.U32[2], &UniformDefinitions::U32x2, sizeof(u32[2]), binding, offset,
ctx.load_const_func_u32x2);
} else {
const Id vector{GetCbufU32x4(ctx, binding, offset)};
return ctx.OpCompositeConstruct(ctx.U32[2], GetCbufElement(ctx, vector, offset, 0u),

View File

@@ -464,6 +464,7 @@ EmitContext::EmitContext(const Profile& profile_, const RuntimeInfo& runtime_inf
DefineSharedMemory(program);
DefineSharedMemoryFunctions(program);
DefineConstantBuffers(program.info, uniform_binding);
DefineConstantBufferIndirectFunctions(program.info);
DefineStorageBuffers(program.info, storage_binding);
DefineTextureBuffers(program.info, texture_binding);
DefineImageBuffers(program.info, image_binding);
@@ -993,7 +994,7 @@ void EmitContext::DefineConstantBuffers(const Info& info, u32& binding) {
}
return;
}
IR::Type types{info.used_constant_buffer_types};
IR::Type types{info.used_constant_buffer_types | info.used_indirect_cbuf_types};
if (True(types & IR::Type::U8)) {
if (profile.support_int8) {
DefineConstBuffers(*this, info, &UniformDefinitions::U8, binding, U8, 'u', sizeof(u8));
@@ -1027,6 +1028,62 @@ void EmitContext::DefineConstantBuffers(const Info& info, u32& binding) {
binding += static_cast<u32>(info.constant_buffer_descriptors.size());
}
void EmitContext::DefineConstantBufferIndirectFunctions(const Info& info) {
if (!info.uses_cbuf_indirect) {
return;
}
const auto make_accessor{[&](Id buffer_type, Id UniformDefinitions::*member_ptr) {
const Id func_type{TypeFunction(buffer_type, U32[1], U32[1])};
const Id func{OpFunction(buffer_type, spv::FunctionControlMask::MaskNone, func_type)};
const Id binding{OpFunctionParameter(U32[1])};
const Id offset{OpFunctionParameter(U32[1])};
AddLabel();
const Id merge_label{OpLabel()};
const Id uniform_type{uniform_types.*member_ptr};
std::array<Id, Info::MAX_CBUFS> buf_labels;
std::array<Sirit::Literal, Info::MAX_CBUFS> buf_literals;
for (u32 i = 0; i < Info::MAX_CBUFS; i++) {
buf_labels[i] = OpLabel();
buf_literals[i] = Sirit::Literal{i};
}
OpSelectionMerge(merge_label, spv::SelectionControlMask::MaskNone);
OpSwitch(binding, buf_labels[0], buf_literals, buf_labels);
for (u32 i = 0; i < Info::MAX_CBUFS; i++) {
AddLabel(buf_labels[i]);
const Id cbuf{cbufs[i].*member_ptr};
const Id access_chain{OpAccessChain(uniform_type, cbuf, u32_zero_value, offset)};
const Id result{OpLoad(buffer_type, access_chain)};
OpReturnValue(result);
}
AddLabel(merge_label);
OpUnreachable();
OpFunctionEnd();
return func;
}};
IR::Type types{info.used_indirect_cbuf_types};
if (True(types & IR::Type::U8)) {
load_const_func_u8 = make_accessor(U8, &UniformDefinitions::U8);
}
if (True(types & IR::Type::U16)) {
load_const_func_u16 = make_accessor(U16, &UniformDefinitions::U16);
}
if (True(types & IR::Type::F32)) {
load_const_func_f32 = make_accessor(F32[1], &UniformDefinitions::F32);
}
if (True(types & IR::Type::U32)) {
load_const_func_u32 = make_accessor(U32[1], &UniformDefinitions::U32);
}
if (True(types & IR::Type::U32x2)) {
load_const_func_u32x2 = make_accessor(U32[2], &UniformDefinitions::U32x2);
}
if (True(types & IR::Type::U32x4)) {
load_const_func_u32x4 = make_accessor(U32[4], &UniformDefinitions::U32x4);
}
}
void EmitContext::DefineStorageBuffers(const Info& info, u32& binding) {
if (info.storage_buffers_descriptors.empty()) {
return;

View File

@@ -294,6 +294,13 @@ public:
std::vector<Id> interfaces;
Id load_const_func_u8{};
Id load_const_func_u16{};
Id load_const_func_u32{};
Id load_const_func_f32{};
Id load_const_func_u32x2{};
Id load_const_func_u32x4{};
private:
void DefineCommonTypes(const Info& info);
void DefineCommonConstants();
@@ -302,6 +309,7 @@ private:
void DefineSharedMemory(const IR::Program& program);
void DefineSharedMemoryFunctions(const IR::Program& program);
void DefineConstantBuffers(const Info& info, u32& binding);
void DefineConstantBufferIndirectFunctions(const Info& info);
void DefineStorageBuffers(const Info& info, u32& binding);
void DefineTextureBuffers(const Info& info, u32& binding);
void DefineImageBuffers(const Info& info, u32& binding);

View File

@@ -11,10 +11,20 @@ namespace Shader::Maxwell {
using namespace LDC;
namespace {
std::pair<IR::U32, IR::U32> Slot(IR::IREmitter& ir, Mode mode, const IR::U32& imm_index,
const IR::U32& reg, const IR::U32& imm) {
const IR::U32& reg, const IR::U32& imm_offset) {
switch (mode) {
case Mode::Default:
return {imm_index, ir.IAdd(reg, imm)};
return {imm_index, ir.IAdd(reg, imm_offset)};
case Mode::IS: {
// Segmented addressing mode
// Ra+imm_offset points into a flat mapping of const buffer
// address space
const IR::U32 address{ir.IAdd(reg, imm_offset)};
const IR::U32 index{ir.BitFieldExtract(address, ir.Imm32(16), ir.Imm32(16))};
const IR::U32 offset{ir.BitFieldExtract(address, ir.Imm32(0), ir.Imm32(16))};
return {ir.IAdd(index, imm_index), offset};
}
default:
break;
}

View File

@@ -212,11 +212,11 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo
}
Optimization::SsaRewritePass(program);
Optimization::ConstantPropagationPass(program);
Optimization::GlobalMemoryToStorageBufferPass(program);
Optimization::TexturePass(env, program);
Optimization::ConstantPropagationPass(program);
if (Settings::values.resolution_info.active) {
Optimization::RescalingPass(program);
}

View File

@@ -29,6 +29,46 @@ void AddConstantBufferDescriptor(Info& info, u32 index, u32 count) {
});
}
/// Records that the shader reads a constant buffer through a binding that is not an immediate.
/// Since the accessed buffer cannot be known here, every constant buffer slot is registered as
/// used and its tracked size is raised to 0x10'000.
void AddRegisterIndexedLdc(Info& info) {
    info.uses_cbuf_indirect = true;

    // Any slot may be selected at run time, so set one mask bit per possible buffer
    info.constant_buffer_mask = (1 << Info::MAX_CBUFS) - 1;

    // Any access size is possible as well
    info.constant_buffer_used_sizes.fill(0x10'000);

    // Replace whatever was collected so far with exactly one descriptor per slot
    auto& descriptors{info.constant_buffer_descriptors};
    descriptors.clear();
    for (u32 slot = 0; slot < Info::MAX_CBUFS; ++slot) {
        descriptors.push_back(ConstantBufferDescriptor{.index = slot, .count = 1});
    }
}
/// Returns the size in bytes of the element read by a GetCbuf* opcode and ORs the matching
/// type flag into `used_type`.
/// Throws InvalidArgument for any opcode that is not one of the handled GetCbuf* opcodes.
u32 GetElementSize(IR::Type& used_type, Shader::IR::Opcode opcode) {
    // Flags the accessed type and hands back its byte size in one step
    const auto mark_used{[&used_type](IR::Type type, u32 size_bytes) -> u32 {
        used_type |= type;
        return size_bytes;
    }};
    switch (opcode) {
    case IR::Opcode::GetCbufU8:
    case IR::Opcode::GetCbufS8:
        return mark_used(IR::Type::U8, 1);
    case IR::Opcode::GetCbufU16:
    case IR::Opcode::GetCbufS16:
        return mark_used(IR::Type::U16, 2);
    case IR::Opcode::GetCbufU32:
        return mark_used(IR::Type::U32, 4);
    case IR::Opcode::GetCbufF32:
        return mark_used(IR::Type::F32, 4);
    case IR::Opcode::GetCbufU32x2:
        return mark_used(IR::Type::U32x2, 8);
    default:
        throw InvalidArgument("Invalid opcode {}", opcode);
    }
}
void GetPatch(Info& info, IR::Patch patch) {
if (!IR::IsGeneric(patch)) {
throw NotImplementedException("Reading non-generic patch {}", patch);
@@ -463,42 +503,18 @@ void VisitUsages(Info& info, IR::Inst& inst) {
case IR::Opcode::GetCbufU32x2: {
const IR::Value index{inst.Arg(0)};
const IR::Value offset{inst.Arg(1)};
if (!index.IsImmediate()) {
throw NotImplementedException("Constant buffer with non-immediate index");
}
AddConstantBufferDescriptor(info, index.U32(), 1);
u32 element_size{};
switch (inst.GetOpcode()) {
case IR::Opcode::GetCbufU8:
case IR::Opcode::GetCbufS8:
info.used_constant_buffer_types |= IR::Type::U8;
element_size = 1;
break;
case IR::Opcode::GetCbufU16:
case IR::Opcode::GetCbufS16:
info.used_constant_buffer_types |= IR::Type::U16;
element_size = 2;
break;
case IR::Opcode::GetCbufU32:
info.used_constant_buffer_types |= IR::Type::U32;
element_size = 4;
break;
case IR::Opcode::GetCbufF32:
info.used_constant_buffer_types |= IR::Type::F32;
element_size = 4;
break;
case IR::Opcode::GetCbufU32x2:
info.used_constant_buffer_types |= IR::Type::U32x2;
element_size = 8;
break;
default:
break;
}
u32& size{info.constant_buffer_used_sizes[index.U32()]};
if (offset.IsImmediate()) {
size = Common::AlignUp(std::max(size, offset.U32() + element_size), 16u);
if (index.IsImmediate()) {
AddConstantBufferDescriptor(info, index.U32(), 1);
u32 element_size = GetElementSize(info.used_constant_buffer_types, inst.GetOpcode());
u32& size{info.constant_buffer_used_sizes[index.U32()]};
if (offset.IsImmediate()) {
size = Common::AlignUp(std::max(size, offset.U32() + element_size), 16u);
} else {
size = 0x10'000;
}
} else {
size = 0x10'000;
AddRegisterIndexedLdc(info);
GetElementSize(info.used_indirect_cbuf_types, inst.GetOpcode());
}
break;
}

View File

@@ -334,7 +334,8 @@ std::optional<LowAddrInfo> TrackLowAddress(IR::Inst* inst) {
/// Tries to track the storage buffer address used by a global memory instruction
std::optional<StorageBufferAddr> Track(const IR::Value& value, const Bias* bias) {
const auto pred{[bias](const IR::Inst* inst) -> std::optional<StorageBufferAddr> {
if (inst->GetOpcode() != IR::Opcode::GetCbufU32) {
if (inst->GetOpcode() != IR::Opcode::GetCbufU32 &&
inst->GetOpcode() != IR::Opcode::GetCbufU32x2) {
return std::nullopt;
}
const IR::Value index{inst->Arg(0)};

View File

@@ -183,6 +183,31 @@ void ScaleIntegerComposite(IR::IREmitter& ir, IR::Inst& inst, const IR::U1& is_s
}
}
/// Rescales the integer offset composite stored in argument `index` of a texture instruction.
/// Only the 2D and 2D-array texture types are patched, and for both of them the argument is
/// rebuilt as a two-component (x, y) composite. An empty argument is left untouched.
void ScaleIntegerOffsetComposite(IR::IREmitter& ir, IR::Inst& inst, const IR::U1& is_scaled,
                                 size_t index) {
    const IR::Value offset{inst.Arg(index)};
    if (offset.IsEmpty()) {
        return;
    }
    const auto texture_info{inst.Flags<IR::TextureInstInfo>()};
    // Both components are scaled up front, regardless of the texture type handled below
    const IR::U32 scaled_x{Scale(ir, is_scaled, IR::U32{ir.CompositeExtract(offset, 0)})};
    const IR::U32 scaled_y{Scale(ir, is_scaled, IR::U32{ir.CompositeExtract(offset, 1)})};
    switch (texture_info.type) {
    case TextureType::Color1D:
    case TextureType::ColorArray1D:
    case TextureType::Color3D:
    case TextureType::ColorCube:
    case TextureType::ColorArrayCube:
    case TextureType::Buffer:
        // Offsets of these texture types are not rescaled
        break;
    case TextureType::Color2D:
    case TextureType::ColorArray2D:
        inst.SetArg(index, ir.CompositeConstruct(scaled_x, scaled_y));
        break;
    }
}
void SubScaleCoord(IR::IREmitter& ir, IR::Inst& inst, const IR::U1& is_scaled) {
const auto info{inst.Flags<IR::TextureInstInfo>()};
const IR::Value coord{inst.Arg(1)};
@@ -220,7 +245,7 @@ void SubScaleImageFetch(IR::Block& block, IR::Inst& inst) {
const IR::U1 is_scaled{ir.IsTextureScaled(ir.Imm32(info.descriptor_index))};
SubScaleCoord(ir, inst, is_scaled);
// Scale ImageFetch offset
ScaleIntegerComposite(ir, inst, is_scaled, 2);
ScaleIntegerOffsetComposite(ir, inst, is_scaled, 2);
}
void SubScaleImageRead(IR::Block& block, IR::Inst& inst) {
@@ -242,7 +267,7 @@ void PatchImageFetch(IR::Block& block, IR::Inst& inst) {
const IR::U1 is_scaled{ir.IsTextureScaled(ir.Imm32(info.descriptor_index))};
ScaleIntegerComposite(ir, inst, is_scaled, 1);
// Scale ImageFetch offset
ScaleIntegerComposite(ir, inst, is_scaled, 2);
ScaleIntegerOffsetComposite(ir, inst, is_scaled, 2);
}
void PatchImageRead(IR::Block& block, IR::Inst& inst) {

View File

@@ -173,9 +173,11 @@ struct Info {
bool uses_atomic_image_u32{};
bool uses_shadow_lod{};
bool uses_rescaling_uniform{};
bool uses_cbuf_indirect{};
IR::Type used_constant_buffer_types{};
IR::Type used_storage_buffer_types{};
IR::Type used_indirect_cbuf_types{};
u32 constant_buffer_mask{};
std::array<u32, MAX_CBUFS> constant_buffer_used_sizes{};

View File

@@ -7,6 +7,7 @@
#include "common/assert.h"
#include "core/core.h"
#include "core/core_timing.h"
#include "video_core/dirty_flags.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/gpu.h"
#include "video_core/memory_manager.h"
@@ -208,6 +209,14 @@ void Maxwell3D::ProcessMethodCall(u32 method, u32 argument, u32 nonshadow_argume
return ProcessCBBind(4);
case MAXWELL3D_REG_INDEX(draw.vertex_end_gl):
return DrawArrays();
case MAXWELL3D_REG_INDEX(small_index):
regs.index_array.count = regs.small_index.count;
regs.index_array.first = regs.small_index.first;
dirty.flags[VideoCommon::Dirty::IndexBuffer] = true;
return DrawArrays();
case MAXWELL3D_REG_INDEX(topology_override):
use_topology_override = true;
return;
case MAXWELL3D_REG_INDEX(clear_buffers):
return ProcessClearBuffers();
case MAXWELL3D_REG_INDEX(query.query_get):
@@ -360,6 +369,35 @@ void Maxwell3D::CallMethodFromMME(u32 method, u32 method_argument) {
}
}
void Maxwell3D::ProcessTopologyOverride() {
using PrimitiveTopology = Maxwell3D::Regs::PrimitiveTopology;
using PrimitiveTopologyOverride = Maxwell3D::Regs::PrimitiveTopologyOverride;
PrimitiveTopology topology{};
switch (regs.topology_override) {
case PrimitiveTopologyOverride::None:
topology = regs.draw.topology;
break;
case PrimitiveTopologyOverride::Points:
topology = PrimitiveTopology::Points;
break;
case PrimitiveTopologyOverride::Lines:
topology = PrimitiveTopology::Lines;
break;
case PrimitiveTopologyOverride::LineStrip:
topology = PrimitiveTopology::LineStrip;
break;
default:
topology = static_cast<PrimitiveTopology>(regs.topology_override);
break;
}
if (use_topology_override) {
regs.draw.topology.Assign(topology);
}
}
void Maxwell3D::FlushMMEInlineDraw() {
LOG_TRACE(HW_GPU, "called, topology={}, count={}", regs.draw.topology.Value(),
regs.vertex_buffer.count);
@@ -370,6 +408,8 @@ void Maxwell3D::FlushMMEInlineDraw() {
ASSERT_MSG(!regs.draw.instance_next || !regs.draw.instance_cont,
"Illegal combination of instancing parameters");
ProcessTopologyOverride();
const bool is_indexed = mme_draw.current_mode == MMEDrawMode::Indexed;
if (ShouldExecute()) {
rasterizer->Draw(is_indexed, true);
@@ -529,6 +569,8 @@ void Maxwell3D::DrawArrays() {
ASSERT_MSG(!regs.draw.instance_next || !regs.draw.instance_cont,
"Illegal combination of instancing parameters");
ProcessTopologyOverride();
if (regs.draw.instance_next) {
// Increment the current instance *before* drawing.
state.current_instance += 1;

View File

@@ -367,6 +367,22 @@ public:
Patches = 0xe,
};
// Constants as from NVC0_3D_UNK1970_D3D
// https://gitlab.freedesktop.org/mesa/mesa/-/blob/main/src/gallium/drivers/nouveau/nvc0/nvc0_3d.xml.h#L1598
// Values held by the topology_override register.
// ProcessTopologyOverride translates None, Points, Lines and LineStrip explicitly; every other
// value is cast unchanged to PrimitiveTopology.
enum class PrimitiveTopologyOverride : u32 {
// Leaves the topology in regs.draw.topology as it is
None = 0x0,
// Explicitly mapped to the PrimitiveTopology enumerator of the same name
Points = 0x1,
Lines = 0x2,
LineStrip = 0x3,
// Passed through to PrimitiveTopology by a plain static_cast
Triangles = 0x4,
TriangleStrip = 0x5,
LinesAdjacency = 0xa,
LineStripAdjacency = 0xb,
TrianglesAdjacency = 0xc,
TriangleStripAdjacency = 0xd,
Patches = 0xe,
};
enum class IndexFormat : u32 {
UnsignedByte = 0x0,
UnsignedShort = 0x1,
@@ -1200,7 +1216,12 @@ public:
}
} index_array;
INSERT_PADDING_WORDS_NOINIT(0x7);
union {
BitField<0, 16, u32> first;
BitField<16, 16, u32> count;
} small_index;
INSERT_PADDING_WORDS_NOINIT(0x6);
INSERT_PADDING_WORDS_NOINIT(0x1F);
@@ -1244,7 +1265,11 @@ public:
BitField<11, 1, u32> depth_clamp_disabled;
} view_volume_clip_control;
INSERT_PADDING_WORDS_NOINIT(0x1F);
INSERT_PADDING_WORDS_NOINIT(0xC);
PrimitiveTopologyOverride topology_override;
INSERT_PADDING_WORDS_NOINIT(0x12);
u32 depth_bounds_enable;
@@ -1531,6 +1556,9 @@ private:
/// Handles a write to the VERTEX_END_GL register, triggering a draw.
void DrawArrays();
/// Handles use of topology overrides (e.g., to avoid using a topology assigned from a macro)
void ProcessTopologyOverride();
// Handles an instance drawcall from MME
void StepInstance(MMEDrawMode expected_mode, u32 count);
@@ -1569,6 +1597,7 @@ private:
Upload::State upload_state;
bool execute_on{true};
bool use_topology_override{false};
};
#define ASSERT_REG_POSITION(field_name, position) \
@@ -1685,6 +1714,7 @@ ASSERT_REG_POSITION(draw, 0x585);
ASSERT_REG_POSITION(primitive_restart, 0x591);
ASSERT_REG_POSITION(provoking_vertex_last, 0x5A1);
ASSERT_REG_POSITION(index_array, 0x5F2);
ASSERT_REG_POSITION(small_index, 0x5F9);
ASSERT_REG_POSITION(polygon_offset_clamp, 0x61F);
ASSERT_REG_POSITION(instanced_arrays, 0x620);
ASSERT_REG_POSITION(vp_point_size, 0x644);
@@ -1694,6 +1724,7 @@ ASSERT_REG_POSITION(cull_face, 0x648);
ASSERT_REG_POSITION(pixel_center_integer, 0x649);
ASSERT_REG_POSITION(viewport_transform_enabled, 0x64B);
ASSERT_REG_POSITION(view_volume_clip_control, 0x64F);
ASSERT_REG_POSITION(topology_override, 0x65C);
ASSERT_REG_POSITION(depth_bounds_enable, 0x66F);
ASSERT_REG_POSITION(logic_op, 0x671);
ASSERT_REG_POSITION(clear_buffers, 0x674);

View File

@@ -2,6 +2,7 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <array>
#include <cstring>
#include <memory>
#include <optional>
@@ -292,7 +293,7 @@ std::pair<VkBuffer, VkDeviceSize> QuadIndexedPass::Assemble(
.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT,
.dstAccessMask = VK_ACCESS_INDEX_READ_BIT,
};
const std::array push_constants{base_vertex, index_shift};
const std::array<u32, 2> push_constants{base_vertex, index_shift};
const VkDescriptorSet set = descriptor_allocator.Commit();
device.GetLogical().UpdateDescriptorSet(set, *descriptor_template, descriptor_data);
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);

View File

@@ -1067,7 +1067,8 @@ void TextureCacheRuntime::ConvertImage(Framebuffer* dst, ImageView& dst_view, Im
}
break;
case PixelFormat::A8B8G8R8_UNORM:
if (src_view.format == PixelFormat::S8_UINT_D24_UNORM) {
if (src_view.format == PixelFormat::S8_UINT_D24_UNORM ||
src_view.format == PixelFormat::D24_UNORM_S8_UINT) {
return blit_image_helper.ConvertD24S8ToABGR8(dst, src_view);
}
break;

View File

@@ -1155,6 +1155,8 @@ void Config::SaveCpuValues() {
WriteBasicSetting(Settings::values.cpuopt_misc_ir);
WriteBasicSetting(Settings::values.cpuopt_reduce_misalign_checks);
WriteBasicSetting(Settings::values.cpuopt_fastmem);
WriteBasicSetting(Settings::values.cpuopt_fastmem_exclusives);
WriteBasicSetting(Settings::values.cpuopt_recompile_exclusives);
}
qt_config->endGroup();