shader_recompiler: Implement LowerInt16ToInt32
AMD drivers 22.3.2 and later expose a bug in yuzu, where the application would submit 16-bit integer instructions to GPUs that don't support 16-bit integers, namely GCN 4 devices. Replace any 16-bit instructions with 32-bit ones so newer AMD drivers will work with VK_KHR_workgroup_memory_explicit_layout.
This commit is contained in:
@@ -219,6 +219,7 @@ add_library(shader_recompiler STATIC
|
||||
ir_opt/global_memory_to_storage_buffer_pass.cpp
|
||||
ir_opt/identity_removal_pass.cpp
|
||||
ir_opt/lower_fp16_to_fp32.cpp
|
||||
ir_opt/lower_int16_to_int32.cpp
|
||||
ir_opt/lower_int64_to_int32.cpp
|
||||
ir_opt/passes.h
|
||||
ir_opt/rescaling_pass.cpp
|
||||
|
||||
@@ -209,6 +209,9 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo
|
||||
if (!host_info.support_int64) {
|
||||
Optimization::LowerInt64ToInt32(program);
|
||||
}
|
||||
if (!host_info.support_int16) {
|
||||
Optimization::LowerInt16ToInt32(program);
|
||||
}
|
||||
Optimization::SsaRewritePass(program);
|
||||
|
||||
Optimization::ConstantPropagationPass(program);
|
||||
|
||||
@@ -11,6 +11,7 @@ namespace Shader {
|
||||
/// Capabilities and quirks of the host device that shader translation takes into account.
/// Every flag defaults to false.
struct HostTranslateInfo {
    /// True when the device supports 16-bit floats
    bool support_float16{};

    /// True when the device supports 16-bit integers
    bool support_int16{};

    /// True when the device supports 64-bit integers
    bool support_int64{};

    /// True when the device needs DemoteToHelperInvocation reordered
    bool needs_demote_reorder{};
};
|
||||
|
||||
72
src/shader_recompiler/ir_opt/lower_int16_to_int32.cpp
Normal file
72
src/shader_recompiler/ir_opt/lower_int16_to_int32.cpp
Normal file
@@ -0,0 +1,72 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include "shader_recompiler/frontend/ir/value.h"
|
||||
#include "shader_recompiler/ir_opt/passes.h"
|
||||
|
||||
namespace Shader::Optimization {
|
||||
namespace {
|
||||
/// Maps an opcode that produces or consumes a 16-bit integer to a 32-bit replacement.
///
/// Signed and unsigned 16-bit memory accesses collapse onto the same 32-bit opcode, and
/// conversions to or from F16 are redirected to the matching F32 opcode. Signedness of
/// integer <-> float conversions is preserved.
///
/// @param op Opcode to inspect.
/// @return The 32-bit replacement, or @p op unchanged when no replacement is listed.
IR::Opcode Replace(IR::Opcode op) {
    switch (op) {
    // Constant buffer reads
    case IR::Opcode::GetCbufU16:
    case IR::Opcode::GetCbufS16:
        return IR::Opcode::GetCbufU32;
    case IR::Opcode::UndefU16:
        return IR::Opcode::UndefU32;
    // Global memory
    case IR::Opcode::LoadGlobalU16:
    case IR::Opcode::LoadGlobalS16:
        return IR::Opcode::LoadGlobal32;
    case IR::Opcode::WriteGlobalU16:
    case IR::Opcode::WriteGlobalS16:
        return IR::Opcode::WriteGlobal32;
    // Storage buffers
    case IR::Opcode::LoadStorageU16:
    case IR::Opcode::LoadStorageS16:
        return IR::Opcode::LoadStorage32;
    case IR::Opcode::WriteStorageU16:
    case IR::Opcode::WriteStorageS16:
        return IR::Opcode::WriteStorage32;
    // Shared memory
    case IR::Opcode::LoadSharedU16:
    case IR::Opcode::LoadSharedS16:
        return IR::Opcode::LoadSharedU32;
    case IR::Opcode::WriteSharedU16:
        return IR::Opcode::WriteSharedU32;
    case IR::Opcode::SelectU16:
        return IR::Opcode::SelectU32;
    // Bit casts
    case IR::Opcode::BitCastU16F16:
        return IR::Opcode::BitCastU32F32;
    case IR::Opcode::BitCastF16U16:
        return IR::Opcode::BitCastF32U32;
    // Float -> integer conversions
    case IR::Opcode::ConvertS16F16:
    case IR::Opcode::ConvertS16F32:
        return IR::Opcode::ConvertS32F32;
    case IR::Opcode::ConvertS16F64:
        return IR::Opcode::ConvertS32F64;
    case IR::Opcode::ConvertU16F16:
    case IR::Opcode::ConvertU16F32:
        return IR::Opcode::ConvertU32F32;
    case IR::Opcode::ConvertU16F64:
        return IR::Opcode::ConvertU32F64;
    // Integer -> float conversions
    case IR::Opcode::ConvertF16S16:
    case IR::Opcode::ConvertF32S16:
        return IR::Opcode::ConvertF32S32;
    case IR::Opcode::ConvertF16U16:
    case IR::Opcode::ConvertF32U16:
        return IR::Opcode::ConvertF32U32;
    // Keep the signed source signed, mirroring the F32 cases above; lowering it to the
    // unsigned opcode would reinterpret negative inputs as large positive values.
    case IR::Opcode::ConvertF64S16:
        return IR::Opcode::ConvertF64S32;
    case IR::Opcode::ConvertF64U16:
        return IR::Opcode::ConvertF64U32;
    default:
        return op;
    }
}
|
||||
} // Anonymous namespace
|
||||
|
||||
/// Walks every instruction of every block in the program and replaces its opcode with the
/// result of Replace(), so opcodes that Replace() lists are swapped for their 32-bit
/// replacement and all others keep their current opcode.
///
/// @param program Program whose instructions are rewritten in place.
void LowerInt16ToInt32(IR::Program& program) {
    for (IR::Block* const current_block : program.blocks) {
        for (IR::Inst& instruction : current_block->Instructions()) {
            const IR::Opcode lowered_opcode{Replace(instruction.GetOpcode())};
            instruction.ReplaceOpcode(lowered_opcode);
        }
    }
}
|
||||
|
||||
} // namespace Shader::Optimization
|
||||
@@ -14,6 +14,7 @@ void DeadCodeEliminationPass(IR::Program& program);
|
||||
void GlobalMemoryToStorageBufferPass(IR::Program& program);
|
||||
void IdentityRemovalPass(IR::Program& program);
|
||||
void LowerFp16ToFp32(IR::Program& program);
|
||||
void LowerInt16ToInt32(IR::Program& program);
|
||||
void LowerInt64ToInt32(IR::Program& program);
|
||||
void RescalingPass(IR::Program& program);
|
||||
void SsaRewritePass(IR::Program& program);
|
||||
|
||||
@@ -322,6 +322,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::Engines::Maxw
|
||||
};
|
||||
host_info = Shader::HostTranslateInfo{
|
||||
.support_float16 = device.IsFloat16Supported(),
|
||||
.support_int16 = device.IsShaderInt16Supported(),
|
||||
.support_int64 = device.IsShaderInt64Supported(),
|
||||
.needs_demote_reorder = driver_id == VK_DRIVER_ID_AMD_PROPRIETARY_KHR ||
|
||||
driver_id == VK_DRIVER_ID_AMD_OPEN_SOURCE_KHR,
|
||||
|
||||
Reference in New Issue
Block a user