Merge branch 'master' of https://github.com/yuzu-emu/yuzu
This commit is contained in:
@@ -53,7 +53,7 @@ build_script:
|
||||
# https://www.appveyor.com/docs/build-phase
|
||||
msbuild msvc_build/yuzu.sln /maxcpucount /logger:"C:\Program Files\AppVeyor\BuildAgent\Appveyor.MSBuildLogger.dll"
|
||||
} else {
|
||||
C:\msys64\usr\bin\bash.exe -lc 'mingw32-make -j4 -C mingw_build/ 2>&1'
|
||||
C:\msys64\usr\bin\bash.exe -lc 'mingw32-make -C mingw_build/ 2>&1'
|
||||
}
|
||||
|
||||
after_build:
|
||||
|
||||
@@ -354,10 +354,35 @@ public:
|
||||
f32 scale_x;
|
||||
f32 scale_y;
|
||||
f32 scale_z;
|
||||
u32 translate_x;
|
||||
u32 translate_y;
|
||||
u32 translate_z;
|
||||
f32 translate_x;
|
||||
f32 translate_y;
|
||||
f32 translate_z;
|
||||
INSERT_PADDING_WORDS(2);
|
||||
|
||||
MathUtil::Rectangle<s32> GetRect() const {
|
||||
return {
|
||||
GetX(), // left
|
||||
GetY() + GetHeight(), // top
|
||||
GetX() + GetWidth(), // right
|
||||
GetY() // bottom
|
||||
};
|
||||
};
|
||||
|
||||
s32 GetX() const {
|
||||
return static_cast<s32>(std::max(0.0f, translate_x - std::fabs(scale_x)));
|
||||
}
|
||||
|
||||
s32 GetY() const {
|
||||
return static_cast<s32>(std::max(0.0f, translate_y - std::fabs(scale_y)));
|
||||
}
|
||||
|
||||
s32 GetWidth() const {
|
||||
return static_cast<s32>(translate_x + std::fabs(scale_x)) - GetX();
|
||||
}
|
||||
|
||||
s32 GetHeight() const {
|
||||
return static_cast<s32>(translate_y + std::fabs(scale_y)) - GetY();
|
||||
}
|
||||
} viewport_transform[NumViewports];
|
||||
|
||||
struct {
|
||||
@@ -371,15 +396,6 @@ public:
|
||||
};
|
||||
float depth_range_near;
|
||||
float depth_range_far;
|
||||
|
||||
MathUtil::Rectangle<s32> GetRect() const {
|
||||
return {
|
||||
static_cast<s32>(x), // left
|
||||
static_cast<s32>(y + height), // top
|
||||
static_cast<s32>(x + width), // right
|
||||
static_cast<s32>(y) // bottom
|
||||
};
|
||||
};
|
||||
} viewport[NumViewports];
|
||||
|
||||
INSERT_PADDING_WORDS(0x1D);
|
||||
|
||||
@@ -156,6 +156,13 @@ enum class PredOperation : u64 {
|
||||
Xor = 2,
|
||||
};
|
||||
|
||||
enum class LogicOperation : u64 {
|
||||
And = 0,
|
||||
Or = 1,
|
||||
Xor = 2,
|
||||
PassB = 3,
|
||||
};
|
||||
|
||||
enum class SubOp : u64 {
|
||||
Cos = 0x0,
|
||||
Sin = 0x1,
|
||||
@@ -166,6 +173,13 @@ enum class SubOp : u64 {
|
||||
Min = 0x8,
|
||||
};
|
||||
|
||||
enum class FloatRoundingOp : u64 {
|
||||
None = 0,
|
||||
Floor = 1,
|
||||
Ceil = 2,
|
||||
Trunc = 3,
|
||||
};
|
||||
|
||||
union Instruction {
|
||||
Instruction& operator=(const Instruction& instr) {
|
||||
value = instr.value;
|
||||
@@ -202,6 +216,12 @@ union Instruction {
|
||||
BitField<42, 1, u64> negate_pred;
|
||||
} fmnmx;
|
||||
|
||||
union {
|
||||
BitField<53, 2, LogicOperation> operation;
|
||||
BitField<55, 1, u64> invert_a;
|
||||
BitField<56, 1, u64> invert_b;
|
||||
} lop;
|
||||
|
||||
float GetImm20_19() const {
|
||||
float result{};
|
||||
u32 imm{static_cast<u32>(imm20_19)};
|
||||
@@ -217,8 +237,21 @@ union Instruction {
|
||||
std::memcpy(&result, &imm, sizeof(imm));
|
||||
return result;
|
||||
}
|
||||
|
||||
s32 GetSignedImm20_20() const {
|
||||
u32 immediate = static_cast<u32>(imm20_19 | (negate_imm << 19));
|
||||
// Sign extend the 20-bit value.
|
||||
u32 mask = 1U << (20 - 1);
|
||||
return static_cast<s32>((immediate ^ mask) - mask);
|
||||
}
|
||||
} alu;
|
||||
|
||||
union {
|
||||
BitField<39, 5, u64> shift_amount;
|
||||
BitField<48, 1, u64> negate_b;
|
||||
BitField<49, 1, u64> negate_a;
|
||||
} iscadd;
|
||||
|
||||
union {
|
||||
BitField<48, 1, u64> negate_b;
|
||||
BitField<49, 1, u64> negate_c;
|
||||
@@ -238,6 +271,16 @@ union Instruction {
|
||||
BitField<56, 1, u64> neg_b;
|
||||
} fsetp;
|
||||
|
||||
union {
|
||||
BitField<0, 3, u64> pred0;
|
||||
BitField<3, 3, u64> pred3;
|
||||
BitField<39, 3, u64> pred39;
|
||||
BitField<42, 1, u64> neg_pred;
|
||||
BitField<45, 2, PredOperation> op;
|
||||
BitField<48, 1, u64> is_signed;
|
||||
BitField<49, 3, PredCondition> cond;
|
||||
} isetp;
|
||||
|
||||
union {
|
||||
BitField<39, 3, u64> pred39;
|
||||
BitField<42, 1, u64> neg_pred;
|
||||
@@ -245,20 +288,29 @@ union Instruction {
|
||||
BitField<44, 1, u64> abs_b;
|
||||
BitField<45, 2, PredOperation> op;
|
||||
BitField<48, 4, PredCondition> cond;
|
||||
BitField<52, 1, u64> bf;
|
||||
BitField<53, 1, u64> neg_b;
|
||||
BitField<54, 1, u64> abs_a;
|
||||
BitField<52, 1, u64> bf;
|
||||
BitField<55, 1, u64> ftz;
|
||||
BitField<56, 1, u64> neg_imm;
|
||||
} fset;
|
||||
|
||||
union {
|
||||
BitField<10, 2, Register::Size> size;
|
||||
BitField<13, 1, u64> is_signed;
|
||||
BitField<12, 1, u64> is_output_signed;
|
||||
BitField<13, 1, u64> is_input_signed;
|
||||
BitField<41, 2, u64> selector;
|
||||
BitField<45, 1, u64> negate_a;
|
||||
BitField<49, 1, u64> abs_a;
|
||||
BitField<50, 1, u64> saturate_a;
|
||||
|
||||
union {
|
||||
BitField<39, 2, FloatRoundingOp> rounding;
|
||||
} f2i;
|
||||
|
||||
union {
|
||||
BitField<39, 4, u64> rounding;
|
||||
} f2f;
|
||||
} conversion;
|
||||
|
||||
union {
|
||||
@@ -288,6 +340,20 @@ union Instruction {
|
||||
}
|
||||
} texs;
|
||||
|
||||
union {
|
||||
BitField<20, 24, u64> target;
|
||||
BitField<5, 1, u64> constant_buffer;
|
||||
|
||||
s32 GetBranchTarget() const {
|
||||
// Sign extend the branch target offset
|
||||
u32 mask = 1U << (24 - 1);
|
||||
u32 value = static_cast<u32>(target);
|
||||
// The branch offset is relative to the next instruction and is stored in bytes, so
|
||||
// divide it by the size of an instruction and add 1 to it.
|
||||
return static_cast<s32>((value ^ mask) - mask) / sizeof(Instruction) + 1;
|
||||
}
|
||||
} bra;
|
||||
|
||||
BitField<61, 1, u64> is_b_imm;
|
||||
BitField<60, 1, u64> is_b_gpr;
|
||||
BitField<59, 1, u64> is_c_gpr;
|
||||
@@ -306,6 +372,7 @@ class OpCode {
|
||||
public:
|
||||
enum class Id {
|
||||
KIL,
|
||||
BRA,
|
||||
LD_A,
|
||||
ST_A,
|
||||
TEX,
|
||||
@@ -325,6 +392,9 @@ public:
|
||||
FMUL_R,
|
||||
FMUL_IMM,
|
||||
FMUL32_IMM,
|
||||
ISCADD_C, // Scale and Add
|
||||
ISCADD_R,
|
||||
ISCADD_IMM,
|
||||
MUFU, // Multi-Function Operator
|
||||
RRO_C, // Range Reduction Operator
|
||||
RRO_R,
|
||||
@@ -346,6 +416,9 @@ public:
|
||||
MOV_R,
|
||||
MOV_IMM,
|
||||
MOV32_IMM,
|
||||
SHL_C,
|
||||
SHL_R,
|
||||
SHL_IMM,
|
||||
SHR_C,
|
||||
SHR_R,
|
||||
SHR_IMM,
|
||||
@@ -367,6 +440,9 @@ public:
|
||||
enum class Type {
|
||||
Trivial,
|
||||
Arithmetic,
|
||||
Logic,
|
||||
Shift,
|
||||
ScaledAdd,
|
||||
Ffma,
|
||||
Flow,
|
||||
Memory,
|
||||
@@ -470,6 +546,7 @@ private:
|
||||
std::vector<Matcher> table = {
|
||||
#define INST(bitstring, op, type, name) Detail::GetMatcher(bitstring, op, type, name)
|
||||
INST("111000110011----", Id::KIL, Type::Flow, "KIL"),
|
||||
INST("111000100100----", Id::BRA, Type::Flow, "BRA"),
|
||||
INST("1110111111011---", Id::LD_A, Type::Memory, "LD_A"),
|
||||
INST("1110111111110---", Id::ST_A, Type::Memory, "ST_A"),
|
||||
INST("1100000000111---", Id::TEX, Type::Memory, "TEX"),
|
||||
@@ -489,6 +566,9 @@ private:
|
||||
INST("0101110001101---", Id::FMUL_R, Type::Arithmetic, "FMUL_R"),
|
||||
INST("0011100-01101---", Id::FMUL_IMM, Type::Arithmetic, "FMUL_IMM"),
|
||||
INST("00011110--------", Id::FMUL32_IMM, Type::Arithmetic, "FMUL32_IMM"),
|
||||
INST("0100110000011---", Id::ISCADD_C, Type::ScaledAdd, "ISCADD_C"),
|
||||
INST("0101110000011---", Id::ISCADD_R, Type::ScaledAdd, "ISCADD_R"),
|
||||
INST("0011100-00011---", Id::ISCADD_IMM, Type::ScaledAdd, "ISCADD_IMM"),
|
||||
INST("0101000010000---", Id::MUFU, Type::Arithmetic, "MUFU"),
|
||||
INST("0100110010010---", Id::RRO_C, Type::Arithmetic, "RRO_C"),
|
||||
INST("0101110010010---", Id::RRO_R, Type::Arithmetic, "RRO_R"),
|
||||
@@ -496,20 +576,23 @@ private:
|
||||
INST("0100110010101---", Id::F2F_C, Type::Conversion, "F2F_C"),
|
||||
INST("0101110010101---", Id::F2F_R, Type::Conversion, "F2F_R"),
|
||||
INST("0011100-10101---", Id::F2F_IMM, Type::Conversion, "F2F_IMM"),
|
||||
INST("0100110010110---", Id::F2I_C, Type::Arithmetic, "F2I_C"),
|
||||
INST("0101110010110---", Id::F2I_R, Type::Arithmetic, "F2I_R"),
|
||||
INST("0011100-10110---", Id::F2I_IMM, Type::Arithmetic, "F2I_IMM"),
|
||||
INST("000001----------", Id::LOP32I, Type::Arithmetic, "LOP32I"),
|
||||
INST("0100110010110---", Id::F2I_C, Type::Conversion, "F2I_C"),
|
||||
INST("0101110010110---", Id::F2I_R, Type::Conversion, "F2I_R"),
|
||||
INST("0011100-10110---", Id::F2I_IMM, Type::Conversion, "F2I_IMM"),
|
||||
INST("0100110010011---", Id::MOV_C, Type::Arithmetic, "MOV_C"),
|
||||
INST("0101110010011---", Id::MOV_R, Type::Arithmetic, "MOV_R"),
|
||||
INST("0011100-10011---", Id::MOV_IMM, Type::Arithmetic, "MOV_IMM"),
|
||||
INST("000000010000----", Id::MOV32_IMM, Type::Arithmetic, "MOV32_IMM"),
|
||||
INST("0100110000101---", Id::SHR_C, Type::Arithmetic, "SHR_C"),
|
||||
INST("0101110000101---", Id::SHR_R, Type::Arithmetic, "SHR_R"),
|
||||
INST("0011100-00101---", Id::SHR_IMM, Type::Arithmetic, "SHR_IMM"),
|
||||
INST("0100110001100---", Id::FMNMX_C, Type::Arithmetic, "FMNMX_C"),
|
||||
INST("0101110001100---", Id::FMNMX_R, Type::Arithmetic, "FMNMX_R"),
|
||||
INST("0011100-01100---", Id::FMNMX_IMM, Type::Arithmetic, "FMNMX_IMM"),
|
||||
INST("000001----------", Id::LOP32I, Type::Logic, "LOP32I"),
|
||||
INST("0100110001001---", Id::SHL_C, Type::Shift, "SHL_C"),
|
||||
INST("0101110001001---", Id::SHL_R, Type::Shift, "SHL_R"),
|
||||
INST("0011100-01001---", Id::SHL_IMM, Type::Shift, "SHL_IMM"),
|
||||
INST("0100110000101---", Id::SHR_C, Type::Shift, "SHR_C"),
|
||||
INST("0101110000101---", Id::SHR_R, Type::Shift, "SHR_R"),
|
||||
INST("0011100-00101---", Id::SHR_IMM, Type::Shift, "SHR_IMM"),
|
||||
INST("0100110011100---", Id::I2I_C, Type::Conversion, "I2I_C"),
|
||||
INST("0101110011100---", Id::I2I_R, Type::Conversion, "I2I_R"),
|
||||
INST("01110001-1000---", Id::I2I_IMM, Type::Conversion, "I2I_IMM"),
|
||||
|
||||
@@ -26,6 +26,10 @@ u32 RenderTargetBytesPerPixel(RenderTargetFormat format) {
|
||||
ASSERT(format != RenderTargetFormat::NONE);
|
||||
|
||||
switch (format) {
|
||||
case RenderTargetFormat::RGBA32_FLOAT:
|
||||
return 16;
|
||||
case RenderTargetFormat::RGBA16_FLOAT:
|
||||
return 8;
|
||||
case RenderTargetFormat::RGBA8_UNORM:
|
||||
case RenderTargetFormat::RGB10_A2_UNORM:
|
||||
return 4;
|
||||
|
||||
@@ -15,10 +15,12 @@ namespace Tegra {
|
||||
|
||||
enum class RenderTargetFormat : u32 {
|
||||
NONE = 0x0,
|
||||
RGBA32_FLOAT = 0xC0,
|
||||
RGBA16_FLOAT = 0xCA,
|
||||
RGB10_A2_UNORM = 0xD1,
|
||||
RGBA8_UNORM = 0xD5,
|
||||
RGBA8_SRGB = 0xD6,
|
||||
R11G11B10_FLOAT = 0xE0,
|
||||
};
|
||||
|
||||
/// Returns the number of bytes per pixel of each rendertarget format.
|
||||
|
||||
@@ -298,7 +298,7 @@ void RasterizerOpenGL::DrawArrays() {
|
||||
const bool has_stencil = false;
|
||||
const bool using_color_fb = true;
|
||||
const bool using_depth_fb = false;
|
||||
const MathUtil::Rectangle<s32> viewport_rect{regs.viewport[0].GetRect()};
|
||||
const MathUtil::Rectangle<s32> viewport_rect{regs.viewport_transform[0].GetRect()};
|
||||
|
||||
const bool write_color_fb =
|
||||
state.color_mask.red_enabled == GL_TRUE || state.color_mask.green_enabled == GL_TRUE ||
|
||||
@@ -702,7 +702,7 @@ void RasterizerOpenGL::BindFramebufferSurfaces(const Surface& color_surface,
|
||||
|
||||
void RasterizerOpenGL::SyncViewport(const MathUtil::Rectangle<u32>& surfaces_rect, u16 res_scale) {
|
||||
const auto& regs = Core::System().GetInstance().GPU().Maxwell3D().regs;
|
||||
const MathUtil::Rectangle<s32> viewport_rect{regs.viewport[0].GetRect()};
|
||||
const MathUtil::Rectangle<s32> viewport_rect{regs.viewport_transform[0].GetRect()};
|
||||
|
||||
state.viewport.x = static_cast<GLint>(surfaces_rect.left) + viewport_rect.left * res_scale;
|
||||
state.viewport.y = static_cast<GLint>(surfaces_rect.bottom) + viewport_rect.bottom * res_scale;
|
||||
|
||||
@@ -50,6 +50,7 @@ static constexpr std::array<FormatTuple, SurfaceParams::MaxPixelFormat> tex_form
|
||||
{GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV, false}, // A1B5G5R5
|
||||
{GL_R8, GL_RED, GL_UNSIGNED_BYTE, false}, // R8
|
||||
{GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT, false}, // RGBA16F
|
||||
{GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV, false}, // R11FG11FB10F
|
||||
{GL_COMPRESSED_RGB_S3TC_DXT1_EXT, GL_RGB, GL_UNSIGNED_INT_8_8_8_8, true}, // DXT1
|
||||
{GL_COMPRESSED_RGBA_S3TC_DXT3_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true}, // DXT23
|
||||
{GL_COMPRESSED_RGBA_S3TC_DXT5_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true}, // DXT45
|
||||
@@ -60,8 +61,10 @@ static const FormatTuple& GetFormatTuple(PixelFormat pixel_format, ComponentType
|
||||
const SurfaceType type = SurfaceParams::GetFormatType(pixel_format);
|
||||
if (type == SurfaceType::ColorTexture) {
|
||||
ASSERT(static_cast<size_t>(pixel_format) < tex_format_tuples.size());
|
||||
// For now only UNORM components are supported, or RGBA16F which is type FLOAT
|
||||
ASSERT(component_type == ComponentType::UNorm || pixel_format == PixelFormat::RGBA16F);
|
||||
// For now only UNORM components are supported, or either R11FG11FB10F or RGBA16F which are
|
||||
// type FLOAT
|
||||
ASSERT(component_type == ComponentType::UNorm || pixel_format == PixelFormat::RGBA16F ||
|
||||
pixel_format == PixelFormat::R11FG11FB10F);
|
||||
return tex_format_tuples[static_cast<unsigned int>(pixel_format)];
|
||||
} else if (type == SurfaceType::Depth || type == SurfaceType::DepthStencil) {
|
||||
// TODO(Subv): Implement depth formats
|
||||
@@ -110,11 +113,12 @@ static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr, Tegra:
|
||||
Tegra::GPUVAddr),
|
||||
SurfaceParams::MaxPixelFormat>
|
||||
morton_to_gl_fns = {
|
||||
MortonCopy<true, PixelFormat::ABGR8>, MortonCopy<true, PixelFormat::B5G6R5>,
|
||||
MortonCopy<true, PixelFormat::A2B10G10R10>, MortonCopy<true, PixelFormat::A1B5G5R5>,
|
||||
MortonCopy<true, PixelFormat::R8>, MortonCopy<true, PixelFormat::RGBA16F>,
|
||||
MortonCopy<true, PixelFormat::DXT1>, MortonCopy<true, PixelFormat::DXT23>,
|
||||
MortonCopy<true, PixelFormat::DXT45>, MortonCopy<true, PixelFormat::DXN1>,
|
||||
MortonCopy<true, PixelFormat::ABGR8>, MortonCopy<true, PixelFormat::B5G6R5>,
|
||||
MortonCopy<true, PixelFormat::A2B10G10R10>, MortonCopy<true, PixelFormat::A1B5G5R5>,
|
||||
MortonCopy<true, PixelFormat::R8>, MortonCopy<true, PixelFormat::RGBA16F>,
|
||||
MortonCopy<true, PixelFormat::R11FG11FB10F>, MortonCopy<true, PixelFormat::DXT1>,
|
||||
MortonCopy<true, PixelFormat::DXT23>, MortonCopy<true, PixelFormat::DXT45>,
|
||||
MortonCopy<true, PixelFormat::DXN1>,
|
||||
};
|
||||
|
||||
static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr, Tegra::GPUVAddr,
|
||||
@@ -127,6 +131,7 @@ static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr, Tegra:
|
||||
MortonCopy<false, PixelFormat::A1B5G5R5>,
|
||||
MortonCopy<false, PixelFormat::R8>,
|
||||
MortonCopy<false, PixelFormat::RGBA16F>,
|
||||
MortonCopy<false, PixelFormat::R11FG11FB10F>,
|
||||
// TODO(Subv): Swizzling the DXT1/DXT23/DXT45/DXN1 formats is not yet supported
|
||||
nullptr,
|
||||
nullptr,
|
||||
|
||||
@@ -60,10 +60,11 @@ struct SurfaceParams {
|
||||
A1B5G5R5 = 3,
|
||||
R8 = 4,
|
||||
RGBA16F = 5,
|
||||
DXT1 = 6,
|
||||
DXT23 = 7,
|
||||
DXT45 = 8,
|
||||
DXN1 = 9, // This is also known as BC4
|
||||
R11FG11FB10F = 6,
|
||||
DXT1 = 7,
|
||||
DXT23 = 8,
|
||||
DXT45 = 9,
|
||||
DXN1 = 10, // This is also known as BC4
|
||||
|
||||
Max,
|
||||
Invalid = 255,
|
||||
@@ -104,7 +105,8 @@ struct SurfaceParams {
|
||||
1, // A2B10G10R10
|
||||
1, // A1B5G5R5
|
||||
1, // R8
|
||||
2, // RGBA16F
|
||||
1, // RGBA16F
|
||||
1, // R11FG11FB10F
|
||||
4, // DXT1
|
||||
4, // DXT23
|
||||
4, // DXT45
|
||||
@@ -129,6 +131,7 @@ struct SurfaceParams {
|
||||
16, // A1B5G5R5
|
||||
8, // R8
|
||||
64, // RGBA16F
|
||||
32, // R11FG11FB10F
|
||||
64, // DXT1
|
||||
128, // DXT23
|
||||
128, // DXT45
|
||||
@@ -151,6 +154,8 @@ struct SurfaceParams {
|
||||
return PixelFormat::A2B10G10R10;
|
||||
case Tegra::RenderTargetFormat::RGBA16_FLOAT:
|
||||
return PixelFormat::RGBA16F;
|
||||
case Tegra::RenderTargetFormat::R11G11B10_FLOAT:
|
||||
return PixelFormat::R11FG11FB10F;
|
||||
default:
|
||||
NGLOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format));
|
||||
UNREACHABLE();
|
||||
@@ -182,6 +187,8 @@ struct SurfaceParams {
|
||||
return PixelFormat::R8;
|
||||
case Tegra::Texture::TextureFormat::R16_G16_B16_A16:
|
||||
return PixelFormat::RGBA16F;
|
||||
case Tegra::Texture::TextureFormat::BF10GF11RF11:
|
||||
return PixelFormat::R11FG11FB10F;
|
||||
case Tegra::Texture::TextureFormat::DXT1:
|
||||
return PixelFormat::DXT1;
|
||||
case Tegra::Texture::TextureFormat::DXT23:
|
||||
@@ -211,6 +218,8 @@ struct SurfaceParams {
|
||||
return Tegra::Texture::TextureFormat::R8;
|
||||
case PixelFormat::RGBA16F:
|
||||
return Tegra::Texture::TextureFormat::R16_G16_B16_A16;
|
||||
case PixelFormat::R11FG11FB10F:
|
||||
return Tegra::Texture::TextureFormat::BF10GF11RF11;
|
||||
case PixelFormat::DXT1:
|
||||
return Tegra::Texture::TextureFormat::DXT1;
|
||||
case PixelFormat::DXT23:
|
||||
@@ -243,6 +252,7 @@ struct SurfaceParams {
|
||||
case Tegra::RenderTargetFormat::RGB10_A2_UNORM:
|
||||
return ComponentType::UNorm;
|
||||
case Tegra::RenderTargetFormat::RGBA16_FLOAT:
|
||||
case Tegra::RenderTargetFormat::R11G11B10_FLOAT:
|
||||
return ComponentType::Float;
|
||||
default:
|
||||
NGLOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format));
|
||||
|
||||
@@ -88,6 +88,20 @@ private:
|
||||
return *subroutines.insert(std::move(subroutine)).first;
|
||||
}
|
||||
|
||||
/// Merges exit method of two parallel branches.
|
||||
static ExitMethod ParallelExit(ExitMethod a, ExitMethod b) {
|
||||
if (a == ExitMethod::Undetermined) {
|
||||
return b;
|
||||
}
|
||||
if (b == ExitMethod::Undetermined) {
|
||||
return a;
|
||||
}
|
||||
if (a == b) {
|
||||
return a;
|
||||
}
|
||||
return ExitMethod::Conditional;
|
||||
}
|
||||
|
||||
/// Scans a range of code for labels and determines the exit method.
|
||||
ExitMethod Scan(u32 begin, u32 end, std::set<u32>& labels) {
|
||||
auto [iter, inserted] =
|
||||
@@ -97,10 +111,27 @@ private:
|
||||
return exit_method;
|
||||
|
||||
for (u32 offset = begin; offset != end && offset != PROGRAM_END; ++offset) {
|
||||
if (const auto opcode = OpCode::Decode({program_code[offset]})) {
|
||||
const Instruction instr = {program_code[offset]};
|
||||
if (const auto opcode = OpCode::Decode(instr)) {
|
||||
switch (opcode->GetId()) {
|
||||
case OpCode::Id::EXIT: {
|
||||
return exit_method = ExitMethod::AlwaysEnd;
|
||||
// The EXIT instruction can be predicated, which means that the shader can
|
||||
// conditionally end on this instruction. We have to consider the case where the
|
||||
// condition is not met and check the exit method of that other basic block.
|
||||
using Tegra::Shader::Pred;
|
||||
if (instr.pred.pred_index == static_cast<u64>(Pred::UnusedIndex)) {
|
||||
return exit_method = ExitMethod::AlwaysEnd;
|
||||
} else {
|
||||
ExitMethod not_met = Scan(offset + 1, end, labels);
|
||||
return exit_method = ParallelExit(ExitMethod::AlwaysEnd, not_met);
|
||||
}
|
||||
}
|
||||
case OpCode::Id::BRA: {
|
||||
u32 target = offset + instr.bra.GetBranchTarget();
|
||||
labels.insert(target);
|
||||
ExitMethod no_jmp = Scan(offset + 1, end, labels);
|
||||
ExitMethod jmp = Scan(target, end, labels);
|
||||
return exit_method = ParallelExit(no_jmp, jmp);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -197,6 +228,11 @@ public:
|
||||
return active_type == Type::Integer;
|
||||
}
|
||||
|
||||
/// Returns the current active type of the register
|
||||
Type GetActiveType() const {
|
||||
return active_type;
|
||||
}
|
||||
|
||||
/// Returns the index of the register
|
||||
size_t GetIndex() const {
|
||||
return index;
|
||||
@@ -328,22 +364,28 @@ public:
|
||||
shader.AddLine(dest + " = " + src + ';');
|
||||
}
|
||||
|
||||
/// Generates code representing a uniform (C buffer) register.
|
||||
std::string GetUniform(const Uniform& uniform, const Register& dest_reg) {
|
||||
/// Generates code representing a uniform (C buffer) register, interpreted as the input type.
|
||||
std::string GetUniform(const Uniform& uniform, GLSLRegister::Type type) {
|
||||
declr_const_buffers[uniform.index].MarkAsUsed(static_cast<unsigned>(uniform.index),
|
||||
static_cast<unsigned>(uniform.offset), stage);
|
||||
std::string value =
|
||||
'c' + std::to_string(uniform.index) + '[' + std::to_string(uniform.offset) + ']';
|
||||
|
||||
if (regs[dest_reg].IsFloat()) {
|
||||
if (type == GLSLRegister::Type::Float) {
|
||||
return value;
|
||||
} else if (regs[dest_reg].IsInteger()) {
|
||||
} else if (type == GLSLRegister::Type::Integer) {
|
||||
return "floatBitsToInt(" + value + ')';
|
||||
} else {
|
||||
UNREACHABLE();
|
||||
}
|
||||
}
|
||||
|
||||
/// Generates code representing a uniform (C buffer) register, interpreted as the type of the
|
||||
/// destination register.
|
||||
std::string GetUniform(const Uniform& uniform, const Register& dest_reg) {
|
||||
return GetUniform(uniform, regs[dest_reg].GetActiveType());
|
||||
}
|
||||
|
||||
/// Add declarations for registers
|
||||
void GenerateDeclarations() {
|
||||
for (const auto& reg : regs) {
|
||||
@@ -612,9 +654,9 @@ private:
|
||||
std::string GetPredicateComparison(Tegra::Shader::PredCondition condition) const {
|
||||
using Tegra::Shader::PredCondition;
|
||||
static const std::unordered_map<PredCondition, const char*> PredicateComparisonStrings = {
|
||||
{PredCondition::LessThan, "<"}, {PredCondition::Equal, "=="},
|
||||
{PredCondition::LessEqual, "<="}, {PredCondition::GreaterThan, ">"},
|
||||
{PredCondition::GreaterEqual, ">="},
|
||||
{PredCondition::LessThan, "<"}, {PredCondition::Equal, "=="},
|
||||
{PredCondition::LessEqual, "<="}, {PredCondition::GreaterThan, ">"},
|
||||
{PredCondition::NotEqual, "!="}, {PredCondition::GreaterEqual, ">="},
|
||||
};
|
||||
|
||||
auto comparison = PredicateComparisonStrings.find(condition);
|
||||
@@ -808,6 +850,102 @@ private:
|
||||
}
|
||||
break;
|
||||
}
|
||||
case OpCode::Type::Logic: {
|
||||
std::string op_a = regs.GetRegisterAsInteger(instr.gpr8, 0, false);
|
||||
|
||||
if (instr.alu.lop.invert_a)
|
||||
op_a = "~(" + op_a + ')';
|
||||
|
||||
switch (opcode->GetId()) {
|
||||
case OpCode::Id::LOP32I: {
|
||||
u32 imm = static_cast<u32>(instr.alu.imm20_32.Value());
|
||||
|
||||
if (instr.alu.lop.invert_b)
|
||||
imm = ~imm;
|
||||
|
||||
switch (instr.alu.lop.operation) {
|
||||
case Tegra::Shader::LogicOperation::And: {
|
||||
regs.SetRegisterToInteger(instr.gpr0, false, 0,
|
||||
'(' + op_a + " & " + std::to_string(imm) + ')', 1, 1);
|
||||
break;
|
||||
}
|
||||
case Tegra::Shader::LogicOperation::Or: {
|
||||
regs.SetRegisterToInteger(instr.gpr0, false, 0,
|
||||
'(' + op_a + " | " + std::to_string(imm) + ')', 1, 1);
|
||||
break;
|
||||
}
|
||||
case Tegra::Shader::LogicOperation::Xor: {
|
||||
regs.SetRegisterToInteger(instr.gpr0, false, 0,
|
||||
'(' + op_a + " ^ " + std::to_string(imm) + ')', 1, 1);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
NGLOG_CRITICAL(HW_GPU, "Unimplemented lop32i operation: {}",
|
||||
static_cast<u32>(instr.alu.lop.operation.Value()));
|
||||
UNREACHABLE();
|
||||
}
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
NGLOG_CRITICAL(HW_GPU, "Unhandled logic instruction: {}", opcode->GetName());
|
||||
UNREACHABLE();
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
case OpCode::Type::Shift: {
|
||||
std::string op_a = regs.GetRegisterAsInteger(instr.gpr8, 0, false);
|
||||
std::string op_b;
|
||||
|
||||
if (instr.is_b_imm) {
|
||||
op_b += '(' + std::to_string(instr.alu.GetSignedImm20_20()) + ')';
|
||||
} else {
|
||||
if (instr.is_b_gpr) {
|
||||
op_b += regs.GetRegisterAsInteger(instr.gpr20);
|
||||
} else {
|
||||
op_b += regs.GetUniform(instr.uniform, GLSLRegister::Type::Integer);
|
||||
}
|
||||
}
|
||||
|
||||
switch (opcode->GetId()) {
|
||||
case OpCode::Id::SHL_C:
|
||||
case OpCode::Id::SHL_R:
|
||||
case OpCode::Id::SHL_IMM:
|
||||
regs.SetRegisterToInteger(instr.gpr0, true, 0, op_a + " << " + op_b, 1, 1);
|
||||
break;
|
||||
default: {
|
||||
NGLOG_CRITICAL(HW_GPU, "Unhandled shift instruction: {}", opcode->GetName());
|
||||
UNREACHABLE();
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
case OpCode::Type::ScaledAdd: {
|
||||
std::string op_a = regs.GetRegisterAsInteger(instr.gpr8);
|
||||
|
||||
if (instr.iscadd.negate_a)
|
||||
op_a = '-' + op_a;
|
||||
|
||||
std::string op_b = instr.iscadd.negate_b ? "-" : "";
|
||||
|
||||
if (instr.is_b_imm) {
|
||||
op_b += '(' + std::to_string(instr.alu.GetSignedImm20_20()) + ')';
|
||||
} else {
|
||||
if (instr.is_b_gpr) {
|
||||
op_b += regs.GetRegisterAsInteger(instr.gpr20);
|
||||
} else {
|
||||
op_b += regs.GetUniform(instr.uniform, GLSLRegister::Type::Integer);
|
||||
}
|
||||
}
|
||||
|
||||
std::string shift = std::to_string(instr.iscadd.shift_amount.Value());
|
||||
|
||||
regs.SetRegisterToInteger(instr.gpr0, true, 0,
|
||||
"((" + op_a + " << " + shift + ") + " + op_b + ')', 1, 1);
|
||||
break;
|
||||
}
|
||||
case OpCode::Type::Ffma: {
|
||||
std::string op_a = regs.GetRegisterAsFloat(instr.gpr8);
|
||||
std::string op_b = instr.ffma.negate_b ? "-" : "";
|
||||
@@ -849,21 +987,35 @@ private:
|
||||
ASSERT_MSG(!instr.conversion.saturate_a, "Unimplemented");
|
||||
|
||||
switch (opcode->GetId()) {
|
||||
case OpCode::Id::I2I_R:
|
||||
case OpCode::Id::I2F_R: {
|
||||
case OpCode::Id::I2I_R: {
|
||||
ASSERT_MSG(!instr.conversion.selector, "Unimplemented");
|
||||
|
||||
std::string op_a =
|
||||
regs.GetRegisterAsInteger(instr.gpr20, 0, instr.conversion.is_signed);
|
||||
regs.GetRegisterAsInteger(instr.gpr20, 0, instr.conversion.is_input_signed);
|
||||
|
||||
if (instr.conversion.abs_a) {
|
||||
op_a = "abs(" + op_a + ')';
|
||||
}
|
||||
|
||||
regs.SetRegisterToInteger(instr.gpr0, instr.conversion.is_signed, 0, op_a, 1, 1);
|
||||
regs.SetRegisterToInteger(instr.gpr0, instr.conversion.is_output_signed, 0, op_a, 1,
|
||||
1);
|
||||
break;
|
||||
}
|
||||
case OpCode::Id::I2F_R: {
|
||||
ASSERT_MSG(!instr.conversion.selector, "Unimplemented");
|
||||
std::string op_a =
|
||||
regs.GetRegisterAsInteger(instr.gpr20, 0, instr.conversion.is_input_signed);
|
||||
|
||||
if (instr.conversion.abs_a) {
|
||||
op_a = "abs(" + op_a + ')';
|
||||
}
|
||||
|
||||
regs.SetRegisterToFloat(instr.gpr0, 0, op_a, 1, 1);
|
||||
break;
|
||||
}
|
||||
case OpCode::Id::F2F_R: {
|
||||
// TODO(Subv): Implement rounding operations.
|
||||
ASSERT_MSG(instr.conversion.f2f.rounding == 0, "Unimplemented rounding operation");
|
||||
std::string op_a = regs.GetRegisterAsFloat(instr.gpr20);
|
||||
|
||||
if (instr.conversion.abs_a) {
|
||||
@@ -873,6 +1025,43 @@ private:
|
||||
regs.SetRegisterToFloat(instr.gpr0, 0, op_a, 1, 1);
|
||||
break;
|
||||
}
|
||||
case OpCode::Id::F2I_R: {
|
||||
std::string op_a = regs.GetRegisterAsFloat(instr.gpr20);
|
||||
|
||||
if (instr.conversion.abs_a) {
|
||||
op_a = "abs(" + op_a + ')';
|
||||
}
|
||||
|
||||
using Tegra::Shader::FloatRoundingOp;
|
||||
switch (instr.conversion.f2i.rounding) {
|
||||
case FloatRoundingOp::None:
|
||||
break;
|
||||
case FloatRoundingOp::Floor:
|
||||
op_a = "floor(" + op_a + ')';
|
||||
break;
|
||||
case FloatRoundingOp::Ceil:
|
||||
op_a = "ceil(" + op_a + ')';
|
||||
break;
|
||||
case FloatRoundingOp::Trunc:
|
||||
op_a = "trunc(" + op_a + ')';
|
||||
break;
|
||||
default:
|
||||
NGLOG_CRITICAL(HW_GPU, "Unimplemented f2i rounding mode {}",
|
||||
static_cast<u32>(instr.conversion.f2i.rounding.Value()));
|
||||
UNREACHABLE();
|
||||
break;
|
||||
}
|
||||
|
||||
if (instr.conversion.is_output_signed) {
|
||||
op_a = "int(" + op_a + ')';
|
||||
} else {
|
||||
op_a = "uint(" + op_a + ')';
|
||||
}
|
||||
|
||||
regs.SetRegisterToInteger(instr.gpr0, instr.conversion.is_output_signed, 0, op_a, 1,
|
||||
1);
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
NGLOG_CRITICAL(HW_GPU, "Unhandled conversion instruction: {}", opcode->GetName());
|
||||
UNREACHABLE();
|
||||
@@ -986,7 +1175,7 @@ private:
|
||||
if (instr.is_b_gpr) {
|
||||
op_b += regs.GetRegisterAsFloat(instr.gpr20);
|
||||
} else {
|
||||
op_b += regs.GetUniform(instr.uniform, instr.gpr0);
|
||||
op_b += regs.GetUniform(instr.uniform, GLSLRegister::Type::Float);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1017,6 +1206,42 @@ private:
|
||||
}
|
||||
break;
|
||||
}
|
||||
case OpCode::Type::IntegerSetPredicate: {
|
||||
std::string op_a = regs.GetRegisterAsInteger(instr.gpr8, 0, instr.isetp.is_signed);
|
||||
|
||||
std::string op_b{};
|
||||
|
||||
ASSERT_MSG(!instr.is_b_imm, "ISETP_IMM not implemented");
|
||||
|
||||
if (instr.is_b_gpr) {
|
||||
op_b += regs.GetRegisterAsInteger(instr.gpr20, 0, instr.isetp.is_signed);
|
||||
} else {
|
||||
op_b += regs.GetUniform(instr.uniform, GLSLRegister::Type::Integer);
|
||||
}
|
||||
|
||||
using Tegra::Shader::Pred;
|
||||
// We can't use the constant predicate as destination.
|
||||
ASSERT(instr.isetp.pred3 != static_cast<u64>(Pred::UnusedIndex));
|
||||
|
||||
std::string second_pred =
|
||||
GetPredicateCondition(instr.isetp.pred39, instr.isetp.neg_pred != 0);
|
||||
|
||||
std::string comparator = GetPredicateComparison(instr.isetp.cond);
|
||||
std::string combiner = GetPredicateCombiner(instr.isetp.op);
|
||||
|
||||
std::string predicate = '(' + op_a + ") " + comparator + " (" + op_b + ')';
|
||||
// Set the primary predicate to the result of Predicate OP SecondPredicate
|
||||
SetPredicate(instr.isetp.pred3,
|
||||
'(' + predicate + ") " + combiner + " (" + second_pred + ')');
|
||||
|
||||
if (instr.isetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) {
|
||||
// Set the secondary predicate to the result of !Predicate OP SecondPredicate,
|
||||
// if enabled
|
||||
SetPredicate(instr.isetp.pred0,
|
||||
"!(" + predicate + ") " + combiner + " (" + second_pred + ')');
|
||||
}
|
||||
break;
|
||||
}
|
||||
case OpCode::Type::FloatSet: {
|
||||
std::string op_a = instr.fset.neg_a ? "-" : "";
|
||||
op_a += regs.GetRegisterAsFloat(instr.gpr8);
|
||||
@@ -1037,7 +1262,7 @@ private:
|
||||
if (instr.is_b_gpr) {
|
||||
op_b += regs.GetRegisterAsFloat(instr.gpr20);
|
||||
} else {
|
||||
op_b += regs.GetUniform(instr.uniform, instr.gpr0);
|
||||
op_b += regs.GetUniform(instr.uniform, GLSLRegister::Type::Float);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1056,15 +1281,17 @@ private:
|
||||
std::string predicate = "(((" + op_a + ") " + comparator + " (" + op_b + ")) " +
|
||||
combiner + " (" + second_pred + "))";
|
||||
|
||||
regs.SetRegisterToFloat(instr.gpr0, 0, predicate + " ? 1.0 : 0.0", 1, 1);
|
||||
if (instr.fset.bf) {
|
||||
regs.SetRegisterToFloat(instr.gpr0, 0, predicate + " ? 1.0 : 0.0", 1, 1);
|
||||
} else {
|
||||
regs.SetRegisterToInteger(instr.gpr0, false, 0, predicate + " ? 0xFFFFFFFF : 0", 1,
|
||||
1);
|
||||
}
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
switch (opcode->GetId()) {
|
||||
case OpCode::Id::EXIT: {
|
||||
ASSERT_MSG(instr.pred.pred_index == static_cast<u64>(Pred::UnusedIndex),
|
||||
"Predicated exits not implemented");
|
||||
|
||||
// Final color output is currently hardcoded to GPR0-3 for fragment shaders
|
||||
if (stage == Maxwell3D::Regs::ShaderStage::Fragment) {
|
||||
shader.AddLine("color.r = " + regs.GetRegisterAsFloat(0) + ';');
|
||||
@@ -1074,13 +1301,25 @@ private:
|
||||
}
|
||||
|
||||
shader.AddLine("return true;");
|
||||
offset = PROGRAM_END - 1;
|
||||
if (instr.pred.pred_index == static_cast<u64>(Pred::UnusedIndex)) {
|
||||
// If this is an unconditional exit then just end processing here, otherwise we
|
||||
// have to account for the possibility of the condition not being met, so
|
||||
// continue processing the next instruction.
|
||||
offset = PROGRAM_END - 1;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case OpCode::Id::KIL: {
|
||||
shader.AddLine("discard;");
|
||||
break;
|
||||
}
|
||||
case OpCode::Id::BRA: {
|
||||
ASSERT_MSG(instr.bra.constant_buffer == 0,
|
||||
"BRA with constant buffers are not implemented");
|
||||
u32 target = offset + instr.bra.GetBranchTarget();
|
||||
shader.AddLine("{ jmp_to = " + std::to_string(target) + "u; break; }");
|
||||
break;
|
||||
}
|
||||
case OpCode::Id::IPA: {
|
||||
const auto& attribute = instr.attribute.fmt28;
|
||||
regs.SetRegisterToInputAttibute(instr.gpr0, attribute.element, attribute.index);
|
||||
|
||||
@@ -55,6 +55,7 @@ u32 BytesPerPixel(TextureFormat format) {
|
||||
return 16;
|
||||
case TextureFormat::A8R8G8B8:
|
||||
case TextureFormat::A2B10G10R10:
|
||||
case TextureFormat::BF10GF11RF11:
|
||||
return 4;
|
||||
case TextureFormat::A1B5G5R5:
|
||||
case TextureFormat::B5G6R5:
|
||||
@@ -92,6 +93,7 @@ std::vector<u8> UnswizzleTexture(VAddr address, TextureFormat format, u32 width,
|
||||
case TextureFormat::B5G6R5:
|
||||
case TextureFormat::R8:
|
||||
case TextureFormat::R16_G16_B16_A16:
|
||||
case TextureFormat::BF10GF11RF11:
|
||||
CopySwizzledData(width, height, bytes_per_pixel, bytes_per_pixel, data,
|
||||
unswizzled_data.data(), true, block_height);
|
||||
break;
|
||||
@@ -118,6 +120,7 @@ std::vector<u8> DecodeTexture(const std::vector<u8>& texture_data, TextureFormat
|
||||
case TextureFormat::A1B5G5R5:
|
||||
case TextureFormat::B5G6R5:
|
||||
case TextureFormat::R8:
|
||||
case TextureFormat::BF10GF11RF11:
|
||||
// TODO(Subv): For the time being just forward the same data without any decoding.
|
||||
rgba_data = texture_data;
|
||||
break;
|
||||
|
||||
Reference in New Issue
Block a user