Compare commits

..

29 Commits

Author SHA1 Message Date
David Marcec
ee3e816433 fixed overflow 2018-07-03 14:10:32 +10:00
David Marcec
1f39d7b406 fixed slight offset miscalculation 2018-07-03 13:50:33 +10:00
David Marcec
baa380632c Merge branch 'master' of https://github.com/yuzu-emu/yuzu into audren-voices-section 2018-07-03 13:04:44 +10:00
David Marcec
028a3947aa voice section updating 2018-07-03 13:03:06 +10:00
bunnei
5410b4659d Merge pull request #610 from Subv/mufu_8
GPU: Implemented MUFU suboperation 8, sqrt.
2018-07-02 22:26:42 -04:00
bunnei
92c7135065 Merge pull request #608 from Subv/depth
GPU: Implemented the depth buffer and depth test + culling
2018-07-02 21:24:43 -04:00
Subv
a6d4903aaf GPU: Set up the culling configuration on each draw. 2018-07-02 19:51:29 -05:00
Subv
6e4e0b2b41 GPU: Implemented MUFU suboperation 8, sqrt. 2018-07-02 19:48:15 -05:00
Sebastian Valle
055f1546d7 Merge pull request #606 from Subv/base_vertex
GPU: Fixed the index offset and implement BaseVertex when doing indexed rendering.
2018-07-02 14:07:38 -05:00
Sebastian Valle
79167fc989 Merge pull request #603 from Subv/nvmap_free
GPU: Remove unmapped surfaces from the rasterizer cache and fix our nvmap::Free behavior.
2018-07-02 14:07:17 -05:00
Sebastian Valle
9685dd5840 Merge pull request #605 from Subv/dma_copy
GPU: Directly copy the pixels when performing a same-layout DMA.
2018-07-02 14:06:56 -05:00
Subv
18c8ae7750 GPU: Set up the depth test state on every draw. 2018-07-02 13:33:06 -05:00
Subv
d480b63e0d MaxwellToGL: Added conversion functions for depth test and cull mode. 2018-07-02 13:31:49 -05:00
Subv
c1f55c32c8 GPU: Added registers for depth test and cull mode. 2018-07-02 13:31:20 -05:00
Subv
0f929762b3 GPU: Implemented the Z24S8 depth format and load the depth framebuffer. 2018-07-02 12:42:04 -05:00
Subv
4c59105adf GPU: Implement offsetted rendering when using non-indexed drawing. 2018-07-02 11:23:36 -05:00
Subv
fca3d1cc65 GPU: Fixed the index offset rendering, and implemented the base vertex functionality.
This fixes Stardew Valley.
2018-07-02 11:22:17 -05:00
Subv
cc73bad293 GPU: Added register definitions for the vertex buffer base element. 2018-07-02 11:21:23 -05:00
bunnei
3d41fdfbba Merge pull request #604 from Subv/invalid_textures
GPU: Ignore invalid and disabled textures when drawing.
2018-07-02 11:48:18 -04:00
Subv
ca633a5a3c GPU: Directly copy the pixels when performing a same-layout DMA. 2018-07-02 09:46:33 -05:00
Subv
6c0c81dfdc GPU: Remove a surface from the cache when its backing memory is being unmapped from the GPU's MMU. 2018-07-01 10:50:06 -05:00
Subv
a093feca62 nvmap: Return the address of the nvmap object when Freeing it for the last time.
This behavior is confirmed by reverse engineering.
2018-07-01 10:48:50 -05:00
bunnei
066d6184d4 Merge pull request #602 from Subv/mufu_subop
GPU: Corrected the size of the MUFU subop field, and removed incorrect "min" operation.
2018-07-01 11:06:04 -04:00
bunnei
b611d852db Merge pull request #601 from Subv/rgba32_ui
GPU: Implement the RGBA32_UINT rendertarget format.
2018-07-01 03:22:38 -04:00
bunnei
85a60e2044 Merge pull request #600 from bunnei/pred-not-eq-nan
gl_shader_decompiler: Implement predicate NotEqualWithNan.
2018-07-01 03:22:11 -04:00
Subv
f33e406ff2 GPU: Corrected the size of the MUFU subop field, and removed incorrect "min" operation. 2018-06-30 14:48:25 -05:00
Subv
c0e2d52758 GPU: Implemented the RGBA32_UINT rendertarget format. 2018-06-30 14:23:13 -05:00
Subv
b11072d54a GLCache: Specify the component type along the texture type in the format tuple. 2018-06-30 14:08:51 -05:00
bunnei
c96da97630 gl_shader_decompiler: Implement predicate NotEqualWithNan. 2018-06-30 03:01:25 -04:00
16 changed files with 441 additions and 87 deletions

View File

@@ -47,6 +47,7 @@ public:
// Start the audio event
CoreTiming::ScheduleEvent(audio_ticks, audio_event);
voice_status_list.reserve(worker_params.voice_count);
}
~IAudioRenderer() {
CoreTiming::UnscheduleEvent(audio_event, 0);
@@ -68,6 +69,12 @@ private:
buf.data() + sizeof(UpdateDataHeader) + config.behavior_size,
memory_pool_count * sizeof(MemoryPoolInfo));
std::vector<VoiceInfo> voice_info(worker_params.voice_count);
std::memcpy(voice_info.data(),
buf.data() + sizeof(UpdateDataHeader) + config.behavior_size +
config.memory_pools_size + config.voice_resource_size,
worker_params.voice_count * sizeof(VoiceInfo));
UpdateDataHeader response_data{worker_params};
ASSERT(ctx.GetWriteBufferSize() == response_data.total_size);
@@ -86,6 +93,23 @@ private:
std::memcpy(output.data() + sizeof(UpdateDataHeader), memory_pool.data(),
response_data.memory_pools_size);
for (unsigned i = 0; i < voice_info.size(); i++) {
if (voice_info[i].is_new) {
voice_status_list[i].played_sample_count = 0;
voice_status_list[i].wave_buffer_consumed = 0;
} else if (voice_info[i].play_state == (u8)PlayStates::Started) {
for (u32 buff_idx = 0; buff_idx < voice_info[i].wave_buffer_count; buff_idx++) {
voice_status_list[i].played_sample_count +=
(voice_info[i].wave_buffer[buff_idx].end_sample_offset -
voice_info[i].wave_buffer[buff_idx].start_sample_offset) /
2;
voice_status_list[i].wave_buffer_consumed++;
}
}
}
std::memcpy(output.data() + sizeof(UpdateDataHeader) + response_data.memory_pools_size,
voice_status_list.data(), response_data.voices_size);
ctx.WriteBuffer(output);
IPC::ResponseBuilder rb{ctx, 2};
@@ -130,6 +154,11 @@ private:
Released = 0x6,
};
enum class PlayStates : u8 {
Started = 0,
Stopped = 1,
};
struct MemoryPoolEntry {
MemoryPoolStates state;
u32_le unknown_4;
@@ -175,11 +204,69 @@ private:
};
static_assert(sizeof(UpdateDataHeader) == 0x40, "UpdateDataHeader has wrong size");
struct BiquadFilter {
u8 enable;
INSERT_PADDING_BYTES(1);
s16_le numerator[3];
s16_le denominator[2];
};
static_assert(sizeof(BiquadFilter) == 0xc, "BiquadFilter has wrong size");
struct WaveBuffer {
u64_le buffer_addr;
u64_le buffer_sz;
s32_le start_sample_offset;
s32_le end_sample_offset;
u8 loop;
u8 end_of_stream;
u8 sent_to_server;
INSERT_PADDING_BYTES(5);
u64 context_addr;
u64 context_sz;
INSERT_PADDING_BYTES(8);
};
static_assert(sizeof(WaveBuffer) == 0x38, "WaveBuffer has wrong size");
struct VoiceInfo {
u32_le id;
u32_le node_id;
u8 is_new;
u8 is_in_use;
u8 play_state;
u8 sample_format;
u32_le sample_rate;
u32_le priority;
u32_le sorting_order;
u32_le channel_count;
float_le pitch;
float_le volume;
BiquadFilter biquad_filter[2];
u32_le wave_buffer_count;
u16_le wave_buffer_head;
INSERT_PADDING_BYTES(6);
u64_le additional_params_addr;
u64_le additional_params_sz;
u32_le mix_id;
u32_le splitter_info_id;
WaveBuffer wave_buffer[4];
u32_le voice_channel_resource_ids[6];
INSERT_PADDING_BYTES(24);
};
static_assert(sizeof(VoiceInfo) == 0x170, "VoiceInfo is wrong size");
struct VoiceOutStatus {
u64_le played_sample_count;
u32_le wave_buffer_consumed;
INSERT_PADDING_WORDS(1);
};
static_assert(sizeof(VoiceOutStatus) == 0x10, "VoiceOutStatus has wrong size");
/// This is used to trigger the audio event callback.
CoreTiming::EventType* audio_event;
Kernel::SharedPtr<Kernel::Event> system_event;
AudioRendererParameter worker_params;
std::vector<VoiceOutStatus> voice_status_list;
};
class IAudioDevice final : public ServiceFramework<IAudioDevice> {

View File

@@ -8,6 +8,8 @@
#include "core/core.h"
#include "core/hle/service/nvdrv/devices/nvhost_as_gpu.h"
#include "core/hle/service/nvdrv/devices/nvmap.h"
#include "video_core/renderer_base.h"
#include "video_core/video_core.h"
namespace Service::Nvidia::Devices {
@@ -154,6 +156,9 @@ u32 nvhost_as_gpu::UnmapBuffer(const std::vector<u8>& input, std::vector<u8>& ou
ASSERT_MSG(itr != buffer_mappings.end(), "Tried to unmap invalid mapping");
// Remove this memory region from the rasterizer cache.
VideoCore::g_renderer->Rasterizer()->FlushAndInvalidateRegion(params.offset, itr->second.size);
params.offset = gpu.memory_manager->UnmapBuffer(params.offset, itr->second.size);
buffer_mappings.erase(itr->second.offset);

View File

@@ -148,6 +148,7 @@ u32 nvmap::IocParam(const std::vector<u8>& input, std::vector<u8>& output) {
}
u32 nvmap::IocFree(const std::vector<u8>& input, std::vector<u8>& output) {
// TODO(Subv): These flags are unconfirmed.
enum FreeFlags {
Freed = 0,
NotFreedYet = 1,
@@ -161,15 +162,21 @@ u32 nvmap::IocFree(const std::vector<u8>& input, std::vector<u8>& output) {
auto itr = handles.find(params.handle);
ASSERT(itr != handles.end());
ASSERT(itr->second->refcount > 0);
itr->second->refcount--;
params.refcount = itr->second->refcount;
params.size = itr->second->size;
if (itr->second->refcount == 0)
if (itr->second->refcount == 0) {
params.flags = Freed;
else
// The address of the nvmap is written to the output if we're finally freeing it, otherwise
// 0 is written.
params.address = itr->second->addr;
} else {
params.flags = NotFreedYet;
params.address = 0;
}
handles.erase(params.handle);

View File

@@ -94,7 +94,7 @@ private:
struct IocFreeParams {
u32_le handle;
INSERT_PADDING_BYTES(4);
u64_le refcount;
u64_le address;
u32_le size;
u32_le flags;
};

View File

@@ -280,6 +280,34 @@ public:
UnsignedInt = 0x2,
};
enum class ComparisonOp : u32 {
Never = 0x200,
Less = 0x201,
Equal = 0x202,
LessEqual = 0x203,
Greater = 0x204,
NotEqual = 0x205,
GreaterEqual = 0x206,
Always = 0x207,
};
struct Cull {
enum class FrontFace : u32 {
ClockWise = 0x0900,
CounterClockWise = 0x0901,
};
enum class CullFace : u32 {
Front = 0x0404,
Back = 0x0405,
FrontAndBack = 0x0408,
};
u32 enabled;
FrontFace front_face;
CullFace cull_face;
};
struct Blend {
enum class Equation : u32 {
Add = 1,
@@ -413,7 +441,7 @@ public:
struct {
u32 address_high;
u32 address_low;
u32 format;
Tegra::DepthFormat format;
u32 block_dimensions;
u32 layer_stride;
@@ -435,11 +463,21 @@ public:
};
} rt_control;
INSERT_PADDING_WORDS(0x31);
INSERT_PADDING_WORDS(0x2B);
u32 depth_test_enable;
INSERT_PADDING_WORDS(0x5);
u32 independent_blend_enable;
INSERT_PADDING_WORDS(0x15);
u32 depth_write_enabled;
INSERT_PADDING_WORDS(0x8);
ComparisonOp depth_test_func;
INSERT_PADDING_WORDS(0xB);
struct {
u32 separate_alpha;
@@ -455,7 +493,11 @@ public:
u32 enable[NumRenderTargets];
} blend;
INSERT_PADDING_WORDS(0x77);
INSERT_PADDING_WORDS(0x2D);
u32 vb_element_base;
INSERT_PADDING_WORDS(0x49);
struct {
u32 tsc_address_high;
@@ -536,7 +578,13 @@ public:
}
} index_array;
INSERT_PADDING_WORDS(0xC7);
INSERT_PADDING_WORDS(0x7);
INSERT_PADDING_WORDS(0x46);
Cull cull;
INSERT_PADDING_WORDS(0x77);
struct {
u32 query_address_high;
@@ -743,13 +791,18 @@ ASSERT_REG_POSITION(vertex_buffer, 0x35D);
ASSERT_REG_POSITION(zeta, 0x3F8);
ASSERT_REG_POSITION(vertex_attrib_format[0], 0x458);
ASSERT_REG_POSITION(rt_control, 0x487);
ASSERT_REG_POSITION(depth_test_enable, 0x4B3);
ASSERT_REG_POSITION(independent_blend_enable, 0x4B9);
ASSERT_REG_POSITION(depth_write_enabled, 0x4BA);
ASSERT_REG_POSITION(depth_test_func, 0x4C3);
ASSERT_REG_POSITION(blend, 0x4CF);
ASSERT_REG_POSITION(vb_element_base, 0x50D);
ASSERT_REG_POSITION(tsc, 0x557);
ASSERT_REG_POSITION(tic, 0x55D);
ASSERT_REG_POSITION(code_address, 0x582);
ASSERT_REG_POSITION(draw, 0x585);
ASSERT_REG_POSITION(index_array, 0x5F2);
ASSERT_REG_POSITION(cull, 0x646);
ASSERT_REG_POSITION(query, 0x6C0);
ASSERT_REG_POSITION(vertex_array[0], 0x700);
ASSERT_REG_POSITION(independent_blend, 0x780);

View File

@@ -49,7 +49,11 @@ void MaxwellDMA::HandleCopy() {
ASSERT(regs.src_params.pos_y == 0);
ASSERT(regs.dst_params.pos_x == 0);
ASSERT(regs.dst_params.pos_y == 0);
ASSERT(regs.exec.is_dst_linear != regs.exec.is_src_linear);
if (regs.exec.is_dst_linear == regs.exec.is_src_linear) {
Memory::CopyBlock(dest_cpu, source_cpu, regs.x_count * regs.y_count);
return;
}
u8* src_buffer = Memory::GetPointer(source_cpu);
u8* dst_buffer = Memory::GetPointer(dest_cpu);

View File

@@ -142,6 +142,7 @@ enum class PredCondition : u64 {
GreaterThan = 4,
NotEqual = 5,
GreaterEqual = 6,
NotEqualWithNan = 13,
// TODO(Subv): Other condition types
};
@@ -165,7 +166,7 @@ enum class SubOp : u64 {
Lg2 = 0x3,
Rcp = 0x4,
Rsq = 0x5,
Min = 0x8,
Sqrt = 0x8,
};
enum class F2iRoundingOp : u64 {
@@ -209,7 +210,7 @@ union Instruction {
} pred;
BitField<19, 1, u64> negate_pred;
BitField<20, 8, Register> gpr20;
BitField<20, 7, SubOp> sub_op;
BitField<20, 4, SubOp> sub_op;
BitField<28, 8, Register> gpr28;
BitField<39, 8, Register> gpr39;
BitField<48, 16, u64> opcode;

View File

@@ -16,6 +16,7 @@ namespace Tegra {
enum class RenderTargetFormat : u32 {
NONE = 0x0,
RGBA32_FLOAT = 0xC0,
RGBA32_UINT = 0xC2,
RGBA16_FLOAT = 0xCA,
RGB10_A2_UNORM = 0xD1,
RGBA8_UNORM = 0xD5,
@@ -23,6 +24,15 @@ enum class RenderTargetFormat : u32 {
R11G11B10_FLOAT = 0xE0,
};
enum class DepthFormat : u32 {
Z32_FLOAT = 0xA,
Z16_UNORM = 0x13,
S8_Z24_UNORM = 0x14,
Z24_X8_UNORM = 0x15,
Z24_S8_UNORM = 0x16,
Z24_C8_UNORM = 0x18,
};
/// Returns the number of bytes per pixel of each rendertarget format.
u32 RenderTargetBytesPerPixel(RenderTargetFormat format);

View File

@@ -304,10 +304,15 @@ void RasterizerOpenGL::DrawArrays() {
MICROPROFILE_SCOPE(OpenGL_Drawing);
const auto& regs = Core::System().GetInstance().GPU().Maxwell3D().regs;
// TODO(bunnei): Implement these
// Sync the depth test state before configuring the framebuffer surfaces.
SyncDepthTestState();
// TODO(bunnei): Implement this
const bool has_stencil = false;
const bool using_color_fb = true;
const bool using_depth_fb = false;
const bool using_depth_fb = regs.zeta.Address() != 0;
const MathUtil::Rectangle<s32> viewport_rect{regs.viewport_transform[0].GetRect()};
const bool write_color_fb =
@@ -338,11 +343,9 @@ void RasterizerOpenGL::DrawArrays() {
// Bind the framebuffer surfaces
BindFramebufferSurfaces(color_surface, depth_surface, has_stencil);
// Sync the viewport
SyncViewport(surfaces_rect);
// Sync the blend state registers
SyncBlendState();
SyncCullMode();
// TODO(bunnei): Sync framebuffer_scale uniform here
// TODO(bunnei): Sync scissorbox uniform(s) here
@@ -412,14 +415,16 @@ void RasterizerOpenGL::DrawArrays() {
const GLenum primitive_mode{MaxwellToGL::PrimitiveTopology(regs.draw.topology)};
if (is_indexed) {
const GLint index_min{static_cast<GLint>(regs.index_array.first)};
const GLint index_max{static_cast<GLint>(regs.index_array.first + regs.index_array.count)};
glDrawRangeElementsBaseVertex(primitive_mode, index_min, index_max, regs.index_array.count,
MaxwellToGL::IndexFormat(regs.index_array.format),
reinterpret_cast<const void*>(index_buffer_offset),
-index_min);
const GLint base_vertex{static_cast<GLint>(regs.vb_element_base)};
// Adjust the index buffer offset so it points to the first desired index.
index_buffer_offset += regs.index_array.first * regs.index_array.FormatSizeInBytes();
glDrawElementsBaseVertex(primitive_mode, regs.index_array.count,
MaxwellToGL::IndexFormat(regs.index_array.format),
reinterpret_cast<const void*>(index_buffer_offset), base_vertex);
} else {
glDrawArrays(primitive_mode, 0, regs.vertex_buffer.count);
glDrawArrays(primitive_mode, regs.vertex_buffer.first, regs.vertex_buffer.count);
}
// Disable scissor test
@@ -710,7 +715,11 @@ void RasterizerOpenGL::SyncClipCoef() {
}
void RasterizerOpenGL::SyncCullMode() {
UNREACHABLE();
const auto& regs = Core::System().GetInstance().GPU().Maxwell3D().regs;
state.cull.enabled = regs.cull.enabled != 0;
state.cull.front_face = MaxwellToGL::FrontFace(regs.cull.front_face);
state.cull.mode = MaxwellToGL::CullFace(regs.cull.cull_face);
}
void RasterizerOpenGL::SyncDepthScale() {
@@ -721,6 +730,14 @@ void RasterizerOpenGL::SyncDepthOffset() {
UNREACHABLE();
}
void RasterizerOpenGL::SyncDepthTestState() {
const auto& regs = Core::System().GetInstance().GPU().Maxwell3D().regs;
state.depth.test_enabled = regs.depth_test_enable != 0;
state.depth.write_mask = regs.depth_write_enabled ? GL_TRUE : GL_FALSE;
state.depth.test_func = MaxwellToGL::ComparisonOp(regs.depth_test_func);
}
void RasterizerOpenGL::SyncBlendState() {
const auto& regs = Core::System().GetInstance().GPU().Maxwell3D().regs;

View File

@@ -126,6 +126,9 @@ private:
/// Syncs the depth offset to match the guest state
void SyncDepthOffset();
/// Syncs the depth test state to match the guest state
void SyncDepthTestState();
/// Syncs the blend state to match the guest state
void SyncBlendState();

View File

@@ -27,6 +27,7 @@ struct FormatTuple {
GLint internal_format;
GLenum format;
GLenum type;
ComponentType component_type;
bool compressed;
};
@@ -65,36 +66,36 @@ struct FormatTuple {
}
static constexpr std::array<FormatTuple, SurfaceParams::MaxPixelFormat> tex_format_tuples = {{
{GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, false}, // ABGR8
{GL_RGB, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV, false}, // B5G6R5
{GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV, false}, // A2B10G10R10
{GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV, false}, // A1B5G5R5
{GL_R8, GL_RED, GL_UNSIGNED_BYTE, false}, // R8
{GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT, false}, // RGBA16F
{GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV, false}, // R11FG11FB10F
{GL_COMPRESSED_RGB_S3TC_DXT1_EXT, GL_RGB, GL_UNSIGNED_INT_8_8_8_8, true}, // DXT1
{GL_COMPRESSED_RGBA_S3TC_DXT3_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true}, // DXT23
{GL_COMPRESSED_RGBA_S3TC_DXT5_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true}, // DXT45
{GL_COMPRESSED_RED_RGTC1, GL_RED, GL_UNSIGNED_INT_8_8_8_8, true}, // DXN1
{GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_4X4
{GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, ComponentType::UNorm, false}, // ABGR8
{GL_RGB, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV, ComponentType::UNorm, false}, // B5G6R5
{GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV, ComponentType::UNorm,
false}, // A2B10G10R10
{GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV, ComponentType::UNorm, false}, // A1B5G5R5
{GL_R8, GL_RED, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // R8
{GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT, ComponentType::Float, false}, // RGBA16F
{GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV, ComponentType::Float,
false}, // R11FG11FB10F
{GL_RGBA32UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT, ComponentType::UInt, false}, // RGBA32UI
{GL_COMPRESSED_RGB_S3TC_DXT1_EXT, GL_RGB, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm,
true}, // DXT1
{GL_COMPRESSED_RGBA_S3TC_DXT3_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm,
true}, // DXT23
{GL_COMPRESSED_RGBA_S3TC_DXT5_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm,
true}, // DXT45
{GL_COMPRESSED_RED_RGTC1, GL_RED, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, true}, // DXN1
{GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_4X4
// DepthStencil formats
{GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, ComponentType::UNorm,
false}, // Z24S8
}};
static const FormatTuple& GetFormatTuple(PixelFormat pixel_format, ComponentType component_type) {
const SurfaceType type = SurfaceParams::GetFormatType(pixel_format);
if (type == SurfaceType::ColorTexture) {
ASSERT(static_cast<size_t>(pixel_format) < tex_format_tuples.size());
// For now only UNORM components are supported, or either R11FG11FB10F or RGBA16F which
// are type FLOAT
ASSERT(component_type == ComponentType::UNorm || pixel_format == PixelFormat::RGBA16F ||
pixel_format == PixelFormat::R11FG11FB10F);
return tex_format_tuples[static_cast<unsigned int>(pixel_format)];
} else if (type == SurfaceType::Depth || type == SurfaceType::DepthStencil) {
// TODO(Subv): Implement depth formats
ASSERT_MSG(false, "Unimplemented");
}
ASSERT(static_cast<size_t>(pixel_format) < tex_format_tuples.size());
auto& format = tex_format_tuples[static_cast<unsigned int>(pixel_format)];
ASSERT(component_type == format.component_type);
UNREACHABLE();
return {};
return format;
}
VAddr SurfaceParams::GetCpuAddr() const {
@@ -144,11 +145,17 @@ void MortonCopy(u32 stride, u32 block_height, u32 height, u8* gl_buffer, Tegra::
const auto& gpu = Core::System::GetInstance().GPU();
if (morton_to_gl) {
auto data = Tegra::Texture::UnswizzleTexture(
*gpu.memory_manager->GpuToCpuAddress(addr),
SurfaceParams::TextureFormatFromPixelFormat(format), stride, height, block_height);
std::memcpy(gl_buffer, data.data(), data.size());
if (SurfaceParams::GetFormatType(format) == SurfaceType::ColorTexture) {
auto data = Tegra::Texture::UnswizzleTexture(
*gpu.memory_manager->GpuToCpuAddress(addr),
SurfaceParams::TextureFormatFromPixelFormat(format), stride, height, block_height);
std::memcpy(gl_buffer, data.data(), data.size());
} else {
auto data = Tegra::Texture::UnswizzleDepthTexture(
*gpu.memory_manager->GpuToCpuAddress(addr),
SurfaceParams::DepthFormatFromPixelFormat(format), stride, height, block_height);
std::memcpy(gl_buffer, data.data(), data.size());
}
} else {
// TODO(bunnei): Assumes the default rendering GOB size of 16 (128 lines). We should
// check the configuration for this and perform more generic un/swizzle
@@ -166,9 +173,10 @@ static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr),
MortonCopy<true, PixelFormat::ABGR8>, MortonCopy<true, PixelFormat::B5G6R5>,
MortonCopy<true, PixelFormat::A2B10G10R10>, MortonCopy<true, PixelFormat::A1B5G5R5>,
MortonCopy<true, PixelFormat::R8>, MortonCopy<true, PixelFormat::RGBA16F>,
MortonCopy<true, PixelFormat::R11FG11FB10F>, MortonCopy<true, PixelFormat::DXT1>,
MortonCopy<true, PixelFormat::DXT23>, MortonCopy<true, PixelFormat::DXT45>,
MortonCopy<true, PixelFormat::DXN1>, MortonCopy<true, PixelFormat::ASTC_2D_4X4>,
MortonCopy<true, PixelFormat::R11FG11FB10F>, MortonCopy<true, PixelFormat::RGBA32UI>,
MortonCopy<true, PixelFormat::DXT1>, MortonCopy<true, PixelFormat::DXT23>,
MortonCopy<true, PixelFormat::DXT45>, MortonCopy<true, PixelFormat::DXN1>,
MortonCopy<true, PixelFormat::ASTC_2D_4X4>, MortonCopy<true, PixelFormat::Z24S8>,
};
static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr),
@@ -181,12 +189,14 @@ static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr),
MortonCopy<false, PixelFormat::R8>,
MortonCopy<false, PixelFormat::RGBA16F>,
MortonCopy<false, PixelFormat::R11FG11FB10F>,
MortonCopy<false, PixelFormat::RGBA32UI>,
// TODO(Subv): Swizzling the DXT1/DXT23/DXT45/DXN1 formats is not yet supported
nullptr,
nullptr,
nullptr,
nullptr,
MortonCopy<false, PixelFormat::ABGR8>,
MortonCopy<false, PixelFormat::Z24S8>,
};
// Allocate an uninitialized texture of appropriate size and format for the surface
@@ -390,9 +400,15 @@ SurfaceSurfaceRect_Tuple RasterizerCacheOpenGL::GetFramebufferSurfaces(
// get color and depth surfaces
const SurfaceParams color_params{SurfaceParams::CreateForFramebuffer(regs.rt[0])};
const SurfaceParams depth_params{color_params};
SurfaceParams depth_params{color_params};
ASSERT_MSG(!using_depth_fb, "depth buffer is unimplemented");
if (using_depth_fb) {
depth_params.addr = regs.zeta.Address();
depth_params.pixel_format = SurfaceParams::PixelFormatFromDepthFormat(regs.zeta.format);
depth_params.component_type = SurfaceParams::ComponentTypeFromDepthFormat(regs.zeta.format);
depth_params.type = SurfaceParams::GetFormatType(depth_params.pixel_format);
depth_params.size_in_bytes = depth_params.SizeInBytes();
}
MathUtil::Rectangle<u32> color_rect{};
Surface color_surface;

View File

@@ -30,13 +30,21 @@ struct SurfaceParams {
R8 = 4,
RGBA16F = 5,
R11FG11FB10F = 6,
DXT1 = 7,
DXT23 = 8,
DXT45 = 9,
DXN1 = 10, // This is also known as BC4
ASTC_2D_4X4 = 11,
RGBA32UI = 7,
DXT1 = 8,
DXT23 = 9,
DXT45 = 10,
DXN1 = 11, // This is also known as BC4
ASTC_2D_4X4 = 12,
Max,
MaxColorFormat,
// DepthStencil formats
Z24S8 = 13,
MaxDepthStencilFormat,
Max = MaxDepthStencilFormat,
Invalid = 255,
};
@@ -77,11 +85,13 @@ struct SurfaceParams {
1, // R8
1, // RGBA16F
1, // R11FG11FB10F
1, // RGBA32UI
4, // DXT1
4, // DXT23
4, // DXT45
4, // DXN1
4, // ASTC_2D_4X4
1, // Z24S8
}};
ASSERT(static_cast<size_t>(format) < compression_factor_table.size());
@@ -100,11 +110,13 @@ struct SurfaceParams {
8, // R8
64, // RGBA16F
32, // R11FG11FB10F
128, // RGBA32UI
64, // DXT1
128, // DXT23
128, // DXT45
64, // DXN1
32, // ASTC_2D_4X4
32, // Z24S8
}};
ASSERT(static_cast<size_t>(format) < bpp_table.size());
@@ -114,6 +126,16 @@ struct SurfaceParams {
return GetFormatBpp(pixel_format);
}
static PixelFormat PixelFormatFromDepthFormat(Tegra::DepthFormat format) {
switch (format) {
case Tegra::DepthFormat::Z24_S8_UNORM:
return PixelFormat::Z24S8;
default:
NGLOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format));
UNREACHABLE();
}
}
static PixelFormat PixelFormatFromRenderTargetFormat(Tegra::RenderTargetFormat format) {
switch (format) {
case Tegra::RenderTargetFormat::RGBA8_UNORM:
@@ -125,6 +147,8 @@ struct SurfaceParams {
return PixelFormat::RGBA16F;
case Tegra::RenderTargetFormat::R11G11B10_FLOAT:
return PixelFormat::R11FG11FB10F;
case Tegra::RenderTargetFormat::RGBA32_UINT:
return PixelFormat::RGBA32UI;
default:
NGLOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format));
UNREACHABLE();
@@ -148,6 +172,8 @@ struct SurfaceParams {
return PixelFormat::RGBA16F;
case Tegra::Texture::TextureFormat::BF10GF11RF11:
return PixelFormat::R11FG11FB10F;
case Tegra::Texture::TextureFormat::R32_G32_B32_A32:
return PixelFormat::RGBA32UI;
case Tegra::Texture::TextureFormat::DXT1:
return PixelFormat::DXT1;
case Tegra::Texture::TextureFormat::DXT23:
@@ -181,6 +207,8 @@ struct SurfaceParams {
return Tegra::Texture::TextureFormat::R16_G16_B16_A16;
case PixelFormat::R11FG11FB10F:
return Tegra::Texture::TextureFormat::BF10GF11RF11;
case PixelFormat::RGBA32UI:
return Tegra::Texture::TextureFormat::R32_G32_B32_A32;
case PixelFormat::DXT1:
return Tegra::Texture::TextureFormat::DXT1;
case PixelFormat::DXT23:
@@ -196,6 +224,15 @@ struct SurfaceParams {
}
}
static Tegra::DepthFormat DepthFormatFromPixelFormat(PixelFormat format) {
switch (format) {
case PixelFormat::Z24S8:
return Tegra::DepthFormat::Z24_S8_UNORM;
default:
UNREACHABLE();
}
}
static ComponentType ComponentTypeFromTexture(Tegra::Texture::ComponentType type) {
// TODO(Subv): Implement more component types
switch (type) {
@@ -217,6 +254,8 @@ struct SurfaceParams {
case Tegra::RenderTargetFormat::RGBA16_FLOAT:
case Tegra::RenderTargetFormat::R11G11B10_FLOAT:
return ComponentType::Float;
case Tegra::RenderTargetFormat::RGBA32_UINT:
return ComponentType::UInt;
default:
NGLOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format));
UNREACHABLE();
@@ -233,11 +272,26 @@ struct SurfaceParams {
}
}
static ComponentType ComponentTypeFromDepthFormat(Tegra::DepthFormat format) {
switch (format) {
case Tegra::DepthFormat::Z24_S8_UNORM:
return ComponentType::UNorm;
default:
NGLOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format));
UNREACHABLE();
}
}
static SurfaceType GetFormatType(PixelFormat pixel_format) {
if (static_cast<size_t>(pixel_format) < MaxPixelFormat) {
if (static_cast<size_t>(pixel_format) < static_cast<size_t>(PixelFormat::MaxColorFormat)) {
return SurfaceType::ColorTexture;
}
if (static_cast<size_t>(pixel_format) <
static_cast<size_t>(PixelFormat::MaxDepthStencilFormat)) {
return SurfaceType::DepthStencil;
}
// TODO(Subv): Implement the other formats
ASSERT(false);

View File

@@ -719,21 +719,31 @@ private:
/**
* Returns the comparison string to use to compare two values in the 'set' family of
* instructions.
* @params condition The condition used in the 'set'-family instruction.
* @param condition The condition used in the 'set'-family instruction.
* @param op_a First operand to use for the comparison.
* @param op_b Second operand to use for the comparison.
* @returns String corresponding to the GLSL operator that matches the desired comparison.
*/
std::string GetPredicateComparison(Tegra::Shader::PredCondition condition) const {
std::string GetPredicateComparison(Tegra::Shader::PredCondition condition,
const std::string& op_a, const std::string& op_b) const {
using Tegra::Shader::PredCondition;
static const std::unordered_map<PredCondition, const char*> PredicateComparisonStrings = {
{PredCondition::LessThan, "<"}, {PredCondition::Equal, "=="},
{PredCondition::LessEqual, "<="}, {PredCondition::GreaterThan, ">"},
{PredCondition::NotEqual, "!="}, {PredCondition::GreaterEqual, ">="},
{PredCondition::LessThan, "<"}, {PredCondition::Equal, "=="},
{PredCondition::LessEqual, "<="}, {PredCondition::GreaterThan, ">"},
{PredCondition::NotEqual, "!="}, {PredCondition::GreaterEqual, ">="},
{PredCondition::NotEqualWithNan, "!="},
};
auto comparison = PredicateComparisonStrings.find(condition);
const auto& comparison{PredicateComparisonStrings.find(condition)};
ASSERT_MSG(comparison != PredicateComparisonStrings.end(),
"Unknown predicate comparison operation");
return comparison->second;
std::string predicate{'(' + op_a + ") " + comparison->second + " (" + op_b + ')'};
if (condition == PredCondition::NotEqualWithNan) {
predicate += " || isnan(" + op_a + ") || isnan(" + op_b + ')';
}
return predicate;
}
/**
@@ -907,8 +917,8 @@ private:
regs.SetRegisterToFloat(instr.gpr0, 0, "inversesqrt(" + op_a + ')', 1, 1,
instr.alu.saturate_d);
break;
case SubOp::Min:
regs.SetRegisterToFloat(instr.gpr0, 0, "min(" + op_a + "," + op_b + ')', 1, 1,
case SubOp::Sqrt:
regs.SetRegisterToFloat(instr.gpr0, 0, "sqrt(" + op_a + ')', 1, 1,
instr.alu.saturate_d);
break;
default:
@@ -1415,10 +1425,9 @@ private:
std::string second_pred =
GetPredicateCondition(instr.fsetp.pred39, instr.fsetp.neg_pred != 0);
std::string comparator = GetPredicateComparison(instr.fsetp.cond);
std::string combiner = GetPredicateCombiner(instr.fsetp.op);
std::string predicate = '(' + op_a + ") " + comparator + " (" + op_b + ')';
std::string predicate = GetPredicateComparison(instr.fsetp.cond, op_a, op_b);
// Set the primary predicate to the result of Predicate OP SecondPredicate
SetPredicate(instr.fsetp.pred3,
'(' + predicate + ") " + combiner + " (" + second_pred + ')');
@@ -1453,10 +1462,9 @@ private:
std::string second_pred =
GetPredicateCondition(instr.isetp.pred39, instr.isetp.neg_pred != 0);
std::string comparator = GetPredicateComparison(instr.isetp.cond);
std::string combiner = GetPredicateCombiner(instr.isetp.op);
std::string predicate = '(' + op_a + ") " + comparator + " (" + op_b + ')';
std::string predicate = GetPredicateComparison(instr.isetp.cond, op_a, op_b);
// Set the primary predicate to the result of Predicate OP SecondPredicate
SetPredicate(instr.isetp.pred3,
'(' + predicate + ") " + combiner + " (" + second_pred + ')');
@@ -1503,11 +1511,10 @@ private:
std::string second_pred =
GetPredicateCondition(instr.fset.pred39, instr.fset.neg_pred != 0);
std::string comparator = GetPredicateComparison(instr.fset.cond);
std::string combiner = GetPredicateCombiner(instr.fset.op);
std::string predicate = "(((" + op_a + ") " + comparator + " (" + op_b + ")) " +
combiner + " (" + second_pred + "))";
std::string predicate = "((" + GetPredicateComparison(instr.fset.cond, op_a, op_b) +
") " + combiner + " (" + second_pred + "))";
if (instr.fset.bf) {
regs.SetRegisterToFloat(instr.gpr0, 0, predicate + " ? 1.0 : 0.0", 1, 1);
@@ -1538,11 +1545,10 @@ private:
std::string second_pred =
GetPredicateCondition(instr.iset.pred39, instr.iset.neg_pred != 0);
std::string comparator = GetPredicateComparison(instr.iset.cond);
std::string combiner = GetPredicateCombiner(instr.iset.op);
std::string predicate = "(((" + op_a + ") " + comparator + " (" + op_b + ")) " +
combiner + " (" + second_pred + "))";
std::string predicate = "((" + GetPredicateComparison(instr.iset.cond, op_a, op_b) +
") " + combiner + " (" + second_pred + "))";
if (instr.iset.bf) {
regs.SetRegisterToFloat(instr.gpr0, 0, predicate + " ? 1.0 : 0.0", 1, 1);

View File

@@ -201,4 +201,54 @@ inline GLenum SwizzleSource(Tegra::Texture::SwizzleSource source) {
return {};
}
inline GLenum ComparisonOp(Maxwell::ComparisonOp comparison) {
switch (comparison) {
case Maxwell::ComparisonOp::Never:
return GL_NEVER;
case Maxwell::ComparisonOp::Less:
return GL_LESS;
case Maxwell::ComparisonOp::Equal:
return GL_EQUAL;
case Maxwell::ComparisonOp::LessEqual:
return GL_LEQUAL;
case Maxwell::ComparisonOp::Greater:
return GL_GREATER;
case Maxwell::ComparisonOp::NotEqual:
return GL_NOTEQUAL;
case Maxwell::ComparisonOp::GreaterEqual:
return GL_GEQUAL;
case Maxwell::ComparisonOp::Always:
return GL_ALWAYS;
}
NGLOG_CRITICAL(Render_OpenGL, "Unimplemented comparison op={}", static_cast<u32>(comparison));
UNREACHABLE();
return {};
}
inline GLenum FrontFace(Maxwell::Cull::FrontFace front_face) {
switch (front_face) {
case Maxwell::Cull::FrontFace::ClockWise:
return GL_CW;
case Maxwell::Cull::FrontFace::CounterClockWise:
return GL_CCW;
}
NGLOG_CRITICAL(Render_OpenGL, "Unimplemented front face cull={}", static_cast<u32>(front_face));
UNREACHABLE();
return {};
}
inline GLenum CullFace(Maxwell::Cull::CullFace cull_face) {
switch (cull_face) {
case Maxwell::Cull::CullFace::Front:
return GL_FRONT;
case Maxwell::Cull::CullFace::Back:
return GL_BACK;
case Maxwell::Cull::CullFace::FrontAndBack:
return GL_FRONT_AND_BACK;
}
NGLOG_CRITICAL(Render_OpenGL, "Unimplemented cull face={}", static_cast<u32>(cull_face));
UNREACHABLE();
return {};
}
} // namespace MaxwellToGL

View File

@@ -5,6 +5,7 @@
#include <cstring>
#include "common/assert.h"
#include "core/memory.h"
#include "video_core/gpu.h"
#include "video_core/textures/decoders.h"
#include "video_core/textures/texture.h"
@@ -65,6 +66,18 @@ u32 BytesPerPixel(TextureFormat format) {
return 1;
case TextureFormat::R16_G16_B16_A16:
return 8;
case TextureFormat::R32_G32_B32_A32:
return 16;
default:
UNIMPLEMENTED_MSG("Format not implemented");
break;
}
}
static u32 DepthBytesPerPixel(DepthFormat format) {
switch (format) {
case DepthFormat::Z24_S8_UNORM:
return 4;
default:
UNIMPLEMENTED_MSG("Format not implemented");
break;
@@ -94,6 +107,7 @@ std::vector<u8> UnswizzleTexture(VAddr address, TextureFormat format, u32 width,
case TextureFormat::B5G6R5:
case TextureFormat::R8:
case TextureFormat::R16_G16_B16_A16:
case TextureFormat::R32_G32_B32_A32:
case TextureFormat::BF10GF11RF11:
case TextureFormat::ASTC_2D_4X4:
CopySwizzledData(width, height, bytes_per_pixel, bytes_per_pixel, data,
@@ -107,6 +121,26 @@ std::vector<u8> UnswizzleTexture(VAddr address, TextureFormat format, u32 width,
return unswizzled_data;
}
std::vector<u8> UnswizzleDepthTexture(VAddr address, DepthFormat format, u32 width, u32 height,
u32 block_height) {
u8* data = Memory::GetPointer(address);
u32 bytes_per_pixel = DepthBytesPerPixel(format);
std::vector<u8> unswizzled_data(width * height * bytes_per_pixel);
switch (format) {
case DepthFormat::Z24_S8_UNORM:
CopySwizzledData(width, height, bytes_per_pixel, bytes_per_pixel, data,
unswizzled_data.data(), true, block_height);
break;
default:
UNIMPLEMENTED_MSG("Format not implemented");
break;
}
return unswizzled_data;
}
std::vector<u8> DecodeTexture(const std::vector<u8>& texture_data, TextureFormat format, u32 width,
u32 height) {
std::vector<u8> rgba_data;
@@ -124,6 +158,7 @@ std::vector<u8> DecodeTexture(const std::vector<u8>& texture_data, TextureFormat
case TextureFormat::B5G6R5:
case TextureFormat::R8:
case TextureFormat::BF10GF11RF11:
case TextureFormat::R32_G32_B32_A32:
// TODO(Subv): For the time being just forward the same data without any decoding.
rgba_data = texture_data;
break;

View File

@@ -17,6 +17,12 @@ namespace Texture {
std::vector<u8> UnswizzleTexture(VAddr address, TextureFormat format, u32 width, u32 height,
u32 block_height = TICEntry::DefaultBlockHeight);
/**
* Unswizzles a swizzled depth texture without changing its format.
*/
std::vector<u8> UnswizzleDepthTexture(VAddr address, DepthFormat format, u32 width, u32 height,
u32 block_height = TICEntry::DefaultBlockHeight);
/// Copies texture data from a buffer and performs swizzling/unswizzling as necessary.
void CopySwizzledData(u32 width, u32 height, u32 bytes_per_pixel, u32 out_bytes_per_pixel,
u8* swizzled_data, u8* unswizzled_data, bool unswizzle, u32 block_height);