Compare commits

...

52 Commits

Author SHA1 Message Date
David Marcec
ee3e816433 fixed overflow 2018-07-03 14:10:32 +10:00
David Marcec
1f39d7b406 fixed slight offset miscalculation 2018-07-03 13:50:33 +10:00
David Marcec
baa380632c Merge branch 'master' of https://github.com/yuzu-emu/yuzu into audren-voices-section 2018-07-03 13:04:44 +10:00
David Marcec
028a3947aa voice section updating 2018-07-03 13:03:06 +10:00
bunnei
5410b4659d Merge pull request #610 from Subv/mufu_8
GPU: Implemented MUFU suboperation 8, sqrt.
2018-07-02 22:26:42 -04:00
bunnei
92c7135065 Merge pull request #608 from Subv/depth
GPU: Implemented the depth buffer and depth test + culling
2018-07-02 21:24:43 -04:00
Subv
a6d4903aaf GPU: Set up the culling configuration on each draw. 2018-07-02 19:51:29 -05:00
Subv
6e4e0b2b41 GPU: Implemented MUFU suboperation 8, sqrt. 2018-07-02 19:48:15 -05:00
Sebastian Valle
055f1546d7 Merge pull request #606 from Subv/base_vertex
GPU: Fixed the index offset and implement BaseVertex when doing indexed rendering.
2018-07-02 14:07:38 -05:00
Sebastian Valle
79167fc989 Merge pull request #603 from Subv/nvmap_free
GPU: Remove unmapped surfaces from the rasterizer cache and fix our nvmap::Free behavior.
2018-07-02 14:07:17 -05:00
Sebastian Valle
9685dd5840 Merge pull request #605 from Subv/dma_copy
GPU: Directly copy the pixels when performing a same-layout DMA.
2018-07-02 14:06:56 -05:00
Subv
18c8ae7750 GPU: Set up the depth test state on every draw. 2018-07-02 13:33:06 -05:00
Subv
d480b63e0d MaxwellToGL: Added conversion functions for depth test and cull mode. 2018-07-02 13:31:49 -05:00
Subv
c1f55c32c8 GPU: Added registers for depth test and cull mode. 2018-07-02 13:31:20 -05:00
Subv
0f929762b3 GPU: Implemented the Z24S8 depth format and load the depth framebuffer. 2018-07-02 12:42:04 -05:00
Subv
4c59105adf GPU: Implement offsetted rendering when using non-indexed drawing. 2018-07-02 11:23:36 -05:00
Subv
fca3d1cc65 GPU: Fixed the index offset rendering, and implemented the base vertex functionality.
This fixes Stardew Valley.
2018-07-02 11:22:17 -05:00
Subv
cc73bad293 GPU: Added register definitions for the vertex buffer base element. 2018-07-02 11:21:23 -05:00
bunnei
3d41fdfbba Merge pull request #604 from Subv/invalid_textures
GPU: Ignore invalid and disabled textures when drawing.
2018-07-02 11:48:18 -04:00
Subv
ca633a5a3c GPU: Directly copy the pixels when performing a same-layout DMA. 2018-07-02 09:46:33 -05:00
Subv
80c5e8ae99 GPU: Ignore disabled textures and textures with an invalid address. 2018-07-02 09:43:38 -05:00
Subv
e9d147349b GPU: Allow GpuToCpuAddress to return boost::none for unmapped addresses. 2018-07-02 09:42:48 -05:00
Subv
6c0c81dfdc GPU: Remove a surface from the cache when its backing memory is being unmapped from the GPU's MMU. 2018-07-01 10:50:06 -05:00
Subv
a093feca62 nvmap: Return the address of the nvmap object when Freeing it for the last time.
This behavior is confirmed by reverse engineering.
2018-07-01 10:48:50 -05:00
bunnei
066d6184d4 Merge pull request #602 from Subv/mufu_subop
GPU: Corrected the size of the MUFU subop field, and removed incorrect "min" operation.
2018-07-01 11:06:04 -04:00
bunnei
b611d852db Merge pull request #601 from Subv/rgba32_ui
GPU: Implement the RGBA32_UINT rendertarget format.
2018-07-01 03:22:38 -04:00
bunnei
85a60e2044 Merge pull request #600 from bunnei/pred-not-eq-nan
gl_shader_decompiler: Implement predicate NotEqualWithNan.
2018-07-01 03:22:11 -04:00
Subv
f33e406ff2 GPU: Corrected the size of the MUFU subop field, and removed incorrect "min" operation. 2018-06-30 14:48:25 -05:00
Subv
c0e2d52758 GPU: Implemented the RGBA32_UINT rendertarget format. 2018-06-30 14:23:13 -05:00
Subv
b11072d54a GLCache: Specify the component type along the texture type in the format tuple. 2018-06-30 14:08:51 -05:00
bunnei
c96da97630 gl_shader_decompiler: Implement predicate NotEqualWithNan. 2018-06-30 03:01:25 -04:00
bunnei
50ef2beb58 Merge pull request #595 from bunnei/raster-cache
Rewrite the OpenGL rasterizer cache
2018-06-29 14:07:28 -04:00
bunnei
c18425ef98 gl_rasterizer_cache: Only dereference color_surface/depth_surface if valid. 2018-06-29 13:08:08 -04:00
bunnei
da2bdbc0d7 Merge pull request #588 from mailwl/hwopus
Service/Audio: add hwopus service, stub GetWorkBufferSize function
2018-06-27 21:57:21 -04:00
bunnei
7fa9177830 gl_shader_decompiler: Add a return path for unknown instructions. 2018-06-27 01:14:34 -04:00
bunnei
1dd754590f gl_rasterizer_cache: Implement caching for texture and framebuffer surfaces.
gl_rasterizer_cache: Improved cache management based on Citra's implementation.

gl_surface_cache: Add some docstrings.
2018-06-27 00:15:44 -04:00
bunnei
8af1ae46aa gl_rasterizer_cache: Various fixes for ASTC handling. 2018-06-27 00:08:04 -04:00
bunnei
c7c379bd19 gl_rasterizer_cache: Use SurfaceParams as a key for surface caching. 2018-06-27 00:08:04 -04:00
bunnei
6a28a66832 maxwell_3d: Add a struct for RenderTargetConfig. 2018-06-27 00:08:04 -04:00
bunnei
1bbbd26563 settings: Add a configuration for use_accurate_framebuffers. 2018-06-27 00:08:04 -04:00
bunnei
3f9f047375 gl_rasterizer: Implement AccelerateDisplay to forward textures to framebuffers. 2018-06-27 00:08:03 -04:00
bunnei
ff6785f3e8 gl_rasterizer_cache: Cache size_in_bytes as a const per surface. 2018-06-27 00:08:03 -04:00
bunnei
9f2f819bb6 gl_rasterizer_cache: Refactor to make SurfaceParams members const. 2018-06-27 00:08:03 -04:00
bunnei
5f57ab1b2a gl_rasterizer_cache: Remove Citra's rasterizer cache, always load/flush surfaces. 2018-06-27 00:08:03 -04:00
bunnei
84cadf9918 Merge pull request #594 from bunnei/max-constbuff
gl_rasterizer: Workaround for when exceeding max UBO size.
2018-06-27 00:06:23 -04:00
bunnei
10422f3c18 gl_rasterizer: Workaround for when exceeding max UBO size. 2018-06-26 23:07:34 -04:00
bunnei
dfac394e60 Merge pull request #593 from bunnei/fix-swizzle
gl_state: Fix state management for texture swizzle.
2018-06-26 22:05:49 -04:00
bunnei
73de9bab1a Merge pull request #592 from bunnei/cleanup-gl-state
gl_state: Remove unused state management from 3DS.
2018-06-26 22:05:03 -04:00
bunnei
0399d98cd9 Merge pull request #591 from bunnei/fix-rgb565
gl_rasterizer_cache: Fix inverted B5G6R5 format.
2018-06-26 22:04:42 -04:00
bunnei
20b58bab9c gl_state: Remove unused state management from 3DS. 2018-06-26 17:09:25 -04:00
bunnei
41b3725d28 gl_rasterizer_cache: Fix inverted B5G6R5 format. 2018-06-26 17:07:36 -04:00
mailwl
11fb17054e Service/Audio: add hwopus service, stub GetWorkBufferSize function 2018-06-25 16:44:17 +03:00
33 changed files with 950 additions and 1650 deletions

View File

@@ -126,6 +126,8 @@ add_library(core STATIC
hle/service/audio/audren_u.h
hle/service/audio/codecctl.cpp
hle/service/audio/codecctl.h
hle/service/audio/hwopus.cpp
hle/service/audio/hwopus.h
hle/service/bcat/module.cpp
hle/service/bcat/module.h
hle/service/bcat/bcat.cpp

View File

@@ -8,6 +8,7 @@
#include "core/hle/service/audio/audrec_u.h"
#include "core/hle/service/audio/audren_u.h"
#include "core/hle/service/audio/codecctl.h"
#include "core/hle/service/audio/hwopus.h"
namespace Service::Audio {
@@ -17,6 +18,7 @@ void InstallInterfaces(SM::ServiceManager& service_manager) {
std::make_shared<AudRecU>()->InstallAsService(service_manager);
std::make_shared<AudRenU>()->InstallAsService(service_manager);
std::make_shared<CodecCtl>()->InstallAsService(service_manager);
std::make_shared<HwOpus>()->InstallAsService(service_manager);
}
} // namespace Service::Audio

View File

@@ -47,6 +47,7 @@ public:
// Start the audio event
CoreTiming::ScheduleEvent(audio_ticks, audio_event);
voice_status_list.reserve(worker_params.voice_count);
}
~IAudioRenderer() {
CoreTiming::UnscheduleEvent(audio_event, 0);
@@ -68,6 +69,12 @@ private:
buf.data() + sizeof(UpdateDataHeader) + config.behavior_size,
memory_pool_count * sizeof(MemoryPoolInfo));
std::vector<VoiceInfo> voice_info(worker_params.voice_count);
std::memcpy(voice_info.data(),
buf.data() + sizeof(UpdateDataHeader) + config.behavior_size +
config.memory_pools_size + config.voice_resource_size,
worker_params.voice_count * sizeof(VoiceInfo));
UpdateDataHeader response_data{worker_params};
ASSERT(ctx.GetWriteBufferSize() == response_data.total_size);
@@ -86,6 +93,23 @@ private:
std::memcpy(output.data() + sizeof(UpdateDataHeader), memory_pool.data(),
response_data.memory_pools_size);
for (unsigned i = 0; i < voice_info.size(); i++) {
if (voice_info[i].is_new) {
voice_status_list[i].played_sample_count = 0;
voice_status_list[i].wave_buffer_consumed = 0;
} else if (voice_info[i].play_state == (u8)PlayStates::Started) {
for (u32 buff_idx = 0; buff_idx < voice_info[i].wave_buffer_count; buff_idx++) {
voice_status_list[i].played_sample_count +=
(voice_info[i].wave_buffer[buff_idx].end_sample_offset -
voice_info[i].wave_buffer[buff_idx].start_sample_offset) /
2;
voice_status_list[i].wave_buffer_consumed++;
}
}
}
std::memcpy(output.data() + sizeof(UpdateDataHeader) + response_data.memory_pools_size,
voice_status_list.data(), response_data.voices_size);
ctx.WriteBuffer(output);
IPC::ResponseBuilder rb{ctx, 2};
@@ -130,6 +154,11 @@ private:
Released = 0x6,
};
enum class PlayStates : u8 {
Started = 0,
Stopped = 1,
};
struct MemoryPoolEntry {
MemoryPoolStates state;
u32_le unknown_4;
@@ -175,11 +204,69 @@ private:
};
static_assert(sizeof(UpdateDataHeader) == 0x40, "UpdateDataHeader has wrong size");
struct BiquadFilter {
u8 enable;
INSERT_PADDING_BYTES(1);
s16_le numerator[3];
s16_le denominator[2];
};
static_assert(sizeof(BiquadFilter) == 0xc, "BiquadFilter has wrong size");
struct WaveBuffer {
u64_le buffer_addr;
u64_le buffer_sz;
s32_le start_sample_offset;
s32_le end_sample_offset;
u8 loop;
u8 end_of_stream;
u8 sent_to_server;
INSERT_PADDING_BYTES(5);
u64 context_addr;
u64 context_sz;
INSERT_PADDING_BYTES(8);
};
static_assert(sizeof(WaveBuffer) == 0x38, "WaveBuffer has wrong size");
struct VoiceInfo {
u32_le id;
u32_le node_id;
u8 is_new;
u8 is_in_use;
u8 play_state;
u8 sample_format;
u32_le sample_rate;
u32_le priority;
u32_le sorting_order;
u32_le channel_count;
float_le pitch;
float_le volume;
BiquadFilter biquad_filter[2];
u32_le wave_buffer_count;
u16_le wave_buffer_head;
INSERT_PADDING_BYTES(6);
u64_le additional_params_addr;
u64_le additional_params_sz;
u32_le mix_id;
u32_le splitter_info_id;
WaveBuffer wave_buffer[4];
u32_le voice_channel_resource_ids[6];
INSERT_PADDING_BYTES(24);
};
static_assert(sizeof(VoiceInfo) == 0x170, "VoiceInfo is wrong size");
struct VoiceOutStatus {
u64_le played_sample_count;
u32_le wave_buffer_consumed;
INSERT_PADDING_WORDS(1);
};
static_assert(sizeof(VoiceOutStatus) == 0x10, "VoiceOutStatus has wrong size");
/// This is used to trigger the audio event callback.
CoreTiming::EventType* audio_event;
Kernel::SharedPtr<Kernel::Event> system_event;
AudioRendererParameter worker_params;
std::vector<VoiceOutStatus> voice_status_list;
};
class IAudioDevice final : public ServiceFramework<IAudioDevice> {

View File

@@ -0,0 +1,29 @@
// Copyright 2018 yuzu emulator team
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/logging/log.h"
#include "core/hle/ipc_helpers.h"
#include "core/hle/kernel/hle_ipc.h"
#include "core/hle/service/audio/hwopus.h"
namespace Service::Audio {
void HwOpus::GetWorkBufferSize(Kernel::HLERequestContext& ctx) {
NGLOG_WARNING(Service_Audio, "(STUBBED) called");
IPC::ResponseBuilder rb{ctx, 3};
rb.Push(RESULT_SUCCESS);
rb.Push<u32>(0x4000);
}
HwOpus::HwOpus() : ServiceFramework("hwopus") {
static const FunctionInfo functions[] = {
{0, nullptr, "Initialize"},
{1, &HwOpus::GetWorkBufferSize, "GetWorkBufferSize"},
{2, nullptr, "InitializeMultiStream"},
{3, nullptr, "GetWorkBufferSizeMultiStream"},
};
RegisterHandlers(functions);
}
} // namespace Service::Audio

View File

@@ -0,0 +1,20 @@
// Copyright 2018 yuzu emulator team
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include "core/hle/service/service.h"
namespace Service::Audio {
class HwOpus final : public ServiceFramework<HwOpus> {
public:
explicit HwOpus();
~HwOpus() = default;
private:
void GetWorkBufferSize(Kernel::HLERequestContext& ctx);
};
} // namespace Service::Audio

View File

@@ -8,6 +8,8 @@
#include "core/core.h"
#include "core/hle/service/nvdrv/devices/nvhost_as_gpu.h"
#include "core/hle/service/nvdrv/devices/nvmap.h"
#include "video_core/renderer_base.h"
#include "video_core/video_core.h"
namespace Service::Nvidia::Devices {
@@ -154,6 +156,9 @@ u32 nvhost_as_gpu::UnmapBuffer(const std::vector<u8>& input, std::vector<u8>& ou
ASSERT_MSG(itr != buffer_mappings.end(), "Tried to unmap invalid mapping");
// Remove this memory region from the rasterizer cache.
VideoCore::g_renderer->Rasterizer()->FlushAndInvalidateRegion(params.offset, itr->second.size);
params.offset = gpu.memory_manager->UnmapBuffer(params.offset, itr->second.size);
buffer_mappings.erase(itr->second.offset);

View File

@@ -148,6 +148,7 @@ u32 nvmap::IocParam(const std::vector<u8>& input, std::vector<u8>& output) {
}
u32 nvmap::IocFree(const std::vector<u8>& input, std::vector<u8>& output) {
// TODO(Subv): These flags are unconfirmed.
enum FreeFlags {
Freed = 0,
NotFreedYet = 1,
@@ -161,15 +162,21 @@ u32 nvmap::IocFree(const std::vector<u8>& input, std::vector<u8>& output) {
auto itr = handles.find(params.handle);
ASSERT(itr != handles.end());
ASSERT(itr->second->refcount > 0);
itr->second->refcount--;
params.refcount = itr->second->refcount;
params.size = itr->second->size;
if (itr->second->refcount == 0)
if (itr->second->refcount == 0) {
params.flags = Freed;
else
// The address of the nvmap is written to the output if we're finally freeing it, otherwise
// 0 is written.
params.address = itr->second->addr;
} else {
params.flags = NotFreedYet;
params.address = 0;
}
handles.erase(params.handle);

View File

@@ -94,7 +94,7 @@ private:
struct IocFreeParams {
u32_le handle;
INSERT_PADDING_BYTES(4);
u64_le refcount;
u64_le address;
u32_le size;
u32_le flags;
};

View File

@@ -129,6 +129,7 @@ struct Values {
// Renderer
float resolution_factor;
bool toggle_framelimit;
bool use_accurate_framebuffers;
float bg_red;
float bg_green;

View File

@@ -161,6 +161,8 @@ TelemetrySession::TelemetrySession() {
Settings::values.resolution_factor);
AddField(Telemetry::FieldType::UserConfig, "Renderer_ToggleFramelimit",
Settings::values.toggle_framelimit);
AddField(Telemetry::FieldType::UserConfig, "Renderer_UseAccurateFramebuffers",
Settings::values.use_accurate_framebuffers);
AddField(Telemetry::FieldType::UserConfig, "System_UseDockedMode",
Settings::values.use_docked_mode);
}

View File

@@ -280,6 +280,34 @@ public:
UnsignedInt = 0x2,
};
enum class ComparisonOp : u32 {
Never = 0x200,
Less = 0x201,
Equal = 0x202,
LessEqual = 0x203,
Greater = 0x204,
NotEqual = 0x205,
GreaterEqual = 0x206,
Always = 0x207,
};
struct Cull {
enum class FrontFace : u32 {
ClockWise = 0x0900,
CounterClockWise = 0x0901,
};
enum class CullFace : u32 {
Front = 0x0404,
Back = 0x0405,
FrontAndBack = 0x0408,
};
u32 enabled;
FrontFace front_face;
CullFace cull_face;
};
struct Blend {
enum class Equation : u32 {
Add = 1,
@@ -321,6 +349,24 @@ public:
INSERT_PADDING_WORDS(1);
};
struct RenderTargetConfig {
u32 address_high;
u32 address_low;
u32 width;
u32 height;
Tegra::RenderTargetFormat format;
u32 block_dimensions;
u32 array_mode;
u32 layer_stride;
u32 base_layer;
INSERT_PADDING_WORDS(7);
GPUVAddr Address() const {
return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
address_low);
}
};
union {
struct {
INSERT_PADDING_WORDS(0x45);
@@ -333,23 +379,7 @@ public:
INSERT_PADDING_WORDS(0x1B8);
struct {
u32 address_high;
u32 address_low;
u32 width;
u32 height;
Tegra::RenderTargetFormat format;
u32 block_dimensions;
u32 array_mode;
u32 layer_stride;
u32 base_layer;
INSERT_PADDING_WORDS(7);
GPUVAddr Address() const {
return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
address_low);
}
} rt[NumRenderTargets];
RenderTargetConfig rt[NumRenderTargets];
struct {
f32 scale_x;
@@ -411,7 +441,7 @@ public:
struct {
u32 address_high;
u32 address_low;
u32 format;
Tegra::DepthFormat format;
u32 block_dimensions;
u32 layer_stride;
@@ -433,11 +463,21 @@ public:
};
} rt_control;
INSERT_PADDING_WORDS(0x31);
INSERT_PADDING_WORDS(0x2B);
u32 depth_test_enable;
INSERT_PADDING_WORDS(0x5);
u32 independent_blend_enable;
INSERT_PADDING_WORDS(0x15);
u32 depth_write_enabled;
INSERT_PADDING_WORDS(0x8);
ComparisonOp depth_test_func;
INSERT_PADDING_WORDS(0xB);
struct {
u32 separate_alpha;
@@ -453,7 +493,11 @@ public:
u32 enable[NumRenderTargets];
} blend;
INSERT_PADDING_WORDS(0x77);
INSERT_PADDING_WORDS(0x2D);
u32 vb_element_base;
INSERT_PADDING_WORDS(0x49);
struct {
u32 tsc_address_high;
@@ -534,7 +578,13 @@ public:
}
} index_array;
INSERT_PADDING_WORDS(0xC7);
INSERT_PADDING_WORDS(0x7);
INSERT_PADDING_WORDS(0x46);
Cull cull;
INSERT_PADDING_WORDS(0x77);
struct {
u32 query_address_high;
@@ -741,13 +791,18 @@ ASSERT_REG_POSITION(vertex_buffer, 0x35D);
ASSERT_REG_POSITION(zeta, 0x3F8);
ASSERT_REG_POSITION(vertex_attrib_format[0], 0x458);
ASSERT_REG_POSITION(rt_control, 0x487);
ASSERT_REG_POSITION(depth_test_enable, 0x4B3);
ASSERT_REG_POSITION(independent_blend_enable, 0x4B9);
ASSERT_REG_POSITION(depth_write_enabled, 0x4BA);
ASSERT_REG_POSITION(depth_test_func, 0x4C3);
ASSERT_REG_POSITION(blend, 0x4CF);
ASSERT_REG_POSITION(vb_element_base, 0x50D);
ASSERT_REG_POSITION(tsc, 0x557);
ASSERT_REG_POSITION(tic, 0x55D);
ASSERT_REG_POSITION(code_address, 0x582);
ASSERT_REG_POSITION(draw, 0x585);
ASSERT_REG_POSITION(index_array, 0x5F2);
ASSERT_REG_POSITION(cull, 0x646);
ASSERT_REG_POSITION(query, 0x6C0);
ASSERT_REG_POSITION(vertex_array[0], 0x700);
ASSERT_REG_POSITION(independent_blend, 0x780);

View File

@@ -49,7 +49,11 @@ void MaxwellDMA::HandleCopy() {
ASSERT(regs.src_params.pos_y == 0);
ASSERT(regs.dst_params.pos_x == 0);
ASSERT(regs.dst_params.pos_y == 0);
ASSERT(regs.exec.is_dst_linear != regs.exec.is_src_linear);
if (regs.exec.is_dst_linear == regs.exec.is_src_linear) {
Memory::CopyBlock(dest_cpu, source_cpu, regs.x_count * regs.y_count);
return;
}
u8* src_buffer = Memory::GetPointer(source_cpu);
u8* dst_buffer = Memory::GetPointer(dest_cpu);

View File

@@ -142,6 +142,7 @@ enum class PredCondition : u64 {
GreaterThan = 4,
NotEqual = 5,
GreaterEqual = 6,
NotEqualWithNan = 13,
// TODO(Subv): Other condition types
};
@@ -165,7 +166,7 @@ enum class SubOp : u64 {
Lg2 = 0x3,
Rcp = 0x4,
Rsq = 0x5,
Min = 0x8,
Sqrt = 0x8,
};
enum class F2iRoundingOp : u64 {
@@ -209,7 +210,7 @@ union Instruction {
} pred;
BitField<19, 1, u64> negate_pred;
BitField<20, 8, Register> gpr20;
BitField<20, 7, SubOp> sub_op;
BitField<20, 4, SubOp> sub_op;
BitField<28, 8, Register> gpr28;
BitField<39, 8, Register> gpr39;
BitField<48, 16, u64> opcode;

View File

@@ -16,6 +16,7 @@ namespace Tegra {
enum class RenderTargetFormat : u32 {
NONE = 0x0,
RGBA32_FLOAT = 0xC0,
RGBA32_UINT = 0xC2,
RGBA16_FLOAT = 0xCA,
RGB10_A2_UNORM = 0xD1,
RGBA8_UNORM = 0xD5,
@@ -23,6 +24,15 @@ enum class RenderTargetFormat : u32 {
R11G11B10_FLOAT = 0xE0,
};
enum class DepthFormat : u32 {
Z32_FLOAT = 0xA,
Z16_UNORM = 0x13,
S8_Z24_UNORM = 0x14,
Z24_X8_UNORM = 0x15,
Z24_S8_UNORM = 0x16,
Z24_C8_UNORM = 0x18,
};
/// Returns the number of bytes per pixel of each rendertarget format.
u32 RenderTargetBytesPerPixel(RenderTargetFormat format);

View File

@@ -100,9 +100,9 @@ boost::optional<GPUVAddr> MemoryManager::FindFreeBlock(u64 size, u64 align) {
boost::optional<VAddr> MemoryManager::GpuToCpuAddress(GPUVAddr gpu_addr) {
VAddr base_addr = PageSlot(gpu_addr);
ASSERT(base_addr != static_cast<u64>(PageStatus::Unmapped));
if (base_addr == static_cast<u64>(PageStatus::Allocated)) {
if (base_addr == static_cast<u64>(PageStatus::Allocated) ||
base_addr == static_cast<u64>(PageStatus::Unmapped)) {
return {};
}

View File

@@ -51,9 +51,8 @@ public:
}
/// Attempt to use a faster method to display the framebuffer to screen
virtual bool AccelerateDisplay(const Tegra::FramebufferConfig& framebuffer,
VAddr framebuffer_addr, u32 pixel_stride,
ScreenInfo& screen_info) {
virtual bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr,
u32 pixel_stride, ScreenInfo& screen_info) {
return false;
}

View File

@@ -146,7 +146,6 @@ std::pair<u8*, GLintptr> RasterizerOpenGL::SetupVertexArrays(u8* array_ptr,
u64 size = end - start + 1;
// Copy vertex array data
res_cache.FlushRegion(start, size, nullptr);
Memory::ReadBlock(*memory_manager->GpuToCpuAddress(start), array_ptr, size);
// Bind the vertex array to the buffer at the current offset.
@@ -305,10 +304,15 @@ void RasterizerOpenGL::DrawArrays() {
MICROPROFILE_SCOPE(OpenGL_Drawing);
const auto& regs = Core::System().GetInstance().GPU().Maxwell3D().regs;
// TODO(bunnei): Implement these
// Sync the depth test state before configuring the framebuffer surfaces.
SyncDepthTestState();
// TODO(bunnei): Implement this
const bool has_stencil = false;
const bool using_color_fb = true;
const bool using_depth_fb = false;
const bool using_depth_fb = regs.zeta.Address() != 0;
const MathUtil::Rectangle<s32> viewport_rect{regs.viewport_transform[0].GetRect()};
const bool write_color_fb =
@@ -325,32 +329,23 @@ void RasterizerOpenGL::DrawArrays() {
std::tie(color_surface, depth_surface, surfaces_rect) =
res_cache.GetFramebufferSurfaces(using_color_fb, using_depth_fb, viewport_rect);
const u16 res_scale = color_surface != nullptr
? color_surface->res_scale
: (depth_surface == nullptr ? 1u : depth_surface->res_scale);
MathUtil::Rectangle<u32> draw_rect{
static_cast<u32>(std::clamp<s32>(static_cast<s32>(surfaces_rect.left) + viewport_rect.left,
surfaces_rect.left, surfaces_rect.right)), // Left
static_cast<u32>(std::clamp<s32>(static_cast<s32>(surfaces_rect.bottom) + viewport_rect.top,
surfaces_rect.bottom, surfaces_rect.top)), // Top
static_cast<u32>(std::clamp<s32>(static_cast<s32>(surfaces_rect.left) + viewport_rect.right,
surfaces_rect.left, surfaces_rect.right)), // Right
static_cast<u32>(
std::clamp<s32>(static_cast<s32>(surfaces_rect.left) + viewport_rect.left * res_scale,
surfaces_rect.left, surfaces_rect.right)), // Left
static_cast<u32>(
std::clamp<s32>(static_cast<s32>(surfaces_rect.bottom) + viewport_rect.top * res_scale,
surfaces_rect.bottom, surfaces_rect.top)), // Top
static_cast<u32>(
std::clamp<s32>(static_cast<s32>(surfaces_rect.left) + viewport_rect.right * res_scale,
surfaces_rect.left, surfaces_rect.right)), // Right
static_cast<u32>(std::clamp<s32>(static_cast<s32>(surfaces_rect.bottom) +
viewport_rect.bottom * res_scale,
surfaces_rect.bottom, surfaces_rect.top))}; // Bottom
std::clamp<s32>(static_cast<s32>(surfaces_rect.bottom) + viewport_rect.bottom,
surfaces_rect.bottom, surfaces_rect.top))}; // Bottom
// Bind the framebuffer surfaces
BindFramebufferSurfaces(color_surface, depth_surface, has_stencil);
// Sync the viewport
SyncViewport(surfaces_rect, res_scale);
// Sync the blend state registers
SyncViewport(surfaces_rect);
SyncBlendState();
SyncCullMode();
// TODO(bunnei): Sync framebuffer_scale uniform here
// TODO(bunnei): Sync scissorbox uniform(s) here
@@ -420,14 +415,16 @@ void RasterizerOpenGL::DrawArrays() {
const GLenum primitive_mode{MaxwellToGL::PrimitiveTopology(regs.draw.topology)};
if (is_indexed) {
const GLint index_min{static_cast<GLint>(regs.index_array.first)};
const GLint index_max{static_cast<GLint>(regs.index_array.first + regs.index_array.count)};
glDrawRangeElementsBaseVertex(primitive_mode, index_min, index_max, regs.index_array.count,
MaxwellToGL::IndexFormat(regs.index_array.format),
reinterpret_cast<const void*>(index_buffer_offset),
-index_min);
const GLint base_vertex{static_cast<GLint>(regs.vb_element_base)};
// Adjust the index buffer offset so it points to the first desired index.
index_buffer_offset += regs.index_array.first * regs.index_array.FormatSizeInBytes();
glDrawElementsBaseVertex(primitive_mode, regs.index_array.count,
MaxwellToGL::IndexFormat(regs.index_array.format),
reinterpret_cast<const void*>(index_buffer_offset), base_vertex);
} else {
glDrawArrays(primitive_mode, 0, regs.vertex_buffer.count);
glDrawArrays(primitive_mode, regs.vertex_buffer.first, regs.vertex_buffer.count);
}
// Disable scissor test
@@ -442,19 +439,11 @@ void RasterizerOpenGL::DrawArrays() {
state.Apply();
// Mark framebuffer surfaces as dirty
MathUtil::Rectangle<u32> draw_rect_unscaled{
draw_rect.left / res_scale, draw_rect.top / res_scale, draw_rect.right / res_scale,
draw_rect.bottom / res_scale};
if (color_surface != nullptr && write_color_fb) {
auto interval = color_surface->GetSubRectInterval(draw_rect_unscaled);
res_cache.InvalidateRegion(boost::icl::first(interval), boost::icl::length(interval),
color_surface);
res_cache.MarkSurfaceAsDirty(color_surface);
}
if (depth_surface != nullptr && write_depth_fb) {
auto interval = depth_surface->GetSubRectInterval(draw_rect_unscaled);
res_cache.InvalidateRegion(boost::icl::first(interval), boost::icl::length(interval),
depth_surface);
res_cache.MarkSurfaceAsDirty(depth_surface);
}
}
@@ -462,7 +451,7 @@ void RasterizerOpenGL::NotifyMaxwellRegisterChanged(u32 method) {}
void RasterizerOpenGL::FlushAll() {
MICROPROFILE_SCOPE(OpenGL_CacheManagement);
res_cache.FlushAll();
res_cache.FlushRegion(0, Kernel::VMManager::MAX_ADDRESS);
}
void RasterizerOpenGL::FlushRegion(Tegra::GPUVAddr addr, u64 size) {
@@ -472,13 +461,13 @@ void RasterizerOpenGL::FlushRegion(Tegra::GPUVAddr addr, u64 size) {
void RasterizerOpenGL::InvalidateRegion(Tegra::GPUVAddr addr, u64 size) {
MICROPROFILE_SCOPE(OpenGL_CacheManagement);
res_cache.InvalidateRegion(addr, size, nullptr);
res_cache.InvalidateRegion(addr, size);
}
void RasterizerOpenGL::FlushAndInvalidateRegion(Tegra::GPUVAddr addr, u64 size) {
MICROPROFILE_SCOPE(OpenGL_CacheManagement);
res_cache.FlushRegion(addr, size);
res_cache.InvalidateRegion(addr, size, nullptr);
res_cache.InvalidateRegion(addr, size);
}
bool RasterizerOpenGL::AccelerateDisplayTransfer(const void* config) {
@@ -497,45 +486,28 @@ bool RasterizerOpenGL::AccelerateFill(const void* config) {
return true;
}
bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& framebuffer,
bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
VAddr framebuffer_addr, u32 pixel_stride,
ScreenInfo& screen_info) {
if (framebuffer_addr == 0) {
return false;
if (!framebuffer_addr) {
return {};
}
MICROPROFILE_SCOPE(OpenGL_CacheManagement);
SurfaceParams src_params;
src_params.cpu_addr = framebuffer_addr;
src_params.addr = res_cache.TryFindFramebufferGpuAddress(framebuffer_addr).get_value_or(0);
src_params.width = std::min(framebuffer.width, pixel_stride);
src_params.height = framebuffer.height;
src_params.stride = pixel_stride;
src_params.is_tiled = true;
src_params.block_height = Tegra::Texture::TICEntry::DefaultBlockHeight;
src_params.pixel_format =
SurfaceParams::PixelFormatFromGPUPixelFormat(framebuffer.pixel_format);
src_params.component_type =
SurfaceParams::ComponentTypeFromGPUPixelFormat(framebuffer.pixel_format);
src_params.UpdateParams();
MathUtil::Rectangle<u32> src_rect;
Surface src_surface;
std::tie(src_surface, src_rect) =
res_cache.GetSurfaceSubRect(src_params, ScaleMatch::Ignore, true);
if (src_surface == nullptr) {
return false;
const auto& surface{res_cache.TryFindFramebufferSurface(framebuffer_addr)};
if (!surface) {
return {};
}
u32 scaled_width = src_surface->GetScaledWidth();
u32 scaled_height = src_surface->GetScaledHeight();
// Verify that the cached surface is the same size and format as the requested framebuffer
const auto& params{surface->GetSurfaceParams()};
const auto& pixel_format{SurfaceParams::PixelFormatFromGPUPixelFormat(config.pixel_format)};
ASSERT_MSG(params.width == config.width, "Framebuffer width is different");
ASSERT_MSG(params.height == config.height, "Framebuffer height is different");
ASSERT_MSG(params.pixel_format == pixel_format, "Framebuffer pixel_format is different");
screen_info.display_texcoords = MathUtil::Rectangle<float>(
(float)src_rect.bottom / (float)scaled_height, (float)src_rect.left / (float)scaled_width,
(float)src_rect.top / (float)scaled_height, (float)src_rect.right / (float)scaled_width);
screen_info.display_texture = src_surface->texture.handle;
screen_info.display_texture = surface->Texture().handle;
return true;
}
@@ -613,6 +585,12 @@ u32 RasterizerOpenGL::SetupConstBuffers(Maxwell::ShaderStage stage, GLuint progr
if (used_buffer.IsIndirect()) {
// Buffer is accessed indirectly, so upload the entire thing
size = buffer.size * sizeof(float);
if (size > MaxConstbufferSize) {
NGLOG_ERROR(HW_GPU, "indirect constbuffer size {} exceeds maximum {}", size,
MaxConstbufferSize);
size = MaxConstbufferSize;
}
} else {
// Buffer is accessed directly, upload just what we use
size = used_buffer.GetSize() * sizeof(float);
@@ -663,12 +641,16 @@ u32 RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, GLuint program,
glProgramUniform1i(program, uniform, current_bindpoint);
const auto texture = maxwell3d.GetStageTexture(entry.GetStage(), entry.GetOffset());
ASSERT(texture.enabled);
if (!texture.enabled) {
state.texture_units[current_bindpoint].texture_2d = 0;
continue;
}
texture_samplers[current_bindpoint].SyncWithConfig(texture.tsc);
Surface surface = res_cache.GetTextureSurface(texture);
if (surface != nullptr) {
state.texture_units[current_bindpoint].texture_2d = surface->texture.handle;
state.texture_units[current_bindpoint].texture_2d = surface->Texture().handle;
state.texture_units[current_bindpoint].swizzle.r =
MaxwellToGL::SwizzleSource(texture.tic.x_source);
state.texture_units[current_bindpoint].swizzle.g =
@@ -694,16 +676,16 @@ void RasterizerOpenGL::BindFramebufferSurfaces(const Surface& color_surface,
state.Apply();
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D,
color_surface != nullptr ? color_surface->texture.handle : 0, 0);
color_surface != nullptr ? color_surface->Texture().handle : 0, 0);
if (depth_surface != nullptr) {
if (has_stencil) {
// attach both depth and stencil
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
depth_surface->texture.handle, 0);
depth_surface->Texture().handle, 0);
} else {
// attach depth
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D,
depth_surface->texture.handle, 0);
depth_surface->Texture().handle, 0);
// clear stencil attachment
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
}
@@ -714,14 +696,14 @@ void RasterizerOpenGL::BindFramebufferSurfaces(const Surface& color_surface,
}
}
void RasterizerOpenGL::SyncViewport(const MathUtil::Rectangle<u32>& surfaces_rect, u16 res_scale) {
void RasterizerOpenGL::SyncViewport(const MathUtil::Rectangle<u32>& surfaces_rect) {
const auto& regs = Core::System().GetInstance().GPU().Maxwell3D().regs;
const MathUtil::Rectangle<s32> viewport_rect{regs.viewport_transform[0].GetRect()};
state.viewport.x = static_cast<GLint>(surfaces_rect.left) + viewport_rect.left * res_scale;
state.viewport.y = static_cast<GLint>(surfaces_rect.bottom) + viewport_rect.bottom * res_scale;
state.viewport.width = static_cast<GLsizei>(viewport_rect.GetWidth() * res_scale);
state.viewport.height = static_cast<GLsizei>(viewport_rect.GetHeight() * res_scale);
state.viewport.x = static_cast<GLint>(surfaces_rect.left) + viewport_rect.left;
state.viewport.y = static_cast<GLint>(surfaces_rect.bottom) + viewport_rect.bottom;
state.viewport.width = static_cast<GLsizei>(viewport_rect.GetWidth());
state.viewport.height = static_cast<GLsizei>(viewport_rect.GetHeight());
}
void RasterizerOpenGL::SyncClipEnabled() {
@@ -733,7 +715,11 @@ void RasterizerOpenGL::SyncClipCoef() {
}
void RasterizerOpenGL::SyncCullMode() {
UNREACHABLE();
const auto& regs = Core::System().GetInstance().GPU().Maxwell3D().regs;
state.cull.enabled = regs.cull.enabled != 0;
state.cull.front_face = MaxwellToGL::FrontFace(regs.cull.front_face);
state.cull.mode = MaxwellToGL::CullFace(regs.cull.cull_face);
}
void RasterizerOpenGL::SyncDepthScale() {
@@ -744,6 +730,14 @@ void RasterizerOpenGL::SyncDepthOffset() {
UNREACHABLE();
}
void RasterizerOpenGL::SyncDepthTestState() {
const auto& regs = Core::System().GetInstance().GPU().Maxwell3D().regs;
state.depth.test_enabled = regs.depth_test_enable != 0;
state.depth.write_mask = regs.depth_write_enabled ? GL_TRUE : GL_FALSE;
state.depth.test_func = MaxwellToGL::ComparisonOp(regs.depth_test_func);
}
void RasterizerOpenGL::SyncBlendState() {
const auto& regs = Core::System().GetInstance().GPU().Maxwell3D().regs;

View File

@@ -55,7 +55,7 @@ public:
};
/// Maximum supported size that a constbuffer can have in bytes.
static constexpr size_t MaxConstbufferSize = 0x1000;
static constexpr size_t MaxConstbufferSize = 0x10000;
static_assert(MaxConstbufferSize % sizeof(GLvec4) == 0,
"The maximum size of a constbuffer must be a multiple of the size of GLvec4");
@@ -109,7 +109,7 @@ private:
u32 current_unit, const std::vector<GLShader::SamplerEntry>& entries);
/// Syncs the viewport to match the guest state
void SyncViewport(const MathUtil::Rectangle<u32>& surfaces_rect, u16 res_scale);
void SyncViewport(const MathUtil::Rectangle<u32>& surfaces_rect);
/// Syncs the clip enabled status to match the guest state
void SyncClipEnabled();
@@ -126,6 +126,9 @@ private:
/// Syncs the depth offset to match the guest state
void SyncDepthOffset();
/// Syncs the depth test state to match the guest state
void SyncDepthTestState();
/// Syncs the blend state to match the guest state
void SyncBlendState();

File diff suppressed because it is too large Load Diff

View File

@@ -1,57 +1,26 @@
// Copyright 2015 Citra Emulator Project
// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <array>
#include <map>
#include <memory>
#include <set>
#include <tuple>
#ifdef __GNUC__
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-local-typedefs"
#endif
#include <vector>
#include <boost/icl/interval_map.hpp>
#include <boost/icl/interval_set.hpp>
#ifdef __GNUC__
#pragma GCC diagnostic pop
#endif
#include <boost/optional.hpp>
#include <glad/glad.h>
#include "common/assert.h"
#include "common/common_funcs.h"
#include "common/common_types.h"
#include "common/hash.h"
#include "common/math_util.h"
#include "video_core/gpu.h"
#include "video_core/memory_manager.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/textures/texture.h"
struct CachedSurface;
class CachedSurface;
using Surface = std::shared_ptr<CachedSurface>;
using SurfaceSet = std::set<Surface>;
using SurfaceRegions = boost::icl::interval_set<Tegra::GPUVAddr>;
using SurfaceMap = boost::icl::interval_map<Tegra::GPUVAddr, Surface>;
using SurfaceCache = boost::icl::interval_map<Tegra::GPUVAddr, SurfaceSet>;
using SurfaceInterval = SurfaceCache::interval_type;
static_assert(std::is_same<SurfaceRegions::interval_type, SurfaceCache::interval_type>() &&
std::is_same<SurfaceMap::interval_type, SurfaceCache::interval_type>(),
"incorrect interval types");
using SurfaceRect_Tuple = std::tuple<Surface, MathUtil::Rectangle<u32>>;
using SurfaceSurfaceRect_Tuple = std::tuple<Surface, Surface, MathUtil::Rectangle<u32>>;
using PageMap = boost::icl::interval_map<u64, int>;
enum class ScaleMatch {
Exact, // only accept same res scale
Upscale, // only allow higher scale than params
Ignore // accept every scaled res
};
struct SurfaceParams {
enum class PixelFormat {
ABGR8 = 0,
@@ -61,13 +30,21 @@ struct SurfaceParams {
R8 = 4,
RGBA16F = 5,
R11FG11FB10F = 6,
DXT1 = 7,
DXT23 = 8,
DXT45 = 9,
DXN1 = 10, // This is also known as BC4
ASTC_2D_4X4 = 11,
RGBA32UI = 7,
DXT1 = 8,
DXT23 = 9,
DXT45 = 10,
DXN1 = 11, // This is also known as BC4
ASTC_2D_4X4 = 12,
Max,
MaxColorFormat,
// DepthStencil formats
Z24S8 = 13,
MaxDepthStencilFormat,
Max = MaxDepthStencilFormat,
Invalid = 255,
};
@@ -93,10 +70,10 @@ struct SurfaceParams {
/**
* Gets the compression factor for the specified PixelFormat. This applies to just the
* "compressed width" and "compressed height", not the overall compression factor of a
* compressed image. This is used for maintaining proper surface sizes for compressed texture
* formats.
* compressed image. This is used for maintaining proper surface sizes for compressed
* texture formats.
*/
static constexpr u32 GetCompresssionFactor(PixelFormat format) {
static constexpr u32 GetCompressionFactor(PixelFormat format) {
if (format == PixelFormat::Invalid)
return 0;
@@ -108,19 +85,18 @@ struct SurfaceParams {
1, // R8
1, // RGBA16F
1, // R11FG11FB10F
1, // RGBA32UI
4, // DXT1
4, // DXT23
4, // DXT45
4, // DXN1
1, // ASTC_2D_4X4
4, // ASTC_2D_4X4
1, // Z24S8
}};
ASSERT(static_cast<size_t>(format) < compression_factor_table.size());
return compression_factor_table[static_cast<size_t>(format)];
}
u32 GetCompresssionFactor() const {
return GetCompresssionFactor(pixel_format);
}
static constexpr u32 GetFormatBpp(PixelFormat format) {
if (format == PixelFormat::Invalid)
@@ -134,11 +110,13 @@ struct SurfaceParams {
8, // R8
64, // RGBA16F
32, // R11FG11FB10F
128, // RGBA32UI
64, // DXT1
128, // DXT23
128, // DXT45
64, // DXN1
32, // ASTC_2D_4X4
32, // Z24S8
}};
ASSERT(static_cast<size_t>(format) < bpp_table.size());
@@ -148,6 +126,16 @@ struct SurfaceParams {
return GetFormatBpp(pixel_format);
}
static PixelFormat PixelFormatFromDepthFormat(Tegra::DepthFormat format) {
switch (format) {
case Tegra::DepthFormat::Z24_S8_UNORM:
return PixelFormat::Z24S8;
default:
NGLOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format));
UNREACHABLE();
}
}
static PixelFormat PixelFormatFromRenderTargetFormat(Tegra::RenderTargetFormat format) {
switch (format) {
case Tegra::RenderTargetFormat::RGBA8_UNORM:
@@ -159,25 +147,8 @@ struct SurfaceParams {
return PixelFormat::RGBA16F;
case Tegra::RenderTargetFormat::R11G11B10_FLOAT:
return PixelFormat::R11FG11FB10F;
default:
NGLOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format));
UNREACHABLE();
}
}
static bool IsFormatASTC(PixelFormat format) {
switch (format) {
case PixelFormat::ASTC_2D_4X4:
return true;
default:
return false;
}
}
static PixelFormat PixelFormatFromGPUPixelFormat(Tegra::FramebufferConfig::PixelFormat format) {
switch (format) {
case Tegra::FramebufferConfig::PixelFormat::ABGR8:
return PixelFormat::ABGR8;
case Tegra::RenderTargetFormat::RGBA32_UINT:
return PixelFormat::RGBA32UI;
default:
NGLOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format));
UNREACHABLE();
@@ -201,6 +172,8 @@ struct SurfaceParams {
return PixelFormat::RGBA16F;
case Tegra::Texture::TextureFormat::BF10GF11RF11:
return PixelFormat::R11FG11FB10F;
case Tegra::Texture::TextureFormat::R32_G32_B32_A32:
return PixelFormat::RGBA32UI;
case Tegra::Texture::TextureFormat::DXT1:
return PixelFormat::DXT1;
case Tegra::Texture::TextureFormat::DXT23:
@@ -234,6 +207,8 @@ struct SurfaceParams {
return Tegra::Texture::TextureFormat::R16_G16_B16_A16;
case PixelFormat::R11FG11FB10F:
return Tegra::Texture::TextureFormat::BF10GF11RF11;
case PixelFormat::RGBA32UI:
return Tegra::Texture::TextureFormat::R32_G32_B32_A32;
case PixelFormat::DXT1:
return Tegra::Texture::TextureFormat::DXT1;
case PixelFormat::DXT23:
@@ -249,6 +224,15 @@ struct SurfaceParams {
}
}
static Tegra::DepthFormat DepthFormatFromPixelFormat(PixelFormat format) {
switch (format) {
case PixelFormat::Z24S8:
return Tegra::DepthFormat::Z24_S8_UNORM;
default:
UNREACHABLE();
}
}
static ComponentType ComponentTypeFromTexture(Tegra::Texture::ComponentType type) {
// TODO(Subv): Implement more component types
switch (type) {
@@ -270,16 +254,27 @@ struct SurfaceParams {
case Tegra::RenderTargetFormat::RGBA16_FLOAT:
case Tegra::RenderTargetFormat::R11G11B10_FLOAT:
return ComponentType::Float;
case Tegra::RenderTargetFormat::RGBA32_UINT:
return ComponentType::UInt;
default:
NGLOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format));
UNREACHABLE();
}
}
static ComponentType ComponentTypeFromGPUPixelFormat(
Tegra::FramebufferConfig::PixelFormat format) {
static PixelFormat PixelFormatFromGPUPixelFormat(Tegra::FramebufferConfig::PixelFormat format) {
switch (format) {
case Tegra::FramebufferConfig::PixelFormat::ABGR8:
return PixelFormat::ABGR8;
default:
NGLOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format));
UNREACHABLE();
}
}
static ComponentType ComponentTypeFromDepthFormat(Tegra::DepthFormat format) {
switch (format) {
case Tegra::DepthFormat::Z24_S8_UNORM:
return ComponentType::UNorm;
default:
NGLOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format));
@@ -287,198 +282,117 @@ struct SurfaceParams {
}
}
static bool CheckFormatsBlittable(PixelFormat pixel_format_a, PixelFormat pixel_format_b) {
SurfaceType a_type = GetFormatType(pixel_format_a);
SurfaceType b_type = GetFormatType(pixel_format_b);
if (a_type == SurfaceType::ColorTexture && b_type == SurfaceType::ColorTexture) {
return true;
}
if (a_type == SurfaceType::Depth && b_type == SurfaceType::Depth) {
return true;
}
if (a_type == SurfaceType::DepthStencil && b_type == SurfaceType::DepthStencil) {
return true;
}
return false;
}
static SurfaceType GetFormatType(PixelFormat pixel_format) {
if (static_cast<size_t>(pixel_format) < MaxPixelFormat) {
if (static_cast<size_t>(pixel_format) < static_cast<size_t>(PixelFormat::MaxColorFormat)) {
return SurfaceType::ColorTexture;
}
if (static_cast<size_t>(pixel_format) <
static_cast<size_t>(PixelFormat::MaxDepthStencilFormat)) {
return SurfaceType::DepthStencil;
}
// TODO(Subv): Implement the other formats
ASSERT(false);
return SurfaceType::Invalid;
}
/// Update the params "size", "end" and "type" from the already set "addr", "width", "height"
/// and "pixel_format"
void UpdateParams() {
if (stride == 0) {
stride = width;
}
type = GetFormatType(pixel_format);
size = !is_tiled ? BytesInPixels(stride * (height - 1) + width)
: BytesInPixels(stride * 8 * (height / 8 - 1) + width * 8);
end = addr + size;
}
SurfaceInterval GetInterval() const {
return SurfaceInterval::right_open(addr, end);
}
// Returns the outer rectangle containing "interval"
SurfaceParams FromInterval(SurfaceInterval interval) const;
SurfaceInterval GetSubRectInterval(MathUtil::Rectangle<u32> unscaled_rect) const;
// Returns the region of the biggest valid rectange within interval
SurfaceInterval GetCopyableInterval(const Surface& src_surface) const;
/**
* Gets the actual width (in pixels) of the surface. This is provided because `width` is used
* for tracking the surface region in memory, which may be compressed for certain formats. In
* this scenario, `width` is actually the compressed width.
*/
u32 GetActualWidth() const {
return width * GetCompresssionFactor();
}
/**
* Gets the actual height (in pixels) of the surface. This is provided because `height` is used
* for tracking the surface region in memory, which may be compressed for certain formats. In
* this scenario, `height` is actually the compressed height.
*/
u32 GetActualHeight() const {
return height * GetCompresssionFactor();
}
u32 GetScaledWidth() const {
return width * res_scale;
}
u32 GetScaledHeight() const {
return height * res_scale;
}
MathUtil::Rectangle<u32> GetRect() const {
return {0, height, width, 0};
}
MathUtil::Rectangle<u32> GetScaledRect() const {
return {0, GetScaledHeight(), GetScaledWidth(), 0};
}
u64 PixelsInBytes(u64 size) const {
return size * CHAR_BIT / GetFormatBpp(pixel_format);
}
u64 BytesInPixels(u64 pixels) const {
return pixels * GetFormatBpp(pixel_format) / CHAR_BIT;
/// Returns the rectangle corresponding to this surface
MathUtil::Rectangle<u32> GetRect() const;
/// Returns the size of this surface in bytes, adjusted for compression
size_t SizeInBytes() const {
const u32 compression_factor{GetCompressionFactor(pixel_format)};
ASSERT(width % compression_factor == 0);
ASSERT(height % compression_factor == 0);
return (width / compression_factor) * (height / compression_factor) *
GetFormatBpp(pixel_format) / CHAR_BIT;
}
/// Returns the CPU virtual address for this surface
VAddr GetCpuAddr() const;
bool ExactMatch(const SurfaceParams& other_surface) const;
bool CanSubRect(const SurfaceParams& sub_surface) const;
bool CanExpand(const SurfaceParams& expanded_surface) const;
bool CanTexCopy(const SurfaceParams& texcopy_params) const;
/// Returns true if the specified region overlaps with this surface's region in Switch memory
bool IsOverlappingRegion(Tegra::GPUVAddr region_addr, size_t region_size) const {
return addr <= (region_addr + region_size) && region_addr <= (addr + size_in_bytes);
}
MathUtil::Rectangle<u32> GetSubRect(const SurfaceParams& sub_surface) const;
MathUtil::Rectangle<u32> GetScaledSubRect(const SurfaceParams& sub_surface) const;
/// Creates SurfaceParams from a texture configation
static SurfaceParams CreateForTexture(const Tegra::Texture::FullTextureInfo& config);
Tegra::GPUVAddr addr = 0;
Tegra::GPUVAddr end = 0;
boost::optional<VAddr> cpu_addr;
u64 size = 0;
/// Creates SurfaceParams from a framebuffer configation
static SurfaceParams CreateForFramebuffer(
const Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig& config);
u32 width = 0;
u32 height = 0;
u32 stride = 0;
u32 block_height = 0;
u16 res_scale = 1;
bool is_tiled = false;
PixelFormat pixel_format = PixelFormat::Invalid;
SurfaceType type = SurfaceType::Invalid;
ComponentType component_type = ComponentType::Invalid;
Tegra::GPUVAddr addr;
bool is_tiled;
u32 block_height;
PixelFormat pixel_format;
ComponentType component_type;
SurfaceType type;
u32 width;
u32 height;
u32 unaligned_height;
size_t size_in_bytes;
};
struct CachedSurface : SurfaceParams {
bool CanFill(const SurfaceParams& dest_surface, SurfaceInterval fill_interval) const;
bool CanCopy(const SurfaceParams& dest_surface, SurfaceInterval copy_interval) const;
/// Hashable variation of SurfaceParams, used for a key in the surface cache
struct SurfaceKey : Common::HashableStruct<SurfaceParams> {
static SurfaceKey Create(const SurfaceParams& params) {
SurfaceKey res;
res.state = params;
return res;
}
};
bool IsRegionValid(SurfaceInterval interval) const {
return (invalid_regions.find(interval) == invalid_regions.end());
namespace std {
template <>
struct hash<SurfaceKey> {
size_t operator()(const SurfaceKey& k) const {
return k.Hash();
}
};
} // namespace std
class CachedSurface final {
public:
CachedSurface(const SurfaceParams& params);
const OGLTexture& Texture() const {
return texture;
}
bool IsSurfaceFullyInvalid() const {
return (invalid_regions & GetInterval()) == SurfaceRegions(GetInterval());
}
bool registered = false;
SurfaceRegions invalid_regions;
u64 fill_size = 0; /// Number of bytes to read from fill_data
std::array<u8, 4> fill_data;
OGLTexture texture;
static constexpr unsigned int GetGLBytesPerPixel(PixelFormat format) {
if (format == PixelFormat::Invalid)
static constexpr unsigned int GetGLBytesPerPixel(SurfaceParams::PixelFormat format) {
if (format == SurfaceParams::PixelFormat::Invalid)
return 0;
return SurfaceParams::GetFormatBpp(format) / CHAR_BIT;
}
std::unique_ptr<u8[]> gl_buffer;
size_t gl_buffer_size = 0;
const SurfaceParams& GetSurfaceParams() const {
return params;
}
// Read/Write data in Switch memory to/from gl_buffer
void LoadGLBuffer(Tegra::GPUVAddr load_start, Tegra::GPUVAddr load_end);
void FlushGLBuffer(Tegra::GPUVAddr flush_start, Tegra::GPUVAddr flush_end);
void LoadGLBuffer();
void FlushGLBuffer();
// Upload/Download data in gl_buffer in/to this surface's texture
void UploadGLTexture(const MathUtil::Rectangle<u32>& rect, GLuint read_fb_handle,
GLuint draw_fb_handle);
void DownloadGLTexture(const MathUtil::Rectangle<u32>& rect, GLuint read_fb_handle,
GLuint draw_fb_handle);
void UploadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle);
void DownloadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle);
private:
OGLTexture texture;
std::vector<u8> gl_buffer;
SurfaceParams params;
};
class RasterizerCacheOpenGL : NonCopyable {
class RasterizerCacheOpenGL final : NonCopyable {
public:
RasterizerCacheOpenGL();
~RasterizerCacheOpenGL();
/// Blit one surface's texture to another
bool BlitSurfaces(const Surface& src_surface, const MathUtil::Rectangle<u32>& src_rect,
const Surface& dst_surface, const MathUtil::Rectangle<u32>& dst_rect);
void ConvertD24S8toABGR(GLuint src_tex, const MathUtil::Rectangle<u32>& src_rect,
GLuint dst_tex, const MathUtil::Rectangle<u32>& dst_rect);
/// Copy one surface's region to another
void CopySurface(const Surface& src_surface, const Surface& dst_surface,
SurfaceInterval copy_interval);
/// Load a texture from Switch memory to OpenGL and cache it (if not already cached)
Surface GetSurface(const SurfaceParams& params, ScaleMatch match_res_scale,
bool load_if_create);
/// Tries to find a framebuffer GPU address based on the provided CPU address
boost::optional<Tegra::GPUVAddr> TryFindFramebufferGpuAddress(VAddr cpu_addr) const;
/// Attempt to find a subrect (resolution scaled) of a surface, otherwise loads a texture from
/// Switch memory to OpenGL and caches it (if not already cached)
SurfaceRect_Tuple GetSurfaceSubRect(const SurfaceParams& params, ScaleMatch match_res_scale,
bool load_if_create);
/// Get a surface based on the texture configuration
Surface GetTextureSurface(const Tegra::Texture::FullTextureInfo& config);
@@ -486,29 +400,21 @@ public:
SurfaceSurfaceRect_Tuple GetFramebufferSurfaces(bool using_color_fb, bool using_depth_fb,
const MathUtil::Rectangle<s32>& viewport);
/// Get a surface that matches the fill config
Surface GetFillSurface(const void* config);
/// Marks the specified surface as "dirty", in that it is out of sync with Switch memory
void MarkSurfaceAsDirty(const Surface& surface);
/// Get a surface that matches a "texture copy" display transfer config
SurfaceRect_Tuple GetTexCopySurface(const SurfaceParams& params);
/// Tries to find a framebuffer GPU address based on the provided CPU address
Surface TryFindFramebufferSurface(VAddr cpu_addr) const;
/// Write any cached resources overlapping the region back to memory (if dirty)
void FlushRegion(Tegra::GPUVAddr addr, u64 size, Surface flush_surface = nullptr);
void FlushRegion(Tegra::GPUVAddr addr, size_t size);
/// Mark region as being invalidated by region_owner (nullptr if Switch memory)
void InvalidateRegion(Tegra::GPUVAddr addr, u64 size, const Surface& region_owner);
/// Flush all cached resources tracked by this cache manager
void FlushAll();
/// Mark the specified region as being invalidated
void InvalidateRegion(Tegra::GPUVAddr addr, size_t size);
private:
void DuplicateSurface(const Surface& src_surface, const Surface& dest_surface);
/// Update surface's texture for given region when necessary
void ValidateSurface(const Surface& surface, Tegra::GPUVAddr addr, u64 size);
/// Create a new surface
Surface CreateSurface(const SurfaceParams& params);
void LoadSurface(const Surface& surface);
Surface GetSurface(const SurfaceParams& params);
/// Register surface into the cache
void RegisterSurface(const Surface& surface);
@@ -519,18 +425,9 @@ private:
/// Increase/decrease the number of surface in pages touching the specified region
void UpdatePagesCachedCount(Tegra::GPUVAddr addr, u64 size, int delta);
SurfaceCache surface_cache;
std::unordered_map<SurfaceKey, Surface> surface_cache;
PageMap cached_pages;
SurfaceMap dirty_regions;
SurfaceSet remove_surfaces;
OGLFramebuffer read_framebuffer;
OGLFramebuffer draw_framebuffer;
OGLVertexArray attributeless_vao;
OGLBuffer d24s8_abgr_buffer;
GLsizeiptr d24s8_abgr_buffer_size;
OGLProgram d24s8_abgr_shader;
GLint d24s8_abgr_tbo_size_u_id;
GLint d24s8_abgr_viewport_u_id;
};

View File

@@ -719,21 +719,31 @@ private:
/**
* Returns the comparison string to use to compare two values in the 'set' family of
* instructions.
* @params condition The condition used in the 'set'-family instruction.
* @param condition The condition used in the 'set'-family instruction.
* @param op_a First operand to use for the comparison.
* @param op_b Second operand to use for the comparison.
* @returns String corresponding to the GLSL operator that matches the desired comparison.
*/
std::string GetPredicateComparison(Tegra::Shader::PredCondition condition) const {
std::string GetPredicateComparison(Tegra::Shader::PredCondition condition,
const std::string& op_a, const std::string& op_b) const {
using Tegra::Shader::PredCondition;
static const std::unordered_map<PredCondition, const char*> PredicateComparisonStrings = {
{PredCondition::LessThan, "<"}, {PredCondition::Equal, "=="},
{PredCondition::LessEqual, "<="}, {PredCondition::GreaterThan, ">"},
{PredCondition::NotEqual, "!="}, {PredCondition::GreaterEqual, ">="},
{PredCondition::LessThan, "<"}, {PredCondition::Equal, "=="},
{PredCondition::LessEqual, "<="}, {PredCondition::GreaterThan, ">"},
{PredCondition::NotEqual, "!="}, {PredCondition::GreaterEqual, ">="},
{PredCondition::NotEqualWithNan, "!="},
};
auto comparison = PredicateComparisonStrings.find(condition);
const auto& comparison{PredicateComparisonStrings.find(condition)};
ASSERT_MSG(comparison != PredicateComparisonStrings.end(),
"Unknown predicate comparison operation");
return comparison->second;
std::string predicate{'(' + op_a + ") " + comparison->second + " (" + op_b + ')'};
if (condition == PredCondition::NotEqualWithNan) {
predicate += " || isnan(" + op_a + ") || isnan(" + op_b + ')';
}
return predicate;
}
/**
@@ -811,6 +821,7 @@ private:
if (!opcode) {
NGLOG_CRITICAL(HW_GPU, "Unhandled instruction: {0:x}", instr.value);
UNREACHABLE();
return offset + 1;
}
shader.AddLine("// " + std::to_string(offset) + ": " + opcode->GetName());
@@ -906,8 +917,8 @@ private:
regs.SetRegisterToFloat(instr.gpr0, 0, "inversesqrt(" + op_a + ')', 1, 1,
instr.alu.saturate_d);
break;
case SubOp::Min:
regs.SetRegisterToFloat(instr.gpr0, 0, "min(" + op_a + "," + op_b + ')', 1, 1,
case SubOp::Sqrt:
regs.SetRegisterToFloat(instr.gpr0, 0, "sqrt(" + op_a + ')', 1, 1,
instr.alu.saturate_d);
break;
default:
@@ -1414,10 +1425,9 @@ private:
std::string second_pred =
GetPredicateCondition(instr.fsetp.pred39, instr.fsetp.neg_pred != 0);
std::string comparator = GetPredicateComparison(instr.fsetp.cond);
std::string combiner = GetPredicateCombiner(instr.fsetp.op);
std::string predicate = '(' + op_a + ") " + comparator + " (" + op_b + ')';
std::string predicate = GetPredicateComparison(instr.fsetp.cond, op_a, op_b);
// Set the primary predicate to the result of Predicate OP SecondPredicate
SetPredicate(instr.fsetp.pred3,
'(' + predicate + ") " + combiner + " (" + second_pred + ')');
@@ -1452,10 +1462,9 @@ private:
std::string second_pred =
GetPredicateCondition(instr.isetp.pred39, instr.isetp.neg_pred != 0);
std::string comparator = GetPredicateComparison(instr.isetp.cond);
std::string combiner = GetPredicateCombiner(instr.isetp.op);
std::string predicate = '(' + op_a + ") " + comparator + " (" + op_b + ')';
std::string predicate = GetPredicateComparison(instr.isetp.cond, op_a, op_b);
// Set the primary predicate to the result of Predicate OP SecondPredicate
SetPredicate(instr.isetp.pred3,
'(' + predicate + ") " + combiner + " (" + second_pred + ')');
@@ -1502,11 +1511,10 @@ private:
std::string second_pred =
GetPredicateCondition(instr.fset.pred39, instr.fset.neg_pred != 0);
std::string comparator = GetPredicateComparison(instr.fset.cond);
std::string combiner = GetPredicateCombiner(instr.fset.op);
std::string predicate = "(((" + op_a + ") " + comparator + " (" + op_b + ")) " +
combiner + " (" + second_pred + "))";
std::string predicate = "((" + GetPredicateComparison(instr.fset.cond, op_a, op_b) +
") " + combiner + " (" + second_pred + "))";
if (instr.fset.bf) {
regs.SetRegisterToFloat(instr.gpr0, 0, predicate + " ? 1.0 : 0.0", 1, 1);
@@ -1537,11 +1545,10 @@ private:
std::string second_pred =
GetPredicateCondition(instr.iset.pred39, instr.iset.neg_pred != 0);
std::string comparator = GetPredicateComparison(instr.iset.cond);
std::string combiner = GetPredicateCombiner(instr.iset.op);
std::string predicate = "(((" + op_a + ") " + comparator + " (" + op_b + ")) " +
combiner + " (" + second_pred + "))";
std::string predicate = "((" + GetPredicateComparison(instr.iset.cond, op_a, op_b) +
") " + combiner + " (" + second_pred + "))";
if (instr.iset.bf) {
regs.SetRegisterToFloat(instr.gpr0, 0, predicate + " ? 1.0 : 0.0", 1, 1);

View File

@@ -51,16 +51,6 @@ OpenGLState::OpenGLState() {
texture_unit.Reset();
}
lighting_lut.texture_buffer = 0;
fog_lut.texture_buffer = 0;
proctex_lut.texture_buffer = 0;
proctex_diff_lut.texture_buffer = 0;
proctex_color_map.texture_buffer = 0;
proctex_alpha_map.texture_buffer = 0;
proctex_noise_lut.texture_buffer = 0;
draw.read_framebuffer = 0;
draw.draw_framebuffer = 0;
draw.vertex_array = 0;
@@ -224,48 +214,6 @@ void OpenGLState::Apply() const {
}
}
// Lighting LUTs
if (lighting_lut.texture_buffer != cur_state.lighting_lut.texture_buffer) {
glActiveTexture(TextureUnits::LightingLUT.Enum());
glBindTexture(GL_TEXTURE_BUFFER, lighting_lut.texture_buffer);
}
// Fog LUT
if (fog_lut.texture_buffer != cur_state.fog_lut.texture_buffer) {
glActiveTexture(TextureUnits::FogLUT.Enum());
glBindTexture(GL_TEXTURE_BUFFER, fog_lut.texture_buffer);
}
// ProcTex Noise LUT
if (proctex_noise_lut.texture_buffer != cur_state.proctex_noise_lut.texture_buffer) {
glActiveTexture(TextureUnits::ProcTexNoiseLUT.Enum());
glBindTexture(GL_TEXTURE_BUFFER, proctex_noise_lut.texture_buffer);
}
// ProcTex Color Map
if (proctex_color_map.texture_buffer != cur_state.proctex_color_map.texture_buffer) {
glActiveTexture(TextureUnits::ProcTexColorMap.Enum());
glBindTexture(GL_TEXTURE_BUFFER, proctex_color_map.texture_buffer);
}
// ProcTex Alpha Map
if (proctex_alpha_map.texture_buffer != cur_state.proctex_alpha_map.texture_buffer) {
glActiveTexture(TextureUnits::ProcTexAlphaMap.Enum());
glBindTexture(GL_TEXTURE_BUFFER, proctex_alpha_map.texture_buffer);
}
// ProcTex LUT
if (proctex_lut.texture_buffer != cur_state.proctex_lut.texture_buffer) {
glActiveTexture(TextureUnits::ProcTexLUT.Enum());
glBindTexture(GL_TEXTURE_BUFFER, proctex_lut.texture_buffer);
}
// ProcTex Diff LUT
if (proctex_diff_lut.texture_buffer != cur_state.proctex_diff_lut.texture_buffer) {
glActiveTexture(TextureUnits::ProcTexDiffLUT.Enum());
glBindTexture(GL_TEXTURE_BUFFER, proctex_diff_lut.texture_buffer);
}
// Framebuffer
if (draw.read_framebuffer != cur_state.draw.read_framebuffer) {
glBindFramebuffer(GL_READ_FRAMEBUFFER, draw.read_framebuffer);
@@ -339,20 +287,6 @@ OpenGLState& OpenGLState::UnbindTexture(GLuint handle) {
unit.Unbind();
}
}
if (lighting_lut.texture_buffer == handle)
lighting_lut.texture_buffer = 0;
if (fog_lut.texture_buffer == handle)
fog_lut.texture_buffer = 0;
if (proctex_noise_lut.texture_buffer == handle)
proctex_noise_lut.texture_buffer = 0;
if (proctex_color_map.texture_buffer == handle)
proctex_color_map.texture_buffer = 0;
if (proctex_alpha_map.texture_buffer == handle)
proctex_alpha_map.texture_buffer = 0;
if (proctex_lut.texture_buffer == handle)
proctex_lut.texture_buffer = 0;
if (proctex_diff_lut.texture_buffer == handle)
proctex_diff_lut.texture_buffer = 0;
return *this;
}

View File

@@ -106,34 +106,6 @@ public:
}
} texture_units[32];
struct {
GLuint texture_buffer; // GL_TEXTURE_BINDING_BUFFER
} lighting_lut;
struct {
GLuint texture_buffer; // GL_TEXTURE_BINDING_BUFFER
} fog_lut;
struct {
GLuint texture_buffer; // GL_TEXTURE_BINDING_BUFFER
} proctex_noise_lut;
struct {
GLuint texture_buffer; // GL_TEXTURE_BINDING_BUFFER
} proctex_color_map;
struct {
GLuint texture_buffer; // GL_TEXTURE_BINDING_BUFFER
} proctex_alpha_map;
struct {
GLuint texture_buffer; // GL_TEXTURE_BINDING_BUFFER
} proctex_lut;
struct {
GLuint texture_buffer; // GL_TEXTURE_BINDING_BUFFER
} proctex_diff_lut;
struct {
GLuint read_framebuffer; // GL_READ_FRAMEBUFFER_BINDING
GLuint draw_framebuffer; // GL_DRAW_FRAMEBUFFER_BINDING

View File

@@ -201,4 +201,54 @@ inline GLenum SwizzleSource(Tegra::Texture::SwizzleSource source) {
return {};
}
inline GLenum ComparisonOp(Maxwell::ComparisonOp comparison) {
switch (comparison) {
case Maxwell::ComparisonOp::Never:
return GL_NEVER;
case Maxwell::ComparisonOp::Less:
return GL_LESS;
case Maxwell::ComparisonOp::Equal:
return GL_EQUAL;
case Maxwell::ComparisonOp::LessEqual:
return GL_LEQUAL;
case Maxwell::ComparisonOp::Greater:
return GL_GREATER;
case Maxwell::ComparisonOp::NotEqual:
return GL_NOTEQUAL;
case Maxwell::ComparisonOp::GreaterEqual:
return GL_GEQUAL;
case Maxwell::ComparisonOp::Always:
return GL_ALWAYS;
}
NGLOG_CRITICAL(Render_OpenGL, "Unimplemented comparison op={}", static_cast<u32>(comparison));
UNREACHABLE();
return {};
}
inline GLenum FrontFace(Maxwell::Cull::FrontFace front_face) {
switch (front_face) {
case Maxwell::Cull::FrontFace::ClockWise:
return GL_CW;
case Maxwell::Cull::FrontFace::CounterClockWise:
return GL_CCW;
}
NGLOG_CRITICAL(Render_OpenGL, "Unimplemented front face cull={}", static_cast<u32>(front_face));
UNREACHABLE();
return {};
}
inline GLenum CullFace(Maxwell::Cull::CullFace cull_face) {
switch (cull_face) {
case Maxwell::Cull::CullFace::Front:
return GL_FRONT;
case Maxwell::Cull::CullFace::Back:
return GL_BACK;
case Maxwell::Cull::CullFace::FrontAndBack:
return GL_FRONT_AND_BACK;
}
NGLOG_CRITICAL(Render_OpenGL, "Unimplemented cull face={}", static_cast<u32>(cull_face));
UNREACHABLE();
return {};
}
} // namespace MaxwellToGL

View File

@@ -150,7 +150,6 @@ void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuf
screen_info)) {
// Reset the screen info's display texture to its own permanent texture
screen_info.display_texture = screen_info.texture.resource.handle;
screen_info.display_texcoords = MathUtil::Rectangle<float>(0.f, 0.f, 1.f, 1.f);
Memory::RasterizerFlushVirtualRegion(framebuffer_addr, size_in_bytes,
Memory::FlushMode::Flush);

View File

@@ -27,7 +27,7 @@ struct TextureInfo {
/// Structure used for storing information about the display target for the Switch screen
struct ScreenInfo {
GLuint display_texture;
MathUtil::Rectangle<float> display_texcoords;
const MathUtil::Rectangle<float> display_texcoords{0.0f, 0.0f, 1.0f, 1.0f};
TextureInfo texture;
};

View File

@@ -5,6 +5,7 @@
#include <cstring>
#include "common/assert.h"
#include "core/memory.h"
#include "video_core/gpu.h"
#include "video_core/textures/decoders.h"
#include "video_core/textures/texture.h"
@@ -65,6 +66,18 @@ u32 BytesPerPixel(TextureFormat format) {
return 1;
case TextureFormat::R16_G16_B16_A16:
return 8;
case TextureFormat::R32_G32_B32_A32:
return 16;
default:
UNIMPLEMENTED_MSG("Format not implemented");
break;
}
}
static u32 DepthBytesPerPixel(DepthFormat format) {
switch (format) {
case DepthFormat::Z24_S8_UNORM:
return 4;
default:
UNIMPLEMENTED_MSG("Format not implemented");
break;
@@ -94,6 +107,7 @@ std::vector<u8> UnswizzleTexture(VAddr address, TextureFormat format, u32 width,
case TextureFormat::B5G6R5:
case TextureFormat::R8:
case TextureFormat::R16_G16_B16_A16:
case TextureFormat::R32_G32_B32_A32:
case TextureFormat::BF10GF11RF11:
case TextureFormat::ASTC_2D_4X4:
CopySwizzledData(width, height, bytes_per_pixel, bytes_per_pixel, data,
@@ -107,6 +121,26 @@ std::vector<u8> UnswizzleTexture(VAddr address, TextureFormat format, u32 width,
return unswizzled_data;
}
std::vector<u8> UnswizzleDepthTexture(VAddr address, DepthFormat format, u32 width, u32 height,
u32 block_height) {
u8* data = Memory::GetPointer(address);
u32 bytes_per_pixel = DepthBytesPerPixel(format);
std::vector<u8> unswizzled_data(width * height * bytes_per_pixel);
switch (format) {
case DepthFormat::Z24_S8_UNORM:
CopySwizzledData(width, height, bytes_per_pixel, bytes_per_pixel, data,
unswizzled_data.data(), true, block_height);
break;
default:
UNIMPLEMENTED_MSG("Format not implemented");
break;
}
return unswizzled_data;
}
std::vector<u8> DecodeTexture(const std::vector<u8>& texture_data, TextureFormat format, u32 width,
u32 height) {
std::vector<u8> rgba_data;
@@ -124,6 +158,7 @@ std::vector<u8> DecodeTexture(const std::vector<u8>& texture_data, TextureFormat
case TextureFormat::B5G6R5:
case TextureFormat::R8:
case TextureFormat::BF10GF11RF11:
case TextureFormat::R32_G32_B32_A32:
// TODO(Subv): For the time being just forward the same data without any decoding.
rgba_data = texture_data;
break;

View File

@@ -17,6 +17,12 @@ namespace Texture {
std::vector<u8> UnswizzleTexture(VAddr address, TextureFormat format, u32 width, u32 height,
u32 block_height = TICEntry::DefaultBlockHeight);
/**
* Unswizzles a swizzled depth texture without changing its format.
*/
std::vector<u8> UnswizzleDepthTexture(VAddr address, DepthFormat format, u32 width, u32 height,
u32 block_height = TICEntry::DefaultBlockHeight);
/// Copies texture data from a buffer and performs swizzling/unswizzling as necessary.
void CopySwizzledData(u32 width, u32 height, u32 bytes_per_pixel, u32 out_bytes_per_pixel,
u8* swizzled_data, u8* unswizzled_data, bool unswizzle, u32 block_height);

View File

@@ -84,6 +84,8 @@ void Config::ReadValues() {
qt_config->beginGroup("Renderer");
Settings::values.resolution_factor = qt_config->value("resolution_factor", 1.0).toFloat();
Settings::values.toggle_framelimit = qt_config->value("toggle_framelimit", true).toBool();
Settings::values.use_accurate_framebuffers =
qt_config->value("use_accurate_framebuffers", false).toBool();
Settings::values.bg_red = qt_config->value("bg_red", 0.0).toFloat();
Settings::values.bg_green = qt_config->value("bg_green", 0.0).toFloat();
@@ -184,6 +186,7 @@ void Config::SaveValues() {
qt_config->beginGroup("Renderer");
qt_config->setValue("resolution_factor", (double)Settings::values.resolution_factor);
qt_config->setValue("toggle_framelimit", Settings::values.toggle_framelimit);
qt_config->setValue("use_accurate_framebuffers", Settings::values.use_accurate_framebuffers);
// Cast to double because Qt's written float values are not human-readable
qt_config->setValue("bg_red", (double)Settings::values.bg_red);

View File

@@ -59,11 +59,13 @@ void ConfigureGraphics::setConfiguration() {
ui->resolution_factor_combobox->setCurrentIndex(
static_cast<int>(FromResolutionFactor(Settings::values.resolution_factor)));
ui->toggle_framelimit->setChecked(Settings::values.toggle_framelimit);
ui->use_accurate_framebuffers->setChecked(Settings::values.use_accurate_framebuffers);
}
void ConfigureGraphics::applyConfiguration() {
Settings::values.resolution_factor =
ToResolutionFactor(static_cast<Resolution>(ui->resolution_factor_combobox->currentIndex()));
Settings::values.toggle_framelimit = ui->toggle_framelimit->isChecked();
Settings::values.use_accurate_framebuffers = ui->use_accurate_framebuffers->isChecked();
Settings::Apply();
}

View File

@@ -29,6 +29,13 @@
</property>
</widget>
</item>
<item>
<widget class="QCheckBox" name="use_accurate_framebuffers">
<property name="text">
<string>Use accurate framebuffers (slow)</string>
</property>
</widget>
</item>
<item>
<layout class="QHBoxLayout" name="horizontalLayout">
<item>

View File

@@ -98,6 +98,8 @@ void Config::ReadValues() {
(float)sdl2_config->GetReal("Renderer", "resolution_factor", 1.0);
Settings::values.toggle_framelimit =
sdl2_config->GetBoolean("Renderer", "toggle_framelimit", true);
Settings::values.use_accurate_framebuffers =
sdl2_config->GetBoolean("Renderer", "use_accurate_framebuffers", false);
Settings::values.bg_red = (float)sdl2_config->GetReal("Renderer", "bg_red", 0.0);
Settings::values.bg_green = (float)sdl2_config->GetReal("Renderer", "bg_green", 0.0);

View File

@@ -102,6 +102,10 @@ resolution_factor =
# 0 (default): Off, 1: On
use_vsync =
# Whether to use accurate framebuffers
# 0 (default): Off (fast), 1 : On (slow)
use_accurate_framebuffers =
# The clear color for the renderer. What shows up on the sides of the bottom screen.
# Must be in range of 0.0-1.0. Defaults to 1.0 for all.
bg_red =