Compare commits

..

16 Commits

Author SHA1 Message Date
Lioncash
9046f764bf rasterizer_cache: Remove reliance on the System singleton
Rather than have a transparent dependency, we can make it explicit in
the interface. This also gets rid of the need to put the core include in
a header.
2018-11-08 06:16:38 -05:00
Lioncash
29f082775b gl_resource_manager: Amend clang-format discrepancies
Fixes the buildbot.
2018-11-08 00:23:45 -05:00
bunnei
0e05a9d58f Merge pull request #1658 from ogniK5377/holdtype-style
Updated npad styles on holdtype switches
2018-11-07 20:59:01 -08:00
David
581406af18 svcBreak now dumps information from the debug buffer passed (#1646)
* svcBreak now dumps information from the debug buffer passed

info1 and info2 seem to somtimes hold an address to a buffer, this is usually 4 bytes or the size of the int and contains an error code. There's other circumstances where it can be something different so we hexdump these to examine them at a later date.

* Addressed comments
2018-11-07 20:43:54 -08:00
bunnei
481d8716e0 Merge pull request #1655 from ogniK5377/shantae
Implement acc:TrySelectUserWithoutInteraction
2018-11-07 20:41:06 -08:00
David Marcec
a9c25ab9e4 Updated npad styles on holdtype switches
Fixes input for megaman
2018-11-08 01:07:14 +11:00
bunnei
81ff9e2473 Merge pull request #1630 from bunnei/fix-mapbufferex
memory_manager: Do not MapBufferEx over already in use memory.
2018-11-07 00:14:36 -08:00
bunnei
74bce4d68f Merge pull request #1635 from Tinob/master
Implement multi-target viewports and blending
2018-11-07 00:11:49 -08:00
bunnei
e5a0a23553 Merge pull request #1653 from degasus/profiler
gl_rasterizer: Update microprofile scopes.
2018-11-07 00:10:13 -08:00
Markus Wick
2c87f10267 gl_rasterizer_cache: Add profiles for Copy and Blit.
They were missed, and Copy is very high in profile here. It doesn't block the GPU,
but it stalls the driver thread. So with our bad GL instructions, this might block quite a while.
2018-11-06 17:45:32 +01:00
Markus Wick
7e59e907ef gl_resource_manager: Profile creation and deletion. 2018-11-06 17:45:32 +01:00
Markus Wick
80e4dbdce7 gl_stream_buffer: Profile orphaning of stream buffer.
This serialize to the driver thread and so it may block for a while.
So if it is in the benchmark, we get noticed if it happens too often.
2018-11-06 17:45:32 +01:00
Markus Wick
2ba4d878e5 microprofile: Drop ReleaseActiveBuffer scope.
This was created with the unfinished resampling PR in mind.
As the resampling is now on the audio thread, we don't need to care about this here any more.
2018-11-06 17:45:32 +01:00
Rodolfo Bogado
19038db489 Add support to color mask to avoid issues in blending caused by wrong values in the alpha channel in some render targets. 2018-11-05 00:24:19 -03:00
Rodolfo Bogado
145ae36963 Implement multi-target viewports and blending 2018-11-04 20:49:48 -03:00
bunnei
4aa9779ae1 memory_manager: Do not MapBufferEx over already in use memory.
- This fixes rendering when changing areas in Super Mario Odyssey.
2018-11-01 18:57:59 -04:00
21 changed files with 495 additions and 199 deletions

View File

@@ -11,7 +11,6 @@
#include "audio_core/stream.h"
#include "common/assert.h"
#include "common/logging/log.h"
#include "common/microprofile.h"
#include "core/core_timing.h"
#include "core/core_timing_util.h"
#include "core/settings.h"
@@ -104,10 +103,7 @@ void Stream::PlayNextBuffer() {
CoreTiming::ScheduleEventThreadsafe(GetBufferReleaseCycles(*active_buffer), release_event, {});
}
MICROPROFILE_DEFINE(AudioOutput, "Audio", "ReleaseActiveBuffer", MP_RGB(100, 100, 255));
void Stream::ReleaseActiveBuffer() {
MICROPROFILE_SCOPE(AudioOutput);
ASSERT(active_buffer);
released_buffers.push(std::move(active_buffer));
release_callback();

View File

@@ -395,16 +395,42 @@ struct BreakReason {
/// Break program execution
static void Break(u32 reason, u64 info1, u64 info2) {
BreakReason break_reason{reason};
bool has_dumped_buffer{};
const auto handle_debug_buffer = [&](VAddr addr, u64 sz) {
if (sz == 0 || addr == 0 || has_dumped_buffer) {
return;
}
// This typically is an error code so we're going to assume this is the case
if (sz == sizeof(u32)) {
LOG_CRITICAL(Debug_Emulated, "debug_buffer_err_code={:X}", Memory::Read32(addr));
} else {
// We don't know what's in here so we'll hexdump it
std::vector<u8> debug_buffer(sz);
Memory::ReadBlock(addr, debug_buffer.data(), sz);
std::string hexdump;
for (std::size_t i = 0; i < debug_buffer.size(); i++) {
hexdump += fmt::format("{:02X} ", debug_buffer[i]);
if (i != 0 && i % 16 == 0) {
hexdump += '\n';
}
}
LOG_CRITICAL(Debug_Emulated, "debug_buffer=\n{}", hexdump);
}
has_dumped_buffer = true;
};
switch (break_reason.break_type) {
case BreakType::Panic:
LOG_CRITICAL(Debug_Emulated, "Signalling debugger, PANIC! info1=0x{:016X}, info2=0x{:016X}",
info1, info2);
handle_debug_buffer(info1, info2);
break;
case BreakType::AssertionFailed:
LOG_CRITICAL(Debug_Emulated,
"Signalling debugger, Assertion failed! info1=0x{:016X}, info2=0x{:016X}",
info1, info2);
handle_debug_buffer(info1, info2);
break;
case BreakType::PreNROLoad:
LOG_WARNING(
@@ -433,6 +459,7 @@ static void Break(u32 reason, u64 info1, u64 info2) {
Debug_Emulated,
"Signalling debugger, Unknown break reason {}, info1=0x{:016X}, info2=0x{:016X}",
static_cast<u32>(break_reason.break_type.Value()), info1, info2);
handle_debug_buffer(info1, info2);
break;
}
@@ -441,6 +468,7 @@ static void Break(u32 reason, u64 info1, u64 info2) {
Debug_Emulated,
"Emulated program broke execution! reason=0x{:016X}, info1=0x{:016X}, info2=0x{:016X}",
reason, info1, info2);
handle_debug_buffer(info1, info2);
ASSERT(false);
Core::CurrentProcess()->PrepareForTermination();

View File

@@ -392,8 +392,10 @@ std::size_t Controller_NPad::GetSupportedNPadIdTypesSize() const {
}
void Controller_NPad::SetHoldType(NpadHoldType joy_hold_type) {
styleset_changed_event->Signal();
hold_type = joy_hold_type;
}
Controller_NPad::NpadHoldType Controller_NPad::GetHoldType() const {
return hold_type;
}

View File

@@ -37,6 +37,22 @@ void Maxwell3D::InitializeRegisterDefaults() {
regs.viewport[viewport].depth_range_near = 0.0f;
regs.viewport[viewport].depth_range_far = 1.0f;
}
// Doom and Bomberman seems to use the uninitialized registers and just enable blend
// so initialize blend registers with sane values
regs.blend.equation_rgb = Regs::Blend::Equation::Add;
regs.blend.factor_source_rgb = Regs::Blend::Factor::One;
regs.blend.factor_dest_rgb = Regs::Blend::Factor::Zero;
regs.blend.equation_a = Regs::Blend::Equation::Add;
regs.blend.factor_source_a = Regs::Blend::Factor::One;
regs.blend.factor_dest_a = Regs::Blend::Factor::Zero;
for (std::size_t blend_index = 0; blend_index < Regs::NumRenderTargets; blend_index++) {
regs.independent_blend[blend_index].equation_rgb = Regs::Blend::Equation::Add;
regs.independent_blend[blend_index].factor_source_rgb = Regs::Blend::Factor::One;
regs.independent_blend[blend_index].factor_dest_rgb = Regs::Blend::Factor::Zero;
regs.independent_blend[blend_index].equation_a = Regs::Blend::Equation::Add;
regs.independent_blend[blend_index].factor_source_a = Regs::Blend::Factor::One;
regs.independent_blend[blend_index].factor_dest_a = Regs::Blend::Factor::Zero;
}
}
void Maxwell3D::CallMacroMethod(u32 method, std::vector<u32> parameters) {

View File

@@ -462,6 +462,16 @@ public:
}
};
struct ColorMask {
union {
u32 raw;
BitField<0, 4, u32> R;
BitField<4, 4, u32> G;
BitField<8, 4, u32> B;
BitField<12, 4, u32> A;
};
};
bool IsShaderConfigEnabled(std::size_t index) const {
// The VertexB is always enabled.
if (index == static_cast<std::size_t>(Regs::ShaderProgram::VertexB)) {
@@ -571,7 +581,11 @@ public:
u32 stencil_back_mask;
u32 stencil_back_func_mask;
INSERT_PADDING_WORDS(0x13);
INSERT_PADDING_WORDS(0xC);
u32 color_mask_common;
INSERT_PADDING_WORDS(0x6);
u32 rt_separate_frag_data;
@@ -646,8 +660,14 @@ public:
ComparisonOp depth_test_func;
float alpha_test_ref;
ComparisonOp alpha_test_func;
INSERT_PADDING_WORDS(0x9);
u32 draw_tfb_stride;
struct {
float r;
float g;
float b;
float a;
} blend_color;
INSERT_PADDING_WORDS(0x4);
struct {
u32 separate_alpha;
@@ -841,8 +861,9 @@ public:
BitField<6, 4, u32> RT;
BitField<10, 11, u32> layer;
} clear_buffers;
INSERT_PADDING_WORDS(0x4B);
INSERT_PADDING_WORDS(0xB);
std::array<ColorMask, NumRenderTargets> color_mask;
INSERT_PADDING_WORDS(0x38);
struct {
u32 query_address_high;
@@ -1075,6 +1096,7 @@ ASSERT_REG_POSITION(scissor_test, 0x380);
ASSERT_REG_POSITION(stencil_back_func_ref, 0x3D5);
ASSERT_REG_POSITION(stencil_back_mask, 0x3D6);
ASSERT_REG_POSITION(stencil_back_func_mask, 0x3D7);
ASSERT_REG_POSITION(color_mask_common, 0x3E4);
ASSERT_REG_POSITION(rt_separate_frag_data, 0x3EB);
ASSERT_REG_POSITION(zeta, 0x3F8);
ASSERT_REG_POSITION(vertex_attrib_format, 0x458);
@@ -1087,6 +1109,10 @@ ASSERT_REG_POSITION(depth_write_enabled, 0x4BA);
ASSERT_REG_POSITION(alpha_test_enabled, 0x4BB);
ASSERT_REG_POSITION(d3d_cull_mode, 0x4C2);
ASSERT_REG_POSITION(depth_test_func, 0x4C3);
ASSERT_REG_POSITION(alpha_test_ref, 0x4C4);
ASSERT_REG_POSITION(alpha_test_func, 0x4C5);
ASSERT_REG_POSITION(draw_tfb_stride, 0x4C6);
ASSERT_REG_POSITION(blend_color, 0x4C7);
ASSERT_REG_POSITION(blend, 0x4CF);
ASSERT_REG_POSITION(stencil_enable, 0x4E0);
ASSERT_REG_POSITION(stencil_front_op_fail, 0x4E1);
@@ -1117,6 +1143,7 @@ ASSERT_REG_POSITION(instanced_arrays, 0x620);
ASSERT_REG_POSITION(cull, 0x646);
ASSERT_REG_POSITION(logic_op, 0x671);
ASSERT_REG_POSITION(clear_buffers, 0x674);
ASSERT_REG_POSITION(color_mask, 0x680);
ASSERT_REG_POSITION(query, 0x6C0);
ASSERT_REG_POSITION(vertex_array[0], 0x700);
ASSERT_REG_POSITION(independent_blend, 0x780);

View File

@@ -4,18 +4,21 @@
#include "common/alignment.h"
#include "common/assert.h"
#include "common/logging/log.h"
#include "video_core/memory_manager.h"
namespace Tegra {
GPUVAddr MemoryManager::AllocateSpace(u64 size, u64 align) {
std::optional<GPUVAddr> gpu_addr = FindFreeBlock(size, align);
ASSERT(gpu_addr);
const std::optional<GPUVAddr> gpu_addr{FindFreeBlock(0, size, align, PageStatus::Unmapped)};
for (u64 offset = 0; offset < size; offset += PAGE_SIZE) {
VAddr& slot = PageSlot(*gpu_addr + offset);
ASSERT_MSG(gpu_addr, "unable to find available GPU memory");
for (u64 offset{}; offset < size; offset += PAGE_SIZE) {
VAddr& slot{PageSlot(*gpu_addr + offset)};
ASSERT(slot == static_cast<u64>(PageStatus::Unmapped));
slot = static_cast<u64>(PageStatus::Allocated);
}
@@ -23,10 +26,11 @@ GPUVAddr MemoryManager::AllocateSpace(u64 size, u64 align) {
}
GPUVAddr MemoryManager::AllocateSpace(GPUVAddr gpu_addr, u64 size, u64 align) {
for (u64 offset = 0; offset < size; offset += PAGE_SIZE) {
VAddr& slot = PageSlot(gpu_addr + offset);
for (u64 offset{}; offset < size; offset += PAGE_SIZE) {
VAddr& slot{PageSlot(gpu_addr + offset)};
ASSERT(slot == static_cast<u64>(PageStatus::Unmapped));
slot = static_cast<u64>(PageStatus::Allocated);
}
@@ -34,17 +38,19 @@ GPUVAddr MemoryManager::AllocateSpace(GPUVAddr gpu_addr, u64 size, u64 align) {
}
GPUVAddr MemoryManager::MapBufferEx(VAddr cpu_addr, u64 size) {
std::optional<GPUVAddr> gpu_addr = FindFreeBlock(size, PAGE_SIZE);
ASSERT(gpu_addr);
const std::optional<GPUVAddr> gpu_addr{FindFreeBlock(0, size, PAGE_SIZE, PageStatus::Unmapped)};
for (u64 offset = 0; offset < size; offset += PAGE_SIZE) {
VAddr& slot = PageSlot(*gpu_addr + offset);
ASSERT_MSG(gpu_addr, "unable to find available GPU memory");
for (u64 offset{}; offset < size; offset += PAGE_SIZE) {
VAddr& slot{PageSlot(*gpu_addr + offset)};
ASSERT(slot == static_cast<u64>(PageStatus::Unmapped));
slot = cpu_addr + offset;
}
MappedRegion region{cpu_addr, *gpu_addr, size};
const MappedRegion region{cpu_addr, *gpu_addr, size};
mapped_regions.push_back(region);
return *gpu_addr;
@@ -53,14 +59,31 @@ GPUVAddr MemoryManager::MapBufferEx(VAddr cpu_addr, u64 size) {
GPUVAddr MemoryManager::MapBufferEx(VAddr cpu_addr, GPUVAddr gpu_addr, u64 size) {
ASSERT((gpu_addr & PAGE_MASK) == 0);
for (u64 offset = 0; offset < size; offset += PAGE_SIZE) {
VAddr& slot = PageSlot(gpu_addr + offset);
if (PageSlot(gpu_addr) != static_cast<u64>(PageStatus::Allocated)) {
// Page has been already mapped. In this case, we must find a new area of memory to use that
// is different than the specified one. Super Mario Odyssey hits this scenario when changing
// areas, but we do not want to overwrite the old pages.
// TODO(bunnei): We need to write a hardware test to confirm this behavior.
LOG_ERROR(HW_GPU, "attempting to map addr 0x{:016X}, which is not available!", gpu_addr);
const std::optional<GPUVAddr> new_gpu_addr{
FindFreeBlock(gpu_addr, size, PAGE_SIZE, PageStatus::Allocated)};
ASSERT_MSG(new_gpu_addr, "unable to find available GPU memory");
gpu_addr = *new_gpu_addr;
}
for (u64 offset{}; offset < size; offset += PAGE_SIZE) {
VAddr& slot{PageSlot(gpu_addr + offset)};
ASSERT(slot == static_cast<u64>(PageStatus::Allocated));
slot = cpu_addr + offset;
}
MappedRegion region{cpu_addr, gpu_addr, size};
const MappedRegion region{cpu_addr, gpu_addr, size};
mapped_regions.push_back(region);
return gpu_addr;
@@ -69,11 +92,12 @@ GPUVAddr MemoryManager::MapBufferEx(VAddr cpu_addr, GPUVAddr gpu_addr, u64 size)
GPUVAddr MemoryManager::UnmapBuffer(GPUVAddr gpu_addr, u64 size) {
ASSERT((gpu_addr & PAGE_MASK) == 0);
for (u64 offset = 0; offset < size; offset += PAGE_SIZE) {
VAddr& slot = PageSlot(gpu_addr + offset);
for (u64 offset{}; offset < size; offset += PAGE_SIZE) {
VAddr& slot{PageSlot(gpu_addr + offset)};
ASSERT(slot != static_cast<u64>(PageStatus::Allocated) &&
slot != static_cast<u64>(PageStatus::Unmapped));
slot = static_cast<u64>(PageStatus::Unmapped);
}
@@ -97,13 +121,14 @@ GPUVAddr MemoryManager::GetRegionEnd(GPUVAddr region_start) const {
return {};
}
std::optional<GPUVAddr> MemoryManager::FindFreeBlock(u64 size, u64 align) {
GPUVAddr gpu_addr = 0;
u64 free_space = 0;
std::optional<GPUVAddr> MemoryManager::FindFreeBlock(GPUVAddr region_start, u64 size, u64 align,
PageStatus status) {
GPUVAddr gpu_addr{region_start};
u64 free_space{};
align = (align + PAGE_MASK) & ~PAGE_MASK;
while (gpu_addr + free_space < MAX_ADDRESS) {
if (!IsPageMapped(gpu_addr + free_space)) {
if (PageSlot(gpu_addr + free_space) == static_cast<u64>(status)) {
free_space += PAGE_SIZE;
if (free_space >= size) {
return gpu_addr;
@@ -119,7 +144,7 @@ std::optional<GPUVAddr> MemoryManager::FindFreeBlock(u64 size, u64 align) {
}
std::optional<VAddr> MemoryManager::GpuToCpuAddress(GPUVAddr gpu_addr) {
VAddr base_addr = PageSlot(gpu_addr);
const VAddr base_addr{PageSlot(gpu_addr)};
if (base_addr == static_cast<u64>(PageStatus::Allocated) ||
base_addr == static_cast<u64>(PageStatus::Unmapped)) {
@@ -133,19 +158,15 @@ std::vector<GPUVAddr> MemoryManager::CpuToGpuAddress(VAddr cpu_addr) const {
std::vector<GPUVAddr> results;
for (const auto& region : mapped_regions) {
if (cpu_addr >= region.cpu_addr && cpu_addr < (region.cpu_addr + region.size)) {
u64 offset = cpu_addr - region.cpu_addr;
const u64 offset{cpu_addr - region.cpu_addr};
results.push_back(region.gpu_addr + offset);
}
}
return results;
}
bool MemoryManager::IsPageMapped(GPUVAddr gpu_addr) {
return PageSlot(gpu_addr) != static_cast<u64>(PageStatus::Unmapped);
}
VAddr& MemoryManager::PageSlot(GPUVAddr gpu_addr) {
auto& block = page_table[(gpu_addr >> (PAGE_BITS + PAGE_TABLE_BITS)) & PAGE_TABLE_MASK];
auto& block{page_table[(gpu_addr >> (PAGE_BITS + PAGE_TABLE_BITS)) & PAGE_TABLE_MASK]};
if (!block) {
block = std::make_unique<PageBlock>();
block->fill(static_cast<VAddr>(PageStatus::Unmapped));

View File

@@ -34,15 +34,15 @@ public:
static constexpr u64 PAGE_MASK = PAGE_SIZE - 1;
private:
std::optional<GPUVAddr> FindFreeBlock(u64 size, u64 align = 1);
bool IsPageMapped(GPUVAddr gpu_addr);
VAddr& PageSlot(GPUVAddr gpu_addr);
enum class PageStatus : u64 {
Unmapped = 0xFFFFFFFFFFFFFFFFULL,
Allocated = 0xFFFFFFFFFFFFFFFEULL,
};
std::optional<GPUVAddr> FindFreeBlock(GPUVAddr region_start, u64 size, u64 align,
PageStatus status);
VAddr& PageSlot(GPUVAddr gpu_addr);
static constexpr u64 MAX_ADDRESS{0x10000000000ULL};
static constexpr u64 PAGE_TABLE_BITS{10};
static constexpr u64 PAGE_TABLE_SIZE{1 << PAGE_TABLE_BITS};

View File

@@ -10,10 +10,8 @@
#include <boost/range/iterator_range_core.hpp>
#include "common/common_types.h"
#include "core/core.h"
#include "core/settings.h"
#include "video_core/rasterizer_interface.h"
#include "video_core/renderer_base.h"
class RasterizerCacheObject {
public:
@@ -64,6 +62,8 @@ class RasterizerCache : NonCopyable {
friend class RasterizerCacheObject;
public:
explicit RasterizerCache(VideoCore::RasterizerInterface& rasterizer) : rasterizer{rasterizer} {}
/// Write any cached resources overlapping the specified region back to memory
void FlushRegion(Tegra::GPUVAddr addr, size_t size) {
const auto& objects{GetSortedObjectsFromRegion(addr, size)};
@@ -109,14 +109,12 @@ protected:
void Register(const T& object) {
object->SetIsRegistered(true);
object_cache.add({GetInterval(object), ObjectSet{object}});
auto& rasterizer = Core::System::GetInstance().Renderer().Rasterizer();
rasterizer.UpdatePagesCachedCount(object->GetAddr(), object->GetSizeInBytes(), 1);
}
/// Unregisters an object from the cache
void Unregister(const T& object) {
object->SetIsRegistered(false);
auto& rasterizer = Core::System::GetInstance().Renderer().Rasterizer();
rasterizer.UpdatePagesCachedCount(object->GetAddr(), object->GetSizeInBytes(), -1);
// Only flush if use_accurate_gpu_emulation is enabled, as it incurs a performance hit
@@ -177,4 +175,5 @@ private:
ObjectCache object_cache; ///< Cache of objects
u64 modified_ticks{}; ///< Counter of cache state ticks, used for in-order flushing
VideoCore::RasterizerInterface& rasterizer;
};

View File

@@ -9,10 +9,12 @@
#include "core/core.h"
#include "core/memory.h"
#include "video_core/renderer_opengl/gl_buffer_cache.h"
#include "video_core/renderer_opengl/gl_rasterizer.h"
namespace OpenGL {
OGLBufferCache::OGLBufferCache(std::size_t size) : stream_buffer(GL_ARRAY_BUFFER, size) {}
OGLBufferCache::OGLBufferCache(RasterizerOpenGL& rasterizer, std::size_t size)
: RasterizerCache{rasterizer}, stream_buffer(GL_ARRAY_BUFFER, size) {}
GLintptr OGLBufferCache::UploadMemory(Tegra::GPUVAddr gpu_addr, std::size_t size,
std::size_t alignment, bool cache) {

View File

@@ -15,6 +15,8 @@
namespace OpenGL {
class RasterizerOpenGL;
struct CachedBufferEntry final : public RasterizerCacheObject {
VAddr GetAddr() const override {
return addr;
@@ -35,7 +37,7 @@ struct CachedBufferEntry final : public RasterizerCacheObject {
class OGLBufferCache final : public RasterizerCache<std::shared_ptr<CachedBufferEntry>> {
public:
explicit OGLBufferCache(std::size_t size);
explicit OGLBufferCache(RasterizerOpenGL& rasterizer, std::size_t size);
/// Uploads data from a guest GPU address. Returns host's buffer offset where it's been
/// allocated.

View File

@@ -6,6 +6,7 @@
#include <array>
#include "common/assert.h"
#include "common/common_types.h"
#include "core/core.h"
#include "core/memory.h"
#include "video_core/renderer_opengl/gl_buffer_cache.h"
#include "video_core/renderer_opengl/gl_primitive_assembler.h"

View File

@@ -79,7 +79,8 @@ struct DrawParameters {
};
RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window, ScreenInfo& info)
: emu_window{window}, screen_info{info}, buffer_cache(STREAM_BUFFER_SIZE) {
: res_cache{*this}, shader_cache{*this}, emu_window{window}, screen_info{info},
buffer_cache(*this, STREAM_BUFFER_SIZE) {
// Create sampler objects
for (std::size_t i = 0; i < texture_samplers.size(); ++i) {
texture_samplers[i].Create();
@@ -511,10 +512,10 @@ void RasterizerOpenGL::Clear() {
OpenGLState clear_state;
clear_state.draw.draw_framebuffer = framebuffer.handle;
clear_state.color_mask.red_enabled = regs.clear_buffers.R ? GL_TRUE : GL_FALSE;
clear_state.color_mask.green_enabled = regs.clear_buffers.G ? GL_TRUE : GL_FALSE;
clear_state.color_mask.blue_enabled = regs.clear_buffers.B ? GL_TRUE : GL_FALSE;
clear_state.color_mask.alpha_enabled = regs.clear_buffers.A ? GL_TRUE : GL_FALSE;
clear_state.color_mask[0].red_enabled = regs.clear_buffers.R ? GL_TRUE : GL_FALSE;
clear_state.color_mask[0].green_enabled = regs.clear_buffers.G ? GL_TRUE : GL_FALSE;
clear_state.color_mask[0].blue_enabled = regs.clear_buffers.B ? GL_TRUE : GL_FALSE;
clear_state.color_mask[0].alpha_enabled = regs.clear_buffers.A ? GL_TRUE : GL_FALSE;
if (regs.clear_buffers.R || regs.clear_buffers.G || regs.clear_buffers.B ||
regs.clear_buffers.A) {
@@ -573,14 +574,13 @@ void RasterizerOpenGL::DrawArrays() {
ScopeAcquireGLContext acquire_context{emu_window};
ConfigureFramebuffers();
SyncColorMask();
SyncDepthTestState();
SyncStencilTestState();
SyncBlendState();
SyncLogicOpState();
SyncCullMode();
SyncPrimitiveRestart();
SyncDepthRange();
SyncScissorTest();
// Alpha Testing is synced on shaders.
SyncTransformFeedback();
@@ -899,12 +899,16 @@ u32 RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, Shader& shader,
void RasterizerOpenGL::SyncViewport() {
const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
const MathUtil::Rectangle<s32> viewport_rect{regs.viewport_transform[0].GetRect()};
state.viewport.x = viewport_rect.left;
state.viewport.y = viewport_rect.bottom;
state.viewport.width = static_cast<GLsizei>(viewport_rect.GetWidth());
state.viewport.height = static_cast<GLsizei>(viewport_rect.GetHeight());
for (size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) {
const MathUtil::Rectangle<s32> viewport_rect{regs.viewport_transform[i].GetRect()};
auto& viewport = state.viewports[i];
viewport.x = viewport_rect.left;
viewport.y = viewport_rect.bottom;
viewport.width = static_cast<GLsizei>(viewport_rect.GetWidth());
viewport.height = static_cast<GLsizei>(viewport_rect.GetHeight());
viewport.depth_range_far = regs.viewport[i].depth_range_far;
viewport.depth_range_near = regs.viewport[i].depth_range_near;
}
}
void RasterizerOpenGL::SyncClipEnabled() {
@@ -946,13 +950,6 @@ void RasterizerOpenGL::SyncPrimitiveRestart() {
state.primitive_restart.index = regs.primitive_restart.index;
}
void RasterizerOpenGL::SyncDepthRange() {
const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
state.depth.depth_range_near = regs.viewport->depth_range_near;
state.depth.depth_range_far = regs.viewport->depth_range_far;
}
void RasterizerOpenGL::SyncDepthTestState() {
const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
@@ -993,26 +990,60 @@ void RasterizerOpenGL::SyncStencilTestState() {
state.stencil.back.write_mask = regs.stencil_back_mask;
}
void RasterizerOpenGL::SyncColorMask() {
const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
for (size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) {
const auto& source = regs.color_mask[regs.color_mask_common ? 0 : i];
auto& dest = state.color_mask[i];
dest.red_enabled = (source.R == 0) ? GL_FALSE : GL_TRUE;
dest.green_enabled = (source.G == 0) ? GL_FALSE : GL_TRUE;
dest.blue_enabled = (source.B == 0) ? GL_FALSE : GL_TRUE;
dest.alpha_enabled = (source.A == 0) ? GL_FALSE : GL_TRUE;
}
}
void RasterizerOpenGL::SyncBlendState() {
const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
// TODO(Subv): Support more than just render target 0.
state.blend.enabled = regs.blend.enable[0] != 0;
state.blend_color.red = regs.blend_color.r;
state.blend_color.green = regs.blend_color.g;
state.blend_color.blue = regs.blend_color.b;
state.blend_color.alpha = regs.blend_color.a;
if (!state.blend.enabled)
state.independant_blend.enabled = regs.independent_blend_enable;
if (!state.independant_blend.enabled) {
auto& blend = state.blend[0];
blend.enabled = regs.blend.enable[0] != 0;
blend.separate_alpha = regs.blend.separate_alpha;
blend.rgb_equation = MaxwellToGL::BlendEquation(regs.blend.equation_rgb);
blend.src_rgb_func = MaxwellToGL::BlendFunc(regs.blend.factor_source_rgb);
blend.dst_rgb_func = MaxwellToGL::BlendFunc(regs.blend.factor_dest_rgb);
if (blend.separate_alpha) {
blend.a_equation = MaxwellToGL::BlendEquation(regs.blend.equation_a);
blend.src_a_func = MaxwellToGL::BlendFunc(regs.blend.factor_source_a);
blend.dst_a_func = MaxwellToGL::BlendFunc(regs.blend.factor_dest_a);
}
for (size_t i = 1; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) {
state.blend[i].enabled = false;
}
return;
}
ASSERT_MSG(regs.logic_op.enable == 0,
"Blending and logic op can't be enabled at the same time.");
ASSERT_MSG(regs.independent_blend_enable == 1, "Only independent blending is implemented");
ASSERT_MSG(!regs.independent_blend[0].separate_alpha, "Unimplemented");
state.blend.rgb_equation = MaxwellToGL::BlendEquation(regs.independent_blend[0].equation_rgb);
state.blend.src_rgb_func = MaxwellToGL::BlendFunc(regs.independent_blend[0].factor_source_rgb);
state.blend.dst_rgb_func = MaxwellToGL::BlendFunc(regs.independent_blend[0].factor_dest_rgb);
state.blend.a_equation = MaxwellToGL::BlendEquation(regs.independent_blend[0].equation_a);
state.blend.src_a_func = MaxwellToGL::BlendFunc(regs.independent_blend[0].factor_source_a);
state.blend.dst_a_func = MaxwellToGL::BlendFunc(regs.independent_blend[0].factor_dest_a);
for (size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) {
auto& blend = state.blend[i];
blend.enabled = regs.blend.enable[i] != 0;
if (!blend.enabled)
continue;
blend.separate_alpha = regs.independent_blend[i].separate_alpha;
blend.rgb_equation = MaxwellToGL::BlendEquation(regs.independent_blend[i].equation_rgb);
blend.src_rgb_func = MaxwellToGL::BlendFunc(regs.independent_blend[i].factor_source_rgb);
blend.dst_rgb_func = MaxwellToGL::BlendFunc(regs.independent_blend[i].factor_dest_rgb);
if (blend.separate_alpha) {
blend.a_equation = MaxwellToGL::BlendEquation(regs.independent_blend[i].equation_a);
blend.src_a_func = MaxwellToGL::BlendFunc(regs.independent_blend[i].factor_source_a);
blend.dst_a_func = MaxwellToGL::BlendFunc(regs.independent_blend[i].factor_dest_a);
}
}
}
void RasterizerOpenGL::SyncLogicOpState() {
@@ -1031,19 +1062,19 @@ void RasterizerOpenGL::SyncLogicOpState() {
}
void RasterizerOpenGL::SyncScissorTest() {
// TODO: what is the correct behavior here, a single scissor for all targets
// or scissor disabled for the rest of the targets?
const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
state.scissor.enabled = (regs.scissor_test.enable != 0);
// TODO(Blinkhawk): Figure if the hardware supports scissor testing per viewport and how it's
// implemented.
if (regs.scissor_test.enable != 0) {
const u32 width = regs.scissor_test.max_x - regs.scissor_test.min_x;
const u32 height = regs.scissor_test.max_y - regs.scissor_test.min_y;
state.scissor.x = regs.scissor_test.min_x;
state.scissor.y = regs.scissor_test.min_y;
state.scissor.width = width;
state.scissor.height = height;
if (regs.scissor_test.enable == 0) {
return;
}
const u32 width = regs.scissor_test.max_x - regs.scissor_test.min_x;
const u32 height = regs.scissor_test.max_y - regs.scissor_test.min_y;
state.scissor.x = regs.scissor_test.min_x;
state.scissor.y = regs.scissor_test.min_y;
state.scissor.width = width;
state.scissor.height = height;
}
void RasterizerOpenGL::SyncTransformFeedback() {

View File

@@ -133,7 +133,7 @@ private:
u32 SetupTextures(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, Shader& shader,
GLenum primitive_mode, u32 current_unit);
/// Syncs the viewport to match the guest state
/// Syncs the viewport and depth range to match the guest state
void SyncViewport();
/// Syncs the clip enabled status to match the guest state
@@ -148,9 +148,6 @@ private:
/// Syncs the primitve restart to match the guest state
void SyncPrimitiveRestart();
/// Syncs the depth range to match the guest state
void SyncDepthRange();
/// Syncs the depth test state to match the guest state
void SyncDepthTestState();
@@ -172,6 +169,9 @@ private:
/// Syncs the point state to match the guest state
void SyncPointState();
/// Syncs Color Mask
void SyncColorMask();
/// Check asserts for alpha testing.
void CheckAlphaTests();

View File

@@ -15,6 +15,7 @@
#include "core/memory.h"
#include "core/settings.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/renderer_opengl/gl_rasterizer.h"
#include "video_core/renderer_opengl/gl_rasterizer_cache.h"
#include "video_core/renderer_opengl/gl_state.h"
#include "video_core/renderer_opengl/utils.h"
@@ -562,9 +563,11 @@ void SwizzleFunc(const GLConversionArray& functions, const SurfaceParams& params
}
}
MICROPROFILE_DEFINE(OpenGL_BlitSurface, "OpenGL", "BlitSurface", MP_RGB(128, 192, 64));
static bool BlitSurface(const Surface& src_surface, const Surface& dst_surface,
GLuint read_fb_handle, GLuint draw_fb_handle, GLenum src_attachment = 0,
GLenum dst_attachment = 0, std::size_t cubemap_face = 0) {
MICROPROFILE_SCOPE(OpenGL_BlitSurface);
const auto& src_params{src_surface->GetSurfaceParams()};
const auto& dst_params{dst_surface->GetSurfaceParams()};
@@ -704,9 +707,11 @@ static void FastCopySurface(const Surface& src_surface, const Surface& dst_surfa
0, 0, width, height, 1);
}
MICROPROFILE_DEFINE(OpenGL_CopySurface, "OpenGL", "CopySurface", MP_RGB(128, 192, 64));
static void CopySurface(const Surface& src_surface, const Surface& dst_surface,
GLuint copy_pbo_handle, GLenum src_attachment = 0,
GLenum dst_attachment = 0, std::size_t cubemap_face = 0) {
MICROPROFILE_SCOPE(OpenGL_CopySurface);
ASSERT_MSG(dst_attachment == 0, "Unimplemented");
const auto& src_params{src_surface->GetSurfaceParams()};
@@ -975,7 +980,7 @@ static void ConvertFormatAsNeeded_FlushGLBuffer(std::vector<u8>& data, PixelForm
}
}
MICROPROFILE_DEFINE(OpenGL_SurfaceLoad, "OpenGL", "Surface Load", MP_RGB(128, 64, 192));
MICROPROFILE_DEFINE(OpenGL_SurfaceLoad, "OpenGL", "Surface Load", MP_RGB(128, 192, 64));
void CachedSurface::LoadGLBuffer() {
MICROPROFILE_SCOPE(OpenGL_SurfaceLoad);
gl_buffer.resize(params.max_mip_level);
@@ -1157,7 +1162,7 @@ void CachedSurface::UploadGLMipmapTexture(u32 mip_map, GLuint read_fb_handle,
glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
}
MICROPROFILE_DEFINE(OpenGL_TextureUL, "OpenGL", "Texture Upload", MP_RGB(128, 64, 192));
MICROPROFILE_DEFINE(OpenGL_TextureUL, "OpenGL", "Texture Upload", MP_RGB(128, 192, 64));
void CachedSurface::UploadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle) {
if (params.type == SurfaceType::Fill)
return;
@@ -1168,7 +1173,8 @@ void CachedSurface::UploadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle
UploadGLMipmapTexture(i, read_fb_handle, draw_fb_handle);
}
RasterizerCacheOpenGL::RasterizerCacheOpenGL() {
RasterizerCacheOpenGL::RasterizerCacheOpenGL(RasterizerOpenGL& rasterizer)
: RasterizerCache{rasterizer} {
read_framebuffer.Create();
draw_framebuffer.Create();
copy_pbo.Create();

View File

@@ -264,6 +264,8 @@ struct hash<SurfaceReserveKey> {
namespace OpenGL {
class RasterizerOpenGL;
class CachedSurface final : public RasterizerCacheObject {
public:
CachedSurface(const SurfaceParams& params);
@@ -311,7 +313,7 @@ private:
class RasterizerCacheOpenGL final : public RasterizerCache<Surface> {
public:
RasterizerCacheOpenGL();
explicit RasterizerCacheOpenGL(RasterizerOpenGL& rasterizer);
/// Get a surface based on the texture configuration
Surface GetTextureSurface(const Tegra::Texture::FullTextureInfo& config,

View File

@@ -5,21 +5,29 @@
#include <utility>
#include <glad/glad.h>
#include "common/common_types.h"
#include "common/microprofile.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_shader_util.h"
#include "video_core/renderer_opengl/gl_state.h"
MICROPROFILE_DEFINE(OpenGL_ResourceCreation, "OpenGL", "Resource Creation", MP_RGB(128, 128, 192));
MICROPROFILE_DEFINE(OpenGL_ResourceDeletion, "OpenGL", "Resource Deletion", MP_RGB(128, 128, 192));
namespace OpenGL {
void OGLTexture::Create() {
if (handle != 0)
return;
MICROPROFILE_SCOPE(OpenGL_ResourceCreation);
glGenTextures(1, &handle);
}
void OGLTexture::Release() {
if (handle == 0)
return;
MICROPROFILE_SCOPE(OpenGL_ResourceDeletion);
glDeleteTextures(1, &handle);
OpenGLState::GetCurState().UnbindTexture(handle).Apply();
handle = 0;
@@ -28,12 +36,16 @@ void OGLTexture::Release() {
void OGLSampler::Create() {
if (handle != 0)
return;
MICROPROFILE_SCOPE(OpenGL_ResourceCreation);
glGenSamplers(1, &handle);
}
void OGLSampler::Release() {
if (handle == 0)
return;
MICROPROFILE_SCOPE(OpenGL_ResourceDeletion);
glDeleteSamplers(1, &handle);
OpenGLState::GetCurState().ResetSampler(handle).Apply();
handle = 0;
@@ -44,12 +56,16 @@ void OGLShader::Create(const char* source, GLenum type) {
return;
if (source == nullptr)
return;
MICROPROFILE_SCOPE(OpenGL_ResourceCreation);
handle = GLShader::LoadShader(source, type);
}
void OGLShader::Release() {
if (handle == 0)
return;
MICROPROFILE_SCOPE(OpenGL_ResourceDeletion);
glDeleteShader(handle);
handle = 0;
}
@@ -63,12 +79,16 @@ void OGLProgram::CreateFromSource(const char* vert_shader, const char* geo_shade
geo.Create(geo_shader, GL_GEOMETRY_SHADER);
if (frag_shader)
frag.Create(frag_shader, GL_FRAGMENT_SHADER);
MICROPROFILE_SCOPE(OpenGL_ResourceCreation);
Create(separable_program, vert.handle, geo.handle, frag.handle);
}
void OGLProgram::Release() {
if (handle == 0)
return;
MICROPROFILE_SCOPE(OpenGL_ResourceDeletion);
glDeleteProgram(handle);
OpenGLState::GetCurState().ResetProgram(handle).Apply();
handle = 0;
@@ -77,12 +97,16 @@ void OGLProgram::Release() {
void OGLPipeline::Create() {
if (handle != 0)
return;
MICROPROFILE_SCOPE(OpenGL_ResourceCreation);
glGenProgramPipelines(1, &handle);
}
void OGLPipeline::Release() {
if (handle == 0)
return;
MICROPROFILE_SCOPE(OpenGL_ResourceDeletion);
glDeleteProgramPipelines(1, &handle);
OpenGLState::GetCurState().ResetPipeline(handle).Apply();
handle = 0;
@@ -91,12 +115,16 @@ void OGLPipeline::Release() {
void OGLBuffer::Create() {
if (handle != 0)
return;
MICROPROFILE_SCOPE(OpenGL_ResourceCreation);
glGenBuffers(1, &handle);
}
void OGLBuffer::Release() {
if (handle == 0)
return;
MICROPROFILE_SCOPE(OpenGL_ResourceDeletion);
glDeleteBuffers(1, &handle);
OpenGLState::GetCurState().ResetBuffer(handle).Apply();
handle = 0;
@@ -105,12 +133,16 @@ void OGLBuffer::Release() {
void OGLSync::Create() {
if (handle != 0)
return;
// Don't profile here, this one is expected to happen ingame.
handle = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
}
void OGLSync::Release() {
if (handle == 0)
return;
// Don't profile here, this one is expected to happen ingame.
glDeleteSync(handle);
handle = 0;
}
@@ -118,12 +150,16 @@ void OGLSync::Release() {
void OGLVertexArray::Create() {
if (handle != 0)
return;
MICROPROFILE_SCOPE(OpenGL_ResourceCreation);
glGenVertexArrays(1, &handle);
}
void OGLVertexArray::Release() {
if (handle == 0)
return;
MICROPROFILE_SCOPE(OpenGL_ResourceDeletion);
glDeleteVertexArrays(1, &handle);
OpenGLState::GetCurState().ResetVertexArray(handle).Apply();
handle = 0;
@@ -132,12 +168,16 @@ void OGLVertexArray::Release() {
void OGLFramebuffer::Create() {
if (handle != 0)
return;
MICROPROFILE_SCOPE(OpenGL_ResourceCreation);
glGenFramebuffers(1, &handle);
}
void OGLFramebuffer::Release() {
if (handle == 0)
return;
MICROPROFILE_SCOPE(OpenGL_ResourceDeletion);
glDeleteFramebuffers(1, &handle);
OpenGLState::GetCurState().ResetFramebuffer(handle).Apply();
handle = 0;

View File

@@ -6,10 +6,10 @@
#include "core/core.h"
#include "core/memory.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/renderer_opengl/gl_rasterizer.h"
#include "video_core/renderer_opengl/gl_shader_cache.h"
#include "video_core/renderer_opengl/gl_shader_manager.h"
#include "video_core/renderer_opengl/utils.h"
#include "video_core/utils.h"
namespace OpenGL {
@@ -135,6 +135,8 @@ GLuint CachedShader::LazyGeometryProgram(OGLProgram& target_program,
return target_program.handle;
};
ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer) : RasterizerCache{rasterizer} {}
Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
const VAddr program_addr{GetShaderAddress(program)};

View File

@@ -16,6 +16,8 @@
namespace OpenGL {
class CachedShader;
class RasterizerOpenGL;
using Shader = std::shared_ptr<CachedShader>;
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
@@ -104,6 +106,8 @@ private:
class ShaderCacheOpenGL final : public RasterizerCache<Shader> {
public:
explicit ShaderCacheOpenGL(RasterizerOpenGL& rasterizer);
/// Gets the current specified shader stage program
Shader GetStageProgram(Maxwell::ShaderProgram program);
};

View File

@@ -22,17 +22,15 @@ OpenGLState::OpenGLState() {
depth.test_enabled = false;
depth.test_func = GL_LESS;
depth.write_mask = GL_TRUE;
depth.depth_range_near = 0.0f;
depth.depth_range_far = 1.0f;
primitive_restart.enabled = false;
primitive_restart.index = 0;
color_mask.red_enabled = GL_TRUE;
color_mask.green_enabled = GL_TRUE;
color_mask.blue_enabled = GL_TRUE;
color_mask.alpha_enabled = GL_TRUE;
for (auto& item : color_mask) {
item.red_enabled = GL_TRUE;
item.green_enabled = GL_TRUE;
item.blue_enabled = GL_TRUE;
item.alpha_enabled = GL_TRUE;
}
stencil.test_enabled = false;
auto reset_stencil = [](auto& config) {
config.test_func = GL_ALWAYS;
@@ -45,19 +43,33 @@ OpenGLState::OpenGLState() {
};
reset_stencil(stencil.front);
reset_stencil(stencil.back);
blend.enabled = true;
blend.rgb_equation = GL_FUNC_ADD;
blend.a_equation = GL_FUNC_ADD;
blend.src_rgb_func = GL_ONE;
blend.dst_rgb_func = GL_ZERO;
blend.src_a_func = GL_ONE;
blend.dst_a_func = GL_ZERO;
blend.color.red = 0.0f;
blend.color.green = 0.0f;
blend.color.blue = 0.0f;
blend.color.alpha = 0.0f;
for (auto& item : viewports) {
item.x = 0;
item.y = 0;
item.width = 0;
item.height = 0;
item.depth_range_near = 0.0f;
item.depth_range_far = 1.0f;
}
scissor.enabled = false;
scissor.x = 0;
scissor.y = 0;
scissor.width = 0;
scissor.height = 0;
for (auto& item : blend) {
item.enabled = true;
item.rgb_equation = GL_FUNC_ADD;
item.a_equation = GL_FUNC_ADD;
item.src_rgb_func = GL_ONE;
item.dst_rgb_func = GL_ZERO;
item.src_a_func = GL_ONE;
item.dst_a_func = GL_ZERO;
}
independant_blend.enabled = false;
blend_color.red = 0.0f;
blend_color.green = 0.0f;
blend_color.blue = 0.0f;
blend_color.alpha = 0.0f;
logic_op.enabled = false;
logic_op.operation = GL_COPY;
@@ -73,17 +85,6 @@ OpenGLState::OpenGLState() {
draw.shader_program = 0;
draw.program_pipeline = 0;
scissor.enabled = false;
scissor.x = 0;
scissor.y = 0;
scissor.width = 0;
scissor.height = 0;
viewport.x = 0;
viewport.y = 0;
viewport.width = 0;
viewport.height = 0;
clip_distance = {};
point.size = 1;
@@ -134,6 +135,32 @@ void OpenGLState::ApplyCulling() const {
}
}
void OpenGLState::ApplyColorMask() const {
if (GLAD_GL_ARB_viewport_array) {
for (size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) {
const auto& updated = color_mask[i];
const auto& current = cur_state.color_mask[i];
if (updated.red_enabled != current.red_enabled ||
updated.green_enabled != current.green_enabled ||
updated.blue_enabled != current.blue_enabled ||
updated.alpha_enabled != current.alpha_enabled) {
glColorMaski(static_cast<GLuint>(i), updated.red_enabled, updated.green_enabled,
updated.blue_enabled, updated.alpha_enabled);
}
}
} else {
const auto& updated = color_mask[0];
const auto& current = cur_state.color_mask[0];
if (updated.red_enabled != current.red_enabled ||
updated.green_enabled != current.green_enabled ||
updated.blue_enabled != current.blue_enabled ||
updated.alpha_enabled != current.alpha_enabled) {
glColorMask(updated.red_enabled, updated.green_enabled, updated.blue_enabled,
updated.alpha_enabled);
}
}
}
void OpenGLState::ApplyDepth() const {
// Depth test
const bool depth_test_changed = depth.test_enabled != cur_state.depth.test_enabled;
@@ -152,11 +179,6 @@ void OpenGLState::ApplyDepth() const {
if (depth.write_mask != cur_state.depth.write_mask) {
glDepthMask(depth.write_mask);
}
// Depth range
if (depth.depth_range_near != cur_state.depth.depth_range_near ||
depth.depth_range_far != cur_state.depth.depth_range_far) {
glDepthRange(depth.depth_range_near, depth.depth_range_far);
}
}
void OpenGLState::ApplyPrimitiveRestart() const {
@@ -208,7 +230,7 @@ void OpenGLState::ApplyStencilTest() const {
}
}
void OpenGLState::ApplyScissorTest() const {
void OpenGLState::ApplyScissor() const {
const bool scissor_changed = scissor.enabled != cur_state.scissor.enabled;
if (scissor_changed) {
if (scissor.enabled) {
@@ -217,51 +239,141 @@ void OpenGLState::ApplyScissorTest() const {
glDisable(GL_SCISSOR_TEST);
}
}
if (scissor_changed || scissor_changed || scissor.x != cur_state.scissor.x ||
scissor.y != cur_state.scissor.y || scissor.width != cur_state.scissor.width ||
scissor.height != cur_state.scissor.height) {
if (scissor.enabled &&
(scissor_changed || scissor.x != cur_state.scissor.x || scissor.y != cur_state.scissor.y ||
scissor.width != cur_state.scissor.width || scissor.height != cur_state.scissor.height)) {
glScissor(scissor.x, scissor.y, scissor.width, scissor.height);
}
}
void OpenGLState::ApplyBlending() const {
const bool blend_changed = blend.enabled != cur_state.blend.enabled;
void OpenGLState::ApplyViewport() const {
if (GLAD_GL_ARB_viewport_array) {
for (GLuint i = 0;
i < static_cast<GLuint>(Tegra::Engines::Maxwell3D::Regs::NumRenderTargets); i++) {
const auto& current = cur_state.viewports[i];
const auto& updated = viewports[i];
if (updated.x != current.x || updated.y != current.y ||
updated.width != current.width || updated.height != current.height) {
glViewportIndexedf(i, updated.x, updated.y, updated.width, updated.height);
}
if (updated.depth_range_near != current.depth_range_near ||
updated.depth_range_far != current.depth_range_far) {
glDepthRangeIndexed(i, updated.depth_range_near, updated.depth_range_far);
}
}
} else {
const auto& current = cur_state.viewports[0];
const auto& updated = viewports[0];
if (updated.x != current.x || updated.y != current.y || updated.width != current.width ||
updated.height != current.height) {
glViewport(updated.x, updated.y, updated.width, updated.height);
}
if (updated.depth_range_near != current.depth_range_near ||
updated.depth_range_far != current.depth_range_far) {
glDepthRange(updated.depth_range_near, updated.depth_range_far);
}
}
}
void OpenGLState::ApplyGlobalBlending() const {
const Blend& current = cur_state.blend[0];
const Blend& updated = blend[0];
const bool blend_changed = updated.enabled != current.enabled;
if (blend_changed) {
if (blend.enabled) {
ASSERT(!logic_op.enabled);
if (updated.enabled) {
glEnable(GL_BLEND);
} else {
glDisable(GL_BLEND);
}
}
if (blend.enabled) {
if (blend_changed || blend.color.red != cur_state.blend.color.red ||
blend.color.green != cur_state.blend.color.green ||
blend.color.blue != cur_state.blend.color.blue ||
blend.color.alpha != cur_state.blend.color.alpha) {
glBlendColor(blend.color.red, blend.color.green, blend.color.blue, blend.color.alpha);
if (!updated.enabled) {
return;
}
if (updated.separate_alpha) {
if (blend_changed || updated.src_rgb_func != current.src_rgb_func ||
updated.dst_rgb_func != current.dst_rgb_func ||
updated.src_a_func != current.src_a_func || updated.dst_a_func != current.dst_a_func) {
glBlendFuncSeparate(updated.src_rgb_func, updated.dst_rgb_func, updated.src_a_func,
updated.dst_a_func);
}
if (blend_changed || blend.src_rgb_func != cur_state.blend.src_rgb_func ||
blend.dst_rgb_func != cur_state.blend.dst_rgb_func ||
blend.src_a_func != cur_state.blend.src_a_func ||
blend.dst_a_func != cur_state.blend.dst_a_func) {
glBlendFuncSeparate(blend.src_rgb_func, blend.dst_rgb_func, blend.src_a_func,
blend.dst_a_func);
if (blend_changed || updated.rgb_equation != current.rgb_equation ||
updated.a_equation != current.a_equation) {
glBlendEquationSeparate(updated.rgb_equation, updated.a_equation);
}
} else {
if (blend_changed || updated.src_rgb_func != current.src_rgb_func ||
updated.dst_rgb_func != current.dst_rgb_func) {
glBlendFunc(updated.src_rgb_func, updated.dst_rgb_func);
}
if (blend_changed || blend.rgb_equation != cur_state.blend.rgb_equation ||
blend.a_equation != cur_state.blend.a_equation) {
glBlendEquationSeparate(blend.rgb_equation, blend.a_equation);
if (blend_changed || updated.rgb_equation != current.rgb_equation) {
glBlendEquation(updated.rgb_equation);
}
}
}
void OpenGLState::ApplyTargetBlending(int target, bool force) const {
const Blend& updated = blend[target];
const Blend& current = cur_state.blend[target];
const bool blend_changed = updated.enabled != current.enabled || force;
if (blend_changed) {
if (updated.enabled) {
glEnablei(GL_BLEND, static_cast<GLuint>(target));
} else {
glDisablei(GL_BLEND, static_cast<GLuint>(target));
}
}
if (!updated.enabled) {
return;
}
if (updated.separate_alpha) {
if (blend_changed || updated.src_rgb_func != current.src_rgb_func ||
updated.dst_rgb_func != current.dst_rgb_func ||
updated.src_a_func != current.src_a_func || updated.dst_a_func != current.dst_a_func) {
glBlendFuncSeparateiARB(static_cast<GLuint>(target), updated.src_rgb_func,
updated.dst_rgb_func, updated.src_a_func, updated.dst_a_func);
}
if (blend_changed || updated.rgb_equation != current.rgb_equation ||
updated.a_equation != current.a_equation) {
glBlendEquationSeparateiARB(static_cast<GLuint>(target), updated.rgb_equation,
updated.a_equation);
}
} else {
if (blend_changed || updated.src_rgb_func != current.src_rgb_func ||
updated.dst_rgb_func != current.dst_rgb_func) {
glBlendFunciARB(static_cast<GLuint>(target), updated.src_rgb_func,
updated.dst_rgb_func);
}
if (blend_changed || updated.rgb_equation != current.rgb_equation) {
glBlendEquationiARB(static_cast<GLuint>(target), updated.rgb_equation);
}
}
}
void OpenGLState::ApplyBlending() const {
if (independant_blend.enabled) {
for (size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) {
ApplyTargetBlending(i,
independant_blend.enabled != cur_state.independant_blend.enabled);
}
} else {
ApplyGlobalBlending();
}
if (blend_color.red != cur_state.blend_color.red ||
blend_color.green != cur_state.blend_color.green ||
blend_color.blue != cur_state.blend_color.blue ||
blend_color.alpha != cur_state.blend_color.alpha) {
glBlendColor(blend_color.red, blend_color.green, blend_color.blue, blend_color.alpha);
}
}
void OpenGLState::ApplyLogicOp() const {
const bool logic_op_changed = logic_op.enabled != cur_state.logic_op.enabled;
if (logic_op_changed) {
if (logic_op.enabled) {
ASSERT(!blend.enabled);
glEnable(GL_COLOR_LOGIC_OP);
} else {
glDisable(GL_COLOR_LOGIC_OP);
@@ -348,12 +460,6 @@ void OpenGLState::Apply() const {
if (draw.program_pipeline != cur_state.draw.program_pipeline) {
glBindProgramPipeline(draw.program_pipeline);
}
// Viewport
if (viewport.x != cur_state.viewport.x || viewport.y != cur_state.viewport.y ||
viewport.width != cur_state.viewport.width ||
viewport.height != cur_state.viewport.height) {
glViewport(viewport.x, viewport.y, viewport.width, viewport.height);
}
// Clip distance
for (std::size_t i = 0; i < clip_distance.size(); ++i) {
if (clip_distance[i] != cur_state.clip_distance[i]) {
@@ -364,19 +470,13 @@ void OpenGLState::Apply() const {
}
}
}
// Color mask
if (color_mask.red_enabled != cur_state.color_mask.red_enabled ||
color_mask.green_enabled != cur_state.color_mask.green_enabled ||
color_mask.blue_enabled != cur_state.color_mask.blue_enabled ||
color_mask.alpha_enabled != cur_state.color_mask.alpha_enabled) {
glColorMask(color_mask.red_enabled, color_mask.green_enabled, color_mask.blue_enabled,
color_mask.alpha_enabled);
}
// Point
if (point.size != cur_state.point.size) {
glPointSize(point.size);
}
ApplyScissorTest();
ApplyColorMask();
ApplyViewport();
ApplyScissor();
ApplyStencilTest();
ApplySRgb();
ApplyCulling();

View File

@@ -46,11 +46,9 @@ public:
} cull;
struct {
bool test_enabled; // GL_DEPTH_TEST
GLenum test_func; // GL_DEPTH_FUNC
GLboolean write_mask; // GL_DEPTH_WRITEMASK
GLfloat depth_range_near; // GL_DEPTH_RANGE
GLfloat depth_range_far; // GL_DEPTH_RANGE
bool test_enabled; // GL_DEPTH_TEST
GLenum test_func; // GL_DEPTH_FUNC
GLboolean write_mask; // GL_DEPTH_WRITEMASK
} depth;
struct {
@@ -58,13 +56,14 @@ public:
GLuint index;
} primitive_restart; // GL_PRIMITIVE_RESTART
struct {
struct ColorMask {
GLboolean red_enabled;
GLboolean green_enabled;
GLboolean blue_enabled;
GLboolean alpha_enabled;
} color_mask; // GL_COLOR_WRITEMASK
};
std::array<ColorMask, Tegra::Engines::Maxwell3D::Regs::NumRenderTargets>
color_mask; // GL_COLOR_WRITEMASK
struct {
bool test_enabled; // GL_STENCIL_TEST
struct {
@@ -78,22 +77,28 @@ public:
} front, back;
} stencil;
struct {
struct Blend {
bool enabled; // GL_BLEND
bool separate_alpha; // Independent blend enabled
GLenum rgb_equation; // GL_BLEND_EQUATION_RGB
GLenum a_equation; // GL_BLEND_EQUATION_ALPHA
GLenum src_rgb_func; // GL_BLEND_SRC_RGB
GLenum dst_rgb_func; // GL_BLEND_DST_RGB
GLenum src_a_func; // GL_BLEND_SRC_ALPHA
GLenum dst_a_func; // GL_BLEND_DST_ALPHA
};
std::array<Blend, Tegra::Engines::Maxwell3D::Regs::NumRenderTargets> blend;
struct {
GLclampf red;
GLclampf green;
GLclampf blue;
GLclampf alpha;
} color; // GL_BLEND_COLOR
} blend;
struct {
bool enabled;
} independant_blend;
struct {
GLclampf red;
GLclampf green;
GLclampf blue;
GLclampf alpha;
} blend_color; // GL_BLEND_COLOR
struct {
bool enabled; // GL_LOGIC_OP_MODE
@@ -138,6 +143,16 @@ public:
GLuint program_pipeline; // GL_PROGRAM_PIPELINE_BINDING
} draw;
struct viewport {
GLfloat x;
GLfloat y;
GLfloat width;
GLfloat height;
GLfloat depth_range_near; // GL_DEPTH_RANGE
GLfloat depth_range_far; // GL_DEPTH_RANGE
};
std::array<viewport, Tegra::Engines::Maxwell3D::Regs::NumRenderTargets> viewports;
struct {
bool enabled; // GL_SCISSOR_TEST
GLint x;
@@ -146,13 +161,6 @@ public:
GLsizei height;
} scissor;
struct {
GLint x;
GLint y;
GLsizei width;
GLsizei height;
} viewport;
struct {
float size; // GL_POINT_SIZE
} point;
@@ -191,14 +199,18 @@ private:
static bool s_rgb_used;
void ApplySRgb() const;
void ApplyCulling() const;
void ApplyColorMask() const;
void ApplyDepth() const;
void ApplyPrimitiveRestart() const;
void ApplyStencilTest() const;
void ApplyScissorTest() const;
void ApplyViewport() const;
void ApplyTargetBlending(int target, bool force) const;
void ApplyGlobalBlending() const;
void ApplyBlending() const;
void ApplyLogicOp() const;
void ApplyTextures() const;
void ApplySamplers() const;
void ApplyScissor() const;
};
} // namespace OpenGL

View File

@@ -6,9 +6,13 @@
#include <vector>
#include "common/alignment.h"
#include "common/assert.h"
#include "common/microprofile.h"
#include "video_core/renderer_opengl/gl_state.h"
#include "video_core/renderer_opengl/gl_stream_buffer.h"
MICROPROFILE_DEFINE(OpenGL_StreamBuffer, "OpenGL", "Stream Buffer Orphaning",
MP_RGB(128, 128, 192));
namespace OpenGL {
OGLStreamBuffer::OGLStreamBuffer(GLenum target, GLsizeiptr size, bool prefer_coherent)
@@ -75,6 +79,7 @@ std::tuple<u8*, GLintptr, bool> OGLStreamBuffer::Map(GLsizeiptr size, GLintptr a
}
if (invalidate || !persistent) {
MICROPROFILE_SCOPE(OpenGL_StreamBuffer);
GLbitfield flags = GL_MAP_WRITE_BIT | (persistent ? GL_MAP_PERSISTENT_BIT : 0) |
(coherent ? GL_MAP_COHERENT_BIT : GL_MAP_FLUSH_EXPLICIT_BIT) |
(invalidate ? GL_MAP_INVALIDATE_BUFFER_BIT : GL_MAP_UNSYNCHRONIZED_BIT);