diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 6a17bed723..ef4a745b57 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -251,7 +251,10 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { if (!gpu.regs.IsShaderConfigEnabled(index)) { switch (program) { case Maxwell::ShaderProgram::Geometry: - shader_program_manager->UseTrivialGeometryShader(); + shader_program_manager->BindGeometryShader(nullptr); + break; + case Maxwell::ShaderProgram::Fragment: + shader_program_manager->BindFragmentShader(nullptr); break; default: break; @@ -261,14 +264,6 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { const std::size_t stage{index == 0 ? 0 : index - 1}; // Stage indices are 0 - 5 - GLShader::MaxwellUniformData ubo{}; - ubo.SetFromRegs(gpu, stage); - const auto [buffer, offset] = - buffer_cache.UploadHostMemory(&ubo, sizeof(ubo), device.GetUniformBufferAlignment()); - - // Bind the emulation info buffer - bind_ubo_pushbuffer.Push(buffer, offset, static_cast(sizeof(ubo))); - Shader shader{shader_cache.GetStageProgram(program)}; const auto stage_enum = static_cast(stage); @@ -282,13 +277,13 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { switch (program) { case Maxwell::ShaderProgram::VertexA: case Maxwell::ShaderProgram::VertexB: - shader_program_manager->UseProgrammableVertexShader(program_handle); + shader_program_manager->BindVertexShader(&program_handle); break; case Maxwell::ShaderProgram::Geometry: - shader_program_manager->UseProgrammableGeometryShader(program_handle); + shader_program_manager->BindGeometryShader(&program_handle); break; case Maxwell::ShaderProgram::Fragment: - shader_program_manager->UseProgrammableFragmentShader(program_handle); + shader_program_manager->BindFragmentShader(&program_handle); break; default: UNIMPLEMENTED_MSG("Unimplemented shader index={}, enable={}, offset=0x{:08X}", index, @@ -605,11 +600,6 @@ void RasterizerOpenGL::DrawPrelude() { buffer_size = Common::AlignUp(buffer_size, 4) + CalculateIndexBufferSize(); } - // Uniform space for the 5 shader stages - buffer_size = Common::AlignUp(buffer_size, 4) + - (sizeof(GLShader::MaxwellUniformData) + device.GetUniformBufferAlignment()) * - Maxwell::MaxShaderStage; - // Add space for at least 18 constant buffers buffer_size += Maxwell::MaxConstBuffers * (Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment()); @@ -651,6 +641,7 @@ void RasterizerOpenGL::DrawPrelude() { gpu.dirty.ResetVertexArrays(); } + shader_program_manager->SetConstants(gpu); shader_program_manager->ApplyTo(state); state.Apply(); @@ -773,7 +764,7 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { SetupComputeImages(kernel); const auto [program, next_bindings] = kernel->GetProgramHandle(variant); - state.draw.shader_program = program; + state.draw.shader_program = program.handle; state.draw.program_pipeline = 0; const std::size_t buffer_size = diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 42ca3b1bd2..620cdfc7d6 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -23,9 +23,6 @@ namespace OpenGL { using VideoCommon::Shader::ProgramCode; -// One UBO is always reserved for emulation values on staged shaders -constexpr u32 STAGE_RESERVED_UBOS = 1; - struct UnspecializedShader { std::string code; GLShader::ShaderEntries entries; @@ -224,10 +221,6 @@ CachedProgram SpecializeShader(const std::string& code, const GLShader::ShaderEn } source += '\n'; - if (program_type != ProgramType::Compute) { - source += fmt::format("#define EMULATION_UBO_BINDING {}\n", base_bindings.cbuf++); - } - for (const auto& cbuf : entries.const_buffers) { source += fmt::format("#define CBUF_BINDING_{} {}\n", cbuf.GetIndex(), base_bindings.cbuf++); @@ -273,7 +266,7 @@ CachedProgram SpecializeShader(const std::string& code, const GLShader::ShaderEn OGLShader shader; shader.Create(source.c_str(), GetShaderType(program_type)); - auto program = std::make_shared(); + auto program = std::make_shared(); program->Create(true, hint_retrievable, shader.handle); return program; } @@ -348,28 +341,26 @@ Shader CachedShader::CreateKernelFromCache(const ShaderParameters& params, new CachedShader(params, ProgramType::Compute, std::move(result))); } -std::tuple CachedShader::GetProgramHandle(const ProgramVariant& variant) { +std::tuple CachedShader::GetProgramHandle( + const ProgramVariant& variant) { const auto [entry, is_cache_miss] = programs.try_emplace(variant); - auto& program = entry->second; + auto& stage_program = entry->second; if (is_cache_miss) { - program = TryLoadProgram(variant); - if (!program) { - program = SpecializeShader(code, entries, program_type, variant); + stage_program = TryLoadProgram(variant); + if (!stage_program) { + stage_program = SpecializeShader(code, entries, program_type, variant); disk_cache.SaveUsage(GetUsage(variant)); } - LabelGLObject(GL_PROGRAM, program->handle, cpu_addr); + LabelGLObject(GL_PROGRAM, stage_program->handle, cpu_addr); } - auto base_bindings = variant.base_bindings; + auto base_bindings{variant.base_bindings}; base_bindings.cbuf += static_cast(entries.const_buffers.size()); - if (program_type != ProgramType::Compute) { - base_bindings.cbuf += STAGE_RESERVED_UBOS; - } base_bindings.gmem += static_cast(entries.global_memory_entries.size()); base_bindings.sampler += static_cast(entries.samplers.size()); - return {program->handle, base_bindings}; + return {*stage_program, base_bindings}; } CachedProgram CachedShader::TryLoadProgram(const ProgramVariant& variant) const { @@ -516,7 +507,7 @@ CachedProgram ShaderCacheOpenGL::GeneratePrecompiledProgram( return {}; } - CachedProgram shader = std::make_shared(); + CachedProgram shader = std::make_shared(); shader->handle = glCreateProgram(); glProgramParameteri(shader->handle, GL_PROGRAM_SEPARABLE, GL_TRUE); glProgramBinary(shader->handle, dump.binary_format, dump.binary.data(), diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index de195cc5de..e8804a4f92 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h @@ -20,6 +20,7 @@ #include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/renderer_opengl/gl_shader_decompiler.h" #include "video_core/renderer_opengl/gl_shader_disk_cache.h" +#include "video_core/renderer_opengl/gl_shader_manager.h" namespace Core { class System; @@ -37,7 +38,7 @@ class RasterizerOpenGL; struct UnspecializedShader; using Shader = std::shared_ptr; -using CachedProgram = std::shared_ptr; +using CachedProgram = std::shared_ptr; using Maxwell = Tegra::Engines::Maxwell3D::Regs; using PrecompiledPrograms = std::unordered_map; using PrecompiledShaders = std::unordered_map; @@ -80,7 +81,8 @@ public: } /// Gets the GL program handle for the shader - std::tuple GetProgramHandle(const ProgramVariant& variant); + std::tuple GetProgramHandle( + const ProgramVariant& variant); private: explicit CachedShader(const ShaderParameters& params, ProgramType program_type, diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 6a610a3bca..3a82556da7 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -2486,7 +2486,10 @@ std::string GetCommonDeclarations() { " bvec2 is_nan2 = isnan(pair2);\n" " return bvec2(comparison.x || is_nan1.x || is_nan2.x, comparison.y || is_nan1.y || " "is_nan2.y);\n" - "}}\n\n"); + "}}\n\n" + "layout(location = 0) uniform uvec4 config_pack; // instance_id, flip_stage, y_direction, " + "padding\n" + "layout(location = 1) uniform vec2 viewport_flip;\n\n"); } ProgramResult Decompile(const Device& device, const ShaderIR& ir, ProgramType stage, diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp index b5a43e79e7..bdf2f52a88 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.cpp +++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp @@ -27,14 +27,6 @@ ProgramResult GenerateVertexShader(const Device& device, const ShaderSetup& setu std::string out = "// Shader Unique Id: VS" + id + "\n\n"; out += GetCommonDeclarations(); - out += R"( -layout (std140, binding = EMULATION_UBO_BINDING) uniform vs_config { - vec4 viewport_flip; - uvec4 config_pack; // instance_id, flip_stage, y_direction, padding -}; - -)"; - const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET, setup.program.size_a, settings); const auto stage = setup.IsDualProgram() ? ProgramType::VertexA : ProgramType::VertexB; ProgramResult program = Decompile(device, program_ir, stage, "vertex"); @@ -77,14 +69,6 @@ ProgramResult GenerateGeometryShader(const Device& device, const ShaderSetup& se std::string out = "// Shader Unique Id: GS" + id + "\n\n"; out += GetCommonDeclarations(); - out += R"( -layout (std140, binding = EMULATION_UBO_BINDING) uniform gs_config { - vec4 viewport_flip; - uvec4 config_pack; // instance_id, flip_stage, y_direction, padding -}; - -)"; - const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET, setup.program.size_a, settings); ProgramResult program = Decompile(device, program_ir, ProgramType::Geometry, "geometry"); out += program.first; @@ -92,7 +76,7 @@ layout (std140, binding = EMULATION_UBO_BINDING) uniform gs_config { out += R"( void main() { execute_geometry(); -};)"; +})"; return {std::move(out), std::move(program.second)}; } @@ -113,11 +97,6 @@ layout (location = 5) out vec4 FragColor5; layout (location = 6) out vec4 FragColor6; layout (location = 7) out vec4 FragColor7; -layout (std140, binding = EMULATION_UBO_BINDING) uniform fs_config { - vec4 viewport_flip; - uvec4 config_pack; // instance_id, flip_stage, y_direction, padding -}; - )"; const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET, setup.program.size_a, settings); diff --git a/src/video_core/renderer_opengl/gl_shader_manager.cpp b/src/video_core/renderer_opengl/gl_shader_manager.cpp index b05f90f20d..f179143cb3 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.cpp +++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp @@ -2,13 +2,34 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include #include "common/common_types.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/renderer_opengl/gl_shader_manager.h" namespace OpenGL::GLShader { -using Tegra::Engines::Maxwell3D; +using Maxwell = Tegra::Engines::Maxwell3D::Regs; + +enum ProgramLocations : u32 { + CONFIG_PACK = 0, + VIEWPORT_SCALE = 1, +}; + +StageProgram::StageProgram() = default; + +StageProgram::~StageProgram() = default; + +void StageProgram::UpdateConstants() { + if (state.config_pack != old_state.config_pack) { + glProgramUniform4uiv(handle, CONFIG_PACK, 1, state.config_pack.data()); + old_state.config_pack = state.config_pack; + } + if (state.viewport_scale != old_state.viewport_scale) { + glProgramUniform2fv(handle, VIEWPORT_SCALE, 1, state.viewport_scale.data()); + old_state.viewport_scale = state.viewport_scale; + } +} ProgramManager::ProgramManager() { pipeline.Create(); @@ -16,12 +37,56 @@ ProgramManager::ProgramManager() { ProgramManager::~ProgramManager() = default; +void ProgramManager::SetConstants(Tegra::Engines::Maxwell3D& maxwell_3d) { + const auto& regs = maxwell_3d.regs; + const auto& state = maxwell_3d.state; + + // TODO(bunnei): Support more than one viewport + const GLfloat flip_x = regs.viewport_transform[0].scale_x < 0.0 ? -1.0f : 1.0f; + const GLfloat flip_y = regs.viewport_transform[0].scale_y < 0.0 ? -1.0f : 1.0f; + + const GLuint instance_id = state.current_instance; + + // Assign in which stage the position has to be flipped (the last stage before the fragment + // shader). + const GLuint flip_stage = [&]() { + constexpr u32 geometry_index = static_cast(Maxwell::ShaderProgram::Geometry); + if (regs.shader_config[geometry_index].enable) { + return geometry_index; + } else { + return static_cast(Maxwell::ShaderProgram::VertexB); + } + }(); + + // Y_NEGATE controls what value S2R returns for the Y_DIRECTION system value. + const GLfloat y_direction = regs.screen_y_control.y_negate == 0 ? 1.0f : -1.0f; + + for (const auto stage : + std::array{current_state.vertex, current_state.geometry, current_state.fragment}) { + if (!stage) { + continue; + } + stage->SetInstanceID(instance_id); + stage->SetFlipStage(flip_stage); + stage->SetYDirection(y_direction); + stage->SetViewportScale(flip_x, flip_y); + stage->UpdateConstants(); + } +} + void ProgramManager::ApplyTo(OpenGLState& state) { UpdatePipeline(); state.draw.shader_program = 0; state.draw.program_pipeline = pipeline.handle; } +GLuint GetHandle(StageProgram* program) { + if (!program) { + return 0; + } + return program->handle; +} + void ProgramManager::UpdatePipeline() { // Avoid updating the pipeline when values have no changed if (old_state == current_state) { @@ -33,34 +98,11 @@ void ProgramManager::UpdatePipeline() { GL_FRAGMENT_SHADER_BIT}; glUseProgramStages(pipeline.handle, all_used_stages, 0); - glUseProgramStages(pipeline.handle, GL_VERTEX_SHADER_BIT, current_state.vertex_shader); - glUseProgramStages(pipeline.handle, GL_GEOMETRY_SHADER_BIT, current_state.geometry_shader); - glUseProgramStages(pipeline.handle, GL_FRAGMENT_SHADER_BIT, current_state.fragment_shader); + glUseProgramStages(pipeline.handle, GL_VERTEX_SHADER_BIT, GetHandle(current_state.vertex)); + glUseProgramStages(pipeline.handle, GL_GEOMETRY_SHADER_BIT, GetHandle(current_state.geometry)); + glUseProgramStages(pipeline.handle, GL_FRAGMENT_SHADER_BIT, GetHandle(current_state.fragment)); old_state = current_state; } -void MaxwellUniformData::SetFromRegs(const Maxwell3D& maxwell, std::size_t shader_stage) { - const auto& regs = maxwell.regs; - const auto& state = maxwell.state; - - // TODO(bunnei): Support more than one viewport - viewport_flip[0] = regs.viewport_transform[0].scale_x < 0.0 ? -1.0f : 1.0f; - viewport_flip[1] = regs.viewport_transform[0].scale_y < 0.0 ? -1.0f : 1.0f; - - instance_id = state.current_instance; - - // Assign in which stage the position has to be flipped - // (the last stage before the fragment shader). - constexpr u32 geometry_index = static_cast(Maxwell3D::Regs::ShaderProgram::Geometry); - if (maxwell.regs.shader_config[geometry_index].enable) { - flip_stage = geometry_index; - } else { - flip_stage = static_cast(Maxwell3D::Regs::ShaderProgram::VertexB); - } - - // Y_NEGATE controls what value S2R returns for the Y_DIRECTION system value. - y_direction = regs.screen_y_control.y_negate == 0 ? 1.f : -1.f; -} - } // namespace OpenGL::GLShader diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h index 6961e702a2..b55b72f162 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.h +++ b/src/video_core/renderer_opengl/gl_shader_manager.h @@ -4,71 +4,98 @@ #pragma once +#include #include +#include #include +#include "common/common_types.h" #include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/renderer_opengl/gl_state.h" #include "video_core/renderer_opengl/maxwell_to_gl.h" +namespace Tegra::Engines { +class Maxwell3D; +} + namespace OpenGL::GLShader { -/// Uniform structure for the Uniform Buffer Object, all vectors must be 16-byte aligned -/// @note Always keep a vec4 at the end. The GL spec is not clear whether the alignment at -/// the end of a uniform block is included in UNIFORM_BLOCK_DATA_SIZE or not. -/// Not following that rule will cause problems on some AMD drivers. -struct MaxwellUniformData { - void SetFromRegs(const Tegra::Engines::Maxwell3D& maxwell, std::size_t shader_stage); +class StageProgram final : public OGLProgram { +public: + explicit StageProgram(); + ~StageProgram(); - alignas(16) GLvec4 viewport_flip; - struct alignas(16) { - GLuint instance_id; - GLuint flip_stage; - GLfloat y_direction; + void UpdateConstants(); + + void SetInstanceID(GLuint instance_id) { + state.instance_id = instance_id; + } + + void SetFlipStage(GLuint flip_stage) { + state.flip_stage = flip_stage; + } + + void SetYDirection(GLfloat y_direction) { + state.y_direction = y_direction; + } + + void SetViewportScale(GLfloat x, GLfloat y) { + state.viewport_scale = {x, y}; + } + +private: + struct State { + union { + std::array config_pack{}; + struct { + GLuint instance_id; + GLuint flip_stage; + GLfloat y_direction; + }; + }; + + std::array viewport_scale{}; }; -}; -static_assert(sizeof(MaxwellUniformData) == 32, "MaxwellUniformData structure size is incorrect"); -static_assert(sizeof(MaxwellUniformData) < 16384, - "MaxwellUniformData structure must be less than 16kb as per the OpenGL spec"); -class ProgramManager { + State state; + State old_state; +}; + +class ProgramManager final { public: explicit ProgramManager(); ~ProgramManager(); + void SetConstants(Tegra::Engines::Maxwell3D& maxwell_3d); + void ApplyTo(OpenGLState& state); - void UseProgrammableVertexShader(GLuint program) { - current_state.vertex_shader = program; + void BindVertexShader(StageProgram* program) { + current_state.vertex = program; } - void UseProgrammableGeometryShader(GLuint program) { - current_state.geometry_shader = program; + void BindGeometryShader(StageProgram* program) { + current_state.geometry = program; } - void UseProgrammableFragmentShader(GLuint program) { - current_state.fragment_shader = program; - } - - void UseTrivialGeometryShader() { - current_state.geometry_shader = 0; + void BindFragmentShader(StageProgram* program) { + current_state.fragment = program; } private: struct PipelineState { bool operator==(const PipelineState& rhs) const { - return vertex_shader == rhs.vertex_shader && fragment_shader == rhs.fragment_shader && - geometry_shader == rhs.geometry_shader; + return vertex == rhs.vertex && fragment == rhs.fragment && geometry == rhs.geometry; } bool operator!=(const PipelineState& rhs) const { return !operator==(rhs); } - GLuint vertex_shader{}; - GLuint fragment_shader{}; - GLuint geometry_shader{}; + StageProgram* vertex{}; + StageProgram* fragment{}; + StageProgram* geometry{}; }; void UpdatePipeline();