Compare commits

..

3 Commits

Author SHA1 Message Date
Melissa Goad
597d619197 Merge a9571a383d into 60e6e8953e 2018-04-19 09:01:52 +00:00
Melissa Goad
a9571a383d fixup 2018-04-19 04:01:43 -05:00
Melissa Goad
95691e1a8d Add RGB10_A2 texture formats 2018-04-19 03:29:29 -05:00
11 changed files with 67 additions and 126 deletions

View File

@@ -7,7 +7,7 @@ yuzu is an experimental open-source emulator for the Nintendo Switch from the cr
It is written in C++ with portability in mind, with builds actively maintained for Windows, Linux and macOS. The emulator is currently only useful for homebrew development and research purposes.
yuzu only emulates a subset of Switch hardware and therefore is generally only useful for running/debugging homebrew applications. At this time, yuzu cannot play any commercial games without major problems. yuzu can boot some games, to varying degrees of success, but does not implement any of the necessary GPU features to render 3D graphics.
yuzu only emulates a subset of Switch hardware and therefore is generally only useful for running/debugging homebrew applications. At this time, yuzu does not run any commercial Switch games. yuzu can boot some games, to varying degrees of success, but does not implement any of the necessary GPU features to render 3D graphics.
yuzu is licensed under the GPLv2 (or any later version). Refer to the license.txt file included.

View File

@@ -74,6 +74,11 @@ inline u64 _rotr64(u64 x, unsigned int shift) {
#else // _MSC_VER
#if (_MSC_VER < 1900)
// Function Cross-Compatibility
#define snprintf _snprintf
#endif
// Locale Cross-Compatibility
#define locale_t _locale_t

View File

@@ -5,7 +5,6 @@
#include <algorithm>
#include "common/alignment.h"
#include "common/microprofile.h"
#include "common/scope_exit.h"
#include "core/core.h"
#include "core/core_timing.h"
@@ -129,8 +128,6 @@ void NVFlinger::Compose() {
// Search for a queued buffer and acquire it
auto buffer = buffer_queue->AcquireBuffer();
MicroProfileFlip();
if (buffer == boost::none) {
// There was no queued buffer to draw, render previous frame
Core::System::GetInstance().perf_stats.EndGameFrame();

View File

@@ -90,7 +90,6 @@ union OpCode {
enum class Id : u64 {
TEXS = 0x6C,
IPA = 0xE0,
FMUL32_IMM = 0x1E,
FFMA_IMM = 0x65,
FFMA_CR = 0x93,
FFMA_RC = 0xA3,
@@ -143,7 +142,6 @@ union OpCode {
switch (op2) {
case Id::IPA:
case Id::FMUL32_IMM:
return op2;
}
@@ -237,7 +235,6 @@ union OpCode {
info_table[Id::FMUL_R] = {Type::Arithmetic, "fmul_r"};
info_table[Id::FMUL_C] = {Type::Arithmetic, "fmul_c"};
info_table[Id::FMUL_IMM] = {Type::Arithmetic, "fmul_imm"};
info_table[Id::FMUL32_IMM] = {Type::Arithmetic, "fmul32_imm"};
info_table[Id::FSETP_C] = {Type::Arithmetic, "fsetp_c"};
info_table[Id::FSETP_R] = {Type::Arithmetic, "fsetp_r"};
info_table[Id::EXIT] = {Type::Trivial, "exit"};
@@ -312,8 +309,7 @@ union Instruction {
BitField<39, 8, Register> gpr39;
union {
BitField<20, 19, u64> imm20_19;
BitField<20, 32, u64> imm20_32;
BitField<20, 19, u64> imm20;
BitField<45, 1, u64> negate_b;
BitField<46, 1, u64> abs_a;
BitField<48, 1, u64> negate_a;
@@ -321,21 +317,14 @@ union Instruction {
BitField<50, 1, u64> abs_d;
BitField<56, 1, u64> negate_imm;
float GetImm20_19() const {
float GetImm20() const {
float result{};
u32 imm{static_cast<u32>(imm20_19)};
u32 imm{static_cast<u32>(imm20)};
imm <<= 12;
imm |= negate_imm ? 0x80000000 : 0;
std::memcpy(&result, &imm, sizeof(imm));
return result;
}
float GetImm20_32() const {
float result{};
u32 imm{static_cast<u32>(imm20_32)};
std::memcpy(&result, &imm, sizeof(imm));
return result;
}
} alu;
union {

View File

@@ -49,11 +49,10 @@ struct FormatTuple {
};
static constexpr std::array<FormatTuple, SurfaceParams::MaxPixelFormat> tex_format_tuples = {{
{GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, false, 1}, // ABGR8
{GL_RGB, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV, false, 1}, // B5G6R5
{GL_COMPRESSED_RGB_S3TC_DXT1_EXT, GL_RGB, GL_UNSIGNED_INT_8_8_8_8, true, 16}, // DXT1
{GL_COMPRESSED_RGBA_S3TC_DXT3_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true, 16}, // DXT23
{GL_COMPRESSED_RGBA_S3TC_DXT5_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true, 16}, // DXT45
{GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, false, 1}, // ABGR8
{GL_RGB, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV, false, 1}, // B5G6R5
{GL_COMPRESSED_RGB_S3TC_DXT1_EXT, GL_RGB, GL_UNSIGNED_INT_8_8_8_8, true, 16}, // DXT1
{GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_10_10_10_2, false, 1}, // RGB10_A2
}};
static const FormatTuple& GetFormatTuple(PixelFormat pixel_format, ComponentType component_type) {
@@ -83,6 +82,23 @@ static u16 GetResolutionScaleFactor() {
: Settings::values.resolution_factor);
}
template <bool morton_to_gl, PixelFormat format>
static void MortonCopyTile(u32 stride, u8* tile_buffer, u8* gl_buffer) {
constexpr u32 bytes_per_pixel = SurfaceParams::GetFormatBpp(format) / 8;
constexpr u32 gl_bytes_per_pixel = CachedSurface::GetGLBytesPerPixel(format);
for (u32 y = 0; y < 8; ++y) {
for (u32 x = 0; x < 8; ++x) {
u8* tile_ptr = tile_buffer + VideoCore::MortonInterleave(x, y) * bytes_per_pixel;
u8* gl_ptr = gl_buffer + ((7 - y) * stride + x) * gl_bytes_per_pixel;
if (morton_to_gl) {
std::memcpy(gl_ptr, tile_ptr, bytes_per_pixel);
} else {
std::memcpy(tile_ptr, gl_ptr, bytes_per_pixel);
}
}
}
}
template <bool morton_to_gl, PixelFormat format>
void MortonCopy(u32 stride, u32 block_height, u32 height, u8* gl_buffer, VAddr base, VAddr start,
VAddr end) {
@@ -106,9 +122,10 @@ void MortonCopy(u32 stride, u32 block_height, u32 height, u8* gl_buffer, VAddr b
static constexpr std::array<void (*)(u32, u32, u32, u8*, VAddr, VAddr, VAddr),
SurfaceParams::MaxPixelFormat>
morton_to_gl_fns = {
MortonCopy<true, PixelFormat::ABGR8>, MortonCopy<true, PixelFormat::B5G6R5>,
MortonCopy<true, PixelFormat::DXT1>, MortonCopy<true, PixelFormat::DXT23>,
MortonCopy<true, PixelFormat::DXT45>,
MortonCopy<true, PixelFormat::ABGR8>,
MortonCopy<true, PixelFormat::B5G6R5>,
MortonCopy<true, PixelFormat::DXT1>,
MortonCopy<true, PixelFormat::RGB10_A2>,
};
static constexpr std::array<void (*)(u32, u32, u32, u8*, VAddr, VAddr, VAddr),
@@ -116,10 +133,9 @@ static constexpr std::array<void (*)(u32, u32, u32, u8*, VAddr, VAddr, VAddr),
gl_to_morton_fns = {
MortonCopy<false, PixelFormat::ABGR8>,
MortonCopy<false, PixelFormat::B5G6R5>,
// TODO(Subv): Swizzling the DXT1/DXT23/DXT45 formats is not yet supported
nullptr,
nullptr,
// TODO(Subv): Swizzling the DXT1 format is not yet supported
nullptr,
MortonCopy<false, PixelFormat::RGB10_A2>,
};
// Allocate an uninitialized texture of appropriate size and format for the surface

View File

@@ -55,8 +55,7 @@ struct SurfaceParams {
ABGR8 = 0,
B5G6R5 = 1,
DXT1 = 2,
DXT23 = 3,
DXT45 = 4,
RGB10_A2 = 3,
Max,
Invalid = 255,
@@ -86,11 +85,10 @@ struct SurfaceParams {
return 0;
constexpr std::array<unsigned int, MaxPixelFormat> bpp_table = {
32, // ABGR8
16, // B5G6R5
64, // DXT1
128, // DXT23
128, // DXT45
32, // ABGR8
16, // B5G6R5
64, // DXT1
32, // RGB10_A2
};
ASSERT(static_cast<size_t>(format) < bpp_table.size());
@@ -104,6 +102,8 @@ struct SurfaceParams {
switch (format) {
case Tegra::RenderTargetFormat::RGBA8_UNORM:
return PixelFormat::ABGR8;
case Tegra::RenderTargetFormat::RGB10_A2_UNORM:
return PixelFormat::RGB10_A2;
default:
NGLOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format));
UNREACHABLE();
@@ -129,10 +129,6 @@ struct SurfaceParams {
return PixelFormat::B5G6R5;
case Tegra::Texture::TextureFormat::DXT1:
return PixelFormat::DXT1;
case Tegra::Texture::TextureFormat::DXT23:
return PixelFormat::DXT23;
case Tegra::Texture::TextureFormat::DXT45:
return PixelFormat::DXT45;
default:
NGLOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format));
UNREACHABLE();
@@ -148,10 +144,8 @@ struct SurfaceParams {
return Tegra::Texture::TextureFormat::B5G6R5;
case PixelFormat::DXT1:
return Tegra::Texture::TextureFormat::DXT1;
case PixelFormat::DXT23:
return Tegra::Texture::TextureFormat::DXT23;
case PixelFormat::DXT45:
return Tegra::Texture::TextureFormat::DXT45;
case PixelFormat::RGB10_A2:
return Tegra::Texture::TextureFormat::RGB10_A2;
default:
UNREACHABLE();
}

View File

@@ -5,7 +5,6 @@
#include <map>
#include <set>
#include <string>
#include <string_view>
#include "common/assert.h"
#include "common/common_types.h"
#include "video_core/engines/shader_bytecode.h"
@@ -110,25 +109,12 @@ private:
class ShaderWriter {
public:
void AddLine(std::string_view text) {
void AddLine(const std::string& text) {
DEBUG_ASSERT(scope >= 0);
if (!text.empty()) {
AppendIndentation();
shader_source += std::string(static_cast<size_t>(scope) * 4, ' ');
}
shader_source += text;
AddNewLine();
}
void AddLine(char character) {
DEBUG_ASSERT(scope >= 0);
AppendIndentation();
shader_source += character;
AddNewLine();
}
void AddNewLine() {
DEBUG_ASSERT(scope >= 0);
shader_source += '\n';
shader_source += text + '\n';
}
std::string GetResult() {
@@ -138,10 +124,6 @@ public:
int scope = 0;
private:
void AppendIndentation() {
shader_source.append(static_cast<size_t>(scope) * 4, ' ');
}
std::string shader_source;
};
@@ -208,14 +190,9 @@ private:
}
}
/// Generates code representing a 19-bit immediate value
static std::string GetImmediate19(const Instruction& instr) {
return std::to_string(instr.alu.GetImm20_19());
}
/// Generates code representing a 32-bit immediate value
static std::string GetImmediate32(const Instruction& instr) {
return std::to_string(instr.alu.GetImm20_32());
/// Generates code representing an immediate value
static std::string GetImmediate(const Instruction& instr) {
return std::to_string(instr.alu.GetImm20());
}
/// Generates code representing a temporary (GPR) register.
@@ -299,7 +276,7 @@ private:
std::string op_b = instr.alu.negate_b ? "-" : "";
if (instr.is_b_imm) {
op_b += GetImmediate19(instr);
op_b += GetImmediate(instr);
} else {
if (instr.is_b_gpr) {
op_b += GetRegister(instr.gpr20);
@@ -319,11 +296,6 @@ private:
SetDest(0, dest, op_a + " * " + op_b, 1, 1, instr.alu.abs_d);
break;
}
case OpCode::Id::FMUL32_IMM: {
// fmul32i doesn't have abs or neg bits.
SetDest(0, dest, GetRegister(instr.gpr8) + " * " + GetImmediate32(instr), 1, 1);
break;
}
case OpCode::Id::FADD_C:
case OpCode::Id::FADD_R:
case OpCode::Id::FADD_IMM: {
@@ -392,7 +364,7 @@ private:
break;
}
case OpCode::Id::FFMA_IMM: {
op_b += GetImmediate19(instr);
op_b += GetImmediate(instr);
op_c += GetRegister(instr.gpr39);
break;
}
@@ -427,18 +399,11 @@ private:
const std::string op_a = GetRegister(instr.gpr8);
const std::string op_b = GetRegister(instr.gpr20);
const std::string sampler = GetSampler(instr.sampler);
const std::string coord = "vec2 coords = vec2(" + op_a + ", " + op_b + ");";
// Add an extra scope and declare the texture coords inside to prevent overwriting
// them in case they are used as outputs of the texs instruction.
shader.AddLine("{");
++shader.scope;
shader.AddLine(coord);
const std::string texture = "texture(" + sampler + ", coords)";
const std::string coord = "vec2(" + op_a + ", " + op_b + ")";
const std::string texture = "texture(" + sampler + ", " + coord + ")";
for (unsigned elem = 0; elem < instr.attribute.fmt20.size; ++elem) {
SetDest(elem, GetRegister(instr.gpr0, elem), texture, 1, 4);
}
--shader.scope;
shader.AddLine("}");
break;
}
default: {
@@ -499,7 +464,7 @@ private:
for (const auto& subroutine : subroutines) {
shader.AddLine("bool " + subroutine.GetName() + "();");
}
shader.AddNewLine();
shader.AddLine("");
// Add the main entry point
shader.AddLine("bool exec_shader() {");
@@ -542,14 +507,14 @@ private:
}
--shader.scope;
shader.AddLine('}');
shader.AddLine("}");
}
shader.AddLine("default: return false;");
shader.AddLine('}');
shader.AddLine("}");
--shader.scope;
shader.AddLine('}');
shader.AddLine("}");
shader.AddLine("return false;");
}
@@ -576,7 +541,7 @@ private:
for (const auto& reg : declr_register) {
declarations.AddLine("float " + reg + " = 0.0;");
}
declarations.AddNewLine();
declarations.AddLine("");
for (const auto& index : declr_input_attribute) {
// TODO(bunnei): Use proper number of elements for these
@@ -585,7 +550,7 @@ private:
static_cast<u32>(Attribute::Index::Attribute_0)) +
") in vec4 " + GetInputAttribute(index) + ";");
}
declarations.AddNewLine();
declarations.AddLine("");
for (const auto& index : declr_output_attribute) {
// TODO(bunnei): Use proper number of elements for these
@@ -594,15 +559,15 @@ private:
static_cast<u32>(Attribute::Index::Attribute_0)) +
") out vec4 " + GetOutputAttribute(index) + ";");
}
declarations.AddNewLine();
declarations.AddLine("");
unsigned const_buffer_layout = 0;
for (const auto& entry : GetConstBuffersDeclarations()) {
declarations.AddLine("layout(std430) buffer " + entry.GetName());
declarations.AddLine('{');
declarations.AddLine("{");
declarations.AddLine(" float c" + std::to_string(entry.GetIndex()) + "[];");
declarations.AddLine("};");
declarations.AddNewLine();
declarations.AddLine("");
++const_buffer_layout;
}
}
@@ -621,7 +586,7 @@ private:
std::set<Attribute::Index> declr_input_attribute;
std::set<Attribute::Index> declr_output_attribute;
std::array<ConstBufferEntry, Maxwell3D::Regs::MaxConstBuffers> declr_const_buffers;
}; // namespace Decompiler
};
std::string GetCommonDeclarations() {
return "bool exec_shader();";

View File

@@ -48,10 +48,6 @@ u32 BytesPerPixel(TextureFormat format) {
case TextureFormat::DXT1:
// In this case a 'pixel' actually refers to a 4x4 tile.
return 8;
case TextureFormat::DXT23:
case TextureFormat::DXT45:
// In this case a 'pixel' actually refers to a 4x4 tile.
return 16;
case TextureFormat::A8R8G8B8:
return 4;
case TextureFormat::B5G6R5:
@@ -71,9 +67,7 @@ std::vector<u8> UnswizzleTexture(VAddr address, TextureFormat format, u32 width,
switch (format) {
case TextureFormat::DXT1:
case TextureFormat::DXT23:
case TextureFormat::DXT45:
// In the DXT formats, each 4x4 tile is swizzled instead of just individual pixel values.
// In the DXT1 format, each 4x4 tile is swizzled instead of just individual pixel values.
CopySwizzledData(width / 4, height / 4, bytes_per_pixel, bytes_per_pixel, data,
unswizzled_data.data(), true, block_height);
break;
@@ -97,8 +91,6 @@ std::vector<u8> DecodeTexture(const std::vector<u8>& texture_data, TextureFormat
// TODO(Subv): Implement.
switch (format) {
case TextureFormat::DXT1:
case TextureFormat::DXT23:
case TextureFormat::DXT45:
case TextureFormat::A8R8G8B8:
case TextureFormat::B5G6R5:
// TODO(Subv): For the time being just forward the same data without any decoding.

View File

@@ -19,6 +19,7 @@ enum class TextureFormat : u32 {
DXT1 = 0x24,
DXT23 = 0x25,
DXT45 = 0x26,
RGB10_A2 = 0xD1,
};
enum class TextureType : u32 {

View File

@@ -44,15 +44,6 @@
Q_IMPORT_PLUGIN(QWindowsIntegrationPlugin);
#endif
#ifdef _WIN32
extern "C" {
// tells Nvidia and AMD drivers to use the dedicated GPU by default on laptops with switchable
// graphics
__declspec(dllexport) unsigned long NvOptimusEnablement = 0x00000001;
__declspec(dllexport) int AmdPowerXpressRequestHighPerformance = 1;
}
#endif
/**
* "Callouts" are one-time instructional messages shown to the user. In the config settings, there
* is a bitfield "callout_flags" options, used to track if a message has already been shown to the

View File

@@ -37,15 +37,6 @@
#include "yuzu_cmd/config.h"
#include "yuzu_cmd/emu_window/emu_window_sdl2.h"
#ifdef _WIN32
extern "C" {
// tells Nvidia and AMD drivers to use the dedicated GPU by default on laptops with switchable
// graphics
__declspec(dllexport) unsigned long NvOptimusEnablement = 0x00000001;
__declspec(dllexport) int AmdPowerXpressRequestHighPerformance = 1;
}
#endif
static void PrintHelp(const char* argv0) {
std::cout << "Usage: " << argv0
<< " [options] <filename>\n"