Compare commits

...

15 Commits

Author SHA1 Message Date
EliEron
a2f2c56780 Make the ExeFS dumper take updates into account 2022-02-15 22:16:50 +01:00
bunnei
666b37ad56 Merge pull request #4242 from ReinUsesLisp/maxwell-dma
maxwell_dma: Match official doc and support pitch->voxel copies
2020-07-14 14:04:16 -04:00
bunnei
e2730372b8 Merge pull request #4294 from MerryMage/cpu-opt-settings
configuration: Add settings to enable/disable specific CPU optimizations
2020-07-14 12:38:03 -04:00
bunnei
450cbcfee6 Merge pull request #4282 from Morph1984/fs-size
filesystem: Set various NAND partition sizes to their defaults
2020-07-14 12:16:42 -04:00
bunnei
bf9c010be5 Merge pull request #4338 from ameerj/disconnected-adapter
gcadapter: Fix crash if gc configured but adapter not connected
2020-07-14 12:01:43 -04:00
MerryMage
a67d00ef31 configure_cpu: Split optimization settings off into Debug tab 2020-07-12 19:32:32 +01:00
MerryMage
da11a27f42 configure_cpu: Add tooltips 2020-07-11 16:38:38 +01:00
MerryMage
505aa3a4c1 configure_cpu: Show/Hide debugging options 2020-07-11 16:38:38 +01:00
MerryMage
0193202964 configuration: Add settings to enable/disable specific CPU optimizations 2020-07-11 14:34:09 +01:00
ReinUsesLisp
c574ab5aa1 video_core/textures: Add and use SwizzleSliceToVoxel, and minor style changes
Change GOB sizes from free-functions to constexpr constants.

Add SwizzleSliceToVoxel, a function that swizzles a 2D array of pixels
into a 3D texture and use it for 3D copies.
2020-07-10 04:09:32 -03:00
Morph
b24b463c87 bis_factory: Set User NAND free space to be 1 MiB less than total. 2020-07-10 00:37:39 -04:00
Morph
17242a8865 sdmc_factory: Set the SDMC total size to 1 TiB
We should not be limited by the SDMC's partition size, so set this to 1 TiB. Hardware is limited to the maximum allowed by the MBR partition table, which is 2 TiB.
2020-07-10 00:37:39 -04:00
Morph
0373ead96e bis_factory: Use hardware default NAND partition sizes
Sets the total space of user and system partitions to their hardware defaults.
Furthermore, return the total space as free space for the user partition to prevent it from reaching zero.
Some games like Bioshock 2 check for the available free space prior to save creation, and we should not be limited by arbitrary limits.
2020-07-10 00:37:39 -04:00
Morph
47e26d7bc7 settings: Remove storage size options 2020-07-10 00:37:39 -04:00
ReinUsesLisp
2a9d17b7e7 maxwell_dma: Rename registers to match official docs and reorder
Rename registers in the MaxwellDMA class to match Nvidia's official
documentation. This one can be found here:

https://github.com/NVIDIA/open-gpu-doc/blob/master/classes/dma-copy/clb0b5.h

While we are at it, reorganize the code in MaxwellDMA to be separated in
different functions.
2020-07-07 19:19:33 -03:00
31 changed files with 1205 additions and 648 deletions

View File

@@ -142,10 +142,32 @@ std::shared_ptr<Dynarmic::A32::Jit> ARM_Dynarmic_32::MakeJit(Common::PageTable&
// Timing
config.wall_clock_cntpct = uses_wall_clock;
// Optimizations
if (Settings::values.disable_cpu_opt) {
config.enable_optimizations = false;
config.enable_fast_dispatch = false;
// Safe optimizations
if (Settings::values.cpu_accuracy != Settings::CPUAccuracy::Accurate) {
if (!Settings::values.cpuopt_page_tables) {
config.page_table = nullptr;
}
if (!Settings::values.cpuopt_block_linking) {
config.optimizations &= ~Dynarmic::OptimizationFlag::BlockLinking;
}
if (!Settings::values.cpuopt_return_stack_buffer) {
config.optimizations &= ~Dynarmic::OptimizationFlag::ReturnStackBuffer;
}
if (!Settings::values.cpuopt_fast_dispatcher) {
config.optimizations &= ~Dynarmic::OptimizationFlag::FastDispatch;
}
if (!Settings::values.cpuopt_context_elimination) {
config.optimizations &= ~Dynarmic::OptimizationFlag::GetSetElimination;
}
if (!Settings::values.cpuopt_const_prop) {
config.optimizations &= ~Dynarmic::OptimizationFlag::ConstProp;
}
if (!Settings::values.cpuopt_misc_ir) {
config.optimizations &= ~Dynarmic::OptimizationFlag::MiscIROpt;
}
if (!Settings::values.cpuopt_reduce_misalign_checks) {
config.only_detect_misalignment_via_page_table_on_page_boundary = false;
}
}
return std::make_unique<Dynarmic::A32::Jit>(config);

View File

@@ -191,15 +191,37 @@ std::shared_ptr<Dynarmic::A64::Jit> ARM_Dynarmic_64::MakeJit(Common::PageTable&
// Unpredictable instructions
config.define_unpredictable_behaviour = true;
// Optimizations
if (Settings::values.disable_cpu_opt) {
config.enable_optimizations = false;
config.enable_fast_dispatch = false;
}
// Timing
config.wall_clock_cntpct = uses_wall_clock;
// Safe optimizations
if (Settings::values.cpu_accuracy != Settings::CPUAccuracy::Accurate) {
if (!Settings::values.cpuopt_page_tables) {
config.page_table = nullptr;
}
if (!Settings::values.cpuopt_block_linking) {
config.optimizations &= ~Dynarmic::OptimizationFlag::BlockLinking;
}
if (!Settings::values.cpuopt_return_stack_buffer) {
config.optimizations &= ~Dynarmic::OptimizationFlag::ReturnStackBuffer;
}
if (!Settings::values.cpuopt_fast_dispatcher) {
config.optimizations &= ~Dynarmic::OptimizationFlag::FastDispatch;
}
if (!Settings::values.cpuopt_context_elimination) {
config.optimizations &= ~Dynarmic::OptimizationFlag::GetSetElimination;
}
if (!Settings::values.cpuopt_const_prop) {
config.optimizations &= ~Dynarmic::OptimizationFlag::ConstProp;
}
if (!Settings::values.cpuopt_misc_ir) {
config.optimizations &= ~Dynarmic::OptimizationFlag::MiscIROpt;
}
if (!Settings::values.cpuopt_reduce_misalign_checks) {
config.only_detect_misalignment_via_page_table_on_page_boundary = false;
}
}
return std::make_shared<Dynarmic::A64::Jit>(config);
}

View File

@@ -12,6 +12,10 @@
namespace FileSys {
constexpr u64 NAND_USER_SIZE = 0x680000000; // 26624 MiB
constexpr u64 NAND_SYSTEM_SIZE = 0xA0000000; // 2560 MiB
constexpr u64 NAND_TOTAL_SIZE = 0x747C00000; // 29820 MiB
BISFactory::BISFactory(VirtualDir nand_root_, VirtualDir load_root_, VirtualDir dump_root_)
: nand_root(std::move(nand_root_)), load_root(std::move(load_root_)),
dump_root(std::move(dump_root_)),
@@ -110,30 +114,29 @@ VirtualDir BISFactory::GetImageDirectory() const {
u64 BISFactory::GetSystemNANDFreeSpace() const {
const auto sys_dir = GetOrCreateDirectoryRelative(nand_root, "/system");
if (sys_dir == nullptr)
return 0;
if (sys_dir == nullptr) {
return GetSystemNANDTotalSpace();
}
return GetSystemNANDTotalSpace() - sys_dir->GetSize();
}
u64 BISFactory::GetSystemNANDTotalSpace() const {
return static_cast<u64>(Settings::values.nand_system_size);
return NAND_SYSTEM_SIZE;
}
u64 BISFactory::GetUserNANDFreeSpace() const {
const auto usr_dir = GetOrCreateDirectoryRelative(nand_root, "/user");
if (usr_dir == nullptr)
return 0;
return GetUserNANDTotalSpace() - usr_dir->GetSize();
// For some reason games such as BioShock 1 check whether this is exactly 0x680000000 bytes.
// Set the free space to be 1 MiB less than the total as a workaround to this issue.
return GetUserNANDTotalSpace() - 0x100000;
}
u64 BISFactory::GetUserNANDTotalSpace() const {
return static_cast<u64>(Settings::values.nand_user_size);
return NAND_USER_SIZE;
}
u64 BISFactory::GetFullNANDTotalSpace() const {
return static_cast<u64>(Settings::values.nand_total_size);
return NAND_TOTAL_SIZE;
}
VirtualDir BISFactory::GetBCATDirectory(u64 title_id) const {

View File

@@ -80,16 +80,6 @@ VirtualDir PatchManager::PatchExeFS(VirtualDir exefs) const {
if (exefs == nullptr)
return exefs;
if (Settings::values.dump_exefs) {
LOG_INFO(Loader, "Dumping ExeFS for title_id={:016X}", title_id);
const auto dump_dir =
Core::System::GetInstance().GetFileSystemController().GetModificationDumpRoot(title_id);
if (dump_dir != nullptr) {
const auto exefs_dir = GetOrCreateDirectoryRelative(dump_dir, "/exefs");
VfsRawCopyD(exefs, exefs_dir);
}
}
const auto& installed = Core::System::GetInstance().GetContentProvider();
const auto& disabled = Settings::values.disabled_addons[title_id];
@@ -135,6 +125,16 @@ VirtualDir PatchManager::PatchExeFS(VirtualDir exefs) const {
}
}
if (Settings::values.dump_exefs) {
LOG_INFO(Loader, "Dumping ExeFS for title_id={:016X}", title_id);
const auto dump_dir =
Core::System::GetInstance().GetFileSystemController().GetModificationDumpRoot(title_id);
if (dump_dir != nullptr) {
const auto exefs_dir = GetOrCreateDirectoryRelative(dump_dir, "/exefs");
VfsRawCopyD(exefs, exefs_dir);
}
}
return exefs;
}

View File

@@ -10,6 +10,8 @@
namespace FileSys {
constexpr u64 SDMC_TOTAL_SIZE = 0x10000000000; // 1 TiB
SDMCFactory::SDMCFactory(VirtualDir dir_)
: dir(std::move(dir_)), contents(std::make_unique<RegisteredCache>(
GetOrCreateDirectoryRelative(dir, "/Nintendo/Contents/registered"),
@@ -46,7 +48,7 @@ u64 SDMCFactory::GetSDMCFreeSpace() const {
}
u64 SDMCFactory::GetSDMCTotalSpace() const {
return static_cast<u64>(Settings::values.sdmc_size);
return SDMC_TOTAL_SIZE;
}
} // namespace FileSys

View File

@@ -346,31 +346,6 @@ struct TouchscreenInput {
u32 rotation_angle;
};
enum class NANDTotalSize : u64 {
S29_1GB = 0x747C00000ULL,
};
enum class NANDUserSize : u64 {
S26GB = 0x680000000ULL,
};
enum class NANDSystemSize : u64 {
S2_5GB = 0xA0000000,
};
enum class SDMCSize : u64 {
S1GB = 0x40000000,
S2GB = 0x80000000,
S4GB = 0x100000000ULL,
S8GB = 0x200000000ULL,
S16GB = 0x400000000ULL,
S32GB = 0x800000000ULL,
S64GB = 0x1000000000ULL,
S128GB = 0x2000000000ULL,
S256GB = 0x4000000000ULL,
S1TB = 0x10000000000ULL,
};
enum class RendererBackend {
OpenGL = 0,
Vulkan = 1,
@@ -382,6 +357,11 @@ enum class GPUAccuracy : u32 {
Extreme = 2,
};
/// Selects how the CPU JIT is configured.
/// In the MakeJit hunks of this diff, the individual `cpuopt_*` toggles are only consulted when
/// the value is not `Accurate`.
enum class CPUAccuracy {
Accurate = 0,
DebugMode = 1,
};
extern bool configuring_global;
template <typename Type>
@@ -427,6 +407,18 @@ struct Values {
// Core
Setting<bool> use_multi_core;
// Cpu
CPUAccuracy cpu_accuracy;
bool cpuopt_page_tables;
bool cpuopt_block_linking;
bool cpuopt_return_stack_buffer;
bool cpuopt_fast_dispatcher;
bool cpuopt_context_elimination;
bool cpuopt_const_prop;
bool cpuopt_misc_ir;
bool cpuopt_reduce_misalign_checks;
// Renderer
Setting<RendererBackend> renderer_backend;
bool renderer_debug;
@@ -491,10 +483,6 @@ struct Values {
bool gamecard_inserted;
bool gamecard_current_game;
std::string gamecard_path;
NANDTotalSize nand_total_size;
NANDSystemSize nand_system_size;
NANDUserSize nand_user_size;
SDMCSize sdmc_size;
// Debugging
bool record_frame_times;
@@ -505,7 +493,6 @@ struct Values {
bool dump_nso;
bool reporting_services;
bool quest_flag;
bool disable_cpu_opt;
bool disable_macro_jit;
// Miscellaneous

View File

@@ -14,50 +14,45 @@
namespace Tegra::Engines {
using namespace Texture;
/// Binds the engine to the given system and GPU memory manager by storing references to them.
/// No registers are touched here.
MaxwellDMA::MaxwellDMA(Core::System& system, MemoryManager& memory_manager)
: system{system}, memory_manager{memory_manager} {}
void MaxwellDMA::CallMethod(u32 method, u32 method_argument, bool is_last_call) {
ASSERT_MSG(method < Regs::NUM_REGS,
"Invalid MaxwellDMA register, increase the size of the Regs structure");
ASSERT_MSG(method < NUM_REGS, "Invalid MaxwellDMA register");
regs.reg_array[method] = method_argument;
#define MAXWELLDMA_REG_INDEX(field_name) \
(offsetof(Tegra::Engines::MaxwellDMA::Regs, field_name) / sizeof(u32))
switch (method) {
case MAXWELLDMA_REG_INDEX(exec): {
HandleCopy();
break;
if (method == offsetof(Regs, launch_dma) / sizeof(u32)) {
Launch();
}
}
#undef MAXWELLDMA_REG_INDEX
}
void MaxwellDMA::CallMultiMethod(u32 method, const u32* base_start, u32 amount,
u32 methods_pending) {
for (std::size_t i = 0; i < amount; i++) {
for (size_t i = 0; i < amount; ++i) {
CallMethod(method, base_start[i], methods_pending - static_cast<u32>(i) <= 1);
}
}
void MaxwellDMA::HandleCopy() {
LOG_TRACE(HW_GPU, "Requested a DMA copy");
const GPUVAddr source = regs.src_address.Address();
const GPUVAddr dest = regs.dst_address.Address();
void MaxwellDMA::Launch() {
LOG_TRACE(Render_OpenGL, "DMA copy 0x{:x} -> 0x{:x}", static_cast<GPUVAddr>(regs.offset_in),
static_cast<GPUVAddr>(regs.offset_out));
// TODO(Subv): Perform more research and implement all features of this engine.
ASSERT(regs.exec.enable_swizzle == 0);
ASSERT(regs.exec.query_mode == Regs::QueryMode::None);
ASSERT(regs.exec.query_intr == Regs::QueryIntr::None);
ASSERT(regs.exec.copy_mode == Regs::CopyMode::Unk2);
ASSERT(regs.dst_params.pos_x == 0);
ASSERT(regs.dst_params.pos_y == 0);
const LaunchDMA& launch = regs.launch_dma;
ASSERT(launch.remap_enable == 0);
ASSERT(launch.semaphore_type == LaunchDMA::SemaphoreType::NONE);
ASSERT(launch.interrupt_type == LaunchDMA::InterruptType::NONE);
ASSERT(launch.data_transfer_type == LaunchDMA::DataTransferType::NON_PIPELINED);
ASSERT(regs.dst_params.origin.x == 0);
ASSERT(regs.dst_params.origin.y == 0);
if (!regs.exec.is_dst_linear && !regs.exec.is_src_linear) {
const bool is_src_pitch = launch.src_memory_layout == LaunchDMA::MemoryLayout::PITCH;
const bool is_dst_pitch = launch.dst_memory_layout == LaunchDMA::MemoryLayout::PITCH;
if (!is_src_pitch && !is_dst_pitch) {
// If both the source and the destination are in block layout, assert.
UNREACHABLE_MSG("Tiled->Tiled DMA transfers are not yet implemented");
return;
@@ -66,144 +61,161 @@ void MaxwellDMA::HandleCopy() {
// All copies here update the main memory, so mark all rasterizer states as invalid.
system.GPU().Maxwell3D().OnMemoryWrite();
if (regs.exec.is_dst_linear && regs.exec.is_src_linear) {
// When the enable_2d bit is disabled, the copy is performed as if we were copying a 1D
// buffer of length `x_count`, otherwise we copy a 2D image of dimensions (x_count,
// y_count).
if (!regs.exec.enable_2d) {
memory_manager.CopyBlock(dest, source, regs.x_count);
return;
}
// If both the source and the destination are in linear layout, perform a line-by-line
// copy. We're going to take a subrect of size (x_count, y_count) from the source
// rectangle. There is no need to manually flush/invalidate the regions because
// CopyBlock does that for us.
for (u32 line = 0; line < regs.y_count; ++line) {
const GPUVAddr source_line = source + line * regs.src_pitch;
const GPUVAddr dest_line = dest + line * regs.dst_pitch;
memory_manager.CopyBlock(dest_line, source_line, regs.x_count);
}
return;
}
ASSERT(regs.exec.enable_2d == 1);
if (regs.exec.is_dst_linear && !regs.exec.is_src_linear) {
ASSERT(regs.src_params.BlockDepth() == 0);
// Optimized path for micro copies.
if (regs.dst_pitch * regs.y_count < Texture::GetGOBSize() && regs.dst_pitch <= 64) {
const u32 bytes_per_pixel = regs.dst_pitch / regs.x_count;
const std::size_t src_size = Texture::GetGOBSize();
const std::size_t dst_size = regs.dst_pitch * regs.y_count;
u32 pos_x = regs.src_params.pos_x;
u32 pos_y = regs.src_params.pos_y;
const u64 offset =
Texture::GetGOBOffset(regs.src_params.size_x, regs.src_params.size_y, pos_x, pos_y,
regs.src_params.BlockDepth(), bytes_per_pixel);
const u32 x_in_gob = 64 / bytes_per_pixel;
pos_x = pos_x % x_in_gob;
pos_y = pos_y % 8;
if (read_buffer.size() < src_size) {
read_buffer.resize(src_size);
}
if (write_buffer.size() < dst_size) {
write_buffer.resize(dst_size);
}
if (Settings::IsGPULevelExtreme()) {
memory_manager.ReadBlock(source + offset, read_buffer.data(), src_size);
memory_manager.ReadBlock(dest, write_buffer.data(), dst_size);
} else {
memory_manager.ReadBlockUnsafe(source + offset, read_buffer.data(), src_size);
memory_manager.ReadBlockUnsafe(dest, write_buffer.data(), dst_size);
}
Texture::UnswizzleSubrect(regs.x_count, regs.y_count, regs.dst_pitch,
regs.src_params.size_x, bytes_per_pixel, read_buffer.data(),
write_buffer.data(), regs.src_params.BlockHeight(), pos_x,
pos_y);
memory_manager.WriteBlock(dest, write_buffer.data(), dst_size);
return;
}
// If the input is tiled and the output is linear, deswizzle the input and copy it over.
const u32 bytes_per_pixel = regs.dst_pitch / regs.x_count;
const std::size_t src_size = Texture::CalculateSize(
true, bytes_per_pixel, regs.src_params.size_x, regs.src_params.size_y,
regs.src_params.size_z, regs.src_params.BlockHeight(), regs.src_params.BlockDepth());
const std::size_t src_layer_size = Texture::CalculateSize(
true, bytes_per_pixel, regs.src_params.size_x, regs.src_params.size_y, 1,
regs.src_params.BlockHeight(), regs.src_params.BlockDepth());
const std::size_t dst_size = regs.dst_pitch * regs.y_count;
if (read_buffer.size() < src_size) {
read_buffer.resize(src_size);
}
if (write_buffer.size() < dst_size) {
write_buffer.resize(dst_size);
}
if (Settings::IsGPULevelExtreme()) {
memory_manager.ReadBlock(source, read_buffer.data(), src_size);
memory_manager.ReadBlock(dest, write_buffer.data(), dst_size);
} else {
memory_manager.ReadBlockUnsafe(source, read_buffer.data(), src_size);
memory_manager.ReadBlockUnsafe(dest, write_buffer.data(), dst_size);
}
Texture::UnswizzleSubrect(
regs.x_count, regs.y_count, regs.dst_pitch, regs.src_params.size_x, bytes_per_pixel,
read_buffer.data() + src_layer_size * regs.src_params.pos_z, write_buffer.data(),
regs.src_params.BlockHeight(), regs.src_params.pos_x, regs.src_params.pos_y);
memory_manager.WriteBlock(dest, write_buffer.data(), dst_size);
if (is_src_pitch && is_dst_pitch) {
CopyPitchToPitch();
} else {
ASSERT(regs.dst_params.BlockDepth() == 0);
ASSERT(launch.multi_line_enable == 1);
const u32 bytes_per_pixel = regs.src_pitch / regs.x_count;
const std::size_t dst_size = Texture::CalculateSize(
true, bytes_per_pixel, regs.dst_params.size_x, regs.dst_params.size_y,
regs.dst_params.size_z, regs.dst_params.BlockHeight(), regs.dst_params.BlockDepth());
const std::size_t dst_layer_size = Texture::CalculateSize(
true, bytes_per_pixel, regs.dst_params.size_x, regs.dst_params.size_y, 1,
regs.dst_params.BlockHeight(), regs.dst_params.BlockDepth());
const std::size_t src_size = regs.src_pitch * regs.y_count;
if (read_buffer.size() < src_size) {
read_buffer.resize(src_size);
}
if (write_buffer.size() < dst_size) {
write_buffer.resize(dst_size);
}
if (Settings::IsGPULevelExtreme()) {
memory_manager.ReadBlock(source, read_buffer.data(), src_size);
memory_manager.ReadBlock(dest, write_buffer.data(), dst_size);
if (!is_src_pitch && is_dst_pitch) {
CopyBlockLinearToPitch();
} else {
memory_manager.ReadBlockUnsafe(source, read_buffer.data(), src_size);
memory_manager.ReadBlockUnsafe(dest, write_buffer.data(), dst_size);
CopyPitchToBlockLinear();
}
// If the input is linear and the output is tiled, swizzle the input and copy it over.
Texture::SwizzleSubrect(
regs.x_count, regs.y_count, regs.src_pitch, regs.dst_params.size_x, bytes_per_pixel,
write_buffer.data() + dst_layer_size * regs.dst_params.pos_z, read_buffer.data(),
regs.dst_params.BlockHeight(), regs.dst_params.pos_x, regs.dst_params.pos_y);
memory_manager.WriteBlock(dest, write_buffer.data(), dst_size);
}
}
void MaxwellDMA::CopyPitchToPitch() {
// When `multi_line_enable` bit is disabled the copy is performed as if we were copying a 1D
// buffer of length `line_length_in`.
// Otherwise we copy a 2D image of dimensions (line_length_in, line_count).
if (!regs.launch_dma.multi_line_enable) {
memory_manager.CopyBlock(regs.offset_out, regs.offset_in, regs.line_length_in);
return;
}
// Perform a line-by-line copy.
// We're going to take a subrect of size (line_length_in, line_count) from the source rectangle.
// There is no need to manually flush/invalidate the regions because CopyBlock does that for us.
for (u32 line = 0; line < regs.line_count; ++line) {
const GPUVAddr source_line = regs.offset_in + static_cast<size_t>(line) * regs.pitch_in;
const GPUVAddr dest_line = regs.offset_out + static_cast<size_t>(line) * regs.pitch_out;
memory_manager.CopyBlock(dest_line, source_line, regs.line_length_in);
}
}
void MaxwellDMA::CopyBlockLinearToPitch() {
ASSERT(regs.src_params.block_size.depth == 0);
// Optimized path for micro copies.
const size_t dst_size = static_cast<size_t>(regs.pitch_out) * regs.line_count;
if (dst_size < GOB_SIZE && regs.pitch_out <= GOB_SIZE_X) {
FastCopyBlockLinearToPitch();
return;
}
// Deswizzle the input and copy it over.
const u32 bytes_per_pixel = regs.pitch_out / regs.line_length_in;
const Parameters& src_params = regs.src_params;
const u32 width = src_params.width;
const u32 height = src_params.height;
const u32 depth = src_params.depth;
const u32 block_height = src_params.block_size.height;
const u32 block_depth = src_params.block_size.depth;
const size_t src_size =
CalculateSize(true, bytes_per_pixel, width, height, depth, block_height, block_depth);
const size_t src_layer_size =
CalculateSize(true, bytes_per_pixel, width, height, 1, block_height, block_depth);
if (read_buffer.size() < src_size) {
read_buffer.resize(src_size);
}
if (write_buffer.size() < dst_size) {
write_buffer.resize(dst_size);
}
if (Settings::IsGPULevelExtreme()) {
memory_manager.ReadBlock(regs.offset_in, read_buffer.data(), src_size);
memory_manager.ReadBlock(regs.offset_out, write_buffer.data(), dst_size);
} else {
memory_manager.ReadBlockUnsafe(regs.offset_in, read_buffer.data(), src_size);
memory_manager.ReadBlockUnsafe(regs.offset_out, write_buffer.data(), dst_size);
}
UnswizzleSubrect(regs.line_length_in, regs.line_count, regs.pitch_out, width, bytes_per_pixel,
read_buffer.data() + src_layer_size * src_params.layer, write_buffer.data(),
block_height, src_params.origin.x, src_params.origin.y);
memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size);
}
void MaxwellDMA::CopyPitchToBlockLinear() {
const auto& dst_params = regs.dst_params;
const u32 bytes_per_pixel = regs.pitch_in / regs.line_length_in;
const u32 width = dst_params.width;
const u32 height = dst_params.height;
const u32 depth = dst_params.depth;
const u32 block_height = dst_params.block_size.height;
const u32 block_depth = dst_params.block_size.depth;
const size_t dst_size =
CalculateSize(true, bytes_per_pixel, width, height, depth, block_height, block_depth);
const size_t dst_layer_size =
CalculateSize(true, bytes_per_pixel, width, height, 1, block_height, block_depth);
const size_t src_size = static_cast<size_t>(regs.pitch_in) * regs.line_count;
if (read_buffer.size() < src_size) {
read_buffer.resize(src_size);
}
if (write_buffer.size() < dst_size) {
write_buffer.resize(dst_size);
}
if (Settings::IsGPULevelExtreme()) {
memory_manager.ReadBlock(regs.offset_in, read_buffer.data(), src_size);
memory_manager.ReadBlock(regs.offset_out, write_buffer.data(), dst_size);
} else {
memory_manager.ReadBlockUnsafe(regs.offset_in, read_buffer.data(), src_size);
memory_manager.ReadBlockUnsafe(regs.offset_out, write_buffer.data(), dst_size);
}
// If the input is linear and the output is tiled, swizzle the input and copy it over.
if (regs.dst_params.block_size.depth > 0) {
ASSERT(dst_params.layer == 0);
SwizzleSliceToVoxel(regs.line_length_in, regs.line_count, regs.pitch_in, width, height,
bytes_per_pixel, block_height, block_depth, dst_params.origin.x,
dst_params.origin.y, write_buffer.data(), read_buffer.data());
} else {
SwizzleSubrect(regs.line_length_in, regs.line_count, regs.pitch_in, width, bytes_per_pixel,
write_buffer.data() + dst_layer_size * dst_params.layer, read_buffer.data(),
block_height, dst_params.origin.x, dst_params.origin.y);
}
memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size);
}
void MaxwellDMA::FastCopyBlockLinearToPitch() {
const u32 bytes_per_pixel = regs.pitch_out / regs.line_length_in;
const size_t src_size = GOB_SIZE;
const size_t dst_size = static_cast<size_t>(regs.pitch_out) * regs.line_count;
u32 pos_x = regs.src_params.origin.x;
u32 pos_y = regs.src_params.origin.y;
const u64 offset = GetGOBOffset(regs.src_params.width, regs.src_params.height, pos_x, pos_y,
regs.src_params.block_size.height, bytes_per_pixel);
const u32 x_in_gob = 64 / bytes_per_pixel;
pos_x = pos_x % x_in_gob;
pos_y = pos_y % 8;
if (read_buffer.size() < src_size) {
read_buffer.resize(src_size);
}
if (write_buffer.size() < dst_size) {
write_buffer.resize(dst_size);
}
if (Settings::IsGPULevelExtreme()) {
memory_manager.ReadBlock(regs.offset_in + offset, read_buffer.data(), src_size);
memory_manager.ReadBlock(regs.offset_out, write_buffer.data(), dst_size);
} else {
memory_manager.ReadBlockUnsafe(regs.offset_in + offset, read_buffer.data(), src_size);
memory_manager.ReadBlockUnsafe(regs.offset_out, write_buffer.data(), dst_size);
}
UnswizzleSubrect(regs.line_length_in, regs.line_count, regs.pitch_out, regs.src_params.width,
bytes_per_pixel, read_buffer.data(), write_buffer.data(),
regs.src_params.block_size.height, pos_x, pos_y);
memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size);
}
} // namespace Tegra::Engines

View File

@@ -24,12 +24,167 @@ class MemoryManager;
namespace Tegra::Engines {
/**
* This Engine is known as GK104_Copy. Documentation can be found in:
* This engine is known as gk104_copy. Documentation can be found in:
* https://github.com/NVIDIA/open-gpu-doc/blob/master/classes/dma-copy/clb0b5.h
* https://github.com/envytools/envytools/blob/master/rnndb/fifo/gk104_copy.xml
*/
class MaxwellDMA final : public EngineInterface {
public:
/// A GPU virtual address held as two consecutive 32-bit words, upper word first.
struct PackedGPUVAddr {
    u32 upper;
    u32 lower;

    /// Joins both words into a single address. Only the low 8 bits of `upper` are kept, so the
    /// result is at most 40 bits wide.
    constexpr operator GPUVAddr() const noexcept {
        const GPUVAddr high_bits = static_cast<GPUVAddr>(upper & 0xff) << 32;
        return high_bits | lower;
    }
};
/// Block dimensions of a block-linear surface: four 4-bit fields packed into the low 16 bits of
/// one 32-bit word.
/// NOTE(review): the fields are presumably log2 sizes in GOBs — confirm against clb0b5.h.
union BlockSize {
BitField<0, 4, u32> width;
BitField<4, 4, u32> height;
BitField<8, 4, u32> depth;
BitField<12, 4, u32> gob_height;
};
// The union must occupy exactly one register word.
static_assert(sizeof(BlockSize) == 4);
/// Origin of a copy rectangle inside a surface: x in the low 16 bits and y in the high 16 bits
/// of one 32-bit word.
union Origin {
BitField<0, 16, u32> x;
BitField<16, 16, u32> y;
};
// The union must occupy exactly one register word.
static_assert(sizeof(Origin) == 4);
/// Description of one block-linear surface (used for both `src_params` and `dst_params`):
/// six consecutive register words.
struct Parameters {
BlockSize block_size;
u32 width;
u32 height;
u32 depth;
// Layer index; the copy routines multiply it by the size of one layer to find the slice to
// operate on.
u32 layer;
Origin origin;
};
// Six 32-bit words; the field order is the register order and must not change.
static_assert(sizeof(Parameters) == 24);
/// Semaphore registers: a packed GPU address followed by a 32-bit payload (three words).
/// NOTE(review): the copy code in this diff asserts that no semaphore is requested, so these
/// registers look unused for now — confirm.
struct Semaphore {
PackedGPUVAddr address;
u32 payload;
};
static_assert(sizeof(Semaphore) == 12);
/// Render-enable registers: a packed GPU address followed by a word whose low 3 bits hold the
/// mode (three words in total).
struct RenderEnable {
enum class Mode : u32 {
FALSE = 0,
TRUE = 1,
CONDITIONAL = 2,
RENDER_IF_EQUAL = 3,
RENDER_IF_NOT_EQUAL = 4,
};
PackedGPUVAddr address;
BitField<0, 3, Mode> mode;
};
static_assert(sizeof(RenderEnable) == 12);
/// Target selector stored in the low 2 bits of the `src_phys_mode` / `dst_phys_mode` registers.
enum class PhysModeTarget : u32 {
LOCAL_FB = 0,
COHERENT_SYSMEM = 1,
NONCOHERENT_SYSMEM = 2,
};
// One register word carrying a PhysModeTarget in bits 0-1.
using PhysMode = BitField<0, 2, PhysModeTarget>;
/// Bit layout of the `launch_dma` register. A write to this register is what starts a copy
/// (CallMethod invokes Launch() when the written method index matches `launch_dma`).
/// Names follow NVIDIA's clb0b5.h, which the class comment points to.
union LaunchDMA {
enum class DataTransferType : u32 {
NONE = 0,
PIPELINED = 1,
NON_PIPELINED = 2,
};
enum class SemaphoreType : u32 {
NONE = 0,
RELEASE_ONE_WORD_SEMAPHORE = 1,
RELEASE_FOUR_WORD_SEMAPHORE = 2,
};
enum class InterruptType : u32 {
NONE = 0,
BLOCKING = 1,
NON_BLOCKING = 2,
};
// Selects between the tiled (block-linear) and the linear (pitch) memory layout.
enum class MemoryLayout : u32 {
BLOCKLINEAR = 0,
PITCH = 1,
};
enum class Type : u32 {
VIRTUAL = 0,
PHYSICAL = 1,
};
enum class SemaphoreReduction : u32 {
IMIN = 0,
IMAX = 1,
IXOR = 2,
IAND = 3,
IOR = 4,
IADD = 5,
INC = 6,
DEC = 7,
FADD = 0xA,
};
enum class SemaphoreReductionSign : u32 {
SIGNED = 0,
UNSIGNED = 1,
};
enum class BypassL2 : u32 {
USE_PTE_SETTING = 0,
FORCE_VOLATILE = 1,
};
// Launch() currently asserts NON_PIPELINED here.
BitField<0, 2, DataTransferType> data_transfer_type;
BitField<2, 1, u32> flush_enable;
// Launch() currently asserts NONE for both the semaphore and the interrupt type.
BitField<3, 2, SemaphoreType> semaphore_type;
BitField<5, 2, InterruptType> interrupt_type;
// The source/destination layouts decide which copy routine Launch() dispatches to.
BitField<7, 1, MemoryLayout> src_memory_layout;
BitField<8, 1, MemoryLayout> dst_memory_layout;
// When clear, a pitch->pitch copy moves a single run of `line_length_in` bytes; when set,
// `line_count` rows are copied.
BitField<9, 1, u32> multi_line_enable;
// Launch() currently asserts this is 0 (remapping is not implemented).
BitField<10, 1, u32> remap_enable;
BitField<11, 1, u32> rmwdisable;
BitField<12, 1, Type> src_type;
BitField<13, 1, Type> dst_type;
BitField<14, 4, SemaphoreReduction> semaphore_reduction;
BitField<18, 1, SemaphoreReductionSign> semaphore_reduction_sign;
BitField<19, 1, u32> reduction_enable;
BitField<20, 1, BypassL2> bypass_l2;
};
// The union must occupy exactly one register word.
static_assert(sizeof(LaunchDMA) == 4);
/// Remap registers: two words followed by one word of component-selection bit fields
/// (three words in total, asserted to start at register 0x1C0).
struct RemapConst {
// Source selector for each destination component.
enum Swizzle : u32 {
SRC_X = 0,
SRC_Y = 1,
SRC_Z = 2,
SRC_W = 3,
CONST_A = 4,
CONST_B = 5,
NO_WRITE = 6,
};
// NOTE(review): the pre-rename register layout in this same diff stored two plain u32
// constants (`const0`, `const1`) in these two words (0x1C0/0x1C1). Typing them as a packed
// GPU address looks suspicious — they may really be the CONST_A / CONST_B values. Confirm
// against clb0b5.h before relying on `address`.
PackedGPUVAddr address;
union {
BitField<0, 3, Swizzle> dst_x;
BitField<4, 3, Swizzle> dst_y;
BitField<8, 3, Swizzle> dst_z;
BitField<12, 3, Swizzle> dst_w;
BitField<16, 2, u32> component_size_minus_one;
BitField<20, 2, u32> num_src_components_minus_one;
BitField<24, 2, u32> num_dst_components_minus_one;
};
};
static_assert(sizeof(RemapConst) == 12);
explicit MaxwellDMA(Core::System& system, MemoryManager& memory_manager);
~MaxwellDMA() = default;
@@ -40,144 +195,19 @@ public:
void CallMultiMethod(u32 method, const u32* base_start, u32 amount,
u32 methods_pending) override;
struct Regs {
static constexpr std::size_t NUM_REGS = 0x1D6;
struct Parameters {
union {
BitField<0, 4, u32> block_depth;
BitField<4, 4, u32> block_height;
BitField<8, 4, u32> block_width;
};
u32 size_x;
u32 size_y;
u32 size_z;
u32 pos_z;
union {
BitField<0, 16, u32> pos_x;
BitField<16, 16, u32> pos_y;
};
u32 BlockHeight() const {
return block_height.Value();
}
u32 BlockDepth() const {
return block_depth.Value();
}
};
static_assert(sizeof(Parameters) == 24, "Parameters has wrong size");
enum class ComponentMode : u32 {
Src0 = 0,
Src1 = 1,
Src2 = 2,
Src3 = 3,
Const0 = 4,
Const1 = 5,
Zero = 6,
};
enum class CopyMode : u32 {
None = 0,
Unk1 = 1,
Unk2 = 2,
};
enum class QueryMode : u32 {
None = 0,
Short = 1,
Long = 2,
};
enum class QueryIntr : u32 {
None = 0,
Block = 1,
NonBlock = 2,
};
union {
struct {
INSERT_UNION_PADDING_WORDS(0xC0);
struct {
union {
BitField<0, 2, CopyMode> copy_mode;
BitField<2, 1, u32> flush;
BitField<3, 2, QueryMode> query_mode;
BitField<5, 2, QueryIntr> query_intr;
BitField<7, 1, u32> is_src_linear;
BitField<8, 1, u32> is_dst_linear;
BitField<9, 1, u32> enable_2d;
BitField<10, 1, u32> enable_swizzle;
};
} exec;
INSERT_UNION_PADDING_WORDS(0x3F);
struct {
u32 address_high;
u32 address_low;
GPUVAddr Address() const {
return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
address_low);
}
} src_address;
struct {
u32 address_high;
u32 address_low;
GPUVAddr Address() const {
return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
address_low);
}
} dst_address;
u32 src_pitch;
u32 dst_pitch;
u32 x_count;
u32 y_count;
INSERT_UNION_PADDING_WORDS(0xB8);
u32 const0;
u32 const1;
union {
BitField<0, 4, ComponentMode> component0;
BitField<4, 4, ComponentMode> component1;
BitField<8, 4, ComponentMode> component2;
BitField<12, 4, ComponentMode> component3;
BitField<16, 2, u32> component_size;
BitField<20, 3, u32> src_num_components;
BitField<24, 3, u32> dst_num_components;
u32 SrcBytePerPixel() const {
return src_num_components.Value() * component_size.Value();
}
u32 DstBytePerPixel() const {
return dst_num_components.Value() * component_size.Value();
}
} swizzle_config;
Parameters dst_params;
INSERT_UNION_PADDING_WORDS(1);
Parameters src_params;
INSERT_UNION_PADDING_WORDS(0x13);
};
std::array<u32, NUM_REGS> reg_array;
};
} regs{};
private:
/// Performs the copy from the source buffer to the destination buffer as configured in the
/// registers.
void Launch();
void CopyPitchToPitch();
void CopyBlockLinearToPitch();
void CopyPitchToBlockLinear();
void FastCopyBlockLinearToPitch();
Core::System& system;
MemoryManager& memory_manager;
@@ -185,28 +215,58 @@ private:
std::vector<u8> read_buffer;
std::vector<u8> write_buffer;
/// Performs the copy from the source buffer to the destination buffer as configured in the
/// registers.
void HandleCopy();
};
/// Number of 32-bit registers exposed by the engine; CallMethod asserts every method index is
/// below this value before storing into `reg_array`.
static constexpr std::size_t NUM_REGS = 0x800;
/// Register file of the engine. The named view and `reg_array` alias the same storage: methods
/// are written through `reg_array` and read back through the named fields. The `reserved*`
/// arrays are padding that keeps each named field at its register index; the positions of the
/// fields used by the copy code are checked by the ASSERT_REG_POSITION static_asserts.
struct Regs {
union {
struct {
u32 reserved[0x40];
u32 nop;
u32 reserved01[0xf];
u32 pm_trigger;
u32 reserved02[0x3f];
Semaphore semaphore;
u32 reserved03[0x2];
RenderEnable render_enable;
PhysMode src_phys_mode;
PhysMode dst_phys_mode;
u32 reserved04[0x26];
// Register 0xC0: writing it launches the copy.
LaunchDMA launch_dma;
u32 reserved05[0x3f];
// Registers 0x100-0x107: source/destination addresses, pitches and rectangle size.
PackedGPUVAddr offset_in;
PackedGPUVAddr offset_out;
u32 pitch_in;
u32 pitch_out;
u32 line_length_in;
u32 line_count;
u32 reserved06[0xb8];
// Registers 0x1C0, 0x1C3 and 0x1CA respectively.
RemapConst remap_const;
Parameters dst_params;
u32 reserved07[0x1];
Parameters src_params;
u32 reserved08[0x275];
u32 pm_trigger_end;
u32 reserved09[0x3ba];
};
std::array<u32, NUM_REGS> reg_array;
};
} regs{};
// Compile-time check that a named register of MaxwellDMA::Regs sits at the expected word index.
// `position` is expressed in 32-bit words, hence the multiplication by 4 to compare against the
// byte offset returned by offsetof.
#define ASSERT_REG_POSITION(field_name, position) \
static_assert(offsetof(MaxwellDMA::Regs, field_name) == position * 4, \
"Field " #field_name " has invalid position")
// NOTE(review): the assertions from `exec` through the first `src_params` name fields such as
// exec, src_address or x_count that the Regs struct above does not declare, and dst_params /
// src_params are then asserted a second time below. These look like the pre-rename lines of the
// change — confirm they are gone from the final file.
ASSERT_REG_POSITION(exec, 0xC0);
ASSERT_REG_POSITION(src_address, 0x100);
ASSERT_REG_POSITION(dst_address, 0x102);
ASSERT_REG_POSITION(src_pitch, 0x104);
ASSERT_REG_POSITION(dst_pitch, 0x105);
ASSERT_REG_POSITION(x_count, 0x106);
ASSERT_REG_POSITION(y_count, 0x107);
ASSERT_REG_POSITION(const0, 0x1C0);
ASSERT_REG_POSITION(const1, 0x1C1);
ASSERT_REG_POSITION(swizzle_config, 0x1C2);
ASSERT_REG_POSITION(dst_params, 0x1C3);
ASSERT_REG_POSITION(src_params, 0x1CA);
// Assertions matching the field names declared in Regs.
ASSERT_REG_POSITION(launch_dma, 0xC0);
ASSERT_REG_POSITION(offset_in, 0x100);
ASSERT_REG_POSITION(offset_out, 0x102);
ASSERT_REG_POSITION(pitch_in, 0x104);
ASSERT_REG_POSITION(pitch_out, 0x105);
ASSERT_REG_POSITION(line_length_in, 0x106);
ASSERT_REG_POSITION(line_count, 0x107);
ASSERT_REG_POSITION(remap_const, 0x1C0);
ASSERT_REG_POSITION(dst_params, 0x1C3);
ASSERT_REG_POSITION(src_params, 0x1CA);
// The macro is only meaningful for the checks above; do not leak it to includers.
#undef ASSERT_REG_POSITION
};
} // namespace Tegra::Engines

View File

@@ -343,8 +343,7 @@ std::size_t SurfaceParams::GetLayerSize(bool as_host_size, bool uncompressed) co
size += GetInnerMipmapMemorySize(level, as_host_size, uncompressed);
}
if (is_tiled && is_layered) {
return Common::AlignBits(size,
Tegra::Texture::GetGOBSizeShift() + block_height + block_depth);
return Common::AlignBits(size, Tegra::Texture::GOB_SIZE_SHIFT + block_height + block_depth);
}
return size;
}
@@ -418,7 +417,7 @@ std::tuple<u32, u32, u32> SurfaceParams::GetBlockOffsetXYZ(u32 offset) const {
const u32 block_size = GetBlockSize();
const u32 block_index = offset / block_size;
const u32 gob_offset = offset % block_size;
const u32 gob_index = gob_offset / static_cast<u32>(Tegra::Texture::GetGOBSize());
const u32 gob_index = gob_offset / static_cast<u32>(Tegra::Texture::GOB_SIZE);
const u32 x_gob_pixels = 64U / GetBytesPerPixel();
const u32 x_block_pixels = x_gob_pixels << block_width;
const u32 y_block_pixels = 8U << block_height;

View File

@@ -204,7 +204,7 @@ public:
static std::size_t AlignLayered(const std::size_t out_size, const u32 block_height,
const u32 block_depth) {
return Common::AlignBits(out_size,
Tegra::Texture::GetGOBSizeShift() + block_height + block_depth);
Tegra::Texture::GOB_SIZE_SHIFT + block_height + block_depth);
}
/// Converts a width from a type of surface into another. This helps represent the

View File

@@ -6,6 +6,7 @@
#include <cstring>
#include "common/alignment.h"
#include "common/assert.h"
#include "common/bit_util.h"
#include "video_core/gpu.h"
#include "video_core/textures/decoders.h"
#include "video_core/textures/texture.h"
@@ -37,20 +38,10 @@ struct alignas(64) SwizzleTable {
std::array<std::array<u16, M>, N> values{};
};
constexpr u32 gob_size_x_shift = 6;
constexpr u32 gob_size_y_shift = 3;
constexpr u32 gob_size_z_shift = 0;
constexpr u32 gob_size_shift = gob_size_x_shift + gob_size_y_shift + gob_size_z_shift;
constexpr u32 FAST_SWIZZLE_ALIGN = 16;
constexpr u32 gob_size_x = 1U << gob_size_x_shift;
constexpr u32 gob_size_y = 1U << gob_size_y_shift;
constexpr u32 gob_size_z = 1U << gob_size_z_shift;
constexpr u32 gob_size = 1U << gob_size_shift;
constexpr u32 fast_swizzle_align = 16;
constexpr auto legacy_swizzle_table = SwizzleTable<gob_size_y, gob_size_x, gob_size_z>();
constexpr auto fast_swizzle_table = SwizzleTable<gob_size_y, 4, fast_swizzle_align>();
constexpr auto LEGACY_SWIZZLE_TABLE = SwizzleTable<GOB_SIZE_X, GOB_SIZE_X, GOB_SIZE_Z>();
constexpr auto FAST_SWIZZLE_TABLE = SwizzleTable<GOB_SIZE_Y, 4, FAST_SWIZZLE_ALIGN>();
/**
* This function manages ALL the GOBs(Group of Bytes) Inside a single block.
@@ -69,17 +60,17 @@ void PreciseProcessBlock(u8* const swizzled_data, u8* const unswizzled_data, con
u32 y_address = z_address;
u32 pixel_base = layer_z * z + y_start * stride_x;
for (u32 y = y_start; y < y_end; y++) {
const auto& table = legacy_swizzle_table[y % gob_size_y];
const auto& table = LEGACY_SWIZZLE_TABLE[y % GOB_SIZE_Y];
for (u32 x = x_start; x < x_end; x++) {
const u32 swizzle_offset{y_address + table[x * bytes_per_pixel % gob_size_x]};
const u32 swizzle_offset{y_address + table[x * bytes_per_pixel % GOB_SIZE_X]};
const u32 pixel_index{x * out_bytes_per_pixel + pixel_base};
data_ptrs[unswizzle] = swizzled_data + swizzle_offset;
data_ptrs[!unswizzle] = unswizzled_data + pixel_index;
std::memcpy(data_ptrs[0], data_ptrs[1], bytes_per_pixel);
}
pixel_base += stride_x;
if ((y + 1) % gob_size_y == 0)
y_address += gob_size;
if ((y + 1) % GOB_SIZE_Y == 0)
y_address += GOB_SIZE;
}
z_address += xy_block_size;
}
@@ -104,18 +95,18 @@ void FastProcessBlock(u8* const swizzled_data, u8* const unswizzled_data, const
u32 y_address = z_address;
u32 pixel_base = layer_z * z + y_start * stride_x;
for (u32 y = y_start; y < y_end; y++) {
const auto& table = fast_swizzle_table[y % gob_size_y];
for (u32 xb = x_startb; xb < x_endb; xb += fast_swizzle_align) {
const u32 swizzle_offset{y_address + table[(xb / fast_swizzle_align) % 4]};
const auto& table = FAST_SWIZZLE_TABLE[y % GOB_SIZE_Y];
for (u32 xb = x_startb; xb < x_endb; xb += FAST_SWIZZLE_ALIGN) {
const u32 swizzle_offset{y_address + table[(xb / FAST_SWIZZLE_ALIGN) % 4]};
const u32 out_x = xb * out_bytes_per_pixel / bytes_per_pixel;
const u32 pixel_index{out_x + pixel_base};
data_ptrs[unswizzle ? 1 : 0] = swizzled_data + swizzle_offset;
data_ptrs[unswizzle ? 0 : 1] = unswizzled_data + pixel_index;
std::memcpy(data_ptrs[0], data_ptrs[1], fast_swizzle_align);
std::memcpy(data_ptrs[0], data_ptrs[1], FAST_SWIZZLE_ALIGN);
}
pixel_base += stride_x;
if ((y + 1) % gob_size_y == 0)
y_address += gob_size;
if ((y + 1) % GOB_SIZE_Y == 0)
y_address += GOB_SIZE;
}
z_address += xy_block_size;
}
@@ -138,9 +129,9 @@ void SwizzledData(u8* const swizzled_data, u8* const unswizzled_data, const bool
auto div_ceil = [](const u32 x, const u32 y) { return ((x + y - 1) / y); };
const u32 stride_x = width * out_bytes_per_pixel;
const u32 layer_z = height * stride_x;
const u32 gob_elements_x = gob_size_x / bytes_per_pixel;
constexpr u32 gob_elements_y = gob_size_y;
constexpr u32 gob_elements_z = gob_size_z;
const u32 gob_elements_x = GOB_SIZE_X / bytes_per_pixel;
constexpr u32 gob_elements_y = GOB_SIZE_Y;
constexpr u32 gob_elements_z = GOB_SIZE_Z;
const u32 block_x_elements = gob_elements_x;
const u32 block_y_elements = gob_elements_y * block_height;
const u32 block_z_elements = gob_elements_z * block_depth;
@@ -148,7 +139,7 @@ void SwizzledData(u8* const swizzled_data, u8* const unswizzled_data, const bool
const u32 blocks_on_x = div_ceil(aligned_width, block_x_elements);
const u32 blocks_on_y = div_ceil(height, block_y_elements);
const u32 blocks_on_z = div_ceil(depth, block_z_elements);
const u32 xy_block_size = gob_size * block_height;
const u32 xy_block_size = GOB_SIZE * block_height;
const u32 block_size = xy_block_size * block_depth;
u32 tile_offset = 0;
for (u32 zb = 0; zb < blocks_on_z; zb++) {
@@ -182,7 +173,7 @@ void CopySwizzledData(u32 width, u32 height, u32 depth, u32 bytes_per_pixel,
bool unswizzle, u32 block_height, u32 block_depth, u32 width_spacing) {
const u32 block_height_size{1U << block_height};
const u32 block_depth_size{1U << block_depth};
if (bytes_per_pixel % 3 != 0 && (width * bytes_per_pixel) % fast_swizzle_align == 0) {
if (bytes_per_pixel % 3 != 0 && (width * bytes_per_pixel) % FAST_SWIZZLE_ALIGN == 0) {
SwizzledData<true>(swizzled_data, unswizzled_data, unswizzle, width, height, depth,
bytes_per_pixel, out_bytes_per_pixel, block_height_size,
block_depth_size, width_spacing);
@@ -259,25 +250,26 @@ std::vector<u8> UnswizzleTexture(u8* address, u32 tile_size_x, u32 tile_size_y,
}
void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width,
u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data,
u32 bytes_per_pixel, u8* swizzled_data, const u8* unswizzled_data,
u32 block_height_bit, u32 offset_x, u32 offset_y) {
const u32 block_height = 1U << block_height_bit;
const u32 image_width_in_gobs{(swizzled_width * bytes_per_pixel + (gob_size_x - 1)) /
gob_size_x};
const u32 image_width_in_gobs =
(swizzled_width * bytes_per_pixel + (GOB_SIZE_X - 1)) / GOB_SIZE_X;
for (u32 line = 0; line < subrect_height; ++line) {
const u32 dst_y = line + offset_y;
const u32 gob_address_y =
(dst_y / (gob_size_y * block_height)) * gob_size * block_height * image_width_in_gobs +
((dst_y % (gob_size_y * block_height)) / gob_size_y) * gob_size;
const auto& table = legacy_swizzle_table[dst_y % gob_size_y];
(dst_y / (GOB_SIZE_Y * block_height)) * GOB_SIZE * block_height * image_width_in_gobs +
((dst_y % (GOB_SIZE_Y * block_height)) / GOB_SIZE_Y) * GOB_SIZE;
const auto& table = LEGACY_SWIZZLE_TABLE[dst_y % GOB_SIZE_Y];
for (u32 x = 0; x < subrect_width; ++x) {
const u32 dst_x = x + offset_x;
const u32 gob_address =
gob_address_y + (dst_x * bytes_per_pixel / gob_size_x) * gob_size * block_height;
const u32 swizzled_offset = gob_address + table[(dst_x * bytes_per_pixel) % gob_size_x];
u8* source_line = unswizzled_data + line * source_pitch + x * bytes_per_pixel;
u8* dest_addr = swizzled_data + swizzled_offset;
gob_address_y + (dst_x * bytes_per_pixel / GOB_SIZE_X) * GOB_SIZE * block_height;
const u32 swizzled_offset = gob_address + table[(dst_x * bytes_per_pixel) % GOB_SIZE_X];
const u32 unswizzled_offset = line * source_pitch + x * bytes_per_pixel;
const u8* const source_line = unswizzled_data + unswizzled_offset;
u8* const dest_addr = swizzled_data + swizzled_offset;
std::memcpy(dest_addr, source_line, bytes_per_pixel);
}
}
@@ -289,14 +281,15 @@ void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32
const u32 block_height = 1U << block_height_bit;
for (u32 line = 0; line < subrect_height; ++line) {
const u32 y2 = line + offset_y;
const u32 gob_address_y = (y2 / (gob_size_y * block_height)) * gob_size * block_height +
((y2 % (gob_size_y * block_height)) / gob_size_y) * gob_size;
const auto& table = legacy_swizzle_table[y2 % gob_size_y];
const u32 gob_address_y = (y2 / (GOB_SIZE_Y * block_height)) * GOB_SIZE * block_height +
((y2 % (GOB_SIZE_Y * block_height)) / GOB_SIZE_Y) * GOB_SIZE;
const auto& table = LEGACY_SWIZZLE_TABLE[y2 % GOB_SIZE_Y];
for (u32 x = 0; x < subrect_width; ++x) {
const u32 x2 = (x + offset_x) * bytes_per_pixel;
const u32 gob_address = gob_address_y + (x2 / gob_size_x) * gob_size * block_height;
const u32 swizzled_offset = gob_address + table[x2 % gob_size_x];
u8* dest_line = unswizzled_data + line * dest_pitch + x * bytes_per_pixel;
const u32 gob_address = gob_address_y + (x2 / GOB_SIZE_X) * GOB_SIZE * block_height;
const u32 swizzled_offset = gob_address + table[x2 % GOB_SIZE_X];
const u32 unswizzled_offset = line * dest_pitch + x * bytes_per_pixel;
u8* dest_line = unswizzled_data + unswizzled_offset;
u8* source_addr = swizzled_data + swizzled_offset;
std::memcpy(dest_line, source_addr, bytes_per_pixel);
@@ -304,21 +297,48 @@ void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32
}
}
/// Swizzles a pitch-linear 2D array of pixels into the first slice of a block-linear 3D texture.
///
/// Pixels are read from `input` at `x * bytes_per_pixel + line * pitch` and written to `output`
/// at the block-linear address of the same byte position. Non-zero origins are not implemented
/// and are reported through UNIMPLEMENTED_IF; `height` is accepted for interface symmetry but is
/// not needed to address a single slice.
///
/// The destination address is derived from the byte position of the pixel within its line
/// (`x * bytes_per_pixel`), matching SwizzleSubrect. Deriving it from the pixel index made
/// consecutive pixels overlap in the output whenever bytes_per_pixel was larger than one.
void SwizzleSliceToVoxel(u32 line_length_in, u32 line_count, u32 pitch, u32 width, u32 height,
                         u32 bytes_per_pixel, u32 block_height, u32 block_depth, u32 origin_x,
                         u32 origin_y, u8* output, const u8* input) {
    UNIMPLEMENTED_IF(origin_x > 0);
    UNIMPLEMENTED_IF(origin_y > 0);

    // Size in bytes of one row of blocks: every GOB column holds a full block.
    const u32 stride = width * bytes_per_pixel;
    const u32 gobs_in_x = (stride + GOB_SIZE_X - 1) / GOB_SIZE_X;
    const u32 block_size = gobs_in_x << (GOB_SIZE_SHIFT + block_height + block_depth);

    const u32 block_height_mask = (1U << block_height) - 1;
    // log2 of the byte size of one block; advancing one GOB in x skips a whole block.
    const u32 x_shift = Common::CountTrailingZeroes32(GOB_SIZE << (block_height + block_depth));

    for (u32 line = 0; line < line_count; ++line) {
        const auto& table = LEGACY_SWIZZLE_TABLE[line % GOB_SIZE_Y];
        const u32 block_y = line / GOB_SIZE_Y;
        const u32 dst_offset_y =
            (block_y >> block_height) * block_size + (block_y & block_height_mask) * GOB_SIZE;
        const u32 src_line_offset = line * pitch;
        for (u32 x = 0; x < line_length_in; ++x) {
            // Byte position of this pixel inside its line; GOBs are addressed in bytes.
            const u32 x_bytes = x * bytes_per_pixel;
            const u32 dst_offset =
                ((x_bytes / GOB_SIZE_X) << x_shift) + dst_offset_y + table[x_bytes % GOB_SIZE_X];
            const u32 src_offset = x_bytes + src_line_offset;
            std::memcpy(output + dst_offset, input + src_offset, bytes_per_pixel);
        }
    }
}
void SwizzleKepler(const u32 width, const u32 height, const u32 dst_x, const u32 dst_y,
const u32 block_height_bit, const std::size_t copy_size, const u8* source_data,
u8* swizzle_data) {
const u32 block_height = 1U << block_height_bit;
const u32 image_width_in_gobs{(width + gob_size_x - 1) / gob_size_x};
const u32 image_width_in_gobs{(width + GOB_SIZE_X - 1) / GOB_SIZE_X};
std::size_t count = 0;
for (std::size_t y = dst_y; y < height && count < copy_size; ++y) {
const std::size_t gob_address_y =
(y / (gob_size_y * block_height)) * gob_size * block_height * image_width_in_gobs +
((y % (gob_size_y * block_height)) / gob_size_y) * gob_size;
const auto& table = legacy_swizzle_table[y % gob_size_y];
(y / (GOB_SIZE_Y * block_height)) * GOB_SIZE * block_height * image_width_in_gobs +
((y % (GOB_SIZE_Y * block_height)) / GOB_SIZE_Y) * GOB_SIZE;
const auto& table = LEGACY_SWIZZLE_TABLE[y % GOB_SIZE_Y];
for (std::size_t x = dst_x; x < width && count < copy_size; ++x) {
const std::size_t gob_address =
gob_address_y + (x / gob_size_x) * gob_size * block_height;
const std::size_t swizzled_offset = gob_address + table[x % gob_size_x];
gob_address_y + (x / GOB_SIZE_X) * GOB_SIZE * block_height;
const std::size_t swizzled_offset = gob_address + table[x % GOB_SIZE_X];
const u8* source_line = source_data + count;
u8* dest_addr = swizzle_data + swizzled_offset;
count++;
@@ -373,9 +393,9 @@ std::vector<u8> DecodeTexture(const std::vector<u8>& texture_data, TextureFormat
std::size_t CalculateSize(bool tiled, u32 bytes_per_pixel, u32 width, u32 height, u32 depth,
u32 block_height, u32 block_depth) {
if (tiled) {
const u32 aligned_width = Common::AlignBits(width * bytes_per_pixel, gob_size_x_shift);
const u32 aligned_height = Common::AlignBits(height, gob_size_y_shift + block_height);
const u32 aligned_depth = Common::AlignBits(depth, gob_size_z_shift + block_depth);
const u32 aligned_width = Common::AlignBits(width * bytes_per_pixel, GOB_SIZE_X_SHIFT);
const u32 aligned_height = Common::AlignBits(height, GOB_SIZE_Y_SHIFT + block_height);
const u32 aligned_depth = Common::AlignBits(depth, GOB_SIZE_Z_SHIFT + block_depth);
return aligned_width * aligned_height * aligned_depth;
} else {
return width * height * depth * bytes_per_pixel;
@@ -386,14 +406,14 @@ u64 GetGOBOffset(u32 width, u32 height, u32 dst_x, u32 dst_y, u32 block_height,
u32 bytes_per_pixel) {
auto div_ceil = [](const u32 x, const u32 y) { return ((x + y - 1) / y); };
const u32 gobs_in_block = 1 << block_height;
const u32 y_blocks = gob_size_y << block_height;
const u32 x_per_gob = gob_size_x / bytes_per_pixel;
const u32 y_blocks = GOB_SIZE_Y << block_height;
const u32 x_per_gob = GOB_SIZE_X / bytes_per_pixel;
const u32 x_blocks = div_ceil(width, x_per_gob);
const u32 block_size = gob_size * gobs_in_block;
const u32 block_size = GOB_SIZE * gobs_in_block;
const u32 stride = block_size * x_blocks;
const u32 base = (dst_y / y_blocks) * stride + (dst_x / x_per_gob) * block_size;
const u32 relative_y = dst_y % y_blocks;
return base + (relative_y / gob_size_y) * gob_size;
return base + (relative_y / GOB_SIZE_Y) * GOB_SIZE;
}
} // namespace Tegra::Texture

View File

@@ -10,15 +10,15 @@
namespace Tegra::Texture {
// GOB size constants. A GOB spans 64 bytes in x by 8 rows in y, i.e. it represents
// a small rect of (64 / bytes_per_pixel) x 8 pixels.
inline std::size_t GetGOBSize() {
return 512;
}
constexpr u32 GOB_SIZE_X = 64;
constexpr u32 GOB_SIZE_Y = 8;
constexpr u32 GOB_SIZE_Z = 1;
constexpr u32 GOB_SIZE = GOB_SIZE_X * GOB_SIZE_Y * GOB_SIZE_Z;
inline std::size_t GetGOBSizeShift() {
return 9;
}
constexpr std::size_t GOB_SIZE_X_SHIFT = 6;
constexpr std::size_t GOB_SIZE_Y_SHIFT = 3;
constexpr std::size_t GOB_SIZE_Z_SHIFT = 0;
constexpr std::size_t GOB_SIZE_SHIFT = GOB_SIZE_X_SHIFT + GOB_SIZE_Y_SHIFT + GOB_SIZE_Z_SHIFT;
/// Unswizzles a swizzled texture without changing its format.
void UnswizzleTexture(u8* unswizzled_data, u8* address, u32 tile_size_x, u32 tile_size_y,
@@ -48,14 +48,32 @@ std::size_t CalculateSize(bool tiled, u32 bytes_per_pixel, u32 width, u32 height
/// Copies an untiled subrectangle into a tiled surface.
void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width,
u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, u32 block_height,
u32 offset_x, u32 offset_y);
u32 bytes_per_pixel, u8* swizzled_data, const u8* unswizzled_data,
u32 block_height_bit, u32 offset_x, u32 offset_y);
/// Copies a tiled subrectangle into a linear surface.
void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32 swizzled_width,
u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, u32 block_height,
u32 offset_x, u32 offset_y);
/// @brief Swizzles a 2D array of pixels into a 3D texture
/// @param line_length_in Number of pixels per line
/// @param line_count Number of lines
/// @param pitch Number of bytes per line
/// @param width Width of the swizzled texture
/// @param height Height of the swizzled texture
/// @param bytes_per_pixel Number of bytes used per pixel
/// @param block_height Block height shift
/// @param block_depth Block depth shift
/// @param origin_x Column offset in pixels of the swizzled texture
/// @param origin_y Row offset in pixels of the swizzled texture
/// @param output Pointer to the pixels of the swizzled texture
/// @param input Pointer to the 2D array of pixels used as input
/// @pre input and output point to arrays large enough to hold the number of bytes used
void SwizzleSliceToVoxel(u32 line_length_in, u32 line_count, u32 pitch, u32 width, u32 height,
u32 bytes_per_pixel, u32 block_height, u32 block_depth, u32 origin_x,
u32 origin_y, u8* output, const u8* input);
void SwizzleKepler(u32 width, u32 height, u32 dst_x, u32 dst_y, u32 block_height,
std::size_t copy_size, const u8* source_data, u8* swizzle_data);

View File

@@ -30,6 +30,12 @@ add_executable(yuzu
configuration/configure_audio.cpp
configuration/configure_audio.h
configuration/configure_audio.ui
configuration/configure_cpu.cpp
configuration/configure_cpu.h
configuration/configure_cpu.ui
configuration/configure_cpu_debug.cpp
configuration/configure_cpu_debug.h
configuration/configure_cpu_debug.ui
configuration/configure_debug.cpp
configuration/configure_debug.h
configuration/configure_debug.ui

View File

@@ -505,22 +505,6 @@ void Config::ReadDataStorageValues() {
ReadSetting(QStringLiteral("gamecard_current_game"), false).toBool();
Settings::values.gamecard_path =
ReadSetting(QStringLiteral("gamecard_path"), QStringLiteral("")).toString().toStdString();
Settings::values.nand_total_size = static_cast<Settings::NANDTotalSize>(
ReadSetting(QStringLiteral("nand_total_size"),
QVariant::fromValue<u64>(static_cast<u64>(Settings::NANDTotalSize::S29_1GB)))
.toULongLong());
Settings::values.nand_user_size = static_cast<Settings::NANDUserSize>(
ReadSetting(QStringLiteral("nand_user_size"),
QVariant::fromValue<u64>(static_cast<u64>(Settings::NANDUserSize::S26GB)))
.toULongLong());
Settings::values.nand_system_size = static_cast<Settings::NANDSystemSize>(
ReadSetting(QStringLiteral("nand_system_size"),
QVariant::fromValue<u64>(static_cast<u64>(Settings::NANDSystemSize::S2_5GB)))
.toULongLong());
Settings::values.sdmc_size = static_cast<Settings::SDMCSize>(
ReadSetting(QStringLiteral("sdmc_size"),
QVariant::fromValue<u64>(static_cast<u64>(Settings::SDMCSize::S16GB)))
.toULongLong());
qt_config->endGroup();
}
@@ -540,8 +524,6 @@ void Config::ReadDebuggingValues() {
Settings::values.reporting_services =
ReadSetting(QStringLiteral("reporting_services"), false).toBool();
Settings::values.quest_flag = ReadSetting(QStringLiteral("quest_flag"), false).toBool();
Settings::values.disable_cpu_opt =
ReadSetting(QStringLiteral("disable_cpu_opt"), false).toBool();
Settings::values.disable_macro_jit =
ReadSetting(QStringLiteral("disable_macro_jit"), false).toBool();
@@ -633,6 +615,34 @@ void Config::ReadPathValues() {
qt_config->endGroup();
}
void Config::ReadCpuValues() {
qt_config->beginGroup(QStringLiteral("Cpu"));
if (global) {
Settings::values.cpu_accuracy = static_cast<Settings::CPUAccuracy>(
ReadSetting(QStringLiteral("cpu_accuracy"), 0).toInt());
Settings::values.cpuopt_page_tables =
ReadSetting(QStringLiteral("cpuopt_page_tables"), true).toBool();
Settings::values.cpuopt_block_linking =
ReadSetting(QStringLiteral("cpuopt_block_linking"), true).toBool();
Settings::values.cpuopt_return_stack_buffer =
ReadSetting(QStringLiteral("cpuopt_return_stack_buffer"), true).toBool();
Settings::values.cpuopt_fast_dispatcher =
ReadSetting(QStringLiteral("cpuopt_fast_dispatcher"), true).toBool();
Settings::values.cpuopt_context_elimination =
ReadSetting(QStringLiteral("cpuopt_context_elimination"), true).toBool();
Settings::values.cpuopt_const_prop =
ReadSetting(QStringLiteral("cpuopt_const_prop"), true).toBool();
Settings::values.cpuopt_misc_ir =
ReadSetting(QStringLiteral("cpuopt_misc_ir"), true).toBool();
Settings::values.cpuopt_reduce_misalign_checks =
ReadSetting(QStringLiteral("cpuopt_reduce_misalign_checks"), true).toBool();
}
qt_config->endGroup();
}
void Config::ReadRendererValues() {
qt_config->beginGroup(QStringLiteral("Renderer"));
@@ -829,6 +839,7 @@ void Config::ReadValues() {
ReadMiscellaneousValues();
}
ReadCoreValues();
ReadCpuValues();
ReadRendererValues();
ReadAudioValues();
ReadSystemValues();
@@ -929,6 +940,7 @@ void Config::SaveValues() {
SaveMiscellaneousValues();
}
SaveCoreValues();
SaveCpuValues();
SaveRendererValues();
SaveAudioValues();
SaveSystemValues();
@@ -1006,18 +1018,7 @@ void Config::SaveDataStorageValues() {
false);
WriteSetting(QStringLiteral("gamecard_path"),
QString::fromStdString(Settings::values.gamecard_path), QStringLiteral(""));
WriteSetting(QStringLiteral("nand_total_size"),
QVariant::fromValue<u64>(static_cast<u64>(Settings::values.nand_total_size)),
QVariant::fromValue<u64>(static_cast<u64>(Settings::NANDTotalSize::S29_1GB)));
WriteSetting(QStringLiteral("nand_user_size"),
QVariant::fromValue<u64>(static_cast<u64>(Settings::values.nand_user_size)),
QVariant::fromValue<u64>(static_cast<u64>(Settings::NANDUserSize::S26GB)));
WriteSetting(QStringLiteral("nand_system_size"),
QVariant::fromValue<u64>(static_cast<u64>(Settings::values.nand_system_size)),
QVariant::fromValue<u64>(static_cast<u64>(Settings::NANDSystemSize::S2_5GB)));
WriteSetting(QStringLiteral("sdmc_size"),
QVariant::fromValue<u64>(static_cast<u64>(Settings::values.sdmc_size)),
QVariant::fromValue<u64>(static_cast<u64>(Settings::SDMCSize::S16GB)));
qt_config->endGroup();
}
@@ -1033,7 +1034,6 @@ void Config::SaveDebuggingValues() {
WriteSetting(QStringLiteral("dump_exefs"), Settings::values.dump_exefs, false);
WriteSetting(QStringLiteral("dump_nso"), Settings::values.dump_nso, false);
WriteSetting(QStringLiteral("quest_flag"), Settings::values.quest_flag, false);
WriteSetting(QStringLiteral("disable_cpu_opt"), Settings::values.disable_cpu_opt, false);
WriteSetting(QStringLiteral("disable_macro_jit"), Settings::values.disable_macro_jit, false);
qt_config->endGroup();
@@ -1097,6 +1097,32 @@ void Config::SavePathValues() {
qt_config->endGroup();
}
void Config::SaveCpuValues() {
qt_config->beginGroup(QStringLiteral("Cpu"));
if (global) {
WriteSetting(QStringLiteral("cpu_accuracy"),
static_cast<int>(Settings::values.cpu_accuracy), 0);
WriteSetting(QStringLiteral("cpuopt_page_tables"), Settings::values.cpuopt_page_tables,
true);
WriteSetting(QStringLiteral("cpuopt_block_linking"), Settings::values.cpuopt_block_linking,
true);
WriteSetting(QStringLiteral("cpuopt_return_stack_buffer"),
Settings::values.cpuopt_return_stack_buffer, true);
WriteSetting(QStringLiteral("cpuopt_fast_dispatcher"),
Settings::values.cpuopt_fast_dispatcher, true);
WriteSetting(QStringLiteral("cpuopt_context_elimination"),
Settings::values.cpuopt_context_elimination, true);
WriteSetting(QStringLiteral("cpuopt_const_prop"), Settings::values.cpuopt_const_prop, true);
WriteSetting(QStringLiteral("cpuopt_misc_ir"), Settings::values.cpuopt_misc_ir, true);
WriteSetting(QStringLiteral("cpuopt_reduce_misalign_checks"),
Settings::values.cpuopt_reduce_misalign_checks, true);
}
qt_config->endGroup();
}
void Config::SaveRendererValues() {
qt_config->beginGroup(QStringLiteral("Renderer"));

View File

@@ -49,6 +49,7 @@ private:
void ReadDisabledAddOnValues();
void ReadMiscellaneousValues();
void ReadPathValues();
void ReadCpuValues();
void ReadRendererValues();
void ReadShortcutValues();
void ReadSystemValues();
@@ -73,6 +74,7 @@ private:
void SaveDisabledAddOnValues();
void SaveMiscellaneousValues();
void SavePathValues();
void SaveCpuValues();
void SaveRendererValues();
void SaveShortcutValues();
void SaveSystemValues();

View File

@@ -78,6 +78,16 @@
<string>Hotkeys</string>
</attribute>
</widget>
<widget class="ConfigureCpu" name="cpuTab">
<attribute name="title">
<string>CPU</string>
</attribute>
</widget>
<widget class="ConfigureCpuDebug" name="cpuDebugTab">
<attribute name="title">
<string>Debug</string>
</attribute>
</widget>
<widget class="ConfigureGraphics" name="graphicsTab">
<attribute name="title">
<string>Graphics</string>
@@ -158,6 +168,18 @@
<header>configuration/configure_debug.h</header>
<container>1</container>
</customwidget>
<customwidget>
<class>ConfigureCpu</class>
<extends>QWidget</extends>
<header>configuration/configure_cpu.h</header>
<container>1</container>
</customwidget>
<customwidget>
<class>ConfigureCpuDebug</class>
<extends>QWidget</extends>
<header>configuration/configure_cpu_debug.h</header>
<container>1</container>
</customwidget>
<customwidget>
<class>ConfigureGraphics</class>
<extends>QWidget</extends>

View File

@@ -0,0 +1,61 @@
// Copyright 2020 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <QComboBox>
#include <QMessageBox>
#include "common/common_types.h"
#include "common/logging/log.h"
#include "core/core.h"
#include "core/settings.h"
#include "ui_configure_cpu.h"
#include "yuzu/configuration/configure_cpu.h"
ConfigureCpu::ConfigureCpu(QWidget* parent) : QWidget(parent), ui(new Ui::ConfigureCpu) {
ui->setupUi(this);
SetConfiguration();
connect(ui->accuracy, qOverload<int>(&QComboBox::activated), this,
&ConfigureCpu::AccuracyUpdated);
}
ConfigureCpu::~ConfigureCpu() = default;
/// Mirrors Settings::values.cpu_accuracy into the combo box and disables the control while the
/// emulated system is powered on.
void ConfigureCpu::SetConfiguration() {
    const bool is_powered_on = Core::System::GetInstance().IsPoweredOn();
    auto* const accuracy_box = ui->accuracy;

    accuracy_box->setEnabled(!is_powered_on);
    accuracy_box->setCurrentIndex(static_cast<int>(Settings::values.cpu_accuracy));
}
void ConfigureCpu::AccuracyUpdated(int index) {
if (static_cast<Settings::CPUAccuracy>(index) == Settings::CPUAccuracy::DebugMode) {
const auto result = QMessageBox::warning(this, tr("Setting CPU to Debug Mode"),
tr("CPU Debug Mode is only intended for developer "
"use. Are you sure you want to enable this?"),
QMessageBox::Yes | QMessageBox::No);
if (result == QMessageBox::No) {
ui->accuracy->setCurrentIndex(static_cast<int>(Settings::CPUAccuracy::Accurate));
return;
}
}
}
/// Writes the accuracy currently selected in the combo box back to Settings::values.
void ConfigureCpu::ApplyConfiguration() {
    const int selected_index = ui->accuracy->currentIndex();
    Settings::values.cpu_accuracy = static_cast<Settings::CPUAccuracy>(selected_index);
}
/// Retranslates the tab on a language change, then forwards the event to QWidget.
void ConfigureCpu::changeEvent(QEvent* event) {
    const bool language_changed = event->type() == QEvent::LanguageChange;
    if (language_changed) {
        RetranslateUI();
    }

    // The base class still gets to see every event, translated or not.
    QWidget::changeEvent(event);
}
/// Re-applies the translated strings of the generated UI to this widget.
void ConfigureCpu::RetranslateUI() {
ui->retranslateUi(this);
}

View File

@@ -0,0 +1,33 @@
// Copyright 2020 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <memory>
#include <QWidget>
#include "core/settings.h"
namespace Ui {
class ConfigureCpu;
}
/// Configuration tab exposing the CPU accuracy setting.
class ConfigureCpu : public QWidget {
Q_OBJECT
public:
explicit ConfigureCpu(QWidget* parent = nullptr);
~ConfigureCpu() override;
/// Stores the accuracy selected in the tab into Settings::values.
void ApplyConfiguration();
private:
/// Retranslates the tab when the event is a language change.
void changeEvent(QEvent* event) override;
void RetranslateUI();
/// Handler for a selection made in the accuracy combo box.
void AccuracyUpdated(int index);
/// Fills the widgets from Settings::values.
void SetConfiguration();
// Generated UI; only forward-declared in this header.
std::unique_ptr<Ui::ConfigureCpu> ui;
};

View File

@@ -0,0 +1,92 @@
<?xml version="1.0" encoding="UTF-8"?>
<ui version="4.0">
<class>ConfigureCpu</class>
<widget class="QWidget" name="ConfigureCpu">
<property name="geometry">
<rect>
<x>0</x>
<y>0</y>
<width>400</width>
<height>321</height>
</rect>
</property>
<property name="windowTitle">
<string>Form</string>
</property>
<layout class="QVBoxLayout">
<item>
<layout class="QVBoxLayout">
<item>
<widget class="QGroupBox">
<property name="title">
<string>General</string>
</property>
<layout class="QVBoxLayout">
<item>
<layout class="QHBoxLayout">
<item>
<widget class="QLabel">
<property name="text">
<string>Accuracy:</string>
</property>
</widget>
</item>
<item>
<widget class="QComboBox" name="accuracy">
<item>
<property name="text">
<string>Accurate</string>
</property>
</item>
<item>
<property name="text">
<string>Enable Debug Mode</string>
</property>
</item>
</widget>
</item>
</layout>
</item>
<item>
<widget class="QLabel">
<property name="wordWrap">
<bool>1</bool>
</property>
<property name="text">
<string>We recommend setting accuracy to "Accurate".</string>
</property>
</widget>
</item>
</layout>
</widget>
</item>
</layout>
</item>
<item>
<spacer name="verticalSpacer">
<property name="orientation">
<enum>Qt::Vertical</enum>
</property>
<property name="sizeHint" stdset="0">
<size>
<width>20</width>
<height>40</height>
</size>
</property>
</spacer>
</item>
<item>
<widget class="QLabel" name="label_disable_info">
<property name="text">
<string>CPU settings are available only when game is not running.</string>
</property>
<property name="wordWrap">
<bool>true</bool>
</property>
</widget>
</item>
</layout>
</widget>
<resources/>
<connections/>
</ui>

View File

@@ -0,0 +1,65 @@
// Copyright 2020 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <QComboBox>
#include "common/common_types.h"
#include "common/logging/log.h"
#include "core/core.h"
#include "core/settings.h"
#include "ui_configure_cpu_debug.h"
#include "yuzu/configuration/configure_cpu_debug.h"
ConfigureCpuDebug::ConfigureCpuDebug(QWidget* parent)
: QWidget(parent), ui(new Ui::ConfigureCpuDebug) {
ui->setupUi(this);
SetConfiguration();
}
ConfigureCpuDebug::~ConfigureCpuDebug() = default;
/// Mirrors every cpuopt_* value of Settings::values into its checkbox and disables the
/// checkboxes while the emulated system is powered on.
void ConfigureCpuDebug::SetConfiguration() {
    const bool editable = !Core::System::GetInstance().IsPoweredOn();

    // Same two steps for every toggle: lock state first, then the stored value.
    const auto sync = [editable](auto* checkbox, const auto& value) {
        checkbox->setEnabled(editable);
        checkbox->setChecked(value);
    };

    const auto& values = Settings::values;
    sync(ui->cpuopt_page_tables, values.cpuopt_page_tables);
    sync(ui->cpuopt_block_linking, values.cpuopt_block_linking);
    sync(ui->cpuopt_return_stack_buffer, values.cpuopt_return_stack_buffer);
    sync(ui->cpuopt_fast_dispatcher, values.cpuopt_fast_dispatcher);
    sync(ui->cpuopt_context_elimination, values.cpuopt_context_elimination);
    sync(ui->cpuopt_const_prop, values.cpuopt_const_prop);
    sync(ui->cpuopt_misc_ir, values.cpuopt_misc_ir);
    sync(ui->cpuopt_reduce_misalign_checks, values.cpuopt_reduce_misalign_checks);
}
/// Writes the state of every checkbox back to the matching cpuopt_* value of Settings::values.
void ConfigureCpuDebug::ApplyConfiguration() {
    auto& values = Settings::values;

    values.cpuopt_page_tables = ui->cpuopt_page_tables->isChecked();
    values.cpuopt_block_linking = ui->cpuopt_block_linking->isChecked();
    values.cpuopt_return_stack_buffer = ui->cpuopt_return_stack_buffer->isChecked();
    values.cpuopt_fast_dispatcher = ui->cpuopt_fast_dispatcher->isChecked();
    values.cpuopt_context_elimination = ui->cpuopt_context_elimination->isChecked();
    values.cpuopt_const_prop = ui->cpuopt_const_prop->isChecked();
    values.cpuopt_misc_ir = ui->cpuopt_misc_ir->isChecked();
    values.cpuopt_reduce_misalign_checks = ui->cpuopt_reduce_misalign_checks->isChecked();
}
/// Retranslates the tab on a language change, then forwards the event to QWidget.
void ConfigureCpuDebug::changeEvent(QEvent* event) {
    const bool language_changed = event->type() == QEvent::LanguageChange;
    if (language_changed) {
        RetranslateUI();
    }

    // The base class still gets to see every event, translated or not.
    QWidget::changeEvent(event);
}
/// Re-applies the translated strings of the generated UI to this widget.
void ConfigureCpuDebug::RetranslateUI() {
ui->retranslateUi(this);
}

View File

@@ -0,0 +1,31 @@
// Copyright 2020 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <memory>
#include <QWidget>
#include "core/settings.h"
namespace Ui {
class ConfigureCpuDebug;
}
/// Configuration tab exposing the individual CPU optimization toggles (cpuopt_* settings).
class ConfigureCpuDebug : public QWidget {
Q_OBJECT
public:
explicit ConfigureCpuDebug(QWidget* parent = nullptr);
~ConfigureCpuDebug() override;
/// Stores the state of every checkbox into Settings::values.
void ApplyConfiguration();
private:
/// Retranslates the tab when the event is a language change.
void changeEvent(QEvent* event) override;
void RetranslateUI();
/// Fills the checkboxes from Settings::values.
void SetConfiguration();
// Generated UI; only forward-declared in this header.
std::unique_ptr<Ui::ConfigureCpuDebug> ui;
};

View File

@@ -0,0 +1,174 @@
<?xml version="1.0" encoding="UTF-8"?>
<ui version="4.0">
<class>ConfigureCpuDebug</class>
<widget class="QWidget" name="ConfigureCpuDebug">
<property name="geometry">
<rect>
<x>0</x>
<y>0</y>
<width>400</width>
<height>321</height>
</rect>
</property>
<property name="windowTitle">
<string>Form</string>
</property>
<layout class="QVBoxLayout">
<item>
<layout class="QVBoxLayout">
<item>
<widget class="QGroupBox">
<property name="title">
<string>Toggle CPU Optimizations</string>
</property>
<layout class="QVBoxLayout">
<item>
<widget class="QLabel">
<property name="wordWrap">
<bool>true</bool>
</property>
<property name="text">
<string>
&lt;div&gt;
&lt;b&gt;For debugging only.&lt;/b&gt;
&lt;br&gt;
If you're not sure what these do, keep all of these enabled.
&lt;br&gt;
These settings only take effect when CPU Accuracy is "Debug Mode".
&lt;/div&gt;
</string>
</property>
</widget>
</item>
<item>
<widget class="QCheckBox" name="cpuopt_page_tables">
<property name="text">
<string>Enable inline page tables</string>
</property>
<property name="toolTip">
<string>
&lt;div style="white-space: nowrap"&gt;This optimization speeds up memory accesses by the guest program.&lt;/div&gt;
&lt;div style="white-space: nowrap"&gt;Enabling it inlines accesses to PageTable::pointers into emitted code.&lt;/div&gt;
&lt;div style="white-space: nowrap"&gt;Disabling this forces all memory accesses to go through the Memory::Read/Memory::Write functions.&lt;/div&gt;
</string>
</property>
</widget>
</item>
<item>
<widget class="QCheckBox" name="cpuopt_block_linking">
<property name="text">
<string>Enable block linking</string>
</property>
<property name="toolTip">
<string>
&lt;div&gt;This optimization avoids dispatcher lookups by allowing emitted basic blocks to jump directly to other basic blocks if the destination PC is static.&lt;/div&gt;
</string>
</property>
</widget>
</item>
<item>
<widget class="QCheckBox" name="cpuopt_return_stack_buffer">
<property name="text">
<string>Enable return stack buffer</string>
</property>
<property name="toolTip">
<string>
&lt;div&gt;This optimization avoids dispatcher lookups by keeping track of potential return addresses of BL instructions. This approximates what happens with a return stack buffer on a real CPU.&lt;/div&gt;
</string>
</property>
</widget>
</item>
<item>
<widget class="QCheckBox" name="cpuopt_fast_dispatcher">
<property name="text">
<string>Enable fast dispatcher</string>
</property>
<property name="toolTip">
<string>
&lt;div&gt;Enable a two-tiered dispatch system. A faster dispatcher written in assembly, which has a small MRU cache of jump destinations, is used first. If that fails, dispatch falls back to the slower C++ dispatcher.&lt;/div&gt;
</string>
</property>
</widget>
</item>
<item>
<widget class="QCheckBox" name="cpuopt_context_elimination">
<property name="text">
<string>Enable context elimination</string>
</property>
<property name="toolTip">
<string>
&lt;div&gt;Enables an IR optimization that reduces unnecessary accesses to the CPU context structure.&lt;/div&gt;
</string>
</property>
</widget>
</item>
<item>
<widget class="QCheckBox" name="cpuopt_const_prop">
<property name="text">
<string>Enable constant propagation</string>
</property>
<property name="toolTip">
<string>
&lt;div&gt;Enables IR optimizations that involve constant propagation.&lt;/div&gt;
</string>
</property>
</widget>
</item>
<item>
<widget class="QCheckBox" name="cpuopt_misc_ir">
<property name="text">
<string>Enable miscellaneous optimizations</string>
</property>
<property name="toolTip">
<string>
&lt;div&gt;Enables miscellaneous IR optimizations.&lt;/div&gt;
</string>
</property>
</widget>
</item>
<item>
<widget class="QCheckBox" name="cpuopt_reduce_misalign_checks">
<property name="text">
<string>Enable misalignment check reduction</string>
</property>
<property name="toolTip">
<string>
&lt;div style="white-space: nowrap"&gt;When enabled, a misalignment is only triggered when an access crosses a page boundary.&lt;/div&gt;
&lt;div style="white-space: nowrap"&gt;When disabled, a misalignment is triggered on all misaligned accesses.&lt;/div&gt;
</string>
</property>
</widget>
</item>
</layout>
</widget>
</item>
</layout>
</item>
<item>
<spacer name="verticalSpacer">
<property name="orientation">
<enum>Qt::Vertical</enum>
</property>
<property name="sizeHint" stdset="0">
<size>
<width>20</width>
<height>40</height>
</size>
</property>
</spacer>
</item>
<item>
<widget class="QLabel" name="label_disable_info">
<property name="text">
<string>CPU settings are available only when game is not running.</string>
</property>
<property name="wordWrap">
<bool>true</bool>
</property>
</widget>
</item>
</layout>
</widget>
<resources/>
<connections/>
</ui>

View File

@@ -36,7 +36,6 @@ void ConfigureDebug::SetConfiguration() {
ui->homebrew_args_edit->setText(QString::fromStdString(Settings::values.program_args));
ui->reporting_services->setChecked(Settings::values.reporting_services);
ui->quest_flag->setChecked(Settings::values.quest_flag);
ui->disable_cpu_opt->setChecked(Settings::values.disable_cpu_opt);
ui->enable_graphics_debugging->setEnabled(!Core::System::GetInstance().IsPoweredOn());
ui->enable_graphics_debugging->setChecked(Settings::values.renderer_debug);
ui->disable_macro_jit->setEnabled(!Core::System::GetInstance().IsPoweredOn());
@@ -51,7 +50,6 @@ void ConfigureDebug::ApplyConfiguration() {
Settings::values.program_args = ui->homebrew_args_edit->text().toStdString();
Settings::values.reporting_services = ui->reporting_services->isChecked();
Settings::values.quest_flag = ui->quest_flag->isChecked();
Settings::values.disable_cpu_opt = ui->disable_cpu_opt->isChecked();
Settings::values.renderer_debug = ui->enable_graphics_debugging->isChecked();
Settings::values.disable_macro_jit = ui->disable_macro_jit->isChecked();
Debugger::ToggleConsole();

View File

@@ -228,13 +228,6 @@
</property>
</widget>
</item>
<item>
<widget class="QCheckBox" name="disable_cpu_opt">
<property name="text">
<string>Disable CPU JIT optimizations</string>
</property>
</widget>
</item>
</layout>
</widget>
</item>

View File

@@ -42,6 +42,8 @@ void ConfigureDialog::ApplyConfiguration() {
ui->filesystemTab->applyConfiguration();
ui->inputTab->ApplyConfiguration();
ui->hotkeysTab->ApplyConfiguration(registry);
ui->cpuTab->ApplyConfiguration();
ui->cpuDebugTab->ApplyConfiguration();
ui->graphicsTab->ApplyConfiguration();
ui->graphicsAdvancedTab->ApplyConfiguration();
ui->audioTab->ApplyConfiguration();
@@ -76,9 +78,10 @@ void ConfigureDialog::RetranslateUI() {
Q_DECLARE_METATYPE(QList<QWidget*>);
void ConfigureDialog::PopulateSelectionList() {
const std::array<std::pair<QString, QList<QWidget*>>, 5> items{
const std::array<std::pair<QString, QList<QWidget*>>, 6> items{
{{tr("General"), {ui->generalTab, ui->webTab, ui->debugTab, ui->uiTab}},
{tr("System"), {ui->systemTab, ui->profileManagerTab, ui->serviceTab, ui->filesystemTab}},
{tr("CPU"), {ui->cpuTab, ui->cpuDebugTab}},
{tr("Graphics"), {ui->graphicsTab, ui->graphicsAdvancedTab}},
{tr("Audio"), {ui->audioTab}},
{tr("Controls"), {ui->inputTab, ui->hotkeysTab}}},
@@ -107,6 +110,8 @@ void ConfigureDialog::UpdateVisibleTabs() {
{ui->profileManagerTab, tr("Profiles")},
{ui->inputTab, tr("Input")},
{ui->hotkeysTab, tr("Hotkeys")},
{ui->cpuTab, tr("CPU")},
{ui->cpuDebugTab, tr("Debug")},
{ui->graphicsTab, tr("Graphics")},
{ui->graphicsAdvancedTab, tr("Advanced")},
{ui->audioTab, tr("Audio")},

View File

@@ -11,19 +11,6 @@
#include "yuzu/configuration/configure_filesystem.h"
#include "yuzu/uisettings.h"
namespace {
// Selects the entry of combo_box whose item data equals `data` converted to
// u64. The current selection is left untouched when findData() yields an index
// outside [0, count()).
template <typename T>
void SetComboBoxFromData(QComboBox* combo_box, T data) {
    const QVariant wanted = QVariant::fromValue(static_cast<u64>(data));
    const auto position = combo_box->findData(wanted);

    const bool in_range = position >= 0 && position < combo_box->count();
    if (in_range) {
        combo_box->setCurrentIndex(position);
    }
}
} // Anonymous namespace
ConfigureFilesystem::ConfigureFilesystem(QWidget* parent)
: QWidget(parent), ui(std::make_unique<Ui::ConfigureFilesystem>()) {
ui->setupUi(this);
@@ -73,11 +60,6 @@ void ConfigureFilesystem::setConfiguration() {
ui->cache_game_list->setChecked(UISettings::values.cache_game_list);
SetComboBoxFromData(ui->nand_size, Settings::values.nand_total_size);
SetComboBoxFromData(ui->usrnand_size, Settings::values.nand_user_size);
SetComboBoxFromData(ui->sysnand_size, Settings::values.nand_system_size);
SetComboBoxFromData(ui->sdmc_size, Settings::values.sdmc_size);
UpdateEnabledControls();
}
@@ -98,15 +80,6 @@ void ConfigureFilesystem::applyConfiguration() {
Settings::values.dump_nso = ui->dump_nso->isChecked();
UISettings::values.cache_game_list = ui->cache_game_list->isChecked();
Settings::values.nand_total_size = static_cast<Settings::NANDTotalSize>(
ui->nand_size->itemData(ui->nand_size->currentIndex()).toULongLong());
Settings::values.nand_system_size = static_cast<Settings::NANDSystemSize>(
ui->nand_size->itemData(ui->sysnand_size->currentIndex()).toULongLong());
Settings::values.nand_user_size = static_cast<Settings::NANDUserSize>(
ui->nand_size->itemData(ui->usrnand_size->currentIndex()).toULongLong());
Settings::values.sdmc_size = static_cast<Settings::SDMCSize>(
ui->nand_size->itemData(ui->sdmc_size->currentIndex()).toULongLong());
}
void ConfigureFilesystem::SetDirectory(DirectoryTarget target, QLineEdit* edit) {

View File

@@ -115,127 +115,6 @@
</layout>
</widget>
</item>
<item>
<widget class="QGroupBox" name="groupBox_3">
<property name="title">
<string>Storage Sizes</string>
</property>
<layout class="QGridLayout" name="gridLayout_3">
<item row="3" column="0">
<widget class="QLabel" name="label_5">
<property name="text">
<string>SD Card</string>
</property>
</widget>
</item>
<item row="1" column="0">
<widget class="QLabel" name="label_4">
<property name="text">
<string>System NAND</string>
</property>
</widget>
</item>
<item row="1" column="1">
<widget class="QComboBox" name="sysnand_size">
<item>
<property name="text">
<string>2.5 GB</string>
</property>
</item>
</widget>
</item>
<item row="3" column="1">
<widget class="QComboBox" name="sdmc_size">
<property name="currentText">
<string>32 GB</string>
</property>
<item>
<property name="text">
<string>1 GB</string>
</property>
</item>
<item>
<property name="text">
<string>2 GB</string>
</property>
</item>
<item>
<property name="text">
<string>4 GB</string>
</property>
</item>
<item>
<property name="text">
<string>8 GB</string>
</property>
</item>
<item>
<property name="text">
<string>16 GB</string>
</property>
</item>
<item>
<property name="text">
<string>32 GB</string>
</property>
</item>
<item>
<property name="text">
<string>64 GB</string>
</property>
</item>
<item>
<property name="text">
<string>128 GB</string>
</property>
</item>
<item>
<property name="text">
<string>256 GB</string>
</property>
</item>
<item>
<property name="text">
<string>1 TB</string>
</property>
</item>
</widget>
</item>
<item row="2" column="1">
<widget class="QComboBox" name="usrnand_size">
<item>
<property name="text">
<string>26 GB</string>
</property>
</item>
</widget>
</item>
<item row="2" column="0">
<widget class="QLabel" name="label_6">
<property name="text">
<string>User NAND</string>
</property>
</widget>
</item>
<item row="0" column="0">
<widget class="QLabel" name="label_7">
<property name="text">
<string>NAND</string>
</property>
</widget>
</item>
<item row="0" column="1">
<widget class="QComboBox" name="nand_size">
<item>
<property name="text">
<string>29.1 GB</string>
</property>
</item>
</widget>
</item>
</layout>
</widget>
</item>
<item>
<widget class="QGroupBox" name="groupBox_4">
<property name="title">

View File

@@ -335,15 +335,6 @@ void Config::ReadValues() {
Settings::values.gamecard_current_game =
sdl2_config->GetBoolean("Data Storage", "gamecard_current_game", false);
Settings::values.gamecard_path = sdl2_config->Get("Data Storage", "gamecard_path", "");
Settings::values.nand_total_size = static_cast<Settings::NANDTotalSize>(sdl2_config->GetInteger(
"Data Storage", "nand_total_size", static_cast<long>(Settings::NANDTotalSize::S29_1GB)));
Settings::values.nand_user_size = static_cast<Settings::NANDUserSize>(sdl2_config->GetInteger(
"Data Storage", "nand_user_size", static_cast<long>(Settings::NANDUserSize::S26GB)));
Settings::values.nand_system_size = static_cast<Settings::NANDSystemSize>(
sdl2_config->GetInteger("Data Storage", "nand_system_size",
static_cast<long>(Settings::NANDSystemSize::S2_5GB)));
Settings::values.sdmc_size = static_cast<Settings::SDMCSize>(sdl2_config->GetInteger(
"Data Storage", "sdmc_size", static_cast<long>(Settings::SDMCSize::S16GB)));
// System
Settings::values.use_docked_mode = sdl2_config->GetBoolean("System", "use_docked_mode", false);
@@ -437,8 +428,6 @@ void Config::ReadValues() {
Settings::values.reporting_services =
sdl2_config->GetBoolean("Debugging", "reporting_services", false);
Settings::values.quest_flag = sdl2_config->GetBoolean("Debugging", "quest_flag", false);
Settings::values.disable_cpu_opt =
sdl2_config->GetBoolean("Debugging", "disable_cpu_opt", false);
Settings::values.disable_macro_jit =
sdl2_config->GetBoolean("Debugging", "disable_macro_jit", false);

View File

@@ -97,6 +97,39 @@ udp_pad_index=
# 0 (default): Disabled, 1: Enabled
use_multi_core=
[Cpu]
# Enable inline page tables optimization (faster guest memory access)
# 0: Disabled, 1 (default): Enabled
cpuopt_page_tables =
# Enable block linking CPU optimization (reduce block dispatcher use during predictable jumps)
# 0: Disabled, 1 (default): Enabled
cpuopt_block_linking =
# Enable return stack buffer CPU optimization (reduce block dispatcher use during predictable returns)
# 0: Disabled, 1 (default): Enabled
cpuopt_return_stack_buffer =
# Enable fast dispatcher CPU optimization (use a two-tiered dispatcher architecture)
# 0: Disabled, 1 (default): Enabled
cpuopt_fast_dispatcher =
# Enable context elimination CPU optimization (reduce host memory use for guest context)
# 0: Disabled, 1 (default): Enabled
cpuopt_context_elimination =
# Enable constant propagation CPU optimization (basic IR optimization)
# 0: Disabled, 1 (default): Enabled
cpuopt_const_prop =
# Enable miscellaneous CPU optimizations (basic IR optimization)
# 0: Disabled, 1 (default): Enabled
cpuopt_misc_ir =
# Enable reduction of memory misalignment checks (reduce memory fallbacks for misaligned access)
# 0: Disabled, 1 (default): Enabled
cpuopt_reduce_misalign_checks =
[Renderer]
# Which backend API to use.
# 0 (default): OpenGL, 1: Vulkan
@@ -283,9 +316,6 @@ dump_nso=false
# Determines whether or not yuzu will report to the game that the emulated console is in Kiosk Mode
# false: Retail/Normal Mode (default), true: Kiosk Mode
quest_flag =
# Determines whether or not JIT CPU optimizations are enabled
# false: Optimizations Enabled, true: Optimizations Disabled
disable_cpu_opt =
# Enables/Disables the macro JIT compiler
disable_macro_jit=false

View File

@@ -12,6 +12,39 @@ const char* sdl2_config_file = R"(
# 0 (default): Disabled, 1: Enabled
use_multi_core=
[Cpu]
# Enable inline page tables optimization (faster guest memory access)
# 0: Disabled, 1 (default): Enabled
cpuopt_page_tables =
# Enable block linking CPU optimization (reduce block dispatcher use during predictable jumps)
# 0: Disabled, 1 (default): Enabled
cpuopt_block_linking =
# Enable return stack buffer CPU optimization (reduce block dispatcher use during predictable returns)
# 0: Disabled, 1 (default): Enabled
cpuopt_return_stack_buffer =
# Enable fast dispatcher CPU optimization (use a two-tiered dispatcher architecture)
# 0: Disabled, 1 (default): Enabled
cpuopt_fast_dispatcher =
# Enable context elimination CPU optimization (reduce host memory use for guest context)
# 0: Disabled, 1 (default): Enabled
cpuopt_context_elimination =
# Enable constant propagation CPU optimization (basic IR optimization)
# 0: Disabled, 1 (default): Enabled
cpuopt_const_prop =
# Enable miscellaneous CPU optimizations (basic IR optimization)
# 0: Disabled, 1 (default): Enabled
cpuopt_misc_ir =
# Enable reduction of memory misalignment checks (reduce memory fallbacks for misaligned access)
# 0: Disabled, 1 (default): Enabled
cpuopt_reduce_misalign_checks =
[Renderer]
# Whether to use software or hardware rendering.
# 0: Software, 1 (default): Hardware