Compare commits

..

28 Commits

Author SHA1 Message Date
FernandoS27
caaa9914fd Clang format and other fixes 2018-10-17 18:52:11 -04:00
FernandoS27
cb9fdc7a26 Implement Reinterpret Surface, to accurately blit 3D textures 2018-10-17 18:52:10 -04:00
FernandoS27
dbc34db6ce Implement GetInRange in the Rasterizer Cache 2018-10-17 18:52:10 -04:00
FernandoS27
fd9e2d0073 Implement 3D Textures 2018-10-17 18:52:08 -04:00
bunnei
f912a82a8e Merge pull request #1497 from bunnei/flush-framebuffers
Implement flushing in the rasterizer cache
2018-10-17 18:40:34 -04:00
bunnei
6e8752881c Merge pull request #1498 from lioncash/aslr
svc: Clarify enum values for AddressSpaceBaseAddr and AddressSpaceSize in svcGetInfo()
2018-10-17 18:31:51 -04:00
bunnei
86dcf2942b Merge pull request #1496 from FernandoS27/tex-array
Implement Arrays on Tex Instruction
2018-10-17 18:30:44 -04:00
bunnei
afe22d8405 Merge pull request #1509 from DarkLordZach/device-save-data
savedata_factory: Add DeviceSaveData and fix TemporaryStorage
2018-10-17 18:22:05 -04:00
bunnei
648b55c6b9 gl_rasterizer_cache: Remove unnecessary block_depth=1 on Flush. 2018-10-17 18:20:15 -04:00
bunnei
2a035a1f6f gl_rasterizer_cache: Remove unnecessary temporary buffer with unswizzle. 2018-10-17 18:19:35 -04:00
bunnei
43b9494a0f gl_rasterizer_cache: Use AccurateCopySurface for use_accurate_gpu_emulation. 2018-10-16 17:20:49 -04:00
bunnei
ee7c2dbf5a config: Rename use_accurate_framebuffers -> use_accurate_gpu_emulation.
- This will be used as a catch-all for slow-but-accurate GPU emulation paths.
2018-10-16 17:02:29 -04:00
bunnei
91602de7f2 rasterizer_cache: Refactor to support in-order flushing. 2018-10-16 16:51:53 -04:00
bunnei
0e59291310 gl_rasterizer_cache: Refactor to only call GetRegionEnd on surface creation. 2018-10-16 11:31:02 -04:00
bunnei
949d7832fa gl_rasterizer_cache: Only flush when use_accurate_framebuffers is enabled. 2018-10-16 11:31:02 -04:00
bunnei
5f79ba04bd gl_rasterizer_cache: Separate guest and host surface size managment. 2018-10-16 11:31:01 -04:00
bunnei
58be4dff79 gl_rasterizer_cache: Rename GetGLBytesPerPixel to GetBytesPerPixel.
- This does not really have anything to do with OpenGL.
2018-10-16 11:31:01 -04:00
bunnei
cf7b46c101 gl_rasterizer_cache: Remove unused FlushSurface method. 2018-10-16 11:31:01 -04:00
bunnei
3afdfd7bfa gl_rasterizer: Implement flushing. 2018-10-16 11:31:01 -04:00
bunnei
b4e29ccb81 gl_rasterizer_cache: Remove usage of Memory::Read/Write functions.
- These cannot be used within the cache, as they change cache state.
2018-10-16 11:31:00 -04:00
bunnei
4e9683e9d5 gl_rasterizer_cache: Clamp cached surface size to mapped GPU region size. 2018-10-16 11:31:00 -04:00
bunnei
37575eae65 memory_manager: Add a method for querying the end of a mapped GPU region. 2018-10-16 11:31:00 -04:00
bunnei
0be7e82289 rasterizer_cache: Reintroduce method for flushing. 2018-10-16 11:31:00 -04:00
bunnei
9b929e934b gl_rasterizer_cache: Reintroduce code for handling swizzle and flush to guest RAM. 2018-10-16 11:30:59 -04:00
Zach Hilman
9d4e6176eb savedata_factory: Add TemporaryStorage SaveDataSpaceId
Required for TemporaryStorage saves (in addition to SaveDataType)
2018-10-16 10:20:04 -04:00
Zach Hilman
74890cf2da savedata_factory: Add support for DeviceSaveData
Uses the same path as SaveData except with UID 0. Adds a warning if UID is not 0.
2018-10-16 10:19:21 -04:00
Lioncash
90f8474fc1 svc: Clarify enum values for AddressSpaceBaseAddr and AddressSpaceSize in svcGetInfo()
So, one thing that's puzzled me is why the kernel seemed to *not* use
the direct code address ranges in some cases for some service functions.
For example, in svcMapMemory, the full address space width is compared
against for validity, but for svcMapSharedMemory, it compares against
0xFFE00000, 0xFF8000000, and 0x7FF8000000 as upper bounds, and uses
either 0x200000 or 0x8000000 as the lower-bounds as the beginning of the
compared range. Coincidentally, these exact same values are also used in
svcGetInfo, and also when initializing the user address space, so this
is actually retrieving the ASLR extents, not the extents of the address
space in general.
2018-10-14 20:11:16 -04:00
FernandoS27
1d6559fbd3 Implement Arrays on Tex Instruction 2018-10-14 13:31:02 -04:00
24 changed files with 811 additions and 525 deletions

View File

@@ -97,288 +97,11 @@ union NCASectionHeader {
};
static_assert(sizeof(NCASectionHeader) == 0x200, "NCASectionHeader has incorrect size.");
static bool IsValidNCA(const NCAHeader& header) {
bool IsValidNCA(const NCAHeader& header) {
// TODO(DarkLordZach): Add NCA2/NCA0 support.
return header.magic == Common::MakeMagic('N', 'C', 'A', '3');
}
NCA::NCA(VirtualFile file_, VirtualFile bktr_base_romfs_, u64 bktr_base_ivfc_offset)
: file(std::move(file_)), bktr_base_romfs(std::move(bktr_base_romfs_)) {
if (file == nullptr) {
status = Loader::ResultStatus::ErrorNullFile;
return;
}
if (sizeof(NCAHeader) != file->ReadObject(&header)) {
LOG_ERROR(Loader, "File reader errored out during header read.");
status = Loader::ResultStatus::ErrorBadNCAHeader;
return;
}
if (!HandlePotentialHeaderDecryption()) {
return;
}
has_rights_id = std::any_of(header.rights_id.begin(), header.rights_id.end(),
[](char c) { return c != '\0'; });
const std::vector<NCASectionHeader> sections = ReadSectionHeaders();
is_update = std::any_of(sections.begin(), sections.end(), [](const NCASectionHeader& header) {
return header.raw.header.crypto_type == NCASectionCryptoType::BKTR;
});
if (!ReadSections(sections, bktr_base_ivfc_offset)) {
return;
}
status = Loader::ResultStatus::Success;
}
NCA::~NCA() = default;
bool NCA::CheckSupportedNCA(const NCAHeader& nca_header) {
if (nca_header.magic == Common::MakeMagic('N', 'C', 'A', '2')) {
status = Loader::ResultStatus::ErrorNCA2;
return false;
}
if (nca_header.magic == Common::MakeMagic('N', 'C', 'A', '0')) {
status = Loader::ResultStatus::ErrorNCA0;
return false;
}
return true;
}
bool NCA::HandlePotentialHeaderDecryption() {
if (IsValidNCA(header)) {
return true;
}
if (!CheckSupportedNCA(header)) {
return false;
}
NCAHeader dec_header{};
Core::Crypto::AESCipher<Core::Crypto::Key256> cipher(
keys.GetKey(Core::Crypto::S256KeyType::Header), Core::Crypto::Mode::XTS);
cipher.XTSTranscode(&header, sizeof(NCAHeader), &dec_header, 0, 0x200,
Core::Crypto::Op::Decrypt);
if (IsValidNCA(dec_header)) {
header = dec_header;
encrypted = true;
} else {
if (!CheckSupportedNCA(dec_header)) {
return false;
}
if (keys.HasKey(Core::Crypto::S256KeyType::Header)) {
status = Loader::ResultStatus::ErrorIncorrectHeaderKey;
} else {
status = Loader::ResultStatus::ErrorMissingHeaderKey;
}
return false;
}
return true;
}
std::vector<NCASectionHeader> NCA::ReadSectionHeaders() const {
const std::ptrdiff_t number_sections =
std::count_if(std::begin(header.section_tables), std::end(header.section_tables),
[](NCASectionTableEntry entry) { return entry.media_offset > 0; });
std::vector<NCASectionHeader> sections(number_sections);
const auto length_sections = SECTION_HEADER_SIZE * number_sections;
if (encrypted) {
auto raw = file->ReadBytes(length_sections, SECTION_HEADER_OFFSET);
Core::Crypto::AESCipher<Core::Crypto::Key256> cipher(
keys.GetKey(Core::Crypto::S256KeyType::Header), Core::Crypto::Mode::XTS);
cipher.XTSTranscode(raw.data(), length_sections, sections.data(), 2, SECTION_HEADER_SIZE,
Core::Crypto::Op::Decrypt);
} else {
file->ReadBytes(sections.data(), length_sections, SECTION_HEADER_OFFSET);
}
return sections;
}
bool NCA::ReadSections(const std::vector<NCASectionHeader>& sections, u64 bktr_base_ivfc_offset) {
for (std::size_t i = 0; i < sections.size(); ++i) {
const auto& section = sections[i];
if (section.raw.header.filesystem_type == NCASectionFilesystemType::ROMFS) {
if (!ReadRomFSSection(section, header.section_tables[i], bktr_base_ivfc_offset)) {
return false;
}
} else if (section.raw.header.filesystem_type == NCASectionFilesystemType::PFS0) {
if (!ReadPFS0Section(section, header.section_tables[i])) {
return false;
}
}
}
return true;
}
bool NCA::ReadRomFSSection(const NCASectionHeader& section, const NCASectionTableEntry& entry,
u64 bktr_base_ivfc_offset) {
const std::size_t base_offset = entry.media_offset * MEDIA_OFFSET_MULTIPLIER;
ivfc_offset = section.romfs.ivfc.levels[IVFC_MAX_LEVEL - 1].offset;
const std::size_t romfs_offset = base_offset + ivfc_offset;
const std::size_t romfs_size = section.romfs.ivfc.levels[IVFC_MAX_LEVEL - 1].size;
auto raw = std::make_shared<OffsetVfsFile>(file, romfs_size, romfs_offset);
auto dec = Decrypt(section, raw, romfs_offset);
if (dec == nullptr) {
if (status != Loader::ResultStatus::Success)
return false;
if (has_rights_id)
status = Loader::ResultStatus::ErrorIncorrectTitlekeyOrTitlekek;
else
status = Loader::ResultStatus::ErrorIncorrectKeyAreaKey;
return false;
}
if (section.raw.header.crypto_type == NCASectionCryptoType::BKTR) {
if (section.bktr.relocation.magic != Common::MakeMagic('B', 'K', 'T', 'R') ||
section.bktr.subsection.magic != Common::MakeMagic('B', 'K', 'T', 'R')) {
status = Loader::ResultStatus::ErrorBadBKTRHeader;
return false;
}
if (section.bktr.relocation.offset + section.bktr.relocation.size !=
section.bktr.subsection.offset) {
status = Loader::ResultStatus::ErrorBKTRSubsectionNotAfterRelocation;
return false;
}
const u64 size = MEDIA_OFFSET_MULTIPLIER * (entry.media_end_offset - entry.media_offset);
if (section.bktr.subsection.offset + section.bktr.subsection.size != size) {
status = Loader::ResultStatus::ErrorBKTRSubsectionNotAtEnd;
return false;
}
const u64 offset = section.romfs.ivfc.levels[IVFC_MAX_LEVEL - 1].offset;
RelocationBlock relocation_block{};
if (dec->ReadObject(&relocation_block, section.bktr.relocation.offset - offset) !=
sizeof(RelocationBlock)) {
status = Loader::ResultStatus::ErrorBadRelocationBlock;
return false;
}
SubsectionBlock subsection_block{};
if (dec->ReadObject(&subsection_block, section.bktr.subsection.offset - offset) !=
sizeof(RelocationBlock)) {
status = Loader::ResultStatus::ErrorBadSubsectionBlock;
return false;
}
std::vector<RelocationBucketRaw> relocation_buckets_raw(
(section.bktr.relocation.size - sizeof(RelocationBlock)) / sizeof(RelocationBucketRaw));
if (dec->ReadBytes(relocation_buckets_raw.data(),
section.bktr.relocation.size - sizeof(RelocationBlock),
section.bktr.relocation.offset + sizeof(RelocationBlock) - offset) !=
section.bktr.relocation.size - sizeof(RelocationBlock)) {
status = Loader::ResultStatus::ErrorBadRelocationBuckets;
return false;
}
std::vector<SubsectionBucketRaw> subsection_buckets_raw(
(section.bktr.subsection.size - sizeof(SubsectionBlock)) / sizeof(SubsectionBucketRaw));
if (dec->ReadBytes(subsection_buckets_raw.data(),
section.bktr.subsection.size - sizeof(SubsectionBlock),
section.bktr.subsection.offset + sizeof(SubsectionBlock) - offset) !=
section.bktr.subsection.size - sizeof(SubsectionBlock)) {
status = Loader::ResultStatus::ErrorBadSubsectionBuckets;
return false;
}
std::vector<RelocationBucket> relocation_buckets(relocation_buckets_raw.size());
std::transform(relocation_buckets_raw.begin(), relocation_buckets_raw.end(),
relocation_buckets.begin(), &ConvertRelocationBucketRaw);
std::vector<SubsectionBucket> subsection_buckets(subsection_buckets_raw.size());
std::transform(subsection_buckets_raw.begin(), subsection_buckets_raw.end(),
subsection_buckets.begin(), &ConvertSubsectionBucketRaw);
u32 ctr_low;
std::memcpy(&ctr_low, section.raw.section_ctr.data(), sizeof(ctr_low));
subsection_buckets.back().entries.push_back({section.bktr.relocation.offset, {0}, ctr_low});
subsection_buckets.back().entries.push_back({size, {0}, 0});
boost::optional<Core::Crypto::Key128> key = boost::none;
if (encrypted) {
if (has_rights_id) {
status = Loader::ResultStatus::Success;
key = GetTitlekey();
if (key == boost::none) {
status = Loader::ResultStatus::ErrorMissingTitlekey;
return false;
}
} else {
key = GetKeyAreaKey(NCASectionCryptoType::BKTR);
if (key == boost::none) {
status = Loader::ResultStatus::ErrorMissingKeyAreaKey;
return false;
}
}
}
if (bktr_base_romfs == nullptr) {
status = Loader::ResultStatus::ErrorMissingBKTRBaseRomFS;
return false;
}
auto bktr = std::make_shared<BKTR>(
bktr_base_romfs, std::make_shared<OffsetVfsFile>(file, romfs_size, base_offset),
relocation_block, relocation_buckets, subsection_block, subsection_buckets, encrypted,
encrypted ? key.get() : Core::Crypto::Key128{}, base_offset, bktr_base_ivfc_offset,
section.raw.section_ctr);
// BKTR applies to entire IVFC, so make an offset version to level 6
files.push_back(std::make_shared<OffsetVfsFile>(
bktr, romfs_size, section.romfs.ivfc.levels[IVFC_MAX_LEVEL - 1].offset));
} else {
files.push_back(std::move(dec));
}
romfs = files.back();
return true;
}
bool NCA::ReadPFS0Section(const NCASectionHeader& section, const NCASectionTableEntry& entry) {
const u64 offset = (static_cast<u64>(entry.media_offset) * MEDIA_OFFSET_MULTIPLIER) +
section.pfs0.pfs0_header_offset;
const u64 size = MEDIA_OFFSET_MULTIPLIER * (entry.media_end_offset - entry.media_offset);
auto dec = Decrypt(section, std::make_shared<OffsetVfsFile>(file, size, offset), offset);
if (dec != nullptr) {
auto npfs = std::make_shared<PartitionFilesystem>(std::move(dec));
if (npfs->GetStatus() == Loader::ResultStatus::Success) {
dirs.push_back(std::move(npfs));
if (IsDirectoryExeFS(dirs.back()))
exefs = dirs.back();
} else {
if (has_rights_id)
status = Loader::ResultStatus::ErrorIncorrectTitlekeyOrTitlekek;
else
status = Loader::ResultStatus::ErrorIncorrectKeyAreaKey;
return false;
}
} else {
if (status != Loader::ResultStatus::Success)
return false;
if (has_rights_id)
status = Loader::ResultStatus::ErrorIncorrectTitlekeyOrTitlekek;
else
status = Loader::ResultStatus::ErrorIncorrectKeyAreaKey;
return false;
}
return true;
}
u8 NCA::GetCryptoRevision() const {
u8 master_key_id = header.crypto_type;
if (header.crypto_type_2 > master_key_id)
@@ -444,7 +167,7 @@ boost::optional<Core::Crypto::Key128> NCA::GetTitlekey() {
return titlekey;
}
VirtualFile NCA::Decrypt(const NCASectionHeader& s_header, VirtualFile in, u64 starting_offset) {
VirtualFile NCA::Decrypt(NCASectionHeader s_header, VirtualFile in, u64 starting_offset) {
if (!encrypted)
return in;
@@ -492,6 +215,256 @@ VirtualFile NCA::Decrypt(const NCASectionHeader& s_header, VirtualFile in, u64 s
}
}
NCA::NCA(VirtualFile file_, VirtualFile bktr_base_romfs_, u64 bktr_base_ivfc_offset)
: file(std::move(file_)),
bktr_base_romfs(bktr_base_romfs_ ? std::move(bktr_base_romfs_) : nullptr) {
status = Loader::ResultStatus::Success;
if (file == nullptr) {
status = Loader::ResultStatus::ErrorNullFile;
return;
}
if (sizeof(NCAHeader) != file->ReadObject(&header)) {
LOG_ERROR(Loader, "File reader errored out during header read.");
status = Loader::ResultStatus::ErrorBadNCAHeader;
return;
}
encrypted = false;
if (!IsValidNCA(header)) {
if (header.magic == Common::MakeMagic('N', 'C', 'A', '2')) {
status = Loader::ResultStatus::ErrorNCA2;
return;
}
if (header.magic == Common::MakeMagic('N', 'C', 'A', '0')) {
status = Loader::ResultStatus::ErrorNCA0;
return;
}
NCAHeader dec_header{};
Core::Crypto::AESCipher<Core::Crypto::Key256> cipher(
keys.GetKey(Core::Crypto::S256KeyType::Header), Core::Crypto::Mode::XTS);
cipher.XTSTranscode(&header, sizeof(NCAHeader), &dec_header, 0, 0x200,
Core::Crypto::Op::Decrypt);
if (IsValidNCA(dec_header)) {
header = dec_header;
encrypted = true;
} else {
if (dec_header.magic == Common::MakeMagic('N', 'C', 'A', '2')) {
status = Loader::ResultStatus::ErrorNCA2;
return;
}
if (dec_header.magic == Common::MakeMagic('N', 'C', 'A', '0')) {
status = Loader::ResultStatus::ErrorNCA0;
return;
}
if (!keys.HasKey(Core::Crypto::S256KeyType::Header))
status = Loader::ResultStatus::ErrorMissingHeaderKey;
else
status = Loader::ResultStatus::ErrorIncorrectHeaderKey;
return;
}
}
has_rights_id = std::find_if_not(header.rights_id.begin(), header.rights_id.end(),
[](char c) { return c == '\0'; }) != header.rights_id.end();
const std::ptrdiff_t number_sections =
std::count_if(std::begin(header.section_tables), std::end(header.section_tables),
[](NCASectionTableEntry entry) { return entry.media_offset > 0; });
std::vector<NCASectionHeader> sections(number_sections);
const auto length_sections = SECTION_HEADER_SIZE * number_sections;
if (encrypted) {
auto raw = file->ReadBytes(length_sections, SECTION_HEADER_OFFSET);
Core::Crypto::AESCipher<Core::Crypto::Key256> cipher(
keys.GetKey(Core::Crypto::S256KeyType::Header), Core::Crypto::Mode::XTS);
cipher.XTSTranscode(raw.data(), length_sections, sections.data(), 2, SECTION_HEADER_SIZE,
Core::Crypto::Op::Decrypt);
} else {
file->ReadBytes(sections.data(), length_sections, SECTION_HEADER_OFFSET);
}
is_update = std::find_if(sections.begin(), sections.end(), [](const NCASectionHeader& header) {
return header.raw.header.crypto_type == NCASectionCryptoType::BKTR;
}) != sections.end();
ivfc_offset = 0;
for (std::ptrdiff_t i = 0; i < number_sections; ++i) {
auto section = sections[i];
if (section.raw.header.filesystem_type == NCASectionFilesystemType::ROMFS) {
const std::size_t base_offset =
header.section_tables[i].media_offset * MEDIA_OFFSET_MULTIPLIER;
ivfc_offset = section.romfs.ivfc.levels[IVFC_MAX_LEVEL - 1].offset;
const std::size_t romfs_offset = base_offset + ivfc_offset;
const std::size_t romfs_size = section.romfs.ivfc.levels[IVFC_MAX_LEVEL - 1].size;
auto raw = std::make_shared<OffsetVfsFile>(file, romfs_size, romfs_offset);
auto dec = Decrypt(section, raw, romfs_offset);
if (dec == nullptr) {
if (status != Loader::ResultStatus::Success)
return;
if (has_rights_id)
status = Loader::ResultStatus::ErrorIncorrectTitlekeyOrTitlekek;
else
status = Loader::ResultStatus::ErrorIncorrectKeyAreaKey;
return;
}
if (section.raw.header.crypto_type == NCASectionCryptoType::BKTR) {
if (section.bktr.relocation.magic != Common::MakeMagic('B', 'K', 'T', 'R') ||
section.bktr.subsection.magic != Common::MakeMagic('B', 'K', 'T', 'R')) {
status = Loader::ResultStatus::ErrorBadBKTRHeader;
return;
}
if (section.bktr.relocation.offset + section.bktr.relocation.size !=
section.bktr.subsection.offset) {
status = Loader::ResultStatus::ErrorBKTRSubsectionNotAfterRelocation;
return;
}
const u64 size =
MEDIA_OFFSET_MULTIPLIER * (header.section_tables[i].media_end_offset -
header.section_tables[i].media_offset);
if (section.bktr.subsection.offset + section.bktr.subsection.size != size) {
status = Loader::ResultStatus::ErrorBKTRSubsectionNotAtEnd;
return;
}
const u64 offset = section.romfs.ivfc.levels[IVFC_MAX_LEVEL - 1].offset;
RelocationBlock relocation_block{};
if (dec->ReadObject(&relocation_block, section.bktr.relocation.offset - offset) !=
sizeof(RelocationBlock)) {
status = Loader::ResultStatus::ErrorBadRelocationBlock;
return;
}
SubsectionBlock subsection_block{};
if (dec->ReadObject(&subsection_block, section.bktr.subsection.offset - offset) !=
sizeof(RelocationBlock)) {
status = Loader::ResultStatus::ErrorBadSubsectionBlock;
return;
}
std::vector<RelocationBucketRaw> relocation_buckets_raw(
(section.bktr.relocation.size - sizeof(RelocationBlock)) /
sizeof(RelocationBucketRaw));
if (dec->ReadBytes(relocation_buckets_raw.data(),
section.bktr.relocation.size - sizeof(RelocationBlock),
section.bktr.relocation.offset + sizeof(RelocationBlock) -
offset) !=
section.bktr.relocation.size - sizeof(RelocationBlock)) {
status = Loader::ResultStatus::ErrorBadRelocationBuckets;
return;
}
std::vector<SubsectionBucketRaw> subsection_buckets_raw(
(section.bktr.subsection.size - sizeof(SubsectionBlock)) /
sizeof(SubsectionBucketRaw));
if (dec->ReadBytes(subsection_buckets_raw.data(),
section.bktr.subsection.size - sizeof(SubsectionBlock),
section.bktr.subsection.offset + sizeof(SubsectionBlock) -
offset) !=
section.bktr.subsection.size - sizeof(SubsectionBlock)) {
status = Loader::ResultStatus::ErrorBadSubsectionBuckets;
return;
}
std::vector<RelocationBucket> relocation_buckets(relocation_buckets_raw.size());
std::transform(relocation_buckets_raw.begin(), relocation_buckets_raw.end(),
relocation_buckets.begin(), &ConvertRelocationBucketRaw);
std::vector<SubsectionBucket> subsection_buckets(subsection_buckets_raw.size());
std::transform(subsection_buckets_raw.begin(), subsection_buckets_raw.end(),
subsection_buckets.begin(), &ConvertSubsectionBucketRaw);
u32 ctr_low;
std::memcpy(&ctr_low, section.raw.section_ctr.data(), sizeof(ctr_low));
subsection_buckets.back().entries.push_back(
{section.bktr.relocation.offset, {0}, ctr_low});
subsection_buckets.back().entries.push_back({size, {0}, 0});
boost::optional<Core::Crypto::Key128> key = boost::none;
if (encrypted) {
if (has_rights_id) {
status = Loader::ResultStatus::Success;
key = GetTitlekey();
if (key == boost::none) {
status = Loader::ResultStatus::ErrorMissingTitlekey;
return;
}
} else {
key = GetKeyAreaKey(NCASectionCryptoType::BKTR);
if (key == boost::none) {
status = Loader::ResultStatus::ErrorMissingKeyAreaKey;
return;
}
}
}
if (bktr_base_romfs == nullptr) {
status = Loader::ResultStatus::ErrorMissingBKTRBaseRomFS;
return;
}
auto bktr = std::make_shared<BKTR>(
bktr_base_romfs, std::make_shared<OffsetVfsFile>(file, romfs_size, base_offset),
relocation_block, relocation_buckets, subsection_block, subsection_buckets,
encrypted, encrypted ? key.get() : Core::Crypto::Key128{}, base_offset,
bktr_base_ivfc_offset, section.raw.section_ctr);
// BKTR applies to entire IVFC, so make an offset version to level 6
files.push_back(std::make_shared<OffsetVfsFile>(
bktr, romfs_size, section.romfs.ivfc.levels[IVFC_MAX_LEVEL - 1].offset));
romfs = files.back();
} else {
files.push_back(std::move(dec));
romfs = files.back();
}
} else if (section.raw.header.filesystem_type == NCASectionFilesystemType::PFS0) {
u64 offset = (static_cast<u64>(header.section_tables[i].media_offset) *
MEDIA_OFFSET_MULTIPLIER) +
section.pfs0.pfs0_header_offset;
u64 size = MEDIA_OFFSET_MULTIPLIER * (header.section_tables[i].media_end_offset -
header.section_tables[i].media_offset);
auto dec =
Decrypt(section, std::make_shared<OffsetVfsFile>(file, size, offset), offset);
if (dec != nullptr) {
auto npfs = std::make_shared<PartitionFilesystem>(std::move(dec));
if (npfs->GetStatus() == Loader::ResultStatus::Success) {
dirs.push_back(std::move(npfs));
if (IsDirectoryExeFS(dirs.back()))
exefs = dirs.back();
} else {
if (has_rights_id)
status = Loader::ResultStatus::ErrorIncorrectTitlekeyOrTitlekek;
else
status = Loader::ResultStatus::ErrorIncorrectKeyAreaKey;
return;
}
} else {
if (status != Loader::ResultStatus::Success)
return;
if (has_rights_id)
status = Loader::ResultStatus::ErrorIncorrectTitlekeyOrTitlekek;
else
status = Loader::ResultStatus::ErrorIncorrectKeyAreaKey;
return;
}
}
}
status = Loader::ResultStatus::Success;
}
NCA::~NCA() = default;
Loader::ResultStatus NCA::GetStatus() const {
return status;
}

View File

@@ -73,6 +73,8 @@ inline bool IsDirectoryExeFS(const std::shared_ptr<VfsDirectory>& pfs) {
return pfs->GetFile("main") != nullptr && pfs->GetFile("main.npdm") != nullptr;
}
bool IsValidNCA(const NCAHeader& header);
// An implementation of VfsDirectory that represents a Nintendo Content Archive (NCA) conatiner.
// After construction, use GetStatus to determine if the file is valid and ready to be used.
class NCA : public ReadOnlyVfsDirectory {
@@ -104,19 +106,10 @@ protected:
bool ReplaceFileWithSubdirectory(VirtualFile file, VirtualDir dir) override;
private:
bool CheckSupportedNCA(const NCAHeader& header);
bool HandlePotentialHeaderDecryption();
std::vector<NCASectionHeader> ReadSectionHeaders() const;
bool ReadSections(const std::vector<NCASectionHeader>& sections, u64 bktr_base_ivfc_offset);
bool ReadRomFSSection(const NCASectionHeader& section, const NCASectionTableEntry& entry,
u64 bktr_base_ivfc_offset);
bool ReadPFS0Section(const NCASectionHeader& section, const NCASectionTableEntry& entry);
u8 GetCryptoRevision() const;
boost::optional<Core::Crypto::Key128> GetKeyAreaKey(NCASectionCryptoType type) const;
boost::optional<Core::Crypto::Key128> GetTitlekey();
VirtualFile Decrypt(const NCASectionHeader& header, VirtualFile in, u64 starting_offset);
VirtualFile Decrypt(NCASectionHeader header, VirtualFile in, u64 starting_offset);
std::vector<VirtualDir> dirs;
std::vector<VirtualFile> files;
@@ -125,15 +118,15 @@ private:
VirtualDir exefs = nullptr;
VirtualFile file;
VirtualFile bktr_base_romfs;
u64 ivfc_offset = 0;
u64 ivfc_offset;
NCAHeader header{};
bool has_rights_id{};
Loader::ResultStatus status{};
bool encrypted = false;
bool is_update = false;
bool encrypted;
bool is_update;
Core::Crypto::KeyManager keys;
};

View File

@@ -51,6 +51,13 @@ ResultVal<VirtualDir> SaveDataFactory::Open(SaveDataSpaceId space, SaveDataDescr
meta.title_id);
}
if (meta.type == SaveDataType::DeviceSaveData && meta.user_id != u128{0, 0}) {
LOG_WARNING(Service_FS,
"Possibly incorrect SaveDataDescriptor, type is DeviceSaveData but user_id is "
"non-zero ({:016X}{:016X})",
meta.user_id[1], meta.user_id[0]);
}
std::string save_directory =
GetFullPath(space, meta.type, meta.title_id, meta.user_id, meta.save_id);
@@ -92,6 +99,9 @@ std::string SaveDataFactory::GetFullPath(SaveDataSpaceId space, SaveDataType typ
case SaveDataSpaceId::NandUser:
out = "/user/";
break;
case SaveDataSpaceId::TemporaryStorage:
out = "/temp/";
break;
default:
ASSERT_MSG(false, "Unrecognized SaveDataSpaceId: {:02X}", static_cast<u8>(space));
}
@@ -100,10 +110,11 @@ std::string SaveDataFactory::GetFullPath(SaveDataSpaceId space, SaveDataType typ
case SaveDataType::SystemSaveData:
return fmt::format("{}save/{:016X}/{:016X}{:016X}", out, save_id, user_id[1], user_id[0]);
case SaveDataType::SaveData:
case SaveDataType::DeviceSaveData:
return fmt::format("{}save/{:016X}/{:016X}{:016X}/{:016X}", out, 0, user_id[1], user_id[0],
title_id);
case SaveDataType::TemporaryStorage:
return fmt::format("{}temp/{:016X}/{:016X}{:016X}/{:016X}", out, 0, user_id[1], user_id[0],
return fmt::format("{}{:016X}/{:016X}{:016X}/{:016X}", out, 0, user_id[1], user_id[0],
title_id);
default:
ASSERT_MSG(false, "Unrecognized SaveDataType: {:02X}", static_cast<u8>(type));

View File

@@ -448,25 +448,12 @@ static ResultCode GetInfo(u64* result, u64 info_id, u64 handle, u64 info_sub_id)
case GetInfoType::RandomEntropy:
*result = 0;
break;
case GetInfoType::AddressSpaceBaseAddr:
*result = vm_manager.GetCodeRegionBaseAddress();
case GetInfoType::ASLRRegionBaseAddr:
*result = vm_manager.GetASLRRegionBaseAddress();
break;
case GetInfoType::AddressSpaceSize: {
const u64 width = vm_manager.GetAddressSpaceWidth();
switch (width) {
case 32:
*result = 0xFFE00000;
break;
case 36:
*result = 0xFF8000000;
break;
case 39:
*result = 0x7FF8000000;
break;
}
case GetInfoType::ASLRRegionSize:
*result = vm_manager.GetASLRRegionSize();
break;
}
case GetInfoType::NewMapRegionBaseAddr:
*result = vm_manager.GetNewMapRegionBaseAddress();
break;

View File

@@ -41,8 +41,8 @@ enum class GetInfoType : u64 {
RandomEntropy = 11,
PerformanceCounter = 0xF0000002,
// 2.0.0+
AddressSpaceBaseAddr = 12,
AddressSpaceSize = 13,
ASLRRegionBaseAddr = 12,
ASLRRegionSize = 13,
NewMapRegionBaseAddr = 14,
NewMapRegionSize = 15,
// 3.0.0+

View File

@@ -393,30 +393,35 @@ void VMManager::InitializeMemoryRegionRanges(FileSys::ProgramAddressSpaceType ty
switch (type) {
case FileSys::ProgramAddressSpaceType::Is32Bit:
case FileSys::ProgramAddressSpaceType::Is32BitNoMap:
address_space_width = 32;
code_region_base = 0x200000;
code_region_end = code_region_base + 0x3FE00000;
map_region_size = 0x40000000;
heap_region_size = 0x40000000;
aslr_region_base = 0x200000;
aslr_region_end = aslr_region_base + 0xFFE00000;
if (type == FileSys::ProgramAddressSpaceType::Is32Bit) {
map_region_size = 0x40000000;
heap_region_size = 0x40000000;
} else {
map_region_size = 0;
heap_region_size = 0x80000000;
}
break;
case FileSys::ProgramAddressSpaceType::Is36Bit:
address_space_width = 36;
code_region_base = 0x8000000;
code_region_end = code_region_base + 0x78000000;
aslr_region_base = 0x8000000;
aslr_region_end = aslr_region_base + 0xFF8000000;
map_region_size = 0x180000000;
heap_region_size = 0x180000000;
break;
case FileSys::ProgramAddressSpaceType::Is32BitNoMap:
address_space_width = 32;
code_region_base = 0x200000;
code_region_end = code_region_base + 0x3FE00000;
map_region_size = 0;
heap_region_size = 0x80000000;
break;
case FileSys::ProgramAddressSpaceType::Is39Bit:
address_space_width = 39;
code_region_base = 0x8000000;
code_region_end = code_region_base + 0x80000000;
aslr_region_base = 0x8000000;
aslr_region_end = aslr_region_base + 0x7FF8000000;
map_region_size = 0x1000000000;
heap_region_size = 0x180000000;
new_map_region_size = 0x80000000;
@@ -490,6 +495,18 @@ u64 VMManager::GetAddressSpaceWidth() const {
return address_space_width;
}
VAddr VMManager::GetASLRRegionBaseAddress() const {
return aslr_region_base;
}
VAddr VMManager::GetASLRRegionEndAddress() const {
return aslr_region_end;
}
u64 VMManager::GetASLRRegionSize() const {
return aslr_region_end - aslr_region_base;
}
VAddr VMManager::GetCodeRegionBaseAddress() const {
return code_region_base;
}

View File

@@ -205,6 +205,15 @@ public:
/// Gets the address space width in bits.
u64 GetAddressSpaceWidth() const;
/// Gets the base address of the ASLR region.
VAddr GetASLRRegionBaseAddress() const;
/// Gets the end address of the ASLR region.
VAddr GetASLRRegionEndAddress() const;
/// Gets the size of the ASLR region
u64 GetASLRRegionSize() const;
/// Gets the base address of the code region.
VAddr GetCodeRegionBaseAddress() const;
@@ -306,6 +315,9 @@ private:
VAddr address_space_base = 0;
VAddr address_space_end = 0;
VAddr aslr_region_base = 0;
VAddr aslr_region_end = 0;
VAddr code_region_base = 0;
VAddr code_region_end = 0;

View File

@@ -136,7 +136,7 @@ struct Values {
float resolution_factor;
bool use_frame_limit;
u16 frame_limit;
bool use_accurate_framebuffers;
bool use_accurate_gpu_emulation;
float bg_red;
float bg_green;

View File

@@ -163,8 +163,8 @@ TelemetrySession::TelemetrySession() {
AddField(Telemetry::FieldType::UserConfig, "Renderer_UseFrameLimit",
Settings::values.use_frame_limit);
AddField(Telemetry::FieldType::UserConfig, "Renderer_FrameLimit", Settings::values.frame_limit);
AddField(Telemetry::FieldType::UserConfig, "Renderer_UseAccurateFramebuffers",
Settings::values.use_accurate_framebuffers);
AddField(Telemetry::FieldType::UserConfig, "Renderer_UseAccurateGpuEmulation",
Settings::values.use_accurate_gpu_emulation);
AddField(Telemetry::FieldType::UserConfig, "System_UseDockedMode",
Settings::values.use_docked_mode);
}

View File

@@ -448,7 +448,10 @@ public:
BitField<8, 3, u32> block_depth;
BitField<12, 1, InvMemoryLayout> type;
} memory_layout;
u32 array_mode;
union {
BitField<0, 16, u32> array_mode;
BitField<16, 1, u32> volume;
};
u32 layer_stride;
u32 base_layer;
INSERT_PADDING_WORDS(7);

View File

@@ -87,6 +87,16 @@ GPUVAddr MemoryManager::UnmapBuffer(GPUVAddr gpu_addr, u64 size) {
return gpu_addr;
}
GPUVAddr MemoryManager::GetRegionEnd(GPUVAddr region_start) const {
for (const auto& region : mapped_regions) {
const GPUVAddr region_end{region.gpu_addr + region.size};
if (region_start >= region.gpu_addr && region_start < region_end) {
return region_end;
}
}
return {};
}
boost::optional<GPUVAddr> MemoryManager::FindFreeBlock(u64 size, u64 align) {
GPUVAddr gpu_addr = 0;
u64 free_space = 0;

View File

@@ -26,6 +26,7 @@ public:
GPUVAddr MapBufferEx(VAddr cpu_addr, u64 size);
GPUVAddr MapBufferEx(VAddr cpu_addr, GPUVAddr gpu_addr, u64 size);
GPUVAddr UnmapBuffer(GPUVAddr gpu_addr, u64 size);
GPUVAddr GetRegionEnd(GPUVAddr region_start) const;
boost::optional<VAddr> GpuToCpuAddress(GPUVAddr gpu_addr);
std::vector<GPUVAddr> CpuToGpuAddress(VAddr cpu_addr) const;

View File

@@ -11,32 +11,77 @@
#include "common/common_types.h"
#include "core/core.h"
#include "core/settings.h"
#include "video_core/rasterizer_interface.h"
#include "video_core/renderer_base.h"
class RasterizerCacheObject {
public:
/// Gets the address of the shader in guest memory, required for cache management
virtual VAddr GetAddr() const = 0;
/// Gets the size of the shader in guest memory, required for cache management
virtual std::size_t GetSizeInBytes() const = 0;
/// Wriets any cached resources back to memory
virtual void Flush() = 0;
/// Sets whether the cached object should be considered registered
void SetIsRegistered(bool registered) {
is_registered = registered;
}
/// Returns true if the cached object is registered
bool IsRegistered() const {
return is_registered;
}
/// Returns true if the cached object is dirty
bool IsDirty() const {
return is_dirty;
}
/// Returns ticks from when this cached object was last modified
u64 GetLastModifiedTicks() const {
return last_modified_ticks;
}
/// Marks an object as recently modified, used to specify whether it is clean or dirty
template <class T>
void MarkAsModified(bool dirty, T& cache) {
is_dirty = dirty;
last_modified_ticks = cache.GetModifiedTicks();
}
private:
bool is_registered{}; ///< Whether the object is currently registered with the cache
bool is_dirty{}; ///< Whether the object is dirty (out of sync with guest memory)
u64 last_modified_ticks{}; ///< When the object was last modified, used for in-order flushing
};
template <class T>
class RasterizerCache : NonCopyable {
friend class RasterizerCacheObject;
public:
/// Write any cached resources overlapping the specified region back to memory
void FlushRegion(Tegra::GPUVAddr addr, size_t size) {
const auto& objects{GetSortedObjectsFromRegion(addr, size)};
for (auto& object : objects) {
FlushObject(object);
}
}
/// Mark the specified region as being invalidated
void InvalidateRegion(VAddr addr, u64 size) {
if (size == 0)
return;
const ObjectInterval interval{addr, addr + size};
for (auto& pair : boost::make_iterator_range(object_cache.equal_range(interval))) {
for (auto& cached_object : pair.second) {
if (!cached_object)
continue;
remove_objects.emplace(cached_object);
const auto& objects{GetSortedObjectsFromRegion(addr, size)};
for (auto& object : objects) {
if (!object->IsRegistered()) {
// Skip duplicates
continue;
}
Unregister(object);
}
for (auto& remove_object : remove_objects) {
Unregister(remove_object);
}
remove_objects.clear();
}
/// Invalidates everything in the cache
@@ -62,6 +107,7 @@ protected:
/// Register an object into the cache
void Register(const T& object) {
object->SetIsRegistered(true);
object_cache.add({GetInterval(object), ObjectSet{object}});
auto& rasterizer = Core::System::GetInstance().Renderer().Rasterizer();
rasterizer.UpdatePagesCachedCount(object->GetAddr(), object->GetSizeInBytes(), 1);
@@ -69,12 +115,57 @@ protected:
/// Unregisters an object from the cache
void Unregister(const T& object) {
object->SetIsRegistered(false);
auto& rasterizer = Core::System::GetInstance().Renderer().Rasterizer();
rasterizer.UpdatePagesCachedCount(object->GetAddr(), object->GetSizeInBytes(), -1);
// Only flush if use_accurate_gpu_emulation is enabled, as it incurs a performance hit
if (Settings::values.use_accurate_gpu_emulation) {
FlushObject(object);
}
object_cache.subtract({GetInterval(object), ObjectSet{object}});
}
/// Returns a ticks counter used for tracking when cached objects were last modified
u64 GetModifiedTicks() {
return ++modified_ticks;
}
private:
/// Returns a list of cached objects from the specified memory region, ordered by access time
std::vector<T> GetSortedObjectsFromRegion(VAddr addr, u64 size) {
if (size == 0) {
return {};
}
std::vector<T> objects;
const ObjectInterval interval{addr, addr + size};
for (auto& pair : boost::make_iterator_range(object_cache.equal_range(interval))) {
for (auto& cached_object : pair.second) {
if (!cached_object) {
continue;
}
objects.push_back(cached_object);
}
}
std::sort(objects.begin(), objects.end(), [](const T& a, const T& b) -> bool {
return a->GetLastModifiedTicks() < b->GetLastModifiedTicks();
});
return objects;
}
/// Flushes the specified object, updating appropriate cache state as needed
void FlushObject(const T& object) {
if (!object->IsDirty()) {
return;
}
object->Flush();
object->MarkAsModified(false, *this);
}
using ObjectSet = std::set<T>;
using ObjectCache = boost::icl::interval_map<VAddr, ObjectSet>;
using ObjectInterval = typename ObjectCache::interval_type;
@@ -84,6 +175,6 @@ private:
object->GetAddr() + object->GetSizeInBytes());
}
ObjectCache object_cache;
ObjectSet remove_objects;
ObjectCache object_cache; ///< Cache of objects
u64 modified_ticks{}; ///< Counter of cache state ticks, used for in-order flushing
};

View File

@@ -15,15 +15,18 @@
namespace OpenGL {
struct CachedBufferEntry final {
VAddr GetAddr() const {
struct CachedBufferEntry final : public RasterizerCacheObject {
VAddr GetAddr() const override {
return addr;
}
std::size_t GetSizeInBytes() const {
std::size_t GetSizeInBytes() const override {
return size;
}
// We do not have to flush this cache as things in it are never modified by us.
void Flush() override {}
VAddr addr;
std::size_t size;
GLintptr offset;

View File

@@ -424,6 +424,13 @@ void RasterizerOpenGL::ConfigureFramebuffers(bool using_color_fb, bool using_dep
// Used when just a single color attachment is enabled, e.g. for clearing a color buffer
Surface color_surface =
res_cache.GetColorBufferSurface(*single_color_target, preserve_contents);
if (color_surface) {
// Assume that a surface will be written to if it is used as a framebuffer, even if
// the shader doesn't actually write to it.
color_surface->MarkAsModified(true, res_cache);
}
glFramebufferTexture2D(
GL_DRAW_FRAMEBUFFER,
GL_COLOR_ATTACHMENT0 + static_cast<GLenum>(*single_color_target), GL_TEXTURE_2D,
@@ -434,6 +441,13 @@ void RasterizerOpenGL::ConfigureFramebuffers(bool using_color_fb, bool using_dep
std::array<GLenum, Maxwell::NumRenderTargets> buffers;
for (std::size_t index = 0; index < Maxwell::NumRenderTargets; ++index) {
Surface color_surface = res_cache.GetColorBufferSurface(index, preserve_contents);
if (color_surface) {
// Assume that a surface will be written to if it is used as a framebuffer, even
// if the shader doesn't actually write to it.
color_surface->MarkAsModified(true, res_cache);
}
buffers[index] = GL_COLOR_ATTACHMENT0 + regs.rt_control.GetMap(index);
glFramebufferTexture2D(
GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + static_cast<GLenum>(index),
@@ -453,6 +467,10 @@ void RasterizerOpenGL::ConfigureFramebuffers(bool using_color_fb, bool using_dep
}
if (depth_surface) {
// Assume that a surface will be written to if it is used as a framebuffer, even if
// the shader doesn't actually write to it.
depth_surface->MarkAsModified(true, res_cache);
if (regs.stencil_enable) {
// Attach both depth and stencil
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
@@ -617,7 +635,14 @@ void RasterizerOpenGL::DrawArrays() {
void RasterizerOpenGL::FlushAll() {}
void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) {}
void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) {
MICROPROFILE_SCOPE(OpenGL_CacheManagement);
if (Settings::values.use_accurate_gpu_emulation) {
// Only flush if use_accurate_gpu_emulation is enabled, as it incurs a performance hit
res_cache.FlushRegion(addr, size);
}
}
void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) {
MICROPROFILE_SCOPE(OpenGL_CacheManagement);
@@ -627,6 +652,7 @@ void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) {
}
void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size) {
FlushRegion(addr, size);
InvalidateRegion(addr, size);
}

View File

@@ -34,16 +34,53 @@ struct FormatTuple {
bool compressed;
};
static VAddr TryGetCpuAddr(Tegra::GPUVAddr gpu_addr) {
auto& gpu{Core::System::GetInstance().GPU()};
const auto cpu_addr{gpu.MemoryManager().GpuToCpuAddress(gpu_addr)};
return cpu_addr ? *cpu_addr : 0;
static bool IsPixelFormatASTC(PixelFormat format) {
switch (format) {
case PixelFormat::ASTC_2D_4X4:
case PixelFormat::ASTC_2D_5X4:
case PixelFormat::ASTC_2D_8X8:
case PixelFormat::ASTC_2D_8X5:
return true;
default:
return false;
}
}
static std::pair<u32, u32> GetASTCBlockSize(PixelFormat format) {
switch (format) {
case PixelFormat::ASTC_2D_4X4:
return {4, 4};
case PixelFormat::ASTC_2D_5X4:
return {5, 4};
case PixelFormat::ASTC_2D_8X8:
return {8, 8};
case PixelFormat::ASTC_2D_8X5:
return {8, 5};
default:
LOG_CRITICAL(HW_GPU, "Unhandled format: {}", static_cast<u32>(format));
UNREACHABLE();
}
}
void SurfaceParams::InitCacheParameters(Tegra::GPUVAddr gpu_addr_) {
auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()};
const auto cpu_addr{memory_manager.GpuToCpuAddress(gpu_addr_)};
addr = cpu_addr ? *cpu_addr : 0;
gpu_addr = gpu_addr_;
size_in_bytes = SizeInBytesRaw();
if (IsPixelFormatASTC(pixel_format)) {
// ASTC is uncompressed in software, in emulated as RGBA8
size_in_bytes_gl = width * height * depth * 4;
} else {
size_in_bytes_gl = SizeInBytesGL();
}
}
/*static*/ SurfaceParams SurfaceParams::CreateForTexture(
const Tegra::Texture::FullTextureInfo& config, const GLShader::SamplerEntry& entry) {
SurfaceParams params{};
params.addr = TryGetCpuAddr(config.tic.Address());
params.is_tiled = config.tic.IsTiled();
params.block_width = params.is_tiled ? config.tic.BlockWidth() : 0,
params.block_height = params.is_tiled ? config.tic.BlockHeight() : 0,
@@ -87,18 +124,18 @@ static VAddr TryGetCpuAddr(Tegra::GPUVAddr gpu_addr) {
break;
}
params.size_in_bytes_total = params.SizeInBytesTotal();
params.size_in_bytes_2d = params.SizeInBytes2D();
params.max_mip_level = config.tic.max_mip_level + 1;
params.rt = {};
params.InitCacheParameters(config.tic.Address());
return params;
}
/*static*/ SurfaceParams SurfaceParams::CreateForFramebuffer(std::size_t index) {
const auto& config{Core::System::GetInstance().GPU().Maxwell3D().regs.rt[index]};
SurfaceParams params{};
params.addr = TryGetCpuAddr(config.Address());
params.is_tiled =
config.memory_layout.type == Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout::BlockLinear;
params.block_width = 1 << config.memory_layout.block_width;
@@ -112,16 +149,17 @@ static VAddr TryGetCpuAddr(Tegra::GPUVAddr gpu_addr) {
params.unaligned_height = config.height;
params.target = SurfaceTarget::Texture2D;
params.depth = 1;
params.size_in_bytes_total = params.SizeInBytesTotal();
params.size_in_bytes_2d = params.SizeInBytes2D();
params.max_mip_level = 0;
// Render target specific parameters, not used for caching
params.rt.index = static_cast<u32>(index);
params.rt.array_mode = config.array_mode;
params.rt.layer_stride = config.layer_stride;
params.rt.volume = config.volume;
params.rt.base_layer = config.base_layer;
params.InitCacheParameters(config.Address());
return params;
}
@@ -130,7 +168,7 @@ static VAddr TryGetCpuAddr(Tegra::GPUVAddr gpu_addr) {
u32 block_width, u32 block_height, u32 block_depth,
Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout type) {
SurfaceParams params{};
params.addr = TryGetCpuAddr(zeta_address);
params.is_tiled = type == Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout::BlockLinear;
params.block_width = 1 << std::min(block_width, 5U);
params.block_height = 1 << std::min(block_height, 5U);
@@ -143,18 +181,18 @@ static VAddr TryGetCpuAddr(Tegra::GPUVAddr gpu_addr) {
params.unaligned_height = zeta_height;
params.target = SurfaceTarget::Texture2D;
params.depth = 1;
params.size_in_bytes_total = params.SizeInBytesTotal();
params.size_in_bytes_2d = params.SizeInBytes2D();
params.max_mip_level = 0;
params.rt = {};
params.InitCacheParameters(zeta_address);
return params;
}
/*static*/ SurfaceParams SurfaceParams::CreateForFermiCopySurface(
const Tegra::Engines::Fermi2D::Regs::Surface& config) {
SurfaceParams params{};
params.addr = TryGetCpuAddr(config.Address());
params.is_tiled = !config.linear;
params.block_width = params.is_tiled ? std::min(config.BlockWidth(), 32U) : 0,
params.block_height = params.is_tiled ? std::min(config.BlockHeight(), 32U) : 0,
@@ -167,11 +205,11 @@ static VAddr TryGetCpuAddr(Tegra::GPUVAddr gpu_addr) {
params.unaligned_height = config.height;
params.target = SurfaceTarget::Texture2D;
params.depth = 1;
params.size_in_bytes_total = params.SizeInBytesTotal();
params.size_in_bytes_2d = params.SizeInBytes2D();
params.max_mip_level = 0;
params.rt = {};
params.InitCacheParameters(config.Address());
return params;
}
@@ -276,34 +314,6 @@ static const FormatTuple& GetFormatTuple(PixelFormat pixel_format, ComponentType
return format;
}
static bool IsPixelFormatASTC(PixelFormat format) {
switch (format) {
case PixelFormat::ASTC_2D_4X4:
case PixelFormat::ASTC_2D_5X4:
case PixelFormat::ASTC_2D_8X8:
case PixelFormat::ASTC_2D_8X5:
return true;
default:
return false;
}
}
static std::pair<u32, u32> GetASTCBlockSize(PixelFormat format) {
switch (format) {
case PixelFormat::ASTC_2D_4X4:
return {4, 4};
case PixelFormat::ASTC_2D_5X4:
return {5, 4};
case PixelFormat::ASTC_2D_8X8:
return {8, 8};
case PixelFormat::ASTC_2D_8X5:
return {8, 5};
default:
LOG_CRITICAL(HW_GPU, "Unhandled format: {}", static_cast<u32>(format));
UNREACHABLE();
}
}
MathUtil::Rectangle<u32> SurfaceParams::GetRect() const {
u32 actual_height{unaligned_height};
if (IsPixelFormatASTC(pixel_format)) {
@@ -333,23 +343,21 @@ static bool IsFormatBCn(PixelFormat format) {
template <bool morton_to_gl, PixelFormat format>
void MortonCopy(u32 stride, u32 block_height, u32 height, u32 block_depth, u32 depth, u8* gl_buffer,
std::size_t gl_buffer_size, VAddr addr) {
constexpr u32 bytes_per_pixel = SurfaceParams::GetFormatBpp(format) / CHAR_BIT;
constexpr u32 gl_bytes_per_pixel = CachedSurface::GetGLBytesPerPixel(format);
constexpr u32 bytes_per_pixel = SurfaceParams::GetBytesPerPixel(format);
// With the BCn formats (DXT and DXN), each 4x4 tile is swizzled instead of just individual
// pixel values.
const u32 tile_size{IsFormatBCn(format) ? 4U : 1U};
if (morton_to_gl) {
// With the BCn formats (DXT and DXN), each 4x4 tile is swizzled instead of just individual
// pixel values.
const u32 tile_size{IsFormatBCn(format) ? 4U : 1U};
const std::vector<u8> data = Tegra::Texture::UnswizzleTexture(
addr, tile_size, bytes_per_pixel, stride, height, depth, block_height, block_depth);
const std::size_t size_to_copy{std::min(gl_buffer_size, data.size())};
memcpy(gl_buffer, data.data(), size_to_copy);
} else {
// TODO(bunnei): Assumes the default rendering GOB size of 16 (128 lines). We should
// check the configuration for this and perform more generic un/swizzle
LOG_WARNING(Render_OpenGL, "need to use correct swizzle/GOB parameters!");
VideoCore::MortonCopyPixels128(stride, height, bytes_per_pixel, gl_bytes_per_pixel,
Memory::GetPointer(addr), gl_buffer, morton_to_gl);
Tegra::Texture::CopySwizzledData(stride / tile_size, height / tile_size, depth,
bytes_per_pixel, bytes_per_pixel, Memory::GetPointer(addr),
gl_buffer, false, block_height, block_depth);
}
}
@@ -430,17 +438,16 @@ static constexpr std::array<void (*)(u32, u32, u32, u32, u32, u8*, std::size_t,
MortonCopy<false, PixelFormat::RGBA16UI>,
MortonCopy<false, PixelFormat::R11FG11FB10F>,
MortonCopy<false, PixelFormat::RGBA32UI>,
// TODO(Subv): Swizzling DXT1/DXT23/DXT45/DXN1/DXN2/BC7U/BC6H_UF16/BC6H_SF16/ASTC_2D_4X4
// formats are not supported
nullptr,
nullptr,
nullptr,
nullptr,
nullptr,
nullptr,
nullptr,
nullptr,
nullptr,
MortonCopy<false, PixelFormat::DXT1>,
MortonCopy<false, PixelFormat::DXT23>,
MortonCopy<false, PixelFormat::DXT45>,
MortonCopy<false, PixelFormat::DXN1>,
MortonCopy<false, PixelFormat::DXN2UNORM>,
MortonCopy<false, PixelFormat::DXN2SNORM>,
MortonCopy<false, PixelFormat::BC7U>,
MortonCopy<false, PixelFormat::BC6H_UF16>,
MortonCopy<false, PixelFormat::BC6H_SF16>,
// TODO(Subv): Swizzling ASTC formats are not supported
nullptr,
MortonCopy<false, PixelFormat::G8R8U>,
MortonCopy<false, PixelFormat::G8R8S>,
@@ -626,22 +633,21 @@ static void CopySurface(const Surface& src_surface, const Surface& dst_surface,
auto source_format = GetFormatTuple(src_params.pixel_format, src_params.component_type);
auto dest_format = GetFormatTuple(dst_params.pixel_format, dst_params.component_type);
std::size_t buffer_size =
std::max(src_params.size_in_bytes_total, dst_params.size_in_bytes_total);
std::size_t buffer_size = std::max(src_params.size_in_bytes, dst_params.size_in_bytes);
glBindBuffer(GL_PIXEL_PACK_BUFFER, copy_pbo_handle);
glBufferData(GL_PIXEL_PACK_BUFFER, buffer_size, nullptr, GL_STREAM_DRAW_ARB);
if (source_format.compressed) {
glGetCompressedTextureImage(src_surface->Texture().handle, src_attachment,
static_cast<GLsizei>(src_params.size_in_bytes_total), nullptr);
static_cast<GLsizei>(src_params.size_in_bytes), nullptr);
} else {
glGetTextureImage(src_surface->Texture().handle, src_attachment, source_format.format,
source_format.type, static_cast<GLsizei>(src_params.size_in_bytes_total),
source_format.type, static_cast<GLsizei>(src_params.size_in_bytes),
nullptr);
}
// If the new texture is bigger than the previous one, we need to fill in the rest with data
// from the CPU.
if (src_params.size_in_bytes_total < dst_params.size_in_bytes_total) {
if (src_params.size_in_bytes < dst_params.size_in_bytes) {
// Upload the rest of the memory.
if (dst_params.is_tiled) {
// TODO(Subv): We might have to de-tile the subtexture and re-tile it with the rest
@@ -651,12 +657,12 @@ static void CopySurface(const Surface& src_surface, const Surface& dst_surface,
LOG_DEBUG(HW_GPU, "Trying to upload extra texture data from the CPU during "
"reinterpretation but the texture is tiled.");
}
std::size_t remaining_size =
dst_params.size_in_bytes_total - src_params.size_in_bytes_total;
std::size_t remaining_size = dst_params.size_in_bytes - src_params.size_in_bytes;
std::vector<u8> data(remaining_size);
Memory::ReadBlock(dst_params.addr + src_params.size_in_bytes_total, data.data(),
data.size());
glBufferSubData(GL_PIXEL_PACK_BUFFER, src_params.size_in_bytes_total, remaining_size,
std::memcpy(data.data(), Memory::GetPointer(dst_params.addr + src_params.size_in_bytes),
data.size());
glBufferSubData(GL_PIXEL_PACK_BUFFER, src_params.size_in_bytes, remaining_size,
data.data());
}
@@ -702,7 +708,8 @@ static void CopySurface(const Surface& src_surface, const Surface& dst_surface,
}
CachedSurface::CachedSurface(const SurfaceParams& params)
: params(params), gl_target(SurfaceTargetToGL(params.target)) {
: params(params), gl_target(SurfaceTargetToGL(params.target)),
cached_size_in_bytes(params.size_in_bytes) {
texture.Create();
const auto& rect{params.GetRect()};
@@ -752,9 +759,21 @@ CachedSurface::CachedSurface(const SurfaceParams& params)
VideoCore::LabelGLObject(GL_TEXTURE, texture.handle, params.addr,
SurfaceParams::SurfaceTargetName(params.target));
// Clamp size to mapped GPU memory region
// TODO(bunnei): Super Mario Odyssey maps a 0x40000 byte region and then uses it for a 0x80000
// R32F render buffer. We do not yet know if this is a game bug or something else, but this
// check is necessary to prevent flushing from overwriting unmapped memory.
auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()};
const u64 max_size{memory_manager.GetRegionEnd(params.gpu_addr) - params.gpu_addr};
if (cached_size_in_bytes > max_size) {
LOG_ERROR(HW_GPU, "Surface size {} exceeds region size {}", params.size_in_bytes, max_size);
cached_size_in_bytes = max_size;
}
}
static void ConvertS8Z24ToZ24S8(std::vector<u8>& data, u32 width, u32 height) {
static void ConvertS8Z24ToZ24S8(std::vector<u8>& data, u32 width, u32 height, bool reverse) {
union S8Z24 {
BitField<0, 24, u32> z24;
BitField<24, 8, u32> s8;
@@ -767,22 +786,29 @@ static void ConvertS8Z24ToZ24S8(std::vector<u8>& data, u32 width, u32 height) {
};
static_assert(sizeof(Z24S8) == 4, "Z24S8 is incorrect size");
S8Z24 input_pixel{};
Z24S8 output_pixel{};
constexpr auto bpp{CachedSurface::GetGLBytesPerPixel(PixelFormat::S8Z24)};
S8Z24 s8z24_pixel{};
Z24S8 z24s8_pixel{};
constexpr auto bpp{SurfaceParams::GetBytesPerPixel(PixelFormat::S8Z24)};
for (std::size_t y = 0; y < height; ++y) {
for (std::size_t x = 0; x < width; ++x) {
const std::size_t offset{bpp * (y * width + x)};
std::memcpy(&input_pixel, &data[offset], sizeof(S8Z24));
output_pixel.s8.Assign(input_pixel.s8);
output_pixel.z24.Assign(input_pixel.z24);
std::memcpy(&data[offset], &output_pixel, sizeof(Z24S8));
if (reverse) {
std::memcpy(&z24s8_pixel, &data[offset], sizeof(Z24S8));
s8z24_pixel.s8.Assign(z24s8_pixel.s8);
s8z24_pixel.z24.Assign(z24s8_pixel.z24);
std::memcpy(&data[offset], &s8z24_pixel, sizeof(S8Z24));
} else {
std::memcpy(&s8z24_pixel, &data[offset], sizeof(S8Z24));
z24s8_pixel.s8.Assign(s8z24_pixel.s8);
z24s8_pixel.z24.Assign(s8z24_pixel.z24);
std::memcpy(&data[offset], &z24s8_pixel, sizeof(Z24S8));
}
}
}
}
static void ConvertG8R8ToR8G8(std::vector<u8>& data, u32 width, u32 height) {
constexpr auto bpp{CachedSurface::GetGLBytesPerPixel(PixelFormat::G8R8U)};
constexpr auto bpp{SurfaceParams::GetBytesPerPixel(PixelFormat::G8R8U)};
for (std::size_t y = 0; y < height; ++y) {
for (std::size_t x = 0; x < width; ++x) {
const std::size_t offset{bpp * (y * width + x)};
@@ -814,7 +840,7 @@ static void ConvertFormatAsNeeded_LoadGLBuffer(std::vector<u8>& data, PixelForma
}
case PixelFormat::S8Z24:
// Convert the S8Z24 depth format to Z24S8, as OpenGL does not support S8Z24.
ConvertS8Z24ToZ24S8(data, width, height);
ConvertS8Z24ToZ24S8(data, width, height, false);
break;
case PixelFormat::G8R8U:
@@ -825,22 +851,36 @@ static void ConvertFormatAsNeeded_LoadGLBuffer(std::vector<u8>& data, PixelForma
}
}
/**
* Helper function to perform software conversion (as needed) when flushing a buffer from OpenGL to
* Switch memory. This is for Maxwell pixel formats that cannot be represented as-is in OpenGL or
* with typical desktop GPUs.
*/
static void ConvertFormatAsNeeded_FlushGLBuffer(std::vector<u8>& data, PixelFormat pixel_format,
u32 width, u32 height) {
switch (pixel_format) {
case PixelFormat::G8R8U:
case PixelFormat::G8R8S:
case PixelFormat::ASTC_2D_4X4:
case PixelFormat::ASTC_2D_8X8: {
LOG_CRITICAL(HW_GPU, "Conversion of format {} after texture flushing is not implemented",
static_cast<u32>(pixel_format));
UNREACHABLE();
break;
}
case PixelFormat::S8Z24:
// Convert the Z24S8 depth format to S8Z24, as OpenGL does not support S8Z24.
ConvertS8Z24ToZ24S8(data, width, height, true);
break;
}
}
MICROPROFILE_DEFINE(OpenGL_SurfaceLoad, "OpenGL", "Surface Load", MP_RGB(128, 64, 192));
void CachedSurface::LoadGLBuffer() {
ASSERT(params.type != SurfaceType::Fill);
const u8* const texture_src_data = Memory::GetPointer(params.addr);
ASSERT(texture_src_data);
const u32 bytes_per_pixel = GetGLBytesPerPixel(params.pixel_format);
const u32 copy_size = params.width * params.height * bytes_per_pixel;
const std::size_t total_size = copy_size * params.depth;
MICROPROFILE_SCOPE(OpenGL_SurfaceLoad);
gl_buffer.resize(params.size_in_bytes_gl);
if (params.is_tiled) {
gl_buffer.resize(total_size);
u32 depth = params.depth;
u32 block_depth = params.block_depth;
@@ -853,13 +893,12 @@ void CachedSurface::LoadGLBuffer() {
block_depth = 1U;
}
const std::size_t size = copy_size * depth;
morton_to_gl_fns[static_cast<std::size_t>(params.pixel_format)](
params.width, params.block_height, params.height, block_depth, depth, gl_buffer.data(),
size, params.addr);
gl_buffer.size(), params.addr);
} else {
const u8* const texture_src_data_end{texture_src_data + total_size};
const auto texture_src_data{Memory::GetPointer(params.addr)};
const auto texture_src_data_end{texture_src_data + params.size_in_bytes_gl};
gl_buffer.assign(texture_src_data, texture_src_data_end);
}
@@ -868,7 +907,44 @@ void CachedSurface::LoadGLBuffer() {
MICROPROFILE_DEFINE(OpenGL_SurfaceFlush, "OpenGL", "Surface Flush", MP_RGB(128, 192, 64));
void CachedSurface::FlushGLBuffer() {
ASSERT_MSG(false, "Unimplemented");
MICROPROFILE_SCOPE(OpenGL_SurfaceFlush);
ASSERT_MSG(!IsPixelFormatASTC(params.pixel_format), "Unimplemented");
// OpenGL temporary buffer needs to be big enough to store raw texture size
gl_buffer.resize(GetSizeInBytes());
const FormatTuple& tuple = GetFormatTuple(params.pixel_format, params.component_type);
// Ensure no bad interactions with GL_UNPACK_ALIGNMENT
ASSERT(params.width * SurfaceParams::GetBytesPerPixel(params.pixel_format) % 4 == 0);
glPixelStorei(GL_PACK_ROW_LENGTH, static_cast<GLint>(params.width));
ASSERT(!tuple.compressed);
glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
glGetTextureImage(texture.handle, 0, tuple.format, tuple.type, gl_buffer.size(),
gl_buffer.data());
glPixelStorei(GL_PACK_ROW_LENGTH, 0);
ConvertFormatAsNeeded_FlushGLBuffer(gl_buffer, params.pixel_format, params.width,
params.height);
ASSERT(params.type != SurfaceType::Fill);
const u8* const texture_src_data = Memory::GetPointer(params.addr);
ASSERT(texture_src_data);
if (params.is_tiled) {
u32 depth = params.depth;
u32 block_depth = params.block_depth;
ASSERT_MSG(params.block_width == 1, "Block width is defined as {} on texture type {}",
params.block_width, static_cast<u32>(params.target));
if (params.target == SurfaceParams::SurfaceTarget::Texture2D) {
// TODO(Blinkhawk): Eliminate this condition once all texture types are implemented.
depth = 1U;
}
gl_to_morton_fns[static_cast<size_t>(params.pixel_format)](
params.width, params.block_height, params.height, block_depth, depth, gl_buffer.data(),
gl_buffer.size(), GetAddr());
} else {
std::memcpy(Memory::GetPointer(GetAddr()), gl_buffer.data(), GetSizeInBytes());
}
}
MICROPROFILE_DEFINE(OpenGL_TextureUL, "OpenGL", "Texture Upload", MP_RGB(128, 64, 192));
@@ -878,9 +954,6 @@ void CachedSurface::UploadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle
MICROPROFILE_SCOPE(OpenGL_TextureUL);
ASSERT(gl_buffer.size() == static_cast<std::size_t>(params.width) * params.height *
GetGLBytesPerPixel(params.pixel_format) * params.depth);
const auto& rect{params.GetRect()};
// Load data from memory to the surface
@@ -889,7 +962,7 @@ void CachedSurface::UploadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle
std::size_t buffer_offset =
static_cast<std::size_t>(static_cast<std::size_t>(y0) * params.width +
static_cast<std::size_t>(x0)) *
GetGLBytesPerPixel(params.pixel_format);
SurfaceParams::GetBytesPerPixel(params.pixel_format);
const FormatTuple& tuple = GetFormatTuple(params.pixel_format, params.component_type);
const GLuint target_tex = texture.handle;
@@ -905,7 +978,7 @@ void CachedSurface::UploadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle
cur_state.Apply();
// Ensure no bad interactions with GL_UNPACK_ALIGNMENT
ASSERT(params.width * GetGLBytesPerPixel(params.pixel_format) % 4 == 0);
ASSERT(params.width * SurfaceParams::GetBytesPerPixel(params.pixel_format) % 4 == 0);
glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(params.width));
glActiveTexture(GL_TEXTURE0);
@@ -915,7 +988,7 @@ void CachedSurface::UploadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle
glCompressedTexImage2D(
SurfaceTargetToGL(params.target), 0, tuple.internal_format,
static_cast<GLsizei>(params.width), static_cast<GLsizei>(params.height), 0,
static_cast<GLsizei>(params.size_in_bytes_2d), &gl_buffer[buffer_offset]);
static_cast<GLsizei>(params.size_in_bytes_gl), &gl_buffer[buffer_offset]);
break;
case SurfaceParams::SurfaceTarget::Texture3D:
case SurfaceParams::SurfaceTarget::Texture2DArray:
@@ -923,16 +996,16 @@ void CachedSurface::UploadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle
SurfaceTargetToGL(params.target), 0, tuple.internal_format,
static_cast<GLsizei>(params.width), static_cast<GLsizei>(params.height),
static_cast<GLsizei>(params.depth), 0,
static_cast<GLsizei>(params.size_in_bytes_total), &gl_buffer[buffer_offset]);
static_cast<GLsizei>(params.size_in_bytes_gl), &gl_buffer[buffer_offset]);
break;
case SurfaceParams::SurfaceTarget::TextureCubemap:
for (std::size_t face = 0; face < params.depth; ++face) {
glCompressedTexImage2D(static_cast<GLenum>(GL_TEXTURE_CUBE_MAP_POSITIVE_X + face),
0, tuple.internal_format, static_cast<GLsizei>(params.width),
static_cast<GLsizei>(params.height), 0,
static_cast<GLsizei>(params.size_in_bytes_2d),
static_cast<GLsizei>(params.SizeInBytesCubeFaceGL()),
&gl_buffer[buffer_offset]);
buffer_offset += params.size_in_bytes_2d;
buffer_offset += params.SizeInBytesCubeFace();
}
break;
default:
@@ -942,7 +1015,7 @@ void CachedSurface::UploadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle
glCompressedTexImage2D(
GL_TEXTURE_2D, 0, tuple.internal_format, static_cast<GLsizei>(params.width),
static_cast<GLsizei>(params.height), 0,
static_cast<GLsizei>(params.size_in_bytes_2d), &gl_buffer[buffer_offset]);
static_cast<GLsizei>(params.size_in_bytes_gl), &gl_buffer[buffer_offset]);
}
} else {
@@ -971,7 +1044,7 @@ void CachedSurface::UploadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle
y0, static_cast<GLsizei>(rect.GetWidth()),
static_cast<GLsizei>(rect.GetHeight()), tuple.format, tuple.type,
&gl_buffer[buffer_offset]);
buffer_offset += params.size_in_bytes_2d;
buffer_offset += params.SizeInBytesCubeFace();
}
break;
default:
@@ -1033,10 +1106,7 @@ Surface RasterizerCacheOpenGL::GetColorBufferSurface(std::size_t index, bool pre
void RasterizerCacheOpenGL::LoadSurface(const Surface& surface) {
surface->LoadGLBuffer();
surface->UploadGLTexture(read_framebuffer.handle, draw_framebuffer.handle);
}
void RasterizerCacheOpenGL::FlushSurface(const Surface& surface) {
surface->FlushGLBuffer();
surface->MarkAsModified(false, *this);
}
Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params, bool preserve_contents) {
@@ -1053,8 +1123,8 @@ Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params, bool pres
} else if (preserve_contents) {
// If surface parameters changed and we care about keeping the previous data, recreate
// the surface from the old one
Unregister(surface);
Surface new_surface{RecreateSurface(surface, params)};
Unregister(surface);
Register(new_surface);
return new_surface;
} else {
@@ -1105,6 +1175,14 @@ void RasterizerCacheOpenGL::FermiCopySurface(
FastCopySurface(GetSurface(src_params, true), GetSurface(dst_params, false));
}
void RasterizerCacheOpenGL::AccurateCopySurface(const Surface& src_surface,
const Surface& dst_surface) {
const auto& src_params{src_surface->GetSurfaceParams()};
const auto& dst_params{dst_surface->GetSurfaceParams()};
FlushRegion(src_params.addr, dst_params.size_in_bytes);
LoadSurface(dst_surface);
}
Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& old_surface,
const SurfaceParams& new_params) {
// Verify surface is compatible for blitting
@@ -1113,6 +1191,12 @@ Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& old_surface,
// Get a new surface with the new parameters, and blit the previous surface to it
Surface new_surface{GetUncachedSurface(new_params)};
// With use_accurate_gpu_emulation enabled, do an accurate surface copy
if (Settings::values.use_accurate_gpu_emulation) {
AccurateCopySurface(old_surface, new_surface);
return new_surface;
}
// For compatible surfaces, we can just do fast glCopyImageSubData based copy
if (old_params.target == new_params.target && old_params.type == new_params.type &&
old_params.depth == new_params.depth && old_params.depth == 1 &&
@@ -1124,11 +1208,10 @@ Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& old_surface,
// If the format is the same, just do a framebuffer blit. This is significantly faster than
// using PBOs. The is also likely less accurate, as textures will be converted rather than
// reinterpreted. When use_accurate_framebuffers setting is enabled, perform a more accurate
// reinterpreted. When use_accurate_gpu_emulation setting is enabled, perform a more accurate
// surface copy, where pixels are reinterpreted as a new format (without conversion). This
// code path uses OpenGL PBOs and is quite slow.
const bool is_blit{old_params.pixel_format == new_params.pixel_format ||
!Settings::values.use_accurate_framebuffers};
const bool is_blit{old_params.pixel_format == new_params.pixel_format};
switch (new_params.target) {
case SurfaceParams::SurfaceTarget::Texture2D:
@@ -1138,6 +1221,9 @@ Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& old_surface,
CopySurface(old_surface, new_surface, copy_pbo.handle);
}
break;
case SurfaceParams::SurfaceTarget::Texture3D:
AccurateCopySurface(old_surface, new_surface);
break;
case SurfaceParams::SurfaceTarget::TextureCubemap: {
if (old_params.rt.array_mode != 1) {
// TODO(bunnei): This is used by Breath of the Wild, I'm not sure how to implement this

View File

@@ -18,6 +18,7 @@
#include "video_core/rasterizer_cache.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_shader_gen.h"
#include "video_core/textures/decoders.h"
#include "video_core/textures/texture.h"
namespace OpenGL {
@@ -131,6 +132,8 @@ struct SurfaceParams {
case Tegra::Texture::TextureType::Texture2D:
case Tegra::Texture::TextureType::Texture2DNoMipmap:
return SurfaceTarget::Texture2D;
case Tegra::Texture::TextureType::Texture3D:
return SurfaceTarget::Texture3D;
case Tegra::Texture::TextureType::TextureCubemap:
return SurfaceTarget::TextureCubemap;
case Tegra::Texture::TextureType::Texture1DArray:
@@ -701,21 +704,42 @@ struct SurfaceParams {
return SurfaceType::Invalid;
}
/// Returns the sizer in bytes of the specified pixel format
static constexpr u32 GetBytesPerPixel(PixelFormat pixel_format) {
if (pixel_format == SurfaceParams::PixelFormat::Invalid) {
return 0;
}
return GetFormatBpp(pixel_format) / CHAR_BIT;
}
/// Returns the rectangle corresponding to this surface
MathUtil::Rectangle<u32> GetRect() const;
/// Returns the size of this surface as a 2D texture in bytes, adjusted for compression
std::size_t SizeInBytes2D() const {
/// Returns the total size of this surface in bytes, adjusted for compression
std::size_t SizeInBytesRaw(bool ignore_tiled = false) const {
const u32 compression_factor{GetCompressionFactor(pixel_format)};
ASSERT(width % compression_factor == 0);
ASSERT(height % compression_factor == 0);
return (width / compression_factor) * (height / compression_factor) *
GetFormatBpp(pixel_format) / CHAR_BIT;
const u32 bytes_per_pixel{GetBytesPerPixel(pixel_format)};
const size_t uncompressed_size{
Tegra::Texture::CalculateSize((ignore_tiled ? false : is_tiled), bytes_per_pixel, width,
height, depth, block_height, block_depth)};
// Divide by compression_factor^2, as height and width are factored by this
return uncompressed_size / (compression_factor * compression_factor);
}
/// Returns the total size of this surface in bytes, adjusted for compression
std::size_t SizeInBytesTotal() const {
return SizeInBytes2D() * depth;
/// Returns the size of this surface as an OpenGL texture in bytes
std::size_t SizeInBytesGL() const {
return SizeInBytesRaw(true);
}
/// Returns the size of this surface as a cube face in bytes
std::size_t SizeInBytesCubeFace() const {
return size_in_bytes / 6;
}
/// Returns the size of this surface as an OpenGL cube face in bytes
std::size_t SizeInBytesCubeFaceGL() const {
return size_in_bytes_gl / 6;
}
/// Creates SurfaceParams from a texture configuration
@@ -742,7 +766,9 @@ struct SurfaceParams {
other.depth);
}
VAddr addr;
/// Initializes parameters for caching, should be called after everything has been initialized
void InitCacheParameters(Tegra::GPUVAddr gpu_addr);
bool is_tiled;
u32 block_width;
u32 block_height;
@@ -754,15 +780,20 @@ struct SurfaceParams {
u32 height;
u32 depth;
u32 unaligned_height;
std::size_t size_in_bytes_total;
std::size_t size_in_bytes_2d;
SurfaceTarget target;
u32 max_mip_level;
// Parameters used for caching
VAddr addr;
Tegra::GPUVAddr gpu_addr;
std::size_t size_in_bytes;
std::size_t size_in_bytes_gl;
// Render target specific parameters, not used in caching
struct {
u32 index;
u32 array_mode;
u32 volume;
u32 layer_stride;
u32 base_layer;
} rt;
@@ -775,7 +806,8 @@ struct SurfaceReserveKey : Common::HashableStruct<OpenGL::SurfaceParams> {
static SurfaceReserveKey Create(const OpenGL::SurfaceParams& params) {
SurfaceReserveKey res;
res.state = params;
res.state.rt = {}; // Ignore rt config in caching
res.state.gpu_addr = {}; // Ignore GPU vaddr in caching
res.state.rt = {}; // Ignore rt config in caching
return res;
}
};
@@ -790,16 +822,20 @@ struct hash<SurfaceReserveKey> {
namespace OpenGL {
class CachedSurface final {
class CachedSurface final : public RasterizerCacheObject {
public:
CachedSurface(const SurfaceParams& params);
VAddr GetAddr() const {
VAddr GetAddr() const override {
return params.addr;
}
std::size_t GetSizeInBytes() const {
return params.size_in_bytes_total;
std::size_t GetSizeInBytes() const override {
return cached_size_in_bytes;
}
void Flush() override {
FlushGLBuffer();
}
const OGLTexture& Texture() const {
@@ -810,13 +846,6 @@ public:
return gl_target;
}
static constexpr unsigned int GetGLBytesPerPixel(SurfaceParams::PixelFormat format) {
if (format == SurfaceParams::PixelFormat::Invalid)
return 0;
return SurfaceParams::GetFormatBpp(format) / CHAR_BIT;
}
const SurfaceParams& GetSurfaceParams() const {
return params;
}
@@ -833,6 +862,7 @@ private:
std::vector<u8> gl_buffer;
SurfaceParams params;
GLenum gl_target;
std::size_t cached_size_in_bytes;
};
class RasterizerCacheOpenGL final : public RasterizerCache<Surface> {
@@ -849,9 +879,6 @@ public:
/// Get the color surface based on the framebuffer configuration and the specified render target
Surface GetColorBufferSurface(std::size_t index, bool preserve_contents);
/// Flushes the surface to Switch memory
void FlushSurface(const Surface& surface);
/// Tries to find a framebuffer using on the provided CPU address
Surface TryFindFramebufferSurface(VAddr addr) const;
@@ -875,6 +902,9 @@ private:
/// Tries to get a reserved surface for the specified parameters
Surface TryGetReservedSurface(const SurfaceParams& params);
/// Performs a slow but accurate surface copy, flushing to RAM and reinterpreting the data
void AccurateCopySurface(const Surface& src_surface, const Surface& dst_surface);
/// The surface reserve is a "backup" cache, this is where we put unique surfaces that have
/// previously been used. This is to prevent surfaces from being constantly created and
/// destroyed when used with different surface parameters.

View File

@@ -19,20 +19,21 @@ class CachedShader;
using Shader = std::shared_ptr<CachedShader>;
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
class CachedShader final {
class CachedShader final : public RasterizerCacheObject {
public:
CachedShader(VAddr addr, Maxwell::ShaderProgram program_type);
/// Gets the address of the shader in guest memory, required for cache management
VAddr GetAddr() const {
VAddr GetAddr() const override {
return addr;
}
/// Gets the size of the shader in guest memory, required for cache management
std::size_t GetSizeInBytes() const {
std::size_t GetSizeInBytes() const override {
return GLShader::MAX_PROGRAM_CODE_LENGTH * sizeof(u64);
}
// We do not have to flush this cache as things in it are never modified by us.
void Flush() override {}
/// Gets the shader entries for the shader
const GLShader::ShaderEntries& GetShaderEntries() const {
return entries;

View File

@@ -1142,6 +1142,7 @@ private:
case Tegra::Shader::TextureType::Texture2D: {
return 2;
}
case Tegra::Shader::TextureType::Texture3D:
case Tegra::Shader::TextureType::TextureCube: {
return 3;
}
@@ -2036,9 +2037,9 @@ private:
break;
}
case OpCode::Id::TEX: {
ASSERT_MSG(instr.tex.array == 0, "TEX arrays unimplemented");
Tegra::Shader::TextureType texture_type{instr.tex.texture_type};
std::string coord;
const bool is_array = instr.tex.array != 0;
ASSERT_MSG(!instr.tex.UsesMiscMode(Tegra::Shader::TextureMiscMode::NODEP),
"NODEP is not implemented");
@@ -2053,21 +2054,59 @@ private:
switch (num_coordinates) {
case 1: {
const std::string x = regs.GetRegisterAsFloat(instr.gpr8);
coord = "float coords = " + x + ';';
if (is_array) {
const std::string index = regs.GetRegisterAsInteger(instr.gpr8);
const std::string x = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
coord = "vec2 coords = vec2(" + x + ", " + index + ");";
} else {
const std::string x = regs.GetRegisterAsFloat(instr.gpr8);
coord = "float coords = " + x + ';';
}
break;
}
case 2: {
const std::string x = regs.GetRegisterAsFloat(instr.gpr8);
const std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
coord = "vec2 coords = vec2(" + x + ", " + y + ");";
if (is_array) {
const std::string index = regs.GetRegisterAsInteger(instr.gpr8);
const std::string x = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
const std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 2);
coord = "vec3 coords = vec3(" + x + ", " + y + ", " + index + ");";
} else {
const std::string x = regs.GetRegisterAsFloat(instr.gpr8);
const std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
coord = "vec2 coords = vec2(" + x + ", " + y + ");";
}
break;
}
case 3: {
const std::string x = regs.GetRegisterAsFloat(instr.gpr8);
const std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
const std::string z = regs.GetRegisterAsFloat(instr.gpr20);
coord = "vec3 coords = vec3(" + x + ", " + y + ", " + z + ");";
if (depth_compare) {
if (is_array) {
const std::string index = regs.GetRegisterAsInteger(instr.gpr8);
const std::string x = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
const std::string y = regs.GetRegisterAsFloat(instr.gpr20);
const std::string z = regs.GetRegisterAsFloat(instr.gpr20.Value() + 1);
coord = "vec4 coords = vec4(" + x + ", " + y + ", " + z + ", " + index +
");";
} else {
const std::string x = regs.GetRegisterAsFloat(instr.gpr8);
const std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
const std::string z = regs.GetRegisterAsFloat(instr.gpr20);
coord = "vec3 coords = vec3(" + x + ", " + y + ", " + z + ");";
}
} else {
if (is_array) {
const std::string index = regs.GetRegisterAsInteger(instr.gpr8);
const std::string x = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
const std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 2);
const std::string z = regs.GetRegisterAsFloat(instr.gpr8.Value() + 3);
coord = "vec4 coords = vec4(" + x + ", " + y + ", " + z + ", " + index +
");";
} else {
const std::string x = regs.GetRegisterAsFloat(instr.gpr8);
const std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
const std::string z = regs.GetRegisterAsFloat(instr.gpr8.Value() + 2);
coord = "vec3 coords = vec3(" + x + ", " + y + ", " + z + ");";
}
}
break;
}
default:
@@ -2086,7 +2125,7 @@ private:
std::string op_c;
const std::string sampler =
GetSampler(instr.sampler, texture_type, false, depth_compare);
GetSampler(instr.sampler, texture_type, is_array, depth_compare);
// Add an extra scope and declare the texture coords inside to prevent
// overwriting them in case they are used as outputs of the texs instruction.
@@ -2106,10 +2145,13 @@ private:
}
case Tegra::Shader::TextureProcessMode::LB:
case Tegra::Shader::TextureProcessMode::LBA: {
if (num_coordinates <= 2) {
op_c = regs.GetRegisterAsFloat(instr.gpr20);
if (depth_compare) {
if (is_array)
op_c = regs.GetRegisterAsFloat(instr.gpr20.Value() + 2);
else
op_c = regs.GetRegisterAsFloat(instr.gpr20.Value() + 1);
} else {
op_c = regs.GetRegisterAsFloat(instr.gpr20.Value() + 1);
op_c = regs.GetRegisterAsFloat(instr.gpr20);
}
// TODO: Figure if A suffix changes the equation at all.
texture = "texture(" + sampler + ", coords, " + op_c + ')';

View File

@@ -85,8 +85,8 @@ void Config::ReadValues() {
Settings::values.resolution_factor = qt_config->value("resolution_factor", 1.0).toFloat();
Settings::values.use_frame_limit = qt_config->value("use_frame_limit", true).toBool();
Settings::values.frame_limit = qt_config->value("frame_limit", 100).toInt();
Settings::values.use_accurate_framebuffers =
qt_config->value("use_accurate_framebuffers", false).toBool();
Settings::values.use_accurate_gpu_emulation =
qt_config->value("use_accurate_gpu_emulation", false).toBool();
Settings::values.bg_red = qt_config->value("bg_red", 0.0).toFloat();
Settings::values.bg_green = qt_config->value("bg_green", 0.0).toFloat();
@@ -233,7 +233,7 @@ void Config::SaveValues() {
qt_config->setValue("resolution_factor", (double)Settings::values.resolution_factor);
qt_config->setValue("use_frame_limit", Settings::values.use_frame_limit);
qt_config->setValue("frame_limit", Settings::values.frame_limit);
qt_config->setValue("use_accurate_framebuffers", Settings::values.use_accurate_framebuffers);
qt_config->setValue("use_accurate_gpu_emulation", Settings::values.use_accurate_gpu_emulation);
// Cast to double because Qt's written float values are not human-readable
qt_config->setValue("bg_red", (double)Settings::values.bg_red);

View File

@@ -75,7 +75,7 @@ void ConfigureGraphics::setConfiguration() {
static_cast<int>(FromResolutionFactor(Settings::values.resolution_factor)));
ui->toggle_frame_limit->setChecked(Settings::values.use_frame_limit);
ui->frame_limit->setValue(Settings::values.frame_limit);
ui->use_accurate_framebuffers->setChecked(Settings::values.use_accurate_framebuffers);
ui->use_accurate_gpu_emulation->setChecked(Settings::values.use_accurate_gpu_emulation);
bg_color = QColor::fromRgbF(Settings::values.bg_red, Settings::values.bg_green,
Settings::values.bg_blue);
ui->bg_button->setStyleSheet(
@@ -87,7 +87,7 @@ void ConfigureGraphics::applyConfiguration() {
ToResolutionFactor(static_cast<Resolution>(ui->resolution_factor_combobox->currentIndex()));
Settings::values.use_frame_limit = ui->toggle_frame_limit->isChecked();
Settings::values.frame_limit = ui->frame_limit->value();
Settings::values.use_accurate_framebuffers = ui->use_accurate_framebuffers->isChecked();
Settings::values.use_accurate_gpu_emulation = ui->use_accurate_gpu_emulation->isChecked();
Settings::values.bg_red = static_cast<float>(bg_color.redF());
Settings::values.bg_green = static_cast<float>(bg_color.greenF());
Settings::values.bg_blue = static_cast<float>(bg_color.blueF());

View File

@@ -50,9 +50,9 @@
</layout>
</item>
<item>
<widget class="QCheckBox" name="use_accurate_framebuffers">
<widget class="QCheckBox" name="use_accurate_gpu_emulation">
<property name="text">
<string>Use accurate framebuffers (slow)</string>
<string>Use accurate GPU emulation (slow)</string>
</property>
</widget>
</item>

View File

@@ -99,8 +99,8 @@ void Config::ReadValues() {
Settings::values.use_frame_limit = sdl2_config->GetBoolean("Renderer", "use_frame_limit", true);
Settings::values.frame_limit =
static_cast<u16>(sdl2_config->GetInteger("Renderer", "frame_limit", 100));
Settings::values.use_accurate_framebuffers =
sdl2_config->GetBoolean("Renderer", "use_accurate_framebuffers", false);
Settings::values.use_accurate_gpu_emulation =
sdl2_config->GetBoolean("Renderer", "use_accurate_gpu_emulation", false);
Settings::values.bg_red = (float)sdl2_config->GetReal("Renderer", "bg_red", 0.0);
Settings::values.bg_green = (float)sdl2_config->GetReal("Renderer", "bg_green", 0.0);

View File

@@ -110,9 +110,9 @@ use_frame_limit =
# 1 - 9999: Speed limit as a percentage of target game speed. 100 (default)
frame_limit =
# Whether to use accurate framebuffers
# Whether to use accurate GPU emulation
# 0 (default): Off (fast), 1 : On (slow)
use_accurate_framebuffers =
use_accurate_gpu_emulation =
# The clear color for the renderer. What shows up on the sides of the bottom screen.
# Must be in range of 0.0-1.0. Defaults to 1.0 for all.