Clang format and other fixes

Implement Reinterpret Surface, to accurately blit 3D textures
Implement GetInRange in the Rasterizer Cache
2018-10-17 18:52:11 -04:00 · 2018-10-17 18:52:10 -04:00 · 2018-10-17 18:52:10 -04:00 · 2018-10-17 18:52:08 -04:00 · 2018-10-17 18:40:34 -04:00 · 2018-10-17 18:31:51 -04:00
24 changed files with 811 additions and 525 deletions
--- a/src/core/file_sys/content_archive.cpp
+++ b/src/core/file_sys/content_archive.cpp
@@ -97,288 +97,11 @@ union NCASectionHeader {
 };
 static_assert(sizeof(NCASectionHeader) == 0x200, "NCASectionHeader has incorrect size.");

-static bool IsValidNCA(const NCAHeader& header) {
+bool IsValidNCA(const NCAHeader& header) {
    // TODO(DarkLordZach): Add NCA2/NCA0 support.
    return header.magic == Common::MakeMagic('N', 'C', 'A', '3');
 }

-NCA::NCA(VirtualFile file_, VirtualFile bktr_base_romfs_, u64 bktr_base_ivfc_offset)
-    : file(std::move(file_)), bktr_base_romfs(std::move(bktr_base_romfs_)) {
-    if (file == nullptr) {
-        status = Loader::ResultStatus::ErrorNullFile;
-        return;
-    }
-
-    if (sizeof(NCAHeader) != file->ReadObject(&header)) {
-        LOG_ERROR(Loader, "File reader errored out during header read.");
-        status = Loader::ResultStatus::ErrorBadNCAHeader;
-        return;
-    }
-
-    if (!HandlePotentialHeaderDecryption()) {
-        return;
-    }
-
-    has_rights_id = std::any_of(header.rights_id.begin(), header.rights_id.end(),
-                                [](char c) { return c != '\0'; });
-
-    const std::vector<NCASectionHeader> sections = ReadSectionHeaders();
-    is_update = std::any_of(sections.begin(), sections.end(), [](const NCASectionHeader& header) {
-        return header.raw.header.crypto_type == NCASectionCryptoType::BKTR;
-    });
-
-    if (!ReadSections(sections, bktr_base_ivfc_offset)) {
-        return;
-    }
-
-    status = Loader::ResultStatus::Success;
-}
-
-NCA::~NCA() = default;
-
-bool NCA::CheckSupportedNCA(const NCAHeader& nca_header) {
-    if (nca_header.magic == Common::MakeMagic('N', 'C', 'A', '2')) {
-        status = Loader::ResultStatus::ErrorNCA2;
-        return false;
-    }
-
-    if (nca_header.magic == Common::MakeMagic('N', 'C', 'A', '0')) {
-        status = Loader::ResultStatus::ErrorNCA0;
-        return false;
-    }
-
-    return true;
-}
-
-bool NCA::HandlePotentialHeaderDecryption() {
-    if (IsValidNCA(header)) {
-        return true;
-    }
-
-    if (!CheckSupportedNCA(header)) {
-        return false;
-    }
-
-    NCAHeader dec_header{};
-    Core::Crypto::AESCipher<Core::Crypto::Key256> cipher(
-        keys.GetKey(Core::Crypto::S256KeyType::Header), Core::Crypto::Mode::XTS);
-    cipher.XTSTranscode(&header, sizeof(NCAHeader), &dec_header, 0, 0x200,
-                        Core::Crypto::Op::Decrypt);
-    if (IsValidNCA(dec_header)) {
-        header = dec_header;
-        encrypted = true;
-    } else {
-        if (!CheckSupportedNCA(dec_header)) {
-            return false;
-        }
-
-        if (keys.HasKey(Core::Crypto::S256KeyType::Header)) {
-            status = Loader::ResultStatus::ErrorIncorrectHeaderKey;
-        } else {
-            status = Loader::ResultStatus::ErrorMissingHeaderKey;
-        }
-        return false;
-    }
-
-    return true;
-}
-
-std::vector<NCASectionHeader> NCA::ReadSectionHeaders() const {
-    const std::ptrdiff_t number_sections =
-        std::count_if(std::begin(header.section_tables), std::end(header.section_tables),
-                      [](NCASectionTableEntry entry) { return entry.media_offset > 0; });
-
-    std::vector<NCASectionHeader> sections(number_sections);
-    const auto length_sections = SECTION_HEADER_SIZE * number_sections;
-
-    if (encrypted) {
-        auto raw = file->ReadBytes(length_sections, SECTION_HEADER_OFFSET);
-        Core::Crypto::AESCipher<Core::Crypto::Key256> cipher(
-            keys.GetKey(Core::Crypto::S256KeyType::Header), Core::Crypto::Mode::XTS);
-        cipher.XTSTranscode(raw.data(), length_sections, sections.data(), 2, SECTION_HEADER_SIZE,
-                            Core::Crypto::Op::Decrypt);
-    } else {
-        file->ReadBytes(sections.data(), length_sections, SECTION_HEADER_OFFSET);
-    }
-
-    return sections;
-}
-
-bool NCA::ReadSections(const std::vector<NCASectionHeader>& sections, u64 bktr_base_ivfc_offset) {
-    for (std::size_t i = 0; i < sections.size(); ++i) {
-        const auto& section = sections[i];
-
-        if (section.raw.header.filesystem_type == NCASectionFilesystemType::ROMFS) {
-            if (!ReadRomFSSection(section, header.section_tables[i], bktr_base_ivfc_offset)) {
-                return false;
-            }
-        } else if (section.raw.header.filesystem_type == NCASectionFilesystemType::PFS0) {
-            if (!ReadPFS0Section(section, header.section_tables[i])) {
-                return false;
-            }
-        }
-    }
-
-    return true;
-}
-
-bool NCA::ReadRomFSSection(const NCASectionHeader& section, const NCASectionTableEntry& entry,
-                           u64 bktr_base_ivfc_offset) {
-    const std::size_t base_offset = entry.media_offset * MEDIA_OFFSET_MULTIPLIER;
-    ivfc_offset = section.romfs.ivfc.levels[IVFC_MAX_LEVEL - 1].offset;
-    const std::size_t romfs_offset = base_offset + ivfc_offset;
-    const std::size_t romfs_size = section.romfs.ivfc.levels[IVFC_MAX_LEVEL - 1].size;
-    auto raw = std::make_shared<OffsetVfsFile>(file, romfs_size, romfs_offset);
-    auto dec = Decrypt(section, raw, romfs_offset);
-
-    if (dec == nullptr) {
-        if (status != Loader::ResultStatus::Success)
-            return false;
-        if (has_rights_id)
-            status = Loader::ResultStatus::ErrorIncorrectTitlekeyOrTitlekek;
-        else
-            status = Loader::ResultStatus::ErrorIncorrectKeyAreaKey;
-        return false;
-    }
-
-    if (section.raw.header.crypto_type == NCASectionCryptoType::BKTR) {
-        if (section.bktr.relocation.magic != Common::MakeMagic('B', 'K', 'T', 'R') ||
-            section.bktr.subsection.magic != Common::MakeMagic('B', 'K', 'T', 'R')) {
-            status = Loader::ResultStatus::ErrorBadBKTRHeader;
-            return false;
-        }
-
-        if (section.bktr.relocation.offset + section.bktr.relocation.size !=
-            section.bktr.subsection.offset) {
-            status = Loader::ResultStatus::ErrorBKTRSubsectionNotAfterRelocation;
-            return false;
-        }
-
-        const u64 size = MEDIA_OFFSET_MULTIPLIER * (entry.media_end_offset - entry.media_offset);
-        if (section.bktr.subsection.offset + section.bktr.subsection.size != size) {
-            status = Loader::ResultStatus::ErrorBKTRSubsectionNotAtEnd;
-            return false;
-        }
-
-        const u64 offset = section.romfs.ivfc.levels[IVFC_MAX_LEVEL - 1].offset;
-        RelocationBlock relocation_block{};
-        if (dec->ReadObject(&relocation_block, section.bktr.relocation.offset - offset) !=
-            sizeof(RelocationBlock)) {
-            status = Loader::ResultStatus::ErrorBadRelocationBlock;
-            return false;
-        }
-        SubsectionBlock subsection_block{};
-        if (dec->ReadObject(&subsection_block, section.bktr.subsection.offset - offset) !=
-            sizeof(RelocationBlock)) {
-            status = Loader::ResultStatus::ErrorBadSubsectionBlock;
-            return false;
-        }
-
-        std::vector<RelocationBucketRaw> relocation_buckets_raw(
-            (section.bktr.relocation.size - sizeof(RelocationBlock)) / sizeof(RelocationBucketRaw));
-        if (dec->ReadBytes(relocation_buckets_raw.data(),
-                           section.bktr.relocation.size - sizeof(RelocationBlock),
-                           section.bktr.relocation.offset + sizeof(RelocationBlock) - offset) !=
-            section.bktr.relocation.size - sizeof(RelocationBlock)) {
-            status = Loader::ResultStatus::ErrorBadRelocationBuckets;
-            return false;
-        }
-
-        std::vector<SubsectionBucketRaw> subsection_buckets_raw(
-            (section.bktr.subsection.size - sizeof(SubsectionBlock)) / sizeof(SubsectionBucketRaw));
-        if (dec->ReadBytes(subsection_buckets_raw.data(),
-                           section.bktr.subsection.size - sizeof(SubsectionBlock),
-                           section.bktr.subsection.offset + sizeof(SubsectionBlock) - offset) !=
-            section.bktr.subsection.size - sizeof(SubsectionBlock)) {
-            status = Loader::ResultStatus::ErrorBadSubsectionBuckets;
-            return false;
-        }
-
-        std::vector<RelocationBucket> relocation_buckets(relocation_buckets_raw.size());
-        std::transform(relocation_buckets_raw.begin(), relocation_buckets_raw.end(),
-                       relocation_buckets.begin(), &ConvertRelocationBucketRaw);
-        std::vector<SubsectionBucket> subsection_buckets(subsection_buckets_raw.size());
-        std::transform(subsection_buckets_raw.begin(), subsection_buckets_raw.end(),
-                       subsection_buckets.begin(), &ConvertSubsectionBucketRaw);
-
-        u32 ctr_low;
-        std::memcpy(&ctr_low, section.raw.section_ctr.data(), sizeof(ctr_low));
-        subsection_buckets.back().entries.push_back({section.bktr.relocation.offset, {0}, ctr_low});
-        subsection_buckets.back().entries.push_back({size, {0}, 0});
-
-        boost::optional<Core::Crypto::Key128> key = boost::none;
-        if (encrypted) {
-            if (has_rights_id) {
-                status = Loader::ResultStatus::Success;
-                key = GetTitlekey();
-                if (key == boost::none) {
-                    status = Loader::ResultStatus::ErrorMissingTitlekey;
-                    return false;
-                }
-            } else {
-                key = GetKeyAreaKey(NCASectionCryptoType::BKTR);
-                if (key == boost::none) {
-                    status = Loader::ResultStatus::ErrorMissingKeyAreaKey;
-                    return false;
-                }
-            }
-        }
-
-        if (bktr_base_romfs == nullptr) {
-            status = Loader::ResultStatus::ErrorMissingBKTRBaseRomFS;
-            return false;
-        }
-
-        auto bktr = std::make_shared<BKTR>(
-            bktr_base_romfs, std::make_shared<OffsetVfsFile>(file, romfs_size, base_offset),
-            relocation_block, relocation_buckets, subsection_block, subsection_buckets, encrypted,
-            encrypted ? key.get() : Core::Crypto::Key128{}, base_offset, bktr_base_ivfc_offset,
-            section.raw.section_ctr);
-
-        // BKTR applies to entire IVFC, so make an offset version to level 6
-        files.push_back(std::make_shared<OffsetVfsFile>(
-            bktr, romfs_size, section.romfs.ivfc.levels[IVFC_MAX_LEVEL - 1].offset));
-    } else {
-        files.push_back(std::move(dec));
-    }
-
-    romfs = files.back();
-    return true;
-}
-
-bool NCA::ReadPFS0Section(const NCASectionHeader& section, const NCASectionTableEntry& entry) {
-    const u64 offset = (static_cast<u64>(entry.media_offset) * MEDIA_OFFSET_MULTIPLIER) +
-                       section.pfs0.pfs0_header_offset;
-    const u64 size = MEDIA_OFFSET_MULTIPLIER * (entry.media_end_offset - entry.media_offset);
-
-    auto dec = Decrypt(section, std::make_shared<OffsetVfsFile>(file, size, offset), offset);
-    if (dec != nullptr) {
-        auto npfs = std::make_shared<PartitionFilesystem>(std::move(dec));
-
-        if (npfs->GetStatus() == Loader::ResultStatus::Success) {
-            dirs.push_back(std::move(npfs));
-            if (IsDirectoryExeFS(dirs.back()))
-                exefs = dirs.back();
-        } else {
-            if (has_rights_id)
-                status = Loader::ResultStatus::ErrorIncorrectTitlekeyOrTitlekek;
-            else
-                status = Loader::ResultStatus::ErrorIncorrectKeyAreaKey;
-            return false;
-        }
-    } else {
-        if (status != Loader::ResultStatus::Success)
-            return false;
-        if (has_rights_id)
-            status = Loader::ResultStatus::ErrorIncorrectTitlekeyOrTitlekek;
-        else
-            status = Loader::ResultStatus::ErrorIncorrectKeyAreaKey;
-        return false;
-    }
-
-    return true;
-}
-
 u8 NCA::GetCryptoRevision() const {
    u8 master_key_id = header.crypto_type;
    if (header.crypto_type_2 > master_key_id)
@@ -444,7 +167,7 @@ boost::optional<Core::Crypto::Key128> NCA::GetTitlekey() {
    return titlekey;
 }

-VirtualFile NCA::Decrypt(const NCASectionHeader& s_header, VirtualFile in, u64 starting_offset) {
+VirtualFile NCA::Decrypt(NCASectionHeader s_header, VirtualFile in, u64 starting_offset) {
    if (!encrypted)
        return in;

@@ -492,6 +215,256 @@ VirtualFile NCA::Decrypt(const NCASectionHeader& s_header, VirtualFile in, u64 s
    }
 }

+// Parses the NCA in file_ (decrypting the header if needed) and mounts its RomFS/PFS0 sections.
+// On failure the reason is stored in status and construction stops early; check GetStatus().
+NCA::NCA(VirtualFile file_, VirtualFile bktr_base_romfs_, u64 bktr_base_ivfc_offset)
+    : file(std::move(file_)), bktr_base_romfs(std::move(bktr_base_romfs_)) {
+    // Set these up front so that no early return below can leave them indeterminate.
+    status = Loader::ResultStatus::Success;
+    encrypted = false;
+    is_update = false;
+    ivfc_offset = 0;
+
+    if (file == nullptr) {
+        status = Loader::ResultStatus::ErrorNullFile;
+        return;
+    }
+
+    if (sizeof(NCAHeader) != file->ReadObject(&header)) {
+        LOG_ERROR(Loader, "File reader errored out during header read.");
+        status = Loader::ResultStatus::ErrorBadNCAHeader;
+        return;
+    }
+
+    if (!IsValidNCA(header)) {
+        if (header.magic == Common::MakeMagic('N', 'C', 'A', '2')) {
+            status = Loader::ResultStatus::ErrorNCA2;
+            return;
+        }
+        if (header.magic == Common::MakeMagic('N', 'C', 'A', '0')) {
+            status = Loader::ResultStatus::ErrorNCA0;
+            return;
+        }
+
+        NCAHeader dec_header{};
+        Core::Crypto::AESCipher<Core::Crypto::Key256> cipher(
+            keys.GetKey(Core::Crypto::S256KeyType::Header), Core::Crypto::Mode::XTS);
+        cipher.XTSTranscode(&header, sizeof(NCAHeader), &dec_header, 0, 0x200,
+                            Core::Crypto::Op::Decrypt);
+        if (IsValidNCA(dec_header)) {
+            header = dec_header;
+            encrypted = true;
+        } else {
+            if (dec_header.magic == Common::MakeMagic('N', 'C', 'A', '2')) {
+                status = Loader::ResultStatus::ErrorNCA2;
+                return;
+            }
+            if (dec_header.magic == Common::MakeMagic('N', 'C', 'A', '0')) {
+                status = Loader::ResultStatus::ErrorNCA0;
+                return;
+            }
+
+            if (!keys.HasKey(Core::Crypto::S256KeyType::Header))
+                status = Loader::ResultStatus::ErrorMissingHeaderKey;
+            else
+                status = Loader::ResultStatus::ErrorIncorrectHeaderKey;
+            return;
+        }
+    }
+
+    has_rights_id = std::any_of(header.rights_id.begin(), header.rights_id.end(),
+                                [](char c) { return c != '\0'; });
+
+    const std::ptrdiff_t number_sections =
+        std::count_if(std::begin(header.section_tables), std::end(header.section_tables),
+                      [](NCASectionTableEntry entry) { return entry.media_offset > 0; });
+
+    std::vector<NCASectionHeader> sections(number_sections);
+    const auto length_sections = SECTION_HEADER_SIZE * number_sections;
+
+    if (encrypted) {
+        auto raw = file->ReadBytes(length_sections, SECTION_HEADER_OFFSET);
+        Core::Crypto::AESCipher<Core::Crypto::Key256> cipher(
+            keys.GetKey(Core::Crypto::S256KeyType::Header), Core::Crypto::Mode::XTS);
+        cipher.XTSTranscode(raw.data(), length_sections, sections.data(), 2, SECTION_HEADER_SIZE,
+                            Core::Crypto::Op::Decrypt);
+    } else {
+        file->ReadBytes(sections.data(), length_sections, SECTION_HEADER_OFFSET);
+    }
+
+    is_update = std::any_of(sections.begin(), sections.end(), [](const NCASectionHeader& s) {
+        return s.raw.header.crypto_type == NCASectionCryptoType::BKTR;
+    });
+
+    for (std::ptrdiff_t i = 0; i < number_sections; ++i) {
+        const auto& section = sections[i];
+        const auto& entry = header.section_tables[i];
+
+        if (section.raw.header.filesystem_type == NCASectionFilesystemType::ROMFS) {
+            const std::size_t base_offset = entry.media_offset * MEDIA_OFFSET_MULTIPLIER;
+            ivfc_offset = section.romfs.ivfc.levels[IVFC_MAX_LEVEL - 1].offset;
+            const std::size_t romfs_offset = base_offset + ivfc_offset;
+            const std::size_t romfs_size = section.romfs.ivfc.levels[IVFC_MAX_LEVEL - 1].size;
+            auto raw = std::make_shared<OffsetVfsFile>(file, romfs_size, romfs_offset);
+            auto dec = Decrypt(section, raw, romfs_offset);
+
+            if (dec == nullptr) {
+                // Keep any error status that has already been recorded.
+                if (status != Loader::ResultStatus::Success)
+                    return;
+                if (has_rights_id)
+                    status = Loader::ResultStatus::ErrorIncorrectTitlekeyOrTitlekek;
+                else
+                    status = Loader::ResultStatus::ErrorIncorrectKeyAreaKey;
+                return;
+            }
+
+            if (section.raw.header.crypto_type == NCASectionCryptoType::BKTR) {
+                if (section.bktr.relocation.magic != Common::MakeMagic('B', 'K', 'T', 'R') ||
+                    section.bktr.subsection.magic != Common::MakeMagic('B', 'K', 'T', 'R')) {
+                    status = Loader::ResultStatus::ErrorBadBKTRHeader;
+                    return;
+                }
+
+                if (section.bktr.relocation.offset + section.bktr.relocation.size !=
+                    section.bktr.subsection.offset) {
+                    status = Loader::ResultStatus::ErrorBKTRSubsectionNotAfterRelocation;
+                    return;
+                }
+
+                const u64 size =
+                    MEDIA_OFFSET_MULTIPLIER * (entry.media_end_offset - entry.media_offset);
+                if (section.bktr.subsection.offset + section.bktr.subsection.size != size) {
+                    status = Loader::ResultStatus::ErrorBKTRSubsectionNotAtEnd;
+                    return;
+                }
+
+                const u64 offset = section.romfs.ivfc.levels[IVFC_MAX_LEVEL - 1].offset;
+                RelocationBlock relocation_block{};
+                if (dec->ReadObject(&relocation_block, section.bktr.relocation.offset - offset) !=
+                    sizeof(RelocationBlock)) {
+                    status = Loader::ResultStatus::ErrorBadRelocationBlock;
+                    return;
+                }
+                SubsectionBlock subsection_block{};
+                if (dec->ReadObject(&subsection_block, section.bktr.subsection.offset - offset) !=
+                    sizeof(SubsectionBlock)) {
+                    status = Loader::ResultStatus::ErrorBadSubsectionBlock;
+                    return;
+                }
+
+                const auto reloc_bytes = section.bktr.relocation.size - sizeof(RelocationBlock);
+                std::vector<RelocationBucketRaw> relocation_buckets_raw(
+                    reloc_bytes / sizeof(RelocationBucketRaw));
+                if (dec->ReadBytes(relocation_buckets_raw.data(), reloc_bytes,
+                                   section.bktr.relocation.offset + sizeof(RelocationBlock) -
+                                       offset) != reloc_bytes) {
+                    status = Loader::ResultStatus::ErrorBadRelocationBuckets;
+                    return;
+                }
+
+                const auto subsec_bytes = section.bktr.subsection.size - sizeof(SubsectionBlock);
+                std::vector<SubsectionBucketRaw> subsection_buckets_raw(
+                    subsec_bytes / sizeof(SubsectionBucketRaw));
+                if (dec->ReadBytes(subsection_buckets_raw.data(), subsec_bytes,
+                                   section.bktr.subsection.offset + sizeof(SubsectionBlock) -
+                                       offset) != subsec_bytes) {
+                    status = Loader::ResultStatus::ErrorBadSubsectionBuckets;
+                    return;
+                }
+
+                std::vector<RelocationBucket> relocation_buckets(relocation_buckets_raw.size());
+                std::transform(relocation_buckets_raw.begin(), relocation_buckets_raw.end(),
+                               relocation_buckets.begin(), &ConvertRelocationBucketRaw);
+                std::vector<SubsectionBucket> subsection_buckets(subsection_buckets_raw.size());
+                std::transform(subsection_buckets_raw.begin(), subsection_buckets_raw.end(),
+                               subsection_buckets.begin(), &ConvertSubsectionBucketRaw);
+
+                // back() on an empty vector is undefined behaviour, so reject that case.
+                if (subsection_buckets.empty()) {
+                    status = Loader::ResultStatus::ErrorBadSubsectionBuckets;
+                    return;
+                }
+
+                u32 ctr_low;
+                std::memcpy(&ctr_low, section.raw.section_ctr.data(), sizeof(ctr_low));
+                subsection_buckets.back().entries.push_back(
+                    {section.bktr.relocation.offset, {0}, ctr_low});
+                subsection_buckets.back().entries.push_back({size, {0}, 0});
+
+                boost::optional<Core::Crypto::Key128> key = boost::none;
+                if (encrypted) {
+                    if (has_rights_id) {
+                        status = Loader::ResultStatus::Success;
+                        key = GetTitlekey();
+                        if (key == boost::none) {
+                            status = Loader::ResultStatus::ErrorMissingTitlekey;
+                            return;
+                        }
+                    } else {
+                        key = GetKeyAreaKey(NCASectionCryptoType::BKTR);
+                        if (key == boost::none) {
+                            status = Loader::ResultStatus::ErrorMissingKeyAreaKey;
+                            return;
+                        }
+                    }
+                }
+
+                if (bktr_base_romfs == nullptr) {
+                    status = Loader::ResultStatus::ErrorMissingBKTRBaseRomFS;
+                    return;
+                }
+
+                auto bktr = std::make_shared<BKTR>(
+                    bktr_base_romfs, std::make_shared<OffsetVfsFile>(file, romfs_size, base_offset),
+                    relocation_block, relocation_buckets, subsection_block, subsection_buckets,
+                    encrypted, encrypted ? key.get() : Core::Crypto::Key128{}, base_offset,
+                    bktr_base_ivfc_offset, section.raw.section_ctr);
+
+                // BKTR applies to entire IVFC, so make an offset version to level 6
+                files.push_back(std::make_shared<OffsetVfsFile>(
+                    bktr, romfs_size, section.romfs.ivfc.levels[IVFC_MAX_LEVEL - 1].offset));
+            } else {
+                files.push_back(std::move(dec));
+            }
+            romfs = files.back();
+        } else if (section.raw.header.filesystem_type == NCASectionFilesystemType::PFS0) {
+            const u64 offset = (static_cast<u64>(entry.media_offset) * MEDIA_OFFSET_MULTIPLIER) +
+                               section.pfs0.pfs0_header_offset;
+            const u64 size =
+                MEDIA_OFFSET_MULTIPLIER * (entry.media_end_offset - entry.media_offset);
+            auto dec =
+                Decrypt(section, std::make_shared<OffsetVfsFile>(file, size, offset), offset);
+            if (dec == nullptr) {
+                if (status != Loader::ResultStatus::Success)
+                    return;
+                if (has_rights_id)
+                    status = Loader::ResultStatus::ErrorIncorrectTitlekeyOrTitlekek;
+                else
+                    status = Loader::ResultStatus::ErrorIncorrectKeyAreaKey;
+                return;
+            }
+
+            auto npfs = std::make_shared<PartitionFilesystem>(std::move(dec));
+            if (npfs->GetStatus() != Loader::ResultStatus::Success) {
+                if (has_rights_id)
+                    status = Loader::ResultStatus::ErrorIncorrectTitlekeyOrTitlekek;
+                else
+                    status = Loader::ResultStatus::ErrorIncorrectKeyAreaKey;
+                return;
+            }
+
+            dirs.push_back(std::move(npfs));
+            if (IsDirectoryExeFS(dirs.back()))
+                exefs = dirs.back();
+        }
+    }
+
+    status = Loader::ResultStatus::Success;
+}
+
+NCA::~NCA() = default;
+
 Loader::ResultStatus NCA::GetStatus() const {
    return status;
 }
--- a/src/core/file_sys/content_archive.h
+++ b/src/core/file_sys/content_archive.h
@@ -73,6 +73,8 @@ inline bool IsDirectoryExeFS(const std::shared_ptr<VfsDirectory>& pfs) {
    return pfs->GetFile("main") != nullptr && pfs->GetFile("main.npdm") != nullptr;
 }

+bool IsValidNCA(const NCAHeader& header);
+
 // An implementation of VfsDirectory that represents a Nintendo Content Archive (NCA) conatiner.
 // After construction, use GetStatus to determine if the file is valid and ready to be used.
 class NCA : public ReadOnlyVfsDirectory {
@@ -104,19 +106,10 @@ protected:
    bool ReplaceFileWithSubdirectory(VirtualFile file, VirtualDir dir) override;

 private:
-    bool CheckSupportedNCA(const NCAHeader& header);
-    bool HandlePotentialHeaderDecryption();
-
-    std::vector<NCASectionHeader> ReadSectionHeaders() const;
-    bool ReadSections(const std::vector<NCASectionHeader>& sections, u64 bktr_base_ivfc_offset);
-    bool ReadRomFSSection(const NCASectionHeader& section, const NCASectionTableEntry& entry,
-                          u64 bktr_base_ivfc_offset);
-    bool ReadPFS0Section(const NCASectionHeader& section, const NCASectionTableEntry& entry);
-
    u8 GetCryptoRevision() const;
    boost::optional<Core::Crypto::Key128> GetKeyAreaKey(NCASectionCryptoType type) const;
    boost::optional<Core::Crypto::Key128> GetTitlekey();
-    VirtualFile Decrypt(const NCASectionHeader& header, VirtualFile in, u64 starting_offset);
+    VirtualFile Decrypt(NCASectionHeader header, VirtualFile in, u64 starting_offset);

    std::vector<VirtualDir> dirs;
    std::vector<VirtualFile> files;
@@ -125,15 +118,15 @@ private:
    VirtualDir exefs = nullptr;
    VirtualFile file;
    VirtualFile bktr_base_romfs;
-    u64 ivfc_offset = 0;
+    u64 ivfc_offset;

    NCAHeader header{};
    bool has_rights_id{};

    Loader::ResultStatus status{};

-    bool encrypted = false;
-    bool is_update = false;
+    bool encrypted;
+    bool is_update;

    Core::Crypto::KeyManager keys;
 };
--- a/src/core/file_sys/savedata_factory.cpp
+++ b/src/core/file_sys/savedata_factory.cpp
@@ -51,6 +51,13 @@ ResultVal<VirtualDir> SaveDataFactory::Open(SaveDataSpaceId space, SaveDataDescr
                    meta.title_id);
    }

+    if (meta.type == SaveDataType::DeviceSaveData && meta.user_id != u128{0, 0}) {
+        LOG_WARNING(Service_FS,
+                    "Possibly incorrect SaveDataDescriptor, type is DeviceSaveData but user_id is "
+                    "non-zero ({:016X}{:016X})",
+                    meta.user_id[1], meta.user_id[0]);
+    }
+
    std::string save_directory =
        GetFullPath(space, meta.type, meta.title_id, meta.user_id, meta.save_id);

@@ -92,6 +99,9 @@ std::string SaveDataFactory::GetFullPath(SaveDataSpaceId space, SaveDataType typ
    case SaveDataSpaceId::NandUser:
        out = "/user/";
        break;
+    case SaveDataSpaceId::TemporaryStorage:
+        out = "/temp/";
+        break;
    default:
        ASSERT_MSG(false, "Unrecognized SaveDataSpaceId: {:02X}", static_cast<u8>(space));
    }
@@ -100,10 +110,11 @@ std::string SaveDataFactory::GetFullPath(SaveDataSpaceId space, SaveDataType typ
    case SaveDataType::SystemSaveData:
        return fmt::format("{}save/{:016X}/{:016X}{:016X}", out, save_id, user_id[1], user_id[0]);
    case SaveDataType::SaveData:
+    case SaveDataType::DeviceSaveData:
        return fmt::format("{}save/{:016X}/{:016X}{:016X}/{:016X}", out, 0, user_id[1], user_id[0],
                           title_id);
    case SaveDataType::TemporaryStorage:
-        return fmt::format("{}temp/{:016X}/{:016X}{:016X}/{:016X}", out, 0, user_id[1], user_id[0],
+        return fmt::format("{}{:016X}/{:016X}{:016X}/{:016X}", out, 0, user_id[1], user_id[0],
                           title_id);
    default:
        ASSERT_MSG(false, "Unrecognized SaveDataType: {:02X}", static_cast<u8>(type));
--- a/src/core/hle/kernel/svc.cpp
+++ b/src/core/hle/kernel/svc.cpp
@@ -448,25 +448,12 @@ static ResultCode GetInfo(u64* result, u64 info_id, u64 handle, u64 info_sub_id)
    case GetInfoType::RandomEntropy:
        *result = 0;
        break;
-    case GetInfoType::AddressSpaceBaseAddr:
-        *result = vm_manager.GetCodeRegionBaseAddress();
+    case GetInfoType::ASLRRegionBaseAddr:
+        *result = vm_manager.GetASLRRegionBaseAddress();
        break;
-    case GetInfoType::AddressSpaceSize: {
-        const u64 width = vm_manager.GetAddressSpaceWidth();
-
-        switch (width) {
-        case 32:
-            *result = 0xFFE00000;
-            break;
-        case 36:
-            *result = 0xFF8000000;
-            break;
-        case 39:
-            *result = 0x7FF8000000;
-            break;
-        }
+    case GetInfoType::ASLRRegionSize:
+        *result = vm_manager.GetASLRRegionSize();
        break;
-    }
    case GetInfoType::NewMapRegionBaseAddr:
        *result = vm_manager.GetNewMapRegionBaseAddress();
        break;
--- a/src/core/hle/kernel/svc.h
+++ b/src/core/hle/kernel/svc.h
@@ -41,8 +41,8 @@ enum class GetInfoType : u64 {
    RandomEntropy = 11,
    PerformanceCounter = 0xF0000002,
    // 2.0.0+
-    AddressSpaceBaseAddr = 12,
-    AddressSpaceSize = 13,
+    ASLRRegionBaseAddr = 12,
+    ASLRRegionSize = 13,
    NewMapRegionBaseAddr = 14,
    NewMapRegionSize = 15,
    // 3.0.0+
--- a/src/core/hle/kernel/vm_manager.cpp
+++ b/src/core/hle/kernel/vm_manager.cpp
@@ -393,30 +393,35 @@ void VMManager::InitializeMemoryRegionRanges(FileSys::ProgramAddressSpaceType ty

    switch (type) {
    case FileSys::ProgramAddressSpaceType::Is32Bit:
+    case FileSys::ProgramAddressSpaceType::Is32BitNoMap:
        address_space_width = 32;
        code_region_base = 0x200000;
        code_region_end = code_region_base + 0x3FE00000;
-        map_region_size = 0x40000000;
-        heap_region_size = 0x40000000;
+        aslr_region_base = 0x200000;
+        aslr_region_end = aslr_region_base + 0xFFE00000;
+        if (type == FileSys::ProgramAddressSpaceType::Is32Bit) {
+            map_region_size = 0x40000000;
+            heap_region_size = 0x40000000;
+        } else {
+            map_region_size = 0;
+            heap_region_size = 0x80000000;
+        }
        break;
    case FileSys::ProgramAddressSpaceType::Is36Bit:
        address_space_width = 36;
        code_region_base = 0x8000000;
        code_region_end = code_region_base + 0x78000000;
+        aslr_region_base = 0x8000000;
+        aslr_region_end = aslr_region_base + 0xFF8000000;
        map_region_size = 0x180000000;
        heap_region_size = 0x180000000;
        break;
-    case FileSys::ProgramAddressSpaceType::Is32BitNoMap:
-        address_space_width = 32;
-        code_region_base = 0x200000;
-        code_region_end = code_region_base + 0x3FE00000;
-        map_region_size = 0;
-        heap_region_size = 0x80000000;
-        break;
    case FileSys::ProgramAddressSpaceType::Is39Bit:
        address_space_width = 39;
        code_region_base = 0x8000000;
        code_region_end = code_region_base + 0x80000000;
+        aslr_region_base = 0x8000000;
+        aslr_region_end = aslr_region_base + 0x7FF8000000;
        map_region_size = 0x1000000000;
        heap_region_size = 0x180000000;
        new_map_region_size = 0x80000000;
@@ -490,6 +495,18 @@ u64 VMManager::GetAddressSpaceWidth() const {
    return address_space_width;
 }

+VAddr VMManager::GetASLRRegionBaseAddress() const {
+    return aslr_region_base;
+}
+
+VAddr VMManager::GetASLRRegionEndAddress() const {
+    return aslr_region_end;
+}
+
+u64 VMManager::GetASLRRegionSize() const {
+    return aslr_region_end - aslr_region_base;
+}
+
 VAddr VMManager::GetCodeRegionBaseAddress() const {
    return code_region_base;
 }
--- a/src/core/hle/kernel/vm_manager.h
+++ b/src/core/hle/kernel/vm_manager.h
@@ -205,6 +205,15 @@ public:
    /// Gets the address space width in bits.
    u64 GetAddressSpaceWidth() const;

+    /// Gets the base address of the ASLR region.
+    VAddr GetASLRRegionBaseAddress() const;
+
+    /// Gets the end address of the ASLR region.
+    VAddr GetASLRRegionEndAddress() const;
+
+    /// Gets the size of the ASLR region.
+    u64 GetASLRRegionSize() const;
+
    /// Gets the base address of the code region.
    VAddr GetCodeRegionBaseAddress() const;

@@ -306,6 +315,9 @@ private:
    VAddr address_space_base = 0;
    VAddr address_space_end = 0;

+    VAddr aslr_region_base = 0;
+    VAddr aslr_region_end = 0;
+
    VAddr code_region_base = 0;
    VAddr code_region_end = 0;

--- a/src/core/settings.h
+++ b/src/core/settings.h
@@ -136,7 +136,7 @@ struct Values {
    float resolution_factor;
    bool use_frame_limit;
    u16 frame_limit;
-    bool use_accurate_framebuffers;
+    bool use_accurate_gpu_emulation;

    float bg_red;
    float bg_green;
--- a/src/core/telemetry_session.cpp
+++ b/src/core/telemetry_session.cpp
@@ -163,8 +163,8 @@ TelemetrySession::TelemetrySession() {
    AddField(Telemetry::FieldType::UserConfig, "Renderer_UseFrameLimit",
             Settings::values.use_frame_limit);
    AddField(Telemetry::FieldType::UserConfig, "Renderer_FrameLimit", Settings::values.frame_limit);
-    AddField(Telemetry::FieldType::UserConfig, "Renderer_UseAccurateFramebuffers",
-             Settings::values.use_accurate_framebuffers);
+    AddField(Telemetry::FieldType::UserConfig, "Renderer_UseAccurateGpuEmulation",
+             Settings::values.use_accurate_gpu_emulation);
    AddField(Telemetry::FieldType::UserConfig, "System_UseDockedMode",
             Settings::values.use_docked_mode);
 }
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -448,7 +448,10 @@ public:
                BitField<8, 3, u32> block_depth;
                BitField<12, 1, InvMemoryLayout> type;
            } memory_layout;
-            u32 array_mode;
+            union {
+                BitField<0, 16, u32> array_mode;
+                BitField<16, 1, u32> volume;
+            };
            u32 layer_stride;
            u32 base_layer;
            INSERT_PADDING_WORDS(7);
--- a/src/video_core/memory_manager.cpp
+++ b/src/video_core/memory_manager.cpp
@@ -87,6 +87,16 @@ GPUVAddr MemoryManager::UnmapBuffer(GPUVAddr gpu_addr, u64 size) {
    return gpu_addr;
 }

+GPUVAddr MemoryManager::GetRegionEnd(GPUVAddr region_start) const {
+    for (const auto& region : mapped_regions) {
+        const GPUVAddr region_end{region.gpu_addr + region.size};
+        if (region_start >= region.gpu_addr && region_start < region_end) {
+            return region_end;
+        }
+    }
+    return {};
+}
+
 boost::optional<GPUVAddr> MemoryManager::FindFreeBlock(u64 size, u64 align) {
    GPUVAddr gpu_addr = 0;
    u64 free_space = 0;
--- a/src/video_core/memory_manager.h
+++ b/src/video_core/memory_manager.h
@@ -26,6 +26,7 @@ public:
    GPUVAddr MapBufferEx(VAddr cpu_addr, u64 size);
    GPUVAddr MapBufferEx(VAddr cpu_addr, GPUVAddr gpu_addr, u64 size);
    GPUVAddr UnmapBuffer(GPUVAddr gpu_addr, u64 size);
+    GPUVAddr GetRegionEnd(GPUVAddr region_start) const;
    boost::optional<VAddr> GpuToCpuAddress(GPUVAddr gpu_addr);
    std::vector<GPUVAddr> CpuToGpuAddress(VAddr cpu_addr) const;

--- a/src/video_core/rasterizer_cache.h
+++ b/src/video_core/rasterizer_cache.h
@@ -11,32 +11,77 @@

 #include "common/common_types.h"
 #include "core/core.h"
+#include "core/settings.h"
 #include "video_core/rasterizer_interface.h"
 #include "video_core/renderer_base.h"

+class RasterizerCacheObject {
+public:
+    /// Gets the address of the cached object in guest memory, required for cache management
+    virtual VAddr GetAddr() const = 0;
+
+    /// Gets the size of the cached object in guest memory, required for cache management
+    virtual std::size_t GetSizeInBytes() const = 0;
+
+    /// Writes any cached resources back to memory
+    virtual void Flush() = 0;
+
+    /// Sets whether the cached object should be considered registered
+    void SetIsRegistered(bool registered) {
+        is_registered = registered;
+    }
+
+    /// Returns true if the cached object is registered
+    bool IsRegistered() const {
+        return is_registered;
+    }
+
+    /// Returns true if the cached object is dirty
+    bool IsDirty() const {
+        return is_dirty;
+    }
+
+    /// Returns ticks from when this cached object was last modified
+    u64 GetLastModifiedTicks() const {
+        return last_modified_ticks;
+    }
+
+    /// Marks an object as recently modified, used to specify whether it is clean or dirty
+    template <class T>
+    void MarkAsModified(bool dirty, T& cache) {
+        is_dirty = dirty;
+        last_modified_ticks = cache.GetModifiedTicks();
+    }
+
+private:
+    bool is_registered{};      ///< Whether the object is currently registered with the cache
+    bool is_dirty{};           ///< Whether the object is dirty (out of sync with guest memory)
+    u64 last_modified_ticks{}; ///< When the object was last modified, used for in-order flushing
+};
+
 template <class T>
 class RasterizerCache : NonCopyable {
+    friend class RasterizerCacheObject;
+
 public:
+    /// Flushes every cached object overlapping the guest (CPU) address range [addr, addr + size)
+    void FlushRegion(VAddr addr, u64 size) {
+        const auto& objects{GetSortedObjectsFromRegion(addr, size)};
+        for (auto& object : objects) {
+            FlushObject(object);
+        }
+    }
+
    /// Mark the specified region as being invalidated
    void InvalidateRegion(VAddr addr, u64 size) {
-        if (size == 0)
-            return;
-
-        const ObjectInterval interval{addr, addr + size};
-        for (auto& pair : boost::make_iterator_range(object_cache.equal_range(interval))) {
-            for (auto& cached_object : pair.second) {
-                if (!cached_object)
-                    continue;
-
-                remove_objects.emplace(cached_object);
+        const auto& objects{GetSortedObjectsFromRegion(addr, size)};
+        for (auto& object : objects) {
+            if (!object->IsRegistered()) {
+                // Skip duplicates
+                continue;
            }
+            Unregister(object);
        }
-
-        for (auto& remove_object : remove_objects) {
-            Unregister(remove_object);
-        }
-
-        remove_objects.clear();
    }

    /// Invalidates everything in the cache
@@ -62,6 +107,7 @@ protected:

    /// Register an object into the cache
    void Register(const T& object) {
+        object->SetIsRegistered(true);
        object_cache.add({GetInterval(object), ObjectSet{object}});
        auto& rasterizer = Core::System::GetInstance().Renderer().Rasterizer();
        rasterizer.UpdatePagesCachedCount(object->GetAddr(), object->GetSizeInBytes(), 1);
@@ -69,12 +115,57 @@ protected:

    /// Unregisters an object from the cache
    void Unregister(const T& object) {
+        object->SetIsRegistered(false);
        auto& rasterizer = Core::System::GetInstance().Renderer().Rasterizer();
        rasterizer.UpdatePagesCachedCount(object->GetAddr(), object->GetSizeInBytes(), -1);
+
+        // Only flush if use_accurate_gpu_emulation is enabled, as it incurs a performance hit
+        if (Settings::values.use_accurate_gpu_emulation) {
+            FlushObject(object);
+        }
+
        object_cache.subtract({GetInterval(object), ObjectSet{object}});
    }

+    /// Returns a ticks counter used for tracking when cached objects were last modified
+    u64 GetModifiedTicks() {
+        return ++modified_ticks;
+    }
+
 private:
+    /// Returns the cached objects in the specified memory region, ordered by last-modified ticks
+    std::vector<T> GetSortedObjectsFromRegion(VAddr addr, u64 size) {
+        if (size == 0) {
+            return {};
+        }
+
+        std::vector<T> objects;
+        const ObjectInterval interval{addr, addr + size};
+        for (auto& pair : boost::make_iterator_range(object_cache.equal_range(interval))) {
+            for (auto& cached_object : pair.second) {
+                if (!cached_object) {
+                    continue;
+                }
+                objects.push_back(cached_object);
+            }
+        }
+
+        std::sort(objects.begin(), objects.end(), [](const T& a, const T& b) -> bool {
+            return a->GetLastModifiedTicks() < b->GetLastModifiedTicks();
+        });
+
+        return objects;
+    }
+
+    /// Flushes the specified object, updating appropriate cache state as needed
+    void FlushObject(const T& object) {
+        if (!object->IsDirty()) {
+            return;
+        }
+        object->Flush();
+        object->MarkAsModified(false, *this);
+    }
+
    using ObjectSet = std::set<T>;
    using ObjectCache = boost::icl::interval_map<VAddr, ObjectSet>;
    using ObjectInterval = typename ObjectCache::interval_type;
@@ -84,6 +175,6 @@ private:
                                          object->GetAddr() + object->GetSizeInBytes());
    }

-    ObjectCache object_cache;
-    ObjectSet remove_objects;
+    ObjectCache object_cache; ///< Cache of objects
+    u64 modified_ticks{};     ///< Counter of cache state ticks, used for in-order flushing
 };
--- a/src/video_core/renderer_opengl/gl_buffer_cache.h
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.h
@@ -15,15 +15,18 @@

 namespace OpenGL {

-struct CachedBufferEntry final {
-    VAddr GetAddr() const {
+struct CachedBufferEntry final : public RasterizerCacheObject {
+    VAddr GetAddr() const override {
        return addr;
    }

-    std::size_t GetSizeInBytes() const {
+    std::size_t GetSizeInBytes() const override {
        return size;
    }

+    // We do not have to flush this cache as things in it are never modified by us.
+    void Flush() override {}
+
    VAddr addr;
    std::size_t size;
    GLintptr offset;
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -424,6 +424,13 @@ void RasterizerOpenGL::ConfigureFramebuffers(bool using_color_fb, bool using_dep
            // Used when just a single color attachment is enabled, e.g. for clearing a color buffer
            Surface color_surface =
                res_cache.GetColorBufferSurface(*single_color_target, preserve_contents);
+
+            if (color_surface) {
+                // Assume that a surface will be written to if it is used as a framebuffer, even if
+                // the shader doesn't actually write to it.
+                color_surface->MarkAsModified(true, res_cache);
+            }
+
            glFramebufferTexture2D(
                GL_DRAW_FRAMEBUFFER,
                GL_COLOR_ATTACHMENT0 + static_cast<GLenum>(*single_color_target), GL_TEXTURE_2D,
@@ -434,6 +441,13 @@ void RasterizerOpenGL::ConfigureFramebuffers(bool using_color_fb, bool using_dep
            std::array<GLenum, Maxwell::NumRenderTargets> buffers;
            for (std::size_t index = 0; index < Maxwell::NumRenderTargets; ++index) {
                Surface color_surface = res_cache.GetColorBufferSurface(index, preserve_contents);
+
+                if (color_surface) {
+                    // Assume that a surface will be written to if it is used as a framebuffer, even
+                    // if the shader doesn't actually write to it.
+                    color_surface->MarkAsModified(true, res_cache);
+                }
+
                buffers[index] = GL_COLOR_ATTACHMENT0 + regs.rt_control.GetMap(index);
                glFramebufferTexture2D(
                    GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + static_cast<GLenum>(index),
@@ -453,6 +467,10 @@ void RasterizerOpenGL::ConfigureFramebuffers(bool using_color_fb, bool using_dep
    }

    if (depth_surface) {
+        // Assume that a surface will be written to if it is used as a framebuffer, even if
+        // the shader doesn't actually write to it.
+        depth_surface->MarkAsModified(true, res_cache);
+
        if (regs.stencil_enable) {
            // Attach both depth and stencil
            glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
@@ -617,7 +635,14 @@ void RasterizerOpenGL::DrawArrays() {

 void RasterizerOpenGL::FlushAll() {}

-void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) {}
+void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) {
+    MICROPROFILE_SCOPE(OpenGL_CacheManagement);
+
+    if (Settings::values.use_accurate_gpu_emulation) {
+        // Only flush if use_accurate_gpu_emulation is enabled, as it incurs a performance hit
+        res_cache.FlushRegion(addr, size);
+    }
+}

 void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) {
    MICROPROFILE_SCOPE(OpenGL_CacheManagement);
@@ -627,6 +652,7 @@ void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) {
 }

 void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size) {
+    FlushRegion(addr, size);
    InvalidateRegion(addr, size);
 }

--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
@@ -34,16 +34,53 @@ struct FormatTuple {
    bool compressed;
 };

-static VAddr TryGetCpuAddr(Tegra::GPUVAddr gpu_addr) {
-    auto& gpu{Core::System::GetInstance().GPU()};
-    const auto cpu_addr{gpu.MemoryManager().GpuToCpuAddress(gpu_addr)};
-    return cpu_addr ? *cpu_addr : 0;
+static bool IsPixelFormatASTC(PixelFormat format) {
+    switch (format) {
+    case PixelFormat::ASTC_2D_4X4:
+    case PixelFormat::ASTC_2D_5X4:
+    case PixelFormat::ASTC_2D_8X8:
+    case PixelFormat::ASTC_2D_8X5:
+        return true;
+    default:
+        return false;
+    }
+}
+
+static std::pair<u32, u32> GetASTCBlockSize(PixelFormat format) {
+    switch (format) {
+    case PixelFormat::ASTC_2D_4X4:
+        return {4, 4};
+    case PixelFormat::ASTC_2D_5X4:
+        return {5, 4};
+    case PixelFormat::ASTC_2D_8X8:
+        return {8, 8};
+    case PixelFormat::ASTC_2D_8X5:
+        return {8, 5};
+    default:
+        LOG_CRITICAL(HW_GPU, "Unhandled format: {}", static_cast<u32>(format));
+        UNREACHABLE();
+    }
+}
+
+void SurfaceParams::InitCacheParameters(Tegra::GPUVAddr gpu_addr_) {
+    auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()};
+    const auto cpu_addr{memory_manager.GpuToCpuAddress(gpu_addr_)};
+
+    addr = cpu_addr ? *cpu_addr : 0;
+    gpu_addr = gpu_addr_;
+    size_in_bytes = SizeInBytesRaw();
+
+    if (IsPixelFormatASTC(pixel_format)) {
+        // ASTC is decompressed in software and emulated as RGBA8
+        size_in_bytes_gl = width * height * depth * 4;
+    } else {
+        size_in_bytes_gl = SizeInBytesGL();
+    }
 }

 /*static*/ SurfaceParams SurfaceParams::CreateForTexture(
    const Tegra::Texture::FullTextureInfo& config, const GLShader::SamplerEntry& entry) {
    SurfaceParams params{};
-    params.addr = TryGetCpuAddr(config.tic.Address());
    params.is_tiled = config.tic.IsTiled();
    params.block_width = params.is_tiled ? config.tic.BlockWidth() : 0,
    params.block_height = params.is_tiled ? config.tic.BlockHeight() : 0,
@@ -87,18 +124,18 @@ static VAddr TryGetCpuAddr(Tegra::GPUVAddr gpu_addr) {
        break;
    }

-    params.size_in_bytes_total = params.SizeInBytesTotal();
-    params.size_in_bytes_2d = params.SizeInBytes2D();
    params.max_mip_level = config.tic.max_mip_level + 1;
    params.rt = {};

+    params.InitCacheParameters(config.tic.Address());
+
    return params;
 }

 /*static*/ SurfaceParams SurfaceParams::CreateForFramebuffer(std::size_t index) {
    const auto& config{Core::System::GetInstance().GPU().Maxwell3D().regs.rt[index]};
    SurfaceParams params{};
-    params.addr = TryGetCpuAddr(config.Address());
+
    params.is_tiled =
        config.memory_layout.type == Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout::BlockLinear;
    params.block_width = 1 << config.memory_layout.block_width;
@@ -112,16 +149,17 @@ static VAddr TryGetCpuAddr(Tegra::GPUVAddr gpu_addr) {
    params.unaligned_height = config.height;
    params.target = SurfaceTarget::Texture2D;
    params.depth = 1;
-    params.size_in_bytes_total = params.SizeInBytesTotal();
-    params.size_in_bytes_2d = params.SizeInBytes2D();
    params.max_mip_level = 0;

    // Render target specific parameters, not used for caching
    params.rt.index = static_cast<u32>(index);
    params.rt.array_mode = config.array_mode;
    params.rt.layer_stride = config.layer_stride;
+    params.rt.volume = config.volume;
    params.rt.base_layer = config.base_layer;

+    params.InitCacheParameters(config.Address());
+
    return params;
 }

@@ -130,7 +168,7 @@ static VAddr TryGetCpuAddr(Tegra::GPUVAddr gpu_addr) {
    u32 block_width, u32 block_height, u32 block_depth,
    Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout type) {
    SurfaceParams params{};
-    params.addr = TryGetCpuAddr(zeta_address);
+
    params.is_tiled = type == Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout::BlockLinear;
    params.block_width = 1 << std::min(block_width, 5U);
    params.block_height = 1 << std::min(block_height, 5U);
@@ -143,18 +181,18 @@ static VAddr TryGetCpuAddr(Tegra::GPUVAddr gpu_addr) {
    params.unaligned_height = zeta_height;
    params.target = SurfaceTarget::Texture2D;
    params.depth = 1;
-    params.size_in_bytes_total = params.SizeInBytesTotal();
-    params.size_in_bytes_2d = params.SizeInBytes2D();
    params.max_mip_level = 0;
    params.rt = {};

+    params.InitCacheParameters(zeta_address);
+
    return params;
 }

 /*static*/ SurfaceParams SurfaceParams::CreateForFermiCopySurface(
    const Tegra::Engines::Fermi2D::Regs::Surface& config) {
    SurfaceParams params{};
-    params.addr = TryGetCpuAddr(config.Address());
+
    params.is_tiled = !config.linear;
    params.block_width = params.is_tiled ? std::min(config.BlockWidth(), 32U) : 0,
    params.block_height = params.is_tiled ? std::min(config.BlockHeight(), 32U) : 0,
@@ -167,11 +205,11 @@ static VAddr TryGetCpuAddr(Tegra::GPUVAddr gpu_addr) {
    params.unaligned_height = config.height;
    params.target = SurfaceTarget::Texture2D;
    params.depth = 1;
-    params.size_in_bytes_total = params.SizeInBytesTotal();
-    params.size_in_bytes_2d = params.SizeInBytes2D();
    params.max_mip_level = 0;
    params.rt = {};

+    params.InitCacheParameters(config.Address());
+
    return params;
 }

@@ -276,34 +314,6 @@ static const FormatTuple& GetFormatTuple(PixelFormat pixel_format, ComponentType
    return format;
 }

-static bool IsPixelFormatASTC(PixelFormat format) {
-    switch (format) {
-    case PixelFormat::ASTC_2D_4X4:
-    case PixelFormat::ASTC_2D_5X4:
-    case PixelFormat::ASTC_2D_8X8:
-    case PixelFormat::ASTC_2D_8X5:
-        return true;
-    default:
-        return false;
-    }
-}
-
-static std::pair<u32, u32> GetASTCBlockSize(PixelFormat format) {
-    switch (format) {
-    case PixelFormat::ASTC_2D_4X4:
-        return {4, 4};
-    case PixelFormat::ASTC_2D_5X4:
-        return {5, 4};
-    case PixelFormat::ASTC_2D_8X8:
-        return {8, 8};
-    case PixelFormat::ASTC_2D_8X5:
-        return {8, 5};
-    default:
-        LOG_CRITICAL(HW_GPU, "Unhandled format: {}", static_cast<u32>(format));
-        UNREACHABLE();
-    }
-}
-
 MathUtil::Rectangle<u32> SurfaceParams::GetRect() const {
    u32 actual_height{unaligned_height};
    if (IsPixelFormatASTC(pixel_format)) {
@@ -333,23 +343,21 @@ static bool IsFormatBCn(PixelFormat format) {
 template <bool morton_to_gl, PixelFormat format>
 void MortonCopy(u32 stride, u32 block_height, u32 height, u32 block_depth, u32 depth, u8* gl_buffer,
                std::size_t gl_buffer_size, VAddr addr) {
-    constexpr u32 bytes_per_pixel = SurfaceParams::GetFormatBpp(format) / CHAR_BIT;
-    constexpr u32 gl_bytes_per_pixel = CachedSurface::GetGLBytesPerPixel(format);
+    constexpr u32 bytes_per_pixel = SurfaceParams::GetBytesPerPixel(format);
+
+    // With the BCn formats (DXT and DXN), each 4x4 tile is swizzled instead of just individual
+    // pixel values.
+    const u32 tile_size{IsFormatBCn(format) ? 4U : 1U};

    if (morton_to_gl) {
-        // With the BCn formats (DXT and DXN), each 4x4 tile is swizzled instead of just individual
-        // pixel values.
-        const u32 tile_size{IsFormatBCn(format) ? 4U : 1U};
        const std::vector<u8> data = Tegra::Texture::UnswizzleTexture(
            addr, tile_size, bytes_per_pixel, stride, height, depth, block_height, block_depth);
        const std::size_t size_to_copy{std::min(gl_buffer_size, data.size())};
        memcpy(gl_buffer, data.data(), size_to_copy);
    } else {
-        // TODO(bunnei): Assumes the default rendering GOB size of 16 (128 lines). We should
-        // check the configuration for this and perform more generic un/swizzle
-        LOG_WARNING(Render_OpenGL, "need to use correct swizzle/GOB parameters!");
-        VideoCore::MortonCopyPixels128(stride, height, bytes_per_pixel, gl_bytes_per_pixel,
-                                       Memory::GetPointer(addr), gl_buffer, morton_to_gl);
+        Tegra::Texture::CopySwizzledData(stride / tile_size, height / tile_size, depth,
+                                         bytes_per_pixel, bytes_per_pixel, Memory::GetPointer(addr),
+                                         gl_buffer, false, block_height, block_depth);
    }
 }

@@ -430,17 +438,16 @@ static constexpr std::array<void (*)(u32, u32, u32, u32, u32, u8*, std::size_t,
        MortonCopy<false, PixelFormat::RGBA16UI>,
        MortonCopy<false, PixelFormat::R11FG11FB10F>,
        MortonCopy<false, PixelFormat::RGBA32UI>,
-        // TODO(Subv): Swizzling DXT1/DXT23/DXT45/DXN1/DXN2/BC7U/BC6H_UF16/BC6H_SF16/ASTC_2D_4X4
-        // formats are not supported
-        nullptr,
-        nullptr,
-        nullptr,
-        nullptr,
-        nullptr,
-        nullptr,
-        nullptr,
-        nullptr,
-        nullptr,
+        MortonCopy<false, PixelFormat::DXT1>,
+        MortonCopy<false, PixelFormat::DXT23>,
+        MortonCopy<false, PixelFormat::DXT45>,
+        MortonCopy<false, PixelFormat::DXN1>,
+        MortonCopy<false, PixelFormat::DXN2UNORM>,
+        MortonCopy<false, PixelFormat::DXN2SNORM>,
+        MortonCopy<false, PixelFormat::BC7U>,
+        MortonCopy<false, PixelFormat::BC6H_UF16>,
+        MortonCopy<false, PixelFormat::BC6H_SF16>,
+        // TODO(Subv): Swizzling ASTC formats is not supported
        nullptr,
        MortonCopy<false, PixelFormat::G8R8U>,
        MortonCopy<false, PixelFormat::G8R8S>,
@@ -626,22 +633,21 @@ static void CopySurface(const Surface& src_surface, const Surface& dst_surface,
    auto source_format = GetFormatTuple(src_params.pixel_format, src_params.component_type);
    auto dest_format = GetFormatTuple(dst_params.pixel_format, dst_params.component_type);

-    std::size_t buffer_size =
-        std::max(src_params.size_in_bytes_total, dst_params.size_in_bytes_total);
+    std::size_t buffer_size = std::max(src_params.size_in_bytes, dst_params.size_in_bytes);

    glBindBuffer(GL_PIXEL_PACK_BUFFER, copy_pbo_handle);
    glBufferData(GL_PIXEL_PACK_BUFFER, buffer_size, nullptr, GL_STREAM_DRAW_ARB);
    if (source_format.compressed) {
        glGetCompressedTextureImage(src_surface->Texture().handle, src_attachment,
-                                    static_cast<GLsizei>(src_params.size_in_bytes_total), nullptr);
+                                    static_cast<GLsizei>(src_params.size_in_bytes), nullptr);
    } else {
        glGetTextureImage(src_surface->Texture().handle, src_attachment, source_format.format,
-                          source_format.type, static_cast<GLsizei>(src_params.size_in_bytes_total),
+                          source_format.type, static_cast<GLsizei>(src_params.size_in_bytes),
                          nullptr);
    }
    // If the new texture is bigger than the previous one, we need to fill in the rest with data
    // from the CPU.
-    if (src_params.size_in_bytes_total < dst_params.size_in_bytes_total) {
+    if (src_params.size_in_bytes < dst_params.size_in_bytes) {
        // Upload the rest of the memory.
        if (dst_params.is_tiled) {
            // TODO(Subv): We might have to de-tile the subtexture and re-tile it with the rest
@@ -651,12 +657,12 @@ static void CopySurface(const Surface& src_surface, const Surface& dst_surface,
            LOG_DEBUG(HW_GPU, "Trying to upload extra texture data from the CPU during "
                              "reinterpretation but the texture is tiled.");
        }
-        std::size_t remaining_size =
-            dst_params.size_in_bytes_total - src_params.size_in_bytes_total;
+        std::size_t remaining_size = dst_params.size_in_bytes - src_params.size_in_bytes;
        std::vector<u8> data(remaining_size);
-        Memory::ReadBlock(dst_params.addr + src_params.size_in_bytes_total, data.data(),
-                          data.size());
-        glBufferSubData(GL_PIXEL_PACK_BUFFER, src_params.size_in_bytes_total, remaining_size,
+        std::memcpy(data.data(), Memory::GetPointer(dst_params.addr + src_params.size_in_bytes),
+                    data.size());
+
+        glBufferSubData(GL_PIXEL_PACK_BUFFER, src_params.size_in_bytes, remaining_size,
                        data.data());
    }

@@ -702,7 +708,8 @@ static void CopySurface(const Surface& src_surface, const Surface& dst_surface,
 }

 CachedSurface::CachedSurface(const SurfaceParams& params)
-    : params(params), gl_target(SurfaceTargetToGL(params.target)) {
+    : params(params), gl_target(SurfaceTargetToGL(params.target)),
+      cached_size_in_bytes(params.size_in_bytes) {
    texture.Create();
    const auto& rect{params.GetRect()};

@@ -752,9 +759,21 @@ CachedSurface::CachedSurface(const SurfaceParams& params)

    VideoCore::LabelGLObject(GL_TEXTURE, texture.handle, params.addr,
                             SurfaceParams::SurfaceTargetName(params.target));
+
+    // Clamp size to mapped GPU memory region
+    // TODO(bunnei): Super Mario Odyssey maps a 0x40000 byte region and then uses it for a 0x80000
+    // R32F render buffer. We do not yet know if this is a game bug or something else, but this
+    // check is necessary to prevent flushing from overwriting unmapped memory.
+
+    auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()};
+    const u64 max_size{memory_manager.GetRegionEnd(params.gpu_addr) - params.gpu_addr};
+    if (cached_size_in_bytes > max_size) {
+        LOG_ERROR(HW_GPU, "Surface size {} exceeds region size {}", params.size_in_bytes, max_size);
+        cached_size_in_bytes = max_size;
+    }
 }

-static void ConvertS8Z24ToZ24S8(std::vector<u8>& data, u32 width, u32 height) {
+static void ConvertS8Z24ToZ24S8(std::vector<u8>& data, u32 width, u32 height, bool reverse) {
    union S8Z24 {
        BitField<0, 24, u32> z24;
        BitField<24, 8, u32> s8;
@@ -767,22 +786,29 @@ static void ConvertS8Z24ToZ24S8(std::vector<u8>& data, u32 width, u32 height) {
    };
    static_assert(sizeof(Z24S8) == 4, "Z24S8 is incorrect size");

-    S8Z24 input_pixel{};
-    Z24S8 output_pixel{};
-    constexpr auto bpp{CachedSurface::GetGLBytesPerPixel(PixelFormat::S8Z24)};
+    S8Z24 s8z24_pixel{};
+    Z24S8 z24s8_pixel{};
+    constexpr auto bpp{SurfaceParams::GetBytesPerPixel(PixelFormat::S8Z24)};
    for (std::size_t y = 0; y < height; ++y) {
        for (std::size_t x = 0; x < width; ++x) {
            const std::size_t offset{bpp * (y * width + x)};
-            std::memcpy(&input_pixel, &data[offset], sizeof(S8Z24));
-            output_pixel.s8.Assign(input_pixel.s8);
-            output_pixel.z24.Assign(input_pixel.z24);
-            std::memcpy(&data[offset], &output_pixel, sizeof(Z24S8));
+            if (reverse) {
+                std::memcpy(&z24s8_pixel, &data[offset], sizeof(Z24S8));
+                s8z24_pixel.s8.Assign(z24s8_pixel.s8);
+                s8z24_pixel.z24.Assign(z24s8_pixel.z24);
+                std::memcpy(&data[offset], &s8z24_pixel, sizeof(S8Z24));
+            } else {
+                std::memcpy(&s8z24_pixel, &data[offset], sizeof(S8Z24));
+                z24s8_pixel.s8.Assign(s8z24_pixel.s8);
+                z24s8_pixel.z24.Assign(s8z24_pixel.z24);
+                std::memcpy(&data[offset], &z24s8_pixel, sizeof(Z24S8));
+            }
        }
    }
 }

 static void ConvertG8R8ToR8G8(std::vector<u8>& data, u32 width, u32 height) {
-    constexpr auto bpp{CachedSurface::GetGLBytesPerPixel(PixelFormat::G8R8U)};
+    constexpr auto bpp{SurfaceParams::GetBytesPerPixel(PixelFormat::G8R8U)};
    for (std::size_t y = 0; y < height; ++y) {
        for (std::size_t x = 0; x < width; ++x) {
            const std::size_t offset{bpp * (y * width + x)};
@@ -814,7 +840,7 @@ static void ConvertFormatAsNeeded_LoadGLBuffer(std::vector<u8>& data, PixelForma
    }
    case PixelFormat::S8Z24:
        // Convert the S8Z24 depth format to Z24S8, as OpenGL does not support S8Z24.
-        ConvertS8Z24ToZ24S8(data, width, height);
+        ConvertS8Z24ToZ24S8(data, width, height, false);
        break;

    case PixelFormat::G8R8U:
@@ -825,22 +851,36 @@ static void ConvertFormatAsNeeded_LoadGLBuffer(std::vector<u8>& data, PixelForma
    }
 }

+/**
+ * Helper function to perform software conversion (as needed) when flushing a buffer from OpenGL to
+ * Switch memory. This is for Maxwell pixel formats that cannot be represented as-is in OpenGL or
+ * with typical desktop GPUs.
+ */
+static void ConvertFormatAsNeeded_FlushGLBuffer(std::vector<u8>& data, PixelFormat pixel_format,
+                                                u32 width, u32 height) {
+    switch (pixel_format) {
+    case PixelFormat::G8R8U:
+    case PixelFormat::G8R8S:
+    case PixelFormat::ASTC_2D_4X4:
+    case PixelFormat::ASTC_2D_8X8: {
+        LOG_CRITICAL(HW_GPU, "Conversion of format {} after texture flushing is not implemented",
+                     static_cast<u32>(pixel_format));
+        UNREACHABLE();
+        break;
+    }
+    case PixelFormat::S8Z24:
+        // Convert the Z24S8 depth format to S8Z24, as OpenGL does not support S8Z24.
+        ConvertS8Z24ToZ24S8(data, width, height, true);
+        break;
+    }
+}
+
 MICROPROFILE_DEFINE(OpenGL_SurfaceLoad, "OpenGL", "Surface Load", MP_RGB(128, 64, 192));
 void CachedSurface::LoadGLBuffer() {
-    ASSERT(params.type != SurfaceType::Fill);
-
-    const u8* const texture_src_data = Memory::GetPointer(params.addr);
-
-    ASSERT(texture_src_data);
-
-    const u32 bytes_per_pixel = GetGLBytesPerPixel(params.pixel_format);
-    const u32 copy_size = params.width * params.height * bytes_per_pixel;
-    const std::size_t total_size = copy_size * params.depth;
-
    MICROPROFILE_SCOPE(OpenGL_SurfaceLoad);

+    gl_buffer.resize(params.size_in_bytes_gl);
    if (params.is_tiled) {
-        gl_buffer.resize(total_size);
        u32 depth = params.depth;
        u32 block_depth = params.block_depth;

@@ -853,13 +893,12 @@ void CachedSurface::LoadGLBuffer() {
            block_depth = 1U;
        }

-        const std::size_t size = copy_size * depth;
-
        morton_to_gl_fns[static_cast<std::size_t>(params.pixel_format)](
            params.width, params.block_height, params.height, block_depth, depth, gl_buffer.data(),
-            size, params.addr);
+            gl_buffer.size(), params.addr);
    } else {
-        const u8* const texture_src_data_end{texture_src_data + total_size};
+        const auto texture_src_data{Memory::GetPointer(params.addr)};
+        const auto texture_src_data_end{texture_src_data + params.size_in_bytes_gl};
        gl_buffer.assign(texture_src_data, texture_src_data_end);
    }

@@ -868,7 +907,44 @@ void CachedSurface::LoadGLBuffer() {

 MICROPROFILE_DEFINE(OpenGL_SurfaceFlush, "OpenGL", "Surface Flush", MP_RGB(128, 192, 64));
 void CachedSurface::FlushGLBuffer() {
-    ASSERT_MSG(false, "Unimplemented");
+    MICROPROFILE_SCOPE(OpenGL_SurfaceFlush);
+
+    ASSERT_MSG(!IsPixelFormatASTC(params.pixel_format), "Unimplemented");
+
+    // OpenGL temporary buffer needs to be big enough to store raw texture size
+    gl_buffer.resize(GetSizeInBytes());
+
+    const FormatTuple& tuple = GetFormatTuple(params.pixel_format, params.component_type);
+    // Ensure no bad interactions with GL_PACK_ALIGNMENT
+    ASSERT(params.width * SurfaceParams::GetBytesPerPixel(params.pixel_format) % 4 == 0);
+    glPixelStorei(GL_PACK_ROW_LENGTH, static_cast<GLint>(params.width));
+    ASSERT(!tuple.compressed);
+    glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
+    glGetTextureImage(texture.handle, 0, tuple.format, tuple.type, gl_buffer.size(),
+                      gl_buffer.data());
+    glPixelStorei(GL_PACK_ROW_LENGTH, 0);
+    ConvertFormatAsNeeded_FlushGLBuffer(gl_buffer, params.pixel_format, params.width,
+                                        params.height);
+    ASSERT(params.type != SurfaceType::Fill);
+    const u8* const texture_src_data = Memory::GetPointer(params.addr);
+    ASSERT(texture_src_data);
+    if (params.is_tiled) {
+        u32 depth = params.depth;
+        u32 block_depth = params.block_depth;
+
+        ASSERT_MSG(params.block_width == 1, "Block width is defined as {} on texture type {}",
+                   params.block_width, static_cast<u32>(params.target));
+
+        if (params.target == SurfaceParams::SurfaceTarget::Texture2D) {
+            // TODO(Blinkhawk): Eliminate this condition once all texture types are implemented.
+            depth = 1U;
+        }
+        gl_to_morton_fns[static_cast<size_t>(params.pixel_format)](
+            params.width, params.block_height, params.height, block_depth, depth, gl_buffer.data(),
+            gl_buffer.size(), GetAddr());
+    } else {
+        std::memcpy(Memory::GetPointer(GetAddr()), gl_buffer.data(), GetSizeInBytes());
+    }
 }

 MICROPROFILE_DEFINE(OpenGL_TextureUL, "OpenGL", "Texture Upload", MP_RGB(128, 64, 192));
@@ -878,9 +954,6 @@ void CachedSurface::UploadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle

    MICROPROFILE_SCOPE(OpenGL_TextureUL);

-    ASSERT(gl_buffer.size() == static_cast<std::size_t>(params.width) * params.height *
-                                   GetGLBytesPerPixel(params.pixel_format) * params.depth);
-
    const auto& rect{params.GetRect()};

    // Load data from memory to the surface
@@ -889,7 +962,7 @@ void CachedSurface::UploadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle
    std::size_t buffer_offset =
        static_cast<std::size_t>(static_cast<std::size_t>(y0) * params.width +
                                 static_cast<std::size_t>(x0)) *
-        GetGLBytesPerPixel(params.pixel_format);
+        SurfaceParams::GetBytesPerPixel(params.pixel_format);

    const FormatTuple& tuple = GetFormatTuple(params.pixel_format, params.component_type);
    const GLuint target_tex = texture.handle;
@@ -905,7 +978,7 @@ void CachedSurface::UploadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle
    cur_state.Apply();

    // Ensure no bad interactions with GL_UNPACK_ALIGNMENT
-    ASSERT(params.width * GetGLBytesPerPixel(params.pixel_format) % 4 == 0);
+    ASSERT(params.width * SurfaceParams::GetBytesPerPixel(params.pixel_format) % 4 == 0);
    glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(params.width));

    glActiveTexture(GL_TEXTURE0);
@@ -915,7 +988,7 @@ void CachedSurface::UploadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle
            glCompressedTexImage2D(
                SurfaceTargetToGL(params.target), 0, tuple.internal_format,
                static_cast<GLsizei>(params.width), static_cast<GLsizei>(params.height), 0,
-                static_cast<GLsizei>(params.size_in_bytes_2d), &gl_buffer[buffer_offset]);
+                static_cast<GLsizei>(params.size_in_bytes_gl), &gl_buffer[buffer_offset]);
            break;
        case SurfaceParams::SurfaceTarget::Texture3D:
        case SurfaceParams::SurfaceTarget::Texture2DArray:
@@ -923,16 +996,16 @@ void CachedSurface::UploadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle
                SurfaceTargetToGL(params.target), 0, tuple.internal_format,
                static_cast<GLsizei>(params.width), static_cast<GLsizei>(params.height),
                static_cast<GLsizei>(params.depth), 0,
-                static_cast<GLsizei>(params.size_in_bytes_total), &gl_buffer[buffer_offset]);
+                static_cast<GLsizei>(params.size_in_bytes_gl), &gl_buffer[buffer_offset]);
            break;
        case SurfaceParams::SurfaceTarget::TextureCubemap:
            for (std::size_t face = 0; face < params.depth; ++face) {
                glCompressedTexImage2D(static_cast<GLenum>(GL_TEXTURE_CUBE_MAP_POSITIVE_X + face),
                                       0, tuple.internal_format, static_cast<GLsizei>(params.width),
                                       static_cast<GLsizei>(params.height), 0,
-                                       static_cast<GLsizei>(params.size_in_bytes_2d),
+                                       static_cast<GLsizei>(params.SizeInBytesCubeFaceGL()),
                                       &gl_buffer[buffer_offset]);
-                buffer_offset += params.size_in_bytes_2d;
+                buffer_offset += params.SizeInBytesCubeFace();
            }
            break;
        default:
@@ -942,7 +1015,7 @@ void CachedSurface::UploadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle
            glCompressedTexImage2D(
                GL_TEXTURE_2D, 0, tuple.internal_format, static_cast<GLsizei>(params.width),
                static_cast<GLsizei>(params.height), 0,
-                static_cast<GLsizei>(params.size_in_bytes_2d), &gl_buffer[buffer_offset]);
+                static_cast<GLsizei>(params.size_in_bytes_gl), &gl_buffer[buffer_offset]);
        }
    } else {

@@ -971,7 +1044,7 @@ void CachedSurface::UploadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle
                                y0, static_cast<GLsizei>(rect.GetWidth()),
                                static_cast<GLsizei>(rect.GetHeight()), tuple.format, tuple.type,
                                &gl_buffer[buffer_offset]);
-                buffer_offset += params.size_in_bytes_2d;
+                buffer_offset += params.SizeInBytesCubeFace();
            }
            break;
        default:
@@ -1033,10 +1106,7 @@ Surface RasterizerCacheOpenGL::GetColorBufferSurface(std::size_t index, bool pre
 void RasterizerCacheOpenGL::LoadSurface(const Surface& surface) {
    surface->LoadGLBuffer();
    surface->UploadGLTexture(read_framebuffer.handle, draw_framebuffer.handle);
-}
-
-void RasterizerCacheOpenGL::FlushSurface(const Surface& surface) {
-    surface->FlushGLBuffer();
+    surface->MarkAsModified(false, *this);
 }

 Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params, bool preserve_contents) {
@@ -1053,8 +1123,8 @@ Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params, bool pres
        } else if (preserve_contents) {
            // If surface parameters changed and we care about keeping the previous data, recreate
            // the surface from the old one
-            Unregister(surface);
            Surface new_surface{RecreateSurface(surface, params)};
+            Unregister(surface);
            Register(new_surface);
            return new_surface;
        } else {
@@ -1105,6 +1175,14 @@ void RasterizerCacheOpenGL::FermiCopySurface(
    FastCopySurface(GetSurface(src_params, true), GetSurface(dst_params, false));
 }

+void RasterizerCacheOpenGL::AccurateCopySurface(const Surface& src_surface,
+                                                const Surface& dst_surface) {
+    const auto& src_params{src_surface->GetSurfaceParams()};
+    const auto& dst_params{dst_surface->GetSurfaceParams()};
+    FlushRegion(src_params.addr, dst_params.size_in_bytes);
+    LoadSurface(dst_surface);
+}
+
 Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& old_surface,
                                               const SurfaceParams& new_params) {
    // Verify surface is compatible for blitting
@@ -1113,6 +1191,12 @@ Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& old_surface,
    // Get a new surface with the new parameters, and blit the previous surface to it
    Surface new_surface{GetUncachedSurface(new_params)};

+    // With use_accurate_gpu_emulation enabled, do an accurate surface copy
+    if (Settings::values.use_accurate_gpu_emulation) {
+        AccurateCopySurface(old_surface, new_surface);
+        return new_surface;
+    }
+
    // For compatible surfaces, we can just do fast glCopyImageSubData based copy
    if (old_params.target == new_params.target && old_params.type == new_params.type &&
        old_params.depth == new_params.depth && old_params.depth == 1 &&
@@ -1124,11 +1208,10 @@ Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& old_surface,

    // If the format is the same, just do a framebuffer blit. This is significantly faster than
    // using PBOs. The is also likely less accurate, as textures will be converted rather than
-    // reinterpreted. When use_accurate_framebuffers setting is enabled, perform a more accurate
+    // reinterpreted. When use_accurate_gpu_emulation setting is enabled, perform a more accurate
    // surface copy, where pixels are reinterpreted as a new format (without conversion). This
    // code path uses OpenGL PBOs and is quite slow.
-    const bool is_blit{old_params.pixel_format == new_params.pixel_format ||
-                       !Settings::values.use_accurate_framebuffers};
+    const bool is_blit{old_params.pixel_format == new_params.pixel_format};

    switch (new_params.target) {
    case SurfaceParams::SurfaceTarget::Texture2D:
@@ -1138,6 +1221,9 @@ Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& old_surface,
            CopySurface(old_surface, new_surface, copy_pbo.handle);
        }
        break;
+    case SurfaceParams::SurfaceTarget::Texture3D:
+        AccurateCopySurface(old_surface, new_surface);
+        break;
    case SurfaceParams::SurfaceTarget::TextureCubemap: {
        if (old_params.rt.array_mode != 1) {
            // TODO(bunnei): This is used by Breath of the Wild, I'm not sure how to implement this
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
@@ -18,6 +18,7 @@
 #include "video_core/rasterizer_cache.h"
 #include "video_core/renderer_opengl/gl_resource_manager.h"
 #include "video_core/renderer_opengl/gl_shader_gen.h"
+#include "video_core/textures/decoders.h"
 #include "video_core/textures/texture.h"

 namespace OpenGL {
@@ -131,6 +132,8 @@ struct SurfaceParams {
        case Tegra::Texture::TextureType::Texture2D:
        case Tegra::Texture::TextureType::Texture2DNoMipmap:
            return SurfaceTarget::Texture2D;
+        case Tegra::Texture::TextureType::Texture3D:
+            return SurfaceTarget::Texture3D;
        case Tegra::Texture::TextureType::TextureCubemap:
            return SurfaceTarget::TextureCubemap;
        case Tegra::Texture::TextureType::Texture1DArray:
@@ -701,21 +704,42 @@ struct SurfaceParams {
        return SurfaceType::Invalid;
    }

+    /// Returns the size in bytes of the specified pixel format
+    static constexpr u32 GetBytesPerPixel(PixelFormat pixel_format) {
+        if (pixel_format == SurfaceParams::PixelFormat::Invalid) {
+            return 0;
+        }
+        return GetFormatBpp(pixel_format) / CHAR_BIT;
+    }
+
    /// Returns the rectangle corresponding to this surface
    MathUtil::Rectangle<u32> GetRect() const;

-    /// Returns the size of this surface as a 2D texture in bytes, adjusted for compression
-    std::size_t SizeInBytes2D() const {
+    /// Returns the total size of this surface in bytes, adjusted for compression
+    std::size_t SizeInBytesRaw(bool ignore_tiled = false) const {
        const u32 compression_factor{GetCompressionFactor(pixel_format)};
-        ASSERT(width % compression_factor == 0);
-        ASSERT(height % compression_factor == 0);
-        return (width / compression_factor) * (height / compression_factor) *
-               GetFormatBpp(pixel_format) / CHAR_BIT;
+        const u32 bytes_per_pixel{GetBytesPerPixel(pixel_format)};
+        const size_t uncompressed_size{
+            Tegra::Texture::CalculateSize((ignore_tiled ? false : is_tiled), bytes_per_pixel, width,
+                                          height, depth, block_height, block_depth)};
+
+        // Divide by compression_factor^2, as height and width are factored by this
+        return uncompressed_size / (compression_factor * compression_factor);
    }

-    /// Returns the total size of this surface in bytes, adjusted for compression
-    std::size_t SizeInBytesTotal() const {
-        return SizeInBytes2D() * depth;
+    /// Returns the size of this surface as an OpenGL texture in bytes
+    std::size_t SizeInBytesGL() const {
+        return SizeInBytesRaw(true);
+    }
+
+    /// Returns the size of this surface as a cube face in bytes
+    std::size_t SizeInBytesCubeFace() const {
+        return size_in_bytes / 6;
+    }
+
+    /// Returns the size of this surface as an OpenGL cube face in bytes
+    std::size_t SizeInBytesCubeFaceGL() const {
+        return size_in_bytes_gl / 6;
    }

    /// Creates SurfaceParams from a texture configuration
@@ -742,7 +766,9 @@ struct SurfaceParams {
                        other.depth);
    }

-    VAddr addr;
+    /// Initializes parameters for caching; should be called after everything has been initialized
+    void InitCacheParameters(Tegra::GPUVAddr gpu_addr);
+
    bool is_tiled;
    u32 block_width;
    u32 block_height;
@@ -754,15 +780,20 @@ struct SurfaceParams {
    u32 height;
    u32 depth;
    u32 unaligned_height;
-    std::size_t size_in_bytes_total;
-    std::size_t size_in_bytes_2d;
    SurfaceTarget target;
    u32 max_mip_level;

+    // Parameters used for caching
+    VAddr addr;
+    Tegra::GPUVAddr gpu_addr;
+    std::size_t size_in_bytes;
+    std::size_t size_in_bytes_gl;
+
    // Render target specific parameters, not used in caching
    struct {
        u32 index;
        u32 array_mode;
+        u32 volume;
        u32 layer_stride;
        u32 base_layer;
    } rt;
@@ -775,7 +806,8 @@ struct SurfaceReserveKey : Common::HashableStruct<OpenGL::SurfaceParams> {
    static SurfaceReserveKey Create(const OpenGL::SurfaceParams& params) {
        SurfaceReserveKey res;
        res.state = params;
-        res.state.rt = {}; // Ignore rt config in caching
+        res.state.gpu_addr = {}; // Ignore GPU vaddr in caching
+        res.state.rt = {};       // Ignore rt config in caching
        return res;
    }
 };
@@ -790,16 +822,20 @@ struct hash<SurfaceReserveKey> {

 namespace OpenGL {

-class CachedSurface final {
+class CachedSurface final : public RasterizerCacheObject {
 public:
    CachedSurface(const SurfaceParams& params);

-    VAddr GetAddr() const {
+    VAddr GetAddr() const override {
        return params.addr;
    }

-    std::size_t GetSizeInBytes() const {
-        return params.size_in_bytes_total;
+    std::size_t GetSizeInBytes() const override {
+        return cached_size_in_bytes;
+    }
+
+    void Flush() override {
+        FlushGLBuffer();
    }

    const OGLTexture& Texture() const {
@@ -810,13 +846,6 @@ public:
        return gl_target;
    }

-    static constexpr unsigned int GetGLBytesPerPixel(SurfaceParams::PixelFormat format) {
-        if (format == SurfaceParams::PixelFormat::Invalid)
-            return 0;
-
-        return SurfaceParams::GetFormatBpp(format) / CHAR_BIT;
-    }
-
    const SurfaceParams& GetSurfaceParams() const {
        return params;
    }
@@ -833,6 +862,7 @@ private:
    std::vector<u8> gl_buffer;
    SurfaceParams params;
    GLenum gl_target;
+    std::size_t cached_size_in_bytes;
 };

 class RasterizerCacheOpenGL final : public RasterizerCache<Surface> {
@@ -849,9 +879,6 @@ public:
    /// Get the color surface based on the framebuffer configuration and the specified render target
    Surface GetColorBufferSurface(std::size_t index, bool preserve_contents);

-    /// Flushes the surface to Switch memory
-    void FlushSurface(const Surface& surface);
-
    /// Tries to find a framebuffer using on the provided CPU address
    Surface TryFindFramebufferSurface(VAddr addr) const;

@@ -875,6 +902,9 @@ private:
    /// Tries to get a reserved surface for the specified parameters
    Surface TryGetReservedSurface(const SurfaceParams& params);

+    /// Performs a slow but accurate surface copy, flushing to RAM and reinterpreting the data
+    void AccurateCopySurface(const Surface& src_surface, const Surface& dst_surface);
+
    /// The surface reserve is a "backup" cache, this is where we put unique surfaces that have
    /// previously been used. This is to prevent surfaces from being constantly created and
    /// destroyed when used with different surface parameters.
--- a/src/video_core/renderer_opengl/gl_shader_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_cache.h
@@ -19,20 +19,21 @@ class CachedShader;
 using Shader = std::shared_ptr<CachedShader>;
 using Maxwell = Tegra::Engines::Maxwell3D::Regs;

-class CachedShader final {
+class CachedShader final : public RasterizerCacheObject {
 public:
    CachedShader(VAddr addr, Maxwell::ShaderProgram program_type);

-    /// Gets the address of the shader in guest memory, required for cache management
-    VAddr GetAddr() const {
+    VAddr GetAddr() const override {
        return addr;
    }

-    /// Gets the size of the shader in guest memory, required for cache management
-    std::size_t GetSizeInBytes() const {
+    std::size_t GetSizeInBytes() const override {
        return GLShader::MAX_PROGRAM_CODE_LENGTH * sizeof(u64);
    }

+    // We do not have to flush this cache as things in it are never modified by us.
+    void Flush() override {}
+
    /// Gets the shader entries for the shader
    const GLShader::ShaderEntries& GetShaderEntries() const {
        return entries;
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -1142,6 +1142,7 @@ private:
        case Tegra::Shader::TextureType::Texture2D: {
            return 2;
        }
+        case Tegra::Shader::TextureType::Texture3D:
        case Tegra::Shader::TextureType::TextureCube: {
            return 3;
        }
@@ -2036,9 +2037,9 @@ private:
                break;
            }
            case OpCode::Id::TEX: {
-                ASSERT_MSG(instr.tex.array == 0, "TEX arrays unimplemented");
                Tegra::Shader::TextureType texture_type{instr.tex.texture_type};
                std::string coord;
+                const bool is_array = instr.tex.array != 0;

                ASSERT_MSG(!instr.tex.UsesMiscMode(Tegra::Shader::TextureMiscMode::NODEP),
                           "NODEP is not implemented");
@@ -2053,21 +2054,59 @@ private:

                switch (num_coordinates) {
                case 1: {
-                    const std::string x = regs.GetRegisterAsFloat(instr.gpr8);
-                    coord = "float coords = " + x + ';';
+                    if (is_array) {
+                        const std::string index = regs.GetRegisterAsInteger(instr.gpr8);
+                        const std::string x = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
+                        coord = "vec2 coords = vec2(" + x + ", " + index + ");";
+                    } else {
+                        const std::string x = regs.GetRegisterAsFloat(instr.gpr8);
+                        coord = "float coords = " + x + ';';
+                    }
                    break;
                }
                case 2: {
-                    const std::string x = regs.GetRegisterAsFloat(instr.gpr8);
-                    const std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
-                    coord = "vec2 coords = vec2(" + x + ", " + y + ");";
+                    if (is_array) {
+                        const std::string index = regs.GetRegisterAsInteger(instr.gpr8);
+                        const std::string x = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
+                        const std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 2);
+                        coord = "vec3 coords = vec3(" + x + ", " + y + ", " + index + ");";
+                    } else {
+                        const std::string x = regs.GetRegisterAsFloat(instr.gpr8);
+                        const std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
+                        coord = "vec2 coords = vec2(" + x + ", " + y + ");";
+                    }
                    break;
                }
                case 3: {
-                    const std::string x = regs.GetRegisterAsFloat(instr.gpr8);
-                    const std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
-                    const std::string z = regs.GetRegisterAsFloat(instr.gpr20);
-                    coord = "vec3 coords = vec3(" + x + ", " + y + ", " + z + ");";
+                    if (depth_compare) {
+                        if (is_array) {
+                            const std::string index = regs.GetRegisterAsInteger(instr.gpr8);
+                            const std::string x = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
+                            const std::string y = regs.GetRegisterAsFloat(instr.gpr20);
+                            const std::string z = regs.GetRegisterAsFloat(instr.gpr20.Value() + 1);
+                            coord = "vec4 coords = vec4(" + x + ", " + y + ", " + z + ", " + index +
+                                    ");";
+                        } else {
+                            const std::string x = regs.GetRegisterAsFloat(instr.gpr8);
+                            const std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
+                            const std::string z = regs.GetRegisterAsFloat(instr.gpr20);
+                            coord = "vec3 coords = vec3(" + x + ", " + y + ", " + z + ");";
+                        }
+                    } else {
+                        if (is_array) {
+                            const std::string index = regs.GetRegisterAsInteger(instr.gpr8);
+                            const std::string x = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
+                            const std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 2);
+                            const std::string z = regs.GetRegisterAsFloat(instr.gpr8.Value() + 3);
+                            coord = "vec4 coords = vec4(" + x + ", " + y + ", " + z + ", " + index +
+                                    ");";
+                        } else {
+                            const std::string x = regs.GetRegisterAsFloat(instr.gpr8);
+                            const std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
+                            const std::string z = regs.GetRegisterAsFloat(instr.gpr8.Value() + 2);
+                            coord = "vec3 coords = vec3(" + x + ", " + y + ", " + z + ");";
+                        }
+                    }
                    break;
                }
                default:
@@ -2086,7 +2125,7 @@ private:
                std::string op_c;

                const std::string sampler =
-                    GetSampler(instr.sampler, texture_type, false, depth_compare);
+                    GetSampler(instr.sampler, texture_type, is_array, depth_compare);
                // Add an extra scope and declare the texture coords inside to prevent
                // overwriting them in case they are used as outputs of the texs instruction.

@@ -2106,10 +2145,13 @@ private:
                }
                case Tegra::Shader::TextureProcessMode::LB:
                case Tegra::Shader::TextureProcessMode::LBA: {
-                    if (num_coordinates <= 2) {
-                        op_c = regs.GetRegisterAsFloat(instr.gpr20);
+                    if (depth_compare) {
+                        if (is_array)
+                            op_c = regs.GetRegisterAsFloat(instr.gpr20.Value() + 2);
+                        else
+                            op_c = regs.GetRegisterAsFloat(instr.gpr20.Value() + 1);
                    } else {
-                        op_c = regs.GetRegisterAsFloat(instr.gpr20.Value() + 1);
+                        op_c = regs.GetRegisterAsFloat(instr.gpr20);
                    }
                    // TODO: Figure if A suffix changes the equation at all.
                    texture = "texture(" + sampler + ", coords, " + op_c + ')';
--- a/src/yuzu/configuration/config.cpp
+++ b/src/yuzu/configuration/config.cpp
@@ -85,8 +85,8 @@ void Config::ReadValues() {
    Settings::values.resolution_factor = qt_config->value("resolution_factor", 1.0).toFloat();
    Settings::values.use_frame_limit = qt_config->value("use_frame_limit", true).toBool();
    Settings::values.frame_limit = qt_config->value("frame_limit", 100).toInt();
-    Settings::values.use_accurate_framebuffers =
-        qt_config->value("use_accurate_framebuffers", false).toBool();
+    Settings::values.use_accurate_gpu_emulation =
+        qt_config->value("use_accurate_gpu_emulation", false).toBool();

    Settings::values.bg_red = qt_config->value("bg_red", 0.0).toFloat();
    Settings::values.bg_green = qt_config->value("bg_green", 0.0).toFloat();
@@ -233,7 +233,7 @@ void Config::SaveValues() {
    qt_config->setValue("resolution_factor", (double)Settings::values.resolution_factor);
    qt_config->setValue("use_frame_limit", Settings::values.use_frame_limit);
    qt_config->setValue("frame_limit", Settings::values.frame_limit);
-    qt_config->setValue("use_accurate_framebuffers", Settings::values.use_accurate_framebuffers);
+    qt_config->setValue("use_accurate_gpu_emulation", Settings::values.use_accurate_gpu_emulation);

    // Cast to double because Qt's written float values are not human-readable
    qt_config->setValue("bg_red", (double)Settings::values.bg_red);
--- a/src/yuzu/configuration/configure_graphics.cpp
+++ b/src/yuzu/configuration/configure_graphics.cpp
@@ -75,7 +75,7 @@ void ConfigureGraphics::setConfiguration() {
        static_cast<int>(FromResolutionFactor(Settings::values.resolution_factor)));
    ui->toggle_frame_limit->setChecked(Settings::values.use_frame_limit);
    ui->frame_limit->setValue(Settings::values.frame_limit);
-    ui->use_accurate_framebuffers->setChecked(Settings::values.use_accurate_framebuffers);
+    ui->use_accurate_gpu_emulation->setChecked(Settings::values.use_accurate_gpu_emulation);
    bg_color = QColor::fromRgbF(Settings::values.bg_red, Settings::values.bg_green,
                                Settings::values.bg_blue);
    ui->bg_button->setStyleSheet(
@@ -87,7 +87,7 @@ void ConfigureGraphics::applyConfiguration() {
        ToResolutionFactor(static_cast<Resolution>(ui->resolution_factor_combobox->currentIndex()));
    Settings::values.use_frame_limit = ui->toggle_frame_limit->isChecked();
    Settings::values.frame_limit = ui->frame_limit->value();
-    Settings::values.use_accurate_framebuffers = ui->use_accurate_framebuffers->isChecked();
+    Settings::values.use_accurate_gpu_emulation = ui->use_accurate_gpu_emulation->isChecked();
    Settings::values.bg_red = static_cast<float>(bg_color.redF());
    Settings::values.bg_green = static_cast<float>(bg_color.greenF());
    Settings::values.bg_blue = static_cast<float>(bg_color.blueF());
--- a/src/yuzu/configuration/configure_graphics.ui
+++ b/src/yuzu/configuration/configure_graphics.ui
@@ -50,9 +50,9 @@
          </layout>
        </item>
        <item>
-         <widget class="QCheckBox" name="use_accurate_framebuffers">
+         <widget class="QCheckBox" name="use_accurate_gpu_emulation">
          <property name="text">
-           <string>Use accurate framebuffers (slow)</string>
+           <string>Use accurate GPU emulation (slow)</string>
          </property>
         </widget>
        </item>
--- a/src/yuzu_cmd/config.cpp
+++ b/src/yuzu_cmd/config.cpp
@@ -99,8 +99,8 @@ void Config::ReadValues() {
    Settings::values.use_frame_limit = sdl2_config->GetBoolean("Renderer", "use_frame_limit", true);
    Settings::values.frame_limit =
        static_cast<u16>(sdl2_config->GetInteger("Renderer", "frame_limit", 100));
-    Settings::values.use_accurate_framebuffers =
-        sdl2_config->GetBoolean("Renderer", "use_accurate_framebuffers", false);
+    Settings::values.use_accurate_gpu_emulation =
+        sdl2_config->GetBoolean("Renderer", "use_accurate_gpu_emulation", false);

    Settings::values.bg_red = (float)sdl2_config->GetReal("Renderer", "bg_red", 0.0);
    Settings::values.bg_green = (float)sdl2_config->GetReal("Renderer", "bg_green", 0.0);
--- a/src/yuzu_cmd/default_ini.h
+++ b/src/yuzu_cmd/default_ini.h
@@ -110,9 +110,9 @@ use_frame_limit =
 # 1 - 9999: Speed limit as a percentage of target game speed. 100 (default)
 frame_limit =

-# Whether to use accurate framebuffers
+# Whether to use accurate GPU emulation
 # 0 (default): Off (fast), 1 : On (slow)
-use_accurate_framebuffers =
+use_accurate_gpu_emulation =

 # The clear color for the renderer. What shows up on the sides of the bottom screen.
 # Must be in range of 0.0-1.0. Defaults to 1.0 for all.
Author	SHA1	Message	Date
FernandoS27	caaa9914fd	Clang format and other fixes	2018-10-17 18:52:11 -04:00
FernandoS27	cb9fdc7a26	Implement Reinterpret Surface, to accurately blit 3D textures	2018-10-17 18:52:10 -04:00
FernandoS27	dbc34db6ce	Implement GetInRange in the Rasterizer Cache	2018-10-17 18:52:10 -04:00
FernandoS27	fd9e2d0073	Implement 3D Textures	2018-10-17 18:52:08 -04:00
bunnei	f912a82a8e	Merge pull request #1497 from bunnei/flush-framebuffers Implement flushing in the rasterizer cache	2018-10-17 18:40:34 -04:00
bunnei	6e8752881c	Merge pull request #1498 from lioncash/aslr svc: Clarify enum values for AddressSpaceBaseAddr and AddressSpaceSize in svcGetInfo()	2018-10-17 18:31:51 -04:00
bunnei	86dcf2942b	Merge pull request #1496 from FernandoS27/tex-array Implement Arrays on Tex Instruction	2018-10-17 18:30:44 -04:00
bunnei	afe22d8405	Merge pull request #1509 from DarkLordZach/device-save-data savedata_factory: Add DeviceSaveData and fix TemporaryStorage	2018-10-17 18:22:05 -04:00
bunnei	648b55c6b9	gl_rasterizer_cache: Remove unnecessary block_depth=1 on Flush.	2018-10-17 18:20:15 -04:00
bunnei	2a035a1f6f	gl_rasterizer_cache: Remove unnecessary temporary buffer with unswizzle.	2018-10-17 18:19:35 -04:00
bunnei	43b9494a0f	gl_rasterizer_cache: Use AccurateCopySurface for use_accurate_gpu_emulation.	2018-10-16 17:20:49 -04:00
bunnei	ee7c2dbf5a	config: Rename use_accurate_framebuffers -> use_accurate_gpu_emulation. - This will be used as a catch-all for slow-but-accurate GPU emulation paths.	2018-10-16 17:02:29 -04:00
bunnei	91602de7f2	rasterizer_cache: Refactor to support in-order flushing.	2018-10-16 16:51:53 -04:00
bunnei	0e59291310	gl_rasterizer_cache: Refactor to only call GetRegionEnd on surface creation.	2018-10-16 11:31:02 -04:00
bunnei	949d7832fa	gl_rasterizer_cache: Only flush when use_accurate_framebuffers is enabled.	2018-10-16 11:31:02 -04:00
bunnei	5f79ba04bd	gl_rasterizer_cache: Separate guest and host surface size managment.	2018-10-16 11:31:01 -04:00
bunnei	58be4dff79	gl_rasterizer_cache: Rename GetGLBytesPerPixel to GetBytesPerPixel. - This does not really have anything to do with OpenGL.	2018-10-16 11:31:01 -04:00
bunnei	cf7b46c101	gl_rasterizer_cache: Remove unused FlushSurface method.	2018-10-16 11:31:01 -04:00
bunnei	3afdfd7bfa	gl_rasterizer: Implement flushing.	2018-10-16 11:31:01 -04:00
bunnei	b4e29ccb81	gl_rasterizer_cache: Remove usage of Memory::Read/Write functions. - These cannot be used within the cache, as they change cache state.	2018-10-16 11:31:00 -04:00
bunnei	4e9683e9d5	gl_rasterizer_cache: Clamp cached surface size to mapped GPU region size.	2018-10-16 11:31:00 -04:00
bunnei	37575eae65	memory_manager: Add a method for querying the end of a mapped GPU region.	2018-10-16 11:31:00 -04:00
bunnei	0be7e82289	rasterizer_cache: Reintroduce method for flushing.	2018-10-16 11:31:00 -04:00
bunnei	9b929e934b	gl_rasterizer_cache: Reintroduce code for handling swizzle and flush to guest RAM.	2018-10-16 11:30:59 -04:00
Zach Hilman	9d4e6176eb	savedata_factory: Add TemporaryStorage SaveDataSpaceId Required for TemporaryStorage saves (in addition to SaveDataType)	2018-10-16 10:20:04 -04:00
Zach Hilman	74890cf2da	savedata_factory: Add support for DeviceSaveData Uses the same path as SaveData except with UID 0. Adds a warning if UID is not 0.	2018-10-16 10:19:21 -04:00
Lioncash	90f8474fc1	svc: Clarify enum values for AddressSpaceBaseAddr and AddressSpaceSize in svcGetInfo() So, one thing that's puzzled me is why the kernel seemed to not use the direct code address ranges in some cases for some service functions. For example, in svcMapMemory, the full address space width is compared against for validity, but for svcMapSharedMemory, it compares against 0xFFE00000, 0xFF8000000, and 0x7FF8000000 as upper bounds, and uses either 0x200000 or 0x8000000 as the lower-bounds as the beginning of the compared range. Coincidentally, these exact same values are also used in svcGetInfo, and also when initializing the user address space, so this is actually retrieving the ASLR extents, not the extents of the address space in general.	2018-10-14 20:11:16 -04:00
FernandoS27	1d6559fbd3	Implement Arrays on Tex Instruction	2018-10-14 13:31:02 -04:00