GPU/DMA: Fixed Tiled->Linear transfers.

We no longer write to out-of-bounds memory.
This commit is contained in:
Subv
2018-09-21 08:27:03 -05:00
parent bdb3920753
commit 2fd06acc8f
3 changed files with 27 additions and 15 deletions

View File

@@ -96,26 +96,17 @@ void MaxwellDMA::HandleCopy() {
if (regs.exec.is_dst_linear && !regs.exec.is_src_linear) {
ASSERT(regs.src_params.size_z == 1);
// If the input is tiled and the output is linear, deswizzle the input and copy it over.
// Copy the data to a staging buffer first to make applying the src and dst offsets easier
std::vector<u8> staging_buffer(regs.src_pitch * regs.src_params.size_y);
// In this mode, the src_pitch register contains the source stride, and the dst_pitch
// contains the bytes per pixel.
u32 src_bytes_per_pixel = regs.src_pitch / regs.src_params.size_x;
u32 dst_bytes_per_pixel = regs.dst_pitch;
FlushAndInvalidate(staging_buffer.size(), copy_size * dst_bytes_per_pixel);
FlushAndInvalidate(regs.src_pitch * regs.src_params.size_y,
copy_size * src_bytes_per_pixel);
Texture::CopySwizzledData(regs.src_params.size_x, regs.src_params.size_y,
src_bytes_per_pixel, dst_bytes_per_pixel, src_buffer,
staging_buffer.data(), true, regs.src_params.BlockHeight());
u32 src_offset = (regs.src_params.pos_y * regs.src_params.size_x + regs.src_params.pos_x) *
regs.dst_pitch;
std::memcpy(dst_buffer, staging_buffer.data() + src_offset, copy_size * regs.dst_pitch);
Texture::UnswizzleSubrect(regs.x_count, regs.y_count, regs.dst_pitch,
regs.src_params.size_x, src_bytes_per_pixel, source_cpu, dest_cpu,
regs.src_params.BlockHeight(), regs.src_params.pos_x,
regs.src_params.pos_y);
} else {
ASSERT(regs.dst_params.size_z == 1);
ASSERT(regs.src_pitch == regs.x_count);

View File

@@ -104,6 +104,22 @@ void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32
}
}
/**
 * Copies a subrectangle out of a tiled (swizzled) surface into a linear destination.
 *
 * Pixels are copied one at a time: for every (x, line) inside the subrectangle the matching
 * location in the tiled surface is resolved through GetSwizzleOffset and bytes_per_pixel bytes
 * are copied from there to the linear destination.
 *
 * @param subrect_width   Number of pixels to copy per line.
 * @param subrect_height  Number of lines to copy.
 * @param dest_pitch      Distance, in address units, between the starts of two consecutive
 *                        destination lines.
 * @param swizzled_width  Width of the tiled surface, forwarded to GetSwizzleOffset.
 * @param bytes_per_pixel Number of bytes copied for each pixel.
 * @param swizzled_data   Base address of the tiled source surface.
 * @param unswizzled_data Base address of the linear destination.
 * @param block_height    Block height of the tiled surface, forwarded to GetSwizzleOffset.
 * @param offset_x        X position of the subrectangle's first column in the tiled surface.
 * @param offset_y        Y position of the subrectangle's first line in the tiled surface.
 */
void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32 swizzled_width,
                      u32 bytes_per_pixel, VAddr swizzled_data, VAddr unswizzled_data,
                      u32 block_height, u32 offset_x, u32 offset_y) {
    for (u32 line = 0; line < subrect_height; ++line) {
        // Start of the current line in the linear destination.
        const VAddr dest_line = unswizzled_data + line * dest_pitch;

        for (u32 x = 0; x < subrect_width; ++x) {
            // The source coordinate must advance with x; using offset_x alone would read the
            // same tiled pixel for every column of the line.
            const u32 swizzled_offset = GetSwizzleOffset(x + offset_x, line + offset_y,
                                                         swizzled_width, bytes_per_pixel,
                                                         block_height);
            const VAddr source_addr = swizzled_data + swizzled_offset;

            // Each pixel occupies bytes_per_pixel bytes in the destination, so the column index
            // has to be scaled; otherwise consecutive pixels would overwrite each other.
            const VAddr dest_addr = dest_line + x * bytes_per_pixel;

            Memory::CopyBlock(dest_addr, source_addr, bytes_per_pixel);
        }
    }
}
u32 BytesPerPixel(TextureFormat format) {
switch (format) {
case TextureFormat::DXT1:

View File

@@ -31,6 +31,11 @@ void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32
u32 bytes_per_pixel, VAddr swizzled_data, VAddr unswizzled_data,
u32 block_height);
/**
 * Copies a tiled subrectangle into a linear surface.
 *
 * @param subrect_width   Number of pixels copied per line of the subrectangle.
 * @param subrect_height  Number of lines in the subrectangle.
 * @param dest_pitch      Distance between the starts of consecutive lines in the linear
 *                        destination (presumably in bytes - confirm against callers).
 * @param swizzled_width  Width of the tiled source surface (presumably in pixels - confirm).
 * @param bytes_per_pixel Number of bytes copied for each pixel.
 * @param swizzled_data   Address of the tiled source surface.
 * @param unswizzled_data Address of the linear destination surface.
 * @param block_height    Block height of the tiled source surface.
 * @param offset_x        X position of the subrectangle within the tiled surface.
 * @param offset_y        Y position of the subrectangle within the tiled surface.
 */
void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32 swizzled_width,
u32 bytes_per_pixel, VAddr swizzled_data, VAddr unswizzled_data,
u32 block_height, u32 offset_x, u32 offset_y);
/**
* Decodes an unswizzled texture into a A8R8G8B8 texture.
*/