GPU/DMA: Fixed Tiled->Linear transfers.
We no longer write to out-of-bounds memory.
This commit is contained in:
@@ -96,26 +96,17 @@ void MaxwellDMA::HandleCopy() {
|
||||
|
||||
if (regs.exec.is_dst_linear && !regs.exec.is_src_linear) {
|
||||
ASSERT(regs.src_params.size_z == 1);
|
||||
|
||||
// If the input is tiled and the output is linear, deswizzle the input and copy it over.
|
||||
|
||||
// Copy the data to a staging buffer first to make applying the src and dst offsets easier
|
||||
std::vector<u8> staging_buffer(regs.src_pitch * regs.src_params.size_y);
|
||||
|
||||
// In this mode, the src_pitch register contains the source stride, and the dst_pitch
|
||||
// contains the bytes per pixel.
|
||||
u32 src_bytes_per_pixel = regs.src_pitch / regs.src_params.size_x;
|
||||
u32 dst_bytes_per_pixel = regs.dst_pitch;
|
||||
|
||||
FlushAndInvalidate(staging_buffer.size(), copy_size * dst_bytes_per_pixel);
|
||||
FlushAndInvalidate(regs.src_pitch * regs.src_params.size_y,
|
||||
copy_size * src_bytes_per_pixel);
|
||||
|
||||
Texture::CopySwizzledData(regs.src_params.size_x, regs.src_params.size_y,
|
||||
src_bytes_per_pixel, dst_bytes_per_pixel, src_buffer,
|
||||
staging_buffer.data(), true, regs.src_params.BlockHeight());
|
||||
|
||||
u32 src_offset = (regs.src_params.pos_y * regs.src_params.size_x + regs.src_params.pos_x) *
|
||||
regs.dst_pitch;
|
||||
std::memcpy(dst_buffer, staging_buffer.data() + src_offset, copy_size * regs.dst_pitch);
|
||||
Texture::UnswizzleSubrect(regs.x_count, regs.y_count, regs.dst_pitch,
|
||||
regs.src_params.size_x, src_bytes_per_pixel, source_cpu, dest_cpu,
|
||||
regs.src_params.BlockHeight(), regs.src_params.pos_x,
|
||||
regs.src_params.pos_y);
|
||||
} else {
|
||||
ASSERT(regs.dst_params.size_z == 1);
|
||||
ASSERT(regs.src_pitch == regs.x_count);
|
||||
|
||||
@@ -104,6 +104,22 @@ void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32
|
||||
}
|
||||
}
|
||||
|
||||
/// Copies a subrectangle out of a tiled (swizzled) surface into a linear surface.
///
/// For every pixel (x, line) of the subrectangle, the pixel located at
/// (x + offset_x, line + offset_y) in the tiled source is copied to
/// `unswizzled_data + line * dest_pitch + x * bytes_per_pixel` in the linear destination.
///
/// @param subrect_width   Width of the subrectangle, in pixels.
/// @param subrect_height  Height of the subrectangle, in lines.
/// @param dest_pitch      Stride in bytes between consecutive lines of the linear destination.
/// @param swizzled_width  Width of the tiled source surface, forwarded to GetSwizzleOffset.
/// @param bytes_per_pixel Size in bytes of each pixel copied.
/// @param swizzled_data   Address of the tiled source surface.
/// @param unswizzled_data Address of the linear destination surface.
/// @param block_height    Block height of the tiled layout, forwarded to GetSwizzleOffset.
/// @param offset_x        X position of the subrectangle inside the tiled surface.
/// @param offset_y        Y position of the subrectangle inside the tiled surface.
void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32 swizzled_width,
                      u32 bytes_per_pixel, VAddr swizzled_data, VAddr unswizzled_data,
                      u32 block_height, u32 offset_x, u32 offset_y) {
    for (u32 line = 0; line < subrect_height; ++line) {
        // Start of the current line in the linear destination.
        const VAddr dest_line = unswizzled_data + line * dest_pitch;

        for (u32 x = 0; x < subrect_width; ++x) {
            // The source coordinate must advance with x; using offset_x alone would read the
            // same tiled pixel for every column of the line.
            const u32 swizzled_offset = GetSwizzleOffset(x + offset_x, line + offset_y,
                                                         swizzled_width, bytes_per_pixel,
                                                         block_height);

            // Each pixel occupies bytes_per_pixel bytes in the destination, so step by whole
            // pixels; stepping by single bytes would make consecutive copies overlap.
            const VAddr dest_addr = dest_line + x * bytes_per_pixel;
            const VAddr source_addr = swizzled_data + swizzled_offset;

            Memory::CopyBlock(dest_addr, source_addr, bytes_per_pixel);
        }
    }
}
|
||||
|
||||
u32 BytesPerPixel(TextureFormat format) {
|
||||
switch (format) {
|
||||
case TextureFormat::DXT1:
|
||||
|
||||
@@ -31,6 +31,11 @@ void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32
|
||||
u32 bytes_per_pixel, VAddr swizzled_data, VAddr unswizzled_data,
|
||||
u32 block_height);
|
||||
|
||||
/// Copies a tiled subrectangle into a linear surface.
///
/// @param subrect_width   Width of the subrectangle to copy (presumably in pixels).
/// @param subrect_height  Height of the subrectangle to copy, in lines.
/// @param dest_pitch      Stride between consecutive lines of the linear destination.
/// @param swizzled_width  Width of the tiled source surface.
/// @param bytes_per_pixel Number of bytes copied for each pixel.
/// @param swizzled_data   Address of the tiled source surface.
/// @param unswizzled_data Address of the linear destination surface.
/// @param block_height    Block height of the tiled source layout.
/// @param offset_x        X position of the subrectangle inside the tiled surface.
/// @param offset_y        Y position of the subrectangle inside the tiled surface.
|
||||
void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32 swizzled_width,
|
||||
u32 bytes_per_pixel, VAddr swizzled_data, VAddr unswizzled_data,
|
||||
u32 block_height, u32 offset_x, u32 offset_y);
|
||||
|
||||
/**
|
||||
* Decodes an unswizzled texture into a A8R8G8B8 texture.
|
||||
*/
|
||||
|
||||
Reference in New Issue
Block a user