GPU/DMA: Fixed the Linear->Tiled and Linear->Linear transfer modes.

This fixes the loading bar in Has-Been Heroes.

The Tiled->Tiled transfer mode is not implemented yet and will assert.
This commit is contained in:
Subv
2018-09-20 21:13:02 -05:00
parent 47826fd090
commit bb9eeba670
3 changed files with 70 additions and 25 deletions

View File

@@ -49,40 +49,58 @@ void MaxwellDMA::HandleCopy() {
ASSERT(regs.dst_params.pos_x == 0);
ASSERT(regs.dst_params.pos_y == 0);
size_t copy_size = regs.x_count;
// When the enable_2d bit is disabled, the copy is performed as if we were copying a 1D
// buffer of length `x_count`, otherwise we copy a 2D buffer of size (x_count, y_count).
if (regs.exec.enable_2d) {
copy_size = copy_size * regs.y_count;
if (!regs.exec.is_dst_linear && !regs.exec.is_src_linear) {
// If both the source and the destination are in block layout, assert.
UNREACHABLE_MSG("Tiled->Tiled DMA transfers are not yet implemented");
return;
}
if (regs.exec.is_dst_linear == regs.exec.is_src_linear) {
ASSERT(regs.src_params.pos_x == 0);
ASSERT(regs.src_params.pos_y == 0);
ASSERT(regs.dst_pitch == 1);
ASSERT(regs.src_pitch == 1);
if (regs.exec.is_dst_linear && regs.exec.is_src_linear) {
// TODO(Subv): For now assume that the size of the destination rectangle matches exactly the
// destination pitch.
ASSERT(regs.dst_pitch == regs.x_count);
// CopyBlock already takes care of flushing and invalidating the cache at the affected
// addresses.
Memory::CopyBlock(dest_cpu, source_cpu, copy_size);
// When the enable_2d bit is disabled, the copy is performed as if we were copying a 1D
// buffer of length `x_count`, otherwise we copy a 2D image of dimensions (x_count,
// y_count).
if (!regs.exec.enable_2d) {
Memory::CopyBlock(dest_cpu, source_cpu, regs.x_count);
return;
}
// If both the source and the destination are in linear layout, perform a line-by-line
// copy. We're going to take a subrect of size (x_count, y_count) from the source
// rectangle. There is no need to manually flush/invalidate the regions because
// CopyBlock does that for us.
for (u32 line = 0; line < regs.y_count; ++line) {
const VAddr source_line = source_cpu + line * regs.src_pitch;
const VAddr dest_line = dest_cpu + line * regs.dst_pitch;
Memory::CopyBlock(dest_line, source_line, regs.dst_pitch);
}
return;
}
ASSERT(regs.exec.enable_2d == 1);
// TODO(Subv): For now, manually flush the regions until we implement GPU-accelerated copying.
rasterizer.FlushRegion(source_cpu, copy_size);
size_t copy_size = regs.x_count * regs.y_count;
// We have to invalidate the destination region to evict any outdated surfaces from the cache.
// We do this before actually writing the new data because the destination address might contain
// a dirty surface that will have to be written back to memory.
rasterizer.InvalidateRegion(dest_cpu, copy_size);
const auto FlushAndInvalidate = [&](u32 src_size, u32 dst_size) {
// TODO(Subv): For now, manually flush the regions until we implement GPU-accelerated
// copying.
rasterizer.FlushRegion(source_cpu, src_size);
// We have to invalidate the destination region to evict any outdated surfaces from the
// cache. We do this before actually writing the new data because the destination address
// might contain a dirty surface that will have to be written back to memory.
rasterizer.InvalidateRegion(dest_cpu, dst_size);
};
u8* src_buffer = Memory::GetPointer(source_cpu);
u8* dst_buffer = Memory::GetPointer(dest_cpu);
if (regs.exec.is_dst_linear && !regs.exec.is_src_linear) {
ASSERT(regs.src_params.size_z == 1);
// If the input is tiled and the output is linear, deswizzle the input and copy it over.
// Copy the data to a staging buffer first to make applying the src and dst offsets easier
@@ -93,6 +111,8 @@ void MaxwellDMA::HandleCopy() {
u32 src_bytes_per_pixel = regs.src_pitch / regs.src_params.size_x;
u32 dst_bytes_per_pixel = regs.dst_pitch;
FlushAndInvalidate(staging_buffer.size(), copy_size * dst_bytes_per_pixel);
Texture::CopySwizzledData(regs.src_params.size_x, regs.src_params.size_y,
src_bytes_per_pixel, dst_bytes_per_pixel, src_buffer,
staging_buffer.data(), true, regs.src_params.BlockHeight());
@@ -101,13 +121,17 @@ void MaxwellDMA::HandleCopy() {
regs.dst_pitch;
std::memcpy(dst_buffer, staging_buffer.data() + src_offset, copy_size * regs.dst_pitch);
} else {
// TODO(Subv): Source offsets are not yet implemented for this mode.
ASSERT(regs.src_params.pos_x == 0);
ASSERT(regs.src_params.pos_y == 0);
ASSERT(regs.dst_params.size_z == 1);
ASSERT(regs.src_pitch == regs.x_count);
u32 src_bpp = regs.src_pitch / regs.x_count;
FlushAndInvalidate(regs.src_pitch * regs.y_count,
regs.dst_params.size_x * regs.dst_params.size_y * src_bpp);
// If the input is linear and the output is tiled, swizzle the input and copy it over.
Texture::CopySwizzledData(regs.dst_params.size_x, regs.dst_params.size_y, 1, 1, dst_buffer,
src_buffer, false, regs.dst_params.BlockHeight());
Texture::SwizzleSubrect(regs.x_count, regs.y_count, regs.src_pitch, regs.dst_params.size_x,
src_bpp, dest_cpu, source_cpu, regs.dst_params.BlockHeight());
}
}

View File

@@ -88,6 +88,22 @@ void FastSwizzleData(u32 width, u32 height, u32 bytes_per_pixel, u8* swizzled_da
}
}
/// Writes a linear (untiled) subrectangle into a tiled surface, one pixel at a time.
/// For every (column, row) inside the subrectangle, the pixel's byte offset within the tiled
/// surface is obtained from GetSwizzleOffset and `bytes_per_pixel` bytes are copied there
/// from the matching position in the linear source using Memory::CopyBlock.
void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width,
                    u32 bytes_per_pixel, VAddr swizzled_data, VAddr unswizzled_data,
                    u32 block_height) {
    for (u32 row = 0; row < subrect_height; ++row) {
        // Address of the first pixel of this row in the linear source; rows are `source_pitch`
        // bytes apart.
        const VAddr row_base = unswizzled_data + row * source_pitch;

        for (u32 column = 0; column < subrect_width; ++column) {
            const VAddr linear_pixel = row_base + column * bytes_per_pixel;

            // Offset of the same pixel inside the tiled destination surface.
            const u32 tiled_offset =
                GetSwizzleOffset(column, row, swizzled_width, bytes_per_pixel, block_height);
            const VAddr tiled_pixel = swizzled_data + tiled_offset;

            Memory::CopyBlock(tiled_pixel, linear_pixel, bytes_per_pixel);
        }
    }
}
u32 BytesPerPixel(TextureFormat format) {
switch (format) {
case TextureFormat::DXT1:

View File

@@ -26,6 +26,11 @@ std::vector<u8> UnswizzleDepthTexture(VAddr address, DepthFormat format, u32 wid
void CopySwizzledData(u32 width, u32 height, u32 bytes_per_pixel, u32 out_bytes_per_pixel,
u8* swizzled_data, u8* unswizzled_data, bool unswizzle, u32 block_height);
/**
 * Copies an untiled subrectangle into a tiled surface.
 * The copy is performed pixel by pixel: each pixel of the subrectangle is read from the
 * untiled source and written to its swizzled position in the destination.
 * @param subrect_width Width of the subrectangle to copy, in pixels.
 * @param subrect_height Height of the subrectangle to copy, in lines.
 * @param source_pitch Distance in bytes between the starts of consecutive source lines.
 * @param swizzled_width Width of the tiled destination surface (presumably in pixels; it is
 *                       forwarded to the swizzle offset computation as the surface width).
 * @param bytes_per_pixel Number of bytes copied for each pixel.
 * @param swizzled_data Address of the tiled destination surface.
 * @param unswizzled_data Address of the untiled source data.
 * @param block_height Block height of the tiled surface, forwarded to the swizzle offset
 *                     computation.
 */
void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width,
u32 bytes_per_pixel, VAddr swizzled_data, VAddr unswizzled_data,
u32 block_height);
/**
* Decodes an unswizzled texture into a A8R8G8B8 texture.
*/