another try

This commit is contained in:
mgthepro
2022-11-05 13:58:44 +01:00
parent 4a9f2bbf2a
commit 9f63fbe700
2002 changed files with 671171 additions and 671092 deletions

File diff suppressed because it is too large Load Diff

View File

@@ -1,11 +1,11 @@
// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
namespace Tegra::Texture::ASTC {
/// Declaration of the ASTC decompression entry point; the definition is not part of this header.
///
/// @param data          Read-only input bytes.
/// @param width         Image width.
/// @param height        Image height.
/// @param depth         Image depth.
/// @param block_width   Block width.
/// @param block_height  Block height.
/// @param output        Writable destination bytes.
///
/// NOTE(review): presumably `data` holds ASTC-compressed blocks, the dimensions are in texels and
/// `output` receives the decoded image - confirm units and output format against the definition.
void Decompress(std::span<const uint8_t> data, uint32_t width, uint32_t height, uint32_t depth,
uint32_t block_width, uint32_t block_height, std::span<uint8_t> output);
} // namespace Tegra::Texture::ASTC
// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
namespace Tegra::Texture::ASTC {
/// Declaration of the ASTC decompression entry point; the definition is not part of this header.
///
/// @param data          Read-only input bytes.
/// @param width         Image width.
/// @param height        Image height.
/// @param depth         Image depth.
/// @param block_width   Block width.
/// @param block_height  Block height.
/// @param output        Writable destination bytes.
///
/// NOTE(review): presumably `data` holds ASTC-compressed blocks, the dimensions are in texels and
/// `output` receives the decoded image - confirm units and output format against the definition.
void Decompress(std::span<const uint8_t> data, uint32_t width, uint32_t height, uint32_t depth,
uint32_t block_width, uint32_t block_height, std::span<uint8_t> output);
} // namespace Tegra::Texture::ASTC

View File

@@ -1,272 +1,272 @@
// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include <array>
#include <cmath>
#include <cstring>
#include <span>
#include "common/alignment.h"
#include "common/assert.h"
#include "common/bit_util.h"
#include "common/div_ceil.h"
#include "video_core/gpu.h"
#include "video_core/textures/decoders.h"
namespace Tegra::Texture {
namespace {
/// Software parallel-bit-deposit: scatters the low-order bits of `value` into the positions of
/// the set bits of `mask`, starting with the lowest mask bit. Bits of `value` beyond the number
/// of set mask bits are ignored.
template <u32 mask>
constexpr u32 pdep(u32 value) {
    u32 deposited = 0;
    u32 source_bit = 1;
    u32 remaining = mask;
    while (remaining != 0) {
        // Isolate the lowest mask bit that has not been consumed yet.
        const u32 lowest = remaining & (~remaining + 1);
        if ((value & source_bit) != 0) {
            deposited |= lowest;
        }
        // Consume that mask bit and move on to the next source bit.
        remaining &= remaining - 1;
        source_bit <<= 1;
    }
    return deposited;
}
/// Adds `incr_amount` to a counter that is stored in deposited form, i.e. spread over the set
/// bits of `mask`. The result is again confined to `mask`.
template <u32 mask, u32 incr_amount>
void incrpdep(u32& value) {
    constexpr u32 step = pdep<mask>(incr_amount);
    // Setting every non-mask bit lets carries ripple across the gaps between mask bits.
    const u32 carry_ready = value | ~mask;
    value = (carry_ready + step) & mask;
}
/// Copies every texel of a (possibly 3D) image between a tightly packed linear buffer and a
/// GOB-tiled (block linear) buffer, BYTES_PER_PIXEL bytes at a time.
///
/// Direction: when TO_LINEAR is true the *output* is indexed with the swizzled offset and the
/// input with the linear one; when it is false the roles are reversed (see the dst/src lines).
///
/// @tparam TO_LINEAR        Selects the copy direction as described above.
/// @tparam BYTES_PER_PIXEL  Bytes copied per texel.
/// @param output        Destination bytes.
/// @param input         Source bytes.
/// @param width         Texels per row.
/// @param height        Rows per slice.
/// @param depth         Number of slices.
/// @param block_height  Used as a shift amount: a block spans (1 << block_height) GOB rows.
/// @param block_depth   Used as a shift amount: a block spans (1 << block_depth) slices.
/// @param stride        Byte stride of a tiled row; converted to a GOB count below.
template <bool TO_LINEAR, u32 BYTES_PER_PIXEL>
void SwizzleImpl(std::span<u8> output, std::span<const u8> input, u32 width, u32 height, u32 depth,
u32 block_height, u32 block_depth, u32 stride) {
// The origin of the transformation can be configured here, leave it as zero as the current API
// doesn't expose it.
static constexpr u32 origin_x = 0;
static constexpr u32 origin_y = 0;
static constexpr u32 origin_z = 0;
// We can configure here a custom pitch
// As it's not exposed 'width * BYTES_PER_PIXEL' will be the expected pitch.
const u32 pitch = width * BYTES_PER_PIXEL;
// Number of GOBs per tiled row (presumably ceil(stride / GOB_SIZE_X); see Common::DivCeilLog2).
const u32 gobs_in_x = Common::DivCeilLog2(stride, GOB_SIZE_X_SHIFT);
// Bytes covered by one row of blocks, and by one layer of blocks in depth.
const u32 block_size = gobs_in_x << (GOB_SIZE_SHIFT + block_height + block_depth);
const u32 slice_size =
Common::DivCeilLog2(height, block_height + GOB_SIZE_Y_SHIFT) * block_size;
const u32 block_height_mask = (1U << block_height) - 1;
const u32 block_depth_mask = (1U << block_depth) - 1;
// Shift that turns a GOB column index into a byte offset (one full block per GOB column).
const u32 x_shift = GOB_SIZE_SHIFT + block_height + block_depth;
for (u32 slice = 0; slice < depth; ++slice) {
const u32 z = slice + origin_z;
// Block layer offset plus the slice's position inside its block.
const u32 offset_z = (z >> block_depth) * slice_size +
((z & block_depth_mask) << (GOB_SIZE_SHIFT + block_height));
for (u32 line = 0; line < height; ++line) {
const u32 y = line + origin_y;
// Bits of y that land inside the GOB.
const u32 swizzled_y = pdep<SWIZZLE_Y_BITS>(y);
// GOB row index; split into block row and GOB-within-block below.
const u32 block_y = y >> GOB_SIZE_Y_SHIFT;
const u32 offset_y = (block_y >> block_height) * block_size +
((block_y & block_height_mask) << GOB_SIZE_SHIFT);
// In-GOB x bits, advanced incrementally per column instead of recomputing pdep.
u32 swizzled_x = pdep<SWIZZLE_X_BITS>(origin_x * BYTES_PER_PIXEL);
for (u32 column = 0; column < width;
++column, incrpdep<SWIZZLE_X_BITS, BYTES_PER_PIXEL>(swizzled_x)) {
// x is a byte position within the row.
const u32 x = (column + origin_x) * BYTES_PER_PIXEL;
const u32 offset_x = (x >> GOB_SIZE_X_SHIFT) << x_shift;
const u32 base_swizzled_offset = offset_z + offset_y + offset_x;
const u32 swizzled_offset = base_swizzled_offset + (swizzled_x | swizzled_y);
const u32 unswizzled_offset =
slice * pitch * height + line * pitch + column * BYTES_PER_PIXEL;
u8* const dst = &output[TO_LINEAR ? swizzled_offset : unswizzled_offset];
const u8* const src = &input[TO_LINEAR ? unswizzled_offset : swizzled_offset];
std::memcpy(dst, src, BYTES_PER_PIXEL);
}
}
}
}
/// Copies a rectangular region between a linear buffer and a GOB-tiled (block linear) image.
///
/// The tiled side is addressed at (origin_x + column, origin_y + line); the linear side is
/// addressed relative to the rectangle, i.e. starting at column 0 / line 0 with `pitch_linear`
/// bytes per row.
///
/// Direction: when TO_LINEAR is true the *output* is indexed with the swizzled offset and the
/// input with the linear one; when it is false the roles are reversed (see the dst/src lines).
///
/// @tparam TO_LINEAR        Selects the copy direction as described above.
/// @tparam BYTES_PER_PIXEL  Bytes copied per texel.
/// @param output        Destination bytes.
/// @param input         Source bytes.
/// @param width         Width of the tiled image in texels (used to derive its GOB stride).
/// @param height        Height of the tiled image in rows.
/// @param depth         Number of slices to visit at most.
/// @param origin_x      First column of the rectangle inside the tiled image, in texels.
/// @param origin_y      First row of the rectangle inside the tiled image.
/// @param extent_x      Columns copied per row.
/// @param num_lines     Total number of rows to copy across all slices.
/// @param block_height  Used as a shift amount: a block spans (1 << block_height) GOB rows.
/// @param block_depth   Used as a shift amount: a block spans (1 << block_depth) slices.
/// @param pitch_linear  Byte pitch of a row in the linear buffer.
template <bool TO_LINEAR, u32 BYTES_PER_PIXEL>
void SwizzleSubrectImpl(std::span<u8> output, std::span<const u8> input, u32 width, u32 height,
u32 depth, u32 origin_x, u32 origin_y, u32 extent_x, u32 num_lines,
u32 block_height, u32 block_depth, u32 pitch_linear) {
// Only the Z origin is fixed here; the X and Y origins are supplied by the caller.
static constexpr u32 origin_z = 0;
// The linear pitch is supplied by the caller rather than derived from the width.
const u32 pitch = pitch_linear;
// Tiled row stride in bytes, derived from the image width and GOB_SIZE_X_SHIFT.
const u32 stride = Common::AlignUpLog2(width * BYTES_PER_PIXEL, GOB_SIZE_X_SHIFT);
const u32 gobs_in_x = Common::DivCeilLog2(stride, GOB_SIZE_X_SHIFT);
// Bytes covered by one row of blocks, and by one layer of blocks in depth.
const u32 block_size = gobs_in_x << (GOB_SIZE_SHIFT + block_height + block_depth);
const u32 slice_size =
Common::DivCeilLog2(height, block_height + GOB_SIZE_Y_SHIFT) * block_size;
const u32 block_height_mask = (1U << block_height) - 1;
const u32 block_depth_mask = (1U << block_depth) - 1;
// Shift that turns a GOB column index into a byte offset (one full block per GOB column).
const u32 x_shift = GOB_SIZE_SHIFT + block_height + block_depth;
u32 unprocessed_lines = num_lines;
// Rows available per slice below origin_y.
// NOTE(review): 'height - origin_y' is unsigned and wraps if origin_y > height - confirm callers
// never pass such an origin.
u32 extent_y = std::min(num_lines, height - origin_y);
for (u32 slice = 0; slice < depth; ++slice) {
const u32 z = slice + origin_z;
const u32 offset_z = (z >> block_depth) * slice_size +
((z & block_depth_mask) << (GOB_SIZE_SHIFT + block_height));
// Rows handled in this slice: whatever is left, capped by the per-slice extent.
const u32 lines_in_y = std::min(unprocessed_lines, extent_y);
for (u32 line = 0; line < lines_in_y; ++line) {
const u32 y = line + origin_y;
// Bits of y that land inside the GOB.
const u32 swizzled_y = pdep<SWIZZLE_Y_BITS>(y);
// GOB row index; split into block row and GOB-within-block below.
const u32 block_y = y >> GOB_SIZE_Y_SHIFT;
const u32 offset_y = (block_y >> block_height) * block_size +
((block_y & block_height_mask) << GOB_SIZE_SHIFT);
// In-GOB x bits, seeded from origin_x and advanced incrementally per column.
u32 swizzled_x = pdep<SWIZZLE_X_BITS>(origin_x * BYTES_PER_PIXEL);
for (u32 column = 0; column < extent_x;
++column, incrpdep<SWIZZLE_X_BITS, BYTES_PER_PIXEL>(swizzled_x)) {
// x is a byte position within the tiled row.
const u32 x = (column + origin_x) * BYTES_PER_PIXEL;
const u32 offset_x = (x >> GOB_SIZE_X_SHIFT) << x_shift;
const u32 base_swizzled_offset = offset_z + offset_y + offset_x;
const u32 swizzled_offset = base_swizzled_offset + (swizzled_x | swizzled_y);
// Linear offset is relative to the rectangle, not to the tiled image origin.
const u32 unswizzled_offset =
slice * pitch * height + line * pitch + column * BYTES_PER_PIXEL;
u8* const dst = &output[TO_LINEAR ? swizzled_offset : unswizzled_offset];
const u8* const src = &input[TO_LINEAR ? unswizzled_offset : swizzled_offset];
std::memcpy(dst, src, BYTES_PER_PIXEL);
}
}
unprocessed_lines -= lines_in_y;
// Stop as soon as the requested number of rows has been copied.
if (unprocessed_lines == 0) {
return;
}
}
}
/// Maps the runtime `bytes_per_pixel` onto the matching SwizzleImpl instantiation.
///
/// Supported sizes are 1, 2, 3, 4, 6, 8, 12 and 16 bytes; any other value triggers an assertion
/// and nothing is copied. The last argument is forwarded unchanged as SwizzleImpl's `stride`
/// (the callers in this file pass a byte stride there).
template <bool TO_LINEAR>
void Swizzle(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel, u32 width,
             u32 height, u32 depth, u32 block_height, u32 block_depth, u32 stride_alignment) {
    switch (bytes_per_pixel) {
    case 1:
        SwizzleImpl<TO_LINEAR, 1>(output, input, width, height, depth, block_height, block_depth,
                                  stride_alignment);
        return;
    case 2:
        SwizzleImpl<TO_LINEAR, 2>(output, input, width, height, depth, block_height, block_depth,
                                  stride_alignment);
        return;
    case 3:
        SwizzleImpl<TO_LINEAR, 3>(output, input, width, height, depth, block_height, block_depth,
                                  stride_alignment);
        return;
    case 4:
        SwizzleImpl<TO_LINEAR, 4>(output, input, width, height, depth, block_height, block_depth,
                                  stride_alignment);
        return;
    case 6:
        SwizzleImpl<TO_LINEAR, 6>(output, input, width, height, depth, block_height, block_depth,
                                  stride_alignment);
        return;
    case 8:
        SwizzleImpl<TO_LINEAR, 8>(output, input, width, height, depth, block_height, block_depth,
                                  stride_alignment);
        return;
    case 12:
        SwizzleImpl<TO_LINEAR, 12>(output, input, width, height, depth, block_height, block_depth,
                                   stride_alignment);
        return;
    case 16:
        SwizzleImpl<TO_LINEAR, 16>(output, input, width, height, depth, block_height, block_depth,
                                   stride_alignment);
        return;
    default:
        ASSERT_MSG(false, "Invalid bytes_per_pixel={}", bytes_per_pixel);
        return;
    }
}
} // Anonymous namespace
/// Converts a block linear image in `input` into a tightly packed linear image in `output`.
///
/// Each row is re-expressed as wider power-of-two texels (at most 16 bytes, and never wider than
/// the row's byte count allows) before dispatching, so the copy loop moves larger chunks.
void UnswizzleTexture(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel,
                      u32 width, u32 height, u32 depth, u32 block_height, u32 block_depth,
                      u32 stride_alignment) {
    const u32 row_bytes = width * bytes_per_pixel;
    // The tiled stride is computed from the original texel width before any repacking.
    const u32 stride = Common::AlignUpLog2(width, stride_alignment) * bytes_per_pixel;
    // Largest power of two (capped at 2^4) that divides the row's byte count.
    const u32 texel_shift = std::min(4U, static_cast<u32>(std::countr_zero(row_bytes)));
    const u32 repacked_bpp = 1U << texel_shift;
    const u32 repacked_width = row_bytes >> texel_shift;
    Swizzle<false>(output, input, repacked_bpp, repacked_width, height, depth, block_height,
                   block_depth, stride);
}
/// Converts a tightly packed linear image in `input` into a block linear image in `output`.
///
/// Each row is re-expressed as wider power-of-two texels (at most 16 bytes, and never wider than
/// the row's byte count allows) before dispatching, so the copy loop moves larger chunks.
void SwizzleTexture(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel, u32 width,
                    u32 height, u32 depth, u32 block_height, u32 block_depth,
                    u32 stride_alignment) {
    const u32 row_bytes = width * bytes_per_pixel;
    // The tiled stride is computed from the original texel width before any repacking.
    const u32 stride = Common::AlignUpLog2(width, stride_alignment) * bytes_per_pixel;
    // Largest power of two (capped at 2^4) that divides the row's byte count.
    const u32 texel_shift = std::min(4U, static_cast<u32>(std::countr_zero(row_bytes)));
    const u32 repacked_bpp = 1U << texel_shift;
    const u32 repacked_width = row_bytes >> texel_shift;
    Swizzle<true>(output, input, repacked_bpp, repacked_width, height, depth, block_height,
                  block_depth, stride);
}
/// Copies a rectangle from the linear buffer `input` into the block linear image `output`.
///
/// Selects the SwizzleSubrectImpl instantiation for `bytes_per_pixel` (1, 2, 3, 4, 6, 8, 12 or
/// 16); any other value triggers an assertion and nothing is copied.
void SwizzleSubrect(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel, u32 width,
                    u32 height, u32 depth, u32 origin_x, u32 origin_y, u32 extent_x, u32 extent_y,
                    u32 block_height, u32 block_depth, u32 pitch_linear) {
    switch (bytes_per_pixel) {
    case 1:
        SwizzleSubrectImpl<true, 1>(output, input, width, height, depth, origin_x, origin_y,
                                    extent_x, extent_y, block_height, block_depth, pitch_linear);
        return;
    case 2:
        SwizzleSubrectImpl<true, 2>(output, input, width, height, depth, origin_x, origin_y,
                                    extent_x, extent_y, block_height, block_depth, pitch_linear);
        return;
    case 3:
        SwizzleSubrectImpl<true, 3>(output, input, width, height, depth, origin_x, origin_y,
                                    extent_x, extent_y, block_height, block_depth, pitch_linear);
        return;
    case 4:
        SwizzleSubrectImpl<true, 4>(output, input, width, height, depth, origin_x, origin_y,
                                    extent_x, extent_y, block_height, block_depth, pitch_linear);
        return;
    case 6:
        SwizzleSubrectImpl<true, 6>(output, input, width, height, depth, origin_x, origin_y,
                                    extent_x, extent_y, block_height, block_depth, pitch_linear);
        return;
    case 8:
        SwizzleSubrectImpl<true, 8>(output, input, width, height, depth, origin_x, origin_y,
                                    extent_x, extent_y, block_height, block_depth, pitch_linear);
        return;
    case 12:
        SwizzleSubrectImpl<true, 12>(output, input, width, height, depth, origin_x, origin_y,
                                     extent_x, extent_y, block_height, block_depth, pitch_linear);
        return;
    case 16:
        SwizzleSubrectImpl<true, 16>(output, input, width, height, depth, origin_x, origin_y,
                                     extent_x, extent_y, block_height, block_depth, pitch_linear);
        return;
    default:
        ASSERT_MSG(false, "Invalid bytes_per_pixel={}", bytes_per_pixel);
        return;
    }
}
/// Copies a rectangle from the block linear image `input` into the linear buffer `output`.
///
/// Selects the SwizzleSubrectImpl instantiation for `bytes_per_pixel` (1, 2, 3, 4, 6, 8, 12 or
/// 16); any other value triggers an assertion and nothing is copied.
void UnswizzleSubrect(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel,
                      u32 width, u32 height, u32 depth, u32 origin_x, u32 origin_y, u32 extent_x,
                      u32 extent_y, u32 block_height, u32 block_depth, u32 pitch_linear) {
    switch (bytes_per_pixel) {
    case 1:
        SwizzleSubrectImpl<false, 1>(output, input, width, height, depth, origin_x, origin_y,
                                     extent_x, extent_y, block_height, block_depth, pitch_linear);
        return;
    case 2:
        SwizzleSubrectImpl<false, 2>(output, input, width, height, depth, origin_x, origin_y,
                                     extent_x, extent_y, block_height, block_depth, pitch_linear);
        return;
    case 3:
        SwizzleSubrectImpl<false, 3>(output, input, width, height, depth, origin_x, origin_y,
                                     extent_x, extent_y, block_height, block_depth, pitch_linear);
        return;
    case 4:
        SwizzleSubrectImpl<false, 4>(output, input, width, height, depth, origin_x, origin_y,
                                     extent_x, extent_y, block_height, block_depth, pitch_linear);
        return;
    case 6:
        SwizzleSubrectImpl<false, 6>(output, input, width, height, depth, origin_x, origin_y,
                                     extent_x, extent_y, block_height, block_depth, pitch_linear);
        return;
    case 8:
        SwizzleSubrectImpl<false, 8>(output, input, width, height, depth, origin_x, origin_y,
                                     extent_x, extent_y, block_height, block_depth, pitch_linear);
        return;
    case 12:
        SwizzleSubrectImpl<false, 12>(output, input, width, height, depth, origin_x, origin_y,
                                      extent_x, extent_y, block_height, block_depth, pitch_linear);
        return;
    case 16:
        SwizzleSubrectImpl<false, 16>(output, input, width, height, depth, origin_x, origin_y,
                                      extent_x, extent_y, block_height, block_depth, pitch_linear);
        return;
    default:
        ASSERT_MSG(false, "Invalid bytes_per_pixel={}", bytes_per_pixel);
        return;
    }
}
/// Returns the number of bytes needed to store an image.
///
/// For tiled images every dimension is first rounded up with Common::AlignUpLog2: the row byte
/// count to GOB_SIZE_X_SHIFT, the height to GOB_SIZE_Y_SHIFT + block_height and the depth to
/// GOB_SIZE_Z_SHIFT + block_depth. For untiled images the size is the plain product of the
/// dimensions and `bytes_per_pixel`.
///
/// The final product is evaluated in std::size_t so that large images do not wrap around in
/// 32-bit arithmetic before the result is widened.
std::size_t CalculateSize(bool tiled, u32 bytes_per_pixel, u32 width, u32 height, u32 depth,
                          u32 block_height, u32 block_depth) {
    if (!tiled) {
        return static_cast<std::size_t>(width) * height * depth * bytes_per_pixel;
    }
    const u32 aligned_width = Common::AlignUpLog2(width * bytes_per_pixel, GOB_SIZE_X_SHIFT);
    const u32 aligned_height = Common::AlignUpLog2(height, GOB_SIZE_Y_SHIFT + block_height);
    const u32 aligned_depth = Common::AlignUpLog2(depth, GOB_SIZE_Z_SHIFT + block_depth);
    return static_cast<std::size_t>(aligned_width) * aligned_height * aligned_depth;
}
/// Returns the byte offset of the GOB that contains texel (dst_x, dst_y) in a 2D block linear
/// image.
///
/// @param width            Image width in texels.
/// @param height           Unused; kept so the signature matches the declaration.
/// @param dst_x            Texel column.
/// @param dst_y            Texel row.
/// @param block_height     Used as a shift amount: a block is (1 << block_height) GOBs tall.
/// @param bytes_per_pixel  Bytes per texel; must be in 1..GOB_SIZE_X, otherwise the divisions
///                         below divide by zero.
///
/// Offsets are accumulated in 64 bits so that large images do not wrap around before the result
/// is returned as u64.
u64 GetGOBOffset(u32 width, [[maybe_unused]] u32 height, u32 dst_x, u32 dst_y, u32 block_height,
                 u32 bytes_per_pixel) {
    const auto div_ceil = [](const u32 x, const u32 y) { return (x + y - 1) / y; };
    const u32 gobs_in_block = 1U << block_height;
    // Rows of texels covered by one block.
    const u32 y_blocks = GOB_SIZE_Y << block_height;
    // Texels that fit in one GOB row.
    const u32 x_per_gob = GOB_SIZE_X / bytes_per_pixel;
    const u32 x_blocks = div_ceil(width, x_per_gob);
    const u64 block_size = static_cast<u64>(GOB_SIZE) * gobs_in_block;
    // Bytes covered by one full row of blocks.
    const u64 stride = block_size * x_blocks;
    const u64 base = (dst_y / y_blocks) * stride + (dst_x / x_per_gob) * block_size;
    const u32 relative_y = dst_y % y_blocks;
    return base + static_cast<u64>(relative_y / GOB_SIZE_Y) * GOB_SIZE;
}
} // namespace Tegra::Texture
// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include <array>
#include <cmath>
#include <cstring>
#include <span>
#include "common/alignment.h"
#include "common/assert.h"
#include "common/bit_util.h"
#include "common/div_ceil.h"
#include "video_core/gpu.h"
#include "video_core/textures/decoders.h"
namespace Tegra::Texture {
namespace {
/// Software parallel-bit-deposit: scatters the low-order bits of `value` into the positions of
/// the set bits of `mask`, starting with the lowest mask bit. Bits of `value` beyond the number
/// of set mask bits are ignored.
template <u32 mask>
constexpr u32 pdep(u32 value) {
    u32 deposited = 0;
    u32 source_bit = 1;
    u32 remaining = mask;
    while (remaining != 0) {
        // Isolate the lowest mask bit that has not been consumed yet.
        const u32 lowest = remaining & (~remaining + 1);
        if ((value & source_bit) != 0) {
            deposited |= lowest;
        }
        // Consume that mask bit and move on to the next source bit.
        remaining &= remaining - 1;
        source_bit <<= 1;
    }
    return deposited;
}
/// Adds `incr_amount` to a counter that is stored in deposited form, i.e. spread over the set
/// bits of `mask`. The result is again confined to `mask`.
template <u32 mask, u32 incr_amount>
void incrpdep(u32& value) {
    constexpr u32 step = pdep<mask>(incr_amount);
    // Setting every non-mask bit lets carries ripple across the gaps between mask bits.
    const u32 carry_ready = value | ~mask;
    value = (carry_ready + step) & mask;
}
/// Copies every texel of a (possibly 3D) image between a tightly packed linear buffer and a
/// GOB-tiled (block linear) buffer, BYTES_PER_PIXEL bytes at a time.
///
/// Direction: when TO_LINEAR is true the *output* is indexed with the swizzled offset and the
/// input with the linear one; when it is false the roles are reversed (see the dst/src lines).
///
/// @tparam TO_LINEAR        Selects the copy direction as described above.
/// @tparam BYTES_PER_PIXEL  Bytes copied per texel.
/// @param output        Destination bytes.
/// @param input         Source bytes.
/// @param width         Texels per row.
/// @param height        Rows per slice.
/// @param depth         Number of slices.
/// @param block_height  Used as a shift amount: a block spans (1 << block_height) GOB rows.
/// @param block_depth   Used as a shift amount: a block spans (1 << block_depth) slices.
/// @param stride        Byte stride of a tiled row; converted to a GOB count below.
template <bool TO_LINEAR, u32 BYTES_PER_PIXEL>
void SwizzleImpl(std::span<u8> output, std::span<const u8> input, u32 width, u32 height, u32 depth,
u32 block_height, u32 block_depth, u32 stride) {
// The origin of the transformation can be configured here, leave it as zero as the current API
// doesn't expose it.
static constexpr u32 origin_x = 0;
static constexpr u32 origin_y = 0;
static constexpr u32 origin_z = 0;
// We can configure here a custom pitch
// As it's not exposed 'width * BYTES_PER_PIXEL' will be the expected pitch.
const u32 pitch = width * BYTES_PER_PIXEL;
// Number of GOBs per tiled row (presumably ceil(stride / GOB_SIZE_X); see Common::DivCeilLog2).
const u32 gobs_in_x = Common::DivCeilLog2(stride, GOB_SIZE_X_SHIFT);
// Bytes covered by one row of blocks, and by one layer of blocks in depth.
const u32 block_size = gobs_in_x << (GOB_SIZE_SHIFT + block_height + block_depth);
const u32 slice_size =
Common::DivCeilLog2(height, block_height + GOB_SIZE_Y_SHIFT) * block_size;
const u32 block_height_mask = (1U << block_height) - 1;
const u32 block_depth_mask = (1U << block_depth) - 1;
// Shift that turns a GOB column index into a byte offset (one full block per GOB column).
const u32 x_shift = GOB_SIZE_SHIFT + block_height + block_depth;
for (u32 slice = 0; slice < depth; ++slice) {
const u32 z = slice + origin_z;
// Block layer offset plus the slice's position inside its block.
const u32 offset_z = (z >> block_depth) * slice_size +
((z & block_depth_mask) << (GOB_SIZE_SHIFT + block_height));
for (u32 line = 0; line < height; ++line) {
const u32 y = line + origin_y;
// Bits of y that land inside the GOB.
const u32 swizzled_y = pdep<SWIZZLE_Y_BITS>(y);
// GOB row index; split into block row and GOB-within-block below.
const u32 block_y = y >> GOB_SIZE_Y_SHIFT;
const u32 offset_y = (block_y >> block_height) * block_size +
((block_y & block_height_mask) << GOB_SIZE_SHIFT);
// In-GOB x bits, advanced incrementally per column instead of recomputing pdep.
u32 swizzled_x = pdep<SWIZZLE_X_BITS>(origin_x * BYTES_PER_PIXEL);
for (u32 column = 0; column < width;
++column, incrpdep<SWIZZLE_X_BITS, BYTES_PER_PIXEL>(swizzled_x)) {
// x is a byte position within the row.
const u32 x = (column + origin_x) * BYTES_PER_PIXEL;
const u32 offset_x = (x >> GOB_SIZE_X_SHIFT) << x_shift;
const u32 base_swizzled_offset = offset_z + offset_y + offset_x;
const u32 swizzled_offset = base_swizzled_offset + (swizzled_x | swizzled_y);
const u32 unswizzled_offset =
slice * pitch * height + line * pitch + column * BYTES_PER_PIXEL;
u8* const dst = &output[TO_LINEAR ? swizzled_offset : unswizzled_offset];
const u8* const src = &input[TO_LINEAR ? unswizzled_offset : swizzled_offset];
std::memcpy(dst, src, BYTES_PER_PIXEL);
}
}
}
}
/// Copies a rectangular region between a linear buffer and a GOB-tiled (block linear) image.
///
/// The tiled side is addressed at (origin_x + column, origin_y + line); the linear side is
/// addressed relative to the rectangle, i.e. starting at column 0 / line 0 with `pitch_linear`
/// bytes per row.
///
/// Direction: when TO_LINEAR is true the *output* is indexed with the swizzled offset and the
/// input with the linear one; when it is false the roles are reversed (see the dst/src lines).
///
/// @tparam TO_LINEAR        Selects the copy direction as described above.
/// @tparam BYTES_PER_PIXEL  Bytes copied per texel.
/// @param output        Destination bytes.
/// @param input         Source bytes.
/// @param width         Width of the tiled image in texels (used to derive its GOB stride).
/// @param height        Height of the tiled image in rows.
/// @param depth         Number of slices to visit at most.
/// @param origin_x      First column of the rectangle inside the tiled image, in texels.
/// @param origin_y      First row of the rectangle inside the tiled image.
/// @param extent_x      Columns copied per row.
/// @param num_lines     Total number of rows to copy across all slices.
/// @param block_height  Used as a shift amount: a block spans (1 << block_height) GOB rows.
/// @param block_depth   Used as a shift amount: a block spans (1 << block_depth) slices.
/// @param pitch_linear  Byte pitch of a row in the linear buffer.
template <bool TO_LINEAR, u32 BYTES_PER_PIXEL>
void SwizzleSubrectImpl(std::span<u8> output, std::span<const u8> input, u32 width, u32 height,
u32 depth, u32 origin_x, u32 origin_y, u32 extent_x, u32 num_lines,
u32 block_height, u32 block_depth, u32 pitch_linear) {
// Only the Z origin is fixed here; the X and Y origins are supplied by the caller.
static constexpr u32 origin_z = 0;
// The linear pitch is supplied by the caller rather than derived from the width.
const u32 pitch = pitch_linear;
// Tiled row stride in bytes, derived from the image width and GOB_SIZE_X_SHIFT.
const u32 stride = Common::AlignUpLog2(width * BYTES_PER_PIXEL, GOB_SIZE_X_SHIFT);
const u32 gobs_in_x = Common::DivCeilLog2(stride, GOB_SIZE_X_SHIFT);
// Bytes covered by one row of blocks, and by one layer of blocks in depth.
const u32 block_size = gobs_in_x << (GOB_SIZE_SHIFT + block_height + block_depth);
const u32 slice_size =
Common::DivCeilLog2(height, block_height + GOB_SIZE_Y_SHIFT) * block_size;
const u32 block_height_mask = (1U << block_height) - 1;
const u32 block_depth_mask = (1U << block_depth) - 1;
// Shift that turns a GOB column index into a byte offset (one full block per GOB column).
const u32 x_shift = GOB_SIZE_SHIFT + block_height + block_depth;
u32 unprocessed_lines = num_lines;
// Rows available per slice below origin_y.
// NOTE(review): 'height - origin_y' is unsigned and wraps if origin_y > height - confirm callers
// never pass such an origin.
u32 extent_y = std::min(num_lines, height - origin_y);
for (u32 slice = 0; slice < depth; ++slice) {
const u32 z = slice + origin_z;
const u32 offset_z = (z >> block_depth) * slice_size +
((z & block_depth_mask) << (GOB_SIZE_SHIFT + block_height));
// Rows handled in this slice: whatever is left, capped by the per-slice extent.
const u32 lines_in_y = std::min(unprocessed_lines, extent_y);
for (u32 line = 0; line < lines_in_y; ++line) {
const u32 y = line + origin_y;
// Bits of y that land inside the GOB.
const u32 swizzled_y = pdep<SWIZZLE_Y_BITS>(y);
// GOB row index; split into block row and GOB-within-block below.
const u32 block_y = y >> GOB_SIZE_Y_SHIFT;
const u32 offset_y = (block_y >> block_height) * block_size +
((block_y & block_height_mask) << GOB_SIZE_SHIFT);
// In-GOB x bits, seeded from origin_x and advanced incrementally per column.
u32 swizzled_x = pdep<SWIZZLE_X_BITS>(origin_x * BYTES_PER_PIXEL);
for (u32 column = 0; column < extent_x;
++column, incrpdep<SWIZZLE_X_BITS, BYTES_PER_PIXEL>(swizzled_x)) {
// x is a byte position within the tiled row.
const u32 x = (column + origin_x) * BYTES_PER_PIXEL;
const u32 offset_x = (x >> GOB_SIZE_X_SHIFT) << x_shift;
const u32 base_swizzled_offset = offset_z + offset_y + offset_x;
const u32 swizzled_offset = base_swizzled_offset + (swizzled_x | swizzled_y);
// Linear offset is relative to the rectangle, not to the tiled image origin.
const u32 unswizzled_offset =
slice * pitch * height + line * pitch + column * BYTES_PER_PIXEL;
u8* const dst = &output[TO_LINEAR ? swizzled_offset : unswizzled_offset];
const u8* const src = &input[TO_LINEAR ? unswizzled_offset : swizzled_offset];
std::memcpy(dst, src, BYTES_PER_PIXEL);
}
}
unprocessed_lines -= lines_in_y;
// Stop as soon as the requested number of rows has been copied.
if (unprocessed_lines == 0) {
return;
}
}
}
/// Maps the runtime `bytes_per_pixel` onto the matching SwizzleImpl instantiation.
///
/// Supported sizes are 1, 2, 3, 4, 6, 8, 12 and 16 bytes; any other value triggers an assertion
/// and nothing is copied. The last argument is forwarded unchanged as SwizzleImpl's `stride`
/// (the callers in this file pass a byte stride there).
template <bool TO_LINEAR>
void Swizzle(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel, u32 width,
             u32 height, u32 depth, u32 block_height, u32 block_depth, u32 stride_alignment) {
    switch (bytes_per_pixel) {
    case 1:
        SwizzleImpl<TO_LINEAR, 1>(output, input, width, height, depth, block_height, block_depth,
                                  stride_alignment);
        return;
    case 2:
        SwizzleImpl<TO_LINEAR, 2>(output, input, width, height, depth, block_height, block_depth,
                                  stride_alignment);
        return;
    case 3:
        SwizzleImpl<TO_LINEAR, 3>(output, input, width, height, depth, block_height, block_depth,
                                  stride_alignment);
        return;
    case 4:
        SwizzleImpl<TO_LINEAR, 4>(output, input, width, height, depth, block_height, block_depth,
                                  stride_alignment);
        return;
    case 6:
        SwizzleImpl<TO_LINEAR, 6>(output, input, width, height, depth, block_height, block_depth,
                                  stride_alignment);
        return;
    case 8:
        SwizzleImpl<TO_LINEAR, 8>(output, input, width, height, depth, block_height, block_depth,
                                  stride_alignment);
        return;
    case 12:
        SwizzleImpl<TO_LINEAR, 12>(output, input, width, height, depth, block_height, block_depth,
                                   stride_alignment);
        return;
    case 16:
        SwizzleImpl<TO_LINEAR, 16>(output, input, width, height, depth, block_height, block_depth,
                                   stride_alignment);
        return;
    default:
        ASSERT_MSG(false, "Invalid bytes_per_pixel={}", bytes_per_pixel);
        return;
    }
}
} // Anonymous namespace
/// Converts a block linear image in `input` into a tightly packed linear image in `output`.
///
/// Each row is re-expressed as wider power-of-two texels (at most 16 bytes, and never wider than
/// the row's byte count allows) before dispatching, so the copy loop moves larger chunks.
void UnswizzleTexture(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel,
                      u32 width, u32 height, u32 depth, u32 block_height, u32 block_depth,
                      u32 stride_alignment) {
    const u32 row_bytes = width * bytes_per_pixel;
    // The tiled stride is computed from the original texel width before any repacking.
    const u32 stride = Common::AlignUpLog2(width, stride_alignment) * bytes_per_pixel;
    // Largest power of two (capped at 2^4) that divides the row's byte count.
    const u32 texel_shift = std::min(4U, static_cast<u32>(std::countr_zero(row_bytes)));
    const u32 repacked_bpp = 1U << texel_shift;
    const u32 repacked_width = row_bytes >> texel_shift;
    Swizzle<false>(output, input, repacked_bpp, repacked_width, height, depth, block_height,
                   block_depth, stride);
}
/// Converts a tightly packed linear image in `input` into a block linear image in `output`.
///
/// Each row is re-expressed as wider power-of-two texels (at most 16 bytes, and never wider than
/// the row's byte count allows) before dispatching, so the copy loop moves larger chunks.
void SwizzleTexture(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel, u32 width,
                    u32 height, u32 depth, u32 block_height, u32 block_depth,
                    u32 stride_alignment) {
    const u32 row_bytes = width * bytes_per_pixel;
    // The tiled stride is computed from the original texel width before any repacking.
    const u32 stride = Common::AlignUpLog2(width, stride_alignment) * bytes_per_pixel;
    // Largest power of two (capped at 2^4) that divides the row's byte count.
    const u32 texel_shift = std::min(4U, static_cast<u32>(std::countr_zero(row_bytes)));
    const u32 repacked_bpp = 1U << texel_shift;
    const u32 repacked_width = row_bytes >> texel_shift;
    Swizzle<true>(output, input, repacked_bpp, repacked_width, height, depth, block_height,
                  block_depth, stride);
}
/// Copies a rectangle from the linear buffer `input` into the block linear image `output`.
///
/// Selects the SwizzleSubrectImpl instantiation for `bytes_per_pixel` (1, 2, 3, 4, 6, 8, 12 or
/// 16); any other value triggers an assertion and nothing is copied.
void SwizzleSubrect(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel, u32 width,
                    u32 height, u32 depth, u32 origin_x, u32 origin_y, u32 extent_x, u32 extent_y,
                    u32 block_height, u32 block_depth, u32 pitch_linear) {
    switch (bytes_per_pixel) {
    case 1:
        SwizzleSubrectImpl<true, 1>(output, input, width, height, depth, origin_x, origin_y,
                                    extent_x, extent_y, block_height, block_depth, pitch_linear);
        return;
    case 2:
        SwizzleSubrectImpl<true, 2>(output, input, width, height, depth, origin_x, origin_y,
                                    extent_x, extent_y, block_height, block_depth, pitch_linear);
        return;
    case 3:
        SwizzleSubrectImpl<true, 3>(output, input, width, height, depth, origin_x, origin_y,
                                    extent_x, extent_y, block_height, block_depth, pitch_linear);
        return;
    case 4:
        SwizzleSubrectImpl<true, 4>(output, input, width, height, depth, origin_x, origin_y,
                                    extent_x, extent_y, block_height, block_depth, pitch_linear);
        return;
    case 6:
        SwizzleSubrectImpl<true, 6>(output, input, width, height, depth, origin_x, origin_y,
                                    extent_x, extent_y, block_height, block_depth, pitch_linear);
        return;
    case 8:
        SwizzleSubrectImpl<true, 8>(output, input, width, height, depth, origin_x, origin_y,
                                    extent_x, extent_y, block_height, block_depth, pitch_linear);
        return;
    case 12:
        SwizzleSubrectImpl<true, 12>(output, input, width, height, depth, origin_x, origin_y,
                                     extent_x, extent_y, block_height, block_depth, pitch_linear);
        return;
    case 16:
        SwizzleSubrectImpl<true, 16>(output, input, width, height, depth, origin_x, origin_y,
                                     extent_x, extent_y, block_height, block_depth, pitch_linear);
        return;
    default:
        ASSERT_MSG(false, "Invalid bytes_per_pixel={}", bytes_per_pixel);
        return;
    }
}
/// Copies a rectangle from the block linear image `input` into the linear buffer `output`.
///
/// Selects the SwizzleSubrectImpl instantiation for `bytes_per_pixel` (1, 2, 3, 4, 6, 8, 12 or
/// 16); any other value triggers an assertion and nothing is copied.
void UnswizzleSubrect(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel,
                      u32 width, u32 height, u32 depth, u32 origin_x, u32 origin_y, u32 extent_x,
                      u32 extent_y, u32 block_height, u32 block_depth, u32 pitch_linear) {
    switch (bytes_per_pixel) {
    case 1:
        SwizzleSubrectImpl<false, 1>(output, input, width, height, depth, origin_x, origin_y,
                                     extent_x, extent_y, block_height, block_depth, pitch_linear);
        return;
    case 2:
        SwizzleSubrectImpl<false, 2>(output, input, width, height, depth, origin_x, origin_y,
                                     extent_x, extent_y, block_height, block_depth, pitch_linear);
        return;
    case 3:
        SwizzleSubrectImpl<false, 3>(output, input, width, height, depth, origin_x, origin_y,
                                     extent_x, extent_y, block_height, block_depth, pitch_linear);
        return;
    case 4:
        SwizzleSubrectImpl<false, 4>(output, input, width, height, depth, origin_x, origin_y,
                                     extent_x, extent_y, block_height, block_depth, pitch_linear);
        return;
    case 6:
        SwizzleSubrectImpl<false, 6>(output, input, width, height, depth, origin_x, origin_y,
                                     extent_x, extent_y, block_height, block_depth, pitch_linear);
        return;
    case 8:
        SwizzleSubrectImpl<false, 8>(output, input, width, height, depth, origin_x, origin_y,
                                     extent_x, extent_y, block_height, block_depth, pitch_linear);
        return;
    case 12:
        SwizzleSubrectImpl<false, 12>(output, input, width, height, depth, origin_x, origin_y,
                                      extent_x, extent_y, block_height, block_depth, pitch_linear);
        return;
    case 16:
        SwizzleSubrectImpl<false, 16>(output, input, width, height, depth, origin_x, origin_y,
                                      extent_x, extent_y, block_height, block_depth, pitch_linear);
        return;
    default:
        ASSERT_MSG(false, "Invalid bytes_per_pixel={}", bytes_per_pixel);
        return;
    }
}
/// Returns the number of bytes needed to store an image.
///
/// For tiled images every dimension is first rounded up with Common::AlignUpLog2: the row byte
/// count to GOB_SIZE_X_SHIFT, the height to GOB_SIZE_Y_SHIFT + block_height and the depth to
/// GOB_SIZE_Z_SHIFT + block_depth. For untiled images the size is the plain product of the
/// dimensions and `bytes_per_pixel`.
///
/// The final product is evaluated in std::size_t so that large images do not wrap around in
/// 32-bit arithmetic before the result is widened.
std::size_t CalculateSize(bool tiled, u32 bytes_per_pixel, u32 width, u32 height, u32 depth,
                          u32 block_height, u32 block_depth) {
    if (!tiled) {
        return static_cast<std::size_t>(width) * height * depth * bytes_per_pixel;
    }
    const u32 aligned_width = Common::AlignUpLog2(width * bytes_per_pixel, GOB_SIZE_X_SHIFT);
    const u32 aligned_height = Common::AlignUpLog2(height, GOB_SIZE_Y_SHIFT + block_height);
    const u32 aligned_depth = Common::AlignUpLog2(depth, GOB_SIZE_Z_SHIFT + block_depth);
    return static_cast<std::size_t>(aligned_width) * aligned_height * aligned_depth;
}
/// Returns the byte offset of the GOB that contains texel (dst_x, dst_y) in a 2D block linear
/// image.
///
/// @param width            Image width in texels.
/// @param height           Unused; kept so the signature matches the declaration.
/// @param dst_x            Texel column.
/// @param dst_y            Texel row.
/// @param block_height     Used as a shift amount: a block is (1 << block_height) GOBs tall.
/// @param bytes_per_pixel  Bytes per texel; must be in 1..GOB_SIZE_X, otherwise the divisions
///                         below divide by zero.
///
/// Offsets are accumulated in 64 bits so that large images do not wrap around before the result
/// is returned as u64.
u64 GetGOBOffset(u32 width, [[maybe_unused]] u32 height, u32 dst_x, u32 dst_y, u32 block_height,
                 u32 bytes_per_pixel) {
    const auto div_ceil = [](const u32 x, const u32 y) { return (x + y - 1) / y; };
    const u32 gobs_in_block = 1U << block_height;
    // Rows of texels covered by one block.
    const u32 y_blocks = GOB_SIZE_Y << block_height;
    // Texels that fit in one GOB row.
    const u32 x_per_gob = GOB_SIZE_X / bytes_per_pixel;
    const u32 x_blocks = div_ceil(width, x_per_gob);
    const u64 block_size = static_cast<u64>(GOB_SIZE) * gobs_in_block;
    // Bytes covered by one full row of blocks.
    const u64 stride = block_size * x_blocks;
    const u64 base = (dst_y / y_blocks) * stride + (dst_x / x_per_gob) * block_size;
    const u32 relative_y = dst_y % y_blocks;
    return base + static_cast<u64>(relative_y / GOB_SIZE_Y) * GOB_SIZE;
}
} // namespace Tegra::Texture

View File

@@ -1,72 +1,72 @@
// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <span>
#include "common/common_types.h"
#include "video_core/textures/texture.h"
namespace Tegra::Texture {
// Dimensions of a single GOB. The swizzle code treats X as a byte count and Y as a row count.
constexpr u32 GOB_SIZE_X = 64;
constexpr u32 GOB_SIZE_Y = 8;
constexpr u32 GOB_SIZE_Z = 1;
// Total size of one GOB (64 * 8 * 1 = 512).
constexpr u32 GOB_SIZE = GOB_SIZE_X * GOB_SIZE_Y * GOB_SIZE_Z;
// log2 of the dimensions above, for shift-based arithmetic.
constexpr u32 GOB_SIZE_X_SHIFT = 6;
constexpr u32 GOB_SIZE_Y_SHIFT = 3;
constexpr u32 GOB_SIZE_Z_SHIFT = 0;
constexpr u32 GOB_SIZE_SHIFT = GOB_SIZE_X_SHIFT + GOB_SIZE_Y_SHIFT + GOB_SIZE_Z_SHIFT;
// Bit masks over the 9-bit offset inside a GOB: the set bits of SWIZZLE_X_BITS receive the x
// coordinate's bits and those of SWIZZLE_Y_BITS the y coordinate's bits. The two masks are
// disjoint and together cover bits 0-8.
constexpr u32 SWIZZLE_X_BITS = 0b100101111;
constexpr u32 SWIZZLE_Y_BITS = 0b011010000;
// Lookup table with one entry per (y, x) position inside a GOB.
using SwizzleTable = std::array<std::array<u32, GOB_SIZE_X>, GOB_SIZE_Y>;
/**
 * Builds the lookup table describing the internal swizzle of a GOB, in the 16 bytes x 2 sector
 * packing format. Entry [y][x] is the offset of position (x, y) inside the swizzled GOB.
 * Taken from the Tegra X1 Technical Reference Manual. pages 1187-1188
 */
constexpr SwizzleTable MakeSwizzleTable() {
    SwizzleTable table{};
    u32 y = 0;
    for (auto& row : table) {
        // Contribution of the row index is the same for every column of this row.
        const u32 y_part = ((y % 8) / 2) * 64 + (y % 2) * 16;
        u32 x = 0;
        for (u32& entry : row) {
            const u32 x_part = ((x % 64) / 32) * 256 + ((x % 32) / 16) * 32 + (x % 16);
            entry = x_part + y_part;
            ++x;
        }
        ++y;
    }
    return table;
}
/// Unswizzles a block linear texture into linear memory.
/// `block_height` and `block_depth` are used as shift amounts by the implementation (log2
/// values); `stride_alignment` is passed as the log2 argument of Common::AlignUpLog2 applied to
/// `width`.
void UnswizzleTexture(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel,
u32 width, u32 height, u32 depth, u32 block_height, u32 block_depth,
u32 stride_alignment = 1);
/// Swizzles linear memory into a block linear texture.
/// Parameters have the same meaning as for UnswizzleTexture, with the roles of `output` and
/// `input` exchanged.
void SwizzleTexture(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel, u32 width,
u32 height, u32 depth, u32 block_height, u32 block_depth,
u32 stride_alignment = 1);
/// This function calculates the correct size of a texture depending if it's tiled or not.
/// For tiled textures the dimensions are aligned up to GOB/block granularity first.
std::size_t CalculateSize(bool tiled, u32 bytes_per_pixel, u32 width, u32 height, u32 depth,
u32 block_height, u32 block_depth);
/// Copies an untiled subrectangle into a tiled surface.
/// `origin_x`/`origin_y` locate the rectangle inside the tiled surface, `extent_x` is the number
/// of columns per row, `extent_y` the total number of rows to copy and `pitch_linear` the byte
/// pitch of the untiled data.
void SwizzleSubrect(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel, u32 width,
u32 height, u32 depth, u32 origin_x, u32 origin_y, u32 extent_x, u32 extent_y,
u32 block_height, u32 block_depth, u32 pitch_linear);
/// Copies a tiled subrectangle into a linear surface.
/// Parameters have the same meaning as for SwizzleSubrect, with the roles of `output` and
/// `input` exchanged.
void UnswizzleSubrect(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel,
u32 width, u32 height, u32 depth, u32 origin_x, u32 origin_y, u32 extent_x,
u32 extent_y, u32 block_height, u32 block_depth, u32 pitch_linear);
/// Obtains the offset of the gob for positions 'dst_x' & 'dst_y'
/// The `height` argument is not used by the current implementation.
u64 GetGOBOffset(u32 width, u32 height, u32 dst_x, u32 dst_y, u32 block_height,
u32 bytes_per_pixel);
} // namespace Tegra::Texture
// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <span>
#include "common/common_types.h"
#include "video_core/textures/texture.h"
namespace Tegra::Texture {
// Dimensions of a single GOB. The swizzle code treats X as a byte count and Y as a row count.
constexpr u32 GOB_SIZE_X = 64;
constexpr u32 GOB_SIZE_Y = 8;
constexpr u32 GOB_SIZE_Z = 1;
// Total size of one GOB (64 * 8 * 1 = 512).
constexpr u32 GOB_SIZE = GOB_SIZE_X * GOB_SIZE_Y * GOB_SIZE_Z;
// log2 of the dimensions above, for shift-based arithmetic.
constexpr u32 GOB_SIZE_X_SHIFT = 6;
constexpr u32 GOB_SIZE_Y_SHIFT = 3;
constexpr u32 GOB_SIZE_Z_SHIFT = 0;
constexpr u32 GOB_SIZE_SHIFT = GOB_SIZE_X_SHIFT + GOB_SIZE_Y_SHIFT + GOB_SIZE_Z_SHIFT;
// Bit masks over the 9-bit offset inside a GOB: the set bits of SWIZZLE_X_BITS receive the x
// coordinate's bits and those of SWIZZLE_Y_BITS the y coordinate's bits. The two masks are
// disjoint and together cover bits 0-8.
constexpr u32 SWIZZLE_X_BITS = 0b100101111;
constexpr u32 SWIZZLE_Y_BITS = 0b011010000;
// Lookup table with one entry per (y, x) position inside a GOB.
using SwizzleTable = std::array<std::array<u32, GOB_SIZE_X>, GOB_SIZE_Y>;
/**
 * Builds the lookup table describing the internal swizzle of a GOB, in the 16 bytes x 2 sector
 * packing format. Entry [y][x] is the offset of position (x, y) inside the swizzled GOB.
 * Taken from the Tegra X1 Technical Reference Manual. pages 1187-1188
 */
constexpr SwizzleTable MakeSwizzleTable() {
    SwizzleTable table{};
    u32 y = 0;
    for (auto& row : table) {
        // Contribution of the row index is the same for every column of this row.
        const u32 y_part = ((y % 8) / 2) * 64 + (y % 2) * 16;
        u32 x = 0;
        for (u32& entry : row) {
            const u32 x_part = ((x % 64) / 32) * 256 + ((x % 32) / 16) * 32 + (x % 16);
            entry = x_part + y_part;
            ++x;
        }
        ++y;
    }
    return table;
}
/// Unswizzles a block linear texture into linear memory.
/// `block_height` and `block_depth` are used as shift amounts by the implementation (log2
/// values); `stride_alignment` is passed as the log2 argument of Common::AlignUpLog2 applied to
/// `width`.
void UnswizzleTexture(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel,
u32 width, u32 height, u32 depth, u32 block_height, u32 block_depth,
u32 stride_alignment = 1);
/// Swizzles linear memory into a block linear texture.
/// Parameters have the same meaning as for UnswizzleTexture, with the roles of `output` and
/// `input` exchanged.
void SwizzleTexture(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel, u32 width,
u32 height, u32 depth, u32 block_height, u32 block_depth,
u32 stride_alignment = 1);
/// This function calculates the correct size of a texture depending if it's tiled or not.
/// For tiled textures the dimensions are aligned up to GOB/block granularity first.
std::size_t CalculateSize(bool tiled, u32 bytes_per_pixel, u32 width, u32 height, u32 depth,
u32 block_height, u32 block_depth);
/// Copies an untiled subrectangle into a tiled surface.
/// `origin_x`/`origin_y` locate the rectangle inside the tiled surface, `extent_x` is the number
/// of columns per row, `extent_y` the total number of rows to copy and `pitch_linear` the byte
/// pitch of the untiled data.
void SwizzleSubrect(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel, u32 width,
u32 height, u32 depth, u32 origin_x, u32 origin_y, u32 extent_x, u32 extent_y,
u32 block_height, u32 block_depth, u32 pitch_linear);
/// Copies a tiled subrectangle into a linear surface.
/// Parameters have the same meaning as for SwizzleSubrect, with the roles of `output` and
/// `input` exchanged.
void UnswizzleSubrect(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel,
u32 width, u32 height, u32 depth, u32 origin_x, u32 origin_y, u32 extent_x,
u32 extent_y, u32 block_height, u32 block_depth, u32 pitch_linear);
/// Obtains the offset of the gob for positions 'dst_x' & 'dst_y'
/// The `height` argument is not used by the current implementation.
u64 GetGOBOffset(u32 width, u32 height, u32 dst_x, u32 dst_y, u32 block_height,
u32 bytes_per_pixel);
} // namespace Tegra::Texture

View File

@@ -1,85 +1,85 @@
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include <array>
#include "common/cityhash.h"
#include "common/settings.h"
#include "video_core/textures/texture.h"
using Tegra::Texture::TICEntry;
using Tegra::Texture::TSCEntry;
namespace Tegra::Texture {
namespace {
// 256-entry lookup table used by TSCEntry::BorderColor to convert the srgb_border_color_{r,g,b}
// fields into float colour components. Values rise from 0.0 to 1.0; the first three entries
// are 0.0 and the last three are 1.0.
// NOTE(review): the values do not match the textbook sRGB-to-linear curve exactly - presumably
// they mirror hardware behaviour; confirm the provenance before regenerating the table.
constexpr std::array<float, 256> SRGB_CONVERSION_LUT = {
0.000000f, 0.000000f, 0.000000f, 0.000012f, 0.000021f, 0.000033f, 0.000046f, 0.000062f,
0.000081f, 0.000102f, 0.000125f, 0.000151f, 0.000181f, 0.000214f, 0.000251f, 0.000293f,
0.000338f, 0.000388f, 0.000443f, 0.000503f, 0.000568f, 0.000639f, 0.000715f, 0.000798f,
0.000887f, 0.000983f, 0.001085f, 0.001195f, 0.001312f, 0.001437f, 0.001569f, 0.001710f,
0.001860f, 0.002019f, 0.002186f, 0.002364f, 0.002551f, 0.002748f, 0.002955f, 0.003174f,
0.003403f, 0.003643f, 0.003896f, 0.004160f, 0.004436f, 0.004725f, 0.005028f, 0.005343f,
0.005672f, 0.006015f, 0.006372f, 0.006744f, 0.007130f, 0.007533f, 0.007950f, 0.008384f,
0.008834f, 0.009301f, 0.009785f, 0.010286f, 0.010805f, 0.011342f, 0.011898f, 0.012472f,
0.013066f, 0.013680f, 0.014313f, 0.014967f, 0.015641f, 0.016337f, 0.017054f, 0.017793f,
0.018554f, 0.019337f, 0.020144f, 0.020974f, 0.021828f, 0.022706f, 0.023609f, 0.024536f,
0.025489f, 0.026468f, 0.027473f, 0.028504f, 0.029563f, 0.030649f, 0.031762f, 0.032904f,
0.034074f, 0.035274f, 0.036503f, 0.037762f, 0.039050f, 0.040370f, 0.041721f, 0.043103f,
0.044518f, 0.045964f, 0.047444f, 0.048956f, 0.050503f, 0.052083f, 0.053699f, 0.055349f,
0.057034f, 0.058755f, 0.060513f, 0.062307f, 0.064139f, 0.066008f, 0.067915f, 0.069861f,
0.071845f, 0.073869f, 0.075933f, 0.078037f, 0.080182f, 0.082369f, 0.084597f, 0.086867f,
0.089180f, 0.091535f, 0.093935f, 0.096378f, 0.098866f, 0.101398f, 0.103977f, 0.106601f,
0.109271f, 0.111988f, 0.114753f, 0.117565f, 0.120426f, 0.123335f, 0.126293f, 0.129301f,
0.132360f, 0.135469f, 0.138629f, 0.141841f, 0.145105f, 0.148421f, 0.151791f, 0.155214f,
0.158691f, 0.162224f, 0.165810f, 0.169453f, 0.173152f, 0.176907f, 0.180720f, 0.184589f,
0.188517f, 0.192504f, 0.196549f, 0.200655f, 0.204820f, 0.209046f, 0.213334f, 0.217682f,
0.222093f, 0.226567f, 0.231104f, 0.235704f, 0.240369f, 0.245099f, 0.249894f, 0.254754f,
0.259681f, 0.264674f, 0.269736f, 0.274864f, 0.280062f, 0.285328f, 0.290664f, 0.296070f,
0.301546f, 0.307094f, 0.312713f, 0.318404f, 0.324168f, 0.330006f, 0.335916f, 0.341902f,
0.347962f, 0.354097f, 0.360309f, 0.366597f, 0.372961f, 0.379403f, 0.385924f, 0.392524f,
0.399202f, 0.405960f, 0.412798f, 0.419718f, 0.426719f, 0.433802f, 0.440967f, 0.448216f,
0.455548f, 0.462965f, 0.470465f, 0.478052f, 0.485725f, 0.493484f, 0.501329f, 0.509263f,
0.517285f, 0.525396f, 0.533595f, 0.541885f, 0.550265f, 0.558736f, 0.567299f, 0.575954f,
0.584702f, 0.593542f, 0.602477f, 0.611507f, 0.620632f, 0.629852f, 0.639168f, 0.648581f,
0.658092f, 0.667700f, 0.677408f, 0.687214f, 0.697120f, 0.707127f, 0.717234f, 0.727443f,
0.737753f, 0.748167f, 0.758685f, 0.769305f, 0.780031f, 0.790861f, 0.801798f, 0.812839f,
0.823989f, 0.835246f, 0.846611f, 0.858085f, 0.869668f, 0.881360f, 0.893164f, 0.905078f,
0.917104f, 0.929242f, 0.941493f, 0.953859f, 0.966338f, 1.000000f, 1.000000f, 1.000000f,
};
} // Anonymous namespace
std::array<float, 4> TSCEntry::BorderColor() const noexcept {
if (!srgb_conversion) {
return border_color;
}
return {SRGB_CONVERSION_LUT[srgb_border_color_r], SRGB_CONVERSION_LUT[srgb_border_color_g],
SRGB_CONVERSION_LUT[srgb_border_color_b], border_color[3]};
}
/// Returns the anisotropy level to use for this sampler as a power of two.
///
/// Samplers that request no anisotropy and do not use linear mipmap filtering always get 1.0.
/// Otherwise the sampler's own exponent is raised by an amount taken from the user settings:
/// with the anisotropy setting at 0 the amount is derived from the resolution scaling info,
/// any other setting contributes (setting - 1).
float TSCEntry::MaxAnisotropy() const noexcept {
    if (max_anisotropy == 0 && mipmap_filter != TextureMipmapFilter::Linear) {
        return 1.0f;
    }
    // Read the setting once so the branch decision and the arithmetic below use the same value.
    const auto anisotropic_settings = Settings::values.max_anisotropy.GetValue();
    u32 added_anisotropic{};
    if (anisotropic_settings == 0) {
        added_anisotropic = Settings::values.resolution_info.up_scale >>
                            Settings::values.resolution_info.down_shift;
    } else {
        added_anisotropic = anisotropic_settings - 1U;
    }
    return static_cast<float>(1U << (max_anisotropy + added_anisotropic));
}
} // namespace Tegra::Texture
/// Hashes the raw bytes of a TICEntry with CityHash64.
size_t std::hash<TICEntry>::operator()(const TICEntry& tic) const noexcept {
    const auto* const raw_bytes = reinterpret_cast<const char*>(&tic);
    return Common::CityHash64(raw_bytes, sizeof(TICEntry));
}
/// Hashes the raw bytes of a TSCEntry with CityHash64.
size_t std::hash<TSCEntry>::operator()(const TSCEntry& tsc) const noexcept {
    const auto* const raw_bytes = reinterpret_cast<const char*>(&tsc);
    return Common::CityHash64(raw_bytes, sizeof(TSCEntry));
}
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include <array>
#include "common/cityhash.h"
#include "common/settings.h"
#include "video_core/textures/texture.h"
using Tegra::Texture::TICEntry;
using Tegra::Texture::TSCEntry;
namespace Tegra::Texture {
namespace {
// 256-entry lookup table indexed by an 8-bit value; TSCEntry::BorderColor uses it to convert
// the srgb_border_color_{r,g,b} fields to floats. The first three entries are 0 and the last
// three are 1.
constexpr std::array<float, 256> SRGB_CONVERSION_LUT = {{
    0.000000f, 0.000000f, 0.000000f, 0.000012f, 0.000021f, 0.000033f, 0.000046f, 0.000062f,
    0.000081f, 0.000102f, 0.000125f, 0.000151f, 0.000181f, 0.000214f, 0.000251f, 0.000293f,
    0.000338f, 0.000388f, 0.000443f, 0.000503f, 0.000568f, 0.000639f, 0.000715f, 0.000798f,
    0.000887f, 0.000983f, 0.001085f, 0.001195f, 0.001312f, 0.001437f, 0.001569f, 0.001710f,
    0.001860f, 0.002019f, 0.002186f, 0.002364f, 0.002551f, 0.002748f, 0.002955f, 0.003174f,
    0.003403f, 0.003643f, 0.003896f, 0.004160f, 0.004436f, 0.004725f, 0.005028f, 0.005343f,
    0.005672f, 0.006015f, 0.006372f, 0.006744f, 0.007130f, 0.007533f, 0.007950f, 0.008384f,
    0.008834f, 0.009301f, 0.009785f, 0.010286f, 0.010805f, 0.011342f, 0.011898f, 0.012472f,
    0.013066f, 0.013680f, 0.014313f, 0.014967f, 0.015641f, 0.016337f, 0.017054f, 0.017793f,
    0.018554f, 0.019337f, 0.020144f, 0.020974f, 0.021828f, 0.022706f, 0.023609f, 0.024536f,
    0.025489f, 0.026468f, 0.027473f, 0.028504f, 0.029563f, 0.030649f, 0.031762f, 0.032904f,
    0.034074f, 0.035274f, 0.036503f, 0.037762f, 0.039050f, 0.040370f, 0.041721f, 0.043103f,
    0.044518f, 0.045964f, 0.047444f, 0.048956f, 0.050503f, 0.052083f, 0.053699f, 0.055349f,
    0.057034f, 0.058755f, 0.060513f, 0.062307f, 0.064139f, 0.066008f, 0.067915f, 0.069861f,
    0.071845f, 0.073869f, 0.075933f, 0.078037f, 0.080182f, 0.082369f, 0.084597f, 0.086867f,
    0.089180f, 0.091535f, 0.093935f, 0.096378f, 0.098866f, 0.101398f, 0.103977f, 0.106601f,
    0.109271f, 0.111988f, 0.114753f, 0.117565f, 0.120426f, 0.123335f, 0.126293f, 0.129301f,
    0.132360f, 0.135469f, 0.138629f, 0.141841f, 0.145105f, 0.148421f, 0.151791f, 0.155214f,
    0.158691f, 0.162224f, 0.165810f, 0.169453f, 0.173152f, 0.176907f, 0.180720f, 0.184589f,
    0.188517f, 0.192504f, 0.196549f, 0.200655f, 0.204820f, 0.209046f, 0.213334f, 0.217682f,
    0.222093f, 0.226567f, 0.231104f, 0.235704f, 0.240369f, 0.245099f, 0.249894f, 0.254754f,
    0.259681f, 0.264674f, 0.269736f, 0.274864f, 0.280062f, 0.285328f, 0.290664f, 0.296070f,
    0.301546f, 0.307094f, 0.312713f, 0.318404f, 0.324168f, 0.330006f, 0.335916f, 0.341902f,
    0.347962f, 0.354097f, 0.360309f, 0.366597f, 0.372961f, 0.379403f, 0.385924f, 0.392524f,
    0.399202f, 0.405960f, 0.412798f, 0.419718f, 0.426719f, 0.433802f, 0.440967f, 0.448216f,
    0.455548f, 0.462965f, 0.470465f, 0.478052f, 0.485725f, 0.493484f, 0.501329f, 0.509263f,
    0.517285f, 0.525396f, 0.533595f, 0.541885f, 0.550265f, 0.558736f, 0.567299f, 0.575954f,
    0.584702f, 0.593542f, 0.602477f, 0.611507f, 0.620632f, 0.629852f, 0.639168f, 0.648581f,
    0.658092f, 0.667700f, 0.677408f, 0.687214f, 0.697120f, 0.707127f, 0.717234f, 0.727443f,
    0.737753f, 0.748167f, 0.758685f, 0.769305f, 0.780031f, 0.790861f, 0.801798f, 0.812839f,
    0.823989f, 0.835246f, 0.846611f, 0.858085f, 0.869668f, 0.881360f, 0.893164f, 0.905078f,
    0.917104f, 0.929242f, 0.941493f, 0.953859f, 0.966338f, 1.000000f, 1.000000f, 1.000000f,
}};
} // Anonymous namespace
std::array<float, 4> TSCEntry::BorderColor() const noexcept {
if (!srgb_conversion) {
return border_color;
}
return {SRGB_CONVERSION_LUT[srgb_border_color_r], SRGB_CONVERSION_LUT[srgb_border_color_g],
SRGB_CONVERSION_LUT[srgb_border_color_b], border_color[3]};
}
/// Computes the anisotropy factor for this sampler as a power of two.
///
/// Returns 1.0 when the sampler's max_anisotropy field is zero and the mipmap filter is not
/// Linear. Otherwise the result is 2^(max_anisotropy + extra), where `extra` comes from the
/// user settings:
///  - setting == 0: resolution_info.up_scale >> resolution_info.down_shift
///  - setting != 0: setting - 1
float TSCEntry::MaxAnisotropy() const noexcept {
    if (max_anisotropy == 0 && mipmap_filter != TextureMipmapFilter::Linear) {
        return 1.0f;
    }
    // Read the setting exactly once and use that same value for both the branch and the
    // arithmetic, so the "- 1U" below can never be applied to a zero that appeared between
    // two separate reads (which would wrap around and feed the shift).
    const auto anisotropic_settings = Settings::values.max_anisotropy.GetValue();
    u32 added_anisotropic{};
    if (anisotropic_settings == 0) {
        added_anisotropic = Settings::values.resolution_info.up_scale >>
                            Settings::values.resolution_info.down_shift;
    } else {
        added_anisotropic = anisotropic_settings - 1U;
    }
    return static_cast<float>(1U << (max_anisotropy + added_anisotropic));
}
} // namespace Tegra::Texture
/// Hashes a TICEntry by running CityHash64 over the bytes of the object.
size_t std::hash<TICEntry>::operator()(const TICEntry& tic) const noexcept {
    const auto* const bytes = reinterpret_cast<const char*>(&tic);
    return Common::CityHash64(bytes, sizeof(TICEntry));
}
/// Hashes a TSCEntry by running CityHash64 over the bytes of the object.
size_t std::hash<TSCEntry>::operator()(const TSCEntry& tsc) const noexcept {
    const auto* const bytes = reinterpret_cast<const char*>(&tsc);
    return Common::CityHash64(bytes, sizeof(TSCEntry));
}

View File

@@ -1,409 +1,409 @@
// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <array>
#include "common/assert.h"
#include "common/bit_field.h"
#include "common/common_types.h"
namespace Tegra::Texture {
/// Raw texture format codes, as held in the 7-bit `format` field of TICEntry.
/// The numbering is sparse: several codes in the covered range have no enumerator.
enum class TextureFormat : u32 {
    R32G32B32A32 = 0x01,
    R32G32B32 = 0x02,
    R16G16B16A16 = 0x03,
    R32G32 = 0x04,
    R32_B24G8 = 0x05,
    ETC2_RGB = 0x06,
    X8B8G8R8 = 0x07,
    A8R8G8B8 = 0x08,
    A2B10G10R10 = 0x09,
    ETC2_RGB_PTA = 0x0a,
    ETC2_RGBA = 0x0b,
    R16G16 = 0x0c,
    R24G8 = 0x0d,
    R8G24 = 0x0e,
    R32 = 0x0f,
    BC6H_SFLOAT = 0x10,
    BC6H_UFLOAT = 0x11,
    A4B4G4R4 = 0x12,
    A5B5G5R1 = 0x13,
    A1B5G5R5 = 0x14,
    B5G6R5 = 0x15,
    B6G5R5 = 0x16,
    BC7 = 0x17,
    R8G8 = 0x18,
    EAC = 0x19,
    EACX2 = 0x1a,
    R16 = 0x1b,
    Y8_VIDEO = 0x1c,
    R8 = 0x1d,
    G4R4 = 0x1e,
    R1 = 0x1f,
    E5B9G9R9 = 0x20,
    B10G11R11 = 0x21,
    G8B8G8R8 = 0x22,
    B8G8R8G8 = 0x23,
    BC1_RGBA = 0x24,
    BC2 = 0x25,
    BC3 = 0x26,
    BC4 = 0x27,
    BC5 = 0x28,
    S8D24 = 0x29,
    X8D24 = 0x2a,
    D24S8 = 0x2b,
    X4V4D24__COV4R4V = 0x2c,
    X4V4D24__COV8R8V = 0x2d,
    V8D24__COV4R12V = 0x2e,
    D32 = 0x2f,
    D32S8 = 0x30,
    X8D24_X20V4S8__COV4R4V = 0x31,
    X8D24_X20V4S8__COV8R8V = 0x32,
    D32_X20V4X8__COV4R4V = 0x33,
    D32_X20V4X8__COV8R8V = 0x34,
    D32_X20V4S8__COV4R4V = 0x35,
    D32_X20V4S8__COV8R8V = 0x36,
    X8D24_X16V8S8__COV4R12V = 0x37,
    D32_X16V8X8__COV4R12V = 0x38,
    D32_X16V8S8__COV4R12V = 0x39,
    D16 = 0x3a,
    V8D24__COV8R24V = 0x3b,
    X8D24_X16V8S8__COV8R24V = 0x3c,
    D32_X16V8X8__COV8R24V = 0x3d,
    D32_X16V8S8__COV8R24V = 0x3e,
    // 0x3f is unassigned; the ASTC_2D_* codes start at 0x40.
    ASTC_2D_4X4 = 0x40,
    ASTC_2D_5X5 = 0x41,
    ASTC_2D_6X6 = 0x42,
    ASTC_2D_8X8 = 0x44,
    ASTC_2D_10X10 = 0x45,
    ASTC_2D_12X12 = 0x46,
    ASTC_2D_5X4 = 0x50,
    ASTC_2D_6X5 = 0x51,
    ASTC_2D_8X6 = 0x52,
    ASTC_2D_10X8 = 0x53,
    ASTC_2D_12X10 = 0x54,
    ASTC_2D_8X5 = 0x55,
    ASTC_2D_10X5 = 0x56,
    ASTC_2D_10X6 = 0x57,
};
/// Texture type codes, as held in the 4-bit `texture_type` field of TICEntry.
enum class TextureType : u32 {
    Texture1D = 0,
    Texture2D = 1,
    Texture3D = 2,
    TextureCubemap = 3,
    Texture1DArray = 4,
    Texture2DArray = 5,
    Texture1DBuffer = 6,
    Texture2DNoMipmap = 7,
    TextureCubeArray = 8,
};
/// Header version codes, as held in the 3-bit `header_version` field of TICEntry.
/// TICEntry's IsBuffer / IsPitchLinear / IsBlockLinear helpers classify an entry by this value.
enum class TICHeaderVersion : u32 {
    OneDBuffer = 0,
    PitchColorKey = 1,
    Pitch = 2,
    BlockLinear = 3,
    BlockLinearColorKey = 4,
};
/// Per-component type codes, as held in the 3-bit `r_type`/`g_type`/`b_type`/`a_type` fields of
/// TICEntry. Code 0 has no enumerator.
enum class ComponentType : u32 {
    SNORM = 1,
    UNORM = 2,
    SINT = 3,
    UINT = 4,
    SNORM_FORCE_FP16 = 5,
    UNORM_FORCE_FP16 = 6,
    FLOAT = 7,
};
/// Swizzle source codes, as held in the 3-bit `x_source`..`w_source` fields of TICEntry.
/// Code 1 has no enumerator.
enum class SwizzleSource : u32 {
    Zero = 0,
    R = 2,
    G = 3,
    B = 4,
    A = 5,
    OneInt = 6,
    OneFloat = 7,
};
/// MSAA mode codes, as held in the 4-bit `msaa_mode` field of TICEntry.
/// Code 7 has no enumerator.
enum class MsaaMode : u32 {
    Msaa1x1 = 0,
    Msaa2x1 = 1,
    Msaa2x2 = 2,
    Msaa4x2 = 3,
    Msaa4x2_D3D = 4,
    Msaa2x1_D3D = 5,
    Msaa4x4 = 6,
    Msaa2x2_VC4 = 8,
    Msaa2x2_VC12 = 9,
    Msaa4x2_VC8 = 10,
    Msaa4x2_VC24 = 11,
};
/// A 32-bit texture handle viewed either as a raw word or as two packed ids:
/// bits 0-19 hold the TIC id and bits 20-31 hold the TSC id.
union TextureHandle {
    /// Intentionally implicit so a raw u32 can be used wherever a handle is expected.
    /* implicit */ constexpr TextureHandle(u32 raw_) : raw{raw_} {}

    u32 raw;
    BitField<0, 20, u32> tic_id;
    BitField<20, 12, u32> tsc_id;
};
static_assert(sizeof(TextureHandle) == 4, "TextureHandle has wrong size");
/// Splits a raw handle word into a (TIC id, TSC id) pair.
///
/// @param raw              Raw 32-bit handle value.
/// @param via_header_index When true, `raw` is used unchanged for both ids; when false, it is
///                         decoded through TextureHandle's tic_id / tsc_id bit fields.
/// @return {tic id, tsc id}
[[nodiscard]] inline std::pair<u32, u32> TexturePair(u32 raw, bool via_header_index) {
    if (!via_header_index) {
        const Tegra::Texture::TextureHandle packed{raw};
        return {packed.tic_id, packed.tsc_id};
    }
    return {raw, raw};
}
struct TICEntry {
union {
struct {
union {
BitField<0, 7, TextureFormat> format;
BitField<7, 3, ComponentType> r_type;
BitField<10, 3, ComponentType> g_type;
BitField<13, 3, ComponentType> b_type;
BitField<16, 3, ComponentType> a_type;
BitField<19, 3, SwizzleSource> x_source;
BitField<22, 3, SwizzleSource> y_source;
BitField<25, 3, SwizzleSource> z_source;
BitField<28, 3, SwizzleSource> w_source;
};
u32 address_low;
union {
BitField<0, 16, u32> address_high;
BitField<16, 5, u32> layer_base_3_7;
BitField<21, 3, TICHeaderVersion> header_version;
BitField<24, 1, u32> load_store_hint;
BitField<25, 4, u32> view_coherency_hash;
BitField<29, 3, u32> layer_base_8_10;
};
union {
BitField<0, 3, u32> block_width;
BitField<3, 3, u32> block_height;
BitField<6, 3, u32> block_depth;
BitField<10, 3, u32> tile_width_spacing;
// High 16 bits of the pitch value
BitField<0, 16, u32> pitch_high;
BitField<26, 1, u32> use_header_opt_control;
BitField<27, 1, u32> depth_texture;
BitField<28, 4, u32> max_mip_level;
BitField<0, 16, u32> buffer_high_width_minus_one;
};
union {
BitField<0, 16, u32> width_minus_one;
BitField<16, 3, u32> layer_base_0_2;
BitField<22, 1, u32> srgb_conversion;
BitField<23, 4, TextureType> texture_type;
BitField<29, 3, u32> border_size;
BitField<0, 16, u32> buffer_low_width_minus_one;
};
union {
BitField<0, 16, u32> height_minus_1;
BitField<16, 14, u32> depth_minus_1;
BitField<30, 1, u32> is_sparse;
BitField<31, 1, u32> normalized_coords;
};
union {
BitField<6, 13, u32> mip_lod_bias;
BitField<27, 3, u32> max_anisotropy;
};
union {
BitField<0, 4, u32> res_min_mip_level;
BitField<4, 4, u32> res_max_mip_level;
BitField<8, 4, MsaaMode> msaa_mode;
BitField<12, 12, u32> min_lod_clamp;
};
};
std::array<u64, 4> raw;
};
constexpr bool operator==(const TICEntry& rhs) const noexcept {
return raw == rhs.raw;
}
constexpr bool operator!=(const TICEntry& rhs) const noexcept {
return raw != rhs.raw;
}
constexpr GPUVAddr Address() const {
return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | address_low);
}
constexpr u32 Pitch() const {
ASSERT(header_version == TICHeaderVersion::Pitch ||
header_version == TICHeaderVersion::PitchColorKey);
// The pitch value is 21 bits, and is 32B aligned.
return pitch_high << 5;
}
constexpr u32 Width() const {
if (header_version != TICHeaderVersion::OneDBuffer) {
return width_minus_one + 1;
}
return (buffer_high_width_minus_one << 16 | buffer_low_width_minus_one) + 1;
}
constexpr u32 Height() const {
return height_minus_1 + 1;
}
constexpr u32 Depth() const {
return depth_minus_1 + 1;
}
constexpr u32 BaseLayer() const {
return layer_base_0_2 | layer_base_3_7 << 3 | layer_base_8_10 << 8;
}
constexpr bool IsBlockLinear() const {
return header_version == TICHeaderVersion::BlockLinear ||
header_version == TICHeaderVersion::BlockLinearColorKey;
}
constexpr bool IsPitchLinear() const {
return header_version == TICHeaderVersion::Pitch ||
header_version == TICHeaderVersion::PitchColorKey;
}
constexpr bool IsBuffer() const {
return header_version == TICHeaderVersion::OneDBuffer;
}
};
static_assert(sizeof(TICEntry) == 0x20, "TICEntry has wrong size");
/// Wrap mode codes, as held in the 3-bit `wrap_u`/`wrap_v`/`wrap_p` fields of TSCEntry.
enum class WrapMode : u32 {
    Wrap = 0,
    Mirror = 1,
    ClampToEdge = 2,
    Border = 3,
    Clamp = 4,
    MirrorOnceClampToEdge = 5,
    MirrorOnceBorder = 6,
    MirrorOnceClampOGL = 7,
};
/// Depth comparison function codes, as held in the 3-bit `depth_compare_func` field of TSCEntry.
enum class DepthCompareFunc : u32 {
    Never = 0,
    Less = 1,
    Equal = 2,
    LessEqual = 3,
    Greater = 4,
    NotEqual = 5,
    GreaterEqual = 6,
    Always = 7,
};
/// Filter codes, as held in the 2-bit `mag_filter`/`min_filter` fields of TSCEntry.
/// Code 0 has no enumerator.
enum class TextureFilter : u32 {
    Nearest = 1,
    Linear = 2,
};
/// Mipmap filter codes, as held in the 2-bit `mipmap_filter` field of TSCEntry.
/// Code 0 has no enumerator.
enum class TextureMipmapFilter : u32 {
    None = 1,
    Nearest = 2,
    Linear = 3,
};
/// Reduction filter codes, as held in the 2-bit `reduction_filter` field of TSCEntry.
enum class SamplerReduction : u32 {
    WeightedAverage = 0,
    Min = 1,
    Max = 2,
};
/// Anisotropic filtering levels. Values are consecutive starting at zero.
enum class Anisotropy {
    Default = 0,
    Filter2x = 1,
    Filter4x = 2,
    Filter8x = 3,
    Filter16x = 4,
};
struct TSCEntry {
union {
struct {
union {
BitField<0, 3, WrapMode> wrap_u;
BitField<3, 3, WrapMode> wrap_v;
BitField<6, 3, WrapMode> wrap_p;
BitField<9, 1, u32> depth_compare_enabled;
BitField<10, 3, DepthCompareFunc> depth_compare_func;
BitField<13, 1, u32> srgb_conversion;
BitField<20, 3, u32> max_anisotropy;
};
union {
BitField<0, 2, TextureFilter> mag_filter;
BitField<4, 2, TextureFilter> min_filter;
BitField<6, 2, TextureMipmapFilter> mipmap_filter;
BitField<8, 1, u32> cubemap_anisotropy;
BitField<9, 1, u32> cubemap_interface_filtering;
BitField<10, 2, SamplerReduction> reduction_filter;
BitField<12, 13, u32> mip_lod_bias;
BitField<25, 1, u32> float_coord_normalization;
BitField<26, 5, u32> trilin_opt;
};
union {
BitField<0, 12, u32> min_lod_clamp;
BitField<12, 12, u32> max_lod_clamp;
BitField<24, 8, u32> srgb_border_color_r;
};
union {
BitField<12, 8, u32> srgb_border_color_g;
BitField<20, 8, u32> srgb_border_color_b;
};
std::array<f32, 4> border_color;
};
std::array<u64, 4> raw;
};
constexpr bool operator==(const TSCEntry& rhs) const noexcept {
return raw == rhs.raw;
}
constexpr bool operator!=(const TSCEntry& rhs) const noexcept {
return raw != rhs.raw;
}
std::array<float, 4> BorderColor() const noexcept;
float MaxAnisotropy() const noexcept;
float MinLod() const {
return static_cast<float>(min_lod_clamp) / 256.0f;
}
float MaxLod() const {
return static_cast<float>(max_lod_clamp) / 256.0f;
}
float LodBias() const {
// Sign extend the 13-bit value.
static constexpr u32 mask = 1U << (13 - 1);
return static_cast<float>(static_cast<s32>((mip_lod_bias ^ mask) - mask)) / 256.0f;
}
};
static_assert(sizeof(TSCEntry) == 0x20, "TSCEntry has wrong size");
} // namespace Tegra::Texture
/// std::hash specialization for TICEntry; the call operator is defined out of line.
template <>
struct std::hash<Tegra::Texture::TICEntry> {
    size_t operator()(const Tegra::Texture::TICEntry& tic) const noexcept;
};
/// std::hash specialization for TSCEntry; the call operator is defined out of line.
template <>
struct std::hash<Tegra::Texture::TSCEntry> {
    size_t operator()(const Tegra::Texture::TSCEntry& tsc) const noexcept;
};
// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <array>
#include "common/assert.h"
#include "common/bit_field.h"
#include "common/common_types.h"
namespace Tegra::Texture {
/// Raw texture format codes, as held in the 7-bit `format` field of TICEntry.
/// The numbering is sparse: several codes in the covered range have no enumerator.
enum class TextureFormat : u32 {
    R32G32B32A32 = 0x01,
    R32G32B32 = 0x02,
    R16G16B16A16 = 0x03,
    R32G32 = 0x04,
    R32_B24G8 = 0x05,
    ETC2_RGB = 0x06,
    X8B8G8R8 = 0x07,
    A8R8G8B8 = 0x08,
    A2B10G10R10 = 0x09,
    ETC2_RGB_PTA = 0x0a,
    ETC2_RGBA = 0x0b,
    R16G16 = 0x0c,
    R24G8 = 0x0d,
    R8G24 = 0x0e,
    R32 = 0x0f,
    BC6H_SFLOAT = 0x10,
    BC6H_UFLOAT = 0x11,
    A4B4G4R4 = 0x12,
    A5B5G5R1 = 0x13,
    A1B5G5R5 = 0x14,
    B5G6R5 = 0x15,
    B6G5R5 = 0x16,
    BC7 = 0x17,
    R8G8 = 0x18,
    EAC = 0x19,
    EACX2 = 0x1a,
    R16 = 0x1b,
    Y8_VIDEO = 0x1c,
    R8 = 0x1d,
    G4R4 = 0x1e,
    R1 = 0x1f,
    E5B9G9R9 = 0x20,
    B10G11R11 = 0x21,
    G8B8G8R8 = 0x22,
    B8G8R8G8 = 0x23,
    BC1_RGBA = 0x24,
    BC2 = 0x25,
    BC3 = 0x26,
    BC4 = 0x27,
    BC5 = 0x28,
    S8D24 = 0x29,
    X8D24 = 0x2a,
    D24S8 = 0x2b,
    X4V4D24__COV4R4V = 0x2c,
    X4V4D24__COV8R8V = 0x2d,
    V8D24__COV4R12V = 0x2e,
    D32 = 0x2f,
    D32S8 = 0x30,
    X8D24_X20V4S8__COV4R4V = 0x31,
    X8D24_X20V4S8__COV8R8V = 0x32,
    D32_X20V4X8__COV4R4V = 0x33,
    D32_X20V4X8__COV8R8V = 0x34,
    D32_X20V4S8__COV4R4V = 0x35,
    D32_X20V4S8__COV8R8V = 0x36,
    X8D24_X16V8S8__COV4R12V = 0x37,
    D32_X16V8X8__COV4R12V = 0x38,
    D32_X16V8S8__COV4R12V = 0x39,
    D16 = 0x3a,
    V8D24__COV8R24V = 0x3b,
    X8D24_X16V8S8__COV8R24V = 0x3c,
    D32_X16V8X8__COV8R24V = 0x3d,
    D32_X16V8S8__COV8R24V = 0x3e,
    // 0x3f is unassigned; the ASTC_2D_* codes start at 0x40.
    ASTC_2D_4X4 = 0x40,
    ASTC_2D_5X5 = 0x41,
    ASTC_2D_6X6 = 0x42,
    ASTC_2D_8X8 = 0x44,
    ASTC_2D_10X10 = 0x45,
    ASTC_2D_12X12 = 0x46,
    ASTC_2D_5X4 = 0x50,
    ASTC_2D_6X5 = 0x51,
    ASTC_2D_8X6 = 0x52,
    ASTC_2D_10X8 = 0x53,
    ASTC_2D_12X10 = 0x54,
    ASTC_2D_8X5 = 0x55,
    ASTC_2D_10X5 = 0x56,
    ASTC_2D_10X6 = 0x57,
};
/// Texture type codes, as held in the 4-bit `texture_type` field of TICEntry.
enum class TextureType : u32 {
    Texture1D = 0,
    Texture2D = 1,
    Texture3D = 2,
    TextureCubemap = 3,
    Texture1DArray = 4,
    Texture2DArray = 5,
    Texture1DBuffer = 6,
    Texture2DNoMipmap = 7,
    TextureCubeArray = 8,
};
/// Header version codes, as held in the 3-bit `header_version` field of TICEntry.
/// TICEntry's IsBuffer / IsPitchLinear / IsBlockLinear helpers classify an entry by this value.
enum class TICHeaderVersion : u32 {
    OneDBuffer = 0,
    PitchColorKey = 1,
    Pitch = 2,
    BlockLinear = 3,
    BlockLinearColorKey = 4,
};
/// Per-component type codes, as held in the 3-bit `r_type`/`g_type`/`b_type`/`a_type` fields of
/// TICEntry. Code 0 has no enumerator.
enum class ComponentType : u32 {
    SNORM = 1,
    UNORM = 2,
    SINT = 3,
    UINT = 4,
    SNORM_FORCE_FP16 = 5,
    UNORM_FORCE_FP16 = 6,
    FLOAT = 7,
};
/// Swizzle source codes, as held in the 3-bit `x_source`..`w_source` fields of TICEntry.
/// Code 1 has no enumerator.
enum class SwizzleSource : u32 {
    Zero = 0,
    R = 2,
    G = 3,
    B = 4,
    A = 5,
    OneInt = 6,
    OneFloat = 7,
};
/// MSAA mode codes, as held in the 4-bit `msaa_mode` field of TICEntry.
/// Code 7 has no enumerator.
enum class MsaaMode : u32 {
    Msaa1x1 = 0,
    Msaa2x1 = 1,
    Msaa2x2 = 2,
    Msaa4x2 = 3,
    Msaa4x2_D3D = 4,
    Msaa2x1_D3D = 5,
    Msaa4x4 = 6,
    Msaa2x2_VC4 = 8,
    Msaa2x2_VC12 = 9,
    Msaa4x2_VC8 = 10,
    Msaa4x2_VC24 = 11,
};
/// A 32-bit texture handle viewed either as a raw word or as two packed ids:
/// bits 0-19 hold the TIC id and bits 20-31 hold the TSC id.
union TextureHandle {
    /// Intentionally implicit so a raw u32 can be used wherever a handle is expected.
    /* implicit */ constexpr TextureHandle(u32 raw_) : raw{raw_} {}

    u32 raw;
    BitField<0, 20, u32> tic_id;
    BitField<20, 12, u32> tsc_id;
};
static_assert(sizeof(TextureHandle) == 4, "TextureHandle has wrong size");
/// Splits a raw handle word into a (TIC id, TSC id) pair.
///
/// @param raw              Raw 32-bit handle value.
/// @param via_header_index When true, `raw` is used unchanged for both ids; when false, it is
///                         decoded through TextureHandle's tic_id / tsc_id bit fields.
/// @return {tic id, tsc id}
[[nodiscard]] inline std::pair<u32, u32> TexturePair(u32 raw, bool via_header_index) {
    if (!via_header_index) {
        const Tegra::Texture::TextureHandle packed{raw};
        return {packed.tic_id, packed.tsc_id};
    }
    return {raw, raw};
}
struct TICEntry {
union {
struct {
union {
BitField<0, 7, TextureFormat> format;
BitField<7, 3, ComponentType> r_type;
BitField<10, 3, ComponentType> g_type;
BitField<13, 3, ComponentType> b_type;
BitField<16, 3, ComponentType> a_type;
BitField<19, 3, SwizzleSource> x_source;
BitField<22, 3, SwizzleSource> y_source;
BitField<25, 3, SwizzleSource> z_source;
BitField<28, 3, SwizzleSource> w_source;
};
u32 address_low;
union {
BitField<0, 16, u32> address_high;
BitField<16, 5, u32> layer_base_3_7;
BitField<21, 3, TICHeaderVersion> header_version;
BitField<24, 1, u32> load_store_hint;
BitField<25, 4, u32> view_coherency_hash;
BitField<29, 3, u32> layer_base_8_10;
};
union {
BitField<0, 3, u32> block_width;
BitField<3, 3, u32> block_height;
BitField<6, 3, u32> block_depth;
BitField<10, 3, u32> tile_width_spacing;
// High 16 bits of the pitch value
BitField<0, 16, u32> pitch_high;
BitField<26, 1, u32> use_header_opt_control;
BitField<27, 1, u32> depth_texture;
BitField<28, 4, u32> max_mip_level;
BitField<0, 16, u32> buffer_high_width_minus_one;
};
union {
BitField<0, 16, u32> width_minus_one;
BitField<16, 3, u32> layer_base_0_2;
BitField<22, 1, u32> srgb_conversion;
BitField<23, 4, TextureType> texture_type;
BitField<29, 3, u32> border_size;
BitField<0, 16, u32> buffer_low_width_minus_one;
};
union {
BitField<0, 16, u32> height_minus_1;
BitField<16, 14, u32> depth_minus_1;
BitField<30, 1, u32> is_sparse;
BitField<31, 1, u32> normalized_coords;
};
union {
BitField<6, 13, u32> mip_lod_bias;
BitField<27, 3, u32> max_anisotropy;
};
union {
BitField<0, 4, u32> res_min_mip_level;
BitField<4, 4, u32> res_max_mip_level;
BitField<8, 4, MsaaMode> msaa_mode;
BitField<12, 12, u32> min_lod_clamp;
};
};
std::array<u64, 4> raw;
};
constexpr bool operator==(const TICEntry& rhs) const noexcept {
return raw == rhs.raw;
}
constexpr bool operator!=(const TICEntry& rhs) const noexcept {
return raw != rhs.raw;
}
constexpr GPUVAddr Address() const {
return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | address_low);
}
constexpr u32 Pitch() const {
ASSERT(header_version == TICHeaderVersion::Pitch ||
header_version == TICHeaderVersion::PitchColorKey);
// The pitch value is 21 bits, and is 32B aligned.
return pitch_high << 5;
}
constexpr u32 Width() const {
if (header_version != TICHeaderVersion::OneDBuffer) {
return width_minus_one + 1;
}
return (buffer_high_width_minus_one << 16 | buffer_low_width_minus_one) + 1;
}
constexpr u32 Height() const {
return height_minus_1 + 1;
}
constexpr u32 Depth() const {
return depth_minus_1 + 1;
}
constexpr u32 BaseLayer() const {
return layer_base_0_2 | layer_base_3_7 << 3 | layer_base_8_10 << 8;
}
constexpr bool IsBlockLinear() const {
return header_version == TICHeaderVersion::BlockLinear ||
header_version == TICHeaderVersion::BlockLinearColorKey;
}
constexpr bool IsPitchLinear() const {
return header_version == TICHeaderVersion::Pitch ||
header_version == TICHeaderVersion::PitchColorKey;
}
constexpr bool IsBuffer() const {
return header_version == TICHeaderVersion::OneDBuffer;
}
};
static_assert(sizeof(TICEntry) == 0x20, "TICEntry has wrong size");
/// Wrap mode codes, as held in the 3-bit `wrap_u`/`wrap_v`/`wrap_p` fields of TSCEntry.
enum class WrapMode : u32 {
    Wrap = 0,
    Mirror = 1,
    ClampToEdge = 2,
    Border = 3,
    Clamp = 4,
    MirrorOnceClampToEdge = 5,
    MirrorOnceBorder = 6,
    MirrorOnceClampOGL = 7,
};
/// Depth comparison function codes, as held in the 3-bit `depth_compare_func` field of TSCEntry.
enum class DepthCompareFunc : u32 {
    Never = 0,
    Less = 1,
    Equal = 2,
    LessEqual = 3,
    Greater = 4,
    NotEqual = 5,
    GreaterEqual = 6,
    Always = 7,
};
/// Filter codes, as held in the 2-bit `mag_filter`/`min_filter` fields of TSCEntry.
/// Code 0 has no enumerator.
enum class TextureFilter : u32 {
    Nearest = 1,
    Linear = 2,
};
/// Mipmap filter codes, as held in the 2-bit `mipmap_filter` field of TSCEntry.
/// Code 0 has no enumerator.
enum class TextureMipmapFilter : u32 {
    None = 1,
    Nearest = 2,
    Linear = 3,
};
/// Reduction filter codes, as held in the 2-bit `reduction_filter` field of TSCEntry.
enum class SamplerReduction : u32 {
    WeightedAverage = 0,
    Min = 1,
    Max = 2,
};
/// Anisotropic filtering levels. Values are consecutive starting at zero.
enum class Anisotropy {
    Default = 0,
    Filter2x = 1,
    Filter4x = 2,
    Filter8x = 3,
    Filter16x = 4,
};
struct TSCEntry {
union {
struct {
union {
BitField<0, 3, WrapMode> wrap_u;
BitField<3, 3, WrapMode> wrap_v;
BitField<6, 3, WrapMode> wrap_p;
BitField<9, 1, u32> depth_compare_enabled;
BitField<10, 3, DepthCompareFunc> depth_compare_func;
BitField<13, 1, u32> srgb_conversion;
BitField<20, 3, u32> max_anisotropy;
};
union {
BitField<0, 2, TextureFilter> mag_filter;
BitField<4, 2, TextureFilter> min_filter;
BitField<6, 2, TextureMipmapFilter> mipmap_filter;
BitField<8, 1, u32> cubemap_anisotropy;
BitField<9, 1, u32> cubemap_interface_filtering;
BitField<10, 2, SamplerReduction> reduction_filter;
BitField<12, 13, u32> mip_lod_bias;
BitField<25, 1, u32> float_coord_normalization;
BitField<26, 5, u32> trilin_opt;
};
union {
BitField<0, 12, u32> min_lod_clamp;
BitField<12, 12, u32> max_lod_clamp;
BitField<24, 8, u32> srgb_border_color_r;
};
union {
BitField<12, 8, u32> srgb_border_color_g;
BitField<20, 8, u32> srgb_border_color_b;
};
std::array<f32, 4> border_color;
};
std::array<u64, 4> raw;
};
constexpr bool operator==(const TSCEntry& rhs) const noexcept {
return raw == rhs.raw;
}
constexpr bool operator!=(const TSCEntry& rhs) const noexcept {
return raw != rhs.raw;
}
std::array<float, 4> BorderColor() const noexcept;
float MaxAnisotropy() const noexcept;
float MinLod() const {
return static_cast<float>(min_lod_clamp) / 256.0f;
}
float MaxLod() const {
return static_cast<float>(max_lod_clamp) / 256.0f;
}
float LodBias() const {
// Sign extend the 13-bit value.
static constexpr u32 mask = 1U << (13 - 1);
return static_cast<float>(static_cast<s32>((mip_lod_bias ^ mask) - mask)) / 256.0f;
}
};
static_assert(sizeof(TSCEntry) == 0x20, "TSCEntry has wrong size");
} // namespace Tegra::Texture
/// std::hash specialization for TICEntry; the call operator is defined out of line.
template <>
struct std::hash<Tegra::Texture::TICEntry> {
    size_t operator()(const Tegra::Texture::TICEntry& tic) const noexcept;
};
/// std::hash specialization for TSCEntry; the call operator is defined out of line.
template <>
struct std::hash<Tegra::Texture::TSCEntry> {
    size_t operator()(const Tegra::Texture::TSCEntry& tsc) const noexcept;
};