mirror of
https://git.suyu.dev/suyu/suyu.git
synced 2024-11-15 22:54:00 +00:00
Merge pull request #2118 from FernandoS27/ipa-improve
shader_decompiler: Improve Accuracy of Attribute Interpolation.
This commit is contained in:
commit
c07987dfab
6 changed files with 74 additions and 38 deletions
|
@ -376,9 +376,9 @@ enum class R2pMode : u64 {
|
|||
};
|
||||
|
||||
enum class IpaInterpMode : u64 {
|
||||
Linear = 0,
|
||||
Perspective = 1,
|
||||
Flat = 2,
|
||||
Pass = 0,
|
||||
Multiply = 1,
|
||||
Constant = 2,
|
||||
Sc = 3,
|
||||
};
|
||||
|
||||
|
|
|
@ -16,6 +16,13 @@ enum class OutputTopology : u32 {
|
|||
TriangleStrip = 7,
|
||||
};
|
||||
|
||||
// How one component of a pixel-shader input attribute is consumed.
// Values are stored as 2-bit fields in the shader header's generic input map.
enum class AttributeUse : u8 {
    Unused = 0x0,       // Component is not read
    Constant = 0x1,     // Declared with the GLSL "flat" qualifier by the decompiler
    Perspective = 0x2,  // Default (smooth) interpolation, no qualifier emitted
    ScreenLinear = 0x3, // Declared with the GLSL "noperspective" qualifier
};
|
||||
|
||||
// Documentation in:
|
||||
// http://download.nvidia.com/open-gpu-doc/Shader-Program-Header/1/Shader-Program-Header.html#ImapTexture
|
||||
struct Header {
|
||||
|
@ -84,9 +91,15 @@ struct Header {
|
|||
} vtg;
|
||||
|
||||
struct {
|
||||
INSERT_PADDING_BYTES(3); // ImapSystemValuesA
|
||||
INSERT_PADDING_BYTES(1); // ImapSystemValuesB
|
||||
INSERT_PADDING_BYTES(32); // ImapGenericVector[32]
|
||||
INSERT_PADDING_BYTES(3); // ImapSystemValuesA
|
||||
INSERT_PADDING_BYTES(1); // ImapSystemValuesB
|
||||
union {
|
||||
BitField<0, 2, AttributeUse> x;
|
||||
BitField<2, 2, AttributeUse> y;
|
||||
BitField<4, 2, AttributeUse> w;
|
||||
BitField<6, 2, AttributeUse> z;
|
||||
u8 raw;
|
||||
} imap_generic_vector[32];
|
||||
INSERT_PADDING_BYTES(2); // ImapColor
|
||||
INSERT_PADDING_BYTES(2); // ImapSystemValuesC
|
||||
INSERT_PADDING_BYTES(10); // ImapFixedFncTexture[10]
|
||||
|
@ -103,6 +116,28 @@ struct Header {
|
|||
const u32 bit = render_target * 4 + component;
|
||||
return omap.target & (1 << bit);
|
||||
}
|
||||
// Reads the 2-bit usage field of a single component of a generic input attribute.
// attribute: position in imap_generic_vector (32 entries; not range-checked here).
// index:     component slot; slot N occupies bits [2N, 2N+1] of the packed byte.
// Returns the extracted field converted to AttributeUse.
AttributeUse GetAttributeIndexUse(u32 attribute, u32 index) const {
    const u32 shift = index * 2;
    const auto packed = imap_generic_vector[attribute].raw;
    return static_cast<AttributeUse>((packed >> shift) & 0x03);
}
|
||||
// Collapses the four per-component usages of a generic attribute into one usage.
// Unused components are ignored. When two used components disagree, a critical
// message is logged; the newly seen usage replaces the current one only if it is
// Perspective, otherwise the earlier usage is kept.
// Returns AttributeUse::Unused when no component is used.
AttributeUse GetAttributeUse(u32 attribute) const {
    AttributeUse combined = AttributeUse::Unused;
    for (u32 component = 0; component < 4; ++component) {
        const AttributeUse use = GetAttributeIndexUse(attribute, component);
        if (use == AttributeUse::Unused) {
            continue;
        }
        const bool agrees = combined == AttributeUse::Unused || combined == use;
        if (!agrees) {
            LOG_CRITICAL(HW_GPU, "Generic Attribute Conflict in Interpolation Mode");
            if (use != AttributeUse::Perspective) {
                // Conflicting non-perspective usage: keep what was already chosen.
                continue;
            }
        }
        combined = use;
    }
    return combined;
}
|
||||
} ps;
|
||||
};
|
||||
|
||||
|
|
|
@ -20,6 +20,7 @@
|
|||
namespace OpenGL::GLShader {
|
||||
|
||||
using Tegra::Shader::Attribute;
|
||||
using Tegra::Shader::AttributeUse;
|
||||
using Tegra::Shader::Header;
|
||||
using Tegra::Shader::IpaInterpMode;
|
||||
using Tegra::Shader::IpaMode;
|
||||
|
@ -288,34 +289,22 @@ private:
|
|||
code.AddNewLine();
|
||||
}
|
||||
|
||||
std::string GetInputFlags(const IpaMode& input_mode) {
|
||||
const IpaSampleMode sample_mode = input_mode.sampling_mode;
|
||||
const IpaInterpMode interp_mode = input_mode.interpolation_mode;
|
||||
std::string GetInputFlags(AttributeUse attribute) {
|
||||
std::string out;
|
||||
|
||||
switch (interp_mode) {
|
||||
case IpaInterpMode::Flat:
|
||||
switch (attribute) {
|
||||
case AttributeUse::Constant:
|
||||
out += "flat ";
|
||||
break;
|
||||
case IpaInterpMode::Linear:
|
||||
case AttributeUse::ScreenLinear:
|
||||
out += "noperspective ";
|
||||
break;
|
||||
case IpaInterpMode::Perspective:
|
||||
case AttributeUse::Perspective:
|
||||
// Default, Smooth
|
||||
break;
|
||||
default:
|
||||
UNIMPLEMENTED_MSG("Unhandled IPA interp mode: {}", static_cast<u32>(interp_mode));
|
||||
}
|
||||
switch (sample_mode) {
|
||||
case IpaSampleMode::Centroid:
|
||||
// It can be implemented with the "centroid " keyword in GLSL
|
||||
UNIMPLEMENTED_MSG("Unimplemented IPA sampler mode centroid");
|
||||
break;
|
||||
case IpaSampleMode::Default:
|
||||
// Default, n/a
|
||||
break;
|
||||
default:
|
||||
UNIMPLEMENTED_MSG("Unimplemented IPA sampler mode: {}", static_cast<u32>(sample_mode));
|
||||
LOG_CRITICAL(HW_GPU, "Unused attribute being fetched");
|
||||
UNREACHABLE();
|
||||
}
|
||||
return out;
|
||||
}
|
||||
|
@ -324,16 +313,11 @@ private:
|
|||
const auto& attributes = ir.GetInputAttributes();
|
||||
for (const auto element : attributes) {
|
||||
const Attribute::Index index = element.first;
|
||||
const IpaMode& input_mode = *element.second.begin();
|
||||
if (index < Attribute::Index::Attribute_0 || index > Attribute::Index::Attribute_31) {
|
||||
// Skip when it's not a generic attribute
|
||||
continue;
|
||||
}
|
||||
|
||||
ASSERT(element.second.size() > 0);
|
||||
UNIMPLEMENTED_IF_MSG(element.second.size() > 1,
|
||||
"Multiple input flag modes are not supported in GLSL");
|
||||
|
||||
// TODO(bunnei): Use proper number of elements for these
|
||||
u32 idx = static_cast<u32>(index) - static_cast<u32>(Attribute::Index::Attribute_0);
|
||||
if (stage != ShaderStage::Vertex) {
|
||||
|
@ -345,8 +329,14 @@ private:
|
|||
if (stage == ShaderStage::Geometry) {
|
||||
attr = "gs_" + attr + "[]";
|
||||
}
|
||||
code.AddLine("layout (location = " + std::to_string(idx) + ") " +
|
||||
GetInputFlags(input_mode) + "in vec4 " + attr + ';');
|
||||
std::string suffix;
|
||||
if (stage == ShaderStage::Fragment) {
|
||||
const auto input_mode =
|
||||
header.ps.GetAttributeUse(idx - GENERIC_VARYING_START_LOCATION);
|
||||
suffix = GetInputFlags(input_mode);
|
||||
}
|
||||
code.AddLine("layout (location = " + std::to_string(idx) + ") " + suffix + "in vec4 " +
|
||||
attr + ';');
|
||||
}
|
||||
if (!attributes.empty())
|
||||
code.AddNewLine();
|
||||
|
@ -1584,4 +1574,4 @@ ProgramResult Decompile(const ShaderIR& ir, Maxwell::ShaderStage stage, const st
|
|||
return {decompiler.GetResult(), decompiler.GetShaderEntries()};
|
||||
}
|
||||
|
||||
} // namespace OpenGL::GLShader
|
||||
} // namespace OpenGL::GLShader
|
||||
|
|
|
@ -124,7 +124,7 @@ layout (location = 5) out vec4 FragColor5;
|
|||
layout (location = 6) out vec4 FragColor6;
|
||||
layout (location = 7) out vec4 FragColor7;
|
||||
|
||||
layout (location = 0) in vec4 position;
|
||||
layout (location = 0) in noperspective vec4 position;
|
||||
|
||||
layout (std140, binding = EMULATION_UBO_BINDING) uniform fs_config {
|
||||
vec4 viewport_flip;
|
||||
|
@ -172,4 +172,4 @@ void main() {
|
|||
return {out, program.second};
|
||||
}
|
||||
|
||||
} // namespace OpenGL::GLShader
|
||||
} // namespace OpenGL::GLShader
|
||||
|
|
|
@ -48,7 +48,7 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
|
|||
UNIMPLEMENTED_IF_MSG((instr.attribute.fmt20.immediate.Value() % sizeof(u32)) != 0,
|
||||
"Unaligned attribute loads are not supported");
|
||||
|
||||
Tegra::Shader::IpaMode input_mode{Tegra::Shader::IpaInterpMode::Perspective,
|
||||
Tegra::Shader::IpaMode input_mode{Tegra::Shader::IpaInterpMode::Pass,
|
||||
Tegra::Shader::IpaSampleMode::Default};
|
||||
|
||||
u64 next_element = instr.attribute.fmt20.element;
|
||||
|
|
|
@ -135,7 +135,18 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
|
|||
instr.ipa.sample_mode.Value()};
|
||||
|
||||
const Node attr = GetInputAttribute(attribute.index, attribute.element, input_mode);
|
||||
const Node value = GetSaturatedFloat(attr, instr.ipa.saturate);
|
||||
Node value = attr;
|
||||
const Tegra::Shader::Attribute::Index index = attribute.index.Value();
|
||||
if (index >= Tegra::Shader::Attribute::Index::Attribute_0 &&
|
||||
index <= Tegra::Shader::Attribute::Index::Attribute_31) {
|
||||
// TODO(Blinkhawk): There are cases where a perspective attribute uses PASS.
|
||||
// In theory by setting them as perspective, OpenGL does the perspective correction.
|
||||
// A way must be figured out to reverse the last step of it.
|
||||
if (input_mode.interpolation_mode == Tegra::Shader::IpaInterpMode::Multiply) {
|
||||
value = Operation(OperationCode::FMul, PRECISE, value, GetRegister(instr.gpr20));
|
||||
}
|
||||
}
|
||||
value = GetSaturatedFloat(value, instr.ipa.saturate);
|
||||
|
||||
SetRegister(bb, instr.gpr0, value);
|
||||
break;
|
||||
|
@ -175,4 +186,4 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
|
|||
return pc;
|
||||
}
|
||||
|
||||
} // namespace VideoCommon::Shader
|
||||
} // namespace VideoCommon::Shader
|
||||
|
|
Loading…
Reference in a new issue