Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

GS: Fix huge ST coordinates in input vertices. #12201

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
263 changes: 213 additions & 50 deletions pcsx2/GS/GSState.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1595,13 +1595,6 @@ inline bool GSState::TestDrawChanged()
return false;
}

// Builds a bitmask covering the low bits of a float's mantissa.
// The number of bits is 9 plus the exponent gap (max_exp - exp), capped at 23
// (the full mantissa width of a 32-bit float).
u32 GSState::CalcMask(int exp, int max_exp)
{
	const int bit_count = std::min(9 + (max_exp - exp), 23);
	return (1 << bit_count) - 1;
}

void GSState::FlushPrim()
{
if (m_index.tail > 0)
Expand Down Expand Up @@ -1676,49 +1669,17 @@ void GSState::FlushPrim()
#endif

m_vt.Update(m_vertex.buff, m_index.buff, m_vertex.tail, m_index.tail, GSUtil::GetPrimClass(PRIM->PRIM));

// Fix huge or nan ST coordinates
if (PRIM->TME && !PRIM->FST)
{
FixHugeSTCoords();
}

// Texel coordinate rounding
// Helps Manhunt (lights shining through objects).
// Can help with some alignment issues when upscaling too, and is for both Software and Hardware renderers.
// Sometimes hardware doesn't get affected, likely due to the difference in how GPU's handle textures (Persona minimap).
if (PRIM->TME && (GSUtil::GetPrimClass(PRIM->PRIM) == GS_PRIM_CLASS::GS_SPRITE_CLASS || m_vt.m_eq.z))
// Round fractional parts of ST coords
if (PRIM->TME && !PRIM->FST && (GSUtil::GetPrimClass(PRIM->PRIM) == GS_PRIM_CLASS::GS_SPRITE_CLASS || m_vt.m_eq.z))
{
if (!PRIM->FST) // STQ's
{
const bool is_sprite = GSUtil::GetPrimClass(PRIM->PRIM) == GS_PRIM_CLASS::GS_SPRITE_CLASS;
// ST's have the lowest 9 bits (or greater depending on exponent difference) rounding down (from hardware tests).
for (int i = m_index.tail - 1; i >= 0; i--)
{
GSVertex* v = &m_vertex.buff[m_index.buff[i]];

// Only Q on the second vertex is valid
if (!(i & 1) && is_sprite)
v->RGBAQ.Q = m_vertex.buff[m_index.buff[i + 1]].RGBAQ.Q;

int T = std::bit_cast<int>(v->ST.T);
int Q = std::bit_cast<int>(v->RGBAQ.Q);
int S = std::bit_cast<int>(v->ST.S);
const int expS = (S >> 23) & 0xff;
const int expT = (T >> 23) & 0xff;
const int expQ = (Q >> 23) & 0xff;
int max_exp = std::max(expS, expQ);

u32 mask = CalcMask(expS, max_exp);
S &= ~mask;
v->ST.S = std::bit_cast<float>(S);
max_exp = std::max(expT, expQ);
mask = CalcMask(expT, max_exp);
T &= ~mask;
v->ST.T = std::bit_cast<float>(T);
Q &= ~0xff;

if (!is_sprite || (i & 1))
v->RGBAQ.Q = std::bit_cast<float>(Q);

m_vt.m_min.t.x = std::min(m_vt.m_min.t.x, (v->ST.S / v->RGBAQ.Q) * (1 << m_context->TEX0.TW));
m_vt.m_min.t.y = std::min(m_vt.m_min.t.y, (v->ST.T / v->RGBAQ.Q) * (1 << m_context->TEX0.TH));
}
}
RoundSTCoords();
}

// Skip draw if Z test is enabled, but set to fail all pixels.
Expand Down Expand Up @@ -3831,8 +3792,8 @@ GSState::TextureMinMaxResult GSState::GetTextureMinMax(GIFRegTEX0 TEX0, GIFRegCL

u8 uses_border = 0;

if (m_vt.m_max.t.x >= FLT_MAX || m_vt.m_min.t.x <= -FLT_MAX ||
m_vt.m_max.t.y >= FLT_MAX || m_vt.m_min.t.y <= -FLT_MAX)
if (m_vt.m_max.t.x >= 2047.0f || m_vt.m_min.t.x <= -2047.0f ||
m_vt.m_max.t.y >= 2047.0f || m_vt.m_min.t.y <= -2047.0f)
{
// If any of the min/max values are at or beyond +-2047 (which includes +-FLT_MAX) we can't rely on them
// so just assume full texture.
Expand Down Expand Up @@ -4009,6 +3970,208 @@ GSState::TextureMinMaxResult GSState::GetTextureMinMax(GIFRegTEX0 TEX0, GIFRegCL
return { vr, uses_border };
}

// ST coordinate rounding
// Helps Manhunt (lights shining through objects).
// Can help with some alignment issues when upscaling too, and is for both Software and Hardware renderers.
// Sometimes hardware doesn't get affected, likely due to the difference in how GPU's handle textures (Persona minimap).
void GSState::RoundSTCoords()
{
// ST's have the lowest 9 bits (or greater depending on exponent difference) rounded down (from hardware tests).
// This gives the bitmask for the lower 9 (or more) bits.
auto LowerBitsMask = [](int exp, int max_exp)
{
const int amount = 9 + (max_exp - exp);
return (1 << std::min(amount, 23)) - 1;
};

for (int i = m_index.tail - 1; i >= 0; i--)
{
GSVertex* v = &m_vertex.buff[m_index.buff[i]];

if (m_vt.m_primclass == GS_SPRITE_CLASS && (i & 1))
{
// FIXME: Remove this once done debugging
pxAssertMsg(m_vertex.buff[m_index.buff[i]].RGBAQ.Q == m_vertex.buff[m_index.buff[i - 1]].RGBAQ.Q, "Sprite Qs different");
}

int S = std::bit_cast<int>(v->ST.S);
int T = std::bit_cast<int>(v->ST.T);
int Q = std::bit_cast<int>(v->RGBAQ.Q);

const int expS = (S >> 23) & 0xff;
const int expT = (T >> 23) & 0xff;
const int expQ = (Q >> 23) & 0xff;

S &= ~LowerBitsMask(expS, std::max(expS, expQ));
T &= ~LowerBitsMask(expT, std::max(expT, expQ));
Q &= ~0xff; // Q gets truncated less than ST by hardware tests

v->ST.S = std::bit_cast<float>(S);
v->ST.T = std::bit_cast<float>(T);

const float U = (v->ST.S / v->RGBAQ.Q) * (1 << m_context->TEX0.TW);
const float V = (v->ST.T / v->RGBAQ.Q) * (1 << m_context->TEX0.TH);
const float Qf = std::bit_cast<float>(Q);

const GSVector4 uvq(U, V, Qf, Qf);

// Do min/max with only those values that are not NaN
m_vt.m_min.t = m_vt.m_min.t.blend32(m_vt.m_min.t.min(uvq), uvq.notnan());
m_vt.m_max.t = m_vt.m_max.t.blend32(m_vt.m_max.t.max(uvq), uvq.notnan());
}

// Clamp the min/max UV values to the min/max valid UV values.
m_vt.m_min.t = m_vt.m_min.t.min(GSVector4(2047.0f)).max(GSVector4(-2047.0f)).xyzw(m_vt.m_min.t);
m_vt.m_max.t = m_vt.m_max.t.min(GSVector4(2047.0f)).max(GSVector4(-2047.0f)).xyzw(m_vt.m_max.t);
}

// Handles huge or NaN ST coordinates, either by culling the affected primitives or by
// replacing them with primitives that have valid coordinates.
// This is based on hardware tests that seem to show that ST coordinates get clamped to +/-2047
// before repeat or region repeat is applied.
// Note that huge texture coordinates may be a symptom of floating point issues in the EE, and
// it would be better to have them fixed there.
//
// Dispatches to the implementation specialized for the number of vertices per primitive
// of the current primitive class; culling is disabled in every case.
void GSState::FixHugeSTCoords()
{
	const auto verts_per_prim = GSUtil::GetClassVertexCount(GSUtil::GetPrimClass(PRIM->PRIM));

	if (verts_per_prim == 1)
	{
		FixHugeSTCoordsImpl<1, false>();
	}
	else if (verts_per_prim == 2)
	{
		FixHugeSTCoordsImpl<2, false>();
	}
	else if (verts_per_prim == 3)
	{
		FixHugeSTCoordsImpl<3, false>();
	}
	else
	{
		pxFail("Impossible");
	}
}

// Scans the index buffer one primitive (n indices) at a time and repairs primitives whose
// S/Q or T/Q is NaN or beyond +/-huge.
//
// Template parameters:
//   n    - number of vertices (indices) per primitive.
//   cull - when true, primitives with ambiguous coordinates (NaN, or both hugely positive
//          and hugely negative on the same axis) are dropped from the index buffer.
//
// Primitives that need fixing are re-emitted with fresh vertices appended at the tail of the
// vertex buffer; the index buffer is compacted in place and m_index.tail is updated.
template <u32 n, bool cull>
void GSState::FixHugeSTCoordsImpl()
{
	// Deliberately not const: re-read after GrowVertexBuffer() below.
	GSVertex* vertex = m_vertex.buff;
	u16* index = m_index.buff;

	u32 new_index_tail = 0;

	constexpr float huge = 1e10f; // Arbitrary large value

	const float tex_width = 1 << m_context->TEX0.TW;
	const float tex_height = 1 << m_context->TEX0.TH;

	bool new_prims = false; // Did we generate new primitives?

	for (u32 i = 0; i < m_index.tail; i += n)
	{
		bool nan_s = false;
		bool nan_t = false;
		bool huge_pos_s = false;
		bool huge_neg_s = false;
		bool huge_pos_t = false;
		bool huge_neg_t = false;

		if (m_vt.m_primclass == GS_SPRITE_CLASS)
		{
			// FIXME: Remove this once done debugging
			pxAssertMsg(vertex[index[i + 0]].RGBAQ.Q == vertex[index[i + 1]].RGBAQ.Q, "Sprite Qs different");
		}

		// Classify the perspective-divided coordinates of every vertex in this primitive.
		for (u32 j = 0; j < n; j++)
		{
			const float s = vertex[index[i + j]].ST.S / vertex[index[i + j]].RGBAQ.Q;
			const float t = vertex[index[i + j]].ST.T / vertex[index[i + j]].RGBAQ.Q;
			nan_s |= std::isnan(s);
			nan_t |= std::isnan(t);
			huge_pos_s |= s > huge;
			huge_pos_t |= t > huge;
			huge_neg_s |= s < -huge;
			huge_neg_t |= t < -huge;
		}

		// ambiguous = true would probably result in NaN in the SW rasterizer or something undefined in HW.
		// PS2 does not have NaN so there is no really accurate way to emulate this.
		// huge = true and ambiguous = false seems to have well-defined behavior on the PS2:
		// it clamps huge values to +/-2047 in UV coordinates space. We try to approximate this by
		// giving ST the values that would result in exactly +/-2047 across the primitive.
		// For ambiguous values either cull the primitive or replace coordinates by 0.
		const bool ambiguous_s = nan_s || (huge_pos_s && huge_neg_s);
		const bool ambiguous_t = nan_t || (huge_pos_t && huge_neg_t);

		if ((ambiguous_s || ambiguous_t) && cull)
		{
			// Cull the primitive by not saving the indices
			continue;
		}

		if (huge_pos_s || huge_pos_t || huge_neg_s || huge_neg_t || ambiguous_s || ambiguous_t)
		{
			// Add new vertices to replace the primitive with another primitive with clamped values.
			new_prims = true;

			// Copy old values to tail of vertex buffer.
			// The vertex buffer is allocated so that there is always at least room for 3 new vertices at the end.
			for (u32 j = 0; j < n; j++)
				vertex[m_vertex.tail + j] = vertex[index[i + j]];

			// NaN is used as the "leave this axis untouched" marker.
			const float new_u_val = ambiguous_s ? 0.0f :
			                        huge_pos_s  ? 2047.0f :
			                        huge_neg_s  ? -2047.0f :
			                                      NAN;
			const float new_v_val = ambiguous_t ? 0.0f :
			                        huge_pos_t  ? 2047.0f :
			                        huge_neg_t  ? -2047.0f :
			                                      NAN;

			// If we are replacing both S and T, replace Q by 1.0f
			if (!std::isnan(new_u_val) && !std::isnan(new_v_val))
			{
				for (u32 j = 0; j < n; j++)
					vertex[m_vertex.tail + j].RGBAQ.Q = 1.0f;
			}

			// Try to replace huge/ambiguous values so that we get constant U or V across the entire primitive after interpolation
			if (!std::isnan(new_u_val))
			{
				for (u32 j = 0; j < n; j++)
					vertex[m_vertex.tail + j].ST.S = new_u_val * vertex[m_vertex.tail + j].RGBAQ.Q / tex_width;
			}

			if (!std::isnan(new_v_val))
			{
				// V is scaled by the texture height (TEX0.TH), not the width.
				for (u32 j = 0; j < n; j++)
					vertex[m_vertex.tail + j].ST.T = new_v_val * vertex[m_vertex.tail + j].RGBAQ.Q / tex_height;
			}

			// Make new indices point to new vertices
			for (u32 j = 0; j < n; j++)
				index[new_index_tail + j] = m_vertex.tail + j;

			// Advance tail since we pushed new vertices
			m_vertex.tail += n;

			if (m_vertex.tail >= m_vertex.maxcount)
			{
				GrowVertexBuffer();

				// GrowVertexBuffer() presumably reallocates m_vertex.buff / m_index.buff, which would
				// leave the cached pointers dangling. Re-read them (harmless if nothing moved).
				vertex = m_vertex.buff;
				index = m_index.buff;
			}
		}
		else if (new_index_tail < i) // If new_index_tail == i, don't update indices since no primitives have been culled
		{
			// Keep the same primitive so shift indices down
			for (u32 j = 0; j < n; j++)
				index[new_index_tail + j] = index[i + j];
		}

		new_index_tail += n;
	}

	m_index.tail = new_index_tail;

	// If indexed new primitives at the end of the buffer, update head and next also
	if (new_prims)
		m_vertex.head = m_vertex.next = m_vertex.tail;
}

void GSState::CalcAlphaMinMax(const int tex_alpha_min, const int tex_alpha_max)
{
if (m_vt.m_alpha.valid && tex_alpha_min == 0 && tex_alpha_max == 255)
Expand Down
4 changes: 4 additions & 0 deletions pcsx2/GS/GSState.h
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,10 @@ class GSState : public GSAlignedClass<32>
bool IsCoverageAlpha();
void CalcAlphaMinMax(const int tex_min, const int tex_max);
void CorrectATEAlphaMinMax(const u32 atst, const int aref);
void RoundSTCoords();
void FixHugeSTCoords();
template <u32 n, bool cull>
void FixHugeSTCoordsImpl();

public:
struct GSUploadQueue
Expand Down
15 changes: 15 additions & 0 deletions pcsx2/GS/GSVector4.h
Original file line number Diff line number Diff line change
Expand Up @@ -267,6 +267,16 @@ class alignas(16) GSVector4
return round<Round_PosInf>();
}

// Per-lane mask of the lanes that are not NaN (ordered self-compare).
__forceinline GSVector4 notnan() const
{
	const auto ordered_mask = _mm_cmpord_ps(m, m);
	return GSVector4(ordered_mask);
}

// Per-lane mask of the lanes that are NaN (unordered self-compare).
__forceinline GSVector4 isnan() const
{
	const auto unordered_mask = _mm_cmpunord_ps(m, m);
	return GSVector4(unordered_mask);
}

// http://jrfonseca.blogspot.com/2008/09/fast-sse2-pow-tables-or-polynomials.html

#define LOG_POLY0(x, c0) GSVector4(c0)
Expand Down Expand Up @@ -656,6 +666,11 @@ class alignas(16) GSVector4
return neg();
}

// Bitwise NOT: reinterprets the vector as GSVector4i, inverts it, and casts the result back.
__forceinline GSVector4 operator~() const
{
	const GSVector4i as_int = GSVector4i::cast(*this);
	return cast(~as_int);
}

__forceinline void operator+=(const GSVector4& v)
{
m = _mm_add_ps(m, v);
Expand Down
15 changes: 15 additions & 0 deletions pcsx2/GS/GSVector4_arm64.h
Original file line number Diff line number Diff line change
Expand Up @@ -241,6 +241,16 @@ class alignas(16) GSVector4
return GSVector4(vrndpq_f32(v4s));
}

// Compares the vector with itself for equality; a NaN lane never equals itself,
// so the result marks the lanes that are not NaN.
__forceinline GSVector4 notnan() const
{
	const GSVector4& self = *this;
	return self == self;
}

// Compares the vector with itself for inequality; only a NaN lane differs from itself,
// so the result marks the lanes that are NaN.
__forceinline GSVector4 isnan() const
{
	const GSVector4& self = *this;
	return self != self;
}

// http://jrfonseca.blogspot.com/2008/09/fast-sse2-pow-tables-or-polynomials.html

#define LOG_POLY0(x, c0) GSVector4(c0)
Expand Down Expand Up @@ -560,6 +570,11 @@ class alignas(16) GSVector4
return neg();
}

// Bitwise NOT: reinterprets the vector as GSVector4i, inverts it, and casts the result back.
__forceinline GSVector4 operator~() const
{
	const GSVector4i as_int = GSVector4i::cast(*this);
	return cast(~as_int);
}

__forceinline void operator+=(const GSVector4& v)
{
v4s = vaddq_f32(v4s, v.v4s);
Expand Down
9 changes: 5 additions & 4 deletions pcsx2/GS/Renderers/Common/GSVertexTrace.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,12 @@ void GSVertexTrace::Update(const void* vertex, const u16* index, int v_count, in

m_primclass = primclass;

u32 iip = m_state->PRIM->IIP;
u32 tme = m_state->PRIM->TME;
u32 fst = m_state->PRIM->FST;
u32 color = !(m_state->PRIM->TME && m_state->m_context->TEX0.TFX == TFX_DECAL && m_state->m_context->TEX0.TCC);
const u32 iip = m_state->PRIM->IIP;
const u32 tme = m_state->PRIM->TME;
const u32 fst = m_state->PRIM->FST;
const u32 color = !(m_state->PRIM->TME && m_state->m_context->TEX0.TFX == TFX_DECAL && m_state->m_context->TEX0.TCC);

// Call the correct function to find the min/max values
m_fmm[color][fst][tme][iip][primclass](*this, vertex, index, i_count);

// Potential float overflow detected. Better to use the slower division instead
Expand Down
Loading