Skip to content

Commit

Permalink
Fix inconsistency that could cause the optimization algorithm to osci…
Browse files Browse the repository at this point in the history
…llate.

Fixes cjlin1#225.

# Background

The optimization algorithm has three main calculations:
1. Select the working set `{i, j}` that minimizes the decrease in the objective function.
2. Change `alpha[i]` and `alpha[j]` to minimize the decrease in the objective function while respecting constraints.
3. Update the gradient of the objective function according to the changes to `alpha[i]` and `alpha[j]`.

All three calculations make use of the matrix `Q`, which is represented by the `QMatrix` class. The `QMatrix` class has two main methods:
- `get_Q`, which returns an array of values for a single column of the matrix; and
- `get_QD`, which returns an array of diagonal values.

# Problem

`Q` values are of type `Qfloat` while `QD` values are of type `double`. `Qfloat` is currently defined as `float`, so there can be inconsistency in the diagonal values returned by `get_Q` and `get_QD`. For example, in cjlin1#225, one of the diagonal values is `181.05748749793070829` as `double` and `180.99411909539512067` as `float`.

The first two calculations of the optimization algorithm access the diagonal values via `get_QD`. However, the third calculation accesses the diagonal values via `get_Q`. This inconsistency between the minimization calculations and the gradient update can cause the optimization algorithm to oscillate, as demonstrated by cjlin1#225.

# Solution

We change `get_Q` to return a new class called `QColumn` instead of a plain array of values. The `QColumn` class overloads the subscript operator, so accessing individual elements is the same as before. Internally though, the `QColumn` class will return the `QD` value when the diagonal element is accessed. This guarantees that all calculations are using the same values for the diagonal elements, eliminating the inconsistency.

# Alternatives Considered

Alternatively, we could change `Qfloat` to be defined as `double`. This would also eliminate the inconsistency; however, it would reduce the cache capacity by half.

# Future Changes

The Java code will be updated similarly in a separate commit.
  • Loading branch information
fumoboy007 committed Dec 21, 2024
1 parent 35e5596 commit 87d106b
Showing 1 changed file with 51 additions and 18 deletions.
69 changes: 51 additions & 18 deletions svm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,37 @@ void Cache::swap_index(int i, int j)
}
}

// Provides access to the elements in a single column of a QMatrix.
class QColumn {
public:
QColumn(const Qfloat *values, int column_idx, double diagonal_value)
: values(values), column_idx(column_idx), diagonal_value(diagonal_value) {
}

double operator[](int row_idx) const {
if (row_idx != column_idx) {
return values[row_idx];
} else {
// Return the value from the QD array so that calculations stay
// consistent, regardless of whether QColumn or the QD array
// are used.
//
// QD array is double precision while Qfloat could be single
// precision, so the diagonal values could be very different
// between the two. If one calculation uses QColumn while
// another calculation uses the QD array, the optimization
// algorithm may not converge.
return diagonal_value;
}
}

private:
const Qfloat *values;

int column_idx;
double diagonal_value;
};

//
// Kernel evaluation
//
Expand All @@ -197,7 +228,7 @@ void Cache::swap_index(int i, int j)
//
class QMatrix {
public:
virtual Qfloat *get_Q(int column, int len) const = 0;
virtual QColumn get_Q(int column, int len) const = 0;
virtual double *get_QD() const = 0;
virtual void swap_index(int i, int j) const = 0;
virtual ~QMatrix() {}
Expand All @@ -210,7 +241,7 @@ class Kernel: public QMatrix {

static double k_function(const svm_node *x, const svm_node *y,
const svm_parameter& param);
virtual Qfloat *get_Q(int column, int len) const = 0;
virtual QColumn get_Q(int column, int len) const = 0;
virtual double *get_QD() const = 0;
virtual void swap_index(int i, int j) const // no so const...
{
Expand Down Expand Up @@ -486,7 +517,7 @@ void Solver::reconstruct_gradient()
{
for(i=active_size;i<l;i++)
{
const Qfloat *Q_i = Q->get_Q(i,active_size);
QColumn Q_i = Q->get_Q(i,active_size);
for(j=0;j<active_size;j++)
if(is_free(j))
G[i] += alpha[j] * Q_i[j];
Expand All @@ -497,7 +528,7 @@ void Solver::reconstruct_gradient()
for(i=0;i<active_size;i++)
if(is_free(i))
{
const Qfloat *Q_i = Q->get_Q(i,l);
QColumn Q_i = Q->get_Q(i,l);
double alpha_i = alpha[i];
for(j=active_size;j<l;j++)
G[j] += alpha_i * Q_i[j];
Expand Down Expand Up @@ -548,7 +579,7 @@ void Solver::Solve(int l, const QMatrix& Q, const double *p_, const schar *y_,
for(i=0;i<l;i++)
if(!is_lower_bound(i))
{
const Qfloat *Q_i = Q.get_Q(i,l);
QColumn Q_i = Q.get_Q(i,l);
double alpha_i = alpha[i];
int j;
for(j=0;j<l;j++)
Expand Down Expand Up @@ -594,8 +625,8 @@ void Solver::Solve(int l, const QMatrix& Q, const double *p_, const schar *y_,

// update alpha[i] and alpha[j], handle bounds carefully

const Qfloat *Q_i = Q.get_Q(i,active_size);
const Qfloat *Q_j = Q.get_Q(j,active_size);
QColumn Q_i = Q.get_Q(i,active_size);
QColumn Q_j = Q.get_Q(j,active_size);

double C_i = get_C(i);
double C_j = get_C(j);
Expand Down Expand Up @@ -822,9 +853,11 @@ int Solver::select_working_set(int &out_i, int &out_j)
}

int i = Gmax_idx;
const Qfloat *Q_i = NULL;
if(i != -1) // NULL Q_i not accessed: Gmax=-INF if i=-1
Q_i = Q->get_Q(i,active_size);
if (i == -1) {
return 1;
}

QColumn Q_i = Q->get_Q(i,active_size);

for(int j=0;j<active_size;j++)
{
Expand Down Expand Up @@ -1071,8 +1104,8 @@ int Solver_NU::select_working_set(int &out_i, int &out_j)

int ip = Gmaxp_idx;
int in = Gmaxn_idx;
const Qfloat *Q_ip = NULL;
const Qfloat *Q_in = NULL;
QColumn Q_ip = QColumn(NULL, -1, 0);
QColumn Q_in = QColumn(NULL, -1, 0);
if(ip != -1) // NULL Q_ip not accessed: Gmaxp=-INF if ip=-1
Q_ip = Q->get_Q(ip,active_size);
if(in != -1)
Expand Down Expand Up @@ -1280,7 +1313,7 @@ class SVC_Q: public Kernel
QD[i] = (this->*kernel_function)(i,i);
}

Qfloat *get_Q(int i, int len) const
QColumn get_Q(int i, int len) const
{
Qfloat *data;
int start, j;
Expand All @@ -1292,7 +1325,7 @@ class SVC_Q: public Kernel
for(j=start;j<len;j++)
data[j] = (Qfloat)(y[i]*y[j]*(this->*kernel_function)(i,j));
}
return data;
return QColumn(data, i, QD[i]);
}

double *get_QD() const
Expand Down Expand Up @@ -1332,7 +1365,7 @@ class ONE_CLASS_Q: public Kernel
QD[i] = (this->*kernel_function)(i,i);
}

Qfloat *get_Q(int i, int len) const
QColumn get_Q(int i, int len) const
{
Qfloat *data;
int start, j;
Expand All @@ -1341,7 +1374,7 @@ class ONE_CLASS_Q: public Kernel
for(j=start;j<len;j++)
data[j] = (Qfloat)(this->*kernel_function)(i,j);
}
return data;
return QColumn(data, i, QD[i]);
}

double *get_QD() const
Expand Down Expand Up @@ -1398,7 +1431,7 @@ class SVR_Q: public Kernel
swap(QD[i],QD[j]);
}

Qfloat *get_Q(int i, int len) const
QColumn get_Q(int i, int len) const
{
Qfloat *data;
int j, real_i = index[i];
Expand All @@ -1417,7 +1450,7 @@ class SVR_Q: public Kernel
schar si = sign[i];
for(j=0;j<len;j++)
buf[j] = (Qfloat) si * (Qfloat) sign[j] * data[index[j]];
return buf;
return QColumn(buf, i, QD[i]);
}

double *get_QD() const
Expand Down

0 comments on commit 87d106b

Please sign in to comment.