Skip to content

Commit

Permalink
make identifier for predicted class labels
Browse files Browse the repository at this point in the history
  • Loading branch information
smassung committed Aug 31, 2015
1 parent 9a4c682 commit 3ca6f03
Show file tree
Hide file tree
Showing 8 changed files with 43 additions and 30 deletions.
4 changes: 2 additions & 2 deletions include/classify/confusion_matrix.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ class confusion_matrix
* @param actual The actual class label
* @param times The number of times this prediction was made
*/
void add(const class_label& predicted, const class_label& actual,
void add(const predicted_label& predicted, const class_label& actual,
size_t times = 1);

/**
Expand Down Expand Up @@ -71,7 +71,7 @@ class confusion_matrix
// without causing an internal compiler error (segmentation fault) in
// GCC 4.8.2
/** typedef for predicted class assignments to counts. */
typedef std::unordered_map<std::pair<class_label, class_label>, size_t,
typedef std::unordered_map<std::pair<predicted_label, class_label>, size_t,
decltype(&confusion_matrix::string_pair_hash)>
prediction_counts;

Expand Down
8 changes: 7 additions & 1 deletion include/meta.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
#include "util/identifiers.h"
/**
* The ModErn Text Analysis toolkit is a suite of natural language processing,
* classification, information retreival, data mining, and other applications
* classification, information retrieval, data mining, and other applications
* of text processing.
*/
namespace meta
Expand All @@ -29,6 +29,12 @@ namespace meta
*/
MAKE_IDENTIFIER_UDL(class_label, std::string, _cl)

/*
* Represents a *predicted* class label that is used in classification or
* feature selection; it may not be the true class label.
*/
MAKE_IDENTIFIER_UDL(predicted_label, std::string, _pl)

/*
* Numbering system for string terms.
*/
Expand Down
2 changes: 1 addition & 1 deletion src/classify/classifier/classifier.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ confusion_matrix classifier::test(const std::vector<doc_id>& docs)
{
confusion_matrix matrix;
for (auto& d_id : docs)
matrix.add(classify(d_id), idx_->label(d_id));
matrix.add(predicted_label{classify(d_id)}, idx_->label(d_id));

return matrix;
}
Expand Down
2 changes: 1 addition & 1 deletion src/classify/classifier/svm_wrapper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ confusion_matrix svm_wrapper::test(const std::vector<doc_id>& docs)
// number of testing documents
std::getline(in, str_val);
uint32_t value = std::stoul(str_val);
class_label predicted = idx_->class_label_from_id(label_id{value});
predicted_label predicted{idx_->class_label_from_id(label_id{value})};
class_label actual = idx_->label(d_id);
matrix.add(predicted, actual);
}
Expand Down
30 changes: 18 additions & 12 deletions src/classify/confusion_matrix.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,13 +22,12 @@ confusion_matrix::confusion_matrix()
/* nothing */
}

void confusion_matrix::add(const class_label& predicted,
void confusion_matrix::add(const predicted_label& predicted,
const class_label& actual, size_t times)
{
std::pair<class_label, class_label> prediction{predicted, actual};
std::pair<predicted_label, class_label> prediction{predicted, actual};
predictions_[prediction] += times;
counts_[actual] += times;
classes_.insert(predicted);
classes_.insert(actual);
total_ += times;
}
Expand Down Expand Up @@ -73,9 +72,11 @@ double confusion_matrix::precision(const class_label& lbl) const
{
double denom = 0.0;
for (auto& cls : classes_)
denom += map::safe_at(predictions_, std::make_pair(lbl, cls));
denom += map::safe_at(predictions_,
std::make_pair(predicted_label{lbl}, cls));

double correct = map::safe_at(predictions_, std::make_pair(lbl, lbl));
double correct
= map::safe_at(predictions_, std::make_pair(predicted_label{lbl}, lbl));

if (denom != 0.0)
return correct / denom;
Expand All @@ -96,9 +97,11 @@ double confusion_matrix::recall(const class_label& lbl) const
{
double denom = 0.0;
for (auto& cls : classes_)
denom += map::safe_at(predictions_, std::make_pair(cls, lbl));
denom += map::safe_at(predictions_,
std::make_pair(predicted_label{cls}, lbl));

double correct = map::safe_at(predictions_, std::make_pair(lbl, lbl));
double correct
= map::safe_at(predictions_, std::make_pair(predicted_label{lbl}, lbl));

if (denom != 0.0)
return correct / denom;
Expand Down Expand Up @@ -135,8 +138,8 @@ void confusion_matrix::print(std::ostream& out) const
<< " | ";
for (auto& pred_class : classes_)
{
auto it
= predictions_.find(std::make_pair(pred_class, actual_class));
auto it = predictions_.find(
std::make_pair(predicted_label{pred_class}, actual_class));
if (it != predictions_.end())
{
double percent = static_cast<double>(it->second)
Expand Down Expand Up @@ -223,7 +226,8 @@ double confusion_matrix::accuracy() const
{
double correct = 0.0;
for (auto& cls : classes_)
correct += map::safe_at(predictions_, std::make_pair(cls, cls));
correct += map::safe_at(predictions_,
std::make_pair(predicted_label{cls}, cls));
return correct / total_;
}

Expand Down Expand Up @@ -266,8 +270,10 @@ bool confusion_matrix::mcnemar_significant(const confusion_matrix& a,

for (auto& cls : classes)
{
auto a_count = map::safe_at(a.predictions_, std::make_pair(cls, cls));
auto b_count = map::safe_at(b.predictions_, std::make_pair(cls, cls));
auto a_count = map::safe_at(a.predictions_,
std::make_pair(predicted_label{cls}, cls));
auto b_count = map::safe_at(b.predictions_,
std::make_pair(predicted_label{cls}, cls));
if (a_count > b_count)
a_adv += (a_count - b_count);
else if (b_count > a_count)
Expand Down
2 changes: 1 addition & 1 deletion src/sequence/crf/tools/crf-test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@ int main(int argc, char** argv)
for (const auto& obs : seq)
{
auto tag = analyzer.tag(obs.label());
matrix.add(class_label{tag}, class_label{obs.tag()});
matrix.add(predicted_label{tag}, class_label{obs.tag()});
}
}
matrix.print();
Expand Down
3 changes: 2 additions & 1 deletion src/sequence/tools/greedy_tagger_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,8 @@ int main(int argc, char** argv)
tagger.tag(seq);

for (uint64_t t = 0; t < seq.size(); ++t)
matrix.add(class_label{seq[t].tag()}, class_label{correct[t]});
matrix.add(predicted_label{seq[t].tag()},
class_label{correct[t]});
}
}
matrix.print_stats();
Expand Down
22 changes: 11 additions & 11 deletions src/test/classifier_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -187,20 +187,20 @@ int confusion_matrix_test()
"matrix-test", [&]()
{
// We have 3 classes {A, B, C} and get the following predictions:
std::vector<std::pair<class_label, class_label>> preds;
preds.emplace_back("A"_cl, "A"_cl);
preds.emplace_back("B"_cl, "A"_cl);
preds.emplace_back("C"_cl, "A"_cl);
preds.emplace_back("B"_cl, "B"_cl);
preds.emplace_back("B"_cl, "B"_cl);
preds.emplace_back("B"_cl, "B"_cl);
preds.emplace_back("A"_cl, "C"_cl);
preds.emplace_back("A"_cl, "C"_cl);
preds.emplace_back("A"_cl, "C"_cl);
std::vector<std::pair<predicted_label, class_label>> preds;
preds.emplace_back("A"_pl, "A"_cl);
preds.emplace_back("B"_pl, "A"_cl);
preds.emplace_back("C"_pl, "A"_cl);
preds.emplace_back("B"_pl, "B"_cl);
preds.emplace_back("B"_pl, "B"_cl);
preds.emplace_back("B"_pl, "B"_cl);
preds.emplace_back("A"_pl, "C"_cl);
preds.emplace_back("A"_pl, "C"_cl);
preds.emplace_back("A"_pl, "C"_cl);

classify::confusion_matrix mtx;
for (auto& pair : preds)
mtx.add(class_label{pair.first}, class_label{pair.second});
mtx.add(pair.first, pair.second);

ASSERT_APPROX_EQUAL(mtx.accuracy(), 4.0 / 9);

Expand Down

0 comments on commit 3ca6f03

Please sign in to comment.