Skip to content

Commit

Permalink
Merge pull request #501 from PrimozGodec/word-cloud-no-zeros
Browse files Browse the repository at this point in the history
Word Cloud: Remove words with zero weights from word cloud
  • Loading branch information
ajdapretnar authored Feb 24, 2020
2 parents 9180817 + eeabcab commit ecb7162
Show file tree
Hide file tree
Showing 2 changed files with 67 additions and 31 deletions.
63 changes: 45 additions & 18 deletions orangecontrib/text/widgets/owwordcloud.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,35 @@
N_BEST_PLOTTED = 200


class TableModel(PyTableModel):
    """
    Table model that renders the first column as a fixed-point number.

    The number of decimals is taken from ``self.precision`` at display
    time, so it can be changed after construction.
    """

    def __init__(self, precision, **kwargs):
        """
        Parameters
        ----------
        precision
            Number of decimal places used to format the first column.
        kwargs
            Passed unchanged to the parent model's constructor.
        """
        super().__init__(**kwargs)
        self.precision = precision

    def data(self, index, role=Qt.DisplayRole):
        """
        Return the data for `index` under `role`.

        For the display role, values of the first column are formatted
        with the number of decimal places defined by self.precision, which
        can be changed based on the type of weights - e.g. counts are shown
        without decimal places. Every other request is answered by the
        parent model.
        """
        source_row = self.mapToSourceRows(index.row())
        col = index.column()
        if role != Qt.DisplayRole or col != 0:
            return super().data(index, role)
        weight = float(self[source_row][col])
        return format(weight, f".{self.precision}f")

    def set_precision(self, precision: int):
        """
        Setter for precision.

        Parameters
        ----------
        precision
            Number of decimal places to format the weights.
        """
        self.precision = precision


class OWWordCloud(widget.OWWidget):
name = "Word Cloud"
priority = 510
Expand Down Expand Up @@ -175,7 +204,7 @@ def update_selection(self, words):
self.__nope = False

view = self.tableview = TableView(self)
model = self.tablemodel = PyTableModel(parent=self)
model = self.tablemodel = TableModel(2, parent=self)
proxymodel = QSortFilterProxyModel(
self,
dynamicSortFilter=True,
Expand Down Expand Up @@ -255,10 +284,20 @@ def _repopulate_wordcloud(
weights
Words' weights
"""
def is_whole(d):
    """
    Whether or not d is a whole number.

    Any integral type counts as whole (builtin int and bool, plus types
    registered as ``numbers.Integral``). Other real numbers are whole when
    their float value has no fractional part. NaN, infinities and
    non-numeric values are not whole.
    """
    # local import: the module-level import block is not part of this span
    import numbers

    if isinstance(d, numbers.Integral):
        return True
    # checking against the ABC instead of the builtin float also accepts
    # real types that do not subclass float (e.g. Fraction)
    return isinstance(d, numbers.Real) and float(d).is_integer()

words, weights = words[:N_BEST_PLOTTED], weights[:N_BEST_PLOTTED]
self.shown_words, self.shown_weights = words, weights

# Repopulate table
self.tablemodel.set_precision(
0 if all(is_whole(w) for w in weights) else 2
)
self.tablemodel.wrap(list(zip(weights, words)))
self.tableview.sortByColumn(0, Qt.DescendingOrder)

Expand Down Expand Up @@ -348,25 +387,13 @@ def _bow_words(self):
This function extracts words from bag of words features and assigns each
of them a frequency equal to its average bow count.
"""
bow_features = self._get_bow_variables()
if not bow_features:
return {}

average_bows = {
f.name: self.corpus.get_column_view(f)[0].mean()
for f in bow_features
f.name: self.corpus.X[:, i].mean()
for i, f in enumerate(self.corpus.domain.attributes)
if f.attributes.get("bow-feature", False)
}
return average_bows

def _get_bow_variables(self):
"""
Extract bow variables from data
"""
return [
var
for var in self.corpus.domain.variables
if var.attributes.get("bow-feature", False)
]
# return only positive bow weights (those == 0 are non-existing words)
return {f: w for f, w in average_bows.items() if w > 0}

def handleNewSignals(self):
if self.topic is not None and len(self.topic):
Expand Down
35 changes: 22 additions & 13 deletions orangecontrib/text/widgets/tests/test_owworldcloud.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,15 +62,20 @@ def test_bow_features(self):
v.attributes["bow-feature"] = True

self.send_signal(self.widget.Inputs.corpus, data)
self.assertDictEqual(
self.widget.corpus_counter, {"Word1": 1, "Word2": 2, "Word3": 2})
weights = list(zip(*sorted(self.widget.corpus_counter.items())))[1]
# the mean is computed in floating point, so compare with
# assert_array_almost_equal instead of exact equality
np.testing.assert_array_almost_equal(weights, [1, 2, 2])

output = self.get_output(self.widget.Outputs.word_counts)
np.testing.assert_array_equal([2, 2, 1], output.X.flatten())
np.testing.assert_array_almost_equal([2, 2, 1], output.X.flatten())
np.testing.assert_array_equal(
["Word2", "Word3", "Word1"], output.metas.flatten())
self.assertListEqual(
[(2.0, 'Word2'), (2.0, 'Word3'), (1.0, 'Word1')],
self.widget.tablemodel[:])
["Word3", "Word2", "Word1"], output.metas.flatten())
self.assertTupleEqual(
("Word3", "Word2", "Word1"),
list(zip(*self.widget.tablemodel[:]))[1])
np.testing.assert_array_almost_equal(
[2, 2, 1],
list(zip(*self.widget.tablemodel[:]))[0])

# try with one word not bow-feature
data = self.corpus[:3]
Expand All @@ -81,15 +86,19 @@ def test_bow_features(self):
v.attributes["bow-feature"] = True

self.send_signal(self.widget.Inputs.corpus, data)
self.assertDictEqual(
self.widget.corpus_counter, {"Word1": 1, "Word2": 2})
weights = list(zip(*sorted(self.widget.corpus_counter.items())))[1]
np.testing.assert_array_almost_equal(weights, [1, 2])

output = self.get_output(self.widget.Outputs.word_counts)
np.testing.assert_array_equal([2, 1], output.X.flatten())
np.testing.assert_array_almost_equal([2, 1], output.X.flatten())
np.testing.assert_array_equal(
["Word2", "Word1"], output.metas.flatten())
self.assertListEqual(
[(2.0, 'Word2'), (1.0, 'Word1')],
self.widget.tablemodel[:])
self.assertTupleEqual(
("Word2", "Word1"),
list(zip(*self.widget.tablemodel[:]))[1])
np.testing.assert_array_almost_equal(
[2, 1],
list(zip(*self.widget.tablemodel[:]))[0])

def test_bow_info(self):
"""
Expand Down

0 comments on commit ecb7162

Please sign in to comment.