diff --git a/orangecontrib/network/network/base.py b/orangecontrib/network/network/base.py index 1459999a..b92a33cf 100644 --- a/orangecontrib/network/network/base.py +++ b/orangecontrib/network/network/base.py @@ -1,5 +1,5 @@ from functools import reduce, wraps, partial -from typing import Sequence +from typing import Sequence, Union import numpy as np import scipy.sparse as sp @@ -15,6 +15,9 @@ def __init__(self, name: str = ""): self.edges = edges.tocsr(copy=True) self.edges.sum_duplicates() + # A sequence whose elements correspond to edges in the same order as + # elements of self.edges.data + # (i.e. sorted by source (row) then destination (column) self.edge_data = edge_data self.name = name @@ -70,6 +73,8 @@ def _compute_degree(edges, node, weighted): return to - fr def subset(self, mask, node_renumeration, shape): + # TODO: This is wrong. edges is not sparse, because it can be + # (and usually is) a Table edges = self.edges.tocoo() edge_mask = np.logical_and(mask[edges.row], mask[edges.col]) row = node_renumeration[edges.row[edge_mask]] @@ -148,7 +153,7 @@ def _make_twoway_edges(self): # Save (temporary) memory and CPU time edges.data = as_strided(1, (n_edges, ), (0,)) else: - max_weight = np.max(edges.data) + max_weight = 2 * np.max(edges.data) edges.data[edges.data == 0] = max_weight + 1 twe = edges + edges.transpose() @@ -159,6 +164,7 @@ def _make_twoway_edges(self): twe.data = as_strided(max_weight, (n_edges, ), (0,)) else: twe.data[twe.data > max_weight] = 0 + # TODO: Diagonal elements have double weights... return twe def degrees(self, *, weighted=False): @@ -201,7 +207,11 @@ def wrapper(graph, *args, **kwargs): class Network: - def __init__(self, nodes: Sequence, edges: Sequence, name: str = "", + def __init__(self, nodes: Sequence, + edges: Union[Edges, + sp.csr_matrix, + Sequence[Union[Edges, sp.csr_matrix]]], + name: str = "", coordinates: np.ndarray = None): """ Attributes: diff --git a/orangecontrib/network/network/compose.py b/orangecontrib/network/network/compose.py new file mode 100644 index 00000000..69b10d23 --- /dev/null +++ b/orangecontrib/network/network/compose.py @@ -0,0 +1,143 @@ +from typing import Union, Dict, Optional + +import numpy as np +import scipy.sparse as sp + +from Orange.data import Table, StringVariable, ContinuousVariable, Domain +from orangecontrib.network import Network +from orangecontrib.network.network.base import DirectedEdges, UndirectedEdges + +MAX_LABELS = 100_000 + + +class ComposeError(Exception): + pass + + +class NonUniqueLabels(ComposeError): + pass + + +class MismatchingEdgeVariables(ComposeError): + pass + + +class UnknownNodes(ComposeError): + pass + + +def network_from_tables( + data: Table, + label_variable: StringVariable, + edges: Table, + edge_src_variable: Union[StringVariable, ContinuousVariable], + edge_dst_variable: Union[StringVariable, ContinuousVariable], + directed=False) -> Network: + + labels = data.get_column(label_variable) + label_idcs = {label: i for i, label in enumerate(labels)} + if len(label_idcs) < len(labels): + raise NonUniqueLabels() + + src_col, dst_col = _edge_columns(edges, edge_src_variable, edge_dst_variable) + if isinstance(edge_src_variable, ContinuousVariable): + row_ind = _float_to_ind(src_col, edge_src_variable.name, len(data)) + col_ind = _float_to_ind(dst_col, edge_dst_variable.name, len(data)) + else: + row_ind = _str_to_ind(src_col, label_idcs) + col_ind = _str_to_ind(dst_col, label_idcs) + + edge_data = _reduced_edge_data(edges, edge_src_variable, edge_dst_variable) + return _net_from_data_and_edges(data, edge_data, row_ind, col_ind, directed) + + +def network_from_edge_table( + edges: Table, + edge_src_variable: Union[StringVariable, ContinuousVariable], + edge_dst_variable: Union[StringVariable, ContinuousVariable], + directed=False) -> Network: + + src_col, dst_col = _edge_columns(edges, edge_src_variable, edge_dst_variable) + if isinstance(edge_src_variable, ContinuousVariable): + row_ind = _float_to_ind(src_col, edge_src_variable.name) + col_ind = _float_to_ind(dst_col, edge_dst_variable.name) + labels = [str(x) + for x in range(1, max(np.max(row_ind), np.max(col_ind)) + 2)] + else: + labels = sorted(set(src_col) | set(dst_col)) + label_idcs = {label: i for i, label in enumerate(labels)} + row_ind = _str_to_ind(src_col, label_idcs) + col_ind = _str_to_ind(dst_col, label_idcs) + + domain = Domain([], [], [StringVariable("node_label")]) + n = len(labels) + labels = Table.from_numpy( + domain, np.empty((n, 0)), np.empty((n, 0)), np.array([labels]).T) + + edge_data = _reduced_edge_data(edges, edge_src_variable, edge_dst_variable) + return _net_from_data_and_edges(labels, edge_data, row_ind, col_ind, directed) + + +def _net_from_data_and_edges(data, edge_data, row_ind, col_ind, directed=False): + assert len(row_ind) == len(col_ind) + + if edge_data is not None: + assert len(row_ind) == len(edge_data) + edge_data = _sort_edges(row_ind, col_ind, edge_data) + + ones = np.lib.stride_tricks.as_strided(np.ones(1), (len(row_ind),), (0,)) + edge_type = DirectedEdges if directed else UndirectedEdges + net_edges = edge_type( + sp.csr_array((ones, (row_ind, col_ind)), shape=(len(data), ) * 2), + edge_data) + return Network(data, net_edges) + + +def _sort_edges(row_ind, col_ind, edge_data): + ocol = np.argsort(col_ind) + dcol = np.argsort(row_ind[ocol]) + return edge_data[ocol[dcol]] # same, but faster than as edge_data[ocol][dcol] + + +def _reduced_edge_data(edges, edge_src_variable, edge_dst_variable): + domain = edges.domain + parts = [[var for var in part + if var not in (edge_src_variable, edge_dst_variable)] + for part in (domain.attributes, domain.class_vars, domain.metas)] + if not any(parts): + return None + return edges.transform(Domain(*parts)) + + +def _edge_columns(edges, edge_src_variable, edge_dst_variable): + if type(edge_src_variable) is not type(edge_dst_variable): + raise MismatchingEdgeVariables() + + return (edges.get_column(edge_src_variable), + edges.get_column(edge_dst_variable)) + + +def _str_to_ind(col: np.ndarray, label_idcs: Dict[str, int]) -> np.ndarray: + ind = np.fromiter((label_idcs.get(x, -1) for x in col), + count=len(col), dtype=int) + if np.min(ind) == -1: + raise UnknownNodes("Unknown labels: " + + ", ".join(sorted(set(col) - set(label_idcs)))) + return ind + + +def _float_to_ind(col: np.ndarray, + var_name: str, + nlabels: Optional[int] = None) -> np.ndarray: + mi, ma = np.min(col), np.max(col) + if mi < 0: + raise UnknownNodes("negative vertex indices") + elif mi == 0: + raise UnknownNodes("vertex indices must be 1-based") + elif ma > (nlabels or MAX_LABELS): + raise UnknownNodes("some indices are too large") + elif np.isnan(mi) or np.isnan(ma): + raise UnknownNodes(f"{var_name} has missing values") + elif not np.all(np.modf(col)[0] == 0): + raise UnknownNodes("some indices are non-integer") + return col.astype(int) - 1 diff --git a/orangecontrib/network/network/readwrite.py b/orangecontrib/network/network/readwrite.py index 1f13bbad..41debb1f 100644 --- a/orangecontrib/network/network/readwrite.py +++ b/orangecontrib/network/network/readwrite.py @@ -51,17 +51,29 @@ def read_vertices(lines): def read_edges(id_idx, lines, nvertices): - lines = [(id_idx[v1], id_idx[v2], abs(float(value))) - for v1, v2, value, *_ in (line.split()[:3] + [1] - for line in lines)] - v1s, v2s, values = zip(*lines) - values = np.array(values) - if values.size and np.all(values == values[0]): - values = np.lib.stride_tricks.as_strided( - values[0], (len(values), ), (0, )) + def fake_data(x, n): + values = np.lib.stride_tricks.as_strided(x, (n, ), (0,)) values.flags.writeable = False - return sp.coo_matrix((values, (np.array(v1s), np.array(v2s))), - shape=(nvertices, nvertices)) + return values + + + lines = [(id_idx[v1], id_idx[v2], value) + for v1, v2, value, *_ in ( + (line.split(maxsplit=2) + [None])[:3] for line in lines)] + v1s, v2s, values = zip(*lines) + try: + values = np.array(values, dtype=float) + if np.all(np.isnan(values)): + values = fake_data(np.array(1.), len(values)) + elif values.size and np.all(values == values[0]): + values = fake_data(values[0], len(values)) + edge_data = None + except ValueError: + edge_data = np.array(values) + values = fake_data(np.array(1.), len(v1s)) + return (sp.coo_matrix((values, (np.array(v1s), np.array(v2s))), + shape=(nvertices, nvertices)), + edge_data) def read_edges_list(id_idx, lines, nvertices): @@ -121,7 +133,7 @@ def check_has_vertices(): check_has_vertices() edges.append( EdgeType[part_type=="*arcs"]( - read_edges(id_idx, line_part, len(labels)), + *read_edges(id_idx, line_part, len(labels)), name=part_args.strip() or part_type[1:])) elif part_type in ("*edgeslist", "*arcslist"): check_has_vertices() diff --git a/orangecontrib/network/tests/__init__.py b/orangecontrib/network/network/tests/__init__.py similarity index 100% rename from orangecontrib/network/tests/__init__.py rename to orangecontrib/network/network/tests/__init__.py diff --git a/orangecontrib/network/tests/test-arcslist.net b/orangecontrib/network/network/tests/test-arcslist.net similarity index 100% rename from orangecontrib/network/tests/test-arcslist.net rename to orangecontrib/network/network/tests/test-arcslist.net diff --git a/orangecontrib/network/network/tests/test_compose.py b/orangecontrib/network/network/tests/test_compose.py new file mode 100644 index 00000000..2f391f87 --- /dev/null +++ b/orangecontrib/network/network/tests/test_compose.py @@ -0,0 +1,273 @@ +import unittest + +import numpy as np + +from Orange.data import Domain, Table, ContinuousVariable, StringVariable +from orangecontrib.network.network.base import DirectedEdges, UndirectedEdges +from orangecontrib.network.network.compose import ( + UnknownNodes, MismatchingEdgeVariables, NonUniqueLabels, + network_from_tables, network_from_edge_table, + _net_from_data_and_edges, _sort_edges, _reduced_edge_data, _edge_columns, + _float_to_ind, _str_to_ind +) + + +class TestComposeBase(unittest.TestCase): + def setUp(self) -> None: + self.lab, self.labx = StringVariable("lab"), ContinuousVariable("x") + self.data = Table.from_list(Domain([self.labx], None, [self.lab]), + [[1.3, "foo"], + [1.8, "bar"], + [2.7, "qux"], + [1.1, "baz"], + [np.nan, "bax"], + ]) + + self.srcs, self.dsts = metas = \ + StringVariable("srcs"), StringVariable("dsts") + self.w, self.src1, self.dst1 = attrs = [ + ContinuousVariable(x) for x in ("w src1 dst1").split()] + self.edges = Table.from_list(Domain(attrs, None, metas), + [[1, 1, 4, "foo", "baz"], + [8, 4, 4, "baz", "baz"], + [3, 5, 2, "bax", "bar"], + [2, 1, 2, "foo", "bar"] + ]) + + +class TestUtils(TestComposeBase): + def test_net_from_data_and_edges(self): + row_ind = self.edges.get_column(1).astype(int) - 1 + col_ind = self.edges.get_column(2).astype(int) - 1 + exp = np.zeros((5, 5), dtype=int) + exp[row_ind, col_ind] = 1 + + edge_data = self.edges.transform(Domain([self.edges.domain[0]], None)) + net = _net_from_data_and_edges(self.data, edge_data, row_ind, col_ind) + self.assertIs(net.nodes, self.data) + self.assertIsInstance(net.edges[0], UndirectedEdges) + np.testing.assert_equal(net.edges[0].edges.todense(), exp) + np.testing.assert_equal(net.edges[0].edge_data, np.array([[2, 1, 8, 3]]).T) + + net = _net_from_data_and_edges(self.data, edge_data, row_ind, col_ind, + directed=True) + self.assertIsInstance(net.edges[0], DirectedEdges) + + net = _net_from_data_and_edges(self.data, None, row_ind, col_ind) + self.assertIs(net.nodes, self.data) + self.assertIsInstance(net.edges[0], UndirectedEdges) + np.testing.assert_equal(net.edges[0].edges.todense(), exp) + + net = _net_from_data_and_edges(self.data, None, row_ind, col_ind, + directed=True) + self.assertIs(net.nodes, self.data) + self.assertIsInstance(net.edges[0], DirectedEdges) + np.testing.assert_equal(net.edges[0].edges.todense(), exp) + + def test_sort_edges(self): + edge_data = np.array([1, 3, 5, 7, 9, 2, 4, 6, 8]) + row_indss = np.array([1, 2, 3, 1, 2, 3, 1, 2, 3]) + col_indss = np.array([2, 2, 2, 1, 1, 1, 3, 3, 3]) + np.testing.assert_equal(_sort_edges(row_indss, col_indss, edge_data), + [7, 1, 4, 9, 3, 6, 2, 5, 8]) + + def test_reduced_edge_data(self): + attrs = tuple(ContinuousVariable(x) for x in "abcdefghi") + data = Table.from_list(Domain(attrs[:3], attrs[3], attrs[4:]), + [[0] * 9]) + + domain = _reduced_edge_data(data, attrs[0], attrs[5]).domain + self.assertEqual(domain.attributes, attrs[1:3]) + self.assertEqual(domain.class_var, attrs[3]) + self.assertEqual(domain.metas, attrs[4:5] + attrs[6:]) + + domain = _reduced_edge_data(data, attrs[2], attrs[3]).domain + self.assertEqual(domain.attributes, attrs[:2]) + self.assertIsNone(domain.class_var) + self.assertEqual(domain.metas, attrs[4:]) + + data = Table.from_list(Domain([attrs[0]], None, [attrs[1]]), [[0, 0]]) + self.assertIsNone(_reduced_edge_data(data, attrs[0], attrs[1])) + + def test_edge_columns(self): + c, d = ContinuousVariable("c"), ContinuousVariable("d") + s, t = StringVariable("s"), StringVariable("t") + domain = Domain([c], None, [d, s, t]) + edges = Table.from_list(domain, [[3, 1, "foo", "bar"], + [0, 2, "bar", "baz"]]) + + col1, col2 = _edge_columns(edges, c, d) + np.testing.assert_equal(col1, [3, 0]) + np.testing.assert_equal(col2, [1, 2]) + + col1, col2 = _edge_columns(edges, s, t) + np.testing.assert_equal(col1, ["foo", "bar"]) + np.testing.assert_equal(col2, ["bar", "baz"]) + + self.assertRaises(MismatchingEdgeVariables, + _edge_columns, edges, c, s) + + def test_str_to_ind(self): + np.testing.assert_equal( + _str_to_ind(np.array("foo foo baz bar".split()), + {"foo": 0, "bar": 1, "baz": 2}), + [0, 0, 2, 1]) + + def test_str_to_ind_errors(self): + self.assertRaisesRegex(UnknownNodes, ".*known.*baz.*", + _str_to_ind, + np.array("foo foo baz bar".split()), + {"foo": 0, "bar": 1}) + + def test_float_to_ind(self): + col = np.array([1, 7, 3, 2, 4], dtype=float) + a = _float_to_ind(col, "x") + np.testing.assert_equal(a, col - 1) + self.assertEqual(a.dtype, int) + + a = _float_to_ind(col, "x", 7) + np.testing.assert_equal(a, col - 1) + self.assertEqual(a.dtype, int) + + def test_float_to_ind_errors(self): + self.assertRaisesRegex( + UnknownNodes, ".*missing values.*", + _float_to_ind, np.array([1, 7, 3, np.nan, 4]), "x" + ) + self.assertRaisesRegex( + UnknownNodes, ".*non-integer.*", + _float_to_ind, np.array([1, 7, 3, 1.5, 4]), "x" + ) + self.assertRaisesRegex( + UnknownNodes, ".*negative.*", + _float_to_ind, np.array([1, 7, 3, -1, 4]), "x" + ) + self.assertRaisesRegex( + UnknownNodes, ".*1-based.*", + _float_to_ind, np.array([1, 7, 3, 0, 4]), "x" + ) + self.assertRaisesRegex( + UnknownNodes, ".*large.*", + _float_to_ind, np.array([1, 7, 3, 2, 4]), "x", 5 + ) + self.assertRaisesRegex( + UnknownNodes, ".*large.*", + _float_to_ind, np.array([1, 7, 3e10, 2, 4]), "x", 5 + ) + + +class TestFunctions(TestComposeBase): + def test_network_from_tables(self): + exp_edges = np.zeros((5, 5), dtype=int) + exp_edges[0, 1] = exp_edges[0, 3] = exp_edges[3, 3] = exp_edges[4, 1] = 1 + odom = self.edges.domain + + for src, dst, tst in ((self.srcs, self.dsts, "edges as strings"), + (self.src1, self.dst1, "edges as indices")): + with self.subTest(tst): + network = network_from_tables(self.data, self.lab, + self.edges, src, dst) + self.assertIsInstance(network.edges[0], UndirectedEdges) + np.testing.assert_equal(network.edges[0].edges.todense(), exp_edges) + self.assertIs(network.nodes, self.data) + + network = network_from_tables(self.data, self.lab, + self.edges, src, dst, + directed=True) + self.assertIsInstance(network.edges[0], DirectedEdges) + np.testing.assert_equal(network.edges[0].edges.todense(), exp_edges) + # node data is assigned as is + self.assertIs(network.nodes, self.data) + # edge data must be sorted by rows, then columns + np.testing.assert_equal(network.edges[0].edge_data[:, 0], + np.array([[2, 1, 8, 3]]).T) + # edge data must not contain the used columns + edom = network.edges[0].edge_data.domain + self.assertEqual(len(edom.attributes + edom.metas), + len(odom.attributes + odom.metas) - 2) + + # just two attributes in edges -> no edge_data afterwards + red_edges = self.edges.transform(Domain([], None, odom.metas)) + network = network_from_tables(self.data, self.lab, + red_edges, self.srcs, self.dsts) + self.assertIsInstance(network.edges[0], UndirectedEdges) + np.testing.assert_equal(network.edges[0].edges.todense(), exp_edges) + self.assertIs(network.nodes, self.data) + self.assertIsNone(network.edges[0].edge_data) + + def test_network_from_tables_errors(self): + self.assertRaises( + MismatchingEdgeVariables, + network_from_tables, + self.data, self.lab, + self.edges, self.srcs, self.dst1 + ) + + with self.data.unlocked(self.data.metas): + self.data.metas[2, 0] = self.data.metas[0, 0] + + self.assertRaises( + NonUniqueLabels, + network_from_tables, + self.data, self.lab, + self.edges, self.srcs, self.dst1 + ) + + def test_network_from_edge_table_by_labels(self): + network = network_from_edge_table(self.edges, self.srcs, self.dsts) + odom = self.edges.domain + + # labels are assumed to be sorted alphabetically + labels = {lab: i for i, lab in enumerate("bar bax baz foo".split())} + exp_edges = np.zeros((4, 4), dtype=int) + for src, dst in [("foo", "baz"), ("baz", "baz"), + ("bax", "bar"), ("foo", "bar")]: + exp_edges[labels[src], labels[dst]] = 1 + + self.assertIsInstance(network.edges[0], UndirectedEdges) + np.testing.assert_equal(network.edges[0].edges.todense(), exp_edges) + + network = network_from_edge_table(self.edges, self.srcs, self.dsts, + directed=True) + self.assertIsInstance(network.edges[0], DirectedEdges) + np.testing.assert_equal(network.edges[0].edges.todense(), exp_edges) + + # edge data contains only labels + np.testing.assert_equal(network.nodes.metas.T, [list(labels)]) + + # edge data must be sorted labels + np.testing.assert_equal(network.edges[0].edge_data[:, 0], + np.array([[3, 8, 2, 1]]).T) + # edge data must not contain the used columns + edom = network.edges[0].edge_data.domain + self.assertEqual(len(edom.attributes + edom.metas), + len(odom.attributes + odom.metas) - 2) + + def test_network_from_edge_table_by_indices(self): + exp_edges = np.zeros((5, 5), dtype=int) + exp_edges[0, 1] = exp_edges[0, 3] = exp_edges[3, 3] = exp_edges[4, 1] = 1 + odom = self.edges.domain + + network = network_from_edge_table(self.edges, self.src1, self.dst1) + self.assertIsInstance(network.edges[0], UndirectedEdges) + np.testing.assert_equal(network.edges[0].edges.todense(), exp_edges) + + network = network_from_edge_table(self.edges, self.src1, self.dst1, + directed=True) + self.assertIsInstance(network.edges[0], DirectedEdges) + np.testing.assert_equal(network.edges[0].edges.todense(), exp_edges) + + # edge data contains only numbers + np.testing.assert_equal(network.nodes.metas.T, [list("12345")]) + + # edge data must be sorted by rows, then columns + np.testing.assert_equal(network.edges[0].edge_data[:, 0], + np.array([[2, 1, 8, 3]]).T) + # edge data must not contain the used columns + edom = network.edges[0].edge_data.domain + self.assertEqual(len(edom.attributes + edom.metas), + len(odom.attributes + odom.metas) - 2) + + +if __name__ == '__main__': + unittest.main() diff --git a/orangecontrib/network/tests/test_embeddings.py b/orangecontrib/network/network/tests/test_embeddings.py similarity index 100% rename from orangecontrib/network/tests/test_embeddings.py rename to orangecontrib/network/network/tests/test_embeddings.py diff --git a/orangecontrib/network/tests/test_readwrite.py b/orangecontrib/network/network/tests/test_readwrite.py similarity index 79% rename from orangecontrib/network/tests/test_readwrite.py rename to orangecontrib/network/network/tests/test_readwrite.py index 4cf7d9a1..ec00febc 100644 --- a/orangecontrib/network/tests/test_readwrite.py +++ b/orangecontrib/network/network/tests/test_readwrite.py @@ -1,5 +1,6 @@ import os import unittest +from pkg_resources import resource_filename from tempfile import NamedTemporaryFile import numpy as np @@ -8,12 +9,15 @@ def _fullpath(name): - return os.path.join(os.path.split(__file__)[0], name) + return os.path.join(resource_filename("orangecontrib.network", "networks"), name) + +def _fullpathtest(name): + return os.path.join(resource_filename(__name__, ""), name) class TestReadPajek(unittest.TestCase): def test_two_mode(self): - davis = readwrite.read_pajek(_fullpath("../networks/davis.net")) + davis = readwrite.read_pajek(_fullpath("davis.net")) self.assertEqual(davis.number_of_nodes(), 32) self.assertEqual( list(davis.nodes), @@ -25,6 +29,17 @@ def test_two_mode(self): ) self.assertEqual(davis.in_first_mode, 18) + def test_edge_labels(self): + net = readwrite.read_pajek(_fullpathtest("towns.net")) + self.assertEqual(net.number_of_nodes(), 4) + self.assertEqual( + list(net.nodes), + ["Ljubljana", "Kranj", "Maribor", "Novo mesto"] + ) + self.assertEqual( + list(net.edges[0].edge_data), + ['near', 'far', 'not near, not far', 'huh?']) + def test_write_pajek(self): net = readwrite.read_pajek(_fullpath("../networks/leu_by_genesets.net")) with NamedTemporaryFile("wt", suffix=".net", delete=False) as f: @@ -62,7 +77,7 @@ def test_write_pajek_multiple_edge_types(self): self.assertRaises(TypeError, readwrite.write_pajek, f, net) def test_edge_list(self): - net = readwrite.read_pajek(_fullpath("test-arcslist.net")) + net = readwrite.read_pajek(_fullpathtest("test-arcslist.net")) neighs = [(1, (2, 3, 6)), (2, (1, 4, 5, 6)), (5, (1, 2)), diff --git a/orangecontrib/network/tests/test_twomode.py b/orangecontrib/network/network/tests/test_twomode.py similarity index 100% rename from orangecontrib/network/tests/test_twomode.py rename to orangecontrib/network/network/tests/test_twomode.py diff --git a/orangecontrib/network/network/tests/towns.net b/orangecontrib/network/network/tests/towns.net new file mode 100644 index 00000000..0c65e456 --- /dev/null +++ b/orangecontrib/network/network/tests/towns.net @@ -0,0 +1,12 @@ +*Network "Test" +*Description "Slovenian towns" +*Vertices +1 "Ljubljana" +2 "Kranj" +3 "Maribor" +4 "Novo mesto" +*Edges +1 2 near +1 3 far +1 4 not near, not far +3 4 huh? diff --git a/orangecontrib/network/widgets/OWNxExplorer.py b/orangecontrib/network/widgets/OWNxExplorer.py index a6d5d8fd..c3048674 100644 --- a/orangecontrib/network/widgets/OWNxExplorer.py +++ b/orangecontrib/network/widgets/OWNxExplorer.py @@ -1,21 +1,27 @@ +from typing import Optional, Union + import numpy as np import scipy.sparse as sp from AnyQt.QtCore import QTimer, QSize, Qt, Signal, QObject, QThread import Orange -from Orange.data import Table, Domain, StringVariable +from Orange.data import Table, Domain, StringVariable, ContinuousVariable, \ + Variable from Orange.widgets import gui, widget from Orange.widgets.settings import Setting, SettingProvider +from Orange.widgets.utils.itemmodels import DomainModel from Orange.widgets.utils.plot import OWPlotGUI from Orange.widgets.visualize.utils.widget import OWDataProjectionWidget from Orange.widgets.widget import Input, Output +from orangecontrib.network.network import compose from orangecontrib.network.network.base import Network from orangecontrib.network.network.layout import fruchterman_reingold from orangecontrib.network.widgets.graphview import GraphView FR_ALLOWED_TIME = 30 +WEIGHTS_COMBO_ITEM = "Weights" class OWNxExplorer(OWDataProjectionWidget): @@ -50,6 +56,12 @@ class Outputs(OWDataProjectionWidget.Outputs): mark_min_conn = Setting(5) mark_max_conn = Setting(5) mark_most_conn = Setting(1) + # These can't be context settings. Contexts are inherited from parent class + # and use variables describing projected points (= graph nodes). Edges would + # need a separate context, so let us use hints instead. + edge_width_variable_hint: Optional[str] = Setting(None, schema_only=True) + edge_label_variable_hint: Optional[str] = Setting(None, schema_only=True) + edge_color_variable_hint: Optional[str] = Setting(None, schema_only=True) alpha_value = 255 # Override the setting from parent @@ -77,12 +89,17 @@ def __init__(self): self.mark_mode = 0 self.mark_text = "" + self.edge_width_variable = None + self.edge_label_variable = None + self.edge_color_variable = None + super().__init__() self.network = None self.node_data = None self.distance_matrix = None self.edges = None + self.edge_data = None self.positions = None self._optimizer = None @@ -101,12 +118,24 @@ def sizeHint(self): def _add_controls(self): self.gui = OWPlotGUI(self) self._add_info_box() - self.gui.point_properties_box(self.controlArea) - self._add_effects_box() - self.gui.plot_properties_box(self.controlArea) + self._add_node_box() + self._add_edge_box() + self._add_properties_box() self._add_mark_box() self.controls.attr_label.activated.connect(self.on_change_label_attr) + def _add_node_box(self): + sgui = self.gui + box = sgui.create_gridbox(self.controlArea, "Nodes") + sgui.add_widgets([ + sgui.Color, + sgui.Shape, + sgui.Label, + sgui.Size, + sgui.PointSize, + ], box) + box.layout().itemAtPosition(5, 0).widget().setText("") + def _add_info_box(self): info = gui.vBox(self.controlArea, box="Layout") gui.label( @@ -136,36 +165,49 @@ def _add_info_box(self): label="Make edges with large weights shorter", callback=self.improve) - def _add_effects_box(self): - gbox = self.gui.create_gridbox(self.controlArea, box="Widths and Sizes") - self.gui.add_widget(self.gui.PointSize, gbox) - gbox.layout().itemAtPosition(1, 0).widget().setText("Node Size:") + def _add_edge_box(self): + gbox = self.gui.create_gridbox(self.controlArea, box="Edges") + + order = (None, WEIGHTS_COMBO_ITEM, DomainModel.Separator) + DomainModel.SEPARATED + self.edge_label_model = DomainModel( + placeholder="(None)", order=order, separators=True) + self.gui._combo( + gbox, "edge_label_variable", "Label", self.edge_label_var_changed, + model=self.edge_label_model) + self.edge_color_model = DomainModel( + placeholder="(Same color)", + valid_types=DomainModel.PRIMITIVE) + self.gui._combo( + gbox, "edge_color_variable", "Color", self.edge_color_var_changed, + model=self.edge_color_model) + self.edge_width_model = DomainModel( + valid_types=ContinuousVariable, + placeholder="(Same width)", order=order, separators=False) self.gui.add_control( - gbox, gui.hSlider, "Edge width:", + gbox, + gui.comboBox, "Width:", + master=self, value="edge_width_variable", + model=self.edge_width_model, + callback=self.edge_width_var_changed, + ) + self.gui.add_control( + gbox, gui.hSlider, "", master=self, value='graph.edge_width', minValue=1, maxValue=10, step=1, - callback=self.graph.update_edges) - box = gui.vBox(None) - gbox.layout().addWidget(box, 3, 0, 1, 2) - gui.separator(box) - self.checkbox_relative_edges = gui.checkBox( - box, self, 'graph.relative_edge_widths', - 'Scale edge widths to weights', - callback=self.graph.update_edges) - self.checkbox_show_weights = gui.checkBox( - box, self, 'graph.show_edge_weights', - 'Show edge weights', - callback=self.graph.update_edge_labels) - self.checkbox_show_weights = gui.checkBox( - box, self, 'graph.label_selected_edges', - 'Label only edges of selected nodes', - callback=self.graph.update_edge_labels) + callback=self.graph.update_edge_widths) # This is ugly: create a slider that controls alpha_value so that # parent can enable and disable it - although it's never added to any # layout and visible to the user gui.hSlider(None, self, "graph.alpha_value") + def _add_properties_box(self): + sgui = self.gui + return sgui.create_box([ + sgui.LabelOnlySelected, + sgui.ClassDensity, + sgui.ShowLegend], self.controlArea, None, False) + def _add_mark_box(self): hbox = gui.hBox(None, box=True) self.mainArea.layout().addWidget(hbox) @@ -376,6 +418,22 @@ def on_change_label_attr(self): if self.mark_mode in (1, 2): self.update_marks() + @staticmethod + def _hint(var: Union[Variable, str, None]) -> Union[str, None]: + return var.name if isinstance(var, Variable) else var + + def edge_color_var_changed(self): + self.edge_color_variable_hint = self._hint(self.edge_color_variable) + self.graph.update_edge_colors() + + def edge_label_var_changed(self): + self.edge_label_variable_hint = self._hint(self.edge_label_variable) + self.graph.update_edge_labels() + + def edge_width_var_changed(self): + self.edge_width_variable_hint = self._hint(self.edge_width_variable) + self.graph.update_edge_widths() + @Inputs.node_data def set_node_data(self, data): self.node_data = data @@ -469,22 +527,32 @@ def set_actual_data(): self.cb_class_density.setEnabled(self.can_draw_density()) def set_actual_edges(): - def set_checkboxes(value): - self.checkbox_show_weights.setEnabled(value) - self.checkbox_relative_edges.setEnabled(value) - self.Warning.distance_matrix_mismatch.clear() if self.network is None: self.edges = None - set_checkboxes(False) + self.edge_data = None return - set_checkboxes(True) if network.number_of_edges(0): - self.edges = network.edges[0].edges.tocoo() + edges = network.edges[0] + self.edges = edges.edges.tocoo() + self.edge_data = edge_data = edges.edge_data + if isinstance(edge_data, np.ndarray): + if edge_data.dtype == float: + self.edge_data = Table.from_numpy( + Domain([ContinuousVariable("label")]), + np.atleast_2d(edge_data)) + else: + self.edge_data = Table.from_numpy( + Domain([], metas=[StringVariable("label")]), + np.empty((len(edge_data), 0)), + metas=edge_data.reshape(len(edge_data), 1)) + elif edge_data is not None: + assert isinstance(edges.edge_data, Table) else: self.edges = sp.coo_matrix((0, 3)) + self.edge_data = None if self.distance_matrix is not None: if len(self.distance_matrix) != self.number_of_nodes: self.Warning.distance_matrix_mismatch() @@ -496,9 +564,22 @@ def set_checkboxes(value): ) if np.allclose(self.edges.data, 0): self.edges.data[:] = 1 - set_checkboxes(False) - elif len(set(self.edges.data)) == 1: - set_checkboxes(False) + + def _retrieve(model, hint): + model.set_domain(domain) + for var in model: + if (isinstance(var, Variable) and var.name == hint + or isinstance(var, str) and var == hint): + return var + return None + + domain = None if self.edge_data is None else self.edge_data.domain + self.edge_label_variable = \ + _retrieve(self.edge_label_model, self.edge_label_variable_hint) + self.edge_color_variable = \ + _retrieve(self.edge_color_model, self.edge_color_variable_hint) + self.edge_width_variable = \ + _retrieve(self.edge_width_model, self.edge_width_variable_hint) self.stop_optimization_and_wait() set_actual_data() @@ -581,6 +662,34 @@ def get_subset_mask(self): def get_edges(self): return self.edges + def get_edge_labels(self): + if self.edge_label_variable is None: + return None + if self.edge_label_variable == WEIGHTS_COMBO_ITEM: + weights = self.edges.data + if np.allclose(np.modf(weights)[0], 0): + return np.array([str(x) for x in weights.astype(int)]) + else: + return np.array(["{:.02}".format(x) for x in weights]) + elw = self.edge_label_variable + tostr = elw.repr_val + return np.array([tostr(x) for x in self.edge_data.get_column(elw)]) + + def get_edge_widths(self): + if self.edge_width_variable is None: + return None + if self.edge_width_variable == WEIGHTS_COMBO_ITEM: + widths = self.edges.data + return widths if len(set(widths)) > 1 else None + else: + return self.edge_data.get_column(self.edge_width_variable) + + def get_edge_colors(self): + var = self.edge_color_variable + if var is None: + return None + return var.palette.values_to_qcolors(self.edge_data.get_column(var)) + def is_directed(self): return self.network is not None and self.network.edges[0].directed @@ -719,7 +828,10 @@ def main(): network = read_pajek(join(dirname(dirname(__file__)), 'networks', 'leu_by_genesets.net')) #network = read_pajek(join(dirname(dirname(__file__)), 'networks', 'davis.net')) #transform_data_to_orange_table(network) + data = Table("/Users/janez/Downloads/relations.tab") + network = compose.network_from_edge_table(data, *data.domain.metas[:2]) WidgetPreview(OWNxExplorer).run(set_graph=network) + if __name__ == "__main__": main() diff --git a/orangecontrib/network/widgets/OWNxFile.py b/orangecontrib/network/widgets/OWNxFile.py index 5d85ebed..75013a65 100755 --- a/orangecontrib/network/widgets/OWNxFile.py +++ b/orangecontrib/network/widgets/OWNxFile.py @@ -1,5 +1,7 @@ from os import path -from itertools import product +from typing import Optional + +from itertools import product, chain, count from traceback import format_exception_only import numpy as np @@ -8,57 +10,26 @@ from AnyQt.QtWidgets import QStyle, QSizePolicy, QFileDialog from Orange.util import get_entry_point -from Orange.data import Table, Domain, StringVariable +from Orange.data import Table, Domain, StringVariable, ContinuousVariable from Orange.data.util import get_unique_names -from Orange.widgets import gui, settings -from Orange.widgets.settings import ContextHandler +from Orange.widgets import gui +from Orange.widgets.report import bool_str +from orangewidget.settings import Setting from Orange.widgets.utils.itemmodels import VariableListModel from Orange.widgets.utils.widgetpreview import WidgetPreview from Orange.widgets.widget import OWWidget, Msg, Input, Output -from orangecontrib.network.network import Network +from orangecontrib.network.network import Network, compose from orangecontrib.network.network.readwrite import read_pajek -class NxFileContextHandler(ContextHandler): - def new_context(self, useful_vars): - context = super().new_context() - context.useful_vars = {var.name for var in useful_vars} - context.label_variable = None - return context - - # noinspection PyMethodOverriding - def match(self, context, useful_vars): - useful_vars = {var.name for var in useful_vars} - if context.useful_vars == useful_vars: - return self.PERFECT_MATCH - # context.label_variable can also be None; this would always match, - # so ignore it - elif context.label_variable in useful_vars: - return self.MATCH - else: - return self.NO_MATCH - - def settings_from_widget(self, widget, *_): - context = widget.current_context - if context is not None: - context.label_variable = \ - widget.label_variable and widget.label_variable.name - - def settings_to_widget(self, widget, useful_vars): - context = widget.current_context - widget.label_variable = None - if context.label_variable is not None: - for var in useful_vars: - if var.name == context.label_variable: - widget.label_variable = var - break - - demos_path = next( get_entry_point("Orange3-Network", "orange.data.io.search_paths", "network") ())[1] +# TODO: Check box whether the network constructed from edges input is directed + + class OWNxFile(OWWidget): name = "Network File" description = "Read network graph file" @@ -67,14 +38,19 @@ class OWNxFile(OWWidget): class Inputs: items = Input("Items", Table) + edges = Input("Edges", Table) class Outputs: network = Output("Network", Network) items = Output("Items", Table) - settingsHandler = NxFileContextHandler() - label_variable: StringVariable = settings.ContextSetting(None) - recentFiles = settings.Setting([]) + LoadFromFile, ConstructFromInputs = 0, 1 + + label_variable_hint: Optional[str] = Setting(None, schema_only=True) + edge_src_variable_hint: Optional[str] = Setting(None, schema_only=True) + edge_dst_variable_hint: Optional[str] = Setting(None, schema_only=True) + original_net_source = Setting(LoadFromFile, schema_only=True) + recentFiles = Setting([]) class Information(OWWidget.Information): auto_annotation = Msg( @@ -82,6 +58,16 @@ class Information(OWWidget.Information): suggest_annotation = Msg( 'Add optional data input to annotate nodes') + class Warning(OWWidget.Warning): + missing_edges = Msg('There is no data for some edges') + extra_edges = Msg( + 'Edge data contains data for some edges that do not exist') + mismatched_lengths = Msg( + "Data for nodes is ignored because its size does not match " + "the number of nodes:\n" + "select a data column whose values can be matched with network " + "labels") + class Error(OWWidget.Error): io_error = Msg('Error reading file "{}"\n{}') error_parsing_file = Msg('Error reading file "{}"') @@ -89,10 +75,19 @@ class Error(OWWidget.Error): "Attempt to read {} failed\n" "The widget tried to annotated nodes with data from\n" "a file with the same name.") - mismatched_lengths = Msg( - "Data size does not match the number of nodes.\n" - "Select a data column whose values can be matched with network " - "labels") + no_label_variable = Msg( + "Choose a label column to construct a network from tables") + mismatched_edge_variables = Msg( + "Source and destination columns must be of the same type\n" + "(numerical or text)" + ) + unidentified_nodes = Msg( + "Edge data refers to nodes that do not exist in the node data:\n{}" + ) + missing_label_values = Msg( + "Constructing a network from tables requires a label column " + "without missing values" + ) want_main_area = False mainArea_width_height_ratio = None @@ -102,11 +97,22 @@ def __init__(self): self.network = None self.auto_data = None - self.original_nodes = None + self.original_network = None self.data = None + self.label_variable = None + self.edges = None + self.edge_src_variable = self.edge_dst_variable = None self.net_index = 0 - hb = gui.widgetBox(self.controlArea, orientation=Qt.Horizontal) + vb = gui.radioButtons( + self.controlArea, self, "original_net_source", + box="Network from file", + callback=self.on_source_changed) + hb = gui.hBox(vb) + gui.appendRadioButton( + vb, "Load network from file: ", insertInto=hb, id=self.LoadFromFile, + tooltip="Load from file and " + "use inputs (if any) to annotate vertices and edges") self.filecombo = gui.comboBox( hb, self, "net_index", callback=self.select_net_file, minimumWidth=250) @@ -118,25 +124,37 @@ def __init__(self): hb, self, 'Reload', callback=self.reload, icon=self.style().standardIcon(QStyle.SP_BrowserReload), sizePolicy=(QSizePolicy.Maximum, QSizePolicy.Fixed)) + hb = gui.hBox(vb) + gui.appendRadioButton( + vb, "Construct network from input tables", insertInto=hb, + id=self.ConstructFromInputs + ) self.label_model = VariableListModel(placeholder="(Match by rows)") self.label_model[:] = [None] gui.comboBox( - self.controlArea, self, "label_variable", box=True, - label="Match node labels to data column: ", orientation=Qt.Horizontal, + self.controlArea, self, "label_variable", + box="Node description (from input signal)", + label="Match node labels with values of ", orientation=Qt.Horizontal, model=self.label_model, callback=self.label_changed) - self.populate_comboboxes() + self.edge_model = VariableListModel(placeholder="(None)") + box = gui.vBox(self.controlArea, + box="Edge description (from input signal)") + ebox = gui.hBox(box) + gui.comboBox( + ebox, self, "edge_src_variable", label="Source node label:", + model=self.edge_model, callback=self.edge_changed) + gui.separator(ebox, 16) + gui.comboBox( + ebox, self, "edge_dst_variable", label="Destination node label:", + model=self.edge_model, callback=self.edge_changed) + + self.update_file_combo() self.setFixedHeight(self.sizeHint().height()) self.reload() - @Inputs.items - def set_data(self, data): - self.data = data - self.update_label_combo() - self.send_output() - - def populate_comboboxes(self): + def update_file_combo(self): self.filecombo.clear() for file in self.recentFiles or ("(None)",): self.filecombo.addItem(path.basename(file)) @@ -160,7 +178,7 @@ def browse_net_file(self, browse_demos=False): self.recentFiles.remove(filename) self.recentFiles.insert(0, filename) - self.populate_comboboxes() + self.update_file_combo() self.net_index = 0 self.select_net_file() return True @@ -169,15 +187,27 @@ def reload(self): if self.recentFiles: self.select_net_file() + def on_source_changed(self): + if self.original_net_source == self.LoadFromFile: + if self.recentFiles and self.net_index < len(self.recentFiles): + self.open_net_file(self.recentFiles[0]) + else: + self.select_net_file() + else: + self.open_net_file(None) + def select_net_file(self): """user selected a graph file from the combo box""" + self.original_net_source = self.LoadFromFile if self.net_index > len(self.recentFiles) - 1: if not self.browse_net_file(True): + if self.net_index >= len(self.recentFiles): + self.original_net_source = self.ConstructFromInputs return # Cancelled elif self.net_index: self.recentFiles.insert(0, self.recentFiles.pop(self.net_index)) self.net_index = 0 - self.populate_comboboxes() + self.update_file_combo() if self.recentFiles: self.open_net_file(self.recentFiles[0]) @@ -186,21 +216,22 @@ def open_net_file(self, filename): self.Error.clear() self.Warning.clear() self.Information.clear() - self.network = None - self.original_nodes = None - try: - self.network = read_pajek(filename) - except OSError as err: - self.Error.io_error( - filename, - "".join(format_exception_only(type(err), err)).rstrip()) - except Exception: # pylint: disable=broad-except - self.Error.error_parsing_file(filename) + self.original_network = None + self.auto_data = None + if filename is not None: + try: + self.original_network = read_pajek(filename) + except OSError as err: + self.Error.io_error( + filename, + "".join(format_exception_only(type(err), err)).rstrip()) + except Exception: # pylint: disable=broad-except + self.Error.error_parsing_file(filename) + else: + self.read_auto_data(filename) else: - self.original_nodes = self.network.nodes - self.read_auto_data(filename) - self.update_label_combo() - self.send_output() + self.original_net_source = self.ConstructFromInputs + self.compose_network() def read_auto_data(self, filename): self.Error.auto_data_failed.clear() @@ -222,99 +253,199 @@ def read_auto_data(self, filename): if errored_file: self.Error.auto_data_failed(errored_file) - def update_label_combo(self): - self.closeContext() + @property + def original_nodes(self): + return self.original_network.nodes + + @Inputs.items + def set_data(self, data): + self.data = data data = self.data if self.data is not None else self.auto_data - if self.network is None or data is None: + if data is None: self.label_model[:] = [None] + self.label_variable = None + return + + best_var, useful_vars = self._vars_for_label() + self.label_model[:] = [None] + useful_vars + self.label_variable = \ + self._find_variable(self.label_variable_hint, useful_vars, best_var) + + @staticmethod + def _find_variable(name, variables, default=None): + for var in variables: + if var.name == name: + return var + return default + + def _vars_for_label(self): + useful_vars = [] + overs = [] + if self.original_network is not None: + original_nodes = set(self.original_nodes) else: - best_var, useful_vars = self._vars_for_label(data) - self.label_model[:] = [None] + useful_vars - self.label_variable = best_var - self.openContext(useful_vars) - self.set_network_nodes() - - def _vars_for_label(self, data: Table): - vars_and_overs = [] - original_nodes = set(self.original_nodes) - for var in data.domain.metas: + original_nodes = set() + + for var in self.data.domain.metas: if not isinstance(var, StringVariable): continue - values= data.get_column(var) + values = self.data.get_column(var) values = values[values != ""] set_values = set(values) + # if you remove the subset condition, also change data_by_labels if len(values) != len(set_values) \ or not original_nodes <= set_values: continue - vars_and_overs.append((len(set_values - original_nodes), var)) - if not vars_and_overs: + useful_vars.append(var) + overs.append(len(set_values - original_nodes)) + if not useful_vars: return None, [] - _, best_var = min(vars_and_overs) - useful_string_vars = [var for _, var in vars_and_overs] - return best_var, useful_string_vars + best_var = useful_vars[np.argmin(overs)] + return best_var, useful_vars + + @Inputs.edges + def set_edges(self, edges): + self.edges = edges + if self.edges is None: + self.edge_model[:] = [None] + self.edge_src_variable = self.edge_dst_variable = None + return + + *guess, edge_vars = self._vars_for_edges() + src = self._find_variable(self.edge_src_variable_hint, edge_vars) + dst = self._find_variable(self.edge_dst_variable_hint, edge_vars) + if not (src and dst): + src, dst = guess + self.edge_model[:] = [None] + edge_vars + self.edge_src_variable, self.edge_dst_variable = src, dst + + def _vars_for_edges(self): + edges = self.edges + useful_vars = [ + var for var in chain(edges.domain.variables, edges.domain.metas) + if (var.is_string + and np.all(edges.get_column(var) != "")) + or (type(var) is ContinuousVariable + and not np.isnan(np.sum(col := edges.get_column(var))) + and np.all(np.modf(col)[0] == 0)) + ] + if len(useful_vars) < 2: + src = dst = None + else: + # Take the first variable and find the next of the same type + src = useful_vars[0] + for dst in useful_vars[1:]: + if type(src) is type(dst): + break + else: + # If there's none, the second and third are of the same type + if len(useful_vars) >= 3: + src, dst = tuple(useful_vars[1:3]) + else: + src = dst = None + return src, dst, useful_vars + + def handleNewSignals(self): + self.compose_network() def label_changed(self): - self.set_network_nodes() - self.send_output() + self.label_variable_hint = self._hint_for(self.label_variable) + self.compose_network() + + def edge_changed(self): + self.edge_src_variable_hint = self._hint_for(self.edge_src_variable) + self.edge_dst_variable_hint = self._hint_for(self.edge_dst_variable) + self.compose_network() + + @staticmethod + def _hint_for(var): + return var and var.name + + def compose_network(self): + self.Error.no_label_variable.clear() + self.Error.mismatched_edge_variables.clear() + self.Error.unidentified_nodes.clear() + self.Error.missing_label_values.clear() + self.Warning.mismatched_lengths.clear() + self.Warning.missing_edges.clear() + self.Warning.extra_edges.clear() + self.Information.suggest_annotation.clear() + self.Information.auto_annotation.clear() - def send_output(self): - if self.network is None: - self.Outputs.network.send(None) - self.Outputs.items.send(None) + if self.original_network is not None: + self.network = self.annotated_read_network() + elif self.original_net_source == self.ConstructFromInputs and self.edges: + self.network = self.network_from_inputs() else: - self.Outputs.network.send(self.network) - self.Outputs.items.send(self.network.nodes) + self.network = None + self.send_output() - def set_network_nodes(self): - self.Error.mismatched_lengths.clear() - self.Information.auto_annotation.clear() - self.Information.suggest_annotation.clear() - if self.network is None: - return + def annotated_read_network(self): + assert self.original_network is not None - data = self.data if self.data is not None else self.auto_data + return Network( + self.network_nodes(), self.network_edges(), + self.original_network.name, + self.original_network.coordinates) + + def network_nodes(self): + assert self.original_network is not None + + data = self.data if data is None: - self.Information.suggest_annotation() - elif self.label_variable is None \ - and len(data) != self.network.number_of_nodes(): - self.Error.mismatched_lengths() + if self.auto_data is not None: + data = self.auto_data + self.Information.auto_annotation() + else: + self.Information.suggest_annotation() + if data is not None \ + and self.label_variable is None \ + and len(data) != self.original_network.number_of_nodes(): + self.Warning.mismatched_lengths() data = None if data is None: - self.network.nodes = self._label_to_tabel() + return self._label_to_tabel() elif self.label_variable is None: - self.network.nodes = self._combined_data(data) + return self._combined_data(data) else: - self.network.nodes = self._data_by_labels(data) + return self._data_by_labels(data) def _data_by_labels(self, data): - data_col = data.get_column(self.label_variable) - data_rows = {label: row for row, label in enumerate(data_col)} - indices = [data_rows[label] for label in self.original_nodes] - return data[indices] - - def _combined_data(self, source): + """ + Return data rearranged so that values of `self.label_variable` + match the original graph labels. + """ + # all node labels exist in data_col; this is ensured by _vars_for_label + data_col = data.get_column(self.label_variable) + data_rows = {label: row for row, label in enumerate(data_col)} + indices = [data_rows[label] for label in self.original_nodes] + return data[indices] + + def _combined_data(self, source: Table): + """ + Return `source` with an additional column `node_label` containing + original graph labels. + If original labels are sequential numbers starting with 0 or 1, + just return `source`. + """ nodes = np.array(self.original_nodes, dtype=str) - if nodes.ndim != 1: - return source try: - nums = np.sort(np.array([int(x) for x in nodes])) + nums = [int(x) for x in nodes] except ValueError: pass else: - if np.all(nums[1:] - nums[:-1] == 1): + nums = np.sort(nums) + if nums[0] in (0, 1) and np.all(nums[1:] - nums[:-1] == 1): return source - - src_dom = source.domain - label_attr = StringVariable(get_unique_names(src_dom, "node_label")) - domain = Domain(src_dom.attributes, src_dom.class_vars, - src_dom.metas + (label_attr, )) - data = source.transform(domain) - with data.unlocked(data.metas): - data.metas[:, -1] = nodes - return data + return source.add_column( + StringVariable(get_unique_names(source.domain, "node_label")), + nodes) def _label_to_tabel(self): + """ + Return a data table containing the graphs original node labels + """ domain = Domain([], [], [StringVariable("node_label")]) n = len(self.original_nodes) data = Table.from_numpy( @@ -322,15 +453,121 @@ def _label_to_tabel(self): np.array(self.original_nodes, dtype=str).reshape(-1, 1)) return data + def network_edges(self): + if self.edges is None or ( + self.edge_src_variable is None + or self.edge_dst_variable is None): + return self.original_network.edges + + src_col = self.edges.get_column(self.edge_src_variable) + dst_col = self.edges.get_column(self.edge_dst_variable) + edge_data = { + (src, dst): row for row, src, dst in zip(count(), src_col, dst_col)} + edges0 = self.original_network.edges[0] + directed = edges0.directed + edges = edges0.edges + n_edges = len(self.edges) + edges.sort_indices() + indices = [] + for src_idx, ptr_begin, ptr_end in zip(count(), edges.indptr, edges.indptr[1:]): + for dst_idx in edges.indices[ptr_begin:ptr_end]: + src_node = self.original_nodes[src_idx] + dst_node = self.original_nodes[dst_idx] + ind = edge_data.pop((src_node, dst_node), n_edges) + if ind == n_edges and not directed: + ind = edge_data.pop((dst_node, src_node),n_edges) + indices.append(ind) + if edge_data: + self.Warning.extra_edges() + + domain = self.edges.domain + edge_attrs = (self.edge_src_variable, self.edge_dst_variable) + pure_domain = Domain( + *([var for var in part if var not in edge_attrs] + for part in (domain.attributes, domain.class_vars, domain.metas))) + pure_table = self.edges.transform(pure_domain) + if np.max(indices) == n_edges: + extra_row = Table.from_list( + pure_domain, [[np.nan] * (len(pure_domain.variables) + len(pure_domain.metas))] + ) + pure_table = Table.concatenate((pure_table, extra_row)) + self.Warning.missing_edges() + + edges = self.original_network.edges + return [ + type(edges[0])(edges[0].edges, pure_table[indices], edges[0].name) + ] + edges[1:] + + def network_from_inputs(self): + if self.edges is None \ + or self.edge_src_variable is None \ + or self.edge_dst_variable is None: + return None - def sendReport(self): - self.reportSettings( - "Network file", - [("File name", self.filecombo.currentText()), - ("Vertices", self.network.number_of_nodes()), - ("Directed", gui.YesNo[self.network.edges[0].directed]) - ]) + try: + if self.data is None: + return compose.network_from_edge_table( + self.edges, self.edge_src_variable, self.edge_dst_variable) + else: + if self.label_variable is None: + self.Error.no_label_variable() + return None + if np.any(self.data.get_column(self.label_variable) == ""): + self.Error.missing_label_values() + return + return compose.network_from_tables( + self.data, self.label_variable, + self.edges, self.edge_src_variable, self.edge_dst_variable) + except compose.MismatchingEdgeVariables: + self.Error.mismatched_edge_variables() + except compose.UnknownNodes as exc: + msg = str(exc) + self.Error.unidentified_nodes(msg[:60] + "..." * (len(msg) > 60)) + # We intentionally don't handle `compose.NonUniqueLabels`: + # the widget should prevent it by not allowing to select such variables + return None + + def send_output(self): + self.Outputs.network.send(self.network) + self.Outputs.items.send(self.network and self.network.nodes) + + def send_report(self): + file_data = [("File name", self.filecombo.currentText())] + if self.original_network: + file_data += [ + ("Vertices", self.network.number_of_nodes()), + ("Edges", self.network.number_of_edges()), + ("Directed", bool_str(self.network.edges[0].directed)) + ] + self.report_items("Network file", file_data) + + ctrl = self.controls + annotation = [] + if self.data is not None: + annotation += [ + ("Table with vertex data", self.data.name), + ("Column with label", ctrl.label_variable.currentText()) + ] + if self.edges is not None: + annotation += [ + ("Table with edge data", self.edges.name), + ("Columns for matching with label", + f"{ctrl.edge_src_variable.currentText()} and " + f"{ctrl.edge_dst_variable.currentText()}") + ] + if annotation: + self.report_items("Additional data from inputs", annotation) + + @classmethod + def migrate_settings(cls, settings, version): + if "context_settings" in settings: + settings["label_variable_hint"] = None + if len(settings["context_settings"]) > 0: + context = settings["context_settings"][-1] + if "label_variable" in context: + settings["label_variable_hint"] = context["label_variable"] + del settings["context_settings"] if __name__ == "__main__": - WidgetPreview(OWNxFile).run() + WidgetPreview(OWNxFile).run(set_edges=Table("heart_disease")) diff --git a/orangecontrib/network/widgets/graphview.py b/orangecontrib/network/widgets/graphview.py index 236aaff7..2f3b5e54 100644 --- a/orangecontrib/network/widgets/graphview.py +++ b/orangecontrib/network/widgets/graphview.py @@ -6,7 +6,7 @@ from AnyQt.QtCore import QLineF, Qt, QRectF from AnyQt.QtGui import QPen -from Orange.util import scale +from Orange.data.util import scale from Orange.widgets.settings import Setting from Orange.widgets.visualize.owscatterplotgraph import OWScatterPlotBase @@ -15,6 +15,7 @@ class PlotVarWidthCurveItem(pg.PlotCurveItem): def __init__(self, directed, *args, **kwargs): self.directed = directed self.widths = kwargs.pop("widths", None) + self.colors = kwargs.pop("colors", None) self.setPen(kwargs.pop("pen", pg.mkPen(0.0))) self.sizes = kwargs.pop("size", None) self.coss = self.sins = None @@ -24,12 +25,17 @@ def setWidths(self, widths): self.widths = widths self.update() + def setEdgeColors(self, colors): + self.colors = colors + self.update() + def setPen(self, pen): self.pen = pen self.pen.setCapStyle(Qt.RoundCap) def setData(self, *args, **kwargs): self.widths = kwargs.pop("widths", self.widths) + self.colors = kwargs.pop("colors", self.colors) self.setPen(kwargs.pop("pen", self.pen)) self.sizes = kwargs.pop("size", self.sizes) super().setData(*args, **kwargs) @@ -105,7 +111,7 @@ def get_short_edge_coords(): pen = QPen(self.pen) p.setRenderHint(p.Antialiasing, True) p.setCompositionMode(p.CompositionMode_SourceOver) - if self.widths is None: + if self.widths is None and self.colors is None: p.setPen(pen) if self.directed: for (x0, y0, x1, y1), (x1w, y1w), (xa1, ya1, xa2, ya2), arc in zip( @@ -118,19 +124,31 @@ def get_short_edge_coords(): for ecoords in edge_coords[~arcs]: p.drawLine(QLineF(*ecoords)) else: + if self.widths is None: + widths = np.lib.stride_tricks.as_strided( + pen.width(), (len(edge_coords),), (0,)) + else: + widths = self.widths + if self.colors is None: + colors = np.lib.stride_tricks.as_strided( + pen.color(), (len(edge_coords),), (0,)) + else: + colors = self.colors if self.directed: - for (x0, y0, x1, y1), (x1w, y1w), (xa1, ya1, xa2, ya2), w, arc in zip( + for (x0, y0, x1, y1), (x1w, y1w), (xa1, ya1, xa2, ya2), w, c, arc in zip( edge_coords, get_short_edge_coords(), get_arrows(), - self.widths, arcs): + widths, colors, arcs): if not arc: pen.setWidth(w) + pen.setColor(c) p.setPen(pen) p.drawLine(QLineF(x0, y0, x1w, y1w)) p.drawLine(QLineF(xa1, ya1, x1, y1)) p.drawLine(QLineF(xa2, ya2, x1, y1)) else: - for ecoords, w in zip(edge_coords[~arcs], self.widths[~arcs]): + for ecoords, w, c in zip(edge_coords[~arcs], widths[~arcs], colors[~arcs]): pen.setWidth(w) + pen.setColor(c) p.setPen(pen) p.drawLine(QLineF(*ecoords)) @@ -155,11 +173,14 @@ def get_short_edge_coords(): if self.widths is None: widths = np.full(len(rxs), pen.width()) + colors = np.full(len(rxs), pen.color()) else: widths = self.widths[arcs] - for rx, ry, rfx, rfy, w in zip(rxs, rys, rfxs, rfys, widths): + colors = self.colors[arcs] + for rx, ry, rfx, rfy, w, c in zip(rxs, rys, rfxs, rfys, widths, colors): rect = QRectF(rx, ry, rfx, rfy) pen.setWidth(w) + pen.setColor(c) p.setPen(pen) p.drawArc(rect, 100 * 16, 250 * 16) if self.directed: @@ -170,10 +191,7 @@ def get_short_edge_coords(): class GraphView(OWScatterPlotBase): - show_edge_weights = Setting(False) - relative_edge_widths = Setting(True) edge_width = Setting(2) - label_selected_edges = Setting(True) COLOR_NOT_SUBSET = (255, 255, 255, 255) COLOR_SUBSET = (0, 0, 0, 255) @@ -228,7 +246,14 @@ def update_edges(self): return x, y = self.scatterplot_item.getData() edges = self.master.get_edges() - srcs, dests, weights = edges.row, edges.col, edges.data + colors = self.master.get_edge_colors() + widths = self.master.get_edge_widths() + if widths is not None: + widths = scale(widths, .7, 8) + widths[np.isnan(widths)] = 0.35 + widths *= np.log2(self.edge_width / 4 + 1) + + srcs, dests = edges.row, edges.col if self.edge_curve is None: self.pair_indices = np.empty((2 * len(srcs),), dtype=int) self.pair_indices[::2] = srcs @@ -236,12 +261,8 @@ def update_edges(self): data = dict(x=x[self.pair_indices], y=y[self.pair_indices], pen=self._edge_curve_pen(), antialias=True, - size=self.scatterplot_item.data["size"][self.pair_indices] / 2) - if self.relative_edge_widths and len(set(weights)) > 1: - data['widths'] = \ - scale(weights, .7, 8) * np.log2(self.edge_width / 4 + 1) - else: - data['widths'] = None + size=self.scatterplot_item.data["size"][self.pair_indices] / 2, + widths=widths, colors=colors) if self.edge_curve is None: self.edge_curve = PlotVarWidthCurveItem( @@ -267,14 +288,14 @@ def update_edge_labels(self): self.plot_widget.removeItem(label) self.edge_labels = [] if self.scatterplot_item is None \ - or not self.show_edge_weights \ or self.simplify & self.Simplifications.NoEdgeLabels: return edges = self.master.get_edges() - if edges is None: + labels = self.master.get_edge_labels() + if edges is None or labels is None: return - srcs, dests, weights = edges.row, edges.col, edges.data - if self.label_selected_edges: + srcs, dests = edges.row, edges.col + if self.label_only_selected: selected = self._selected_and_marked() num_selected = np.sum(selected) if num_selected >= 2: @@ -283,11 +304,7 @@ def update_edge_labels(self): selected_edges = selected[srcs] | selected[dests] srcs = srcs[selected_edges] dests = dests[selected_edges] - weights = weights[selected_edges] - if np.allclose(weights, np.round(weights)): - labels = [str(x) for x in weights.astype(int)] - else: - labels = ["{:.02}".format(x) for x in weights] + labels = labels[selected_edges] x, y = self.scatterplot_item.getData() xs = (x[srcs.astype(np.int64)] + x[dests.astype(np.int64)]) / 2 ys = (y[srcs.astype(np.int64)] + y[dests.astype(np.int64)]) / 2 @@ -298,6 +315,13 @@ def update_edge_labels(self): self.plot_widget.addItem(ti) self.edge_labels.append(ti) + def update_edge_colors(self): + self.update_edges() + + def update_edge_widths(self): + self.update_edges() + + def _remove_edges(self): if self.edge_curve: self.plot_widget.removeItem(self.edge_curve) @@ -342,6 +366,7 @@ def update_labels(self): if marked is not None and len(marked): self.selection = self._selected_and_marked() super().update_labels() + self.update_edge_labels() self.selection = saved_selection def _remove_labels(self): @@ -379,10 +404,10 @@ def select_by_click(self, _, points): def unselect_all(self): super().unselect_all() - if self.label_selected_edges: + if self.label_only_selected: self.update_edge_labels() def _update_after_selection(self): - if self.label_selected_edges: + if self.label_only_selected: self.update_edge_labels() super()._update_after_selection() diff --git a/orangecontrib/network/widgets/tests/networks/test-compose.net b/orangecontrib/network/widgets/tests/networks/test-compose.net new file mode 100644 index 00000000..ffdd4c3e --- /dev/null +++ b/orangecontrib/network/widgets/tests/networks/test-compose.net @@ -0,0 +1,12 @@ +*Network "" +*Vertices 5 + 1 "foo" + 2 "bar" + 3 "qux" + 4 "baz" + 5 "bax" +*Edges + 1 2 + 1 4 + 4 4 + 5 2 diff --git a/orangecontrib/network/widgets/tests/test_OWNxExplorer.py b/orangecontrib/network/widgets/tests/test_OWNxExplorer.py index b2cd7db6..16f704ac 100644 --- a/orangecontrib/network/widgets/tests/test_OWNxExplorer.py +++ b/orangecontrib/network/widgets/tests/test_OWNxExplorer.py @@ -7,7 +7,8 @@ from orangewidget.tests.utils import simulate from orangecontrib.network import Network -from orangecontrib.network.widgets.OWNxExplorer import OWNxExplorer +from orangecontrib.network.widgets.OWNxExplorer import OWNxExplorer, \ + WEIGHTS_COMBO_ITEM class TestOWNxExplorer(NetworkTest): @@ -75,7 +76,8 @@ def test_get_reachable(self): def test_edge_weights(self): self.send_signal(self.widget.Inputs.network, self.davis_net) - self.widget.graph.show_edge_weights = True + self.widget.edge_label_variable = WEIGHTS_COMBO_ITEM + self.widget.graph.label_only_selected = True # Mark nodes with many connections (multiple): should show the weights for edges between marked nodes only self.widget.mark_min_conn = 8 diff --git a/orangecontrib/network/widgets/tests/test_OWNxFile.py b/orangecontrib/network/widgets/tests/test_OWNxFile.py index 236da3be..d405d37e 100644 --- a/orangecontrib/network/widgets/tests/test_OWNxFile.py +++ b/orangecontrib/network/widgets/tests/test_OWNxFile.py @@ -3,21 +3,62 @@ from unittest.mock import patch, Mock import numpy as np +import scipy.sparse as sp import Orange +from Orange.data import Table, DiscreteVariable, ContinuousVariable, \ + StringVariable, Domain +from orangecontrib.network import Network +from orangecontrib.network.network.base import DirectedEdges from orangecontrib.network.widgets.OWNxFile import OWNxFile from orangecontrib.network.widgets.tests.utils import NetworkTest TEST_NETS = os.path.join(os.path.split(__file__)[0], "networks") + def _get_test_net(filename): return os.path.join(TEST_NETS, filename) +def select(combo, var): + ind = combo.model().indexOf(var) + combo.setCurrentIndex(ind) + combo.activated[int].emit(ind) + + class TestOWNxFile(NetworkTest): def setUp(self): self.widget = self.create_widget(OWNxFile) # type: OWNxFile + self.lab, self.lab2, self.labx = (StringVariable("lab"), + StringVariable("lab2"), + ContinuousVariable("x")) + self.data = Table.from_list(Domain([self.labx], + None, + [self.lab, self.lab2]), + [[1.3, "foo", "a"], + [1.8, "bar", "b"], + [2.7, "qux", "c"], + [1.1, "baz", "d"], + [np.nan, "bax", "e"], + ]) + + self.srcs, self.dsts = metas = \ + StringVariable("srcs"), StringVariable("dsts") + self.w, self.src1, self.dst1, self.spam = attrs = [ + ContinuousVariable(x) for x in ("w src1 dst1 spam").split()] + self.edges = Table.from_list(Domain(attrs, None, metas), + [[1, 1, 4, 1.5, "foo", "baz"], + [8, 4, 4, 2, "baz", "baz"], + [3, 5, 2, 5, "bax", "bar"], + [2, 1, 2, 0, "foo", "bar"] + ]) + + edges = np.zeros((5, 5), dtype=int) + edges[0, 3] = edges[3, 3] = edges[4, 1] = edges[0, 1] = 1 + self.network = Network("foo bar qux baz bax".split(), + sp.csr_array(edges)) + def test_read_error(self): with patch("orangecontrib.network.widgets.OWNxFile.read_pajek", Mock(side_effect=OSError)): @@ -36,7 +77,7 @@ def test_invalid_datafile_length(self): # When data file's length does not match, the widget must create # a table from node labels self.widget.open_net_file(_get_test_net("test_inv.net")) - self.assertTrue(self.widget.Error.mismatched_lengths) + self.assertTrue(self.widget.Warning.mismatched_lengths) network = self.get_output(self.widget.Outputs.network) self.assertEqual(network.number_of_nodes(), 7) @@ -48,9 +89,9 @@ def test_invalid_datafile_length(self): def test_vars_for_label(self): self.widget.open_net_file(self._get_filename(None, mode="t")) - data = Orange.data.Table(_get_test_net("test_data.tab")) - domain = data.domain - best_var, useful_vars = self.widget._vars_for_label(data) + self.widget.data = Orange.data.Table(_get_test_net("test_data.tab")) + domain = self.widget.data.domain + best_var, useful_vars = self.widget._vars_for_label() self.assertIs(best_var, domain["label"]) self.assertEqual(useful_vars, [domain["with_extras"], domain["label"]]) @@ -80,14 +121,14 @@ def test_label_combo_contents(self): widget._label_to_tabel = Mock(return_value=data[:7]) widget.label_variable = None widget.label_changed() - self.assertTrue(widget.Error.mismatched_lengths.is_shown()) + self.assertTrue(widget.Warning.mismatched_lengths.is_shown()) output = self.get_output(widget.Outputs.network) self.assertIs(output.nodes, widget._label_to_tabel.return_value) # Choose a different variable; no error, output has corresponding data widget.label_variable = domain["with_extras"] widget.label_changed() - self.assertFalse(widget.Error.mismatched_lengths.is_shown()) + self.assertFalse(widget.Warning.mismatched_lengths.is_shown()) output = self.get_output(widget.Outputs.network) id_col = output.nodes.get_column("id") np.testing.assert_equal(id_col, np.arange(2, 9)) @@ -103,28 +144,627 @@ def test_label_combo_contents(self): self.send_signal(widget.Inputs.items, data) widget.label_variable = None widget.label_changed() - self.assertTrue(widget.Error.mismatched_lengths.is_shown()) + self.assertTrue(widget.Warning.mismatched_lengths.is_shown()) self.send_signal(widget.Inputs.items, None) output = self.get_output(widget.Outputs.network) self.assertIs(output.nodes, widget._label_to_tabel.return_value) - self.assertFalse(widget.Error.mismatched_lengths.is_shown()) + self.assertFalse(widget.Warning.mismatched_lengths.is_shown()) - def test_context_matching(self): - widget = self.widget - widget.open_net_file(self._get_filename(None, mode="t")) - data = Orange.data.Table(_get_test_net("test_data.tab")) - domain = data.domain + def test_set_data_combo_setup(self): + w = self.widget + combo = w.controls.label_variable + labs = [self.lab, self.lab2] - self.send_signal(widget.Inputs.items, data) - self.assertIs(widget.label_variable, domain["label"]) + for best in labs: + for w.label_variable_hint in ["lab", "lab2", None]: + with patch.object(w, "_vars_for_label", + Mock(return_value=(best, labs))): + self.send_signal(w.Inputs.items, self.data) + self.assertEqual(list(combo.model()), [None] + labs) + self.assertEqual(w.label_variable.name, + w.label_variable_hint or best.name) - widget.label_variable = domain["with_extras"] - self.send_signal(widget.Inputs.items, None) - self.assertIs(widget.label_variable, None) + self.send_signal(w.Inputs.items, None) + self.assertEqual(list(combo.model()), [None]) + self.assertIsNone(w.label_variable) - self.send_signal(widget.Inputs.items, data) - self.assertIs(widget.label_variable, domain["with_extras"]) + def test_label_hints(self): + w = self.widget + combo = w.controls.label_variable + + self.send_signal(w.Inputs.items, self.data) + ind_lab_2 = combo.model().indexOf(self.lab2) + combo.setCurrentIndex(ind_lab_2) + combo.activated[int].emit(ind_lab_2) + + self.send_signal(w.Inputs.items, self.edges) + self.assertEqual(combo.currentIndex(), 0) + self.assertIsNot(w.label_variable, self.lab2) + assert ind_lab_2 != 0 + + self.send_signal(w.Inputs.items, self.data) + self.assertEqual(combo.currentIndex(), ind_lab_2) + self.assertIs(w.label_variable, self.lab2) + + def test_vars_for_edges(self): + self.widget.edges = Table.from_list( + Domain([DiscreteVariable("discrete"), + ContinuousVariable("nonint"), + ContinuousVariable("has nans"), + ContinuousVariable("ok1")], + None, + [StringVariable("has missing"), + StringVariable("ok2"), + ContinuousVariable("ok3") + ]), + [[0, 0, 0, 0, "a", "b", 0], + [1, 0.5, np.nan, 1, "", "d", 2]] + ) + *guess, edge_vars = self.widget._vars_for_edges() + self.assertEqual([var.name for var in edge_vars], "ok1 ok2 ok3".split()) + self.assertEqual([var.name for var in guess], "ok1 ok3".split()) + + self.widget.edges = Table.from_list( + Domain([DiscreteVariable("discrete"), + ContinuousVariable("nonint"), + ContinuousVariable("has nans") + ], + None, + [StringVariable("has missing"), + StringVariable("ok1"), + ContinuousVariable("ok2"), + ContinuousVariable("ok3") + ]), + [[0, 0, 0, "a", "b", 0, 0], + [1, 0.5, np.nan, "", "d", 2, 1]] + ) + *guess, edge_vars = self.widget._vars_for_edges() + self.assertEqual([var.name for var in edge_vars], "ok1 ok2 ok3".split()) + self.assertEqual([var.name for var in guess], "ok2 ok3".split()) + + self.widget.edges = Table.from_list( + Domain([DiscreteVariable("discrete"), + ContinuousVariable("nonint"), + ContinuousVariable("has nans") + ], + None, + [StringVariable("has missing"), + StringVariable("ok1"), + ]), + [[0, 0, 0, "a", "b"], + [1, 0.5, np.nan, "", "d"]] + ) + *guess, edge_vars = self.widget._vars_for_edges() + self.assertEqual([var.name for var in edge_vars], ["ok1"]) + self.assertEqual(guess, [None, None]) + + self.widget.edges = Table.from_list( + Domain([DiscreteVariable("discrete"), + ContinuousVariable("nonint"), + ContinuousVariable("has nans") + ], + None, + [StringVariable("has missing"), + ]), + [[0, 0, 0, "a"], + [1, 0.5, np.nan, ""]] + ) + *guess, edge_vars = self.widget._vars_for_edges() + self.assertEqual([var.name for var in edge_vars], []) + self.assertEqual(guess, [None, None]) + + def test_set_edges_combo_setup(self): + w = self.widget + src = w.controls.edge_src_variable + dst = w.controls.edge_dst_variable + + # Take the first two variables + self.send_signal(w.Inputs.edges, self.edges) + useful = "w src1 dst1 srcs dsts".split() + self.assertEqual([var.name for var in src.model() if var], useful) + self.assertEqual([var.name for var in dst.model() if var], useful) + self.assertIs(w.edge_src_variable, self.w) + self.assertIs(w.edge_dst_variable, self.src1) + + # Observe the hint + w.edge_src_variable_hint, w.edge_dst_variable_hint = ("src1", "dst1") + self.send_signal(w.Inputs.edges, self.edges) + self.assertIs(w.edge_src_variable, self.src1) + self.assertIs(w.edge_dst_variable, self.dst1) + + self.send_signal(w.Inputs.edges, None) + self.assertIs(w.edge_src_variable, None) + self.assertIs(w.edge_src_variable, None) + + # Remember the hint + self.send_signal(w.Inputs.edges, self.edges) + self.assertIs(w.edge_src_variable, self.src1) + self.assertIs(w.edge_dst_variable, self.dst1) + + # Ignore the hint because one variable is missing; take the first + # two of the same type + dom = self.edges.domain + self.send_signal( + w.Inputs.edges, + self.edges.transform(Domain(dom.attributes[2:], None, dom.metas))) + self.assertIs(w.edge_src_variable, self.srcs) + self.assertIs(w.edge_dst_variable, self.dsts) + + # Ignore the hint, and also set nothing else because there are no + # two variables of the same type + w.edge_src_variable_hint, w.edge_dst_variable_hint = ("src1", "dst1") + dom = self.edges.domain + self.send_signal( + w.Inputs.edges, + self.edges.transform(Domain(dom.attributes[2:], None, dom.metas[:1]))) + self.assertIsNone(w.edge_src_variable) + self.assertIsNone(w.edge_dst_variable) + + def test_edge_hints(self): + w = self.widget + src = w.controls.edge_src_variable + dst = w.controls.edge_dst_variable + + self.send_signal(w.Inputs.edges, self.edges) + indesrc = src.model().indexOf(self.srcs) + indedst = dst.model().indexOf(self.dsts) + assert src.currentIndex() != indesrc and dst.currentIndex() != indedst + select(src, self.srcs) + select(dst, self.dsts) + + self.send_signal(w.Inputs.items, self.edges) + self.assertEqual(src.currentIndex(), indesrc) + self.assertEqual(dst.currentIndex(), indedst) + + self.send_signal(w.Inputs.edges, None) + assert src.currentIndex() != indesrc and dst.currentIndex() != indedst + + self.send_signal(w.Inputs.edges, self.edges) + self.assertEqual(src.currentIndex(), indesrc) + self.assertEqual(dst.currentIndex(), indedst) + + @patch.object(OWNxFile, "compose_network") + def test_call_compose(self, compose): + w = self.widget + + compose.reset_mock() + w.open_net_file(_get_test_net("test.net")) + compose.assert_called_once() + + compose.reset_mock() + self.send_signal(w.Inputs.items, self.data) + compose.assert_called_once() + + compose.reset_mock() + self.send_signal(w.Inputs.edges, self.edges) + compose.assert_called_once() + + for combo in (w.controls.label_variable, + w.controls.edge_src_variable, + w.controls.edge_dst_variable): + compose.reset_mock() + combo.activated[int].emit(0) + compose.assert_called_once() + + def test_network_nodes_no_data(self): + w = self.widget + w.original_network = self.network + + nodes = w.network_nodes() + self.assertEqual(list(nodes.metas[:, 0]), self.network.nodes) + self.assertTrue(w.Information.suggest_annotation.is_shown()) + self.assertFalse(w.Information.auto_annotation.is_shown()) + + w.auto_data = self.data + w.Information.suggest_annotation.clear() + w.network_nodes() + self.assertFalse(w.Information.suggest_annotation.is_shown()) + self.assertTrue(w.Information.auto_annotation.is_shown()) + + def test_network_nodes_no_label_variable(self): + w = self.widget + w.original_network = self.network + w.data = self.data + nodes = w.network_nodes() + np.testing.assert_equal(nodes.get_column("lab"), + self.data.get_column("lab")) + np.testing.assert_equal(nodes.get_column("node_label"), + self.network.nodes) + + # also serves as `test_data_by_labels` + def test_network_nodes_label_variable(self): + w = self.widget + # scramble node order + w.original_network = \ + Network("bar qux foo bax baz".split(), + self.network.edges) + self.send_signal(w.Inputs.items, self.data) + w.label_variable = self.lab + + for nodes in (w.network_nodes(), w._data_by_labels(self.data)): + np.testing.assert_equal(nodes.get_column("lab2"), list("bcaed")) + self.assertEqual(nodes.domain, self.data.domain) + + def test_combined_data(self): + w = self.widget + w.original_network = self.network + + # Adds original graph's nodes as a column + nodes = w._combined_data(self.data) + np.testing.assert_equal(nodes.get_column("lab"), self.data.get_column("lab")) + np.testing.assert_equal(nodes.get_column("node_label"), self.network.nodes) + + # Doesn't add a column of sequential numbers + self.network.nodes = "1 2 3 4 5".split() + nodes = w._combined_data(self.data) + self.assertIs(nodes, self.data) + + # Doesn't add a column of sequential numbers + self.network.nodes = "0 1 2 3 4".split() + nodes = w._combined_data(self.data) + self.assertIs(nodes, self.data) + + # Numbers, but not sequential: add a column + self.network.nodes = "1 2 8 4 5".split() + nodes = w._combined_data(self.data) + np.testing.assert_equal(nodes.get_column("lab"), self.data.get_column("lab")) + np.testing.assert_equal(nodes.get_column("node_label"), self.network.nodes) + + # Sequence of numbers, but not starting with 0 or 1: add a column + self.network.nodes = "4 5 6 7 8".split() + nodes = w._combined_data(self.data) + np.testing.assert_equal(nodes.get_column("lab"), self.data.get_column("lab")) + np.testing.assert_equal(nodes.get_column("node_label"), self.network.nodes) + + def test_label_to_tabel(self): + w = self.widget + w.original_network = self.network + nodes = w._combined_data(self.data) + np.testing.assert_equal(nodes.get_column("node_label"), self.network.nodes) + + def test_network_edges_no_data(self): + w = self.widget + w.original_network = self.network + + self.assertIs(w.network_edges()[0], self.network.edges[0]) + + self.send_signal(w.Inputs.edges, self.edges) + assert w.network_edges()[0] is not self.network.edges[0] + + w.edge_src_variable = None + self.assertIs(w.network_edges(), self.network.edges) + + w.edge_src_variable = w.edge_dst_variable + w.edge_dst_variable = None + self.assertIs(w.network_edges(), self.network.edges) + + def test_network_edges_resorting(self): + w = self.widget + w.original_network = self.network + w.edge_src_variable_hint = "srcs" + w.edge_dst_variable_hint = "dsts" + + # properly sort the edge table data + self.send_signal(w.Inputs.edges, self.edges) + edges = w.network_edges()[0] + np.testing.assert_equal(edges.edge_data.get_column("w"), + [2, 1, 8, 3]) + self.assertFalse(w.Warning.missing_edges.is_shown()) + self.assertFalse(w.Warning.extra_edges.is_shown()) + + # missing data + self.send_signal(w.Inputs.edges, self.edges[1:]) + edges = w.network_edges()[0] + np.testing.assert_equal(edges.edge_data.get_column("w"), + [2, np.nan, 8, 3]) + self.assertTrue(w.Warning.missing_edges.is_shown()) + self.assertFalse(w.Warning.extra_edges.is_shown()) + w.Warning.missing_edges.clear() + + # missing and extra + with self.edges.unlocked(self.edges.metas): + self.edges.metas[0, 1] = "bax" + self.send_signal(w.Inputs.edges, self.edges) + self.assertTrue(w.Warning.missing_edges.is_shown()) + self.assertTrue(w.Warning.extra_edges.is_shown()) + w.Warning.missing_edges.clear() + w.Warning.extra_edges.clear() + + # non-existing label + with self.edges.unlocked(self.edges.metas): + self.edges.metas[0, 1] = "no such label" + self.send_signal(w.Inputs.edges, self.edges) + self.assertTrue(w.Warning.missing_edges.is_shown()) + self.assertTrue(w.Warning.extra_edges.is_shown()) + + def test_network_edge_directed(self): + undirected = self.network + directed = Network(self.network.nodes, + [DirectedEdges(self.network.edges[0].edges)]) + assert not undirected.edges[0].directed + + def all_matched(net): + w.original_network = net + return not np.any(np.isnan(w.network_edges()[0].edge_data.get_column(0))) + + w = self.widget + w.original_network = self.network + + self.send_signal(w.Inputs.edges, self.edges) + w.edge_src_variable = self.srcs + w.edge_dst_variable = self.dsts + self.assertTrue(all_matched(undirected)) + self.assertTrue(all_matched(directed)) + + w.edge_src_variable = self.dsts + w.edge_dst_variable = self.srcs + self.assertTrue(all_matched(undirected)) + self.assertFalse(all_matched(directed)) + + def test_network_from_inputs_no_edge_data(self): + w = self.widget + w.original_network = self.network + w.edge_src_variable_hint = "srcs" + w.edge_dst_variable_hint = "dsts" + + self.send_signal(w.Inputs.items, self.data) + + self.assertIsNone(w.network_from_inputs()) + + self.send_signal(w.Inputs.edges, self.edges) + assert w.network_from_inputs() is not None + + w.edge_src_variable = None + self.assertIsNone(w.network_from_inputs()) + + w.edge_src_variable = w.edge_dst_variable + w.edge_dst_variable = None + self.assertIsNone(w.network_from_inputs()) + + def test_network_from_inputs(self): + w = self.widget + w.original_network = self.network + w.edge_src_variable_hint = "srcs" + w.edge_dst_variable_hint = "dsts" + self.send_signal(w.Inputs.edges, self.edges) + + with patch("orangecontrib.network.network.compose.network_from_edge_table") as m: + self.assertIs(w.network_from_inputs(), m.return_value) + m.assert_called_once_with(self.edges, self.srcs, self.dsts) + + self.send_signal(w.Inputs.items, self.data) + with patch("orangecontrib.network.network.compose.network_from_tables") as m: + self.assertIs(w.network_from_inputs(), m.return_value) + m.assert_called_once_with(self.data, w.label_variable, + self.edges, self.srcs, self.dsts) + + def test_network_from_inputs_errors(self): + w = self.widget + w.original_network = self.network + w.edge_src_variable_hint = "srcs" + w.edge_dst_variable_hint = "dsts" + self.send_signal(w.Inputs.edges, self.edges) + self.send_signal(w.Inputs.items, self.data) + w.label_variable = None + + def assert_shown(exp): + for err in (w.Error.no_label_variable, + w.Error.missing_label_values, + w.Error.mismatched_edge_variables, + w.Error.unidentified_nodes): + self.assertIs(err.is_shown(), err is exp, repr(err)) + err.clear() + + self.assertIsNone(w.network_from_inputs()) + assert_shown(w.Error.no_label_variable) + + d = Table.concatenate( + [self.data, + Table.from_list(self.data.domain, [[1, "", "a"]]) + ]) + self.send_signal(w.Inputs.items, d) + w.label_variable = self.lab + self.assertIsNone(w.network_from_inputs()) + assert_shown(w.Error.missing_label_values) + + self.send_signal(w.Inputs.items, self.data) + w.edge_src_variable = self.src1 + w.edge_dst_variable = self.dsts + self.assertIsNone(w.network_from_inputs()) + assert_shown(w.Error.mismatched_edge_variables) + + e = Table.concatenate( + [self.edges, + Table.from_list(self.edges.domain, + [[1, 2, 3, 4, "boo", "far"]])] + ) + self.send_signal(w.Inputs.edges, e) + self.assertIsNone(w.network_from_inputs()) + assert_shown(w.Error.unidentified_nodes) + + def test_source_radios(self): + w = self.widget + radio = w.controls.original_net_source.group + test_labels = "aaa bbb ccc ddd eee fff ggg".split() + etest_labels = "bar bax baz foo".split() + w.original_net_source = w.LoadFromFile + w.edge_src_variable_hint = "srcs" + w.edge_dst_variable_hint = "dsts" + + def rchoose(opt): + radio.button(opt).click() + + with patch("AnyQt.QtWidgets.QFileDialog.getOpenFileName", + return_value=(_get_test_net("test.net"), ".net")): + w.select_net_file() + out = self.get_output(w.Outputs.network) + np.testing.assert_equal(out.nodes.get_column(-1), test_labels) + + self.send_signal(w.Inputs.edges, self.edges) + out = self.get_output(w.Outputs.network) + np.testing.assert_equal(out.nodes.get_column(-1), test_labels) + + rchoose(w.ConstructFromInputs) + out = self.get_output(w.Outputs.network) + np.testing.assert_equal(out.nodes.get_column(-1), etest_labels) + + rchoose(w.LoadFromFile) + out = self.get_output(w.Outputs.network) + np.testing.assert_equal(out.nodes.get_column(-1), test_labels) + + rchoose(w.ConstructFromInputs) + out = self.get_output(w.Outputs.network) + np.testing.assert_equal(out.nodes.get_column(-1), etest_labels) + + with patch("AnyQt.QtWidgets.QFileDialog.getOpenFileName", + return_value=(self._get_filename("davis.net"), ".net")): + w.browse_net_file() + self.assertEqual(w.original_net_source, w.LoadFromFile) + out = self.get_output(w.Outputs.network) + np.testing.assert_equal(out.nodes.get_column(-1)[:3], + ['EVELYN', 'LAURA', 'THERESA']) + + def test_flow(self): + w = self.widget + src = w.controls.edge_src_variable + dst = w.controls.edge_dst_variable + + graph_order = [2, 1, 8, 3] + rev_index_order = [2, 3, 1, 8] + alpha_order = [3, 8, 2, 1] + + w.send_report() + + w.open_net_file(_get_test_net("test-compose.net")) + + # Just input file: no label or edge data, labels come from the file + out = self.get_output(w.Outputs.network) + self.assertEqual(len(out.nodes.domain.attributes), 0) + self.assertEqual(len(out.nodes.domain.class_vars), 0) + self.assertEqual(len(out.nodes.domain.metas), 1) + np.testing.assert_equal(out.nodes.get_column(-1), self.data.get_column("lab")) + self.assertIsNone(out.edges[0].edge_data) + w.send_report() + + # Input file + data: we have node data, but still no edge data + self.send_signal(w.Inputs.items, self.data) + out = self.get_output(w.Outputs.network) + self.assertEqual(out.nodes.domain.attributes, self.data.domain.attributes) + self.assertEqual(out.nodes.domain.class_vars, self.data.domain.class_vars) + self.assertEqual(out.nodes.domain.metas, self.data.domain.metas) + np.testing.assert_equal(out.nodes.get_column(0), self.data.get_column(0)) + self.assertIsNone(out.edges[0].edge_data) + w.send_report() + + # Input file + data + edge data + self.send_signal(w.Inputs.edges, self.edges) + out = self.get_output(w.Outputs.network) + self.assertEqual(out.nodes.domain.attributes, self.data.domain.attributes) + self.assertEqual(out.nodes.domain.class_vars, self.data.domain.class_vars) + self.assertEqual(out.nodes.domain.metas, self.data.domain.metas) + np.testing.assert_equal(out.nodes.get_column(0), self.data.get_column(0)) + # edge data exists but is nan because wrong columns are chosen + self.assertIsNotNone(out.edges[0].edge_data) + w.send_report() + + # now edge data needs to be properly permuted + select(src, self.srcs) + select(dst, self.dsts) + out = self.get_output(w.Outputs.network) + np.testing.assert_equal(out.edges[0].edge_data.get_column(0), [2, 1, 8, 3]) + w.send_report() + + # Input file + edge data + self.send_signal(w.Inputs.items, None) + out = self.get_output(w.Outputs.network) + self.assertEqual(len(out.nodes.domain.attributes), 0) + self.assertEqual(len(out.nodes.domain.class_vars), 0) + self.assertEqual(len(out.nodes.domain.metas), 1) + np.testing.assert_equal(out.nodes.get_column(-1), self.data.get_column("lab")) + np.testing.assert_equal(out.edges[0].edge_data.get_column(0), [2, 1, 8, 3]) + w.send_report() + + # Just edge data; no labels or network from file + w.open_net_file(None) + out = self.get_output(w.Outputs.network) + self.assertEqual(len(out.nodes.domain.attributes), 0) + self.assertEqual(len(out.nodes.domain.class_vars), 0) + self.assertEqual(len(out.nodes.domain.metas), 1) + np.testing.assert_equal(out.nodes.get_column(-1), "bar bax baz foo".split()) + w.send_report() + + edom = out.edges[0].edge_data.domain + self.assertEqual(edom.attributes, self.edges.domain.attributes) + self.assertEqual(len(edom.class_vars), 0) + self.assertEqual(len(edom.metas), 0) + # ordered alphabetically by labels + np.testing.assert_equal(out.edges[0].edge_data.get_column(0), alpha_order) + w.send_report() + + # Just edge data, but using indices; no labels or network from file + select(src, self.dst1) # switch to have a different order + select(dst, self.src1) + out = self.get_output(w.Outputs.network) + self.assertEqual(len(out.nodes.domain.attributes), 0) + self.assertEqual(len(out.nodes.domain.class_vars), 0) + self.assertEqual(len(out.nodes.domain.metas), 1) + np.testing.assert_equal(out.nodes.get_column(-1), "1 2 3 4 5".split()) + edom = out.edges[0].edge_data.domain + self.assertEqual(len(edom.attributes), 2) # no used attrs + self.assertEqual(len(edom.class_vars), 0) + self.assertEqual(len(edom.metas), 2) + # ordered indices; dst1 first + np.testing.assert_equal(out.edges[0].edge_data.get_column(0), rev_index_order) + w.send_report() + + # Label and edge data, no network from file + self.send_signal(w.Inputs.items, self.data) + out = self.get_output(w.Outputs.network) + self.assertIs(out.nodes, self.data) + self.assertEqual(len(edom.attributes), 2) # no used attrs + self.assertEqual(len(edom.class_vars), 0) + self.assertEqual(len(edom.metas), 2) + # ordered indices; dst1 first + np.testing.assert_equal(out.edges[0].edge_data.get_column(0), rev_index_order) + w.send_report() + + select(src, self.srcs) + select(dst, self.dsts) + out = self.get_output(w.Outputs.network) + np.testing.assert_equal(out.edges[0].edge_data.get_column(0), graph_order) + w.send_report() + + # Label data, no edges or network from file + self.send_signal(w.Inputs.edges, None) + out = self.get_output(w.Outputs.network) + self.assertIsNone(out) + w.send_report() + + # Kaput + self.send_signal(w.Inputs.items, None) + out = self.get_output(w.Outputs.network) + self.assertIsNone(out) + w.send_report() + + def test_migrate_from_context_settings(self): + settings = {'__version__': 1, + 'context_settings': [ + {'values': {'__version__': 1}, + 'useful_vars': {'artist', 'albums'}, + 'label_variable': 'artist'}]} + w = self.create_widget(OWNxFile, stored_settings=settings) + self.assertEqual(w.label_variable_hint, "artist") + + settings = {'__version__': 1, + 'context_settings': []} + w = self.create_widget(OWNxFile, stored_settings=settings) + self.assertIsNone(w.label_variable_hint) + + settings = {'__version__': 1, + 'label_variable_hint': "foo"} + w = self.create_widget(OWNxFile, stored_settings=settings) + self.assertEqual(w.label_variable_hint, "foo") if __name__ == "__main__": diff --git a/orangecontrib/network/widgets/tests/test_ownxsinglemode.py b/orangecontrib/network/widgets/tests/test_ownxsinglemode.py index 8ebda3e1..05590995 100755 --- a/orangecontrib/network/widgets/tests/test_ownxsinglemode.py +++ b/orangecontrib/network/widgets/tests/test_ownxsinglemode.py @@ -333,6 +333,7 @@ def test_missing_data(self): widget = self.widget network = self._read_network("davis.net") num_total = len(network.nodes) + network.nodes = network.nodes.copy() with network.nodes.unlocked(network.nodes.X): network.nodes.X[0, 1] = np.nan # hide a node's role (= person in this case) self.send_signal(widget.Inputs.network, network)