Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Network File and Explorer: Load data for edges; show colored and labelled edges #249

Open
wants to merge 6 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 13 additions & 3 deletions orangecontrib/network/network/base.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from functools import reduce, wraps, partial
from typing import Sequence
from typing import Sequence, Union

import numpy as np
import scipy.sparse as sp
Expand All @@ -15,6 +15,9 @@ def __init__(self,
name: str = ""):
self.edges = edges.tocsr(copy=True)
self.edges.sum_duplicates()
# A sequence whose elements correspond to edges in the same order as
# elements of self.edges.data
# (i.e. sorted by source (row), then by destination (column))
self.edge_data = edge_data
self.name = name

Expand Down Expand Up @@ -70,6 +73,8 @@ def _compute_degree(edges, node, weighted):
return to - fr

def subset(self, mask, node_renumeration, shape):
# TODO: This is wrong. edges is not sparse, because it can be
# (and usually is) a Table
edges = self.edges.tocoo()
edge_mask = np.logical_and(mask[edges.row], mask[edges.col])
row = node_renumeration[edges.row[edge_mask]]
Expand Down Expand Up @@ -148,7 +153,7 @@ def _make_twoway_edges(self):
# Save (temporary) memory and CPU time
edges.data = as_strided(1, (n_edges, ), (0,))
else:
max_weight = np.max(edges.data)
max_weight = 2 * np.max(edges.data)
edges.data[edges.data == 0] = max_weight + 1

twe = edges + edges.transpose()
Expand All @@ -159,6 +164,7 @@ def _make_twoway_edges(self):
twe.data = as_strided(max_weight, (n_edges, ), (0,))
else:
twe.data[twe.data > max_weight] = 0
# TODO: Diagonal elements have double weights...
return twe

def degrees(self, *, weighted=False):
Expand Down Expand Up @@ -201,7 +207,11 @@ def wrapper(graph, *args, **kwargs):


class Network:
def __init__(self, nodes: Sequence, edges: Sequence, name: str = "",
def __init__(self, nodes: Sequence,
edges: Union[Edges,
sp.csr_matrix,
Sequence[Union[Edges, sp.csr_matrix]]],
name: str = "",
coordinates: np.ndarray = None):
"""
Attributes:
Expand Down
143 changes: 143 additions & 0 deletions orangecontrib/network/network/compose.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
from typing import Union, Dict, Optional

import numpy as np
import scipy.sparse as sp

from Orange.data import Table, StringVariable, ContinuousVariable, Domain
from orangecontrib.network import Network
from orangecontrib.network.network.base import DirectedEdges, UndirectedEdges

# Largest 1-based vertex index accepted by _float_to_ind when the caller
# does not tell it how many nodes there are.
MAX_LABELS = 100_000


class ComposeError(Exception):
    """Base class for errors raised while composing a network from tables."""


class NonUniqueLabels(ComposeError):
    """Raised when the column of node labels contains duplicate values."""


class MismatchingEdgeVariables(ComposeError):
    """Raised when source and destination edge variables differ in type."""


class UnknownNodes(ComposeError):
    """Raised when an edge refers to a node that cannot be resolved.

    This covers labels that are not among the known node labels as well as
    numeric vertex indices that are missing, non-integer or out of range.
    """


def network_from_tables(
        data: Table,
        label_variable: StringVariable,
        edges: Table,
        edge_src_variable: Union[StringVariable, ContinuousVariable],
        edge_dst_variable: Union[StringVariable, ContinuousVariable],
        directed=False) -> Network:
    """
    Build a network whose nodes are the rows of `data` and whose edges are
    described by the rows of `edges`.

    Args:
        data: table with one row per node
        label_variable: variable in `data` holding the node labels
        edges: table with one row per edge
        edge_src_variable: variable in `edges` identifying the source node
        edge_dst_variable: variable in `edges` identifying the destination
            node; must be of the same type as `edge_src_variable`
        directed: if true, edges are directed

    Continuous edge variables are interpreted as 1-based row indices into
    `data`; otherwise their values are matched against the node labels.
    The remaining columns of `edges` (if any) become edge data.

    Raises:
        NonUniqueLabels: if node labels are not unique
        MismatchingEdgeVariables: if edge variables differ in type
        UnknownNodes: if an edge refers to a node that can't be resolved
    """
    node_labels = data.get_column(label_variable)
    # Duplicated labels collapse into a single key, which is how they are
    # detected below.
    index_of = dict(zip(node_labels, range(len(node_labels))))
    if len(index_of) < len(node_labels):
        raise NonUniqueLabels()

    sources, targets = _edge_columns(
        edges, edge_src_variable, edge_dst_variable)
    if not isinstance(edge_src_variable, ContinuousVariable):
        row_ind = _str_to_ind(sources, index_of)
        col_ind = _str_to_ind(targets, index_of)
    else:
        n_nodes = len(data)
        row_ind = _float_to_ind(sources, edge_src_variable.name, n_nodes)
        col_ind = _float_to_ind(targets, edge_dst_variable.name, n_nodes)

    return _net_from_data_and_edges(
        data,
        _reduced_edge_data(edges, edge_src_variable, edge_dst_variable),
        row_ind, col_ind, directed)


def network_from_edge_table(
        edges: Table,
        edge_src_variable: Union[StringVariable, ContinuousVariable],
        edge_dst_variable: Union[StringVariable, ContinuousVariable],
        directed=False) -> Network:
    """
    Build a network from a table of edges alone; nodes are inferred from
    the edge endpoints.

    Args:
        edges: table with one row per edge
        edge_src_variable: variable in `edges` identifying the source node
        edge_dst_variable: variable in `edges` identifying the destination
            node; must be of the same type as `edge_src_variable`
        directed: if true, edges are directed

    For continuous edge variables, values are 1-based vertex indices and
    nodes are labelled "1", "2", ... up to the largest index in use. For
    other variables, nodes are the sorted distinct endpoint values.
    Node data is a table with a single string meta named "node_label";
    the remaining columns of `edges` (if any) become edge data.

    Raises:
        MismatchingEdgeVariables: if edge variables differ in type
        UnknownNodes: if numeric vertex indices are invalid
    """
    sources, targets = _edge_columns(
        edges, edge_src_variable, edge_dst_variable)

    if isinstance(edge_src_variable, ContinuousVariable):
        row_ind = _float_to_ind(sources, edge_src_variable.name)
        col_ind = _float_to_ind(targets, edge_dst_variable.name)
        # Indices are 0-based here, so the node count is the largest one + 1
        n_nodes = max(np.max(row_ind), np.max(col_ind)) + 1
        node_names = [str(number) for number in range(1, n_nodes + 1)]
    else:
        node_names = sorted(set(sources).union(targets))
        index_of = {name: i for i, name in enumerate(node_names)}
        row_ind = _str_to_ind(sources, index_of)
        col_ind = _str_to_ind(targets, index_of)

    n_nodes = len(node_names)
    label_column = np.array([node_names]).T
    node_table = Table.from_numpy(
        Domain([], [], [StringVariable("node_label")]),
        np.empty((n_nodes, 0)),
        np.empty((n_nodes, 0)),
        label_column)

    return _net_from_data_and_edges(
        node_table,
        _reduced_edge_data(edges, edge_src_variable, edge_dst_variable),
        row_ind, col_ind, directed)


def _net_from_data_and_edges(data, edge_data, row_ind, col_ind, directed=False):
    """
    Assemble a `Network` from node data, edge endpoints and edge data.

    Args:
        data: node data; its length determines the number of nodes
        edge_data: per-edge data in the order of `row_ind`/`col_ind`, or None
        row_ind: 0-based source index of each edge
        col_ind: 0-based destination index of each edge
        directed: if true, use `DirectedEdges`, else `UndirectedEdges`

    All edges get weight 1. Edge data, if given, is reordered with
    `_sort_edges` before being attached to the edges.
    """
    n_edges = len(row_ind)
    assert n_edges == len(col_ind)

    if edge_data is not None:
        assert n_edges == len(edge_data)
        # NOTE(review): if the same (row, col) pair occurs more than once,
        # the sparse matrix presumably ends up with fewer stored elements
        # than there are rows of edge data -- confirm callers avoid this.
        edge_data = _sort_edges(row_ind, col_ind, edge_data)

    # A zero-stride view of a single 1.0 stands in for an array of ones
    unit_weights = np.lib.stride_tricks.as_strided(
        np.ones(1), (n_edges,), (0,))
    n_nodes = len(data)
    adjacency = sp.csr_array(
        (unit_weights, (row_ind, col_ind)), shape=(n_nodes, n_nodes))

    if directed:
        net_edges = DirectedEdges(adjacency, edge_data)
    else:
        net_edges = UndirectedEdges(adjacency, edge_data)
    return Network(data, net_edges)


def _sort_edges(row_ind, col_ind, edge_data):
    """
    Reorder `edge_data` so that edges are sorted by source, then destination.

    Args:
        row_ind: source (row) index of each edge
        col_ind: destination (column) index of each edge
        edge_data: per-edge data, in the same order as `row_ind` and
            `col_ind`; must support indexing with an integer index array

    Returns:
        `edge_data` reordered by `row_ind`, with ties broken by `col_ind`
    """
    # np.lexsort uses the *last* key as the primary one. It replaces a
    # two-pass argsort, which is only correct when the second pass is
    # stable; numpy's default sort kind gives no such guarantee, so edges
    # sharing a source could come out with their columns out of order.
    order = np.lexsort((col_ind, row_ind))
    return edge_data[order]


def _reduced_edge_data(edges, edge_src_variable, edge_dst_variable):
    """
    Return `edges` without the two columns that identify edge endpoints.

    Attributes, class variables and metas are filtered separately, so the
    remaining variables keep their role. Returns None if no variables
    remain.
    """
    endpoints = (edge_src_variable, edge_dst_variable)
    domain = edges.domain

    kept = []
    for role in (domain.attributes, domain.class_vars, domain.metas):
        kept.append([var for var in role if var not in endpoints])

    if any(kept):
        return edges.transform(Domain(*kept))
    return None


def _edge_columns(edges, edge_src_variable, edge_dst_variable):
    """
    Return the (source, destination) columns of the edge table.

    Raises:
        MismatchingEdgeVariables: if the two variables are not of exactly
            the same type
    """
    if type(edge_src_variable) is type(edge_dst_variable):
        src_col = edges.get_column(edge_src_variable)
        dst_col = edges.get_column(edge_dst_variable)
        return src_col, dst_col
    raise MismatchingEdgeVariables()


def _str_to_ind(col: np.ndarray, label_idcs: Dict[str, int]) -> np.ndarray:
    """
    Translate a column of node labels into an array of node indices.

    Args:
        col: labels of edge endpoints
        label_idcs: mapping from node label to node index

    Returns:
        integer array with the index of each label in `col`; empty if
        `col` is empty

    Raises:
        UnknownNodes: if `col` contains labels missing from `label_idcs`
    """
    # An edge table without rows is valid; np.min-style reductions on an
    # empty array would raise ValueError instead.
    if len(col) == 0:
        return np.empty(0, dtype=int)

    ind = np.fromiter((label_idcs.get(x, -1) for x in col),
                      count=len(col), dtype=int)
    if (ind == -1).any():
        unknown = set(col) - set(label_idcs)
        # str() keeps the error report itself from failing on non-string
        # labels
        raise UnknownNodes("Unknown labels: "
                           + ", ".join(sorted(map(str, unknown))))
    return ind


def _float_to_ind(col: np.ndarray,
                  var_name: str,
                  nlabels: Optional[int] = None) -> np.ndarray:
    """
    Convert a column of 1-based vertex numbers into 0-based indices.

    Args:
        col: numeric column with 1-based vertex indices
        var_name: name of the variable, used in error messages
        nlabels: number of nodes; if None, indices up to `MAX_LABELS`
            are accepted

    Returns:
        integer array of 0-based indices; empty if `col` is empty

    Raises:
        UnknownNodes: if the column has missing values, or indices that
            are negative, zero, too large or non-integer
    """
    # An edge table without rows is valid; np.min/np.max would raise
    # ValueError on an empty array.
    if len(col) == 0:
        return np.empty(0, dtype=int)

    # Compare with None explicitly: `nlabels or MAX_LABELS` would treat an
    # empty node table (nlabels == 0) as if the node count were unknown.
    max_index = MAX_LABELS if nlabels is None else nlabels

    mi, ma = np.min(col), np.max(col)
    # np.min/np.max propagate NaN, and NaN fails every comparison below,
    # so missing values are reported before the range checks.
    if np.isnan(mi) or np.isnan(ma):
        raise UnknownNodes(f"{var_name} has missing values")
    if mi < 0:
        raise UnknownNodes("negative vertex indices")
    if mi == 0:
        raise UnknownNodes("vertex indices must be 1-based")
    if ma > max_index:
        raise UnknownNodes("some indices are too large")
    if not np.all(np.modf(col)[0] == 0):
        raise UnknownNodes("some indices are non-integer")
    return col.astype(int) - 1
34 changes: 23 additions & 11 deletions orangecontrib/network/network/readwrite.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,17 +51,29 @@ def read_vertices(lines):


def read_edges(id_idx, lines, nvertices):
lines = [(id_idx[v1], id_idx[v2], abs(float(value)))
for v1, v2, value, *_ in (line.split()[:3] + [1]
for line in lines)]
v1s, v2s, values = zip(*lines)
values = np.array(values)
if values.size and np.all(values == values[0]):
values = np.lib.stride_tricks.as_strided(
values[0], (len(values), ), (0, ))
def fake_data(x, n):
values = np.lib.stride_tricks.as_strided(x, (n, ), (0,))
values.flags.writeable = False
return sp.coo_matrix((values, (np.array(v1s), np.array(v2s))),
shape=(nvertices, nvertices))
return values


lines = [(id_idx[v1], id_idx[v2], value)
for v1, v2, value, *_ in (
(line.split(maxsplit=2) + [None])[:3] for line in lines)]
v1s, v2s, values = zip(*lines)
try:
values = np.array(values, dtype=float)
if np.all(np.isnan(values)):
values = fake_data(np.array(1.), len(values))
elif values.size and np.all(values == values[0]):
values = fake_data(values[0], len(values))
edge_data = None
except ValueError:
edge_data = np.array(values)
values = fake_data(np.array(1.), len(v1s))
return (sp.coo_matrix((values, (np.array(v1s), np.array(v2s))),
shape=(nvertices, nvertices)),
edge_data)


def read_edges_list(id_idx, lines, nvertices):
Expand Down Expand Up @@ -121,7 +133,7 @@ def check_has_vertices():
check_has_vertices()
edges.append(
EdgeType[part_type=="*arcs"](
read_edges(id_idx, line_part, len(labels)),
*read_edges(id_idx, line_part, len(labels)),
name=part_args.strip() or part_type[1:]))
elif part_type in ("*edgeslist", "*arcslist"):
check_has_vertices()
Expand Down
Loading