From 48071c8905c441728c31d5a132620f7b4e588e36 Mon Sep 17 00:00:00 2001 From: Amandeep Singh Date: Thu, 17 Mar 2022 11:17:50 -0700 Subject: [PATCH 01/32] Update docker-build-and-push-image.yml --- .github/workflows/docker-build-and-push-image.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/docker-build-and-push-image.yml b/.github/workflows/docker-build-and-push-image.yml index 272bb3bad..1b6599305 100644 --- a/.github/workflows/docker-build-and-push-image.yml +++ b/.github/workflows/docker-build-and-push-image.yml @@ -12,8 +12,8 @@ jobs: - name: Login to Docker Hub uses: docker/login-action@v1 with: - username: uscisii2admin - password: ${{ secrets.USCISIDOCKERHUB }} + username: ${{ secrets.USC_ISI_DOCKERHUB_USERNAME }} + password: ${{ secrets.USC_ISI_DOCKERHUB_PASSWORD }} - name: Set up Docker Buildx uses: docker/setup-buildx-action@v1 From 3145699799f92806007a7244ec49a79ebfbcbbc1 Mon Sep 17 00:00:00 2001 From: Amandeep Singh Date: Thu, 17 Mar 2022 11:18:21 -0700 Subject: [PATCH 02/32] Update docker-buil-push-dev.yml --- .github/workflows/docker-buil-push-dev.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/docker-buil-push-dev.yml b/.github/workflows/docker-buil-push-dev.yml index a1f796a7e..38f3372e3 100644 --- a/.github/workflows/docker-buil-push-dev.yml +++ b/.github/workflows/docker-buil-push-dev.yml @@ -12,8 +12,8 @@ jobs: - name: Login to Docker Hub uses: docker/login-action@v1 with: - username: uscisii2admin - password: ${{ secrets.USCISIDOCKERHUB }} + username: ${{ secrets.USC_ISI_DOCKERHUB_USERNAME }} + password: ${{ secrets.USC_ISI_DOCKERHUB_PASSWORD }} - name: Set up Docker Buildx uses: docker/setup-buildx-action@v1 From 8a27c9a252ea19c515120f442ef9ec9e397dfa1a Mon Sep 17 00:00:00 2001 From: saggu Date: Fri, 25 Mar 2022 17:17:53 -0700 Subject: [PATCH 03/32] fix node color, edge width, edge color --- kgtk/visualize/visualize_api.py | 63 +++++++++++++++++++-------------- 1 file changed, 37 insertions(+), 26 deletions(-) diff --git a/kgtk/visualize/visualize_api.py b/kgtk/visualize/visualize_api.py index 22b067ece..cb606ec2b 100644 --- a/kgtk/visualize/visualize_api.py +++ b/kgtk/visualize/visualize_api.py @@ -118,8 +118,8 @@ def __init__( self.kwargs = kwargs def execute(self) -> int: - d, node_color = self.compute_visualization_graph() - self.to_html(d, node_color) + d, node_color, node_color_map_len = self.compute_visualization_graph() + self.to_html(d, node_color, node_color_map_len - 1) return 0 def compute_visualization_graph(self): @@ -253,7 +253,13 @@ def compute_visualization_graph(self): '@') - 1], 'width_orig': width_orig}) else: edges.append( - {'source': row[n1], 'target': row[n2], 'label': row[l2], 'width_orig': width_orig}) + { + 'source': row[n1], + 'target': row[n2], + 'label': row[l2], + 'width_orig': width_orig, + 'color': self.edge_color_default + }) arr = [] if self.edge_width_mapping == 'fixed': @@ -312,9 +318,12 @@ def compute_visualization_graph(self): edge['width'] = self.edge_width_default else: edge['width'] = self.edge_width_minimum + (log_cur - log_min) * ( - self.edge_width_maximum - self.edge_width_minimum) / (log_max - log_min) + self.edge_width_maximum - self.edge_width_minimum) / (log_max - log_min) else: edge['width'] = self.edge_width_default + else: + for edge in edges: + edge['width'] = self.edge_width_default if self.edge_color_column is not None and self.edge_color_style == 'gradient': edge_color_list = [] @@ -449,7 +458,7 @@ def compute_visualization_graph(self): if log_max == log_min: temp['color'] = self.node_color_default else: - color_value\ + color_value \ = 0 + (log_cur - log_min) * (1 - 0) / (log_max - log_min) temp['color'] = float(color_value) if not pd.isna( row[kr_node.column_name_map[ @@ -460,18 +469,12 @@ def compute_visualization_graph(self): self.node_color_column]]) else self.node_color_default else: node_color = 2 - if row[kr_node.column_name_map[self.node_color_column]] not in node_color_map \ - and len(node_color_map) < 10: - node_color_map[row[kr_node.column_name_map[self.node_color_column]]] \ - = len(node_color_map) - - # temp['color'] = min(color_set[row[kr_node.column_name_map[self.node_color_column]]], - # 9) if not pd.isna( - # row[kr_node.column_name_map[self.node_color_column]]) else self.node_color_default - # TODO this is a hack for now to get fix colors for few and many subclasses node, - # TODO these are the only 2 options in the node graph, we'll fix it properly - temp['color'] = node_color_map.get(row[kr_node.column_name_map[self.node_color_column]], - self.node_color_default) + if row[kr_node.column_name_map[self.node_color_column]] not in node_color_map: + node_color_map[row[kr_node.column_name_map[self.node_color_column]]] = len( + node_color_map) + + temp['color'] = node_color_map[row[kr_node.column_name_map[self.node_color_column]]] + if self.node_size_column is not None: if self.node_size_mapping == 'fixed': temp['size'] = row[kr_node.column_name_map[self.node_size_column]] if not pd.isna( @@ -485,8 +488,8 @@ def compute_visualization_graph(self): size_value = self.node_size_minimum + ( float(row[kr_node.column_name_map[self.node_size_column]]) - min(node_size_list)) * ( - self.node_size_maximum - self.node_size_minimum) / ( - max(node_size_list) - min(node_size_list)) + self.node_size_maximum - self.node_size_minimum) / ( + max(node_size_list) - min(node_size_list)) temp['size'] = float(size_value) if not pd.isna( row[kr_node.column_name_map[self.node_size_column]]) else self.node_size_default elif self.node_size_scale == 'log': @@ -530,22 +533,27 @@ def compute_visualization_graph(self): raise KGTKException("Exit requested") except Exception as e: raise KGTKException(str(e)) - return d, node_color + return d, node_color, len(node_color_map) - def to_html(self, d, node_color): + def to_html(self, d, node_color, num_colors): output_kgtk_file: Path = KGTKArgumentParser.get_output_file(self.output_file) f = open(output_kgtk_file, 'w') - f.write(''' - + f.write(f''' + +
- - - - - - - -
- + + + + + + + +
+ - ''') + ''') From 2cd7b942248174bee8f8fb92476d1d9b22ccd78c Mon Sep 17 00:00:00 2001 From: saggu Date: Fri, 1 Apr 2022 11:53:50 -0700 Subject: [PATCH 08/32] reusable colr and width functions --- kgtk/visualize/visualize_api.py | 268 +++++++++++++++----------------- 1 file changed, 128 insertions(+), 140 deletions(-) diff --git a/kgtk/visualize/visualize_api.py b/kgtk/visualize/visualize_api.py index 4aed3f5b7..c44bc3ec9 100644 --- a/kgtk/visualize/visualize_api.py +++ b/kgtk/visualize/visualize_api.py @@ -14,6 +14,7 @@ from kgtk.value.kgtkvalueoptions import KgtkValueOptions from kgtk.kgtkformat import KgtkFormat import re +from typing import List def parser(): @@ -23,11 +24,6 @@ def parser(): } -node_color_map = { - 'few_subclasses': 0, - 'many_subclasses': 1 -} - kgtk_format = KgtkFormat() compiled_hex_color_regex = re.compile(r"^#([A-Fa-f0-9]{6}|[A-Fa-f0-9]{3})$") @@ -76,8 +72,8 @@ def __init__( node_border_color: str = None, tooltip_column: str = None, show_text: str = None, - node_categorical_scale: str = 'd3.schemeCategory10', - edge_categorical_scale: str = 'd3.schemeCategory10', + node_categorical_scale: str = 'rainbow', + edge_categorical_scale: str = 'rainbow', node_gradient_scale: str = 'd3.interpolateRdBu', edge_gradient_scale: str = 'd3.interpolateRdBu', kwargs=None): @@ -139,6 +135,13 @@ def __init__( self.edge_color_map = {} + self.node_color_map = { + 'few_subclasses': 0, + 'many_subclasses': 1 + } + + self.node_color_choice = 0 + if self.node_size_minimum == 0.0 and self.node_size_scale == 'log': raise ValueError("node size cannot be 0 when using log scale") if self.edge_width_minimum == 0 and self.edge_width_scale == 'log': @@ -146,8 +149,8 @@ def __init__( def execute(self) -> int: - d, node_color, node_color_map_len = self.compute_visualization_graph() - self.to_html(d, node_color, node_color_map_len - 1) + d = self.compute_visualization_graph() + self.to_html(d) return 0 def compute_visualization_graph(self): @@ -163,7 +166,7 @@ def compute_visualization_graph(self): raise KGTKException("Exit requested") except Exception as e: raise KGTKException(str(e)) - return d, len(node_color_map) + return d def process_edge_file(self): # First create the KgtkReader. It provides parameters used by the ID @@ -253,68 +256,22 @@ def process_edge_file(self): kr.close() if self.edge_width_column is not None: - edge_width_list = [x['width_orig'] for x in edges] - max_width = max(edge_width_list) - min_width = min(edge_width_list) - - log_max_width = math.log(max_width, self.base) if max_width > 0.0 else -1.0 - log_min_width = math.log(min_width, self.base) if min_width > 0.0 else -1.0 - - for edge in edges: - edge_width = edge['width_orig'] - if self.edge_width_scale == 'linear': - edge['width'] = self.edge_width_minimum + (edge_width - min_width) * \ - (self.edge_width_minimum - self.edge_width_maximum) / \ - (max_width - min_width) - elif self.edge_width_scale == 'log': - if edge_width == 0.0 or log_max_width == log_min_width: - edge['width'] = self.edge_width_default - else: - edge['width'] = self.edge_width_minimum + (math.log(edge_width, self.base) - log_min_width) * ( - self.edge_width_maximum - self.edge_width_minimum) / (log_max_width - log_min_width) - else: - edge['width'] = edge_width if edge_width > 0 else self.edge_width_default + edges = self.calculate_size(edges, + self.edge_width_scale, + self.edge_width_default, + self.edge_width_minimum, + self.edge_width_maximum, + 'width_orig', + 'width') if self.edge_color_column is not None: - if self.edge_color_hex: - # all good, nothing to do here - pass - else: - if self.edge_color_numbers: - edge_color_list = [x['orig_color'] for x in edges] - max_color = max(edge_color_list) - min_color = min(edge_color_list) - - log_max_color = math.log(max_color, self.base) if max_color > 0.0 else -1.0 - log_min_color = math.log(min_color, self.base) if min_color > 0.0 else -1.0 - - for edge in edges: - orig_color = edge['orig_color'] - if self.edge_color_style == 'gradient': - if self.edge_color_scale == 'linear': - edge['color'] = (orig_color - min_color) / (max_color - min_color) - elif self.edge_color_scale == 'log': - if orig_color == 0.0 or log_max_color == log_min_color: - edge['color'] = self.node_color_default - else: - edge['color'] = (math.log(orig_color, self.base) - log_min_color) / ( - log_max_color - log_min_color) - else: - edge['color'] = orig_color if orig_color != 0.0 else self.edge_color_default - else: - if orig_color not in self.edge_color_map: - self.edge_color_map[orig_color] = len(self.edge_color_map) - - edge['color'] = self.edge_color_map[orig_color] - del edge['orig_color'] - else: - for edge in edges: - orig_color = edge['orig_color'] - if orig_color not in self.edge_color_map: - self.edge_color_map[orig_color] = len(self.edge_color_map) - - edge['color'] = self.edge_color_map[orig_color] - del edge['orig_color'] + edges = self.calculate_color(edges, + self.edge_color_hex, + self.edge_color_numbers, + self.edge_color_style, + self.edge_color_scale, + self.edge_color_default, + False) if self.node_file is None: for ele in nodes: nodes_from_edge_file.append( @@ -344,7 +301,6 @@ def convert_string_float(a_string: str) -> float: def process_node_file(self): nodes = [] - if self.node_file is not None: kr_node: KgtkReader = KgtkReader.open(self.node_file, @@ -414,73 +370,109 @@ def process_node_file(self): nodes.append(temp) kr_node.close() + if self.node_color_column is not None: + nodes = self.calculate_color(nodes, + self.node_color_hex, + self.node_color_numbers, + self.node_color_style, + self.node_color_scale, + self.node_color_default, + True) + if self.node_size_column is not None: - if self.node_color_hex: - # all good, nothing to do here - pass - else: - if self.node_color_numbers: - node_color_list = [x['orig_color'] for x in nodes] - max_color = max(node_color_list) - min_color = min(node_color_list) - - log_max_color = math.log(max_color, self.base) if max_color > 0.0 else -1.0 - log_min_color = math.log(min_color, self.base) if min_color > 0.0 else -1.0 - - for node in nodes: - orig_color = node['orig_color'] - if self.node_color_style == 'gradient': - if self.node_color_scale == 'linear': - node['color'] = (orig_color - min_color) / (max_color - min_color) - elif self.node_color_scale == 'log': - if orig_color == 0.0 or log_max_color == log_min_color: - node['color'] = self.node_color_default - else: - node['color'] = (math.log(orig_color, self.base) - log_min_color) / ( - log_max_color - log_min_color) - else: - node['color'] = orig_color if orig_color != 0.0 else self.node_color_default - else: - if orig_color not in node_color_map: - node_color_map[orig_color] = len(node_color_map) + self.calculate_size(nodes, + self.node_size_scale, + self.node_size_default, + self.node_size_minimum, + self.node_size_maximum, + 'orig_size', + 'size') - node['color'] = node_color_map[orig_color] - del node['orig_color'] - else: - for node in nodes: - orig_color = node['orig_color'] - if orig_color not in node_color_map: - node_color_map[orig_color] = len(node_color_map) + return nodes - node['color'] = node_color_map[orig_color] - del node['orig_color'] + def calculate_color(self, + nodes: List[dict], + node_color_hex: bool, + node_color_numbers: bool, + node_color_style: str, + node_color_scale: str, + node_color_default: str, + process_nodes: bool) -> List[dict]: + + if node_color_hex: + # all good, nothing to do here + pass + else: - if self.node_size_column is not None: - node_size_list = [x['orig_size'] for x in nodes] - max_size = max(node_size_list) - min_size = min(node_size_list) - - log_max_size = math.log(max_size, self.base) if max_size > 0.0 else -1.0 - log_min_size = math.log(min_size, self.base) if min_size > 0.0 else -1.0 - - for node in nodes: - node_size = node['orig_size'] - if self.node_size_scale == 'linear': - node['size'] = self.node_size_minimum + (node_size - min_size) * \ - (self.node_size_maximum - self.node_size_minimum) / \ - (max_size - min_size) - elif self.node_size_scale == 'log': - if node_size == 0.0 or log_min_size == log_max_size: - node['size'] = self.node_size_default + for node in nodes: + orig_color = node['orig_color'] + if node_color_numbers: + node_color_list = [x['orig_color'] for x in nodes] + max_color = max(node_color_list) + min_color = min(node_color_list) + + log_max_color = math.log(max_color, self.base) if max_color > 0.0 else -1.0 + log_min_color = math.log(min_color, self.base) if min_color > 0.0 else -1.0 + + if node_color_style == GRADIENT: + if process_nodes: + self.node_color_choice = 1 + if node_color_scale == 'linear': + node['color'] = (orig_color - min_color) / (max_color - min_color) + elif node_color_scale == 'log': + if orig_color == 0.0 or log_max_color == log_min_color: + node['color'] = node_color_default + else: + node['color'] = (math.log(orig_color, self.base) - log_min_color) / ( + log_max_color - log_min_color) else: - node['size'] = self.node_size_minimum + (math.log(node_size, self.base) - log_min_size) * ( - self.node_size_maximum - self.node_size_minimum) / (log_max_size - log_min_size) + node['color'] = orig_color if orig_color != 0.0 else node_color_default + + if 'color' not in node: + if process_nodes: + self.node_color_choice = 2 + if orig_color not in self.node_color_map: + self.node_color_map[orig_color] = len(self.node_color_map) + + node['color'] = self.node_color_map[orig_color] else: - node['size'] = node_size if node_size > 0 else self.node_size_default + if orig_color not in self.edge_color_map: + self.edge_color_map[orig_color] = len(self.edge_color_map) + node['color'] = self.edge_color_map[orig_color] + del node['orig_color'] + return nodes + def calculate_size(self, + nodes: List[dict], + node_size_scale: str, + node_size_default: str, + node_size_minimum: float, + node_size_maximum: float, + size_field: str, + output_field: str + ) -> List[dict]: + node_size_list = [x[size_field] for x in nodes] + max_size = max(node_size_list) + min_size = min(node_size_list) + log_max_size = math.log(max_size, self.base) if max_size > 0.0 else -1.0 + log_min_size = math.log(min_size, self.base) if min_size > 0.0 else -1.0 + for node in nodes: + node_size = node[size_field] + if node_size_scale == 'linear': + node[output_field] = node_size_minimum + (node_size - min_size) * \ + (node_size_maximum - node_size_minimum) / \ + (max_size - min_size) + elif node_size_scale == 'log': + if node_size == 0.0 or log_min_size == log_max_size: + node[output_field] = node_size_default + else: + node[output_field] = node_size_minimum + (math.log(node_size, self.base) - log_min_size) * ( + node_size_maximum - node_size_minimum) / (log_max_size - log_min_size) + else: + node[output_field] = node_size if node_size > 0 else node_size_default return nodes - def to_html(self, d, node_color, num_colors): + def to_html(self, d): output_kgtk_file: Path = KGTKArgumentParser.get_output_file(self.output_file) f = open(output_kgtk_file, 'w') f.write(f''' @@ -495,10 +487,7 @@ def to_html(self, d, node_color, num_colors):
''') From 6ea10ee0273e037a5ccab5834be194989d03f120 Mon Sep 17 00:00:00 2001 From: saggu Date: Fri, 1 Apr 2022 17:36:42 -0700 Subject: [PATCH 10/32] better help messages for options --- kgtk/cli/visualize-graph.py | 280 +++++++++++++++++++++++------------- 1 file changed, 179 insertions(+), 101 deletions(-) diff --git a/kgtk/cli/visualize-graph.py b/kgtk/cli/visualize-graph.py index 8fb45ca1b..8e3143873 100644 --- a/kgtk/cli/visualize-graph.py +++ b/kgtk/cli/visualize-graph.py @@ -32,145 +32,215 @@ def add_arguments_extended(parser: KGTKArgumentParser, parser.add_input_file(positional=True) parser.add_output_file() - parser.add_argument('--node-file', dest='node_file', type=str, + parser.add_argument('--node-file', + dest='node_file', + type=str, default=None, - help="Specify the location of node file.") + help="Path of the node file.") - parser.add_argument('--direction', dest='direction', type=str, + parser.add_argument('--direction', + dest='direction', + type=str, default=None, - help="Specify direction (arrow, " + - "particle and None), default none") + help="The edge direction: arrow|particle|None. Default: None") - parser.add_argument('--show-edge-label', dest='edge_label', type=bool, + parser.add_argument('--show-edge-label', + dest='edge_label', + action='store_true', default=False, - help="Specify direction (arrow, particle and None)" + - ", default none") + help="Add this option to show labels on edges. Default: False") parser.add_argument('--edge-color-column', - dest='edge_color_column', type=str, + dest='edge_color_column', + type=str, default=None, - help="Specify column used for edge color") + help="Column for edge colors in the edge file. " + "The values can be numbers, hex codes or any strings") + + parser.add_argument('--edge-color-numbers', + dest='edge_color_numbers', + action='store_true', + default=False, + help="Add this option if the values in the --edge-color-column are numbers") + + parser.add_argument('--edge-color-hex', + dest='edge_color_hex', + action='store_true', + default=False, + help="Add this option if the values in the --edge-color-column are valid hexadecimal colors." + "Valid hexadecimal colors start with # and are of 3 or 6 length (without the #) ") parser.add_argument('--edge-color-style', - dest='edge_color_style', type=str, + dest='edge_color_style', + type=str, default=None, - help="Specify style (categorical, gradient)" + - "used for edge color") + help="Edge color style for edge color: categorical|gradient. Default: None") parser.add_argument('--edge-color-default', - dest='edge_color_default', type=str, + dest='edge_color_default', + type=str, default='#000000', - help="Specify default color for edge") + help="Default color for edges. Default: '#000000'") parser.add_argument('--edge-width-column', - dest='edge_width_column', type=str, + dest='edge_width_column', + type=str, default=None, - help="Specify column used for edge width") - - parser.add_argument('--edge-width-minimum', dest='edge_width_minimum', - type=float, default=1.0, - help="Specify edge width minimum") - - parser.add_argument('--edge-width-maximum', dest='edge_width_maximum', - type=float, default=5.0, - help="Specify edge width maximum") - - parser.add_argument('--edge-width-mapping', dest='edge_width_mapping', - type=str, default=None, - help="Specify mapping (auto, fixed) for edge width") - - parser.add_argument('--edge-width-default', dest='edge_width_default', - type=float, default=1.0, - help="Specify default width for edge") - - parser.add_argument('--edge-width-scale', dest='edge_width_scale', - type=str, default=None, - help="Specify scale for width for edge (linear, log)") - - parser.add_argument('--node-color-column', dest='node_color_column', - type=str, default=None, - help="Specify column used for node color") - - parser.add_argument('--node-color-style', dest='node_color_style', - type=str, default=None, - help="Specify style (categorical, gradient)" + - " used for node color") + help="Column for edge widths in the edge file. The values should be numbers.") + + parser.add_argument('--edge-width-minimum', + dest='edge_width_minimum', + type=float, + default=1.0, + help="Minimum edge width. Default: 1.0") + + parser.add_argument('--edge-width-maximum', + dest='edge_width_maximum', + type=float, + default=5.0, + help="Maximum edge width. Default: 5.0") + + parser.add_argument('--edge-width-default', + dest='edge_width_default', + type=float, + default=1.0, + help="Default edge width. Default: 1.0") + + parser.add_argument('--edge-width-scale', + dest='edge_width_scale', + type=str, + default=None, + help="Edge width scale: linear|log. Default: None") - parser.add_argument('--node-color-default', dest='node_color_default', - type=str, default='#000000', - help="Specify default color for node") + parser.add_argument('--node-color-column', + dest='node_color_column', + type=str, + default=None, + help="Column for node colors in the --node-file. The values can be numbers, valid hex codes" + " or any strings.") - parser.add_argument('--node-color-scale', dest='node_color_scale', - type=str, default=None, - help="Specify node color scale (linear/log)") + parser.add_argument('--node-color-style', + dest='node_color_style', + type=str, + default=None, + help="Node color style: categorical|gradient. Default: None") - parser.add_argument('--node-size-column', dest='node_size_column', - type=str, default=None, - help="Specify column used for node size") + parser.add_argument('--node-color-default', + dest='node_color_default', + type=str, + default='#000000', + help="Default node color. Default: '#000000'") - parser.add_argument('--node-size-minimum', dest='node_size_minimum', - type=float, default=1.0, - help="Specify node size minimum") + parser.add_argument('--node-color-scale', + dest='node_color_scale', + type=str, + default=None, + help="Node color scale: linear|log. Default: None") - parser.add_argument('--node-size-maximum', dest='node_size_maximum', - type=float, default=5.0, - help="Specify node size maximum") + parser.add_argument('--node-color-numbers', + dest='node_color_numbers', + action='store_true', + default=False, + help="Add this option if the values in the --node-color-column are numbers") - parser.add_argument('--node-size-default', dest='node_size_default', - type=float, default=2.0, - help="Specify default size for node") + parser.add_argument('--node-color-hex', + dest='node_color_hex', + action='store_true', + default=False, + help="Add this option if the values in the --node-color-column are valid hexadecimal colors." + "Valid hexadecimal colors start with # and are of 3 or 6 length (without the #) ") - parser.add_argument('--node-size-scale', dest='node_size_scale', type=str, + parser.add_argument('--node-size-column', + dest='node_size_column', + type=str, default=None, - help="Specify scale for node size (linear, log)") + help="Column for node sizes in the --node-file. Default: None") + + parser.add_argument('--node-size-minimum', + dest='node_size_minimum', + type=float, + default=1.0, + help="Minimum node size. Default: 1.0") + + parser.add_argument('--node-size-maximum', + dest='node_size_maximum', + type=float, + default=5.0, + help="Maximum node size. Default: 5.0") + + parser.add_argument('--node-size-default', + dest='node_size_default', + type=float, + default=2.0, + help="Default node size. Default: 2.0") + + parser.add_argument('--node-size-scale', + dest='node_size_scale', + type=str, + default=None, + help="Node size scale: linear|log. Default: None") - parser.add_argument('--node-file-id', dest='node_file_id', type=str, + parser.add_argument('--node-file-id', + dest='node_file_id', + type=str, default='id', - help="Specify id column name in node file," + - " default is id") + help="ID column name in the --node-file. Default: 'id'") - parser.add_argument('--show-text-limit', dest='show_text_limit', type=int, + parser.add_argument('--show-text-limit', + dest='show_text_limit', + type=int, default=500, - help="When node number is greater than this number, " + - "will not show text as label, default is 500") + help="When number of nodes is greater than --show-text-limit, node labels will not be visible." + "Default: 500") - parser.add_argument('--node-border-color', dest='node_border_color', - type=str, default=None, - help="Specify node border color ") + parser.add_argument('--node-border-color', + dest='node_border_color', + type=str, + default=None, + help="Node border color. Default: None") - parser.add_argument('--tooltip-column', dest='tooltip_column', type=str, + parser.add_argument('--tooltip-column', + dest='tooltip_column', + type=str, default=None, - help="Specify option to show tooltip ") + help="Column for node tooltips in the --node-file. Default: None") - parser.add_argument('--show-text', dest='show_text', type=str, + parser.add_argument('--show-text', + dest='show_text', + type=str, default=None, - help="Specify option to show text [None|center|above], default is None. If the number of" - "nodes in the graph is greater than specified by --show-text-limit option, which is" - "500 by default, then the text will not be shown in the visualization.") + help="Show node labels at the position relative to node: center|above. Default: None. " + "If the number of nodes in the graph is greater than specified by " + "--show-text-limit option, which is 500 by default, " + "then the text will not be shown in the visualization.") parser.add_argument('--node-categorical-scale', dest='node_categorical_scale', - type=str, default='d3.schemeCategory10', - help="Specify color categorical scale " + - "for node from d3-scale-chromatic") + type=str, + default='rainbow', + help="Node color categorical scale node from d3-scale-chromatic." + "https://observablehq.com/@d3/sequential-scales. Default: rainbow") parser.add_argument('--edge-categorical-scale', dest='edge_categorical_scale', - type=str, default='d3.schemeCategory10', - help="Specify color categorical scale " + - "for edge d3-scale-chromatic") - - parser.add_argument('--node-gradient-scale', dest='node_gradient_scale', - type=str, default='d3.interpolateRdBu', - help="Specify color gradient scale" + - " for node from d3-scale-chromatic") - - parser.add_argument('--edge-gradient-scale', dest='edge_gradient_scale', - type=str, default='d3.interpolateRdBu', - help="Specify color gradient scale" + - " for edge d3-scale-chromatic") - - KgtkIdBuilderOptions.add_arguments(parser, expert=True) # Show all the options. + type=str, + default='rainbow', + help="Edge color categorical scale for edge d3-scale-chromatic." + "https://observablehq.com/@d3/sequential-scales. Default: rainbow") + + parser.add_argument('--node-gradient-scale', + dest='node_gradient_scale', + type=str, + default='d3.interpolateRdBu', + help="Node color gradient scale from d3-scale-chromatic. Default: d3.interpolateRdBu") + + parser.add_argument('--edge-gradient-scale', + dest='edge_gradient_scale', + type=str, + default='d3.interpolateRdBu', + help="Edge color gradient scale from d3-scale-chromatic. Default: d3.interpolateRdBu") + + KgtkIdBuilderOptions.add_arguments(parser, expert=_expert) # Show all the options. KgtkReader.add_debug_arguments(parser, expert=_expert) KgtkReaderOptions.add_arguments(parser, mode_options=True, expert=_expert) KgtkValueOptions.add_arguments(parser, expert=_expert) @@ -187,6 +257,8 @@ def run(input_file: KGTKFiles, direction: str = None, edge_label: bool = False, edge_color_column: str = None, + edge_color_hex: bool = False, + edge_color_numbers: bool = False, edge_color_style: str = None, edge_color_default: str = '#000000', edge_width_column: str = None, @@ -195,6 +267,8 @@ def run(input_file: KGTKFiles, edge_width_maximum: float = 5.0, edge_width_scale: str = None, node_color_column: str = None, + node_color_numbers: bool = False, + node_color_hex: bool = False, node_color_style: str = None, node_color_default: str = '#000000', node_color_scale: str = None, @@ -208,8 +282,8 @@ def run(input_file: KGTKFiles, node_border_color: str = None, tooltip_column: str = None, show_text: str = None, - node_categorical_scale: str = 'd3.schemeCategory10', - edge_categorical_scale: str = 'd3.schemeCategory10', + node_categorical_scale: str = 'rainbow', + edge_categorical_scale: str = 'rainbow', node_gradient_scale: str = 'd3.interpolateRdBu', edge_gradient_scale: str = 'd3.interpolateRdBu', @@ -228,6 +302,8 @@ def run(input_file: KGTKFiles, direction=direction, edge_label=edge_label, edge_color_column=edge_color_column, + edge_color_hex=edge_color_hex, + edge_color_numbers=edge_color_numbers, edge_color_style=edge_color_style, edge_color_default=edge_color_default, edge_width_column=edge_width_column, @@ -236,6 +312,8 @@ def run(input_file: KGTKFiles, edge_width_maximum=edge_width_maximum, edge_width_scale=edge_width_scale, node_color_column=node_color_column, + node_color_hex=node_color_hex, + node_color_numbers=node_color_numbers, node_color_style=node_color_style, node_color_default=node_color_default, node_color_scale=node_color_scale, From efe0a1dcae054af93dd927d61ff8eaa8de0194af Mon Sep 17 00:00:00 2001 From: saggu Date: Fri, 1 Apr 2022 17:37:15 -0700 Subject: [PATCH 11/32] consistent parameter names --- kgtk/visualize/visualize_api.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/kgtk/visualize/visualize_api.py b/kgtk/visualize/visualize_api.py index d9c753109..8a59c9aa5 100644 --- a/kgtk/visualize/visualize_api.py +++ b/kgtk/visualize/visualize_api.py @@ -60,8 +60,8 @@ def __init__( node_color_style: str = None, node_color_default: str = '#000000', node_color_scale: str = None, - node_colors_numbers: bool = False, - node_colors_hex: bool = False, + node_color_numbers: bool = False, + node_color_hex: bool = False, node_size_column: str = None, node_size_default: float = 2.0, node_size_minimum: float = 1.0, @@ -118,8 +118,8 @@ def __init__( self.edge_categorical_scale = edge_categorical_scale self.node_gradient_scale = node_gradient_scale self.edge_gradient_scale = edge_gradient_scale - self.node_color_numbers = node_colors_numbers - self.node_color_hex = node_colors_hex + self.node_color_numbers = node_color_numbers + self.node_color_hex = node_color_hex self.kwargs = kwargs self.input_kgtk_file: Path = KGTKArgumentParser.get_input_file(self.input_file) From 41e01ebadb74b94973eaeceb80f637cbfe4e8f0a Mon Sep 17 00:00:00 2001 From: Amandeep Singh Date: Fri, 1 Apr 2022 17:51:34 -0700 Subject: [PATCH 12/32] move visualize-graph to analysis section --- mkdocs.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mkdocs.yml b/mkdocs.yml index 47c8a76f4..1cd007e95 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -20,7 +20,6 @@ nav: - 'Export commands': - 'export-gt': 'export/export_gt.md' - 'generate_wikidata_triples': 'export/generate_wikidata_triples.md' - - 'visualize-force-graph': 'export/visualize-force-graph.md' - 'Transformation commands': - 'add-id': 'transform/add_id.md' - 'add-labels': 'transform/add_labels.md' @@ -72,6 +71,7 @@ nav: - 'paths': 'analysis/paths.md' - 'reachable-nodes': 'analysis/reachable_nodes.md' - 'text-embeddings': 'analysis/text_embedding.md' + - 'visualize-graph': 'export/visualize-graph.md' - Using pipes: pipes.md - Configure KGTK Notebooks: configure-kgtk-notebooks-readme.md From 77abedc04d316c14b0421288ec6b41f0966087a6 Mon Sep 17 00:00:00 2001 From: saggu Date: Fri, 1 Apr 2022 17:52:44 -0700 Subject: [PATCH 13/32] rename visualize file --- .../visualize-force-graph.md => analysis/visualize-graph.md} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename docs/{export/visualize-force-graph.md => analysis/visualize-graph.md} (100%) diff --git a/docs/export/visualize-force-graph.md b/docs/analysis/visualize-graph.md similarity index 100% rename from docs/export/visualize-force-graph.md rename to docs/analysis/visualize-graph.md From cfb1913521fd5ac8d7895f06f65773a27e47bc66 Mon Sep 17 00:00:00 2001 From: saggu Date: Fri, 1 Apr 2022 17:53:24 -0700 Subject: [PATCH 14/32] fix path --- mkdocs.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mkdocs.yml b/mkdocs.yml index 1cd007e95..cb52788bd 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -71,7 +71,7 @@ nav: - 'paths': 'analysis/paths.md' - 'reachable-nodes': 'analysis/reachable_nodes.md' - 'text-embeddings': 'analysis/text_embedding.md' - - 'visualize-graph': 'export/visualize-graph.md' + - 'visualize-graph': 'analysis/visualize-graph.md' - Using pipes: pipes.md - Configure KGTK Notebooks: configure-kgtk-notebooks-readme.md From 4ab4d1325cfda18fd9632d5f17bb37055454ccee Mon Sep 17 00:00:00 2001 From: Amandeep Singh Date: Fri, 1 Apr 2022 18:05:45 -0700 Subject: [PATCH 15/32] Update visualize-graph.md --- docs/analysis/visualize-graph.md | 147 +++++++++++++++++++++---------- 1 file changed, 99 insertions(+), 48 deletions(-) diff --git a/docs/analysis/visualize-graph.md b/docs/analysis/visualize-graph.md index ef7949abf..948dd4e67 100644 --- a/docs/analysis/visualize-graph.md +++ b/docs/analysis/visualize-graph.md @@ -1,6 +1,41 @@ This command loads a TSV edges file into html visualization of graph ## Usage ``` +usage: kgtk visualize-graph [-h] [-i INPUT_FILE] [-o OUTPUT_FILE] + [--node-file NODE_FILE] [--direction DIRECTION] + [--show-edge-label] + [--edge-color-column EDGE_COLOR_COLUMN] + [--edge-color-numbers] [--edge-color-hex] + [--edge-color-style EDGE_COLOR_STYLE] + [--edge-color-default EDGE_COLOR_DEFAULT] + [--edge-width-column EDGE_WIDTH_COLUMN] + [--edge-width-minimum EDGE_WIDTH_MINIMUM] + [--edge-width-maximum EDGE_WIDTH_MAXIMUM] + [--edge-width-default EDGE_WIDTH_DEFAULT] + [--edge-width-scale EDGE_WIDTH_SCALE] + [--node-color-column NODE_COLOR_COLUMN] + [--node-color-style NODE_COLOR_STYLE] + [--node-color-default NODE_COLOR_DEFAULT] + [--node-color-scale NODE_COLOR_SCALE] + [--node-color-numbers] [--node-color-hex] + [--node-size-column NODE_SIZE_COLUMN] + [--node-size-minimum NODE_SIZE_MINIMUM] + [--node-size-maximum NODE_SIZE_MAXIMUM] + [--node-size-default NODE_SIZE_DEFAULT] + [--node-size-scale NODE_SIZE_SCALE] + [--node-file-id NODE_FILE_ID] + [--show-text-limit SHOW_TEXT_LIMIT] + [--node-border-color NODE_BORDER_COLOR] + [--tooltip-column TOOLTIP_COLUMN] + [--show-text SHOW_TEXT] + [--node-categorical-scale NODE_CATEGORICAL_SCALE] + [--edge-categorical-scale EDGE_CATEGORICAL_SCALE] + [--node-gradient-scale NODE_GRADIENT_SCALE] + [--edge-gradient-scale EDGE_GRADIENT_SCALE] + [-v [optional True|False]] + +Convert edge file (optional node file) to html graph visualization file + optional arguments: -h, --help show this help message and exit -i INPUT_FILE, --input-file INPUT_FILE @@ -10,80 +45,96 @@ optional arguments: The KGTK output file. (May be omitted or '-' for stdout.) --node-file NODE_FILE - Specify the location of node file. + Path of the node file. --direction DIRECTION - Specify direction (arrow, particle and None), default - none - --show-edge-label EDGE_LABEL - Specify direction (arrow, particle and None), default - none + The edge direction: arrow|particle|None. Default: None + --show-edge-label Add this option to show labels on edges. Default: + False --edge-color-column EDGE_COLOR_COLUMN - Specify column used for edge color + Column for edge colors in the edge file. The values + can be numbers, hex codes or any strings + --edge-color-numbers Add this option if the values in the --edge-color- + column are numbers + --edge-color-hex Add this option if the values in the --edge-color- + column are valid hexadecimal colors.Valid hexadecimal + colors start with # and are of 3 or 6 length (without + the #) --edge-color-style EDGE_COLOR_STYLE - Specify style (categorical, gradient) used for edge - color - --edge-color-mapping EDGE_COLOR_MAPPING - Specify mapping (auto, fixed) used for edge color + Edge color style for edge color: categorical|gradient. + Default: None --edge-color-default EDGE_COLOR_DEFAULT - Specify default color for edge + Default color for edges. Default: '#000000' --edge-width-column EDGE_WIDTH_COLUMN - Specify column used for edge width + Column for edge widths in the edge file. The values + should be numbers. --edge-width-minimum EDGE_WIDTH_MINIMUM - Specify edge width minimum + Minimum edge width. Default: 1.0 --edge-width-maximum EDGE_WIDTH_MAXIMUM - Specify edge width maximum - --edge-width-mapping EDGE_WIDTH_MAPPING - Specify mapping (auto, fixed) used for edge width + Maximum edge width. Default: 5.0 --edge-width-default EDGE_WIDTH_DEFAULT - Specify default width for edge + Default edge width. Default: 1.0 --edge-width-scale EDGE_WIDTH_SCALE - Specify scale for width for edge (linear, log) + Edge width scale: linear|log. Default: None --node-color-column NODE_COLOR_COLUMN - Specify column used for node color + Column for node colors in the --node-file. The values + can be numbers, valid hex codes or any strings. --node-color-style NODE_COLOR_STYLE - Specify style (categorical, gradient) used for node - color - --node-color-mapping NODE_COLOR_MAPPING - Specify mapping (auto, fixed) used for node color + Node color style: categorical|gradient. Default: None --node-color-default NODE_COLOR_DEFAULT - Specify default color for node + Default node color. Default: '#000000' --node-color-scale NODE_COLOR_SCALE - Specify node color scale (linear/log) + Node color scale: linear|log. Default: None + --node-color-numbers Add this option if the values in the --node-color- + column are numbers + --node-color-hex Add this option if the values in the --node-color- + column are valid hexadecimal colors.Valid hexadecimal + colors start with # and are of 3 or 6 length (without + the #) --node-size-column NODE_SIZE_COLUMN - Specify column used for node size + Column for node sizes in the --node-file. Default: + None --node-size-minimum NODE_SIZE_MINIMUM - Specify node size minimum + Minimum node size. Default: 1.0 --node-size-maximum NODE_SIZE_MAXIMUM - Specify node size maximum - --node-size-mapping NODE_SIZE_MAPPING - Specify mapping (auto, fixed) used for node size + Maximum node size. Default: 5.0 --node-size-default NODE_SIZE_DEFAULT - Specify default size for node + Default node size. Default: 2.0 --node-size-scale NODE_SIZE_SCALE - Specify scale for node size (linear, log) + Node size scale: linear|log. Default: None --node-file-id NODE_FILE_ID - Specify id column name in node file, default is id + ID column name in the --node-file. Default: 'id' --show-text-limit SHOW_TEXT_LIMIT - When node number is greater than this number, will not - show text as label, default is 500 + When number of nodes is greater than --show-text- + limit, node labels will not be visible.Default: 500 --node-border-color NODE_BORDER_COLOR - Specify node border color + Node border color. Default: None --tooltip-column TOOLTIP_COLUMN - Specify option to show tooltip - --text-node TEXT_NODE - Specify option to show text (None, center, above), default is None + Column for node tooltips in the --node-file. Default: + None + --show-text SHOW_TEXT + Show node labels at the position relative to node: + center|above. Default: None. If the number of nodes in + the graph is greater than specified by --show-text- + limit option, which is 500 by default, then the text + will not be shown in the visualization. --node-categorical-scale NODE_CATEGORICAL_SCALE - Specify color categorical scale for node from - d3-scale-chromatic + Node color categorical scale node from d3-scale- + chromatic.https://observablehq.com/@d3/sequential- + scales. Default: rainbow --edge-categorical-scale EDGE_CATEGORICAL_SCALE - Specify color categorical scale for edge d3-scale- - chromatic + Edge color categorical scale for edge d3-scale- + chromatic.https://observablehq.com/@d3/sequential- + scales. Default: rainbow --node-gradient-scale NODE_GRADIENT_SCALE - Specify color gradient scale for node from d3-scale- - chromatic + Node color gradient scale from d3-scale-chromatic. + Default: d3.interpolateRdBu --edge-gradient-scale EDGE_GRADIENT_SCALE - Specify color gradient scale for edge d3-scale- - chromatic + Edge color gradient scale from d3-scale-chromatic. + Default: d3.interpolateRdBu + + -v [optional True|False], --verbose [optional True|False] + Print additional progress messages (default=False). + ``` From ca7ac9689f6e7ede60f5e6910138ac4bedab49df Mon Sep 17 00:00:00 2001 From: Amandeep Singh Date: Fri, 1 Apr 2022 18:09:52 -0700 Subject: [PATCH 16/32] Update visualize-graph.md --- docs/analysis/visualize-graph.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/docs/analysis/visualize-graph.md b/docs/analysis/visualize-graph.md index 948dd4e67..ba271c9bd 100644 --- a/docs/analysis/visualize-graph.md +++ b/docs/analysis/visualize-graph.md @@ -225,7 +225,7 @@ kgtk visualize-force-graph -i examples/docs/visualize_force_graph_example2.tsv \ --node-color-style categorical \ -o show_countries.html ``` -This customization uses is_country as columns for assigning colors. --node-color-style categorical indicates that we assign a unique color to each different string. The default d3.schemeCategory10 supports a max of 10 different colors. +This customization uses is_country as columns for assigning colors. --node-color-style categorical indicates that we assign a unique color to each different string. @@ -237,7 +237,7 @@ kgtk visualize-force-graph -i examples/docs/visualize_force_graph_example2.tsv \ --node-color-style categorical \ -o show_types.html ``` -This customization uses type as columns for assigning colors. --node-color-style categorical indicates that we assign a unique color to each different string. The default d3.schemeCategory10 supports a max of 10 different colors +This customization uses type as columns for assigning colors. --node-color-style categorical indicates that we assign a unique color to each different string. @@ -250,7 +250,7 @@ kgtk visualize-force-graph -i examples/docs/visualize_force_graph_example2.tsv \ -o show_types_missing.html ``` -This customization uses type_missing as columns for assigning colors. --node-color-style categorical indicates that we assign a unique color to each different string. The default d3.schemeCategory10 supports a max of 10 different colors. Notice here there are missing values. All missing values will be assigned a color. +This customization uses type_missing as columns for assigning colors. --node-color-style categorical indicates that we assign a unique color to each different string. Notice here there are missing values. All missing values will be assigned the default node color. @@ -274,7 +274,7 @@ kgtk visualize-force-graph -i examples/docs/visualize_force_graph_example2.tsv \ --edge-color-style categorical \ -o show_edge_color.html ``` -This customization uses label as categorical coloring. Each different label will be assigned a different color. The default d3.schemeCategory10 supports a max of 10 different colors. +This customization uses label as categorical coloring. Each different label will be assigned a different color. @@ -332,7 +332,7 @@ kgtk visualize-force-graph -i examples/docs/visualize_force_graph_example2.tsv \ --node-color-column degree \ --node-color-style gradient \ --node-color-scale linear \ ---text-node above \ +--show-text above \ -o show_node_label.html ``` This customization uses degree column in node file to interpolate color based on linear scale. Also the label of each nodes will be showing as text above nodes. From 403ac01c385ca31e41c5ad9c94ed0c586e039eae Mon Sep 17 00:00:00 2001 From: saggu Date: Fri, 1 Apr 2022 18:10:36 -0700 Subject: [PATCH 17/32] remove id builder options --- kgtk/cli/visualize-graph.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/kgtk/cli/visualize-graph.py b/kgtk/cli/visualize-graph.py index 8e3143873..f4ca4485f 100644 --- a/kgtk/cli/visualize-graph.py +++ b/kgtk/cli/visualize-graph.py @@ -3,7 +3,6 @@ from argparse import Namespace from kgtk.cli_argparse import KGTKArgumentParser, KGTKFiles from kgtk.io.kgtkreader import KgtkReader, KgtkReaderOptions -from kgtk.reshape.kgtkidbuilder import KgtkIdBuilderOptions from kgtk.value.kgtkvalueoptions import KgtkValueOptions @@ -11,12 +10,11 @@ def parser(): return { 'help': 'Convert edge file to html visualization', 'description': 'Convert edge file (optional node file)' + - 'to html graph visualization file' + ' to html graph visualization file' } -def add_arguments_extended(parser: KGTKArgumentParser, - parsed_shared_args: Namespace): +def add_arguments_extended(parser: KGTKArgumentParser, parsed_shared_args: Namespace): """ Parse arguments Args: @@ -240,7 +238,6 @@ def add_arguments_extended(parser: KGTKArgumentParser, default='d3.interpolateRdBu', help="Edge color gradient scale from d3-scale-chromatic. Default: d3.interpolateRdBu") - KgtkIdBuilderOptions.add_arguments(parser, expert=_expert) # Show all the options. KgtkReader.add_debug_arguments(parser, expert=_expert) KgtkReaderOptions.add_arguments(parser, mode_options=True, expert=_expert) KgtkValueOptions.add_arguments(parser, expert=_expert) From 8b36be603afac00ec77414f9df1a97b7305ec03e Mon Sep 17 00:00:00 2001 From: saggu Date: Mon, 4 Apr 2022 17:20:02 -0700 Subject: [PATCH 18/32] fix destringy bugs --- kgtk/visualize/visualize_api.py | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/kgtk/visualize/visualize_api.py b/kgtk/visualize/visualize_api.py index 8a59c9aa5..ce6befd0f 100644 --- a/kgtk/visualize/visualize_api.py +++ b/kgtk/visualize/visualize_api.py @@ -206,10 +206,18 @@ def process_edge_file(self): for row in kr: if self.node_file is None: - clean_node1_label, _, _ = kgtk_format.destringify(row[node1_label_idx]) + node1_label = row[node1_label_idx] + node2_label = row[node2_label_idx] + if '@' in node1_label: + clean_node1_label, _, _ = kgtk_format.destringify(node1_label) + else: + clean_node1_label = node1_label nodes.add((row[node1_idx], clean_node1_label)) - clean_node2_label, _, _ = kgtk_format.destringify(row[node2_label_idx]) + if '@' in node2_label: + clean_node2_label, _, _ = kgtk_format.destringify(node2_label) + else: + clean_node2_label = node2_label nodes.add((row[node2_idx], clean_node2_label)) if '@' in row[label_label_idx]: @@ -403,13 +411,16 @@ def calculate_color(self, # all good, nothing to do here pass else: - + node_color_list = [] + max_color = -1 + min_color = -1 + if self.node_color_numbers: + node_color_list = [x['orig_color'] for x in nodes] + max_color = max(node_color_list) + min_color = min(node_color_list) for node in nodes: orig_color = node['orig_color'] if node_color_numbers: - node_color_list = [x['orig_color'] for x in nodes] - max_color = max(node_color_list) - min_color = min(node_color_list) log_max_color = math.log(max_color, self.base) if max_color > 0.0 else -1.0 log_min_color = math.log(min_color, self.base) if min_color > 0.0 else -1.0 From b3b1d44d17ef6dc990d3fd7d98bbc240573cb818 Mon Sep 17 00:00:00 2001 From: saggu Date: Mon, 4 Apr 2022 17:20:31 -0700 Subject: [PATCH 19/32] add unit tests for visualize --- tests/test_visualize_graph.py | 206 ++++++++++++++++++++++++++++++++++ 1 file changed, 206 insertions(+) create mode 100644 tests/test_visualize_graph.py diff --git a/tests/test_visualize_graph.py b/tests/test_visualize_graph.py new file mode 100644 index 000000000..286fad6ba --- /dev/null +++ b/tests/test_visualize_graph.py @@ -0,0 +1,206 @@ +import shutil +import tempfile +import unittest +from kgtk.cli_entry import cli_entry + + +class TestVisualizeGraph(unittest.TestCase): + def setUp(self) -> None: + self.example_file = 'data/visualize_force_graph_example2.tsv' + self.node_file = 'data/visualize_force_graph_example2_node.tsv' + self.ground_truth_default = 'data/visualize_graph_example_1_no_node_default.html' + self.ground_truth_color_node = 'data/visualize_graph_example_color_by_node_column.html' + self.ground_truth_color_node_log = 'data/visualize_graph_example_color_by_node_column_log.html' + self.ground_truth_color_node_missing = 'data/visualize_graph_example_color_by_node_column_log_missing.html' + self.ground_truth_color_node_hex = 'data/visualize_graph_example_color_by_node_column_hex.html' + self.ground_truth_color_edge = 'data/visualize_graph_example_color_edge.html' + self.ground_truth_node_size = 'data/visualize_graph_example_node_size.html' + self.ground_truth_edge_width = 'data/visualize_graph_example_edge_width.html' + self.ground_truth_node_text = 'data/visualize_graph_example_node_text.html' + self.ground_truth_edge_text = 'data/visualize_graph_example_edge_text.html' + self.ground_truth_node_edge_text = 'data/visualize_graph_example_node_edge_text.html' + self.temp_dir = tempfile.mkdtemp() + + def tearDown(self) -> None: + shutil.rmtree(self.temp_dir) + + def test_default_no_node_file(self): + output = f'{self.temp_dir}/test_1.html' + cli_entry("kgtk", "visualize-graph", "-i", self.example_file, "-o", output) + + f1 = set(open(self.ground_truth_default).readlines()) + f2 = open(output).readlines() + + for line in f2: + self.assertTrue(line in f1) + + def test_color_by_node_column(self): + output = f'{self.temp_dir}/test_2.html' + cli_entry("kgtk", "visualize-graph", + "-i", self.example_file, + "-o", f'{output}', + "--node-color-column", "is_country", + "--node-file", f'{self.node_file}' + ) + f1 = set(open(self.ground_truth_color_node).readlines()) + f2 = open(output).readlines() + + for line in f2: + self.assertTrue(line in f1) + + def test_color_by_node_column_log(self): + output = f'{self.temp_dir}/test_3.html' + cli_entry("kgtk", "--debug", + "visualize-graph", + "-i", self.example_file, + "-o", f'{output}', + "--node-color-column", "degree", + "--node-color-numbers", + "--node-file", f'{self.node_file}', + "--node-color-scale", "log" + ) + f1 = set(open(self.ground_truth_color_node_log).readlines()) + f2 = open(output).readlines() + + for line in f2: + self.assertTrue(line in f1) + + def test_color_by_node_column_missing(self): + output = f'{self.temp_dir}/test_4.html' + cli_entry("kgtk", "--debug", + "visualize-graph", + "-i", self.example_file, + "-o", f'{output}', + "--node-color-column", "type_missing", + "--node-file", f'{self.node_file}' + ) + f1 = set(open(self.ground_truth_color_node_missing).readlines()) + f2 = open(output).readlines() + + for line in f2: + self.assertTrue(line in f1) + + def test_color_by_node_column_hex(self): + output = f'{self.temp_dir}/test_5.html' + cli_entry("kgtk", "--debug", + "visualize-graph", + "-i", self.example_file, + "-o", f'{output}', + "--node-color-column", "hex_color", + "--node-color-hex", + "--node-file", f'{self.node_file}' + ) + f1 = set(open(self.ground_truth_color_node_hex).readlines()) + f2 = open(output).readlines() + + for line in f2: + self.assertTrue(line in f1) + + def test_color_by_edge_column(self): + output = f'{self.temp_dir}/test_6.html' + cli_entry("kgtk", "--debug", + "visualize-graph", + "-i", self.example_file, + "-o", f'{output}', + "--edge-color-column", "hex_color", + "--edge-color-hex" + ) + f1 = set(open(self.ground_truth_color_edge).readlines()) + f2 = open(output).readlines() + + for line in f2: + self.assertTrue(line in f1) + + def test_node_size(self): + output = f'{self.temp_dir}/test_7.html' + cli_entry("kgtk", "--debug", + "visualize-graph", + "-i", self.example_file, + "--node-file", f'{self.node_file}', + "-o", f'{output}', + "--node-size-column", "population", + "--node-size-minimum", "2.0", + "--node-size-maximum", "6.0", + "--node-size-default", "4.0", + "--node-color-column", "hex_color", + "--node-color-hex", + "--node-size-scale", "log" + ) + f1 = set(open(self.ground_truth_node_size).readlines()) + f2 = open(output).readlines() + + for line in f2: + self.assertTrue(line in f1) + + def test_edge_width(self): + output = f'{self.temp_dir}/test_8.html' + cli_entry("kgtk", "--debug", + "visualize-graph", + "-i", self.example_file, + "--node-file", f'{self.node_file}', + "-o", f'{output}', + "--edge-width-column", "weight", + "--edge-width-minimum", "2.0", + "--edge-width-maximum", "5.0", + "--edge-width-default", "2.0", + "--edge-width-scale", "log" + ) + f1 = set(open(self.ground_truth_edge_width).readlines()) + f2 = open(output).readlines() + + for line in f2: + self.assertTrue(line in f1) + + def test_node_text(self): + output = f'{self.temp_dir}/test_9.html' + cli_entry("kgtk", "--debug", + "visualize-graph", + "-i", self.example_file, + "--node-file", f'{self.node_file}', + "-o", f'{output}', + "--node-color-column", "hex_color", + "--node-color-hex", + "--show-text", "above" + ) + f1 = set(open(self.ground_truth_node_text).readlines()) + f2 = open(output).readlines() + + for line in f2: + self.assertTrue(line in f1) + + def test_edge_text(self): + output = f'{self.temp_dir}/test_10.html' + cli_entry("kgtk", "--debug", + "visualize-graph", + "-i", self.example_file, + "-o", f'{output}', + "--show-edge-label", + "--edge-color-column", "hex_color" + ) + f1 = set(open(self.ground_truth_edge_text).readlines()) + f2 = open(output).readlines() + + for line in f2: + self.assertTrue(line in f1) + + def test_node_edge_text(self): + output = f'{self.temp_dir}/test_11.html' + cli_entry("kgtk", "--debug", + "visualize-graph", + "-i", self.example_file, + "--node-file", f'{self.node_file}', + "-o", f'{output}', + "--node-color-column", "hex_color", + "--node-color-hex", + "--show-text", "above", + "--show-edge-label" + ) + f1 = set(open(self.ground_truth_node_edge_text).readlines()) + f2 = open(output).readlines() + + for line in f2: + self.assertTrue(line in f1) + + +if __name__ == '__main__': + unittest.main() From 37855c63e1cbff5005bda7a6ac38b2afe618d281 Mon Sep 17 00:00:00 2001 From: saggu Date: Mon, 4 Apr 2022 17:22:53 -0700 Subject: [PATCH 20/32] add data files for unit tests --- tests/data/visualize_force_graph_example2.tsv | 19 ++ .../visualize_force_graph_example2_node.tsv | 10 + ...alize_graph_example_1_no_node_default.html | 199 +++++++++++++ ...ze_graph_example_color_by_node_column.html | 217 ++++++++++++++ ...raph_example_color_by_node_column_hex.html | 217 ++++++++++++++ ...raph_example_color_by_node_column_log.html | 217 ++++++++++++++ ...mple_color_by_node_column_log_missing.html | 217 ++++++++++++++ .../visualize_graph_example_color_edge.html | 199 +++++++++++++ .../visualize_graph_example_edge_text.html | 237 +++++++++++++++ .../visualize_graph_example_edge_width.html | 235 +++++++++++++++ ...isualize_graph_example_node_edge_text.html | 275 ++++++++++++++++++ .../visualize_graph_example_node_size.html | 226 ++++++++++++++ .../visualize_graph_example_node_text.html | 237 +++++++++++++++ 13 files changed, 2505 insertions(+) create mode 100644 tests/data/visualize_force_graph_example2.tsv create mode 100644 tests/data/visualize_force_graph_example2_node.tsv create mode 100644 tests/data/visualize_graph_example_1_no_node_default.html create mode 100644 tests/data/visualize_graph_example_color_by_node_column.html create mode 100644 tests/data/visualize_graph_example_color_by_node_column_hex.html create mode 100644 tests/data/visualize_graph_example_color_by_node_column_log.html create mode 100644 tests/data/visualize_graph_example_color_by_node_column_log_missing.html create mode 100644 tests/data/visualize_graph_example_color_edge.html create mode 100644 tests/data/visualize_graph_example_edge_text.html create mode 100644 tests/data/visualize_graph_example_edge_width.html create mode 100644 tests/data/visualize_graph_example_node_edge_text.html create mode 100644 tests/data/visualize_graph_example_node_size.html create mode 100644 tests/data/visualize_graph_example_node_text.html diff --git a/tests/data/visualize_force_graph_example2.tsv b/tests/data/visualize_force_graph_example2.tsv new file mode 100644 index 000000000..eaf37f1b6 --- /dev/null +++ b/tests/data/visualize_force_graph_example2.tsv @@ -0,0 +1,19 @@ +node1 label node2 weight hex_color +Q1 "friend" Q2 0.9 #FF69B4 +Q2 "friend" Q3 0.3 #FF69B4 +Q3 "friend" Q4 "" #FF69B4 +Q5 "friend" Q3 "" #FF69B4 +Q6 "friend" Q5 "" #FF69B4 +Q6 "friend" Q1 "" #FF69B4 +Q1 "born" Q7 "" #FFF68F +Q2 "born" Q7 "" #FFF68F +Q3 "born" Q7 "" #FFF68F +Q4 "born" Q8 "" #FFF68F +Q5 "born" Q8 "" #FFF68F +Q6 "born" Q8 "" #FFF68F +Q1 "lives" Q8 "" #32CD32 +Q2 "lives" Q7 "" #32CD32 +Q3 "lives" Q9 "" #32CD32 +Q4 "lives" Q8 "" #32CD32 +Q5 "lives" Q9 "" #32CD32 +Q6 "lives" Q8 "" #32CD32 \ No newline at end of file diff --git a/tests/data/visualize_force_graph_example2_node.tsv b/tests/data/visualize_force_graph_example2_node.tsv new file mode 100644 index 000000000..8dcf1ab9d --- /dev/null +++ b/tests/data/visualize_force_graph_example2_node.tsv @@ -0,0 +1,10 @@ +id label is_country type degree type_missing population hex_color +Q1 'Alice'@en 0 human 40 "" "" #00FFFF +Q2 'Susan'@en 0 human 14 "" "" #8A2BE2 +Q3 'John'@en 0 human 4 "" "" #FF4040 +Q4 'Claudia'@en 0 human 32 "" "" #7FFF00 +Q5 'Ulrich'@en 0 human 422 "" "" #FFB90F +Q6 'Fritz'@en 0 human 4 "" "" #C1FFC1 +Q7 'USA'@en 1 country 50 country 300 #FF1493 +Q8 'Germany'@en 1 country 500 country 50 #FFD700 +Q9 'Brazil'@en 1 country 222 country 200 #FF69B4 \ No newline at end of file diff --git a/tests/data/visualize_graph_example_1_no_node_default.html b/tests/data/visualize_graph_example_1_no_node_default.html new file mode 100644 index 000000000..24387333c --- /dev/null +++ b/tests/data/visualize_graph_example_1_no_node_default.html @@ -0,0 +1,199 @@ + + + + + + + + + + +
+ + \ No newline at end of file diff --git a/tests/data/visualize_graph_example_color_by_node_column.html b/tests/data/visualize_graph_example_color_by_node_column.html new file mode 100644 index 000000000..9cacfbbc5 --- /dev/null +++ b/tests/data/visualize_graph_example_color_by_node_column.html @@ -0,0 +1,217 @@ + + + + + + + + + + +
+ + \ No newline at end of file diff --git a/tests/data/visualize_graph_example_color_by_node_column_hex.html b/tests/data/visualize_graph_example_color_by_node_column_hex.html new file mode 100644 index 000000000..fe6ce2484 --- /dev/null +++ b/tests/data/visualize_graph_example_color_by_node_column_hex.html @@ -0,0 +1,217 @@ + + + + + + + + + + +
+ + \ No newline at end of file diff --git a/tests/data/visualize_graph_example_color_by_node_column_log.html b/tests/data/visualize_graph_example_color_by_node_column_log.html new file mode 100644 index 000000000..b715d4958 --- /dev/null +++ b/tests/data/visualize_graph_example_color_by_node_column_log.html @@ -0,0 +1,217 @@ + + + + + + + + + + +
+ + \ No newline at end of file diff --git a/tests/data/visualize_graph_example_color_by_node_column_log_missing.html b/tests/data/visualize_graph_example_color_by_node_column_log_missing.html new file mode 100644 index 000000000..9cacfbbc5 --- /dev/null +++ b/tests/data/visualize_graph_example_color_by_node_column_log_missing.html @@ -0,0 +1,217 @@ + + + + + + + + + + +
+ + \ No newline at end of file diff --git a/tests/data/visualize_graph_example_color_edge.html b/tests/data/visualize_graph_example_color_edge.html new file mode 100644 index 000000000..ee317a53b --- /dev/null +++ b/tests/data/visualize_graph_example_color_edge.html @@ -0,0 +1,199 @@ + + + + + + + + + + +
+ + \ No newline at end of file diff --git a/tests/data/visualize_graph_example_edge_text.html b/tests/data/visualize_graph_example_edge_text.html new file mode 100644 index 000000000..ddfa2974a --- /dev/null +++ b/tests/data/visualize_graph_example_edge_text.html @@ -0,0 +1,237 @@ + + + + + + + + + + +
+ + \ No newline at end of file diff --git a/tests/data/visualize_graph_example_edge_width.html b/tests/data/visualize_graph_example_edge_width.html new file mode 100644 index 000000000..2c8ca9fbf --- /dev/null +++ b/tests/data/visualize_graph_example_edge_width.html @@ -0,0 +1,235 @@ + + + + + + + + + + +
+ + \ No newline at end of file diff --git a/tests/data/visualize_graph_example_node_edge_text.html b/tests/data/visualize_graph_example_node_edge_text.html new file mode 100644 index 000000000..0361532a8 --- /dev/null +++ b/tests/data/visualize_graph_example_node_edge_text.html @@ -0,0 +1,275 @@ + + + + + + + + + + +
+ + \ No newline at end of file diff --git a/tests/data/visualize_graph_example_node_size.html b/tests/data/visualize_graph_example_node_size.html new file mode 100644 index 000000000..d3a909151 --- /dev/null +++ b/tests/data/visualize_graph_example_node_size.html @@ -0,0 +1,226 @@ + + + + + + + + + + +
+ + \ No newline at end of file diff --git a/tests/data/visualize_graph_example_node_text.html b/tests/data/visualize_graph_example_node_text.html new file mode 100644 index 000000000..794cb3390 --- /dev/null +++ b/tests/data/visualize_graph_example_node_text.html @@ -0,0 +1,237 @@ + + + + + + + + + + +
+ + \ No newline at end of file From c159bb9636a0f121ac6fcfd0e657a7eaebe894d9 Mon Sep 17 00:00:00 2001 From: saggu Date: Mon, 4 Apr 2022 17:24:43 -0700 Subject: [PATCH 21/32] remove trailing whitespace --- kgtk/visualize/visualize_api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kgtk/visualize/visualize_api.py b/kgtk/visualize/visualize_api.py index ce6befd0f..c24d64a26 100644 --- a/kgtk/visualize/visualize_api.py +++ b/kgtk/visualize/visualize_api.py @@ -498,7 +498,7 @@ def to_html(self, d):