Skip to content

Commit

Permalink
chore: refactoring
Browse files Browse the repository at this point in the history
  • Loading branch information
scott-es committed Apr 8, 2024
1 parent 0e2c3ec commit b11eb9a
Show file tree
Hide file tree
Showing 14 changed files with 265,042 additions and 216 deletions.
7 changes: 5 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -155,11 +155,14 @@ Monitoring depGraph via Snyk API ...
```

### Pruning
If you encounter a HTTP 500 when performing `test` or `monitor` commands, then try to enable pruning.
If you encounter an HTTP 422 when performing `test` or `monitor` commands, with the accompanying error message:
`Retrying: {"error":"Failed to generate snapshot. Please contact support on support@snyk.io"}`
then try to enable pruning.

What is likely happening is that there are too many vulnerable paths for the system (>100,000), so
pruning the repeated sub-dependencies will alleviate this.

You may run with `--prune` all the time to avoid this error.
You may run with `--prune` or `--prune-all` to avoid this error.

## Currently supported package types
* maven (tested with rules_jvm_external)
Expand Down
7 changes: 4 additions & 3 deletions bazel2snyk/bazel.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ def get_coordinates_from_bazel_dep(self, bazel_dep, package_source):
logger.debug(f"{re_match_string=}")

for rule in bazel_rules.findall("rule"):
logger.debug(f"processing {rule.attrib['name']=}")
# logger.debug(f"processing {rule.attrib['name']=}")
if (
re.match(
r".*/BUILD(\.bzl|\.bazel)?\:\d+\:\d+$", rule.attrib["location"]
Expand Down Expand Up @@ -133,15 +133,16 @@ def maven_bazel_dep_to_snyk_dep(self, dep_coordinates: str):
def pip_bazel_dep_to_snyk_dep(self, dep_coordinates: str):
snyk_dep = dep_coordinates
logger.debug(f"PYTHON TEST: {snyk_dep=}")
# match = re.search(r"\@.*\/\/pypi__.*\:(.*).dist\-info.*\/", dep_coordinates)
match = re.search(
r"\@.*_.*\:site-packages\/(.*).dist\-info.*\/.*", dep_coordinates
)
if not match:
match = re.search(r"\@.*\/\/pypi__.*\:(.*).dist\-info.*\/", dep_coordinates)
if match:
snyk_dep = match.group(1)
k = snyk_dep.rfind("-")
snyk_dep = snyk_dep[:k] + "@" + snyk_dep[k + 1 :]
logger.debug(f"PYTHON TEST: {snyk_dep=}")
logger.debug(f"{snyk_dep=}")

return snyk_dep

Expand Down
99 changes: 21 additions & 78 deletions bazel2snyk/cli.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import requests
import typer
import math
import time
import sys
import traceback
Expand Down Expand Up @@ -42,16 +41,13 @@ def __init__(
self.dep_graph = dep_graph
self._visited = []
self._visited_temp = []
self._dep_path_counts = {}
self._target_path_counts = {}
self._oss_deps_count = 0

def bazel_to_depgraph(self, parent_node_id: str, depth: int):
"""
Recursive function that will walk the bazel dep tree.
"""
logger.debug(f"{parent_node_id=},{depth=}")

# global visited_temp, bazel_xml_parser
logger.debug(f"{self._visited_temp=}")

children = self.bazel_xml_parser.get_children_from_rule(
Expand All @@ -63,9 +59,15 @@ def bazel_to_depgraph(self, parent_node_id: str, depth: int):
parent_node_id, self.bazel_xml_parser.pkg_manager_name
)

if parent_dep_snyk != parent_node_id and not parent_dep_snyk.endswith(
f"{BAZEL_TARGET_VERSION_STRING}"
):
self._oss_deps_count += 1
logger.debug(f"{self._oss_deps_count=}")

# special entry for the root node of the dep graph
if depth == 0:
self.dep_graph.set_root_node_package(f"{parent_dep_snyk}")
self.dep_graph.set_root_node_package(parent_dep_snyk)

for child in children:
child_dep_for_snyk = self.snyk_dep_from_bazel_dep(
Expand All @@ -87,23 +89,14 @@ def bazel_to_depgraph(self, parent_node_id: str, depth: int):
logger.debug(f"adding pkg {child_dep_for_snyk=}")
self.dep_graph.add_pkg(child_dep_for_snyk)

# keep track of how many times each dep is encountered
if self.bazel_xml_parser.get_node_type(child) in [BazelNodeType.DEPENDENCY]:
self.increment_dep_path_count(child_dep_for_snyk)

elif self.bazel_xml_parser.get_node_type(child) in [
BazelNodeType.INTERNAL_TARGET,
BazelNodeType.EXTERNAL_TARGET,
]:
self.increment_target_path_count(child_dep_for_snyk)

logger.debug(f"adding dep {child_dep_for_snyk=} for {parent_dep_snyk=}")
self.dep_graph.add_dep(child_dep_for_snyk, parent_dep_snyk)

self._visited_temp.append(parent_node_id)

# if we've already processed this subtree, then just return
if child not in self._visited:
logger.debug(f"{child} not yet visited, traversing...")
self.bazel_to_depgraph(child, depth=depth + 1)
# else:
# future use for smarter pruning
Expand Down Expand Up @@ -137,62 +130,6 @@ def snyk_dep_from_bazel_dep(
else:
return f"{bazel_dep_id}@{BAZEL_TARGET_VERSION_STRING}"

def increment_dep_path_count(self, dep: str):
"""
Increment global dep path counts which is later
used if the dep graph needs to be pruned
"""
self._dep_path_counts[dep] = self._dep_path_counts.get(dep, 0) + 1

def increment_target_path_count(self, dep: str):
"""
Increment global target path counts which is later
used if the dep graph needs to be pruned
"""
self._target_path_counts[dep] = self._target_path_counts.get(dep, 0) + 1

def prune_graph_all(self):
"""
Prune graph whenever OSS dependencies are repeated more than 2x
or when bazel target dependencies are repeated more than 10x
"""
for dep, instances in self.dep_path_counts.items():
if instances > 2:
logger.info(f"pruning {dep} ({instances=})")
self.dep_graph.prune_dep(dep)

for dep, instances in self.target_path_counts.items():
if instances > 10:
logger.info(f"pruning {dep} ({instances=})")
self.dep_graph.prune_dep(dep)

def prune_graph(
self, instance_count_threshold: int, instance_percentage_threshold: int
):
"""
Prune graph according to threshold of duplicated transitive dependencies
"""
self._dep_path_counts.update(self._target_path_counts)
combined_path_counts = self._dep_path_counts

total_item_count = 0

for dep, instances in combined_path_counts.items():
total_item_count += instances
logger.debug(f"{total_item_count=}")

for dep, instances in combined_path_counts.items():
if instances > 1:
instance_percentage = math.ceil((instances / total_item_count) * 100)
if (
instances > instance_count_threshold
or instance_percentage > instance_percentage_threshold
):
logger.info(
f"pruning {dep} ({instances=}/{instance_count_threshold},{instance_percentage=}/{instance_percentage_threshold})"
)
self.dep_graph.prune_dep(dep)


def load_file(file_path: str) -> str:
"""
Expand Down Expand Up @@ -284,7 +221,7 @@ def main(

bazel2snyk.bazel_to_depgraph(parent_node_id=bazel_target, depth=0)

if len(bazel2snyk.dep_graph.graph()["depGraph"]["graph"]["nodes"]) <= 1:
if len(bazel2snyk.dep_graph.graph().depGraph.graph.nodes) <= 1:
logger.error(
f"No {package_source} dependencies found for given target, please verify --bazel-target exists in the source data"
)
Expand All @@ -293,11 +230,13 @@ def main(
if prune_all:
logger.info("Pruning graph ...")
time.sleep(2)
bazel2snyk.prune_graph_all()
# bazel2snyk.prune_graph_all()
bazel2snyk.dep_graph.prune_graph_all()
elif prune:
time.sleep(2)
logger.info("Smart pruning graph (experimental) ...")
bazel2snyk.prune_graph(20, 5)
# bazel2snyk.prune_graph(20, 5)
bazel2snyk.dep_graph.prune_graph(20, 5)
return


Expand All @@ -306,7 +245,9 @@ def print_graph():
"""
Print the Snyk depGraph representation of the dependency graph
"""
print(f"{json.dumps(bazel2snyk.dep_graph.graph(), indent=4)}")
# print(f"{json.dumps(bazel2snyk.dep_graph.graph(), indent=4)}")
# print({bazel2snyk.dep_graph.graph().model_dump_json(indent=4)})
print(json.dumps(bazel2snyk.dep_graph.graph().model_dump(), indent=4))


@cli.command()
Expand All @@ -329,7 +270,8 @@ def test(

typer.echo("Testing depGraph via Snyk API ...", file=sys.stderr)
response: requests.Response = snyk_client.post(
f"{DEPGRAPH_BASE_TEST_URL}{snyk_org_id}", body=bazel2snyk.dep_graph.graph()
f"{DEPGRAPH_BASE_TEST_URL}{snyk_org_id}",
body=bazel2snyk.dep_graph.graph().model_dump(),
)

json_response = response.json()
Expand Down Expand Up @@ -372,7 +314,8 @@ def monitor(

typer.echo("Monitoring depGraph via Snyk API ...", file=sys.stderr)
response: requests.Response = snyk_client.post(
f"{DEPGRAPH_BASE_MONITOR_URL}{snyk_org_id}", body=bazel2snyk.dep_graph.graph()
f"{DEPGRAPH_BASE_MONITOR_URL}{snyk_org_id}",
body=bazel2snyk.dep_graph.graph().model_dump(),
)

json_response = response.json()
Expand Down
Loading

0 comments on commit b11eb9a

Please sign in to comment.