PoC: move evaluator external log processing here

This is a PoC for moving the external implementations we have for evaluator log processing into this repository. This does not yet modify the runtime of this repository, but it does add immutable.js to the package.json file.
github · Nov 14, 2024 · 11a0fb5 · 11a0fb5
1 parent f7caf01
commit 11a0fb5
Show file tree

Hide file tree

Showing 15 changed files with 2,268 additions and 0 deletions.
diff --git a/extensions/ql-vscode/package-lock.json b/extensions/ql-vscode/package-lock.json
diff --git a/extensions/ql-vscode/package.json b/extensions/ql-vscode/package.json
@@ -1995,6 +1995,7 @@
     "d3": "^7.9.0",
     "d3-graphviz": "^5.0.2",
     "fs-extra": "^11.1.1",
+    "immutable": "^5.0.2",
     "js-yaml": "^4.1.0",
     "msw": "^2.2.13",
     "nanoid": "^5.0.7",

diff --git a/extensions/ql-vscode/src/log-insights/core/README.md b/extensions/ql-vscode/src/log-insights/core/README.md
@@ -0,0 +1,6 @@
+# log-insights/core
+
+The core of the `log-insights` feature: provides insights from the raw logs emitted by the CodeQL CLI.
+
+This is intended to be a VS Code-independent directory that can, in theory, be used in external contexts as well, or become a package of its own one day.
+The unit tests for this directory define the interface, and there are no guarantees about external compatibility beyond that.
diff --git a/extensions/ql-vscode/src/log-insights/core/cli.ts b/extensions/ql-vscode/src/log-insights/core/cli.ts
@@ -0,0 +1,120 @@
+import { cpSync, createReadStream, mkdtempSync } from "fs";
+import { tmpdir } from "os";
+import { join } from "path";
+// eslint-disable-next-line import/no-namespace
+import * as badnessMetrics from "./log-processors/badness-metrics";
+// eslint-disable-next-line import/no-namespace
+import * as expensivePredicates from "./log-processors/expensive-predicates";
+// eslint-disable-next-line import/no-namespace
+import * as logSummary from "./log-processors/log-summary";
+// eslint-disable-next-line import/no-namespace
+import * as stageTimings from "./log-processors/stage-timings";
+// eslint-disable-next-line import/no-namespace
+import * as tupleSums from "./log-processors/tuple-sums";
+import { log } from "./util";
+
+/**
+ * Minimal CLI interface for running the evaluator log processing locally.
+ *
+ * Intended for use in development and debugging.
+ * This is not intended to be a full-featured CLI tool, nor as a replacement for ordinary testing.
+ *
+ * Arguments starting with `--` are treated as options (`--help`, `--verbose`, `--output=<output-file>`),
+ * all other arguments are positionals, in order: operation, CodeQL path, log path.
+ *
+ * The selected processor always writes to a scratch file in a fresh directory below the system temp directory.
+ * If `--output=<output-file>` is given, the scratch file is copied to that location afterwards.
+ *
+ * Sample use:
+ *
+ * ```
+ * $ ts-node cli.ts badness-metrics codeql ~/Downloads/codeql-evaluator-log.json
+ * ```
+ *
+ * @param args The command line arguments, without the executable and script path.
+ * @throws Error if a positional argument is missing or the operation is unknown.
+ */
+async function main(args: string[]): Promise<void> {
+  const positionals = args.filter((arg) => !arg.startsWith("--"));
+  const [operation, codeqlPath, logPath] = positionals;
+  const options = args.filter((arg) => arg.startsWith("--"));
+  const verbose = options.includes("--verbose");
+  const outputPrefix = "--output=";
+  // Strip the prefix instead of splitting on "=", so that paths containing "=" are kept intact.
+  const explicitOutputFile = options
+    .find((arg) => arg.startsWith(outputPrefix))
+    ?.slice(outputPrefix.length);
+  const help = options.includes("--help");
+  // dear future reader. Please consider using a proper CLI library instead of this ad hoc parsing.
+  const usage = [
+    "Usage: cli <badness-metrics|expensive-predicates|overall-summary|predicates-summary|stage-timings|text-summary|tuple-sums> <codeql-path> <summary-log-path> [--verbose] [--output=<output-file>]",
+  ].join("\n");
+
+  if (help) {
+    console.log(usage);
+    return;
+  }
+  if (!operation || !codeqlPath || !logPath) {
+    throw new Error(`Missing arguments.\n\n${usage}`);
+  }
+
+  /** Writes a summary of the log in the given format next to the log file, and returns the path of the summary. */
+  async function makeSummaryLogFile(format: "overall" | "predicates") {
+    const summaryLogFile = `${logPath}.${format}.log`;
+    await logSummary.process(codeqlPath, logPath, summaryLogFile, format);
+    return summaryLogFile;
+  }
+
+  // Create the scratch directory below the system temp directory rather than in the current working directory.
+  const scratchOutputFile = join(
+    mkdtempSync(join(tmpdir(), "log-insights-")),
+    "implicit-output.txt",
+  );
+  switch (operation) {
+    case "badness-metrics":
+      await badnessMetrics.process(
+        codeqlPath,
+        await makeSummaryLogFile("predicates"),
+        scratchOutputFile,
+      );
+      break;
+    case "expensive-predicates":
+      await expensivePredicates.process(
+        codeqlPath,
+        await makeSummaryLogFile("overall"),
+        scratchOutputFile,
+      );
+      break;
+    case "overall-summary":
+      await logSummary.process(
+        codeqlPath,
+        logPath,
+        scratchOutputFile,
+        "overall",
+      );
+      break;
+    case "predicates-summary": {
+      await logSummary.process(
+        codeqlPath,
+        logPath,
+        scratchOutputFile,
+        "predicates",
+      );
+      break;
+    }
+    case "text-summary": {
+      await logSummary.process(codeqlPath, logPath, scratchOutputFile, "text");
+      break;
+    }
+    case "stage-timings":
+      await stageTimings.process(
+        codeqlPath,
+        await makeSummaryLogFile("predicates"),
+        scratchOutputFile,
+      );
+      break;
+    case "tuple-sums":
+      await tupleSums.process(
+        codeqlPath,
+        await makeSummaryLogFile("predicates"),
+        scratchOutputFile,
+      );
+      break;
+    default:
+      throw new Error(`Unknown operation: ${operation}.\n\n${usage}`);
+  }
+  // Source and destination are guaranteed to differ here: copying a file onto itself makes cpSync throw.
+  if (explicitOutputFile) {
+    cpSync(scratchOutputFile, explicitOutputFile);
+  }
+  // `||` is deliberate: an empty `--output=` value falls back to the scratch file.
+  const actualOutputFile = explicitOutputFile || scratchOutputFile;
+  if (verbose) {
+    // Wait for the whole file to be echoed before logging, and surface read errors as a rejection.
+    await new Promise<void>((resolve, reject) => {
+      createReadStream(actualOutputFile)
+        .on("error", reject)
+        .on("end", resolve)
+        .pipe(process.stdout);
+    });
+  }
+  log(`Output is available in ${actualOutputFile}.`);
+}
+// Script entry point: forward the user-supplied arguments (everything after the executable and script path) to `main`.
+// `void` marks the returned promise as intentionally not awaited.
+const cliArguments = process.argv.slice(2);
+void main(cliArguments);
diff --git a/extensions/ql-vscode/src/log-insights/core/log-processors/README.md b/extensions/ql-vscode/src/log-insights/core/log-processors/README.md
@@ -0,0 +1,6 @@
+# log-insights/core/log-processors
+
+This directory contains the top-level log processors.
+They will generally read and write files on disk with their exported `process` function, possibly making use of on-disk caches to speed up processing.
+
+The files might expose additional functions for testing purposes, as well as in-memory variations of the top-level `process` function.