Add the k6 load test script to the docs (G-Research#1104)

* Add the k6 load test script to the docs, with vars for shaping data at top of file. * Tweak developer doc Co-authored-by: @jgiannuzzi
suprjinx · Apr 17, 2024 · 88b345a · 88b345a
1 parent 11b3f9f
commit 88b345a
Show file tree

Hide file tree

Showing 3 changed files with 167 additions and 1 deletion.
diff --git a/docs/developer.md b/docs/developer.md
@@ -50,6 +50,24 @@ make migrations-rebuild
 This target will rebuild the `database/migrate_generated.go` file to include execution of all
 the packages in `database/migrations`.
 
+## Filling the database
+
+It's often necessary to test out your changes on a loaded database, and we definitely want to do this
+before making a release. A production-level target for database shape is:
+* 40k runs across 300 experiments
+* 2k unique metrics per run, each with 200 values
+
+To get some percentage of this into your local database instance, you can use the included k6
+load test script, which can be run in the dev container.
+
+1. Start the FastTrackML tracking server.
+2. Start a terminal in the dev container.
+3. `cd docs/example`
+4. `k6 run k6_load.js`
+
+Tweak the constants at the top of the default function in `k6_load.js` (number of experiments,
+runs, params, metrics, and steps) -- the default amounts are pretty small.
+
 ## Working with the UIs
 
 FastTrackML incorporates the existing Aim and MLFlow web UIs, albeit

diff --git a/docs/example/k6_load.js b/docs/example/k6_load.js
@@ -0,0 +1,148 @@
+import http from 'k6/http';
+
+const MAX_METRICS_PER_BATCH = 200 // metrics buffered by createRun before they are sent in one `runs/log-batch` request
+
+export default function () {
+  const namespace = 'default'
+  const numberOfExperiments = 1
+  const runsPerExperiment = 2
+  const paramsPerRun = 1
+  const metricsPerRun = 2000
+  const stepsPerMetric = 4
+
+  for (let i = 0; i < numberOfExperiments; i++) {
+    const experimentId = createExperiment(namespace)
+    for (let j = 0; j < runsPerExperiment; j++) {
+      createRun(namespace, experimentId, paramsPerRun, metricsPerRun, stepsPerMetric)
+    }
+  }
+}
+
+function createExperiment(namespace) {
+  const base_url = `http://localhost:5000/ns/${namespace}/api/2.0/mlflow/`;
+
+  const exp_response = http.post(
+    base_url + 'experiments/create',
+    JSON.stringify({
+      "name": `experiment-${Date.now()}`,
+    }),
+    {
+      headers: {
+        'Content-Type': 'application/json'
+      },
+    }
+  );
+  return exp_response.json().experiment_id;
+}
+
+
+function createRun(namespace, experimentId, numParams, numMetrics, numSteps) {
+  const base_url = `http://localhost:5000/ns/${namespace}/api/2.0/mlflow/`;
+
+  const run_response = http.post(
+    base_url + 'runs/create',
+    JSON.stringify({
+      experiment_id: experimentId,
+      start_time: Date.now(),
+      tags: [
+        {
+          key: "mlflow.user",
+          value: "k6"
+        }
+      ]
+    }),
+    {
+      headers: {
+        'Content-Type': 'application/json'
+      },
+    }
+  );
+  const run_id = run_response.json().run.info.run_id;
+
+  let params = []
+  for (let id = 1; id <= numParams; id++) {
+    params.push({
+      key: `param${id}`,
+      value: `${id * Math.random()}`,
+    })
+  }
+  http.post(
+    base_url + 'runs/log-batch',
+    JSON.stringify({
+      run_id: run_id,
+      params: params
+    }),
+    {
+      headers: {
+        'Content-Type': 'application/json'
+      },
+    }
+  );
+
+  let metrics = [];
+  for (let step = 1; step <= numSteps; step++) {
+    for (let id = 1; id <= numMetrics; id++) {
+      let ctx = {}
+      let rnd = Math.random()
+      if (rnd < 0.3) {
+        ctx = { type: 'training' }
+      }
+      else if (rnd > 0.6) {
+        ctx = { type: 'testing' }
+      }
+
+      metrics.push({
+        key: `metric${id}`,
+        value: id * step * Math.random(),
+        timestamp: Date.now(),
+        step: step,
+        context: ctx,
+      })
+
+      if (metrics.length >= MAX_METRICS_PER_BATCH) {
+        http.post(
+          base_url + 'runs/log-batch',
+          JSON.stringify({
+            run_id: run_id,
+            metrics: metrics
+          }),
+          {
+            headers: {
+              'Content-Type': 'application/json'
+            },
+          }
+        );
+        metrics.length = 0;
+      }
+    }
+
+    if (metrics.length > 0) {
+      http.post(
+        base_url + 'runs/log-batch',
+        JSON.stringify({
+          run_id: run_id,
+          metrics: metrics
+        }),
+        {
+          headers: {
+            'Content-Type': 'application/json'
+          },
+        }
+      );
+    }
+
+    http.post(
+      base_url + 'runs/update',
+      JSON.stringify({
+        run_id: run_id,
+        end_time: Date.now(),
+        status: 'FINISHED'
+      }),
+      {
+        headers: {
+          'Content-Type': 'application/json'
+        },
+      }
+    );
+  }
+}
diff --git a/pkg/api/mlflow/dao/repositories/run.go b/pkg/api/mlflow/dao/repositories/run.go
@@ -289,7 +289,7 @@ func (r RunRepository) RestoreBatch(ctx context.Context, namespaceID uint, ids [
 
 // UpdateWithTransaction updates existing models.Run entity in scope of transaction.
 func (r RunRepository) UpdateWithTransaction(ctx context.Context, tx *gorm.DB, run *models.Run) error {
-	if err := tx.WithContext(ctx).Model(&run).Updates(run).Error; err != nil {
+	if err := tx.WithContext(ctx).Model(&run).Omit("LatestMetrics", "Metrics", "Params").Updates(run).Error; err != nil {
 		return eris.Wrapf(err, "error updating existing run with id: %s", run.ID)
 	}
 	return nil