Skip to content

Commit

Permalink
0.6 cherry picks (#1134)
Browse files Browse the repository at this point in the history
* Update ui-aim version to latest (#1133)

* Use experiment names for project params filter (#1132)

* Add the k6 load test script to the docs, with vars for shaping data at top of file.
* Tweak developer doc

Co-authored-by: @jgiannuzzi
  • Loading branch information
suprjinx authored Apr 17, 2024
1 parent 7a8f0fe commit 513c90f
Show file tree
Hide file tree
Showing 10 changed files with 185 additions and 23 deletions.
18 changes: 18 additions & 0 deletions docs/developer.md
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,24 @@ make migrations-rebuild
This target will rebuild the `database/migrate_generated.go` file to include execution of all
the packages in `database/migrations`.

## Filling the database

It's often necessary to test out your changes on a loaded database, and we definitely want to do this
before making a release. A production-level target for database shape is:
* 40k runs across 300 experiments
* 2k unique metrics per run, each with 200 values

To get some percentage of this into your local database instance, you can use the included K6
load test script, which can be run in the dev container.

1. Start the FML tracking server
2. Start a terminal in the dev container.
3. `cd docs/example`
4. `k6 run k6_load.js`

Tweak the numbers in `k6_load.js` for number of runs, metrics, etc -- the default amounts are
pretty small.

## Working with the UIs

FastTrackML incorporates the existing Aim and MLFlow web UIs, albeit
Expand Down
148 changes: 148 additions & 0 deletions docs/example/k6_load.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,148 @@
import http from 'k6/http';

const MAX_METRICS_PER_BATCH = 200

export default function () {
const namespace = 'default'
const numberOfExperiments = 1
const runsPerExperiment = 2
const paramsPerRun = 1
const metricsPerRun = 2000
const stepsPerMetric = 4

for (let i = 0; i < numberOfExperiments; i++) {
const experimentId = createExperiment(namespace)
for (let j = 0; j < runsPerExperiment; j++) {
createRun(namespace, experimentId, paramsPerRun, metricsPerRun, stepsPerMetric)
}
}
}

function createExperiment(namespace) {
const base_url = `http://localhost:5000/ns/${namespace}/api/2.0/mlflow/`;

const exp_response = http.post(
base_url + 'experiments/create',
JSON.stringify({
"name": `experiment-${Date.now()}`,
}),
{
headers: {
'Content-Type': 'application/json'
},
}
);
return exp_response.json().experiment_id;
}


function createRun(namespace, experimentId, numParams, numMetrics, numSteps) {
const base_url = `http://localhost:5000/ns/${namespace}/api/2.0/mlflow/`;

const run_response = http.post(
base_url + 'runs/create',
JSON.stringify({
experiment_id: experimentId,
start_time: Date.now(),
tags: [
{
key: "mlflow.user",
value: "k6"
}
]
}),
{
headers: {
'Content-Type': 'application/json'
},
}
);
const run_id = run_response.json().run.info.run_id;

let params = []
for (let id = 1; id <= numParams; id++) {
params.push({
key: `param${id}`,
value: `${id * Math.random()}`,
})
}
http.post(
base_url + 'runs/log-batch',
JSON.stringify({
run_id: run_id,
params: params
}),
{
headers: {
'Content-Type': 'application/json'
},
}
);

let metrics = [];
for (let step = 1; step <= numSteps; step++) {
for (let id = 1; id <= numMetrics; id++) {
let ctx = {}
let rnd = Math.random()
if (rnd < 0.3) {
ctx = { type: 'training' }
}
else if (rnd > 0.6) {
ctx = { type: 'testing' }
}

metrics.push({
key: `metric${id}`,
value: id * step * Math.random(),
timestamp: Date.now(),
step: step,
context: ctx,
})

if (metrics.length >= MAX_METRICS_PER_BATCH) {
http.post(
base_url + 'runs/log-batch',
JSON.stringify({
run_id: run_id,
metrics: metrics
}),
{
headers: {
'Content-Type': 'application/json'
},
}
);
metrics.length = 0;
}
}

if (metrics.length > 0) {
http.post(
base_url + 'runs/log-batch',
JSON.stringify({
run_id: run_id,
metrics: metrics
}),
{
headers: {
'Content-Type': 'application/json'
},
}
);
}

http.post(
base_url + 'runs/update',
JSON.stringify({
run_id: run_id,
end_time: Date.now(),
status: 'FINISHED'
}),
{
headers: {
'Content-Type': 'application/json'
},
}
);
}
}
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ require (
dagger.io/dagger v0.11.0
dario.cat/mergo v1.0.0
github.com/DATA-DOG/go-sqlmock v1.5.2
github.com/G-Research/fasttrackml-ui-aim v0.31705.27
github.com/G-Research/fasttrackml-ui-aim v0.31705.33
github.com/G-Research/fasttrackml-ui-mlflow v0.20902.7
github.com/PuerkitoBio/goquery v1.9.1
github.com/apache/arrow/go/v14 v14.0.2
Expand Down
4 changes: 2 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@ github.com/99designs/gqlgen v0.17.31/go.mod h1:i4rEatMrzzu6RXaHydq1nmEPZkb3bKQsn
github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
github.com/DATA-DOG/go-sqlmock v1.5.2 h1:OcvFkGmslmlZibjAjaHm3L//6LiuBgolP7OputlJIzU=
github.com/DATA-DOG/go-sqlmock v1.5.2/go.mod h1:88MAG/4G7SMwSE3CeA0ZKzrT5CiOU3OJ+JlNzwDqpNU=
github.com/G-Research/fasttrackml-ui-aim v0.31705.27 h1:v8kbGFqnc+1o1TFKj5wJ9QzZ0QHFYj1T7NOCad0eOao=
github.com/G-Research/fasttrackml-ui-aim v0.31705.27/go.mod h1:1ydj5zgJgklq4gf3jkKMh+OrBXRz/5hZtx+1aROuWaM=
github.com/G-Research/fasttrackml-ui-aim v0.31705.33 h1:kdkhL3dYaigdRq0ir72Gz3bGBGfZysVDXiVf8tfTp18=
github.com/G-Research/fasttrackml-ui-aim v0.31705.33/go.mod h1:1ydj5zgJgklq4gf3jkKMh+OrBXRz/5hZtx+1aROuWaM=
github.com/G-Research/fasttrackml-ui-mlflow v0.20902.7 h1:GPNCKPkUBBx54JYCRX8r06WvBa7sep5ppm1VQiPYZKY=
github.com/G-Research/fasttrackml-ui-mlflow v0.20902.7/go.mod h1:Bg/xSCP6KzFDVDBSfJfrGmXuU6H8lFtboy+bTiHK6c4=
github.com/Khan/genqlient v0.6.0 h1:Bwb1170ekuNIVIwTJEqvO8y7RxBxXu639VJOkKSrwAk=
Expand Down
8 changes: 2 additions & 6 deletions pkg/api/aim2/dao/repositories/metric.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ type MetricRepositoryProvider interface {
BaseRepositoryProvider
// GetMetricKeysAndContextsByExperiments returns metric keys and contexts by provided experiments.
GetMetricKeysAndContextsByExperiments(
ctx context.Context, namespaceID uint, experiments []int, experimentNames []string,
ctx context.Context, namespaceID uint, experimentNames []string,
) ([]models.LatestMetric, error)
// SearchMetrics returns a sql.Rows cursor for streaming the metrics matching the request.
SearchMetrics(
Expand Down Expand Up @@ -59,7 +59,7 @@ func NewMetricRepository(db *gorm.DB) *MetricRepository {

// GetMetricKeysAndContextsByExperiments returns metric keys and contexts by provided experiments.
func (r MetricRepository) GetMetricKeysAndContextsByExperiments(
ctx context.Context, namespaceID uint, experiments []int, experimentNames []string,
ctx context.Context, namespaceID uint, experimentNames []string,
) ([]models.LatestMetric, error) {
query := r.db.WithContext(ctx).Distinct().Select(
"key", "context_id",
Expand All @@ -75,10 +75,6 @@ func (r MetricRepository) GetMetricKeysAndContextsByExperiments(
).Where(
"runs.lifecycle_stage = ?", models.LifecycleStageActive,
)

if len(experiments) != 0 {
query = query.Where("experiments.experiment_id IN ?", experiments)
}
if len(experimentNames) != 0 {
query = query.Where("experiments.name IN ?", experimentNames)
}
Expand Down
8 changes: 4 additions & 4 deletions pkg/api/aim2/dao/repositories/param.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ import (
// ParamRepositoryProvider provides an interface to work with models.Param entity.
type ParamRepositoryProvider interface {
// GetParamKeysByParameters returns list of param keys by requested parameters.
GetParamKeysByParameters(ctx context.Context, namespaceID uint, experiments []int) ([]string, error)
GetParamKeysByParameters(ctx context.Context, namespaceID uint, experimentNames []string) ([]string, error)
}

// ParamRepository repository to work with models.Param entity.
Expand All @@ -31,7 +31,7 @@ func NewParamRepository(db *gorm.DB) *ParamRepository {

// GetParamKeysByParameters returns list of param keys by requested parameters.
func (r ParamRepository) GetParamKeysByParameters(
ctx context.Context, namespaceID uint, experiments []int,
ctx context.Context, namespaceID uint, experimentNames []string,
) ([]string, error) {
query := r.db.WithContext(ctx).Distinct().Model(
&models.Param{},
Expand All @@ -43,8 +43,8 @@ func (r ParamRepository) GetParamKeysByParameters(
).Where(
"runs.lifecycle_stage = ?", models.LifecycleStageActive,
)
if len(experiments) != 0 {
query = query.Where("experiments.experiment_id IN ?", experiments)
if len(experimentNames) != 0 {
query = query.Where("experiments.name IN ?", experimentNames)
}
var keys []string
if err := query.Pluck("Key", &keys).Error; err != nil {
Expand Down
8 changes: 4 additions & 4 deletions pkg/api/aim2/dao/repositories/tag.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ type TagRepositoryProvider interface {
// CreateExperimentTag creates new models.ExperimentTag entity connected to models.Experiment.
CreateExperimentTag(ctx context.Context, experimentTag *models.ExperimentTag) error
// GetTagKeysByParameters returns list of tag keys by requested parameters.
GetTagKeysByParameters(ctx context.Context, namespaceID uint, experiments []int) ([]string, error)
GetTagKeysByParameters(ctx context.Context, namespaceID uint, experimentNames []string) ([]string, error)
}

// TagRepository repository to work with models.Tag entity.
Expand Down Expand Up @@ -57,7 +57,7 @@ func (r TagRepository) GetTagsByNamespace(ctx context.Context, namespaceID uint)

// GetTagKeysByParameters returns list of tag keys by requested parameters.
func (r TagRepository) GetTagKeysByParameters(
ctx context.Context, namespaceID uint, experiments []int,
ctx context.Context, namespaceID uint, experimentNames []string,
) ([]string, error) {
// fetch and process tags.
query := r.db.WithContext(ctx).Model(
Expand All @@ -70,8 +70,8 @@ func (r TagRepository) GetTagKeysByParameters(
).Where(
"runs.lifecycle_stage = ?", models.LifecycleStageActive,
)
if len(experiments) != 0 {
query = query.Where("experiments.experiment_id IN ?", experiments)
if len(experimentNames) != 0 {
query = query.Where("experiments.name IN ?", experimentNames)
}

var keys []string
Expand Down
6 changes: 3 additions & 3 deletions pkg/api/aim2/services/project/service.go
Original file line number Diff line number Diff line change
Expand Up @@ -90,13 +90,13 @@ func (s Service) GetProjectParams(

projectParams := models.ProjectParams{}
if !req.ExcludeParams {
paramKeys, err := s.paramRepository.GetParamKeysByParameters(ctx, namespaceID, req.Experiments)
paramKeys, err := s.paramRepository.GetParamKeysByParameters(ctx, namespaceID, req.ExperimentNames)
if err != nil {
return nil, api.NewInternalError("error getting param keys: %s", err)
}
projectParams.ParamKeys = paramKeys

tagKeys, err := s.tagRepository.GetTagKeysByParameters(ctx, namespaceID, req.Experiments)
tagKeys, err := s.tagRepository.GetTagKeysByParameters(ctx, namespaceID, req.ExperimentNames)
if err != nil {
return nil, api.NewInternalError("error getting tag keys: %s", err)
}
Expand All @@ -106,7 +106,7 @@ func (s Service) GetProjectParams(
if slices.Contains(req.Sequences, "metric") {
// fetch metrics only when Experiments or ExperimentNames were provided.
metrics, err := s.metricRepository.GetMetricKeysAndContextsByExperiments(
ctx, namespaceID, req.Experiments, req.ExperimentNames,
ctx, namespaceID, req.ExperimentNames,
)
if err != nil {
return nil, api.NewInternalError("error getting metrics: %s", err)
Expand Down
2 changes: 1 addition & 1 deletion pkg/api/mlflow/dao/repositories/run.go
Original file line number Diff line number Diff line change
Expand Up @@ -289,7 +289,7 @@ func (r RunRepository) RestoreBatch(ctx context.Context, namespaceID uint, ids [

// UpdateWithTransaction updates existing models.Run entity in scope of transaction.
func (r RunRepository) UpdateWithTransaction(ctx context.Context, tx *gorm.DB, run *models.Run) error {
if err := tx.WithContext(ctx).Model(&run).Updates(run).Error; err != nil {
if err := tx.WithContext(ctx).Model(&run).Omit("LatestMetrics", "Metrics", "Params").Updates(run).Error; err != nil {
return eris.Wrapf(err, "error updating existing run with id: %s", run.ID)
}
return nil
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ func (s *GetProjectParamsTestSuite) Test_Ok() {
{
name: "RequestProjectParamsFilteredByExistingExperiment",
request: map[any]any{
"experiments": *s.DefaultExperiment.ID,
"experiment_names": s.DefaultExperiment.Name,
},
response: response.ProjectParamsResponse{
Metric: map[string][]fiber.Map{
Expand All @@ -119,7 +119,7 @@ func (s *GetProjectParamsTestSuite) Test_Ok() {
{
name: "RequestProjectParamsFilteredByNotExistingExperiment",
request: map[any]any{
"experiments": 999,
"experiment_names": "not-existing-experiment",
},
response: response.ProjectParamsResponse{
Metric: map[string][]fiber.Map{},
Expand Down

0 comments on commit 513c90f

Please sign in to comment.