diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml new file mode 100644 index 0000000..61ccad7 --- /dev/null +++ b/.github/workflows/lint.yml @@ -0,0 +1,23 @@ +name: lint +on: + push: + branches: + - main + pull_request: + +permissions: + contents: read + +jobs: + golangci: + name: lint + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-go@v5 + with: + go-version: stable + - name: golangci-lint + uses: golangci/golangci-lint-action@v6 + with: + version: v1.59.1 diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml new file mode 100644 index 0000000..5453578 --- /dev/null +++ b/.github/workflows/test.yml @@ -0,0 +1,37 @@ +name: test +on: + push: + branches: + - main + pull_request: + +permissions: + contents: read + +jobs: + test: + name: test + runs-on: ubuntu-latest + services: + postgres: + image: postgres:15.3 + env: + POSTGRES_DB: ripoff-test-db + POSTGRES_USER: ripoff + POSTGRES_PASSWORD: ripoff + options: >- + --health-cmd pg_isready + --health-interval 10s + --health-timeout 5s + --health-retries 5 + ports: + - 5432:5432 + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-go@v5 + with: + go-version: stable + - name: Run tests + run: go test . 
+ env: + RIPOFF_TEST_DATABASE_URL: postgres://ripoff:ripoff@localhost/ripoff-test-db diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..6a97f3d --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +.vscode +tmp diff --git a/LICENSE.txt b/LICENSE.txt new file mode 100644 index 0000000..483adc0 --- /dev/null +++ b/LICENSE.txt @@ -0,0 +1,7 @@ +Copyright 2024 Samuel Mortenson + +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/README.md b/README.md new file mode 100644 index 0000000..5c0860e --- /dev/null +++ b/README.md @@ -0,0 +1,120 @@ +# ripoff - generate fake data from templated yaml files + +ripoff is a command line tool that generates fake data from yaml files (ripoffs) and inserts that data into PostgreSQL. 
+
+Some features of ripoff are:
+
+- Model your fake data in one or more yaml files, god's favorite file format
+- Provide templated yaml files in cases where a row in one table requires many other rows, or you want loops
+- Ability to resolve dependencies (foreign keys) between generated rows, always running queries "in order"
+- Due to deterministic random generation, re-running ripoff will perform upserts on all generated rows
+
+# Installation
+
+1. Run `go install github.com/mortenson/ripoff/cmd/ripoff@latest`
+2. Set the `DATABASE_URL` env variable to your local PostgreSQL database
+3. Run `ripoff <directory to your yaml files>`
+
+When writing your initial ripoffs, you may want to use the `-s` ("soft") flag which does not commit the generated transaction.
+
+# File format
+
+ripoffs define rows to be inserted into your database. Any number of ripoffs can be included in a single directory.
+
+## Basic example
+
+```yaml
+# A map of rows to upsert, where keys are in the format <table_name>:<valueFunc>(<seed>), and values are maps of column names to values.
+rows:
+  # A "users" table row identified with a UUID generated with the seed "fooBar"
+  users:uuid(fooBar):
+    # Using the map key here implicitly informs ripoff that "id" is the primary key of the table
+    id: users:uuid(fooBar)
+    email: foobar@example.com
+  avatars:uuid(fooBarAvatar):
+    id: avatars:uuid(fooBarAvatar)
+    # ripoff will see this and insert the "users:uuid(fooBar)" row before this row
+    user_id: users:uuid(fooBar)
+```
+
+For more (sometimes wildly complex) examples, see `./testdata`.
+
+## More on valueFuncs and row keys
+
+valueFuncs allow you to generate random data that's seeded with a static string. This ensures that repeat runs of ripoff are deterministic, which enables upserts (consistent primary keys). If a column value ends in a valueFunc call (optionally prefixed with `table:`), ripoff replaces the whole value with the generated result.
+
+ripoff provides:
+
+- `uuid(seedString)` - generates a UUIDv4
+- `int(seedString)` - generates an integer (note: might be awkward on auto incrementing tables)
+- `literal(someId)` - returns "someId" exactly.
useful if you want to hard code UUIDs/ints + +and also all functions from [gofakeit](https://github.com/brianvoe/gofakeit?tab=readme-ov-file#functions) that have no arguments and return a string (called in camelcase, ex: `email(seedString)`). + +## Using templates + +ripoff files can be used as templates to create multiple rows at once. + +Yaml files that start with `template_` will be treated as Go templates. Here's a template that creates a user and an avatar: + +```yaml +rows: + # "rowId" is the id/key of the row that rendered this template. All other variables are arbitrarily provided. + {{ .rowId }}: + id: {{ .rowId }} + email: {{ .email }} + avatar_id: avatars:uuid({{ .rowId }}) + avatars:uuid({{ .rowId }}): + id: avatars:uuid({{ .rowId }}) + url: {{ .avatarUrl }} +``` + +which you would use from a "normal" ripoff like: + +```yaml +rows: + # The row id/key will be passed to the template in the "rowId" variable. + # This is useful if you'd like to reference "users:uuid(fooBar)" in a foreign key elsewhere. + users:uuid(fooBar): + # The template filename. + template: template_user.yml + # All other variables are arbitrary. + email: foobar@example.com + avatarUrl: image.png + avatarGrayscale: false +``` + +## Explicitly defining primary keys + +ripoff will try to determine the primary key for your row by matching the row ID with a single column (see "Basic example" above). However if you use composite keys, or your primary key is a foreign key to another table (see ./testdata/dependencies), this may not be possible. In these cases you can manually define primary keys using `~conflict: column_1, column_2, ...`. + +# Security + +This project explicitly allows SQL injection due to the way queries are constructed. Do not run `ripoff` on directories you do not trust. + +# Why this exists + +Fake data generators generally come in two flavors: + +1. Model your fake data in the same language/DSL/ORM that your application uses +2. 
Fuzz your database schema by spewing completely randomized data at it
+
+I find generating fake data to be a completely separate use case from "normal" ORM usage, and truly randomized fake data is awkward to use locally.
+
+So ripoff is my approach to fake (but not excessively random) data generation. Because it's not aware of your application or schema, it's closer to writing templated SQL than learning some crazy high level DSL. There are awkward bits (everything is a string) but it's holding up OK so far.
+
+# FAQ
+
+## Why use Go templates and valueFuncs?
+
+It's kind of weird that `template_*` files use Go templates, but there's also valueFuncs like `uuid(someSeed)`.
+
+This is done for two reasons - first, Go templates are ugly and result in invalid yaml, so no reason to force you to write them unless you need to. Second, ripoff builds its dependency graph based on the row ids/keys, not the actual generated random value. So you can think of the query building pipeline as:
+
+1. Load all ripoff files
+2. For each row in each file, check if the row uses a template
+3. If it does, process the template and append the templated rows into the total rows
+4. If not, just append that row
+5. Now we have a "total ripoff" (har har) file which contains all rows. I think it's cool at this point that the templating is "done"
+6. For each row, check if any column references another row and build a directed acyclic graph, then sort that graph
+7.
Run queries for each row, in order of least to greatest dependencies
diff --git a/cmd/ripoff.go b/cmd/ripoff.go
new file mode 100644
index 0000000..3d252b5
--- /dev/null
+++ b/cmd/ripoff.go
@@ -0,0 +1,92 @@
+package main
+
+import (
+	"context"
+	"errors"
+	"flag"
+	"fmt"
+	"log/slog"
+	"os"
+	"path"
+
+	"github.com/jackc/pgx/v5"
+
+	"github.com/mortenson/ripoff"
+)
+
+// errAttr wraps err in a slog attribute under the key "error".
+func errAttr(err error) slog.Attr {
+	return slog.Any("error", err)
+}
+
+// main loads the ripoff files from the directory given as the only
+// positional argument, runs the generated queries in a single transaction
+// against DATABASE_URL, and commits unless the -s flag is set.
+// Every failure is logged and exits the process with status 1.
+func main() {
+	verbosePtr := flag.Bool("v", false, "enable verbose output")
+	softPtr := flag.Bool("s", false, "do not commit generated queries")
+	flag.Parse()
+
+	if *verbosePtr {
+		slog.SetLogLoggerLevel(slog.LevelDebug)
+	}
+
+	dburl := os.Getenv("DATABASE_URL")
+	if dburl == "" {
+		slog.Error("DATABASE_URL env variable is required")
+		os.Exit(1)
+	}
+
+	if len(flag.Args()) != 1 {
+		slog.Error("Path to YAML files required")
+		os.Exit(1)
+	}
+	rootDirectory := path.Clean(flag.Arg(0))
+	totalRipoff, err := ripoff.RipoffFromDirectory(rootDirectory)
+	if err != nil {
+		slog.Error("Could not load ripoff", errAttr(err))
+		os.Exit(1)
+	}
+
+	ctx := context.Background()
+	conn, err := pgx.Connect(ctx, dburl)
+	if err != nil {
+		slog.Error("Could not connect to database", errAttr(err))
+		os.Exit(1)
+	}
+	defer conn.Close(ctx)
+
+	tx, err := conn.Begin(ctx)
+	if err != nil {
+		slog.Error("Could not create transaction", errAttr(err))
+		os.Exit(1)
+	}
+	defer func() {
+		// Rollback is a no-op after a successful Commit and then reports
+		// pgx.ErrTxClosed; that is the expected outcome, not a failure.
+		rollbackErr := tx.Rollback(ctx)
+		if rollbackErr != nil && !errors.Is(rollbackErr, pgx.ErrTxClosed) {
+			slog.Error("Could not rollback transaction", errAttr(rollbackErr))
+			os.Exit(1)
+		}
+	}()
+
+	err = ripoff.RunRipoff(ctx, tx, totalRipoff)
+	if err != nil {
+		slog.Error("Could not run ripoff", errAttr(err))
+		os.Exit(1)
+	}
+
+	if *softPtr {
+		slog.Info("Not committing transaction due to -s flag")
+	} else {
+		err = tx.Commit(ctx)
+		if err != nil {
+			slog.Error("Could not commit transaction", errAttr(err))
+			os.Exit(1)
+		}
+	}
+
+	slog.Info(fmt.Sprintf("Ripoff complete, %d rows processed", len(totalRipoff.Rows)))
+}
diff --git a/db.go b/db.go
new file mode 100644
index 0000000..c1463cc
--- /dev/null
+++ b/db.go
@@ -0,0 +1,208 @@
+package ripoff
+
+import (
+	"context"
+	"crypto/sha256"
+	"encoding/binary"
+	"fmt"
+	"log/slog"
+	"math/rand"
+	"reflect"
+	"regexp"
+	"slices"
+	"strings"
+
+	"github.com/brianvoe/gofakeit/v7"
+	"github.com/dominikbraun/graph"
+	"github.com/google/uuid"
+	"github.com/jackc/pgx/v5"
+	"github.com/lib/pq"
+)
+
+// RunRipoff builds the queries for totalRipoff and runs them on tx in
+// dependency order. It never commits; the caller owns the transaction.
+func RunRipoff(ctx context.Context, tx pgx.Tx, totalRipoff RipoffFile) error {
+	queries, err := buildQueriesForRipoff(totalRipoff)
+	if err != nil {
+		return err
+	}
+
+	for _, query := range queries {
+		slog.Debug(query)
+		_, err = tx.Exec(ctx, query)
+		if err != nil {
+			// %w keeps the driver error available to errors.Is/errors.As.
+			return fmt.Errorf("error when running query %s, %w", query, err)
+		}
+	}
+
+	return nil
+}
+
+// valueFuncRegex matches a trailing "kind(seed)" call, e.g. "uuid(fooBar)".
+var valueFuncRegex = regexp.MustCompile(`([a-zA-Z]+)\((\S+)\)$`)
+
+// referenceRegex matches values prefixed with "table:", i.e. row references.
+var referenceRegex = regexp.MustCompile(`^[a-zA-Z0-9_]+:`)
+
+// fakerFuncs returns every method of f that takes no arguments and returns a
+// string, keyed by method name. Copied from gofakeit.
+func fakerFuncs(f *gofakeit.Faker) map[string]func() string {
+	funcs := map[string]func() string{}
+
+	v := reflect.ValueOf(f)
+
+	templateExclusion := []string{
+		"RandomMapKey",
+		"SQL",
+		"Template",
+	}
+
+	for i := 0; i < v.NumMethod(); i++ {
+		if slices.Index(templateExclusion, v.Type().Method(i).Name) != -1 {
+			continue
+		}
+
+		// Keep only methods with no arguments and a single string result.
+		// NumIn counts the receiver, so 1 means "no arguments".
+		if v.Type().Method(i).Type.NumOut() != 1 || v.Type().Method(i).Type.NumIn() != 1 || v.Type().Method(i).Type.Out(0).String() != "string" {
+			continue
+		}
+
+		funcs[v.Type().Method(i).Name] = v.Method(i).Interface().(func() string)
+	}
+
+	return funcs
+}
+
+// prepareValue resolves a raw column value. Values that do not end in a
+// "kind(seed)" call are returned unchanged; otherwise the value is generated
+// from a random source seeded with the SHA-256 hash of seed. The bool reports
+// whether rawValue starts with a "table:" prefix, i.e. references a row.
+func prepareValue(rawValue string) (string, bool, error) {
+	valueFuncMatches := valueFuncRegex.FindStringSubmatch(rawValue)
+	if len(valueFuncMatches) != 3 {
+		return rawValue, false, nil
+	}
+	addEdge := referenceRegex.MatchString(rawValue)
+	kind := valueFuncMatches[1]
+	value := valueFuncMatches[2]
+	h := sha256.New()
+	h.Write([]byte(value))
+	hashBytes := h.Sum(nil)
+	randSeed := rand.New(rand.NewSource(int64(binary.BigEndian.Uint64(hashBytes))))
+	// It's one of ours.
+	switch kind {
+	case "uuid":
+		randomId, err := uuid.NewRandomFromReader(randSeed)
+		if err != nil {
+			return "", false, err
+		}
+		return randomId.String(), addEdge, nil
+	case "int":
+		return fmt.Sprint(randSeed.Int()), addEdge, nil
+	case "literal":
+		return value, addEdge, nil
+	}
+
+	faker := gofakeit.NewFaker(randSeed, true)
+	funcs := fakerFuncs(faker)
+	fakerFunc, funcExists := funcs[strings.ToUpper(kind[:1])+kind[1:]]
+	if funcExists {
+		return fakerFunc(), addEdge, nil
+	}
+
+	return "", false, fmt.Errorf("Magic ID kind does not exist: %s(%s)", kind, value)
+}
+
+// buildQueriesForRipoff turns every row into an upsert statement and returns
+// the statements ordered so that referenced rows come before the rows that
+// reference them. It fails on row ids without a "table:" prefix, rows with no
+// conflict target, and any error from building or sorting the dependency graph.
+func buildQueriesForRipoff(totalRipoff RipoffFile) ([]string, error) {
+	dependencyGraph := graph.New(graph.StringHash, graph.Directed(), graph.Acyclic())
+	queries := map[string]string{}
+
+	// Add vertexes first.
+	for rowId := range totalRipoff.Rows {
+		err := dependencyGraph.AddVertex(rowId)
+		if err != nil {
+			return []string{}, err
+		}
+	}
+
+	for rowId, row := range totalRipoff.Rows {
+		parts := strings.Split(rowId, ":")
+		if len(parts) < 2 {
+			return []string{}, fmt.Errorf("invalid id: %s", rowId)
+		}
+		table := parts[0]
+
+		columns := []string{}
+		values := []string{}
+		setStatements := []string{}
+		onConflictColumn := ""
+		for column, value := range row {
+			// Rows can explicitly mark what columns they should conflict with, in (hopefully rare) cases.
+			if column == "~conflict" {
+				columnParts := strings.Split(value, ",")
+				for i, columnPart := range columnParts {
+					columnParts[i] = pq.QuoteIdentifier(strings.TrimSpace(columnPart))
+				}
+				onConflictColumn = strings.Join(columnParts, ", ")
+				continue
+			}
+			columns = append(columns, pq.QuoteIdentifier(column))
+			valuePrepared, addEdge, err := prepareValue(value)
+			if err != nil {
+				return []string{}, err
+			}
+			if addEdge && rowId != value {
+				err = dependencyGraph.AddEdge(rowId, value)
+				if err != nil {
+					return []string{}, err
+				}
+			}
+			if rowId == value {
+				onConflictColumn = pq.QuoteIdentifier(column)
+			}
+			values = append(values, pq.QuoteLiteral(valuePrepared))
+			setStatements = append(setStatements, fmt.Sprintf("%s = %s", pq.QuoteIdentifier(column), pq.QuoteLiteral(valuePrepared)))
+		}
+
+		if onConflictColumn == "" {
+			return []string{}, fmt.Errorf("cannot determine column to conflict with for: %s", rowId)
+		}
+
+		query := fmt.Sprintf(
+			`INSERT INTO %s (%s)
+			VALUES (%s)
+			ON CONFLICT (%s)
+			DO UPDATE SET %s;`,
+			pq.QuoteIdentifier(table),
+			strings.Join(columns, ","),
+			strings.Join(values, ","),
+			onConflictColumn,
+			strings.Join(setStatements, ","),
+		)
+		queries[rowId] = query
+	}
+
+	// The sort can fail (for example when rows reference each other in a
+	// cycle); ignoring that would silently return no queries at all.
+	ordered, err := graph.TopologicalSort(dependencyGraph)
+	if err != nil {
+		return []string{}, fmt.Errorf("could not order rows by dependency: %w", err)
+	}
+	// Edges point from a row to the rows it references, so walk the sorted
+	// list backwards to run referenced rows first.
+	sortedQueries := []string{}
+	for i := len(ordered) - 1; i >= 0; i-- {
+		query, ok := queries[ordered[i]]
+		if !ok {
+			return []string{}, fmt.Errorf("no query found for %s", ordered[i])
+		}
+		sortedQueries = append(sortedQueries, query)
+	}
+	return sortedQueries, nil
+}
diff --git a/db_test.go b/db_test.go
new file mode 100644
index 0000000..5b8fb14
--- /dev/null
+++ b/db_test.go
@@ -0,0 +1,85 @@
+package ripoff
+
+import (
+	"context"
+	"fmt"
+	"os"
+	"path"
+	"runtime"
+	"strings"
+	"testing"
+
+	"github.com/jackc/pgx/v5"
+	"github.com/lib/pq"
+	"github.com/stretchr/testify/require"
+)
+
+func runTestData(t *testing.T, ctx context.Context, tx pgx.Tx, testDir string) {
+	schemaFile, err := os.ReadFile(path.Join(testDir, "schema.sql"))
+	require.NoError(t, err)
+	_, err = tx.Exec(ctx, string(schemaFile))
+	require.NoError(t, err)
+	totalRipoff, err := RipoffFromDirectory(testDir)
+	require.NoError(t, err)
+	err = RunRipoff(ctx, tx, totalRipoff)
+	require.NoError(t, err)
+	// Run again to implicitly test upsert behavior.
+	err = RunRipoff(ctx, tx, totalRipoff)
+	require.NoError(t, err)
+	// Try to verify that the number of generated rows matches the ripoff.
+	tableCount := map[string]int{}
+	for rowId := range totalRipoff.Rows {
+		tableName := strings.Split(rowId, ":")
+		if len(tableName) > 0 {
+			tableCount[tableName[0]]++
+		}
+	}
+	for tableName, expectedCount := range tableCount {
+		row := tx.QueryRow(ctx, fmt.Sprintf("SELECT COUNT(*) FROM %s;", pq.QuoteIdentifier(tableName)))
+		var realCount int
+		err := row.Scan(&realCount)
+		require.NoError(t, err)
+		require.Equal(t, expectedCount, realCount)
+	}
+	// Test output further if needed.
+ validationFile, err := os.ReadFile(path.Join(testDir, "validate.sql")) + if err == nil { + row := tx.QueryRow(ctx, string(validationFile)) + var success int + var debug string + err := row.Scan(&success, &debug) + require.NoError(t, err) + if success != 1 { + t.Fatalf("Validation failed with debug content: %s", debug) + } + } +} + +func TestRipoff(t *testing.T) { + envUrl := os.Getenv("RIPOFF_TEST_DATABASE_URL") + if envUrl == "" { + envUrl = "postgres:///ripoff-test-db" + } + ctx := context.Background() + conn, err := pgx.Connect(ctx, envUrl) + if err != nil { + require.NoError(t, err) + } + defer conn.Close(ctx) + + _, filename, _, _ := runtime.Caller(0) + dir := path.Join(path.Dir(filename), "testdata") + dirEntry, err := os.ReadDir(dir) + require.NoError(t, err) + + for _, e := range dirEntry { + if !e.IsDir() { + continue + } + tx, err := conn.Begin(ctx) + require.NoError(t, err) + runTestData(t, ctx, tx, path.Join(dir, e.Name())) + err = tx.Rollback(ctx) + require.NoError(t, err) + } +} diff --git a/go.mod b/go.mod new file mode 100644 index 0000000..6191f24 --- /dev/null +++ b/go.mod @@ -0,0 +1,27 @@ +module github.com/mortenson/ripoff + +go 1.22 + +toolchain go1.22.4 + +require ( + github.com/brianvoe/gofakeit/v7 v7.0.4 + github.com/dominikbraun/graph v0.23.0 + github.com/google/uuid v1.6.0 + github.com/jackc/pgx/v5 v5.6.0 + github.com/lib/pq v1.10.9 + github.com/stretchr/testify v1.9.0 + gopkg.in/yaml.v3 v3.0.1 +) + +require ( + github.com/davecgh/go-spew v1.1.1 // indirect + github.com/jackc/pgpassfile v1.0.0 // indirect + github.com/jackc/pgservicefile v0.0.0-20221227161230-091c0ba34f0a // indirect + github.com/kr/text v0.2.0 // indirect + github.com/pmezard/go-difflib v1.0.0 // indirect + github.com/rogpeppe/go-internal v1.12.0 // indirect + golang.org/x/crypto v0.17.0 // indirect + golang.org/x/sync v0.3.0 // indirect + golang.org/x/text v0.14.0 // indirect +) diff --git a/go.sum b/go.sum new file mode 100644 index 0000000..acb0993 --- /dev/null +++ 
b/go.sum @@ -0,0 +1,45 @@ +github.com/brianvoe/gofakeit/v7 v7.0.4 h1:Mkxwz9jYg8Ad8NvT9HA27pCMZGFQo08MK6jD0QTKEww= +github.com/brianvoe/gofakeit/v7 v7.0.4/go.mod h1:QXuPeBw164PJCzCUZVmgpgHJ3Llj49jSLVkKPMtxtxA= +github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/dominikbraun/graph v0.23.0 h1:TdZB4pPqCLFxYhdyMFb1TBdFxp8XLcJfTTBQucVPgCo= +github.com/dominikbraun/graph v0.23.0/go.mod h1:yOjYyogZLY1LSG9E33JWZJiq5k83Qy2C6POAuiViluc= +github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= +github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/jackc/pgpassfile v1.0.0 h1:/6Hmqy13Ss2zCq62VdNG8tM1wchn8zjSGOBJ6icpsIM= +github.com/jackc/pgpassfile v1.0.0/go.mod h1:CEx0iS5ambNFdcRtxPj5JhEz+xB6uRky5eyVu/W2HEg= +github.com/jackc/pgservicefile v0.0.0-20221227161230-091c0ba34f0a h1:bbPeKD0xmW/Y25WS6cokEszi5g+S0QxI/d45PkRi7Nk= +github.com/jackc/pgservicefile v0.0.0-20221227161230-091c0ba34f0a/go.mod h1:5TJZWKEWniPve33vlWYSoGYefn3gLQRzjfDlhSJ9ZKM= +github.com/jackc/pgx/v5 v5.6.0 h1:SWJzexBzPL5jb0GEsrPMLIsi/3jOo7RHlzTjcAeDrPY= +github.com/jackc/pgx/v5 v5.6.0/go.mod h1:DNZ/vlrUnhWCoFGxHAG8U2ljioxukquj7utPDgtQdTw= +github.com/jackc/puddle/v2 v2.2.1 h1:RhxXJtFG022u4ibrCSMSiu5aOq1i77R3OHKNJj77OAk= +github.com/jackc/puddle/v2 v2.2.1/go.mod h1:vriiEXHvEE654aYKXXjOvZM39qJ0q+azkZFrfEOc3H4= +github.com/kr/pretty v0.3.0 h1:WgNl7dwNpEZ6jJ9k1snq4pZsg7DOEN8hP9Xw0Tsjwk0= +github.com/kr/pretty v0.3.0/go.mod h1:640gp4NfQd8pI5XOwp5fnNeVWj67G7CFk/SaSQn7NBk= +github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= +github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= +github.com/lib/pq v1.10.9 
h1:YXG7RB+JIjhP29X+OtkiDnYaXQwpS4JEWq7dtCCRUEw= +github.com/lib/pq v1.10.9/go.mod h1:AlVN5x4E4T544tWzH6hKfbfQvm3HdbOxrmggDNAPY9o= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/rogpeppe/go-internal v1.12.0 h1:exVL4IDcn6na9z1rAb56Vxr+CgyK3nn3O+epU5NdKM8= +github.com/rogpeppe/go-internal v1.12.0/go.mod h1:E+RYuTGaKKdloAfM02xzb0FW3Paa99yedzYV+kq4uf4= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= +github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= +github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= +golang.org/x/crypto v0.17.0 h1:r8bRNjWL3GshPW3gkd+RpvzWrZAwPS49OmTGZ/uhM4k= +golang.org/x/crypto v0.17.0/go.mod h1:gCAAfMLgwOJRpTjQ2zCCt2OcSfYMTeZVSRtQlPC7Nq4= +golang.org/x/sync v0.3.0 h1:ftCYgMx6zT/asHUrPw8BLLscYtGznsLAnjq5RH9P66E= +golang.org/x/sync v0.3.0/go.mod h1:FU7BRWz2tNW+3quACPkgCx/L+uEAv1htQ0V83Z9Rj+Y= +golang.org/x/text v0.14.0 h1:ScX5w1eTa3QqT8oi6+ziP7dTV1S2+ALU0bI+0zXKWiQ= +golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= +gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= +gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff 
--git a/ripoff_file.go b/ripoff_file.go
new file mode 100644
index 0000000..60cd0af
--- /dev/null
+++ b/ripoff_file.go
@@ -0,0 +1,145 @@
+package ripoff
+
+import (
+	"bytes"
+	"fmt"
+	"os"
+	"path"
+	"path/filepath"
+	"regexp"
+	"strconv"
+	"strings"
+	"text/template"
+
+	"gopkg.in/yaml.v3"
+)
+
+// Row maps column names to their raw (unprepared) string values.
+type Row map[string]string
+
+// RipoffFile is the parsed contents of a ripoff YAML file.
+type RipoffFile struct {
+	Rows map[string]Row `yaml:"rows"`
+}
+
+// funcMap holds the helper functions made available to ripoff templates.
+var funcMap = template.FuncMap{
+	"intSlice": func(countStr string) []int {
+		countInt, err := strconv.Atoi(countStr)
+		if err != nil {
+			panic(err)
+		}
+		ret := make([]int, countInt)
+		for i := range ret {
+			ret[i] = i
+		}
+		return ret
+	},
+}
+
+// templateFileRegex matches file names that start with "template_".
+var templateFileRegex = regexp.MustCompile(`^template_(\S+)\.`)
+
+// concatRows merges newRows into existingRows. A row with a "template" key is
+// rendered through the named template and replaced by the rows that template
+// produces; every other row is copied as is. It returns an error when a row
+// id is defined more than once or when a template fails to render or parse.
+func concatRows(templates *template.Template, existingRows map[string]Row, newRows map[string]Row) error {
+	for rowId, row := range newRows {
+		_, rowExists := existingRows[rowId]
+		if rowExists {
+			return fmt.Errorf("row %s is defined more than once", rowId)
+		}
+		templateName, usesTemplate := row["template"]
+		if usesTemplate {
+			// "rowId" allows dependencies between templated rows to be clear outside of the template.
+			// Templates can additionally use it to seed random generators.
+			// Copy the row so the caller's map is not modified.
+			templateVars := make(Row, len(row)+1)
+			for key, value := range row {
+				templateVars[key] = value
+			}
+			templateVars["rowId"] = rowId
+			buf := &bytes.Buffer{}
+			err := templates.ExecuteTemplate(buf, templateName, templateVars)
+			if err != nil {
+				return err
+			}
+			ripoff := &RipoffFile{}
+			err = yaml.Unmarshal(buf.Bytes(), ripoff)
+			if err != nil {
+				return err
+			}
+			for templateRowId, templateRow := range ripoff.Rows {
+				_, rowExists := existingRows[templateRowId]
+				if rowExists {
+					// Name the row that collided, not the row that used the template.
+					return fmt.Errorf("row %s is defined more than once", templateRowId)
+				}
+				existingRows[templateRowId] = templateRow
+			}
+		} else {
+			existingRows[rowId] = row
+		}
+	}
+	return nil
+}
+
+// RipoffFromDirectory loads every .yaml/.yml file below dir, expands rows
+// that use the template_* files found directly in dir, and returns all rows
+// merged into a single RipoffFile.
+func RipoffFromDirectory(dir string) (RipoffFile, error) {
+	dir = path.Clean(dir)
+
+	// Treat files starting with template_ as go templates.
+	templates := template.New("").Option("missingkey=error").Funcs(funcMap)
+	_, err := templates.ParseGlob(path.Join(dir, "template_*"))
+	if err != nil && !strings.Contains(err.Error(), "template: pattern matches no files") {
+		return RipoffFile{}, err
+	}
+
+	// Find all ripoff files in dir recursively.
+	allRipoffs := []RipoffFile{}
+	err = filepath.WalkDir(dir, func(path string, entry os.DirEntry, err error) error {
+		if err != nil {
+			return err
+		}
+		if filepath.Ext(path) != ".yaml" && filepath.Ext(path) != ".yml" {
+			return nil
+		}
+		// Templates were already processed.
+		templateNameMatches := templateFileRegex.FindStringSubmatch(entry.Name())
+		if len(templateNameMatches) == 2 {
+			return nil
+		}
+
+		yamlFile, err := os.ReadFile(path)
+		if err != nil {
+			return err
+		}
+		ripoff := &RipoffFile{}
+		err = yaml.Unmarshal(yamlFile, ripoff)
+		if err != nil {
+			return fmt.Errorf("could not parse %s: %w", path, err)
+		}
+		allRipoffs = append(allRipoffs, *ripoff)
+		return nil
+	})
+
+	if err != nil {
+		return RipoffFile{}, err
+	}
+
+	totalRipoff := RipoffFile{
+		Rows: map[string]Row{},
+	}
+
+	for _, ripoff := range allRipoffs {
+		err = concatRows(templates, totalRipoff.Rows, ripoff.Rows)
+		if err != nil {
+			return RipoffFile{}, err
+		}
+	}
+
+	return totalRipoff, nil
+}
diff --git a/testdata/basic/basic.yml b/testdata/basic/basic.yml
new file mode 100644
index 0000000..a8ac8dd
--- /dev/null
+++ b/testdata/basic/basic.yml
@@ -0,0 +1,10 @@
+rows:
+  uuid_users:uuid(fooBar):
+    id: uuid_users:uuid(fooBar)
+    email: foobar@example.com
+  int_users:int(fooBar):
+    id: int_users:int(fooBar)
+    email: foobar@example.com
+  uuid_users:literal(bbb2ddaa-f33a-4b85-96e7-96d77a194b61):
+    id: uuid_users:literal(bbb2ddaa-f33a-4b85-96e7-96d77a194b61)
+    email: foobar@example.com
diff --git a/testdata/basic/schema.sql b/testdata/basic/schema.sql
new file mode 100644
index 0000000..ed9b861
--- /dev/null
+++ b/testdata/basic/schema.sql
@@ -0,0 +1,10 @@
+CREATE TABLE uuid_users (
+  id UUID NOT NULL PRIMARY KEY,
+  email TEXT NOT NULL
+); + +CREATE TABLE int_users ( + id BIGSERIAL NOT NULL, + email TEXT NOT NULL, + PRIMARY KEY (id) +); diff --git a/testdata/bigdata/bigdata.yml b/testdata/bigdata/bigdata.yml new file mode 100644 index 0000000..17d81e8 --- /dev/null +++ b/testdata/bigdata/bigdata.yml @@ -0,0 +1,4 @@ +rows: + row_ids_dont_always_matter_if_row_id_isnt_used_in_template: + template: template_multi_user.yml + numUsers: 100 diff --git a/testdata/bigdata/schema.sql b/testdata/bigdata/schema.sql new file mode 100644 index 0000000..ca0cba4 --- /dev/null +++ b/testdata/bigdata/schema.sql @@ -0,0 +1,5 @@ +CREATE TABLE users ( + id UUID NOT NULL, + email TEXT NOT NULL, + PRIMARY KEY (id) +); diff --git a/testdata/bigdata/template_multi_user.yml b/testdata/bigdata/template_multi_user.yml new file mode 100644 index 0000000..2f38727 --- /dev/null +++ b/testdata/bigdata/template_multi_user.yml @@ -0,0 +1,6 @@ +rows: + {{ range $k, $v := (intSlice .numUsers) }} + users:uuid({{ print $.rowId $k }}): + id: users:uuid({{ print $.rowId $k }}) + email: multi-user-{{ $k }}@example.com + {{ end }} diff --git a/testdata/dependencies/dependencies.yml b/testdata/dependencies/dependencies.yml new file mode 100644 index 0000000..8ef1e8a --- /dev/null +++ b/testdata/dependencies/dependencies.yml @@ -0,0 +1,12 @@ +rows: + users:uuid(fooBar): + id: users:uuid(fooBar) + email: foobar@example.com + avatar_id: avatars:uuid(fooBarAvatar) + avatars:uuid(fooBarAvatar): + id: avatars:uuid(fooBarAvatar) + url: image.png + avatar_modifiers:uuid(fooBarAvatar): + ~conflict: id + id: avatars:uuid(fooBarAvatar) + grayscale: true diff --git a/testdata/dependencies/schema.sql b/testdata/dependencies/schema.sql new file mode 100644 index 0000000..c897188 --- /dev/null +++ b/testdata/dependencies/schema.sql @@ -0,0 +1,16 @@ +CREATE TABLE avatars ( + id UUID NOT NULL PRIMARY KEY, + url TEXT NOT NULL +); + +CREATE TABLE avatar_modifiers ( + /* Both an primary and foreign key, yikes! 
*/ + id UUID NOT NULL PRIMARY KEY REFERENCES avatars, + grayscale BOOLEAN NOT NULL +); + +CREATE TABLE users ( + id UUID NOT NULL PRIMARY KEY, + avatar_id UUID NOT NULL REFERENCES avatars, + email TEXT NOT NULL +); diff --git a/testdata/faker/faker.yml b/testdata/faker/faker.yml new file mode 100644 index 0000000..6ca3916 --- /dev/null +++ b/testdata/faker/faker.yml @@ -0,0 +1,4 @@ +rows: + users:uuid(fooBar): + id: users:uuid(fooBar) + email: email(fooBar) diff --git a/testdata/faker/schema.sql b/testdata/faker/schema.sql new file mode 100644 index 0000000..ca0cba4 --- /dev/null +++ b/testdata/faker/schema.sql @@ -0,0 +1,5 @@ +CREATE TABLE users ( + id UUID NOT NULL, + email TEXT NOT NULL, + PRIMARY KEY (id) +); diff --git a/testdata/faker/validate.sql b/testdata/faker/validate.sql new file mode 100644 index 0000000..b76ce88 --- /dev/null +++ b/testdata/faker/validate.sql @@ -0,0 +1,7 @@ +WITH test AS ( + SELECT count(*) as count FROM users + WHERE email = 'nelsonyost@russel.biz' + AND id = '6b30cfb0-a35b-4584-a035-1334515f846b' +) +SELECT (select count from test),'email: ' || users.email || ' id: ' || users.id +FROM users; diff --git a/testdata/multiple_primary_keys/multiple_primary_keys.yml b/testdata/multiple_primary_keys/multiple_primary_keys.yml new file mode 100644 index 0000000..1e2bab0 --- /dev/null +++ b/testdata/multiple_primary_keys/multiple_primary_keys.yml @@ -0,0 +1,16 @@ +rows: + users:uuid(fooBar): + id: users:uuid(fooBar) + email: foobar@example.com + workspaces:uuid(fooBarOrg): + id: workspaces:uuid(fooBarOrg) + name: Foo Bar Inc. 
+ workspace_users:uuid(fooBarWorkspaceUser): + ~conflict: user_id, workspace_id + user_id: users:uuid(fooBar) + workspace_id: workspaces:uuid(fooBarOrg) + workspace_user_permissions:uuid(fooBarWorkspaceUserPermission): + ~conflict: user_id, workspace_id + user_id: users:uuid(fooBar) + workspace_id: workspaces:uuid(fooBarOrg) + is_admin: true diff --git a/testdata/multiple_primary_keys/schema.sql b/testdata/multiple_primary_keys/schema.sql new file mode 100644 index 0000000..943ad92 --- /dev/null +++ b/testdata/multiple_primary_keys/schema.sql @@ -0,0 +1,22 @@ +CREATE TABLE users ( + id UUID NOT NULL PRIMARY KEY, + email TEXT NOT NULL +); + +CREATE TABLE workspaces ( + id UUID NOT NULL PRIMARY KEY, + name TEXT NOT NULL +); + +CREATE TABLE workspace_users ( + user_id UUID NOT NULL REFERENCES users, + workspace_id UUID NOT NULL REFERENCES workspaces, + PRIMARY KEY (user_id, workspace_id) +); + +CREATE TABLE workspace_user_permissions ( + user_id UUID NOT NULL REFERENCES users, + workspace_id UUID NOT NULL REFERENCES workspaces, + is_admin BOOLEAN NOT NULL, + PRIMARY KEY (user_id, workspace_id) +); diff --git a/testdata/templates/schema.sql b/testdata/templates/schema.sql new file mode 100644 index 0000000..c897188 --- /dev/null +++ b/testdata/templates/schema.sql @@ -0,0 +1,16 @@ +CREATE TABLE avatars ( + id UUID NOT NULL PRIMARY KEY, + url TEXT NOT NULL +); + +CREATE TABLE avatar_modifiers ( + /* Both an primary and foreign key, yikes! 
*/ + id UUID NOT NULL PRIMARY KEY REFERENCES avatars, + grayscale BOOLEAN NOT NULL +); + +CREATE TABLE users ( + id UUID NOT NULL PRIMARY KEY, + avatar_id UUID NOT NULL REFERENCES avatars, + email TEXT NOT NULL +); diff --git a/testdata/templates/template_user.yml b/testdata/templates/template_user.yml new file mode 100644 index 0000000..fd5b5e0 --- /dev/null +++ b/testdata/templates/template_user.yml @@ -0,0 +1,12 @@ +rows: + {{ .rowId }}: + id: {{ .rowId }} + email: {{ .email }} + avatar_id: avatars:uuid({{ .rowId }}) + avatars:uuid({{ .rowId }}): + id: avatars:uuid({{ .rowId }}) + url: {{ .avatarUrl }} + avatar_modifiers:uuid({{ .rowId }}): + ~conflict: id + id: avatars:uuid({{ .rowId }}) + grayscale: {{ .avatarGrayscale }} diff --git a/testdata/templates/templates.yml b/testdata/templates/templates.yml new file mode 100644 index 0000000..b22f1ed --- /dev/null +++ b/testdata/templates/templates.yml @@ -0,0 +1,6 @@ +rows: + users:uuid(fooBar): + template: template_user.yml + email: foobar@example.com + avatarUrl: image.png + avatarGrayscale: false