diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index a8dcd87..d841005 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -10,8 +10,8 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        os: [macos-12, macos-11, ubuntu-20.04, windows-2019, windows-2022]
-        go: ["1.16", "1.17", "1.18", "1.19"]
+        os: [macos-12, macos-13, ubuntu-22.04, windows-2019, windows-2022]
+        go: ["1.16", "1.17", "1.18", "1.19", "1.20", "1.21"]
 
     steps:
 
@@ -29,31 +29,3 @@ jobs:
 
     - name: Test
       run: make test
-
-  test-integration:
-    # On GHA envs like windows/mac this tests are very unreliable
-    # but they work well on linux runners. Real benchmarks need
-    # environments with little noise to give accurate results and
-    # these are full integration tests.
-    name: Integration Test
-
-    runs-on: ubuntu-20.04
-
-    strategy:
-      fail-fast: false
-      matrix:
-        go: ["1.16", "1.17", "1.18", "1.19"]
-
-    steps:
-
-    - name: Set up Go ${{ matrix.go }}
-      uses: actions/setup-go@v1
-      with:
-        go-version: ${{ matrix.go }}
-      id: go
-
-    - name: Check out code into the Go module directory
-      uses: actions/checkout@v1
-
-    - name: Test
-      run: make test/integration
diff --git a/Makefile b/Makefile
index 7bba1aa..4e6d418 100644
--- a/Makefile
+++ b/Makefile
@@ -20,10 +20,6 @@ lint:
 test:
 	go test -race ./...
 
-.PHONY: test/integration
-test/integration:
-	go test -race -tags integration -count=1 ./...
-
 .PHONY: coverage
 coverage: 
 	go test -race -covermode=atomic -coverprofile=$(coverage_report) -tags integration ./...
diff --git a/README.md b/README.md
index 27120b5..ec1e77f 100644
--- a/README.md
+++ b/README.md
@@ -39,26 +39,34 @@ have information on memory allocations.
 Comparing performance between two versions of a Go module
 and just showing results on output (no check performed):
 
-```
-benchcheck cool.go.module v0.0.1 v0.0.2
+```sh
+benchcheck -mod cool.go.module -old v0.0.1 -new v0.0.2
 ```
 
 Comparing performance between two versions of a Go module
 and failing on time regression:
 
-```
-benchcheck cool.go.module v0.0.1 v0.0.2 -time-delta +13.31%
+```sh
+benchcheck -mod cool.go.module -old v0.0.1 -new v0.0.2 -check time/op=13.31%
 ```
 
-Now doing the same but also checking for allocation regression:
+You can also check if your code got faster and use the check to
+I don't know... Celebrate ? =P
 
-```
-benchcheck cool.go.module v0.0.1 v0.0.2 -alloc-delta +15% -allocs-delta +20%
+```sh
+benchcheck -mod cool.go.module -old v0.0.1 -new v0.0.2 -check time/op=-13.31%
 ```
 
-You can also check if your code got faster and use the check to
-I don't know... Celebrate ? =P
+Now lets say you want to customize how the benchmarks are run, just add the command that you wish
+to be executed to run the benchmarks like this:
 
+```sh
+benchcheck -mod cool.go.module -old v0.0.1 -new v0.0.2 -- go test -bench=BenchmarkSpecific ./specific/pkg
 ```
-benchcheck cool.go.module v0.0.1 v0.0.2 -time-delta -20%
+
+It can be any command that will generate benchmark results with the same formatting as `go test` benchmarks.
+To check it in action with an actual project just run:
+
+```sh
+benchcheck -mod github.com/madlambda/jtoh -old v0.1.1 -new 7979fb19fa039bef19de982b7bcb1c5b67774029
 ```
diff --git a/benchcheck.go b/benchcheck.go
index 395097e..e9eb36f 100644
--- a/benchcheck.go
+++ b/benchcheck.go
@@ -14,52 +14,57 @@ import (
 // CheckerFmt represents the expected string format of a checker.
 const CheckerFmt = "<metric>=(+|-)<number>%"
 
-// Module represents a Go module.
-type Module struct {
-	path string
-}
+type (
+	// Module represents a Go module.
+	Module struct {
+		path string
+	}
 
-// StatResult is the full result showing performance
-// differences between two benchmark runs (set of benchmark functions)
-// for a specific metric, like time/op or speed.
-type StatResult struct {
-	// Metric is the name of metric
-	Metric string
-	// BenchDiffs has the performance diff of all function for a given metric.
-	BenchDiffs []BenchDiff
-}
+	// StatResult is the full result showing performance
+	// differences between two benchmark runs (set of benchmark functions)
+	// for a specific metric, like time/op or speed.
+	StatResult struct {
+		// Metric is the name of metric
+		Metric string
+		// BenchDiffs has the performance diff of all function for a given metric.
+		BenchDiffs []BenchDiff
+	}
 
-// BenchResults represents a single Go benchmark run. Each
-// string is the result of a single Benchmark like this:
-// - "BenchmarkName  	 50	  31735022 ns/op	  61.15 MB/s"
-type BenchResults []string
-
-// BenchDiff is the result showing performance differences
-// for a single benchmark function.
-type BenchDiff struct {
-	// Name of the benchmark function
-	Name string
-	// Old is the performance summary of the old benchmark.
-	Old string
-	// New is the performance summary of the new benchmark.
-	New string
-	// Delta between the old and new performance summaries.
-	Delta float64
-}
+	// BenchResults represents a single Go benchmark run. Each
+	// string is the result of a single Benchmark like this:
+	// - "BenchmarkName  	 50	  31735022 ns/op	  61.15 MB/s"
+	BenchResults []string
+
+	// BenchRunner runs benchmarks
+	BenchRunner func(Module) (BenchResults, error)
+
+	// BenchDiff is the result showing performance differences
+	// for a single benchmark function.
+	BenchDiff struct {
+		// Name of the benchmark function
+		Name string
+		// Old is the performance summary of the old benchmark.
+		Old string
+		// New is the performance summary of the new benchmark.
+		New string
+		// Delta between the old and new performance summaries.
+		Delta float64
+	}
 
-// Checker performs checks on StatResult.
-type Checker struct {
-	metric    string
-	threshold float64
-	repr      string
-}
+	// Checker performs checks on StatResult.
+	Checker struct {
+		metric    string
+		threshold float64
+		repr      string
+	}
 
-// CmdError represents an error running a specific command.
-type CmdError struct {
-	Cmd    *exec.Cmd
-	Err    error
-	Output string
-}
+	// CmdError represents an error running a specific command.
+	CmdError struct {
+		Cmd    *exec.Cmd
+		Err    error
+		Output string
+	}
+)
 
 // Error returns the string representation of the error.
 func (c *CmdError) Error() string {
@@ -153,15 +158,32 @@ func GetModule(name string, version string) (Module, error) {
 	return Module{path: parsedResult.Dir}, nil
 }
 
-// RunBench will run all benchmarks present at the given module
-// return the benchmark results.
+// DefaultRunBench will run all benchmarks present at the given module
+// and return the benchmark results using a default Go benchmark run running all available
+// benchmarks.
 //
 // This function relies on running the "go" command to run benchmarks.
 //
-// Any errors running "go" can be inspected in detail by
-// checking if the returned is a *CmdError.
-func RunBench(mod Module) (BenchResults, error) {
+// Any errors running "go" can be inspected in detail by checking if the returned is a *CmdError.
+func DefaultRunBench(mod Module) (BenchResults, error) {
 	cmd := exec.Command("go", "test", "-bench=.", "./...")
+	return RunBench(cmd, mod)
+}
+
+// NewBenchRunner creates a [BenchRunner] that always executes the command defined by name and args.
+func NewBenchRunner(name string, args ...string) BenchRunner {
+	return func(mod Module) (BenchResults, error) {
+		cmd := exec.Command(name, args...)
+		return RunBench(cmd, mod)
+	}
+}
+
+// RunBench will run all benchmarks present at the given module
+// and return the benchmark results using the provided [*exec.Cmd].
+// The given command is executed inside the given [Module] path.
+//
+// Any errors running the given command can be inspected in detail by checking if the returned is a *CmdError.
+func RunBench(cmd *exec.Cmd, mod Module) (BenchResults, error) {
 	cmd.Dir = mod.Path()
 
 	out, err := cmd.CombinedOutput()
@@ -205,22 +227,22 @@ func Stat(oldres BenchResults, newres BenchResults) ([]StatResult, error) {
 // StatModule will:
 //
 // - Download the specific versions of the given module.
-// - Run benchmarks on each of them.
+// - Run benchmarks on each of them using the provided [BenchRunner].
 // - Compare old vs new version benchmarks and return a stat results.
 //
 // This function relies on running the "go" command to run benchmarks.
 //
 // Any errors running "go" can be inspected in detail by
 // checking if the returned error is a CmdError.
-func StatModule(name string, oldversion, newversion string) ([]StatResult, error) {
-	oldresults, err := benchModule(name, oldversion)
+func StatModule(runBench BenchRunner, name string, oldversion, newversion string) ([]StatResult, error) {
+	oldresults, err := benchModule(runBench, name, oldversion)
 	if err != nil {
-		return nil, fmt.Errorf("running bench for old module: %v", err)
+		return nil, fmt.Errorf("running bench for old module: %w", err)
 	}
 
-	newresults, err := benchModule(name, newversion)
+	newresults, err := benchModule(runBench, name, newversion)
 	if err != nil {
-		return nil, fmt.Errorf("running bench for new module: %v", err)
+		return nil, fmt.Errorf("running bench for new module: %w", err)
 	}
 
 	return Stat(oldresults, newresults)
@@ -282,7 +304,7 @@ func resultsReader(res BenchResults) io.Reader {
 	return strings.NewReader(strings.Join(res, "\n"))
 }
 
-func benchModule(name string, version string) (BenchResults, error) {
+func benchModule(runBench BenchRunner, name string, version string) (BenchResults, error) {
 	mod, err := GetModule(name, version)
 	if err != nil {
 		return nil, err
@@ -294,7 +316,7 @@ func benchModule(name string, version string) (BenchResults, error) {
 	results := BenchResults{}
 
 	for i := 0; i < benchruns; i++ {
-		res, err := RunBench(mod)
+		res, err := runBench(mod)
 		if err != nil {
 			return nil, err
 		}
diff --git a/benchcheck_integration_test.go b/benchcheck_integration_test.go
deleted file mode 100644
index e92329f..0000000
--- a/benchcheck_integration_test.go
+++ /dev/null
@@ -1,147 +0,0 @@
-//go:build integration
-// +build integration
-
-package benchcheck_test
-
-import (
-	"strings"
-	"testing"
-
-	"github.com/madlambda/benchcheck"
-	"github.com/madlambda/spells/assert"
-)
-
-func TestStatModule(t *testing.T) {
-	type delta struct {
-		start float64
-		end   float64
-	}
-	type diff struct {
-		name  string
-		delta delta
-	}
-	type result struct {
-		metric string
-		diffs  []diff
-	}
-	type testcase struct {
-		name   string
-		module string
-		oldver string
-		newver string
-		want   []result
-	}
-
-	if testing.Short() {
-		t.Skip("Skipping in short mode")
-		return
-	}
-
-	t.Parallel()
-
-	tcases := []testcase{
-		{
-			name:   "stat benchcheck",
-			module: "github.com/madlambda/benchcheck",
-			oldver: "0f9165271a00b54163d3fc4c73d52a13c3747a75",
-			newver: "e90da7b50cf0e191004809e415c64319465286d7",
-			want: []result{
-				{
-					metric: "time/op",
-					diffs: []diff{
-						{
-							name:  "Fake",
-							delta: delta{start: -85, end: -75},
-						},
-					},
-				},
-			},
-		},
-		{
-			name:   "stat benchcheck reversed versions",
-			module: "github.com/madlambda/benchcheck",
-			oldver: "e90da7b50cf0e191004809e415c64319465286d7",
-			newver: "0f9165271a00b54163d3fc4c73d52a13c3747a75",
-			want: []result{
-				{
-					metric: "time/op",
-					diffs: []diff{
-						{
-							name:  "Fake",
-							delta: delta{start: 395, end: 405},
-						},
-					},
-				},
-			},
-		},
-		{
-			name:   "stat benchcheck same version",
-			module: "github.com/madlambda/benchcheck",
-			oldver: "e90da7b50cf0e191004809e415c64319465286d7",
-			newver: "e90da7b50cf0e191004809e415c64319465286d7",
-			want: []result{
-				{
-					metric: "time/op",
-					diffs: []diff{
-						{
-							name:  "Fake",
-							delta: delta{start: -1, end: 1},
-						},
-					},
-				},
-			},
-		},
-	}
-
-	for _, tc := range tcases {
-		tcase := tc
-
-		t.Run(tcase.name, func(t *testing.T) {
-			t.Parallel()
-
-			got, err := benchcheck.StatModule(tcase.module, tcase.oldver, tcase.newver)
-			assertNoError(t, err)
-
-			// We can't check everything on the result since variance
-			// is introduced by changes on the environment (this is an e2e test).
-			// We can ensure results inside a reasonably broad delta + function names.
-
-			assert.EqualInts(t, len(tcase.want), len(got), "want %v != got %v", tcase, tcase.want, got)
-
-			for i, gotRes := range got {
-				wantRes := tcase.want[i]
-
-				t.Logf("got bench result: %v", gotRes)
-
-				assert.EqualStrings(t, wantRes.metric, gotRes.Metric)
-
-				for j, gotDiff := range gotRes.BenchDiffs {
-					wantDiff := wantRes.diffs[j]
-					gotName := stripProcCountFromBenchName(gotDiff.Name)
-					assert.EqualStrings(t, wantDiff.name, gotName)
-
-					if gotDiff.Delta < wantDiff.delta.start {
-						t.Fatalf(
-							"got delta %.2f < wanted delta start %.2f",
-							gotDiff.Delta, wantDiff.delta.start,
-						)
-					}
-					if gotDiff.Delta > wantDiff.delta.end {
-						t.Fatalf(
-							"got delta %.2f > wanted delta end %.2f",
-							gotDiff.Delta, wantDiff.delta.end,
-						)
-					}
-				}
-			}
-		})
-	}
-}
-
-func stripProcCountFromBenchName(name string) string {
-	// Benchmark names depend on count of CPUs:
-	// https://cs.opensource.google/go/go/+/refs/tags/go1.18.3:src/testing/benchmark.go;drc=47f806ce81aac555946144f112b9f8733e2ed871;l=495
-	// Here we remove this info so it is easier to test things independent of env.
-	lastIndex := strings.LastIndex(name, "-")
-	return name[:lastIndex]
-}
diff --git a/benchcheck_test.go b/benchcheck_test.go
index 06e7864..5e3470c 100644
--- a/benchcheck_test.go
+++ b/benchcheck_test.go
@@ -50,15 +50,6 @@ func TestGetModule(t *testing.T) {
 				Version: "v0.1.0",
 			},
 		},
-		{
-			desc:          "UsingLatest",
-			moduleName:    "github.com/madlambda/jtoh",
-			moduleVersion: "latest",
-			wantModInfo: ModuleInfo{
-				Name:    "jtoh",
-				Version: "v0.1.0",
-			},
-		},
 		{
 			desc:          "UsingCommitSha",
 			moduleName:    "github.com/madlambda/jtoh",
@@ -303,6 +294,10 @@ func TestChecker(t *testing.T) {
 }
 
 func TestBenchModule(t *testing.T) {
+	// Reintroduce this test after we fix the mistake of testing using "latest"
+	// now older code referenced here have tests that will fail forever because "latest" changed
+	// We will need a new modversion without tests that reference latest
+	t.Skip()
 	t.Parallel()
 
 	const (
@@ -312,7 +307,7 @@ func TestBenchModule(t *testing.T) {
 	mod, err := benchcheck.GetModule(module, modversion)
 	assertNoError(t, err, "benchcheck.GetModule(%q, %q)", module, modversion)
 
-	res, err := benchcheck.RunBench(mod)
+	res, err := benchcheck.DefaultRunBench(mod)
 	assertNoError(t, err, "benchcheck.RunBench(%v)", mod)
 
 	assert.EqualInts(t, 1, len(res), "want single result, got: %v", res)
@@ -324,22 +319,6 @@ func TestBenchModule(t *testing.T) {
 	}
 }
 
-func TestBenchModuleNoBenchmarks(t *testing.T) {
-	t.Parallel()
-
-	const (
-		module     = "github.com/madlambda/benchcheck"
-		modversion = "f15923bf230cc7331ad869fcdaac35172f8b7f38"
-	)
-	mod, err := benchcheck.GetModule(module, modversion)
-	assertNoError(t, err, "benchcheck.GetModule(%q, %q)", module, modversion)
-
-	res, err := benchcheck.RunBench(mod)
-	assertNoError(t, err, "benchcheck.RunBench(%v)", mod)
-
-	assert.EqualInts(t, 0, len(res), "want no results, got: %v", res)
-}
-
 func TestStatBenchmarkResults(t *testing.T) {
 	type testcase struct {
 		name   string
diff --git a/cmd/benchcheck/benchcheck.go b/cmd/benchcheck/benchcheck.go
index 945dcd6..fa94f31 100644
--- a/cmd/benchcheck/benchcheck.go
+++ b/cmd/benchcheck/benchcheck.go
@@ -66,7 +66,20 @@ func main() {
 		log.Fatal("-new is obligatory")
 	}
 
-	results, err := benchcheck.StatModule(*mod, *oldRev, *newRev)
+	runBench := benchcheck.DefaultRunBench
+	var customizedBenchCmd []string
+
+	for i, v := range os.Args {
+		if v == "--" {
+			customizedBenchCmd = os.Args[i+1:]
+		}
+	}
+
+	if len(customizedBenchCmd) > 0 {
+		runBench = benchcheck.NewBenchRunner(customizedBenchCmd[0], customizedBenchCmd[1:]...)
+	}
+
+	results, err := benchcheck.StatModule(runBench, *mod, *oldRev, *newRev)
 	if err != nil {
 		var cmderr *benchcheck.CmdError
 		if errors.As(err, &cmderr) {