diff --git a/benchmark/README.md b/benchmark/README.md
index 3d71c865..93d32b0a 100644
--- a/benchmark/README.md
+++ b/benchmark/README.md
@@ -222,8 +222,42 @@ You can use python script to run the benchmark:
 python3 benchmark/tools/driving.py
 ```
 
-## Benchmark runner
+## Results on another machine
 
-### Usage
-- `make -C ./benchmark`
-- `python3 ./benchmark/run_benchmark.py`
+Kernel:
+
+```txt
+Benchmarking __bench_uprobe_uretprobe in thread 1
+Average time usage 3060.196770 ns, iter 100000 times
+
+Benchmarking __bench_uretprobe in thread 1
+Average time usage 2958.493390 ns, iter 100000 times
+
+Benchmarking __bench_uprobe in thread 1
+Average time usage 1910.731360 ns, iter 100000 times
+
+Benchmarking __bench_read in thread 1
+Average time usage 1957.552190 ns, iter 100000 times
+
+Benchmarking __bench_write in thread 1
+Average time usage 1955.735460 ns, iter 100000 times
+```
+
+Userspace:
+
+```txt
+Benchmarking __bench_uprobe_uretprobe in thread 1
+Average time usage 412.607790 ns, iter 100000 times
+
+Benchmarking __bench_uretprobe in thread 1
+Average time usage 389.096230 ns, iter 100000 times
+
+Benchmarking __bench_uprobe in thread 1
+Average time usage 387.022160 ns, iter 100000 times
+
+Benchmarking __bench_read in thread 1
+Average time usage 415.350530 ns, iter 100000 times
+
+Benchmarking __bench_write in thread 1
+Average time usage 414.350230 ns, iter 100000 times
+```
diff --git a/benchmark/hash_map/README.md b/benchmark/hash_map/README.md
index c0b80d5a..092e26c9 100644
--- a/benchmark/hash_map/README.md
+++ b/benchmark/hash_map/README.md
@@ -1,8 +1,8 @@
 # benchmark of hash maps
 
-- __benchmark_test_function1: hashmap bpf_map_lookup_elem
-- __benchmark_test_function2: hashmap bpf_map_delete_elem
-- __benchmark_test_function3: hashmap bpf_map_update_elem
+- __bench_uprobe_uretprobe: hashmap bpf_map_lookup_elem
+- __bench_uretprobe: hashmap bpf_map_delete_elem
+- __bench_uprobe: hashmap bpf_map_update_elem
 
 run the uprobe:
 
@@ -23,17 +23,17 @@ in another terminal, run the benchmark:
 ```console
 $ LD_PRELOAD=build/runtime/agent/libbpftime-agent.so benchmark/test
 
-Benchmarking __benchmark_test_function1
+Benchmarking __bench_uprobe_uretprobe
 a[b] + c for 100000 times
 Elapsed time: 0.038217773 seconds
 Average time usage 382.177730 ns
 
-Benchmarking __benchmark_test_function2
+Benchmarking __bench_uretprobe
 a[b] + c for 100000 times
 Elapsed time: 0.020004455 seconds
 Average time usage 200.044550 ns
 
-Benchmarking __benchmark_test_function3
+Benchmarking __bench_uprobe
 a[b] + c for 100000 times
 Elapsed time: 0.047916014 seconds
 Average time usage 479.160140 ns
diff --git a/benchmark/hash_map/uprobe.bpf.c b/benchmark/hash_map/uprobe.bpf.c
index ca9bc011..8cbc53be 100644
--- a/benchmark/hash_map/uprobe.bpf.c
+++ b/benchmark/hash_map/uprobe.bpf.c
@@ -10,7 +10,7 @@ struct {
 	__type(value, u64);
 } libc_malloc_calls_total SEC(".maps");
 
-SEC("uprobe/benchmark/test:__benchmark_test_function3")
+SEC("uprobe/benchmark/test:__bench_uprobe")
 int test_update(struct pt_regs *ctx)
 {
 	u32 key = 0;
@@ -20,7 +20,7 @@ int test_update(struct pt_regs *ctx)
 	return 0;
 }
 
-SEC("uprobe/benchmark/test:__benchmark_test_function2")
+SEC("uprobe/benchmark/test:__bench_uretprobe")
 int test_delete(struct pt_regs *ctx)
 {
 	u32 key = 0;
@@ -30,7 +30,7 @@ int test_delete(struct pt_regs *ctx)
 	return 0;
 }
 
-SEC("uprobe/benchmark/test:__benchmark_test_function1")
+SEC("uprobe/benchmark/test:__bench_uprobe_uretprobe")
 int test_lookup(struct pt_regs *ctx)
 {
 	u32 key = 0;
diff --git a/benchmark/run_benchmark.py b/benchmark/run_benchmark.py
index 5f1b964c..a2a78bea 100644
--- a/benchmark/run_benchmark.py
+++ b/benchmark/run_benchmark.py
@@ -84,7 +84,7 @@ async def run_userspace_uprobe_test():
             server.stdout,
             should_exit,
             "SERVER",
-            (server_start_cb, "__benchmark_test_function3 is for uprobe only"),
+            (server_start_cb, "__bench_probe is for uprobe only"),
         )
     )
     await server_start_cb.wait()
@@ -131,7 +131,7 @@ async def run_kernel_uprobe_test():
             server.stdout,
             should_exit,
             "SERVER",
-            (server_start_cb, "__benchmark_test_function3 is for uprobe only"),
+            (server_start_cb, "__bench_probe is for uprobe only"),
         )
     )
     await server_start_cb.wait()
diff --git a/benchmark/test.c b/benchmark/test.c
index ae654c58..81bd5d77 100644
--- a/benchmark/test.c
+++ b/benchmark/test.c
@@ -4,27 +4,37 @@
 #include <stdint.h>
 #include <pthread.h>
 
-__attribute_noinline__ uint64_t __benchmark_test_function3(const char *a, int b,
+__attribute_noinline__ uint64_t __bench_read(char *a, int b,
 							   uint64_t c)
 {
 	return a[b] + c;
 }
 
-__attribute_noinline__ uint64_t __benchmark_test_function2(const char *a, int b,
+__attribute_noinline__ uint64_t __bench_write(char *a, int b,
 							   uint64_t c)
 {
-	static int i = 0;
-	__sync_fetch_and_add(&i, 1);
 	return a[b] + c;
 }
 
-__attribute_noinline__ uint64_t __benchmark_test_function1(const char *a, int b,
+__attribute_noinline__ uint64_t __bench_uprobe(char *a, int b,
 							   uint64_t c)
 {
 	return a[b] + c;
 }
 
-typedef uint64_t (*benchmark_test_function_t)(const char *, int, uint64_t);
+__attribute_noinline__ uint64_t __bench_uretprobe(char *a, int b,
+							   uint64_t c)
+{
+	return a[b] + c;
+}
+
+__attribute_noinline__ uint64_t __bench_uprobe_uretprobe(char *a, int b,
+							   uint64_t c)
+{
+	return a[b] + c;
+}
+
+typedef uint64_t (*benchmark_test_function_t)(char *, int, uint64_t);
 
 void start_timer(struct timespec *start_time)
 {
@@ -53,9 +63,10 @@ static double get_function_time(benchmark_test_function_t func, int iter)
 	// The timespec struct holds seconds and nanoseconds
 	struct timespec start_time, end_time;
 	start_timer(&start_time);
+	char buffer[20] = "hello world";
 	// test base line
 	for (int i = 0; i < iter; i++) {
-		func("hello", i % 4, i);
+		func(buffer, i % 4, i);
 	}
 	end_timer(&end_time);
 	double time = get_elapsed_time(start_time, end_time);
@@ -83,9 +94,11 @@ void *run_bench_functions(void *id_ptr)
 {
 	int id = *(int *)id_ptr;
 	printf("id: %d\n", id);
-	do_benchmark_func(__benchmark_test_function1, iter, id);
-	do_benchmark_func(__benchmark_test_function2, iter, id);
-	do_benchmark_func(__benchmark_test_function3, iter, id);
+	do_benchmark_func(__bench_uprobe_uretprobe, iter, id);
+	do_benchmark_func(__bench_uretprobe, iter, id);
+	do_benchmark_func(__bench_uprobe, iter, id);
+	do_benchmark_func(__bench_read, iter, id);
+	do_benchmark_func(__bench_write, iter, id);
 	return NULL;
 }
 
diff --git a/benchmark/test_embed.c b/benchmark/test_embed.c
index 4fc90266..ee595972 100644
--- a/benchmark/test_embed.c
+++ b/benchmark/test_embed.c
@@ -76,7 +76,7 @@ void end_timer()
 	clock_gettime(CLOCK_MONOTONIC_RAW, &end_time);
 }
 
-__attribute_noinline__ uint64_t __benchmark_test_function3(const char *a, int b,
+__attribute_noinline__ uint64_t __bench_probe(const char *a, int b,
 							   uint64_t c)
 {
 	return a[b] + c;
@@ -93,7 +93,7 @@ uint64_t test_func_wrapper(const char *a, int b, uint64_t c)
 		PT_REGS_PARM3(&regs) = c;
 		ebpf_exec(begin_vm, &regs, sizeof(regs), &ret);
 	}
-	uint64_t hook_func_ret = __benchmark_test_function3(a, b, c);
+	uint64_t hook_func_ret = __bench_probe(a, b, c);
 	if (enable_ebpf) {
 		memset(&regs, 0, sizeof(regs));
 		PT_REGS_PARM1(&regs) = hook_func_ret;
diff --git a/benchmark/uprobe/uprobe-override.c b/benchmark/uprobe/uprobe-override.c
index a16003f2..96b9f5da 100644
--- a/benchmark/uprobe/uprobe-override.c
+++ b/benchmark/uprobe/uprobe-override.c
@@ -55,7 +55,7 @@ int main(int argc, char **argv)
 	}
 	err = bpf_prog_attach_uprobe_with_override(
 		bpf_program__fd(skel->progs.do_uprobe_override_patch), "benchmark/test",
-		"__benchmark_test_function1");
+		"__bench_uprobe_uretprobe");
 	if (err) {
 		fprintf(stderr, "Failed to attach BPF program\n");
 		goto cleanup;
diff --git a/benchmark/uprobe/uprobe.bpf.c b/benchmark/uprobe/uprobe.bpf.c
index 7ac7471e..5450828d 100644
--- a/benchmark/uprobe/uprobe.bpf.c
+++ b/benchmark/uprobe/uprobe.bpf.c
@@ -3,25 +3,41 @@
 #include <bpf/bpf_helpers.h>
 #include <bpf/bpf_tracing.h>
 
-SEC("uprobe/benchmark/test:__benchmark_test_function3")
-int BPF_UPROBE(__benchmark_test_function3, const char *a, int b, uint64_t c)
+SEC("uprobe/benchmark/test:__bench_write")
+int BPF_UPROBE(__bench_write, char *a, int b, uint64_t c)
 {
+	char buffer[5] = "text";
+	bpf_probe_write_user(a, buffer, sizeof(buffer));
 	return b + c;
 }
 
-SEC("uretprobe/benchmark/test:__benchmark_test_function2")
-int BPF_URETPROBE(__benchmark_test_function2, int ret)
+SEC("uprobe/benchmark/test:__bench_read")
+int BPF_UPROBE(__bench_read, char *a, int b, uint64_t c)
+{
+	char buffer[5];
+	int res = bpf_probe_read_user(buffer, sizeof(buffer), a);
+	return b + c + res + buffer[1];
+}
+
+SEC("uprobe/benchmark/test:__bench_uprobe")
+int BPF_UPROBE(__bench_uprobe, char *a, int b, uint64_t c)
+{
+	return b + c;
+}
+
+SEC("uretprobe/benchmark/test:__bench_uretprobe")
+int BPF_URETPROBE(__bench_uretprobe, int ret)
 {
 	return ret;
 }
 
-SEC("uprobe/benchmark/test:__benchmark_test_function1")
-int BPF_UPROBE(__benchmark_test_function1_1, const char *a, int b, uint64_t c)
+SEC("uprobe/benchmark/test:__bench_uprobe_uretprobe")
+int BPF_UPROBE(__bench_uprobe_uretprobe_1, char *a, int b, uint64_t c)
 {
 	return b + c;
 }
 
-SEC("uretprobe/benchmark/test:__benchmark_test_function1")
+SEC("uretprobe/benchmark/test:__bench_uprobe_uretprobe")
 int BPF_URETPROBE(__benchmark_test_function_1_2, int ret)
 {
 	return ret;
diff --git a/benchmark/uprobe/uprobe.c b/benchmark/uprobe/uprobe.c
index 38e52abb..9eff05c3 100644
--- a/benchmark/uprobe/uprobe.c
+++ b/benchmark/uprobe/uprobe.c
@@ -57,9 +57,9 @@ int main(int argc, char **argv)
 	}
 
 	printf("Successfully started! Press Ctrl+C to stop.\n");
-	printf("__benchmark_test_function1 is for both uprobe and uretprobe\n");
-	printf("__benchmark_test_function2 is for uretprobe only\n");
-	printf("__benchmark_test_function3 is for uprobe only\n");
+	printf("__bench_uprobe_uretprobe is for both uprobe and uretprobe\n");
+	printf("__bench_uretprobe is for uretprobe only\n");
+	printf("__bench_probe is for uprobe only\n");
 	fflush(stdout);
 	while (!exiting) {
 		sleep(1);