diff --git a/.gitignore b/.gitignore
index 6994dc0..0a728af 100644
--- a/.gitignore
+++ b/.gitignore
@@ -14,6 +14,8 @@
# test data output
testdata/
+venv/
+
# Output of the go coverage tool, specifically when used with LiteIDE
*.out
diff --git a/.idea/.gitignore b/.idea/.gitignore
new file mode 100644
index 0000000..13566b8
--- /dev/null
+++ b/.idea/.gitignore
@@ -0,0 +1,8 @@
+# Default ignored files
+/shelf/
+/workspace.xml
+# Editor-based HTTP Client requests
+/httpRequests/
+# Datasource local storage ignored files
+/dataSources/
+/dataSources.local.xml
diff --git a/.idea/db.iml b/.idea/db.iml
new file mode 100644
index 0000000..f4a2bef
--- /dev/null
+++ b/.idea/db.iml
@@ -0,0 +1,11 @@
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/inspectionProfiles/Project_Default.xml b/.idea/inspectionProfiles/Project_Default.xml
new file mode 100644
index 0000000..5c038fe
--- /dev/null
+++ b/.idea/inspectionProfiles/Project_Default.xml
@@ -0,0 +1,33 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/misc.xml b/.idea/misc.xml
new file mode 100644
index 0000000..4af1825
--- /dev/null
+++ b/.idea/misc.xml
@@ -0,0 +1,10 @@
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/modules.xml b/.idea/modules.xml
new file mode 100644
index 0000000..7aa31ea
--- /dev/null
+++ b/.idea/modules.xml
@@ -0,0 +1,8 @@
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/vcs.xml b/.idea/vcs.xml
new file mode 100644
index 0000000..b60581b
--- /dev/null
+++ b/.idea/vcs.xml
@@ -0,0 +1,7 @@
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/Makefile b/go/Makefile
similarity index 100%
rename from Makefile
rename to go/Makefile
diff --git a/datafile.go b/go/datafile.go
similarity index 100%
rename from datafile.go
rename to go/datafile.go
diff --git a/entry.go b/go/entry.go
similarity index 100%
rename from entry.go
rename to go/entry.go
diff --git a/entry_test.go b/go/entry_test.go
similarity index 100%
rename from entry_test.go
rename to go/entry_test.go
diff --git a/go.mod b/go/go.mod
similarity index 100%
rename from go.mod
rename to go/go.mod
diff --git a/go.sum b/go/go.sum
similarity index 100%
rename from go.sum
rename to go/go.sum
diff --git a/header.go b/go/header.go
similarity index 100%
rename from header.go
rename to go/header.go
diff --git a/header_test.go b/go/header_test.go
similarity index 100%
rename from header_test.go
rename to go/header_test.go
diff --git a/options.go b/go/options.go
similarity index 100%
rename from options.go
rename to go/options.go
diff --git a/options_test.go b/go/options_test.go
similarity index 100%
rename from options_test.go
rename to go/options_test.go
diff --git a/store_disk.go b/go/store_disk.go
similarity index 100%
rename from store_disk.go
rename to go/store_disk.go
diff --git a/store_disk_test.go b/go/store_disk_test.go
similarity index 100%
rename from store_disk_test.go
rename to go/store_disk_test.go
diff --git a/python/Pipfile b/python/Pipfile
new file mode 100644
index 0000000..30c2ed5
--- /dev/null
+++ b/python/Pipfile
@@ -0,0 +1,13 @@
+[[source]]
+url = "https://pypi.org/simple"
+verify_ssl = true
+name = "pypi"
+
+[packages]
+graphviz = "*"
+
+[dev-packages]
+
+[requires]
+python_version = "3.9"
+python_full_version = "3.9.6"
diff --git a/python/Pipfile.lock b/python/Pipfile.lock
new file mode 100644
index 0000000..71bcdc7
--- /dev/null
+++ b/python/Pipfile.lock
@@ -0,0 +1,31 @@
+{
+ "_meta": {
+ "hash": {
+ "sha256": "b4e57cf75f4f743a2c55d3179e617886271fb2bc9951b2229605ae1d382fcb68"
+ },
+ "pipfile-spec": 6,
+ "requires": {
+ "python_full_version": "3.9.6",
+ "python_version": "3.9"
+ },
+ "sources": [
+ {
+ "name": "pypi",
+ "url": "https://pypi.org/simple",
+ "verify_ssl": true
+ }
+ ]
+ },
+ "default": {
+ "graphviz": {
+ "hashes": [
+ "sha256:09d6bc81e6a9fa392e7ba52135a9d49f1ed62526f96499325930e87ca1b5925d",
+ "sha256:81f848f2904515d8cd359cc611faba817598d2feaac4027b266aa3eda7b3dde5"
+ ],
+ "index": "pypi",
+ "markers": "python_version >= '3.8'",
+ "version": "==0.20.3"
+ }
+ },
+ "develop": {}
+}
diff --git a/python/b_tree_in_memory.py b/python/b_tree_in_memory.py
new file mode 100644
index 0000000..0219338
--- /dev/null
+++ b/python/b_tree_in_memory.py
@@ -0,0 +1,147 @@
+"""
+B+Tree implementation in Python, held entirely in memory (no disk persistence)
+
+
+"""
+
+import bisect
+from math import ceil
+from typing import TypeVar, Optional, List
+
+import graphviz
+
+T = TypeVar("T")
+
+
+class TreeNode:
+    """A single B+Tree node: a sorted key list plus, for internal nodes, children.
+
+    Leaves are additionally chained together through ``next``/``previous``.
+    """
+
+    def __init__(self, is_leaf=False, order=3):
+        """Create an empty node.
+
+        Args:
+            is_leaf: Whether the node is a leaf.
+            order: Maximum number of children of a node; a node may therefore
+                hold at most ``order - 1`` keys before it has to be split.
+        """
+        self.order: int = order  # max number of children in a node
+        self.is_leaf: bool = is_leaf
+        self.keys: List[T] = []  # kept sorted by ``insert``
+        self.children: List["TreeNode"] = []
+        self.parent: Optional["TreeNode"] = None
+
+        # Doubly linked chain across leaves; ``split`` maintains it for leaves only.
+        self.next: Optional["TreeNode"] = None
+        self.previous: Optional["TreeNode"] = None
+
+    def __str__(self):
+        return str(list(self.keys))
+
+    @property
+    def max_num_of_keys(self):
+        """Largest number of keys the node may hold without overflowing."""
+        return self.order - 1
+
+    @property
+    def max_num_of_child(self):
+        """Largest number of children the node may reference."""
+        return self.order
+
+    @property
+    def min_num_of_key(self):
+        """Smallest number of keys, computed as ``ceil(order / 2) - 1``."""
+        return ceil(self.order / 2) - 1
+
+    def insert(self, key):
+        """Insert ``key`` into this node's key list, keeping the list sorted."""
+        bisect.insort(self.keys, key)
+
+    def split(self):
+        """Split this node around its middle key.
+
+        The node keeps the lower half and a newly created sibling receives the
+        upper half. The sibling's ``parent`` is left unset; the caller is
+        responsible for attaching it to the tree.
+
+        * Leaf: the middle key stays in this node (as its largest key) and is
+          also returned so that it can be copied into the parent as separator.
+        * Internal node: the middle key is removed from both halves and only
+          returned, so each half keeps exactly one more child than keys.
+
+        Returns:
+            A ``(mid_key, sibling_node)`` tuple.
+
+        Raises:
+            IndexError: If the node has no keys.
+        """
+        mid_idx = len(self.keys) // 2
+        mid_key = self.keys[mid_idx]
+
+        sibling_node = TreeNode(is_leaf=self.is_leaf, order=self.order)
+        sibling_node.keys = self.keys[mid_idx + 1:]
+
+        if self.is_leaf:
+            self.keys = self.keys[: mid_idx + 1]
+
+            # Splice the sibling into the leaf chain right after this node,
+            # preserving the link to whatever leaf used to follow it.
+            following = self.next
+            sibling_node.previous = self
+            sibling_node.next = following
+            if following is not None:
+                following.previous = sibling_node
+            self.next = sibling_node
+        else:
+            # The separator moves up to the parent, it is not kept here.
+            self.keys = self.keys[:mid_idx]
+
+            # Hand the upper children over to the sibling and re-parent them.
+            sibling_node.children = self.children[mid_idx + 1:]
+            self.children = self.children[: mid_idx + 1]
+            for child in sibling_node.children:
+                child.parent = sibling_node
+
+        return mid_key, sibling_node
+
+
+class BPlusTree:
+    """B+Tree of comparable keys built from ``TreeNode`` objects."""
+
+    def __init__(self, order=4):
+        """Create an empty tree whose root is a single leaf.
+
+        Args:
+            order: Order handed to every ``TreeNode`` created by the tree.
+        """
+        self.root = TreeNode(is_leaf=True, order=order)
+        self.order = order
+
+    def _find_leaf_node(self, node, key) -> TreeNode:
+        """Descend from ``node`` to the leaf responsible for ``key``.
+
+        At each internal node the first child whose separator is ``>= key`` is
+        followed; when ``key`` is greater than every separator the last child
+        is followed.
+        """
+        while not node.is_leaf:
+            idx = bisect.bisect_left(node.keys, key)
+            # Clamp so that "greater than every separator" selects the last
+            # child regardless of how many separators the node stores.
+            node = node.children[min(idx, len(node.children) - 1)]
+        return node
+
+    def _split_and_promote(self, node: TreeNode):
+        """Split ``node`` if it overflows and push the separator upwards.
+
+        The walk continues towards the root for as long as the parent that
+        received a separator overflows as well; a new root is created when the
+        current root is split.
+        """
+        while len(node.keys) > node.max_num_of_keys:
+            mid_key, sibling = node.split()
+            parent = node.parent
+
+            if parent is None:
+                new_root = TreeNode(is_leaf=False, order=self.order)
+                new_root.keys = [mid_key]
+                new_root.children = [node, sibling]
+                node.parent = new_root
+                sibling.parent = new_root
+                self.root = new_root
+                return
+
+            sibling.parent = parent
+
+            # Position by the split child rather than by key value, so that a
+            # separator equal to an existing one still lands in the right slot.
+            position = parent.children.index(node)
+            parent.keys.insert(position, mid_key)
+            parent.children.insert(position + 1, sibling)
+
+            node = parent
+
+    def insert(self, key):
+        """Insert ``key`` into its leaf and rebalance by splitting if needed."""
+        leaf = self._find_leaf_node(self.root, key)
+        leaf.insert(key)
+
+        if len(leaf.keys) > leaf.max_num_of_keys:
+            self._split_and_promote(leaf)
+
+    def find(self, key):
+        """Return ``key`` if it is stored in the tree, otherwise ``None``."""
+        node = self._find_leaf_node(self.root, key)
+        if key in node.keys:
+            return key
+        return None
+
+    def graph(self, filename="graph", view=True):
+        """Render the tree with Graphviz, visiting nodes breadth-first.
+
+        Args:
+            filename: Passed to ``Digraph.render`` as the output file name.
+            view: Passed to ``Digraph.render``; ``True`` opens the result.
+        """
+        from collections import deque
+
+        dot = graphviz.Digraph()
+        dot.attr("node", shape="square")
+
+        pending = deque([self.root])
+        while pending:
+            node = pending.popleft()
+
+            # Use the object identity as graph id: two distinct nodes may hold
+            # the same keys and would otherwise collapse into one drawn node.
+            node_id = str(id(node))
+            dot.node(node_id, str(node.keys))
+
+            if node.parent is not None:
+                dot.edge(str(id(node.parent)), node_id)
+
+            pending.extend(node.children)
+
+        dot.render(filename, view=view)
diff --git a/python/test_b_tree.py b/python/test_b_tree.py
new file mode 100644
index 0000000..fec758c
--- /dev/null
+++ b/python/test_b_tree.py
@@ -0,0 +1,63 @@
+import random
+from unittest import TestCase
+from uuid import uuid4
+
+from python.b_tree_in_memory import BPlusTree
+
+
+class BTreeTest(TestCase):
+    """Insert/lookup tests for ``BPlusTree``."""
+
+    def test_small_inputs(self):
+        """Insert 0..99 in ascending order into an order-5 tree and find each."""
+        test_num_of_keys = 100
+        btree = BPlusTree(order=5)
+        inputs = list(range(test_num_of_keys))
+
+        for i in inputs:
+            try:
+                btree.insert(i)
+            except Exception:
+                # Render the tree to help diagnose the failing insert.
+                btree.graph()
+                self.fail(f"cannot insert key {i}")
+
+        for i in inputs:
+            # Only the lookup is guarded, so the failure raised below is not
+            # swallowed by the handler.
+            try:
+                key = btree.find(key=i)
+            except Exception:
+                self.fail(f"cannot find key {i}")
+            if key is None:
+                btree.graph()
+                self.fail(f"cannot find key {i}")
+
+    def test_insert_and_retrieve(self):
+        """Insert shuffled int and str keys for many orders and find each."""
+        test_num_of_keys = 1000
+        test_orders = 100
+        test_array = [
+            list(range(1, test_num_of_keys + 1)),
+            [str(uuid4()) for _ in range(test_num_of_keys)],
+        ]
+        for inputs in test_array:
+            for order in range(3, test_orders):
+                with self.subTest(
+                    msg=f"test orders {order} with input type {type(inputs[0])}"
+                ):
+                    btree = BPlusTree(order=order)
+
+                    # Shuffle a copy that is actually used for the inserts;
+                    # shuffling a temporary list would have no effect.
+                    shuffled = list(inputs)
+                    random.shuffle(shuffled)
+
+                    for i in shuffled:
+                        btree.insert(i)
+                    for i in inputs:
+                        key = btree.find(key=i)
+                        if key is None:
+                            self.fail(f"cannot find key {i}")
+
+    def test_delete_without_underflow(self):
+        """Delete one key from a small order-4 tree."""
+        btree = BPlusTree(order=4)
+        for i in range(8):
+            btree.insert(i)
+
+        if not hasattr(btree, "delete"):
+            self.skipTest("BPlusTree.delete is not implemented")
+
+        btree.delete(2)
+        # NOTE(review): assumes a deleted key is no longer reported by find().
+        self.assertIsNone(btree.find(key=2))
diff --git a/readme.md b/readme.md
index 60aa0b5..855fd28 100644
--- a/readme.md
+++ b/readme.md
@@ -1,30 +1,14 @@
-# go-caskdb
+Create your own DB from scratch
-[![codecov](https://codecov.io/gh/luqmansen/go-caskdb/branch/master/graph/badge.svg)](https://codecov.io/gh/luqmansen/go-caskdb)
-[![Actions Status](https://github.com/luqmansen/go-caskdb/actions/workflows/test.yml/badge.svg)](https://github.com/luqmansen/go-caskdb/actions/workflows/test.yml)
+- Storage Engine
+ - B+Tree
+ - SSTable
+- Page Cache
+- Execution Engine
+- SQL Parser
+ - SQL Optimizer
+- Server
-[Riak's Bitcask paper](https://riak.com/assets/bitcask-intro.pdf) implementation in Golang
+Distributed System
-## Todo
-
-- [ ] Implement key deletion
-- [ ] Implement CRC
-- [ ] Implement Max file size
-- [ ] Implement Log Merging
- - [ ] Implement merge trigger
- - [ ] Fragmentation
- - [ ] Dead bytes
- - [ ] Implement merge interval
-- [ ] Add support for ranged query
-
-## Benchmark
-
-| Ops | Result |
-|---------------------------------|-------------------------------------------------------------|
-| Unbuffered Write | `BenchmarkDiskStorage_Set-8 651841 1737 ns/op`
-| Buffered Write | `BenchmarkDiskStorage_Set-8 2569089 501.8 ns/op` |
-| Buffered Write + Sync after set | `BenchmarkDiskStorage_Set-8 7879 313756 ns/op`
-
-## Credits
-
-This repo is inspired by [py-caskdb](https://github.com/avinassh/py-caskdb/)
\ No newline at end of file
+- Raft