diff --git a/Makefile b/Makefile
index 97ff6bb4f..7e92334d0 100644
--- a/Makefile
+++ b/Makefile
@@ -96,6 +96,11 @@ coverage:
 
 .PHONY: test
 test:
+# Ideally, the tests can be run without installing tparse locally.
+# However, running the tests in the container may cause some issues.
+# If you encounter any, please install tparse locally via
+# `go install github.com/mfridman/tparse/cmd/tparse@latest`
+# and run the tests locally.
 	@if [ "${OCR}" = "true" ]; then \
 		docker run --rm \
 			-v $(PWD):/${SERVICE_NAME} \
diff --git a/pkg/component/operator/document/v0/README.mdx b/pkg/component/operator/document/v0/README.mdx
index 41671ce7a..28cee3963 100644
--- a/pkg/component/operator/document/v0/README.mdx
+++ b/pkg/component/operator/document/v0/README.mdx
@@ -59,6 +59,7 @@ Convert document to text in Markdown format.
 | Images (optional) | `images` | array[string] | Images extracted from the document |
 | Error (optional) | `error` | string | Error message if any during the conversion process |
 | All Page Images (optional) | `all-page-images` | array[string] | The image contains all the pages in the document if we detect there could be images in the page. It will only support DOCX/DOC/PPTX/PPT/PDF. |
+| Markdowns (optional) | `markdowns` | array[string] | Markdown text converted from the PDF document, separated by page. |
 </div>
 
 ### Convert to Text
diff --git a/pkg/component/operator/document/v0/config/tasks.json b/pkg/component/operator/document/v0/config/tasks.json
index fcf549ccf..1c4fc96e7 100644
--- a/pkg/component/operator/document/v0/config/tasks.json
+++ b/pkg/component/operator/document/v0/config/tasks.json
@@ -108,6 +108,16 @@
           },
           "title": "All Page Images",
           "type": "array"
+        },
+        "markdowns": {
+          "description": "Markdown text converted from the PDF document, separated by page.",
+          "instillFormat": "array:string",
+          "instillUIOrder": 5,
+          "items": {
+            "type": "string"
+          },
+          "title": "Markdowns",
+          "type": "array"
         }
       },
       "required": [
diff --git a/pkg/component/operator/document/v0/convert_document_to_markdown.go b/pkg/component/operator/document/v0/convert_document_to_markdown.go
index 49e1f92c7..a82e7c44c 100644
--- a/pkg/component/operator/document/v0/convert_document_to_markdown.go
+++ b/pkg/component/operator/document/v0/convert_document_to_markdown.go
@@ -53,6 +53,7 @@ func (e *execution) convertDocumentToMarkdown(ctx context.Context, job *base.Job
 			}
 			return images
 		}(),
+		Markdowns: transformerOutputStruct.Markdowns,
 	}
 
 	err = job.Output.WriteData(ctx, outputStruct)
diff --git a/pkg/component/operator/document/v0/convert_document_to_markdown_test.go b/pkg/component/operator/document/v0/convert_document_to_markdown_test.go
index a13be4dc7..9618b8da9 100644
--- a/pkg/component/operator/document/v0/convert_document_to_markdown_test.go
+++ b/pkg/component/operator/document/v0/convert_document_to_markdown_test.go
@@ -30,6 +30,7 @@ func TestConvertDocumentToMarkdown(t *testing.T) {
 				Body:          "# This is test file for markdown\n",
 				Images:        []format.Image{},
 				AllPageImages: []format.Image{},
+				Markdowns:     []string{"# This is test file for markdown\n"},
 			},
 		},
 		{
@@ -39,6 +40,7 @@ func TestConvertDocumentToMarkdown(t *testing.T) {
 				Body:          "# This is test file for markdown\n",
 				Images:        []format.Image{},
 				AllPageImages: []format.Image{},
+				Markdowns:     []string{"# This is test file for markdown\n"},
 			},
 		},
 		{
@@ -57,6 +59,7 @@ func TestConvertDocumentToMarkdown(t *testing.T) {
 				Body:          "# This           is     test          file       for markdown\n",
 				Images:        []format.Image{},
 				AllPageImages: []format.Image{},
+				Markdowns:     []string{"# This           is     test          file       for markdown\n"},
 			},
 		},
 		{
diff --git a/pkg/component/operator/document/v0/execution/pdf_checker.py b/pkg/component/operator/document/v0/execution/pdf_checker.py
deleted file mode 100644
index de596d35e..000000000
--- a/pkg/component/operator/document/v0/execution/pdf_checker.py
+++ /dev/null
@@ -1,23 +0,0 @@
-from io import BytesIO
-import json
-import base64
-import sys
-
-# TODO chuang8511:
-# Deal with the import error when running the code in the docker container.
-# Now, we combine all python code into one file to avoid the import error.
-# from pdf_to_markdown import PDFTransformer
-
-if __name__ == "__main__":
-    json_str   = sys.stdin.buffer.read().decode('utf-8')
-    params     = json.loads(json_str)
-    pdf_string = params["PDF"]
-
-    decoded_bytes = base64.b64decode(pdf_string)
-    pdf_file_obj = BytesIO(decoded_bytes)
-    pdf = PDFTransformer(x=pdf_file_obj)
-    pages = pdf.raw_pages
-    output = {
-        "required": len(pages) == 0,
-    }
-    print(json.dumps(output))
diff --git a/pkg/component/operator/document/v0/execution/task_convert_to_images.py b/pkg/component/operator/document/v0/execution/task_convert_to_images.py
deleted file mode 100644
index 6ab77416a..000000000
--- a/pkg/component/operator/document/v0/execution/task_convert_to_images.py
+++ /dev/null
@@ -1,38 +0,0 @@
-from io import BytesIO
-import json
-import base64
-import sys
-
-# TODO chuang8511:
-# Deal with the import error when running the code in the docker container.
-# Now, we combine all python code into one file to avoid the import error.
-# from pdf_to_markdown import PDFTransformer
-# from pdf_to_markdown import PageImageProcessor
-
-if __name__ == "__main__":
-    json_str   = sys.stdin.buffer.read().decode('utf-8')
-    params     = json.loads(json_str)
-    filename   = params["filename"]
-    pdf_string = params["PDF"]
-
-    decoded_bytes = base64.b64decode(pdf_string)
-    pdf_file_obj = BytesIO(decoded_bytes)
-    pdf = PDFTransformer(x=pdf_file_obj)
-    pages = pdf.raw_pages
-    exclude_file_extension = filename.split(".")[0]
-    filenames = []
-    images = []
-
-    for i, page in enumerate(pages):
-        page_image = page.to_image(resolution=500)
-        encoded_image = PageImageProcessor.encode_image(page_image)
-        images.append(encoded_image)
-        filenames.append(f"{exclude_file_extension}_{i}.png")
-
-
-    output = {
-        "images": images,
-        "filename": filenames,
-    }
-
-    print(json.dumps(output))
diff --git a/pkg/component/operator/document/v0/execution/task_convert_to_markdown.py b/pkg/component/operator/document/v0/execution/task_convert_to_markdown.py
deleted file mode 100644
index a7e6b70e6..000000000
--- a/pkg/component/operator/document/v0/execution/task_convert_to_markdown.py
+++ /dev/null
@@ -1,64 +0,0 @@
-from io import BytesIO
-import json
-import base64
-import sys
-
-# TODO chuang8511:
-# Deal with the import error when running the code in the docker container.
-# Now, we combine all python code into one file to avoid the import error.
-# from pdf_to_markdown import PDFTransformer
-
-
-if __name__ == "__main__":
-	json_str = sys.stdin.buffer.read().decode('utf-8')
-	params = json.loads(json_str)
-	display_image_tag = params["display-image-tag"]
-	display_all_page_image = params["display-all-page-image"]
-	pdf_string = params["PDF"]
-	decoded_bytes = base64.b64decode(pdf_string)
-	pdf_file_obj = BytesIO(decoded_bytes)
-	pdf = PDFTransformer(pdf_file_obj, display_image_tag)
-
-	result = ""
-	images = []
-	separator_number = 30
-	image_idx = 0
-	errors = []
-	all_page_images = []
-
-	try:
-		times = len(pdf.raw_pages) // separator_number + 1
-		for i in range(times):
-			pdf = PDFTransformer(pdf_file_obj, display_image_tag, image_idx)
-			if i == times - 1:
-				pdf.pages = pdf.raw_pages[i*separator_number:]
-			else:
-				pdf.pages = pdf.raw_pages[i*separator_number:(i+1)*separator_number]
-
-			pdf.preprocess()
-			image_idx = pdf.image_index
-			result += pdf.execute()
-			for image in pdf.base64_images:
-				images.append(image)
-
-			if display_all_page_image:
-				raw_pages = pdf.raw_pages
-
-				for page_number in pdf.page_numbers_with_images:
-					page = raw_pages[page_number - 1]
-					page_image = page.to_image(resolution=500)
-					encoded_image = PageImageProcessor.encode_image(page_image)
-					all_page_images.append(encoded_image)
-
-			errors += pdf.errors
-
-		output = {
-			"body": result,
-			"images": images,
-			"parsing_error": errors,
-			"all_page_images": all_page_images,
-			"display_all_page_image": display_all_page_image,
-		}
-		print(json.dumps(output))
-	except Exception as e:
-		print(json.dumps({"system_error": str(e)}))
diff --git a/pkg/component/operator/document/v0/io.go b/pkg/component/operator/document/v0/io.go
index b8516eed3..829f651b9 100644
--- a/pkg/component/operator/document/v0/io.go
+++ b/pkg/component/operator/document/v0/io.go
@@ -15,6 +15,7 @@ type ConvertDocumentToMarkdownOutput struct {
 	Images        []format.Image `instill:"images"`
 	Error         string         `instill:"error"`
 	AllPageImages []format.Image `instill:"all-page-images"`
+	Markdowns     []string       `instill:"markdowns"`
 }
 
 type ConvertDocumentToImagesInput struct {
diff --git a/pkg/component/operator/document/v0/pdf_to_markdown/__init__.py b/pkg/component/operator/document/v0/pdf_to_markdown/__init__.py
deleted file mode 100644
index 2fa4bd0d3..000000000
--- a/pkg/component/operator/document/v0/pdf_to_markdown/__init__.py
+++ /dev/null
@@ -1,2 +0,0 @@
-from page_image_processor import PageImageProcessor
-from pdf_transformer import PDFTransformer
diff --git a/pkg/component/operator/document/v0/pdf_to_markdown/page_image_processor.py b/pkg/component/operator/document/v0/pdf_to_markdown/page_image_processor.py
deleted file mode 100644
index 35a676ebb..000000000
--- a/pkg/component/operator/document/v0/pdf_to_markdown/page_image_processor.py
+++ /dev/null
@@ -1,307 +0,0 @@
-import base64
-from io import BytesIO
-from PIL import Image
-
-from pdfplumber.page import Page
-from pdfplumber.display import PageImage
-
-class PageImageProcessor:
-    page: Page
-    errors: list[str]
-    images: list[dict]
-
-    def __init__(self, page: Page, image_index: int):
-        self.page = page
-        self.lines = page.extract_text_lines(layout=True, strip=True, return_chars=False)
-        self.images = []
-        self.errors = []
-        page.flush_cache()
-        page.get_textmap.cache_clear()
-        self.image_index = image_index
-
-    def produce_images_by_blocks(self) -> None:
-        saved_blocks = []
-        page = self.page
-        images = page.images
-
-        # Process images detect by pdfplumber
-        for i, image in enumerate(images):
-            bbox = (image["x0"], image["top"], image["x1"], image["bottom"])
-		    # There is a bug in pdfplumber that it can't target the image position correctly.
-            try:
-                img_page = page.crop(bbox=bbox)
-            except Exception as e:
-                self.errors.append(f"image {i} got error: {str(e)}, so it convert all pages into image.")
-                bbox = (0, 0, page.width, page.height)
-                img_page = page
-
-            img_obj = img_page.to_image(resolution=500)
-            img_base64 = self.__class__.encode_image(image=img_obj)
-
-            image["page_number"] = page.page_number
-            image["img_number"] = self.image_index
-            self.image_index += 1
-            image["img_base64"] = img_base64
-            saved_blocks.append(bbox)
-            self.images.append(image)
-
-        # (x0, top, x1, bottom)
-        blocks = self.calculate_blank_blocks(page=page)
-
-        for i, block in enumerate(blocks):
-            block_dict = self.get_block_dict(block)
-            block_image = {
-                "page_number": int,
-                "img_number":  int,
-                "img_base64":  str,
-                "top":         block_dict["top"],
-                "bottom":      block_dict["bottom"],
-            }
-            overlap = False
-            for saved_block in saved_blocks:
-                if self.is_overlap(block1=saved_block, block2=block):
-                    overlap = True
-                    break
-            if overlap:
-                continue
-
-            if self.image_too_small(block=block):
-                continue
-
-            if self.low_possibility_to_be_image(block=block):
-                continue
-
-            try:
-                cropped_page = page.crop(block)
-            except Exception as e:
-                self.errors.append(f"image {i} got error: {str(e)}, do not convert the images.")
-                continue
-
-            im = cropped_page.to_image(resolution=200)
-
-            if self.is_blank_pil_image(im=im):
-                continue
-
-            img_base64 = self.__class__.encode_image(image=im)
-            block_image["page_number"] = page.page_number
-            block_image["img_number"] = self.image_index
-            self.image_index += 1
-            block_image["img_base64"] = img_base64
-            self.images.append(block_image)
-
-    def encode_image(image: PageImage) -> str:
-        buffer = BytesIO()
-        image.save(buffer, format="PNG")
-        buffer.seek(0)
-        img_data = buffer.getvalue()
-        return "data:image/png;base64," + base64.b64encode(img_data).decode("utf-8")
-
-    def get_block_dict(self, block: tuple) -> dict:
-        return {
-            "x0":     block[0],
-            "top":    block[1],
-            "x1":     block[2],
-            "bottom": block[3],
-        }
-
-    def calculate_blank_blocks(self, page: Page) -> list[tuple]:
-        page_width = page.width
-        page_height = page.height
-        lines = self.lines
-
-        page.flush_cache()
-        page.get_textmap.cache_clear()
-
-        blank_blocks = []
-
-        # Track the bottom of the last line processed
-        last_bottom = 0  # Start from the top of the page
-
-        # Check for empty spaces before the first line
-        if lines:
-            first_line = lines[0]
-            if first_line["top"] > 0:
-                blank_blocks.append((0, 0, page_width, first_line["top"]))
-
-        # Process each line to find blank areas between them
-        for i, line in enumerate(lines):
-            # Calculate the blank space above the current line
-            if i > 0:
-                previous_line = lines[i - 1]
-                if line["top"] > previous_line["bottom"]:
-                    # (x0, top, x1, bottom)
-                    blank_blocks.append((0, previous_line["bottom"], page_width, line["top"]))
-
-            # Update last_bottom to the current line's bottom
-            last_bottom = line["bottom"]
-
-        # Check for empty spaces after the last line
-        if last_bottom < page_height:
-            blank_blocks.append((0, last_bottom, page_width, page_height))
-
-
-        return blank_blocks + self.calculate_horizontal_blocks(lines=lines, page_width=page_width, tolerance=30)
-
-    def calculate_horizontal_blocks(self, lines: list[dict[str, any]], page_width: float, tolerance: float) -> list[tuple]:
-        """
-        Calculates horizontal blocks (blank spaces) on the left or right side of text lines.
-
-        Parameters:
-        - lines: A list of dictionaries, each representing a line of text with 'x0', 'x1', 'top', and 'bottom' attributes.
-        - page_width: The width of the page being processed.
-        - tolerance: to tolerate the block judgement
-
-        Returns:
-        - A list of tuples representing the horizontal blocks (x0, top, x1, bottom) where no text exists.
-        """
-        if not lines:
-            return []
-
-        left_blocks = []
-        right_blocks = []
-
-        # Sort the lines by their vertical position (top)
-        sorted_lines = sorted(lines, key=lambda l: l["top"])
-
-        found_block = False
-        block_start_line = None
-
-        for i in range(1, len(sorted_lines)):
-
-            # Check if the block starts with first line
-            # 4 is number to be tuned
-            if not block_start_line and page_width / 4 < sorted_lines[i]["x0"]:
-                block_start_line = sorted_lines[i]
-                line_count = 1
-
-            current_line = sorted_lines[i]
-            previous_line = sorted_lines[i - 1]
-            if not found_block and block_start_line and abs(current_line["x0"] - block_start_line["x0"]) < tolerance:
-                line_count += 1
-                if line_count > 5:
-                    found_block = True
-                    block_start_top = block_start_line["top"]
-
-            elif not block_start_line and abs(current_line["x0"] - previous_line["x0"]) > tolerance:
-                block_start_line = current_line
-                line_count = 1
-            elif not found_block and block_start_line:
-                block_start_line = None
-                line_count = 0
-
-            if found_block and abs(current_line["x0"] - block_start_line["x0"]) > tolerance:
-                # Finalize the left block up to the previous line
-                left_blocks.append((0, block_start_top, previous_line["x0"], previous_line["bottom"]))
-                found_block= False
-                block_start_line = None
-                line_count = 0
-
-
-        found_block = False
-        block_start_line = None
-
-        for i in range(1, len(sorted_lines)):
-            if not block_start_line and page_width / 4 < page_width - sorted_lines[i]["x1"]:
-                block_start_line = sorted_lines[i]
-                line_count = 1
-
-            current_line = sorted_lines[i]
-            previous_line = sorted_lines[i - 1]
-
-            if not found_block and block_start_line and abs(current_line["x1"] - block_start_line["x1"]) < tolerance:
-                line_count += 1
-                if line_count > 5:
-                    found_block = True
-                    block_start_top = block_start_line["top"]
-
-            elif not block_start_line and (current_line["x1"] - previous_line["x1"]) > tolerance:
-                block_start_line = current_line
-                line_count = 1
-
-            elif not found_block and block_start_line:
-                block_start_line = None
-                line_count = 0
-
-            if found_block and abs(current_line["x1"] - block_start_line["x1"]) > tolerance:
-                # Finalize the right block up to the previous line
-                right_blocks.append((previous_line["x1"], block_start_top, page_width, previous_line["bottom"]))
-                found_block= False
-                block_start_line = None
-                line_count = 0
-
-        return left_blocks + right_blocks
-
-    # (x0, top, x1, bottom)
-    def image_too_small(self, block: tuple) -> bool:
-        image_width = block[2] - block[0]
-        image_height = block[3] - block[1]
-        size = image_width * image_height
-        # This is a number to be tuned
-        return size < 15000
-
-    def low_possibility_to_be_image(self, block: tuple, min_size: int = 20, max_aspect_ratio: float = 10.0) -> bool:
-        """
-        Determine if a block has a low likelihood of being an image based on its dimensions.
-
-        Parameters:
-        - block: A 4-tuple (x0, top, x1, bottom) representing the coordinates of the block.
-        - min_size: The minimum width/height required for a block to be considered a potential image.
-        - max_aspect_ratio: The maximum allowed width-to-height (or height-to-width) ratio for a block to be considered an image.
-
-        Returns:
-        - True if the block is unlikely to be an image, False otherwise.
-        """
-        # Calculate the width and height of the block
-        image_width = block[2] - block[0]  # x1 - x0
-        image_height = block[3] - block[1]  # bottom - top
-
-        # Check if the block is too small to be an image
-        if image_width < min_size or image_height < min_size:
-            return True
-
-        # Calculate the aspect ratio
-        aspect_ratio = image_width / image_height if image_height != 0 else float('inf')
-
-        # Check if the aspect ratio is too extreme to be an image
-        if aspect_ratio > max_aspect_ratio or aspect_ratio < 1 / max_aspect_ratio:
-            return True
-
-        # Otherwise, it's not considered "low possibility" of being an image
-        return False
-
-    def is_blank_pil_image(self, im: Image.Image) -> bool:
-        """
-        Check if an in-memory image (from pdfplumber) is blank using Pillow, without NumPy.
-        """
-        pil_image = im.original  # im.original is a PIL Image object
-
-        # Get extrema (min, max) for each channel (for grayscale, there will be one tuple)
-        extrema = pil_image.getextrema()
-
-        # If the extrema (min, max) values are the same, the image is uniform (blank)
-        if isinstance(extrema, tuple) and all(min_val == max_val for min_val, max_val in extrema):
-            return True
-        return False
-
-    def is_overlap(self, block1: tuple, block2: tuple) -> bool:
-        """
-        Determines if two blocks (x0, top, x1, bottom) overlap.
-
-        Parameters:
-        - block1: A tuple representing the first block (x0, top, x1, bottom).
-        - block2: A tuple representing the second block (x0, top, x1, bottom).
-
-        Returns:
-        - True if the blocks overlap, False otherwise.
-        """
-        x0_1, top_1, x1_1, bottom_1 = block1
-        x0_2, top_2, x1_2, bottom_2 = block2
-
-        # Check for horizontal overlap
-        horizontal_overlap = (x0_1 < x1_2 and x1_1 > x0_2)
-
-        # Check for vertical overlap
-        vertical_overlap = (top_1 < bottom_2 and bottom_1 > top_2)
-
-        # If both horizontal and vertical overlaps are true, the blocks overlap
-        return horizontal_overlap and vertical_overlap
diff --git a/pkg/component/operator/document/v0/pdf_to_markdown/pdf_transformer.py b/pkg/component/operator/document/v0/pdf_to_markdown/pdf_transformer.py
deleted file mode 100644
index cf5cead25..000000000
--- a/pkg/component/operator/document/v0/pdf_to_markdown/pdf_transformer.py
+++ /dev/null
@@ -1,439 +0,0 @@
-
-from io import BytesIO
-from collections import Counter
-
-import pdfplumber
-from pdfplumber.page import Page
-
-# TODO chuang8511:
-# Deal with the import error when running the code in the docker container.
-# Now, we combine all python code into one file to avoid the import error.
-# from page_image_processor import PageImageProcessor
-
-
-class PDFTransformer:
-	pdf: pdfplumber.PDF
-	raw_pages: list[Page]
-	metadata: dict
-	display_image_tag: bool
-	image_index: int
-	errors: list[str]
-	pages: list[Page]
-	lines: list[dict]
-	images: list[dict]
-	tables: list[dict]
-	base64_images: list[dict]
-	page_numbers_with_images: list[int]
-
-	def __init__(self, x: BytesIO, display_image_tag: bool = False, image_index: int = 0):
-		self.pdf = pdfplumber.open(x)
-		self.raw_pages = self.pdf.pages
-		self.metadata = self.pdf.metadata
-		self.display_image_tag = display_image_tag
-		self.image_index = image_index
-		self.errors = []
-		self.page_numbers_with_images = []
-
-	def preprocess(self):
-		self.set_heights()
-		self.lines = []
-		self.tables = []
-		self.images = []
-		self.base64_images = []
-		if self.display_image_tag:
-			self.process_image(self.image_index)
-
-		for page in self.pages:
-			page_lines = page.extract_text_lines(layout=True, x_tolerance_ratio=0.1, return_chars= False)
-			page.flush_cache()
-			page.get_textmap.cache_clear()
-
-			self.process_line(page_lines, page.page_number)
-			self.process_table(page)
-
-		self.set_paragraph_information(self.lines)
-
-		self.result = ""
-
-	def process_image(self, i: int):
-		image_index = i
-		for page in self.pages:
-			image_processor = PageImageProcessor(page=page, image_index=image_index)
-			image_processor.produce_images_by_blocks()
-			processed_images = image_processor.images
-			self.images += processed_images
-			image_index = image_processor.image_index
-
-			if page.page_number not in self.page_numbers_with_images:
-				self.page_numbers_with_images.append(page.page_number)
-
-		self.image_index = image_processor.image_index
-
-	def set_heights(self):
-		tolerance = 0.95
-		heights = []
-		largest_text_height, second_largest_text_height = 0, 0
-		for page in self.pages:
-			lines = page.extract_text_lines(layout=True, x_tolerance_ratio=0.1, return_chars= False)
-			page.flush_cache()
-			page.get_textmap.cache_clear()
-			for line in lines:
-				height = int(line["bottom"] - line["top"])
-				heights.append(height)
-				if height > largest_text_height:
-					second_largest_text_height = largest_text_height
-					largest_text_height = height
-				elif height > second_largest_text_height and height < largest_text_height:
-					second_largest_text_height = height
-
-		counter = Counter(heights)
-
-		# if there are too many subtitles, we don't use the title height.
-		# 50 is a temp number. It should be tuned.
-		if counter[largest_text_height] > 50:
-			self.title_height = float("inf")
-		else:
-			self.title_height = round(largest_text_height * tolerance)
-
-		if counter[second_largest_text_height] > 50 or self.title_height == float("inf"):
-			self.subtitle_height = float("inf")
-		else:
-			self.subtitle_height = round(second_largest_text_height * tolerance)
-
-	def set_paragraph_information(self, lines: list[dict]):
-		def round_to_nearest_upper_bound(value, step=3): # for the golden sample case
-			"""
-			Round the value to the nearest upper bound based on the given step.
-			For example, with step=3: 0~3 -> 3, 3~6 -> 6, etc.
-			"""
-			return ((value // step) + 1) * step
-
-		distances = []
-		paragraph_width = 0
-		distances_to_left = []
-
-		for _, line in enumerate(lines):
-			if line["distance_to_next_line"] and line["distance_to_next_line"] > 0:
-				# Round the distance to the nearest integer and add to the list
-				rounded_distance = round_to_nearest_upper_bound(line["distance_to_next_line"])
-				distances.append(rounded_distance)
-
-			if line["line_width"] > paragraph_width:
-				paragraph_width = line["line_width"]
-
-			if line["x0"]:
-				distances_to_left.append(line["x0"])
-
-		# Find the most common distance
-		if distances:
-			common_distance = Counter(distances).most_common(1)[0][0]
-		else:
-			common_distance = 10 ## default value
-
-		if distances_to_left:
-			zero_indent_distance = min(distances_to_left)
-		else:
-			zero_indent_distance = 0
-		paragraph_distance = common_distance * 1.5
-		self.paragraph_distance = paragraph_distance
-		self.paragraph_width = paragraph_width
-		self.zero_indent_distance = zero_indent_distance
-
-	def execute(self):
-		self.set_line_type(self.title_height, self.subtitle_height, "indent")
-		self.result = self.transform_line_to_markdown(self.lines)
-		return self.result
-
-	# It can add more calculation for the future development when we want to extend more use cases.
-	def process_line(self, lines: list[dict], page_number: int):
-		for idx, line in enumerate(lines):
-			line["line_height"] = line["bottom"] - line["top"]
-			line["line_width"] = line["x1"] - line["x0"]
-			line["middle"] = (line["x1"] + line["x0"]) / 2
-			line["distance_to_next_line"] = lines[idx+1]["top"] - line["bottom"] if idx < len(lines) - 1 else None
-			line["page_number"] = page_number
-			self.lines.append(line)
-
-	def process_table(self, page: Page):
-		tables = page.find_tables(
-			table_settings={
-				"vertical_strategy": "lines",
-				"horizontal_strategy": "lines",
-				}
-		)
-		if tables:
-			for table in tables:
-				table_info = {}
-				table_info["bbox"] = table.bbox
-				text = table.extract()
-				table_info["text"] = text
-				table_info["page_number"] = page.page_number
-				self.tables.append(table_info)
-
-	# TODO: Implement paragraph strategy
-	def paragraph_strategy(self, lines: list[dict], subtitle_height: int = 14):
-		# TODO: Implement paragraph strategy
-		# judge the non-title line in a page.
-		# If there is a line with indent, return "indent"
-		# If there is a line with no indent, return "no-indent"
-		return "indent"
-		paragraph_lines_start_positions = []
-		for line in lines:
-			if line["line_height"] < subtitle_height:
-				paragraph_lines_start_positions.append(line["x0"])
-
-	def set_line_type(self, title_height: int = 16, subtitle_height: int = 14, paragraph_strategy: str = "indent"):
-		lines = self.lines
-		current_paragraph = []
-		paragraph_start_position = 0
-		paragraph_idx = 1
-
-		for i, line in enumerate(lines):
-			if line['line_height'] >= title_height:
-				line["type"] = 'title'
-				if current_paragraph:
-					for line_in_paragraph in current_paragraph:
-						line_in_paragraph["type"] = f'paragraph {paragraph_idx}'
-					paragraph_idx += 1
-					current_paragraph = []
-
-			elif line['line_height'] >= subtitle_height:
-				line["type"] = 'subtitle'
-				if current_paragraph:
-					for line_in_paragraph in current_paragraph:
-						line_in_paragraph["type"] = f'paragraph {paragraph_idx}'
-					paragraph_idx += 1
-					current_paragraph = []
-			else:
-				line["type"] = 'paragraph'
-				if current_paragraph:
-					current_paragraph.append(line)
-
-					if ((paragraph_strategy == "indent" and i < len(lines) - 1 and
-							(   # if the next line starts a new paragraph
-								abs(lines[i+1]['x0'] - paragraph_start_position) < 10
-								# if the next line is not in the same layer
-								# or abs(line["middle"] - lines[i+1]["middle"]) > 5
-								)
-							) or
-						(paragraph_strategy == "no-indent"
-							and line["distance_to_next_line"]
-							and line["distance_to_next_line"] > 10) or
-						(i == len(lines) - 1) # final line
-						):
-
-						for line_in_paragraph in current_paragraph:
-							line_in_paragraph["type"] = f'paragraph {paragraph_idx}'
-
-						paragraph_idx += 1
-						current_paragraph = []
-				else:
-					current_paragraph = [line]
-					paragraph_start_position = line["x0"]
-		self.lines = lines
-
-	def transform_line_to_markdown(self, lines: list[dict]):
-		result = ""
-		to_be_processed_table = []
-		for i, line in enumerate(lines):
-			table = self.meet_table(line, line["page_number"])
-			if table and table not in to_be_processed_table:
-				to_be_processed_table.append(table)
-			elif table and table in to_be_processed_table:
-				continue
-			elif to_be_processed_table:
-				for table in to_be_processed_table:
-					result += "\n\n"
-					result += self.transform_table_markdown(table)
-					result += "\n\n"
-					self.tables.remove(table)
-				to_be_processed_table = []
-
-				if (i > 0 and
-					("title" == lines[i-1]["type"] and "title" == lines[i]["type"] or
-	  				"subtitle" == lines[i-1]["type"] and "subtitle" == lines[i]["type"])
-					):
-					while len(result) > 0 and result[-1] == "\n":
-						result = result[:-1]
-
-					line_text = self.line_process(line, i, lines, result)
-					## If line_text prefix or suffix is \n, remove them
-					while line_text.startswith("\n") or line_text.endswith("\n"):
-						line_text = line_text.strip("\n")
-				else:
-					line_text = self.line_process(line, i, lines, result)
-					while (
-						(line_text.startswith("\n") or line_text.endswith("\n"))):
-						line_text = line_text.strip("\n")
-
-				result += line_text
-				result += "\n"
-				## TODO: Do not change another line if it is bullet point or numbered list.
-				if (
-					(line["distance_to_next_line"] and line["distance_to_next_line"] >= self.paragraph_distance) or
-					(
-						line["page_number"] != lines[i+1]["page_number"] if i < len(lines) - 1 else False
-						and line["line_width"] < self.paragraph_width * 0.8
-					)
-					):
-					result += "\n"
-
-			else:
-				if (i > 0 and
-					("title" == lines[i-1]["type"] and "title" == lines[i]["type"] or
-	  				"subtitle" == lines[i-1]["type"] and "subtitle" == lines[i]["type"])
-					):
-					while len(result) > 0 and result[-1] == "\n":
-						result = result[:-1]
-
-					line_text = self.line_process(line, i, lines, result)
-					## If line_text prefix or suffix is \n, remove them
-					while line_text.startswith("\n") or line_text.endswith("\n"):
-						line_text = line_text.strip("\n")
-				else:
-					line_text = self.line_process(line, i, lines, result)
-					while (
-						(line_text.startswith("\n") or line_text.endswith("\n"))):
-						line_text = line_text.strip("\n")
-
-				result += line_text
-
-				## TODO: Do not change another line if it is bullet point or numbered list.
-				if (
-					(line["distance_to_next_line"] and line["distance_to_next_line"] >= self.paragraph_distance) or
-					(
-						line["page_number"] != lines[i+1]["page_number"] if i < len(lines) - 1 else False
-						and line["line_width"] < self.paragraph_width * 0.8
-					)
-					):
-					result += "\n"
-				result += "\n"
-
-
-			if i < len(lines) - 1:
-				result += self.insert_image(line, lines[i+1])
-			else:
-				result += self.insert_image(line, None)
-		if self.tables:
-			processed_table = []
-			for table in self.tables:
-				result += "\n\n"
-				result += self.transform_table_markdown(table)
-				result += "\n\n"
-				processed_table.append(table)
-			for table in processed_table:
-				self.tables.remove(table)
-
-		return result
-
-	def line_process(self, line: dict, i: int, lines: list[dict], current_result: str):
-		result = ""
-		if "type" not in line:
-			return line["text"]
-		if line["type"] == "title":
-			if current_result != "":
-				result += "\n\n"
-			if i > 0 and lines[i-1]["type"] == "title":
-				result += f" {line['text']}\n"
-			else:
-				result += f"# {line['text']}\n"
-		elif line["type"] == "subtitle":
-			if current_result != "":
-				result += "\n\n"
-			if i > 0 and lines[i-1]["type"] == "subtitle":
-				result += f" {line['text']}\n"
-			else:
-				result += f"## {line['text']}\n"
-		elif "paragraph" in line["type"]:
-			# Deal with indentation
-			if self.zero_indent_distance != 0:
-				indent = round((line["x0"] - self.zero_indent_distance) // 10)  # to be tuned
-				if indent > 0:
-					result += " " * indent
-
-			result += line["text"]
-			if (
-				(i < len(lines) - 1) and
-				"type" in lines[i+1] and
-				len(lines[i+1]["type"].split(" ")) == 2 and
-				(int(line["type"].split(" ")[1]) < int(lines[i+1]["type"].split(" ")[1]))
-			):
-				result += "\n"
-				result += "\n"
-		return result
-
-	def meet_table(self, line: dict, page_number: int):
-		tables = self.tables
-		for table in tables:
-			if table["page_number"] == page_number:
-				bbox = table["bbox"]
-				top, bottom = bbox[1], bbox[3]
-				if line["top"] > top and line["bottom"] < bottom:
-					return table
-				else:
-					None
-
-	def transform_table_markdown(self, table: dict):
-		result = ""
-		texts = table["text"]
-		for i, row in enumerate(texts):
-			for j, col in enumerate(row):
-				if col:
-					if "\n" in col:
-						col = col.replace("\n", "<br>")
-					result += col
-
-					if j < len(row) - 1:
-						result += " | "
-				else:
-					if j == 0:
-						result += "||"
-					else:
-						result += "|"
-			if i == 0:
-				result += "\n"
-				## TODO: Judge table that cross the page,
-				result += "|"
-				result += " --- |" * len(row)
-				result += "\n"
-			elif i < len(texts) - 1:
-				result += "\n"
-
-		return result
-
-	def insert_image(self, line: dict, next_line: dict):
-		result = ""
-		images = self.images
-		to_be_removed_images = []
-
-		if images:
-			if next_line:
-				# If there is image between line and next_line, we insert image.
-				if next_line["page_number"] == line["page_number"]:
-					for image in images:
-						if image["page_number"] == line["page_number"] and image["top"] > line["bottom"] and image["bottom"] < next_line["top"]:
-							result += "\n\n"
-							result += f"![image {image['img_number']}]({image['img_number']})"
-							self.base64_images.append(image["img_base64"])
-							result += "\n\n"
-							to_be_removed_images.append(image)
-				elif next_line["page_number"] > line["page_number"]:
-					for image in images:
-						if image["page_number"] >= line["page_number"] and image["page_number"] < next_line["page_number"]:
-							result += "\n\n"
-							result += f"![image {image['img_number']}]({image['img_number']})"
-							self.base64_images.append(image["img_base64"])
-							result += "\n\n"
-							to_be_removed_images.append(image)
-
-			else: # if images exists and there is no next_line, we insert image.
-				for image in images:
-					result += "\n\n"
-					result += f"![image {image['img_number']}]({image['img_number']})"
-					self.base64_images.append(image["img_base64"])
-					result += "\n\n"
-					to_be_removed_images.append(image)
-		for image in to_be_removed_images:
-			self.images.remove(image)
-
-		return result
diff --git a/pkg/component/operator/document/v0/transformer/execution/task_convert_to_markdown.py b/pkg/component/operator/document/v0/transformer/execution/task_convert_to_markdown.py
index a7e6b70e6..79f0a3182 100644
--- a/pkg/component/operator/document/v0/transformer/execution/task_convert_to_markdown.py
+++ b/pkg/component/operator/document/v0/transformer/execution/task_convert_to_markdown.py
@@ -25,6 +25,7 @@
 	image_idx = 0
 	errors = []
 	all_page_images = []
+	markdowns = []
 
 	try:
 		times = len(pdf.raw_pages) // separator_number + 1
@@ -52,12 +53,15 @@
 
 			errors += pdf.errors
 
+			markdowns += pdf.markdowns
+
 		output = {
 			"body": result,
 			"images": images,
 			"parsing_error": errors,
 			"all_page_images": all_page_images,
 			"display_all_page_image": display_all_page_image,
+			"markdowns": markdowns,
 		}
 		print(json.dumps(output))
 	except Exception as e:
diff --git a/pkg/component/operator/document/v0/transformer/markdown.go b/pkg/component/operator/document/v0/transformer/markdown.go
index eee450644..eff7b52a8 100644
--- a/pkg/component/operator/document/v0/transformer/markdown.go
+++ b/pkg/component/operator/document/v0/transformer/markdown.go
@@ -20,6 +20,7 @@ type ConvertDocumentToMarkdownTransformerOutput struct {
 	Images        []string `json:"images,omitempty"`
 	Error         string   `json:"error,omitempty"`
 	AllPageImages []string `json:"all-page-images,omitempty"`
+	Markdowns     []string `json:"markdowns"`
 }
 
 func ConvertDocumentToMarkdown(inputStruct *ConvertDocumentToMarkdownTransformerInput, transformerGetter MarkdownTransformerGetterFunc) (*ConvertDocumentToMarkdownTransformerOutput, error) {
@@ -50,6 +51,7 @@ func ConvertDocumentToMarkdown(inputStruct *ConvertDocumentToMarkdownTransformer
 		Images:        converterOutput.Images,
 		Error:         strings.Join(converterOutput.ParsingError, "\n"),
 		AllPageImages: converterOutput.AllPageImages,
+		Markdowns:     converterOutput.Markdowns,
 	}
 
 	if inputStruct.Filename != "" {
diff --git a/pkg/component/operator/document/v0/transformer/pdf_to_markdown/pdf_transformer.py b/pkg/component/operator/document/v0/transformer/pdf_to_markdown/pdf_transformer.py
index cf5cead25..9c8df5b3d 100644
--- a/pkg/component/operator/document/v0/transformer/pdf_to_markdown/pdf_transformer.py
+++ b/pkg/component/operator/document/v0/transformer/pdf_to_markdown/pdf_transformer.py
@@ -24,6 +24,8 @@ class PDFTransformer:
 	tables: list[dict]
 	base64_images: list[dict]
 	page_numbers_with_images: list[int]
+	# Markdown produced by the transformation, one string per page.
+	markdowns: list[str]
 
 	def __init__(self, x: BytesIO, display_image_tag: bool = False, image_index: int = 0):
 		self.pdf = pdfplumber.open(x)
@@ -54,6 +56,7 @@ def preprocess(self):
 		self.set_paragraph_information(self.lines)
 
 		self.result = ""
+		self.markdowns = len(self.pdf.pages) * [""]
 
 	def process_image(self, i: int):
 		image_index = i
@@ -157,8 +160,8 @@ def process_line(self, lines: list[dict], page_number: int):
 	def process_table(self, page: Page):
 		tables = page.find_tables(
 			table_settings={
-				"vertical_strategy": "lines",
-				"horizontal_strategy": "lines",
+				"vertical_strategy": "lines_strict",
+				"horizontal_strategy": "lines_strict",
 				}
 		)
 		if tables:
@@ -235,11 +238,20 @@ def set_line_type(self, title_height: int = 16, subtitle_height: int = 14, parag
 	def transform_line_to_markdown(self, lines: list[dict]):
 		result = ""
 		to_be_processed_table = []
+		need_append_to_markdowns = False
+		page_number = 0
+
 		for i, line in enumerate(lines):
 			table = self.meet_table(line, line["page_number"])
 			if table and table not in to_be_processed_table:
 				to_be_processed_table.append(table)
 			elif table and table in to_be_processed_table:
+
+				# Deal with markdowns. If this table row is the last line on its page, flag that page so the table is added to its markdown entry.
+				if i < len(lines) - 1 and line["page_number"] != lines[i+1]["page_number"]:
+					need_append_to_markdowns = True
+					page_number = line["page_number"]
+
 				continue
 			elif to_be_processed_table:
 				for table in to_be_processed_table:
@@ -310,10 +322,27 @@ def transform_line_to_markdown(self, lines: list[dict]):
 				result += "\n"
 
 
+			# Insert image sections
 			if i < len(lines) - 1:
 				result += self.insert_image(line, lines[i+1])
 			else:
 				result += self.insert_image(line, None)
+
+
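+			# Deal with markdowns: a table row was the last line of a page, so store the accumulated result for the flagged page.
+			# NOTE(review): this writes index page_number, while the other writes use page_number - 1; confirm the intended index.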
+			if need_append_to_markdowns:
+				self.markdowns[page_number] = result
+				result = ""
+				need_append_to_markdowns = False
+				page_number = 0
+
+			# If the next line is on a different page, store the accumulated result as this page's markdown and start a new one.
+			elif i < len(lines) - 1 and line["page_number"] != lines[i+1]["page_number"]:
+				self.markdowns[line["page_number"] - 1] = result
+				result = ""
+
+
 		if self.tables:
 			processed_table = []
 			for table in self.tables:
@@ -324,7 +353,16 @@ def transform_line_to_markdown(self, lines: list[dict]):
 			for table in processed_table:
 				self.tables.remove(table)
 
-		return result
+		# Deal with the last page for markdowns
+		if result:
+			self.markdowns[lines[-1]["page_number"] - 1] = result
+
+		combined_markdown = ""
+
+		for markdown in self.markdowns:
+			combined_markdown += markdown
+
+		return combined_markdown
 
 	def line_process(self, line: dict, i: int, lines: list[dict], current_result: str):
 		result = ""
diff --git a/pkg/component/operator/document/v0/transformer/pdftomarkdown.go b/pkg/component/operator/document/v0/transformer/pdftomarkdown.go
index 52267d1fc..3d0a3ff3f 100644
--- a/pkg/component/operator/document/v0/transformer/pdftomarkdown.go
+++ b/pkg/component/operator/document/v0/transformer/pdftomarkdown.go
@@ -15,6 +15,7 @@ type converterOutput struct {
 	SystemError   string   `json:"system_error"`
 	AllPageImages []string `json:"all_page_images"`
 	AllPage       bool     `json:"display_all_page_image"`
+	Markdowns     []string `json:"markdowns"`
 }
 
 func convertPDFToMarkdownWithPDFPlumber(base64Text string, displayImageTag bool, displayAllPage bool) (converterOutput, error) {