From f3eedb100e62f37b22b85f655e15befc6366433a Mon Sep 17 00:00:00 2001
From: provos <provos@gmail.com>
Date: Sat, 15 Jun 2024 08:32:05 -0700
Subject: [PATCH] refactor: Move camera setup logic to Camera class

---
 camera.py       | 44 +++++++++++++++++++++++++++++++++++++++
 components.py   | 17 +++++----------
 segmentation.py | 55 +++----------------------------------------------
 test_webui.py   | 16 +++++++-------
 webui.py        | 16 ++++++--------
 5 files changed, 67 insertions(+), 81 deletions(-)

diff --git a/camera.py b/camera.py
index e91eb66..44e9d1a 100644
--- a/camera.py
+++ b/camera.py
@@ -26,6 +26,50 @@ def __init__(self, distance=100, max_distance=500, focal_length=100):
         self.max_distance = max_distance
         self.focal_length = focal_length
 
+    def setup_camera_and_cards(self, image_slices, depths, sensor_width=35.0):
+        """
+        Build the pinhole intrinsic matrix and one 3D quad ("card") per image slice.
+
+        Args:
+            image_slices (list): Slice arrays; the first one's 3-value shape is read as (height, width, _).
+            depths (list): One depth threshold per slice; 255 maps to z=0 and 0 to z=self.max_distance.
+            sensor_width (float, optional): Sensor width; only its ratio to self.focal_length is used. Defaults to 35.0.
+
+        Returns:
+            tuple: (3x3 float32 camera matrix, list with one 4x3 float32 array of card corners per slice).
+        """
+        num_slices = len(image_slices)
+        image_height, image_width, _ = image_slices[0].shape
+
+        # Convert the focal length to pixels by scaling it with image_width / sensor_width
+        focal_length_px = (image_width * self.focal_length) / sensor_width
+
+        # Intrinsics: same focal length on both axes, principal point at the image center
+        camera_matrix = np.array([[focal_length_px, 0, image_width / 2],
+                                [0, focal_length_px, image_height / 2],
+                                [0, 0, 1]], dtype=np.float32)
+
+        # Build one quad per slice, centered on the z axis at that slice's depth
+        card_corners_3d_list = []
+        # Thresholds run from 0 to 255; a threshold of 255 lands at z = 0, a threshold of 0 at z = max_distance.
+        for i in range(num_slices):
+            z = self.max_distance * ((255 - depths[i]) / 255.0)
+
+            # Scale by (z + camera_distance) / focal_length_px: seen from z = -camera_distance the card spans the image
+            card_width = (image_width * (z + self.camera_distance)) / focal_length_px
+            card_height = (image_height * (z + self.camera_distance)) / focal_length_px
+
+            card_corners_3d = np.array([
+                [-card_width / 2, -card_height / 2, z],
+                [card_width / 2, -card_height / 2, z],
+                [card_width / 2, card_height / 2, z],
+                [-card_width / 2, card_height / 2, z]
+            ], dtype=np.float32)
+            card_corners_3d_list.append(card_corners_3d)
+
+        return camera_matrix, card_corners_3d_list
+
+
     def to_json(self):
         return {
             'position': self._camera_position.tolist(),
diff --git a/components.py b/components.py
index 03e2b68..8b52753 100644
--- a/components.py
+++ b/components.py
@@ -21,7 +21,7 @@
 from controller import AppState, CompositeMode
 from utils import to_image_url, filename_add_version, find_square_bounding_box
 from inpainting import patch_image, create_inpainting_pipeline
-from segmentation import setup_camera_and_cards, render_view, remove_mask_from_alpha
+from segmentation import render_view, remove_mask_from_alpha
 from stabilityai import StabilityAI
 
 
@@ -1502,12 +1502,9 @@ def make_navigation_callbacks(app):
         Input(C.NAV_ZOOM_IN, 'n_clicks'),
         Input(C.NAV_ZOOM_OUT, 'n_clicks'),
         State(C.STORE_APPSTATE_FILENAME, 'data'),
-        State(C.SLIDER_CAMERA_DISTANCE, 'value'),
-        State(C.SLIDER_FOCAL_LENGTH, 'value'),
-        State(C.SLIDER_MAX_DISTANCE, 'value'),
         State(C.LOGS_DATA, 'data'),
         prevent_initial_call=True)
-    def navigate_image(reset, up, down, left, right, zoom_in, zoom_out, filename, camera_distance, focal_length, max_distance, logs):
+    def navigate_image(reset, up, down, left, right, zoom_in, zoom_out, filename, logs):
         if filename is None:
             raise PreventUpdate()
 
@@ -1526,7 +1523,7 @@ def navigate_image(reset, up, down, left, right, zoom_in, zoom_out, filename, ca
 
         if nav_clicked == C.NAV_RESET:
             camera_position = np.array(
-                [0, 0, -camera_distance], dtype=np.float32)
+                [0, 0, -state.camera.camera_distance], dtype=np.float32)
         else:
             # Move the camera position based on the navigation button clicked
             # The distance should be configurable
@@ -1542,13 +1539,9 @@ def navigate_image(reset, up, down, left, right, zoom_in, zoom_out, filename, ca
             camera_position += switch[nav_clicked]
 
         state.camera.camera_position = camera_position
-        state.camera.camera_distance = camera_distance
-        state.camera.focal_length = focal_length
-        state.camera.max_distance = max_distance
 
-        camera_matrix, card_corners_3d_list = setup_camera_and_cards(
-            state.image_slices, state.image_depths,
-            state.camera.camera_distance, state.camera.max_distance, state.camera.focal_length)
+        camera_matrix, card_corners_3d_list = state.camera.setup_camera_and_cards(
+            state.image_slices, state.image_depths)
 
         image = render_view(state.image_slices, camera_matrix,
                             card_corners_3d_list, camera_position)
diff --git a/segmentation.py b/segmentation.py
index 109a6a2..2cd9064 100755
--- a/segmentation.py
+++ b/segmentation.py
@@ -18,6 +18,7 @@
 
 # for exporting a 3d scene
 from gltf import export_gltf
+from camera import Camera
 
 
 def generate_depth_map(image, model: DepthEstimationModel, progress_callback=None):
@@ -139,53 +140,6 @@ def create_slice_from_mask(image, mask, num_expand=50):
     return masked_image
 
 
-def setup_camera_and_cards(image_slices, depths, camera_distance=100.0, max_distance=100.0, focal_length=100.0, sensor_width=35.0):
-    """
-    Set up the camera intrinsic parameters and the card corners in 3D space.
-
-    Args:
-        image_slices (list): A list of image slices.
-        depths (list): A list of threshold depths for each image slice.
-        camera_distance (float, optional): The distance between the camera and the cards. Defaults to 100.0.
-        max_distance (float, optional): The maximum distance for the cards. Defaults to 100.0.
-        focal_length (float, optional): The focal length of the camera. Defaults to 100.0.
-        sensor_width (float, optional): The width of the camera sensor. Defaults to 35.0.
-
-    Returns:
-        tuple: A tuple containing the camera matrix and a list of card corners in 3D space.
-    """
-    num_slices = len(image_slices)
-    image_height, image_width, _ = image_slices[0].shape
-
-    # Calculate the focal length in pixels
-    focal_length_px = (image_width * focal_length) / sensor_width
-
-    # Set up the camera intrinsic parameters
-    camera_matrix = np.array([[focal_length_px, 0, image_width / 2],
-                              [0, focal_length_px, image_height / 2],
-                              [0, 0, 1]], dtype=np.float32)
-
-    # Set up the card corners in 3D space
-    card_corners_3d_list = []
-    # The thresholds start with 0 and end with 255. We want the closest card to be at 0.
-    for i in range(num_slices):
-        z = max_distance * ((255 - depths[i]) / 255.0)
-
-        # Calculate the 3D points of the card corners
-        card_width = (image_width * (z + camera_distance)) / focal_length_px
-        card_height = (image_height * (z + camera_distance)) / focal_length_px
-
-        card_corners_3d = np.array([
-            [-card_width / 2, -card_height / 2, z],
-            [card_width / 2, -card_height / 2, z],
-            [card_width / 2, card_height / 2, z],
-            [-card_width / 2, card_height / 2, z]
-        ], dtype=np.float32)
-        card_corners_3d_list.append(card_corners_3d)
-
-    return camera_matrix, card_corners_3d_list
-
-
 def render_view(image_slices, camera_matrix, card_corners_3d_list, camera_position):
     """
     Render the current view of the camera.
@@ -384,11 +338,8 @@ def process_image(image_path, output_path, num_slices=5,
             image_slices.append(slice_image)
 
     # Set up the camera and cards
-    camera_distance = 100.0
-    max_distance = 500.0
-    focal_length = 100.0
-    camera_matrix, card_corners_3d_list = setup_camera_and_cards(
-        image_slices, thresholds[1:], camera_distance, max_distance, focal_length)
+    camera = Camera(100.0, 500.0, 100.0)
+    camera_matrix, card_corners_3d_list = camera.setup_camera_and_cards(image_slices, thresholds[1:])
 
     # Render the initial view
     camera_position = np.array([0, 0, -100], dtype=np.float32)
diff --git a/test_webui.py b/test_webui.py
index dfc6e83..0a1f38c 100644
--- a/test_webui.py
+++ b/test_webui.py
@@ -10,7 +10,6 @@
     update_threshold_values, click_event,
     copy_to_clipboard, export_state_as_gltf, slice_upload, update_slices)
 from controller import AppState
-from segmentation import setup_camera_and_cards
 from utils import to_image_url
 from camera import Camera
 import constants as C
@@ -250,18 +249,21 @@ def setUp(self):
         self.state.depth_filename.return_value = self.mock_depth_file
         self.mock_depth_file.exists.return_value = True
 
+        self.camera = Camera(10, 100, 50)
+
         self.state.upscaled_filename.return_value = Path("upscaled_file.png")
         self.state.image_slices_filenames = [
             Path(f"slice_{i}.png") for i in range(3)]
         self.state.MODEL_FILE = "model.gltf"
+        self.state.camera = self.camera
 
     @patch("webui.generate_depth_map")
     @patch("webui.postprocess_depth_map")
     @patch("webui.export_gltf")
     def test_export_state_as_gltf(self, mock_export_gltf, mock_postprocess_depth_map, mock_generate_depth_map):
         # Test case 1: Displacement scale is 0
-        camera_matrix, card_corners_3d_list = setup_camera_and_cards(
-            self.state.image_slices, self.state.image_depths, 10, 100, 50)
+        camera_matrix, card_corners_3d_list = self.camera.setup_camera_and_cards(
+            self.state.image_slices, self.state.image_depths)
         mock_export_gltf.return_value = Path("output.gltf")
 
         result = export_state_as_gltf(
@@ -294,8 +296,8 @@ def test_export_state_as_gltf(self, mock_export_gltf, mock_postprocess_depth_map
     def test_export_state_as_gltf_with_displacement(
             self, mock_export_gltf, mock_postprocess_depth_map, mock_generate_depth_map, mock_image_fromarray):
         # Test case 2: Displacement scale is greater than 0
-        camera_matrix, card_corners_3d_list = setup_camera_and_cards(
-            self.state.image_slices, self.state.image_depths, 10, 100, 50)
+        camera_matrix, card_corners_3d_list = self.camera.setup_camera_and_cards(
+            self.state.image_slices, self.state.image_depths)
 
         mock_export_gltf.return_value = Path("output.gltf")
 
@@ -339,8 +341,8 @@ def test_export_state_as_gltf_with_displacement(
     @patch("webui.export_gltf")
     def test_export_state_as_gltf_with_upscaled(self, mock_export_gltf):
         # Test case 3: Upscaled slices exist
-        camera_matrix, card_corners_3d_list = setup_camera_and_cards(
-            self.state.image_slices, self.state.image_depths, 10, 100, 50)
+        camera_matrix, card_corners_3d_list = self.camera.setup_camera_and_cards(
+            self.state.image_slices, self.state.image_depths)
 
         # Pretend the upscaled file exists
         mock_upscaled_file = MagicMock()
diff --git a/webui.py b/webui.py
index b5bfaa1..4cefc9e 100755
--- a/webui.py
+++ b/webui.py
@@ -15,7 +15,6 @@
     analyze_depth_histogram,
     generate_image_slices,
     create_slice_from_mask,
-    setup_camera_and_cards,
     export_gltf,
     blend_with_alpha,
     remove_mask_from_alpha,
@@ -1214,9 +1213,8 @@ def export_state_as_gltf(
         state, filename,
         camera,
         displacement_scale, modelname='midas', support_dof=False):
-    camera_matrix, card_corners_3d_list = setup_camera_and_cards(
-        state.image_slices,
-        state.image_depths, camera.camera_distance, camera.max_distance, camera.focal_length)
+    camera_matrix, card_corners_3d_list = state.camera.setup_camera_and_cards(
+        state.image_slices, state.image_depths)
 
     depth_filenames = []
     if displacement_scale > 0:
@@ -1331,20 +1329,18 @@ def slice_upload(contents, filename, logs):
               Input(C.BTN_EXPORT_ANIMATION, 'n_clicks'),
               State(C.STORE_APPSTATE_FILENAME, 'data'),
               State(C.SLIDER_NUM_FRAMES, 'value'),
-              State(C.SLIDER_CAMERA_DISTANCE, 'value'),
-              State(C.SLIDER_MAX_DISTANCE, 'value'),
-              State(C.SLIDER_FOCAL_LENGTH, 'value'),
               State(C.LOGS_DATA, 'data'),
               running=[(Output(C.BTN_EXPORT_ANIMATION, 'disabled'), True, False)],
               prevent_initial_call=True)
-def export_animation(n_clicks, filename, num_frames, camera_distance, max_distance, focal_length, logs):
+def export_animation(n_clicks, filename, num_frames, logs):
     if n_clicks is None or filename is None:
         raise PreventUpdate()
 
     state = AppState.from_cache(filename)
 
-    camera_matrix, card_corners_3d_list = setup_camera_and_cards(
-        state.image_slices, state.image_depths, camera_distance, max_distance, focal_length)
+    camera_distance = state.camera.camera_distance
+    camera_matrix, card_corners_3d_list = state.camera.setup_camera_and_cards(
+        state.image_slices, state.image_depths)
 
     # Render the initial view
     camera_position = np.array([0, 0, -camera_distance], dtype=np.float32)