From 5516b5cccf90326df13214262053543f4fb50e20 Mon Sep 17 00:00:00 2001 From: Sajid Alam Date: Wed, 9 Oct 2024 11:46:17 +0100 Subject: [PATCH 01/13] Update database.py Signed-off-by: Sajid Alam --- package/kedro_viz/database.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/package/kedro_viz/database.py b/package/kedro_viz/database.py index 4d97ca3187..85f0385210 100644 --- a/package/kedro_viz/database.py +++ b/package/kedro_viz/database.py @@ -10,6 +10,13 @@ def make_db_session_factory(session_store_location: str) -> sessionmaker: """SQLAlchemy connection to a SQLite DB""" database_url = f"sqlite:///{session_store_location}" engine = create_engine(database_url, connect_args={"check_same_thread": False}) + + # Enable Write-Ahead Logging (WAL) + with engine.begin() as conn: + conn.execute("PRAGMA journal_mode=WAL;") + conn.execute("PRAGMA synchronous=NORMAL;") + conn.execute("PRAGMA busy_timeout=30000;") # 30 seconds timeout for locks + session_class = sessionmaker(engine) # TODO: making db session factory shouldn't depend on models. # So want to move the table creation elsewhere ideally. From 84c1f7a6a350d95166615dd816831490353b095f Mon Sep 17 00:00:00 2001 From: Sajid Alam Date: Wed, 9 Oct 2024 11:56:08 +0100 Subject: [PATCH 02/13] Update database.py Signed-off-by: Sajid Alam --- package/kedro_viz/database.py | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/package/kedro_viz/database.py b/package/kedro_viz/database.py index 85f0385210..ce599f244c 100644 --- a/package/kedro_viz/database.py +++ b/package/kedro_viz/database.py @@ -7,19 +7,21 @@ def make_db_session_factory(session_store_location: str) -> sessionmaker: - """SQLAlchemy connection to a SQLite DB""" + """SQLAlchemy connection to a SQLite DB with WAL mode enabled.""" database_url = f"sqlite:///{session_store_location}" - engine = create_engine(database_url, connect_args={"check_same_thread": False}) + engine = create_engine( + database_url, + connect_args={"check_same_thread": False} # Allows multi-threaded access + ) - # Enable Write-Ahead Logging (WAL) - with engine.begin() as conn: - conn.execute("PRAGMA journal_mode=WAL;") - conn.execute("PRAGMA synchronous=NORMAL;") - conn.execute("PRAGMA busy_timeout=30000;") # 30 seconds timeout for locks + # Enable Write-Ahead Logging (WAL) mode and other optimizations. + with engine.connect() as conn: + conn.execute("PRAGMA journal_mode=WAL") + conn.execute("PRAGMA synchronous=NORMAL") + conn.execute("PRAGMA busy_timeout=30000") # 30 seconds timeout for locks - session_class = sessionmaker(engine) - # TODO: making db session factory shouldn't depend on models. - # So want to move the table creation elsewhere ideally. - # But this means returning engine as well as session class. + # Create the database tables if they do not exist. Base.metadata.create_all(bind=engine) - return session_class + + # Return a session factory bound to the engine. + return sessionmaker(bind=engine) From 51d5bbe5669473e23ce977e2601ee4af28e824c7 Mon Sep 17 00:00:00 2001 From: Sajid Alam Date: Wed, 9 Oct 2024 12:15:04 +0100 Subject: [PATCH 03/13] Update database.py Signed-off-by: Sajid Alam --- package/kedro_viz/database.py | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/package/kedro_viz/database.py b/package/kedro_viz/database.py index ce599f244c..308d576470 100644 --- a/package/kedro_viz/database.py +++ b/package/kedro_viz/database.py @@ -5,23 +5,31 @@ from kedro_viz.models.experiment_tracking import Base +import shutil +import os +from pathlib import Path + def make_db_session_factory(session_store_location: str) -> sessionmaker: """SQLAlchemy connection to a SQLite DB with WAL mode enabled.""" - database_url = f"sqlite:///{session_store_location}" + # Use a temporary path for database operations to avoid CIFS locks. + temp_db_path = "/tmp/session_store.db" + shutil.copy(session_store_location, temp_db_path) + + database_url = f"sqlite:///{temp_db_path}" engine = create_engine( database_url, - connect_args={"check_same_thread": False} # Allows multi-threaded access + connect_args={"check_same_thread": False} ) - # Enable Write-Ahead Logging (WAL) mode and other optimizations. with engine.connect() as conn: - conn.execute("PRAGMA journal_mode=WAL") - conn.execute("PRAGMA synchronous=NORMAL") - conn.execute("PRAGMA busy_timeout=30000") # 30 seconds timeout for locks + conn.execute(text("PRAGMA journal_mode=WAL;")) + conn.execute(text("PRAGMA synchronous=NORMAL;")) + conn.execute(text("PRAGMA busy_timeout=60000;")) - # Create the database tables if they do not exist. Base.metadata.create_all(bind=engine) - # Return a session factory bound to the engine. + # Copy back the database to persistent location after session operations. + shutil.copy(temp_db_path, session_store_location) + return sessionmaker(bind=engine) From 475437d519bb40095125a7fc837a206642f69b2f Mon Sep 17 00:00:00 2001 From: Sajid Alam Date: Wed, 9 Oct 2024 12:25:31 +0100 Subject: [PATCH 04/13] Update database.py Signed-off-by: Sajid Alam --- package/kedro_viz/database.py | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/package/kedro_viz/database.py b/package/kedro_viz/database.py index 308d576470..385ff39b59 100644 --- a/package/kedro_viz/database.py +++ b/package/kedro_viz/database.py @@ -11,25 +11,20 @@ def make_db_session_factory(session_store_location: str) -> sessionmaker: - """SQLAlchemy connection to a SQLite DB with WAL mode enabled.""" + """SQLAlchemy connection to a SQLite DB""" # Use a temporary path for database operations to avoid CIFS locks. temp_db_path = "/tmp/session_store.db" shutil.copy(session_store_location, temp_db_path) database_url = f"sqlite:///{temp_db_path}" - engine = create_engine( - database_url, - connect_args={"check_same_thread": False} - ) - - with engine.connect() as conn: - conn.execute(text("PRAGMA journal_mode=WAL;")) - conn.execute(text("PRAGMA synchronous=NORMAL;")) - conn.execute(text("PRAGMA busy_timeout=60000;")) - + engine = create_engine(database_url, connect_args={"check_same_thread": False}) + session_class = sessionmaker(engine) + # TODO: making db session factory shouldn't depend on models. + # So want to move the table creation elsewhere ideally. + # But this means returning engine as well as session class. Base.metadata.create_all(bind=engine) # Copy back the database to persistent location after session operations. shutil.copy(temp_db_path, session_store_location) - return sessionmaker(bind=engine) + return session_class From 041b390c5a45a0de474691510d2582560c0bbb74 Mon Sep 17 00:00:00 2001 From: Sajid Alam Date: Wed, 9 Oct 2024 12:36:27 +0100 Subject: [PATCH 05/13] Update database.py Signed-off-by: Sajid Alam --- package/kedro_viz/database.py | 38 ++++++++++++++++------------------- 1 file changed, 17 insertions(+), 21 deletions(-) diff --git a/package/kedro_viz/database.py b/package/kedro_viz/database.py index 385ff39b59..f9de524c51 100644 --- a/package/kedro_viz/database.py +++ b/package/kedro_viz/database.py @@ -1,30 +1,26 @@ """Database management layer based on SQLAlchemy""" -from sqlalchemy import create_engine +from sqlalchemy import create_engine, text from sqlalchemy.orm import sessionmaker from kedro_viz.models.experiment_tracking import Base -import shutil -import os -from pathlib import Path - - def make_db_session_factory(session_store_location: str) -> sessionmaker: - """SQLAlchemy connection to a SQLite DB""" - # Use a temporary path for database operations to avoid CIFS locks. - temp_db_path = "/tmp/session_store.db" - shutil.copy(session_store_location, temp_db_path) - - database_url = f"sqlite:///{temp_db_path}" - engine = create_engine(database_url, connect_args={"check_same_thread": False}) - session_class = sessionmaker(engine) - # TODO: making db session factory shouldn't depend on models. - # So want to move the table creation elsewhere ideally. - # But this means returning engine as well as session class. + """SQLAlchemy connection to a SQLite DB with WAL mode enabled.""" + database_url = f"sqlite:///{session_store_location}" + engine = create_engine( + database_url, + connect_args={"check_same_thread": False} + ) + + # Apply PRAGMA settings for WAL mode. + with engine.connect() as conn: + conn.execute(text("PRAGMA journal_mode=WAL;")) + conn.execute(text("PRAGMA synchronous=NORMAL;")) + conn.execute(text("PRAGMA busy_timeout=30000;")) # 30 seconds timeout for locks + + # Create the database tables if they do not exist. Base.metadata.create_all(bind=engine) - # Copy back the database to persistent location after session operations. - shutil.copy(temp_db_path, session_store_location) - - return session_class + # Return a session factory bound to the engine. + return sessionmaker(bind=engine) From e4adba0fde39e709341bc9cb01560361cda659f0 Mon Sep 17 00:00:00 2001 From: Sajid Alam Date: Wed, 9 Oct 2024 12:51:42 +0100 Subject: [PATCH 06/13] Update database.py Signed-off-by: Sajid Alam --- package/kedro_viz/database.py | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/package/kedro_viz/database.py b/package/kedro_viz/database.py index f9de524c51..c56fecc183 100644 --- a/package/kedro_viz/database.py +++ b/package/kedro_viz/database.py @@ -2,6 +2,7 @@ from sqlalchemy import create_engine, text from sqlalchemy.orm import sessionmaker +import os from kedro_viz.models.experiment_tracking import Base @@ -13,11 +14,19 @@ def make_db_session_factory(session_store_location: str) -> sessionmaker: connect_args={"check_same_thread": False} ) - # Apply PRAGMA settings for WAL mode. - with engine.connect() as conn: - conn.execute(text("PRAGMA journal_mode=WAL;")) - conn.execute(text("PRAGMA synchronous=NORMAL;")) - conn.execute(text("PRAGMA busy_timeout=30000;")) # 30 seconds timeout for locks + # Check if we are running in an Azure ML environment. + is_azure_ml = any( + var in os.environ for var in [ + "AZUREML_ARM_SUBSCRIPTION", + "AZUREML_ARM_RESOURCEGROUP", + "AZUREML_RUN_ID" + ] + ) + + # Apply WAL mode only if we are running in Azure ML. + if is_azure_ml: + with engine.connect() as conn: + conn.execute(text("PRAGMA journal_mode=WAL;")) # Create the database tables if they do not exist. Base.metadata.create_all(bind=engine) From 5b0def112ed2acb6e692761f3d0a6e61f46e11fa Mon Sep 17 00:00:00 2001 From: Sajid Alam Date: Wed, 9 Oct 2024 12:57:56 +0100 Subject: [PATCH 07/13] lint Signed-off-by: Sajid Alam --- package/kedro_viz/database.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/package/kedro_viz/database.py b/package/kedro_viz/database.py index c56fecc183..916db53bc0 100644 --- a/package/kedro_viz/database.py +++ b/package/kedro_viz/database.py @@ -1,25 +1,25 @@ """Database management layer based on SQLAlchemy""" +import os + from sqlalchemy import create_engine, text from sqlalchemy.orm import sessionmaker -import os from kedro_viz.models.experiment_tracking import Base + def make_db_session_factory(session_store_location: str) -> sessionmaker: """SQLAlchemy connection to a SQLite DB with WAL mode enabled.""" database_url = f"sqlite:///{session_store_location}" - engine = create_engine( - database_url, - connect_args={"check_same_thread": False} - ) + engine = create_engine(database_url, connect_args={"check_same_thread": False}) # Check if we are running in an Azure ML environment. is_azure_ml = any( - var in os.environ for var in [ + var in os.environ + for var in [ "AZUREML_ARM_SUBSCRIPTION", "AZUREML_ARM_RESOURCEGROUP", - "AZUREML_RUN_ID" + "AZUREML_RUN_ID", ] ) From 59adde6c3ab75e4814d1563e11b51d570f7fdecc Mon Sep 17 00:00:00 2001 From: Sajid Alam Date: Wed, 9 Oct 2024 13:20:02 +0100 Subject: [PATCH 08/13] add test Signed-off-by: Sajid Alam --- .../test_integrations/test_sqlite_store.py | 21 ++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/package/tests/test_integrations/test_sqlite_store.py b/package/tests/test_integrations/test_sqlite_store.py index a90d72b9bd..ec14c68730 100644 --- a/package/tests/test_integrations/test_sqlite_store.py +++ b/package/tests/test_integrations/test_sqlite_store.py @@ -8,7 +8,7 @@ import boto3 import pytest from moto import mock_aws -from sqlalchemy import create_engine, func, select +from sqlalchemy import create_engine, func, select, text from sqlalchemy.orm import sessionmaker from kedro_viz.database import make_db_session_factory @@ -372,3 +372,22 @@ def test_sync_with_merge_error(self, mocker, store_path, remote_path, caplog): mock_merge.assert_called_once() mock_upload.assert_called_once() assert "Merge failed on sync: Merge failed" in caplog.text + + def test_make_db_session_factory_with_azure_env_var(self, mocker, tmp_path): + """Test that WAL mode is enabled when running in an Azure environment.""" + mocker.patch.dict( + os.environ, + { + "AZUREML_ARM_SUBSCRIPTION": "dummy_value", + "AZUREML_ARM_RESOURCEGROUP": "dummy_value", + }, + ) + db_location = str(tmp_path / "test_session_store.db") + session_class = make_db_session_factory(db_location) + + # Ensure that the session can be created without issues. + with session_class() as session: + assert session is not None + # Check if the database is using WAL mode by querying the PRAGMA + result = session.execute(text("PRAGMA journal_mode;")).scalar() + assert result == "wal" From 99b0aa26ce72b1f797a69692fc5b56b2c8a486ac Mon Sep 17 00:00:00 2001 From: Sajid Alam Date: Wed, 9 Oct 2024 14:29:39 +0100 Subject: [PATCH 09/13] Update database.py Signed-off-by: Sajid Alam --- package/kedro_viz/database.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package/kedro_viz/database.py b/package/kedro_viz/database.py index 916db53bc0..9300416ab2 100644 --- a/package/kedro_viz/database.py +++ b/package/kedro_viz/database.py @@ -9,7 +9,7 @@ def make_db_session_factory(session_store_location: str) -> sessionmaker: - """SQLAlchemy connection to a SQLite DB with WAL mode enabled.""" + """SQLAlchemy connection to a SQLite DB""" database_url = f"sqlite:///{session_store_location}" engine = create_engine(database_url, connect_args={"check_same_thread": False}) From 21a2c701af203397bcd493260cb0911693e5b99f Mon Sep 17 00:00:00 2001 From: Sajid Alam Date: Wed, 9 Oct 2024 14:31:25 +0100 Subject: [PATCH 10/13] revert comments Signed-off-by: Sajid Alam --- package/kedro_viz/database.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/package/kedro_viz/database.py b/package/kedro_viz/database.py index 9300416ab2..f108c9c20f 100644 --- a/package/kedro_viz/database.py +++ b/package/kedro_viz/database.py @@ -12,6 +12,9 @@ def make_db_session_factory(session_store_location: str) -> sessionmaker: """SQLAlchemy connection to a SQLite DB""" database_url = f"sqlite:///{session_store_location}" engine = create_engine(database_url, connect_args={"check_same_thread": False}) + # TODO: making db session factory shouldn't depend on models. + # So want to move the table creation elsewhere ideally. + # But this means returning engine as well as session class. # Check if we are running in an Azure ML environment. is_azure_ml = any( From 7e982a9c3a14483a7efb08ba15ee6687aeafc657 Mon Sep 17 00:00:00 2001 From: Sajid Alam Date: Thu, 10 Oct 2024 11:45:09 +0100 Subject: [PATCH 11/13] separate azure logic Signed-off-by: Sajid Alam --- package/kedro_viz/database.py | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/package/kedro_viz/database.py b/package/kedro_viz/database.py index f108c9c20f..5a62e32bcb 100644 --- a/package/kedro_viz/database.py +++ b/package/kedro_viz/database.py @@ -8,15 +8,8 @@ from kedro_viz.models.experiment_tracking import Base -def make_db_session_factory(session_store_location: str) -> sessionmaker: - """SQLAlchemy connection to a SQLite DB""" - database_url = f"sqlite:///{session_store_location}" - engine = create_engine(database_url, connect_args={"check_same_thread": False}) - # TODO: making db session factory shouldn't depend on models. - # So want to move the table creation elsewhere ideally. - # But this means returning engine as well as session class. - - # Check if we are running in an Azure ML environment. +def configure_wal_for_azure(engine): + """Applies WAL mode to SQLite if running in an Azure ML environment.""" is_azure_ml = any( var in os.environ for var in [ @@ -25,12 +18,22 @@ def make_db_session_factory(session_store_location: str) -> sessionmaker: "AZUREML_RUN_ID", ] ) - - # Apply WAL mode only if we are running in Azure ML. if is_azure_ml: with engine.connect() as conn: conn.execute(text("PRAGMA journal_mode=WAL;")) + +def make_db_session_factory(session_store_location: str) -> sessionmaker: + """SQLAlchemy connection to a SQLite DB""" + database_url = f"sqlite:///{session_store_location}" + engine = create_engine(database_url, connect_args={"check_same_thread": False}) + # TODO: making db session factory shouldn't depend on models. + # So want to move the table creation elsewhere ideally. + # But this means returning engine as well as session class. + + # Check if we are running in an Azure ML environment if so enable WAL mode. + configure_wal_for_azure(engine) + # Create the database tables if they do not exist. Base.metadata.create_all(bind=engine) From 18ffe27b418c25fd98f8da43d63e6a83b98bd62a Mon Sep 17 00:00:00 2001 From: Sajid Alam Date: Thu, 10 Oct 2024 12:48:08 +0100 Subject: [PATCH 12/13] Update RELEASE.md Signed-off-by: Sajid Alam --- RELEASE.md | 1 + 1 file changed, 1 insertion(+) diff --git a/RELEASE.md b/RELEASE.md index e68596830b..bf593350c2 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -16,6 +16,7 @@ Please follow the established format: - Improve `kedro viz build` usage documentation (#2126) - Fix unserializable parameters value (#2122) +- Enable SQLite WAL mode for Azure ML to fix database locking issues (#2131) # Release 10.0.0 From b20ac982d68503ca57e6141f2de9ac5d896b11de Mon Sep 17 00:00:00 2001 From: Sajid Alam <90610031+SajidAlamQB@users.noreply.github.com> Date: Mon, 14 Oct 2024 11:42:46 +0100 Subject: [PATCH 13/13] Update RELEASE.md Signed-off-by: Sajid Alam <90610031+SajidAlamQB@users.noreply.github.com> --- RELEASE.md | 1 - 1 file changed, 1 deletion(-) diff --git a/RELEASE.md b/RELEASE.md index d6c8ac0d1e..66d6a51a24 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -20,7 +20,6 @@ Please follow the established format: - Enable SQLite WAL mode for Azure ML to fix database locking issues (#2131) - # Release 10.0.0 ## Major features and improvements