From 618d1ca51bef020722450167db86e6b3c939e4c8 Mon Sep 17 00:00:00 2001 From: Stefan Binder Date: Sun, 24 Dec 2023 13:50:41 +0100 Subject: [PATCH 1/5] Rename .to_ibis to .to_ibis_table. Add support for passing a connection and schema name --- dbt_ibis/_compile.py | 2 +- dbt_ibis/_references.py | 43 ++++++++++++++++++++++++++++++++++------- tests/test_dbt_ibis.py | 12 ++++++------ 3 files changed, 43 insertions(+), 14 deletions(-) diff --git a/dbt_ibis/_compile.py b/dbt_ibis/_compile.py index f3b9eb9..3b311d7 100644 --- a/dbt_ibis/_compile.py +++ b/dbt_ibis/_compile.py @@ -96,7 +96,7 @@ def compile_ibis_expressions_to_sql( ) else: raise ValueError(f"Unknown reference type: {type(r)}") - ibis_table = r.to_ibis(schema) + ibis_table = r.to_ibis_table(schema) ibis_table = _set_letter_case_on_ibis_expression( ibis_table, letter_case_in_expr ) diff --git a/dbt_ibis/_references.py b/dbt_ibis/_references.py index a518a15..e675b3d 100644 --- a/dbt_ibis/_references.py +++ b/dbt_ibis/_references.py @@ -19,13 +19,38 @@ class _Reference(ABC): def _ibis_table_name(self) -> str: pass - def to_ibis(self, schema: Union[ibis.Schema, dict[str, dt.DataType]]) -> ir.Table: - if schema is None: - raise NotImplementedError - return ibis.table( - schema, - name=self._ibis_table_name, - ) + @abstractproperty + def table_name(self) -> str: + pass + + def to_ibis_table( + self, + schema_definition: Union[ibis.Schema, dict[str, dt.DataType]] | None = None, + con: ibis.BaseBackend | None = None, + schema_name: str | None = None, + database_name: str | None = None, + ) -> ir.Table: + if schema_definition is None and con is None: + raise ValueError( + "Either schema_definition or an Ibis backend need to be provided" + ) + if schema_definition is not None and con is not None: + raise ValueError( + "Either schema_definition or an Ibis backend need to be provided" + + " but not both" + ) + + if schema_definition is not None: + return ibis.table( + schema_definition, + name=self._ibis_table_name, + ) 
+ else: + return con.table( + self.table_name, + schema=schema_name, + database=database_name, + ) @dataclass @@ -38,6 +63,10 @@ class ref(_Reference): def _ibis_table_name(self) -> str: return _REF_IDENTIFIER_PREFIX + self.name + _REF_IDENTIFIER_SUFFIX + @property + def table_name(self) -> str: + return self.name + @dataclass class source(_Reference): diff --git a/tests/test_dbt_ibis.py b/tests/test_dbt_ibis.py index 770dd25..780fa82 100644 --- a/tests/test_dbt_ibis.py +++ b/tests/test_dbt_ibis.py @@ -147,7 +147,7 @@ def test_ref(): assert stg_orders.name == model_name schema = ibis.schema({"col1": "int"}) - ibis_table = stg_orders.to_ibis(schema=schema) + ibis_table = stg_orders.to_ibis_table(schema_definition=schema) assert isinstance(ibis_table, ir.Table) assert ibis_table.schema() == schema @@ -163,7 +163,7 @@ def test_source(): assert orders.table_name == table_name schema = ibis.schema({"col1": "int"}) - ibis_table = orders.to_ibis(schema=schema) + ibis_table = orders.to_ibis_table(schema_definition=schema) assert isinstance(ibis_table, ir.Table) assert ibis_table.schema() == schema @@ -395,13 +395,13 @@ def test_columns_to_ibis_schema(): def test_to_dbt_sql(): orders = source("source1", "orders") - orders_table = orders.to_ibis( - schema=ibis.schema({"order_id": "int", "customer_id": "int"}) + orders_table = orders.to_ibis_table( + schema_definition=ibis.schema({"order_id": "int", "customer_id": "int"}) ) stg_customers = ref("stg_customers") - stg_customers_table = stg_customers.to_ibis( - schema=ibis.schema({"customer_id": "int"}) + stg_customers_table = stg_customers.to_ibis_table( + schema_definition=ibis.schema({"customer_id": "int"}) ) model_expr = orders_table.join( From b95a865ccf137bcb59a431b334879faba29ea9e3 Mon Sep 17 00:00:00 2001 From: Stefan Binder Date: Sun, 24 Dec 2023 13:56:39 +0100 Subject: [PATCH 2/5] Move some extraction functionality into separate module --- dbt_ibis/__init__.py | 73 +++++++++++++------------------------------- 
dbt_ibis/_extract.py | 36 ++++++++++++++++++++++ pyproject.toml | 1 + 3 files changed, 58 insertions(+), 52 deletions(-) create mode 100644 dbt_ibis/_extract.py diff --git a/dbt_ibis/__init__.py b/dbt_ibis/__init__.py index 2038835..5076067 100644 --- a/dbt_ibis/__init__.py +++ b/dbt_ibis/__init__.py @@ -4,12 +4,9 @@ import logging import subprocess import sys -from importlib.machinery import SourceFileLoader -from importlib.util import module_from_spec, spec_from_loader from pathlib import Path -from typing import Callable, Optional, Union +from typing import Optional, Union -import ibis.expr.types as ir from dbt.cli.main import cli from dbt_ibis import _dialects @@ -19,6 +16,8 @@ from dbt_ibis._compile import ( compile_ibis_expressions_to_sql as _compile_ibis_expressions_to_sql, ) +from dbt_ibis._extract import get_expr_func as _get_expr_func +from dbt_ibis._extract import glob_in_paths as _glob_in_paths from dbt_ibis._logging import configure_logging as _configure_logging from dbt_ibis._parse_dbt_project import ( disable_node_not_found_error as _disable_node_not_found_error, @@ -79,6 +78,24 @@ def compile_ibis_to_sql(dbt_parse_arguments: Optional[list[str]] = None) -> None logger.info("Finished compiling Ibis expressions to SQL") +def _get_ibis_expr_infos( + project_root: Union[str, Path], paths: list[str] +) -> list[_IbisExprInfo]: + ibis_files = _glob_in_paths( + project_root=project_root, + paths=paths, + pattern=f"**/*.{_IBIS_FILE_EXTENSION}", + ) + ibis_expr_infos: list[_IbisExprInfo] = [] + for file in ibis_files: + func = _get_expr_func(file) + depends_on = getattr(func, "depends_on", []) + ibis_expr_infos.append( + _IbisExprInfo(ibis_path=file, depends_on=depends_on, func=func) + ) + return ibis_expr_infos + + def _parse_cli_arguments() -> tuple[str, list[str]]: # First argument of sys.argv is path to this file. 
We then look for # the name of the actual dbt subcommand that the user wants to run and ignore @@ -100,54 +117,6 @@ def _parse_cli_arguments() -> tuple[str, list[str]]: return subcommand, args -def _get_ibis_expr_infos( - project_root: Union[str, Path], paths: list[str] -) -> list[_IbisExprInfo]: - ibis_files = _glob_in_paths( - project_root=project_root, - paths=paths, - pattern=f"**/*.{_IBIS_FILE_EXTENSION}", - ) - ibis_expr_infos: list[_IbisExprInfo] = [] - for file in ibis_files: - func = _get_expr_func(file) - depends_on = getattr(func, "depends_on", []) - ibis_expr_infos.append( - _IbisExprInfo(ibis_path=file, depends_on=depends_on, func=func) - ) - return ibis_expr_infos - - -def _glob_in_paths( - project_root: Union[str, Path], paths: list[str], pattern: str -) -> list[Path]: - if isinstance(project_root, str): - project_root = Path(project_root) - - matches: list[Path] = [] - for m_path in paths: - matches.extend(list((project_root / m_path).glob(pattern))) - return matches - - -def _get_expr_func(file: Path) -> Callable[..., ir.Table]: - # Name arguments to spec_from_loader and SourceFileLoader probably don't matter - # but maybe a good idea to keep them unique across the expressions - spec = spec_from_loader(file.stem, SourceFileLoader(file.stem, str(file))) - if spec is None: - raise ValueError(f"Could not load file: {file}") - expr_module = module_from_spec(spec) - if spec.loader is None: - raise ValueError(f"Could not load file: {file}") - spec.loader.exec_module(expr_module) - func = getattr(expr_module, "model", None) or getattr(expr_module, "test", None) - if func is None: - raise ValueError( - f"Could not find function called 'model' or 'test' in {str(file)}." 
- ) - return func - - def _clean_up_unused_sql_files( used_sql_files: list[Path], project_root: Union[str, Path], diff --git a/dbt_ibis/_extract.py b/dbt_ibis/_extract.py new file mode 100644 index 0000000..94c00f0 --- /dev/null +++ b/dbt_ibis/_extract.py @@ -0,0 +1,36 @@ +from importlib.machinery import SourceFileLoader +from importlib.util import module_from_spec, spec_from_loader +from pathlib import Path +from typing import Callable, Union + +from ibis import ir + + +def glob_in_paths( + project_root: Union[str, Path], paths: list[str], pattern: str +) -> list[Path]: + if isinstance(project_root, str): + project_root = Path(project_root) + + matches: list[Path] = [] + for m_path in paths: + matches.extend(list((project_root / m_path).glob(pattern))) + return matches + + +def get_expr_func(file: Path) -> Callable[..., ir.Table]: + # Name arguments to spec_from_loader and SourceFileLoader probably don't matter + # but maybe a good idea to keep them unique across the expressions + spec = spec_from_loader(file.stem, SourceFileLoader(file.stem, str(file))) + if spec is None: + raise ValueError(f"Could not load file: {file}") + expr_module = module_from_spec(spec) + if spec.loader is None: + raise ValueError(f"Could not load file: {file}") + spec.loader.exec_module(expr_module) + func = getattr(expr_module, "model", None) or getattr(expr_module, "test", None) + if func is None: + raise ValueError( + f"Could not find function called 'model' or 'test' in {str(file)}." 
+ ) + return func diff --git a/pyproject.toml b/pyproject.toml index db3fc05..c68e20f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -40,6 +40,7 @@ dev = [ "ibis-framework[duckdb]", ] doc = ["jupyter-book", "ghp-import"] +jupyter = ["nbformat"] [tool.hatch.version] path = "dbt_ibis/__init__.py" From 0368e65da2f6dd8598affa5d11ae7977baf0d064 Mon Sep 17 00:00:00 2001 From: Stefan Binder Date: Mon, 25 Dec 2023 10:59:42 +0100 Subject: [PATCH 3/5] Add a 'jupyter' subcommand to convert Ibis models to Jupyter notebooks for easier development --- dbt_ibis/__init__.py | 7 +++ dbt_ibis/_jupyter.py | 117 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 124 insertions(+) create mode 100644 dbt_ibis/_jupyter.py diff --git a/dbt_ibis/__init__.py b/dbt_ibis/__init__.py index 5076067..91b2fd6 100644 --- a/dbt_ibis/__init__.py +++ b/dbt_ibis/__init__.py @@ -145,6 +145,13 @@ def _clean_up_unused_sql_files( def main() -> None: + if sys.argv[1] == "develop": + from dbt_ibis._jupyter import convert_ibis_file_to_notebook + + convert_ibis_file_to_notebook(sys.argv[2]) + return + + # Normal dbt commands + precompile from here on dbt_subcommand, dbt_parse_arguments = _parse_cli_arguments() if dbt_subcommand != "deps": # If it's deps, we cannot yet parse the dbt project as it will raise diff --git a/dbt_ibis/_jupyter.py b/dbt_ibis/_jupyter.py new file mode 100644 index 0000000..9dee760 --- /dev/null +++ b/dbt_ibis/_jupyter.py @@ -0,0 +1,117 @@ +import inspect +import logging +import textwrap +from pathlib import Path + +from dbt_ibis._extract import get_expr_func + +try: + import nbformat as nbf +except ImportError as err: + raise ImportError( + "This functionality requires additional dependencies." + + " Run 'pip install dbt-ibis[jupyter]' to install them." 
+ ) from err + + +logger = logging.getLogger(__name__) + + +def convert_ibis_file_to_notebook(file_path: str | Path) -> None: + logger.info(f"Converting {file_path} to notebook") + if isinstance(file_path, str): + file_path = Path(file_path) + + # Split source code into multiple parts: + # - Code before the Ibis expression + # - Code after the Ibis expression + # - Ibis expression itself (i.e. 'model' or 'test' function) + # - References to other Ibis expressions based on depends_on + ibis_expr_func = get_expr_func(file_path) + + ibis_expr_function_source_code, start_line = inspect.getsourcelines(ibis_expr_func) + end_line = start_line + len(ibis_expr_function_source_code) - 1 + + source_code = file_path.read_text().splitlines() + + code_before_ibis_expr = source_code[: start_line - 1] + code_after_ibis_expr = source_code[end_line:] + + expr_references = [] + for reference_name, reference in zip( + inspect.signature(ibis_expr_func).parameters, + ibis_expr_func.depends_on, # type: ignore[attr-defined] + ): + reference_repr = repr(reference).replace("'", '"') + expr_references.append( + f"{reference_name} = {reference_repr}.to_ibis_table(con=con)" + ) + + cleaned_function_code = _remove_decorator(ibis_expr_function_source_code) + function_content = _unpack_function_conent(cleaned_function_code) + + # Create and save notebook + nb = _create_notebook( + ibis_expr_code=function_content, + code_before_ibis_expr=code_before_ibis_expr, + code_after_ibis_expr=code_after_ibis_expr, + depends_on_code=expr_references, + ) + notebook_path = file_path.with_suffix(".ipynb") + nbf.write(nb, notebook_path) + logger.info(f"Notebook saved as {notebook_path}") + + +def _remove_decorator(ibis_expr_function_source_code: list[str]) -> list[str]: + cleaned_function_code = [] + def_started = False + for line in ibis_expr_function_source_code: + if not def_started and not line.strip().startswith("def "): + continue + else: + def_started = True + cleaned_function_code.append(line) + + return 
cleaned_function_code + + +def _unpack_function_conent(cleaned_function_code: list[str]) -> str: + function_content = [] + indent_found = False + for line in cleaned_function_code: + if not indent_found and line.startswith((" ", "\t")): + indent_found = True + if indent_found: + function_content.append(line) + + function_content = [line.replace("return ", "") for line in function_content] + # New lines are already part of the source code here so no need to add them + function_content = textwrap.dedent("".join(function_content)) + return function_content + + +def _create_notebook( + *, + ibis_expr_code: str, + code_before_ibis_expr: list[str], + code_after_ibis_expr: list[str], + depends_on_code: list[str], +) -> nbf.NotebookNode: + nb = nbf.v4.new_notebook() + # Define all code before the Ibis expression as the expression + # might reference functionality defined there. In the ibis file, + # this was not an issue as that was all in a function. Now, we unpack + # the function -> references need to be valid. + nb["cells"] = [ + nbf.v4.new_code_cell( + source="\n".join( + [*code_before_ibis_expr, "", "", *code_after_ibis_expr] + ).strip() + ), + nbf.v4.new_markdown_cell("# Depends on"), + nbf.v4.new_code_cell("\n".join(depends_on_code)), + nbf.v4.new_markdown_cell("# Model"), + nbf.v4.new_code_cell(ibis_expr_code), + ] + + return nb From 0603b5260527c5dab1944a40d6c9027b86c9a91d Mon Sep 17 00:00:00 2001 From: Stefan Binder Date: Mon, 25 Dec 2023 13:21:04 +0100 Subject: [PATCH 4/5] Minor refactoring. Change subcommand to 'convert'. 
--- dbt_ibis/__init__.py | 19 ++++++++++++++--- dbt_ibis/_jupyter.py | 50 ++++++++++++++++++++++++++------------------ 2 files changed, 46 insertions(+), 23 deletions(-) diff --git a/dbt_ibis/__init__.py b/dbt_ibis/__init__.py index 91b2fd6..fda6e8d 100644 --- a/dbt_ibis/__init__.py +++ b/dbt_ibis/__init__.py @@ -145,10 +145,23 @@ def _clean_up_unused_sql_files( def main() -> None: - if sys.argv[1] == "develop": - from dbt_ibis._jupyter import convert_ibis_file_to_notebook + if sys.argv[1] == "convert": + file_path = Path(sys.argv[2]) + file_extension = file_path.suffix + from dbt_ibis._jupyter import ( + convert_ibis_file_to_notebook, + ) - convert_ibis_file_to_notebook(sys.argv[2]) + if file_extension == f".{_IBIS_FILE_EXTENSION}": + convert_ibis_file_to_notebook(file_path) + elif file_extension == ".ipynb": + raise NotImplementedError + # convert_notebook_to_ibis_file(file_path) + else: + raise ValueError( + f"Cannot convert file with extension {file_extension}." + + f" Only .{_IBIS_FILE_EXTENSION} and .ipynb are supported." + ) return # Normal dbt commands + precompile from here on diff --git a/dbt_ibis/_jupyter.py b/dbt_ibis/_jupyter.py index 9dee760..bf09b54 100644 --- a/dbt_ibis/_jupyter.py +++ b/dbt_ibis/_jupyter.py @@ -19,25 +19,28 @@ def convert_ibis_file_to_notebook(file_path: str | Path) -> None: logger.info(f"Converting {file_path} to notebook") + if isinstance(file_path, str): file_path = Path(file_path) # Split source code into multiple parts: - # - Code before the Ibis expression - # - Code after the Ibis expression # - Ibis expression itself (i.e. 'model' or 'test' function) + # - Other code, i.e. 
code before and after the Ibis expression # - References to other Ibis expressions based on depends_on ibis_expr_func = get_expr_func(file_path) - ibis_expr_function_source_code, start_line = inspect.getsourcelines(ibis_expr_func) - end_line = start_line + len(ibis_expr_function_source_code) - 1 - - source_code = file_path.read_text().splitlines() - - code_before_ibis_expr = source_code[: start_line - 1] - code_after_ibis_expr = source_code[end_line:] + ibis_expr_func_code, func_start_line = inspect.getsourcelines(ibis_expr_func) + other_code = _extract_other_code( + file_lines=file_path.read_text().splitlines(), + ibis_expr_func_code=ibis_expr_func_code, + func_start_line=func_start_line, + ) - expr_references = [] + expr_references: list[str] = [ + "# TODO: Create a connected Ibis backend", + "con = ...", + "", + ] for reference_name, reference in zip( inspect.signature(ibis_expr_func).parameters, ibis_expr_func.depends_on, # type: ignore[attr-defined] @@ -47,14 +50,13 @@ def convert_ibis_file_to_notebook(file_path: str | Path) -> None: f"{reference_name} = {reference_repr}.to_ibis_table(con=con)" ) - cleaned_function_code = _remove_decorator(ibis_expr_function_source_code) + cleaned_function_code = _remove_decorator(ibis_expr_func_code) function_content = _unpack_function_conent(cleaned_function_code) # Create and save notebook nb = _create_notebook( ibis_expr_code=function_content, - code_before_ibis_expr=code_before_ibis_expr, - code_after_ibis_expr=code_after_ibis_expr, + other_code=other_code, depends_on_code=expr_references, ) notebook_path = file_path.with_suffix(".ipynb") @@ -62,6 +64,19 @@ def convert_ibis_file_to_notebook(file_path: str | Path) -> None: logger.info(f"Notebook saved as {notebook_path}") +def _extract_other_code( + file_lines: list[str], ibis_expr_func_code: list[str], func_start_line: int +) -> list[str]: + end_line = func_start_line + len(ibis_expr_func_code) - 1 + + other_code = file_lines[: func_start_line - 1] + code_after = 
file_lines[end_line:] + if code_after: + # Add two empty lines in between so that it's better formatted in the notebook + other_code = [*other_code, "", "", *code_after] + return other_code + + def _remove_decorator(ibis_expr_function_source_code: list[str]) -> list[str]: cleaned_function_code = [] def_started = False @@ -93,8 +108,7 @@ def _unpack_function_conent(cleaned_function_code: list[str]) -> str: def _create_notebook( *, ibis_expr_code: str, - code_before_ibis_expr: list[str], - code_after_ibis_expr: list[str], + other_code: list[str], depends_on_code: list[str], ) -> nbf.NotebookNode: nb = nbf.v4.new_notebook() @@ -103,11 +117,7 @@ def _create_notebook( # this was not an issue as that was all in a function. Now, we unpack # the function -> references need to be valid. nb["cells"] = [ - nbf.v4.new_code_cell( - source="\n".join( - [*code_before_ibis_expr, "", "", *code_after_ibis_expr] - ).strip() - ), + nbf.v4.new_code_cell(source="\n".join(other_code).strip()), nbf.v4.new_markdown_cell("# Depends on"), nbf.v4.new_code_cell("\n".join(depends_on_code)), nbf.v4.new_markdown_cell("# Model"), From 967c2666b6b7b9b06fb0354f58298830bb94dfbe Mon Sep 17 00:00:00 2001 From: Stefan Binder Date: Mon, 25 Dec 2023 14:01:32 +0100 Subject: [PATCH 5/5] First draft of converting notebook back to .ibis file --- dbt_ibis/__init__.py | 4 +- dbt_ibis/_jupyter.py | 101 ++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 101 insertions(+), 4 deletions(-) diff --git a/dbt_ibis/__init__.py b/dbt_ibis/__init__.py index fda6e8d..ef7d7fb 100644 --- a/dbt_ibis/__init__.py +++ b/dbt_ibis/__init__.py @@ -150,13 +150,13 @@ def main() -> None: file_extension = file_path.suffix from dbt_ibis._jupyter import ( convert_ibis_file_to_notebook, + convert_notebook_to_ibis_file, ) if file_extension == f".{_IBIS_FILE_EXTENSION}": convert_ibis_file_to_notebook(file_path) elif file_extension == ".ipynb": - raise NotImplementedError - # convert_notebook_to_ibis_file(file_path) + 
convert_notebook_to_ibis_file(file_path) else: raise ValueError( f"Cannot convert file with extension {file_extension}." diff --git a/dbt_ibis/_jupyter.py b/dbt_ibis/_jupyter.py index bf09b54..a83897e 100644 --- a/dbt_ibis/_jupyter.py +++ b/dbt_ibis/_jupyter.py @@ -1,8 +1,11 @@ import inspect import logging +import re import textwrap from pathlib import Path +from typing import Literal +from dbt_ibis._compile import IBIS_FILE_EXTENSION from dbt_ibis._extract import get_expr_func try: @@ -16,6 +19,9 @@ logger = logging.getLogger(__name__) +_NB_HEADER_MODEL = "# Model" +_NB_HEADER_DEPENDS_ON = "# Depends on" + def convert_ibis_file_to_notebook(file_path: str | Path) -> None: logger.info(f"Converting {file_path} to notebook") @@ -118,10 +124,101 @@ def _create_notebook( # the function -> references need to be valid. nb["cells"] = [ nbf.v4.new_code_cell(source="\n".join(other_code).strip()), - nbf.v4.new_markdown_cell("# Depends on"), + nbf.v4.new_markdown_cell(_NB_HEADER_DEPENDS_ON), nbf.v4.new_code_cell("\n".join(depends_on_code)), - nbf.v4.new_markdown_cell("# Model"), + nbf.v4.new_markdown_cell(_NB_HEADER_MODEL), nbf.v4.new_code_cell(ibis_expr_code), ] return nb + + +def convert_notebook_to_ibis_file(file_path: str | Path) -> None: + logger.info(f"Converting {file_path} to an Ibis file") + if isinstance(file_path, str): + file_path = Path(file_path) + + nb = nbf.read(file_path, as_version=4) + other_code, depends_on, ibis_expr_code = _split_notebook_content(nb) + + references = _extract_references(depends_on) + + depends_on_decorator = f"@depends_on({', '.join([ref for _, ref in references])})" + function_definition = f"def model({', '.join([name for name, _ in references])}):" + + # Return value of last line of ibis expression. Don't check for now + # if this is valid Python code -> Easy for users to fix themselves if it's + # wrong. 
+ def remove_empty_strings_from_end(lst: list[str]) -> list[str]: + while lst and lst[-1] == "": + lst.pop() + return lst + + ibis_expr_code = remove_empty_strings_from_end(ibis_expr_code) + ibis_expr_code[-1] = f"return {ibis_expr_code[-1]}" + ibis_expr_function_code = "\n".join( + [ + depends_on_decorator, + function_definition, + textwrap.indent("\n".join(ibis_expr_code), prefix=" " * 4), + ] + ) + + ibis_file_path = file_path.with_suffix(f".{IBIS_FILE_EXTENSION}") + ibis_file_content_lines = [*other_code, ibis_expr_function_code] + if ibis_file_content_lines[-1] != "": + # Add an empty line at the end as that conforms to most style guides + ibis_file_content_lines.append("") + ibis_file_content = "\n".join(ibis_file_content_lines) + ibis_file_path.write_text(ibis_file_content) + logger.info(f"Ibis file saved as {ibis_file_path}") + + +def _split_notebook_content( # noqa: C901 + nb: nbf.NotebookNode, +) -> tuple[list[str], list[str], list[str]]: + other_code: list[str] = [] + depends_on: list[str] = [] + ibis_expr_code: list[str] = [] + + current_section: Literal["depends_on", "model"] | None = None + + def process_cell_source(source: str) -> list[str]: + return source.split("\n") + + for cell in nb.cells: + if cell.cell_type == "code": + if current_section is None: + other_code.extend(process_cell_source(cell.source)) + elif current_section == "depends_on": + depends_on.extend(process_cell_source(cell.source)) + elif current_section == "model": + ibis_expr_code.extend(process_cell_source(cell.source)) + elif cell.cell_type == "markdown": + if not cell.source.startswith("#"): + continue + elif cell.source.startswith("# Depends on"): + current_section = "depends_on" + elif cell.source.startswith("# Model"): + current_section = "model" + elif cell.source.startswith("##"): + # Count this as a subheading of the current section -> Don't change it + continue + else: + # In this case, it's a top-level heading -> Reset the current section + # and just count the 
following code cells as other code + current_section = None + else: + # Ignore other cell types such as raw cells + continue + return other_code, depends_on, ibis_expr_code + + +def _extract_references(depends_on: list[str]) -> list[tuple[str, str]]: + references = [] + for line in depends_on: + variable_name = re.search(r"^(\w+)\s*=", line) + reference = re.search(r"=\s*((ref|source).*?)\.to_ibis_table", line) + if variable_name is not None and reference is not None: + references.append((variable_name.group(1), reference.group(1))) + return references