Commit
fix notebook serialization issue for markdown
gbrueckl committed Jan 16, 2025
1 parent 485ef47 commit 307ce99
Showing 4 changed files with 24 additions and 11 deletions.
3 changes: 3 additions & 0 deletions CHANGELOG.md
@@ -1,5 +1,8 @@
# Release Notes

**v2.4.1:**
- fixed an issue with notebook serialization not working properly for markdown cells

**v2.4.0:**
- added interactive kernel again ([202](/../../issues/202))

10 changes: 7 additions & 3 deletions README.md
@@ -229,12 +229,12 @@ Whenever a notebook is opened from either the local sync folder or via the [Virt

If you are using the [Databricks Extension Connection Manager](#setup-and-configuration-databricks-extension-connection-manager), we will also create a generic notebook kernel for you which uses the configured cluster.

To work with non-`.ipynb` notebooks, you can also open source files from Databricks as notebooks (e.g. `.sql`, `.scala`, `.r`). For this to work you need to add `workbench.editorAssociations` for the file types to your VSCode settings. The important part is to use `databricks-notebook` as default editor:
To work with non-`.ipynb` notebooks (i.e. the `SOURCE` format), you can also open source files from Databricks as notebooks (e.g. `.py`, `.sql`, `.scala`, `.r`). This is very common when you open files from a Git repository used in Databricks. To open those `SOURCE` files, you need to add `workbench.editorAssociations` for the file types to your VSCode settings. The important part is to use `databricks-notebook` as the default editor:

```json
"settings": {
"workbench.editorAssociations":{
"*.py": "databricks-notebook",
"**/notebooks/**/*.py": "databricks-notebook",
"*.scala": "databricks-notebook",
"*.sql": "databricks-notebook",
"*.r": "databricks-notebook"
@@ -243,10 +243,14 @@ To work with non-`.ipynb` notebooks, you can also open source files from Databri
}
```

The example above will open all `.py` files where any parent folder is called `notebooks` as a VSCode notebook. The same applies to all Scala, SQL and R files.
To control how notebooks are serialized in Git, please check [Manage notebook format](https://docs.databricks.com/en/notebooks/notebook-format.html).
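For illustration, a Python notebook stored in `SOURCE` format is a plain `.py` file that marks cells and magics with special comments. The sketch below shows the rough layout (the exact markers are assumptions based on the Databricks export format, not a full specification):

```python
# A minimal sketch of a Python notebook in Databricks SOURCE format.
# The markers ("# Databricks notebook source", "# COMMAND ----------",
# "# MAGIC") are illustrative of the export format, not exhaustive.
source = (
    "# Databricks notebook source\n"
    "# MAGIC %md\n"
    "# MAGIC # Demo notebook\n"
    "\n"
    "# COMMAND ----------\n"
    "\n"
    "print(\"hello\")\n"
)

# Cells are separated by the COMMAND marker line
cells = source.split("# COMMAND ----------")
print(len(cells))  # 2 cells: a markdown cell and a code cell
```

Because markdown cells only survive as `# MAGIC %md` comment lines in this format, a serializer has to strip and re-add those prefixes on every round trip.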

However, there are some technical restrictions when working with those files. While they behave like notebooks, they are still just source files in the background, which means the output of executed cells is not persisted. If you save the notebook and it is then reloaded from the source (which can happen automatically in the background), your cell outputs are lost.
Also, please make sure that the file extensions you configure here are the same as you configured in your `exportFormats`!
To execute those files as notebooks, you can only use kernels provided by Databricks Power Tools, not local kernels!

As it is also possible to maintain Python libraries within DAtabricks using [Workspace Files](https://docs.databricks.com/files/workspace.html) there is a clash between file extensions of workspace files and Python notebooks downloaded in source format hence there can be some issues when creating new files etc. Therefore it is recommended to keep using `.ipynb` format for Python notebooks and `.py` for workspace files used in libraries.
As it is also possible to maintain Python libraries within Databricks using [Workspace Files](https://docs.databricks.com/files/workspace.html), there is a clash between the file extensions of workspace files and Python notebooks downloaded in source format, which can lead to issues when creating new files. It is therefore recommended to keep using the `.ipynb` format for Python notebooks and `.py` for workspace files used in libraries.
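To illustrate the clash, here is a sketch of how a SOURCE-format notebook could be told apart from a plain workspace `.py` file by its header comment. The `is_source_notebook` helper is hypothetical, not part of the extension:

```python
# Hypothetical helper (not part of the extension): distinguish a
# Databricks SOURCE-format notebook from a plain workspace .py file
# by the header comment Databricks writes on export.
NOTEBOOK_HEADER = "# Databricks notebook source"

def is_source_notebook(contents: str) -> bool:
    first_line = contents.lstrip().split("\n", 1)[0]
    return first_line.strip() == NOTEBOOK_HEADER

print(is_source_notebook("# Databricks notebook source\nprint(1)"))  # True
print(is_source_notebook("import os\n"))                             # False
```

Since both kinds of file share the `.py` extension, only the file contents disambiguate them, which is why the `.ipynb` recommendation above avoids the problem entirely.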

## Execution Modes
We distinguish between Live-execution and Offline-execution. In Live-execution mode, files are opened directly from Databricks by mounting the Databricks Workspace into your VSCode Workspace using the `wsfs:/` URI scheme. In this mode there is no intermediate local copy; you work directly against the Databricks Workspace. Everything you run must already exist online in the Databricks Workspace.
2 changes: 1 addition & 1 deletion package.json
@@ -2,7 +2,7 @@
"name": "databricks-vscode",
"displayName": "Databricks Power Tools",
"description": "Run notebooks cell-by-cell, browse and edit your Databricks Workspace, DBFS, Clusters, Jobs, Secrets, Repos and SQL. Supports Azure Databricks, Databricks on AWS and Databricks on GCP.",
"version": "2.4.0",
"version": "2.4.1",
"publisher": "paiqo",
"icon": "resources/databricks_extension.png",
"author": {
20 changes: 13 additions & 7 deletions src/vscode/notebook/DatabricksNotebookSerializer.ts
@@ -47,12 +47,20 @@ export class DatabricksNotebookSerializer implements vscode.NotebookSerializer {
"magic": "%sql",
"commentCharacters": "--",
"fileExtension": ".sql"
},
{
"databricksLanguage": undefined,
"vscodeLanguage": "markdown",
"magic": "%md",
"commentCharacters": undefined,
"fileExtension": undefined
}
]
;

public async deserializeNotebook(data: Uint8Array, token: vscode.CancellationToken): Promise<DatabricksNotebook> {
var contents = Buffer.from(data).toString();
contents = contents.replace(/\r/gm, ""); // remove any carriage returns

var firstLineWithCode: number = 1;
const lines: string[] = contents.trimStart().split("\n");
@@ -76,7 +84,7 @@ export class DatabricksNotebookSerializer implements vscode.NotebookSerializer {
notebookLanguage = languages[0];
}
else {
// its Python or R
// it's Python or R, which use the same comment character
const rAssignments = contents.split("<-").length;
const pythonAssignments = contents.split("=").length;

@@ -100,19 +108,17 @@ export class DatabricksNotebookSerializer implements vscode.NotebookSerializer {
let firstLine = rawCell.split("\n")[0];
let firstLineValues = firstLine.split(/\s+/gm);
let magic = firstLineValues[2];
cellLanguage = this.LANGUAGE_MAPPING.find(x => x.magic == magic);

if (magic == "%md") {
cell.kind = vscode.NotebookCellKind.Markup;
cell.value = cell.value.replace(new RegExp(`^${commentChars} ${this.MAGIC_PREFIX} ${magic}\n`, "gm"), "");
}
else {
cellLanguage = this.LANGUAGE_MAPPING.find(x => x.magic == magic);
if (cellLanguage) {
cell.metadata = { "cellLanguage": cellLanguage };
}
if (cellLanguage) {
cell.metadata = { "cellLanguage": cellLanguage };
}

cell.value = cell.value.replace(new RegExp(`^${commentChars} ${this.MAGIC_PREFIX} `, "gm"), "");
cell.value = cell.value.replace(new RegExp(`^${commentChars} ${this.MAGIC_PREFIX}`, "gm"), "");
}
cell.languageId = cell.metadata.cellLanguage.vscodeLanguage;
notebook.cells.push(cell);
