diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml index 414317e..43df31a 100644 --- a/.github/workflows/build-and-test.yml +++ b/.github/workflows/build-and-test.yml @@ -33,6 +33,6 @@ jobs: run: test/setup.ps1 shell: pwsh - name: Durable framework tests - run: dotnet test --no-build --verbosity normal ./test/DurableTask.SqlServer.Tests/DurableTask.SqlServer.Tests.csproj + run: dotnet test --no-build --verbosity normal --filter Category!=Stress ./test/DurableTask.SqlServer.Tests/DurableTask.SqlServer.Tests.csproj - name: Functions runtime tests run: dotnet test --no-build --verbosity normal ./test/DurableTask.SqlServer.AzureFunctions.Tests/DurableTask.SqlServer.AzureFunctions.Tests.csproj diff --git a/CHANGELOG.md b/CHANGELOG.md index 3006c2c..c31876e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,13 @@ # Changelog +## v0.10.1-beta + +### Updates + +* Removed foreign key constraints from all tables to dramatically improve performance and eliminate common sources of deadlocks ([#46](https://github.com/microsoft/durabletask-mssql/pull/46)) +* Added documentation for how to work around native dependency issues in Azure Functions. +* Added documentation about the taskEventLockTimeout setting in the Azure Functions host.json file. + ## v0.10.0-beta ### Updates diff --git a/docs/quickstart.md b/docs/quickstart.md index 840a36b..e0590ce 100644 --- a/docs/quickstart.md +++ b/docs/quickstart.md @@ -21,13 +21,24 @@ dotnet add package Microsoft.DurableTask.SqlServer.AzureFunctions --prerelease JavaScript, Python, and PowerShell projects can add the [Microsoft.DurableTask.SqlServer.AzureFunction](https://www.nuget.org/packages/Microsoft.DurableTask.SqlServer.AzureFunctions) package by running the following `func` CLI command. Note that in addition to the Azure Functions Core Tools, you must also have a recent [.NET SDK](https://dotnet.microsoft.com/download/dotnet-core/3.1) installed locally. 
```bash -func extensions install -p Microsoft.DurableTask.SqlServer.AzureFunctions -v 0.9.1-beta +func extensions install -p Microsoft.DurableTask.SqlServer.AzureFunctions -v 0.10.1-beta ``` ?> Check [here](https://www.nuget.org/packages/Microsoft.DurableTask.SqlServer.AzureFunctions) to see if newer versions of the SQL provider package are available, and update the above command to reference the latest available version. !> The Durable SQL backend is not currently supported with extension bundles. Support for extension bundles will be available at or before the *General Availability* release. +This command will generate a file named **extensions.csproj** in the local directory, or update the file if one already exists. At the time of writing, you'll need to make an additional edit to this file to work around [this Azure Functions tooling issue](https://github.com/Azure/azure-functions-host/issues/6925#issuecomment-885253901): + +```xml + + + + +``` + +This ensures that all native dependencies are available when you try to start up the Function app. + ### Host.json configuration You can configure the Durable SQL provider by updating the `extensions/durableTask/storageProvider` section of your **host.json** file. @@ -49,6 +60,8 @@ You can configure the Durable SQL provider by updating the `extensions/durableTa The `"type": "mssql"` specification is required to inform the Durable Functions extension that it should use the SQL backend instead of the default Azure Storage backend. +The `taskEventLockTimeout` setting is an optional time-span value in the form hh:mm:ss. This setting controls how long events in the *dt.NewTasks* and *dt.NewEvents* tables remain locked after being queried. While locked, no other app instance can process these tasks. If an app instance that has locked these tasks crashes or becomes unresponsive, another app instance will be able to start processing these tasks once the timeout has expired. The default value is 2 minutes (00:02:00). 
+ The `connectionStringName` setting is required and must be set to the name of the app setting or environment variable that contains your SQL connection string. In the above example, `SQLDB_Connection` is the name of an existing app setting or environment variable. If you're running locally and using a **local.settings.json** file, you can configure it as follows: ```json diff --git a/src/DurableTask.SqlServer/Scripts/logic.sql b/src/DurableTask.SqlServer/Scripts/logic.sql index 66f5198..cfa9f03 100644 --- a/src/DurableTask.SqlServer/Scripts/logic.sql +++ b/src/DurableTask.SqlServer/Scripts/logic.sql @@ -416,10 +416,11 @@ BEGIN BEGIN TRANSACTION DELETE FROM NewEvents WHERE [TaskHub] = @TaskHub AND [InstanceID] IN (SELECT [InstanceID] FROM @InstanceIDs) + DELETE FROM NewTasks WHERE [TaskHub] = @TaskHub AND [InstanceID] IN (SELECT [InstanceID] FROM @InstanceIDs) DELETE FROM Instances WHERE [TaskHub] = @TaskHub AND [InstanceID] IN (SELECT [InstanceID] FROM @InstanceIDs) DECLARE @deletedInstances int = @@ROWCOUNT + DELETE FROM History WHERE [TaskHub] = @TaskHub AND [InstanceID] IN (SELECT [InstanceID] FROM @InstanceIDs) DELETE FROM Payloads WHERE [TaskHub] = @TaskHub AND [InstanceID] IN (SELECT [InstanceID] FROM @InstanceIDs) - -- Other relevant tables are expected to be cleaned up via cascade deletes COMMIT TRANSACTION diff --git a/src/DurableTask.SqlServer/Scripts/schema-0.2.0.sql b/src/DurableTask.SqlServer/Scripts/schema-0.2.0.sql index a41e14f..a47a82d 100644 --- a/src/DurableTask.SqlServer/Scripts/schema-0.2.0.sql +++ b/src/DurableTask.SqlServer/Scripts/schema-0.2.0.sql @@ -134,9 +134,7 @@ BEGIN [ParentInstanceID] varchar(100) NULL, CONSTRAINT PK_Instances PRIMARY KEY (TaskHub, InstanceID), - CONSTRAINT FK_Instances_Input_Payloads FOREIGN KEY (TaskHub, InstanceID, InputPayloadID) REFERENCES dt.Payloads(TaskHub, InstanceID, PayloadID), - CONSTRAINT FK_Instances_Output_Payloads FOREIGN KEY (TaskHub, InstanceID, OutputPayloadID) REFERENCES dt.Payloads(TaskHub, 
InstanceID, PayloadID), - CONSTRAINT FK_Instances_CustomStatus_Payloads FOREIGN KEY (TaskHub, InstanceID, CustomStatusPayloadID) REFERENCES dt.Payloads(TaskHub, InstanceID, PayloadID) + -- NOTE: No FK constraints for the Payloads table because of high performance cost and deadlock risk ) -- This index is used by LockNext and Purge logic @@ -165,12 +163,13 @@ BEGIN [PayloadID] uniqueidentifier NULL, CONSTRAINT PK_NewEvents PRIMARY KEY (TaskHub, InstanceID, SequenceNumber), - CONSTRAINT FK_NewEvents_Payloads FOREIGN KEY (TaskHub, InstanceID, PayloadID) REFERENCES dt.Payloads(TaskHub, InstanceID, PayloadID) - -- NOTE: no FK constraint to Instances table because we want to allow events to create new instances + -- NOTE: no FK constraint to Instances and Payloads tables because of high performance cost and deadlock risk. + -- Also, we want to allow events to create new instances, which means an Instances row might not yet exist. ) END IF OBJECT_ID(N'dt.History', 'U') IS NULL +BEGIN CREATE TABLE dt.History ( [TaskHub] varchar(50) NOT NULL, [InstanceID] varchar(100) NOT NULL, @@ -186,11 +185,12 @@ IF OBJECT_ID(N'dt.History', 'U') IS NULL [DataPayloadID] uniqueidentifier NULL, CONSTRAINT PK_History PRIMARY KEY (TaskHub, InstanceID, ExecutionID, SequenceNumber), - CONSTRAINT FK_History_Instances FOREIGN KEY (TaskHub, InstanceID) REFERENCES dt.Instances(TaskHub, InstanceID) ON DELETE CASCADE, - CONSTRAINT FK_History_Payloads FOREIGN KEY (TaskHub, InstanceID, DataPayloadID) REFERENCES dt.Payloads(TaskHub, InstanceID, PayloadID) + -- NOTE: no FK constraint to Payloads or Instances tables because of high performance cost and deadlock risk ) +END IF OBJECT_ID(N'dt.NewTasks', 'U') IS NULL +BEGIN CREATE TABLE dt.NewTasks ( [TaskHub] varchar(50) NOT NULL, [SequenceNumber] bigint IDENTITY NOT NULL, -- order is important for FIFO @@ -207,13 +207,13 @@ IF OBJECT_ID(N'dt.NewTasks', 'U') IS NULL [Version] varchar(100) NULL, CONSTRAINT PK_NewTasks PRIMARY KEY (TaskHub, SequenceNumber), - 
CONSTRAINT FK_NewTasks_Instances FOREIGN KEY (TaskHub, InstanceID) REFERENCES dt.Instances(TaskHub, InstanceID) ON DELETE CASCADE, - CONSTRAINT FK_NewTasks_Payloads FOREIGN KEY (TaskHub, InstanceID, PayloadID) REFERENCES dt.Payloads(TaskHub, InstanceID, PayloadID) + -- NOTE: no FK constraint to Payloads or Instances tables because of high performance cost and deadlock risk ) -- This index is used by vScaleHints CREATE NONCLUSTERED INDEX IX_NewTasks_InstanceID ON dt.NewTasks(TaskHub, InstanceID) INCLUDE ([SequenceNumber], [Timestamp], [LockExpiration], [VisibleTime]) +END GO IF OBJECT_ID(N'dt.GlobalSettings', 'U') IS NULL diff --git a/src/common.props b/src/common.props index f7b13cf..8d78e97 100644 --- a/src/common.props +++ b/src/common.props @@ -16,7 +16,7 @@ 0 - $(MajorVersion).10.0 + $(MajorVersion).10.1 beta $(MajorVersion).0.0.0 .$(GITHUB_RUN_NUMBER) diff --git a/test/DurableTask.SqlServer.Tests/Integration/StressTests.cs b/test/DurableTask.SqlServer.Tests/Integration/StressTests.cs new file mode 100644 index 0000000..6c7cba3 --- /dev/null +++ b/test/DurableTask.SqlServer.Tests/Integration/StressTests.cs @@ -0,0 +1,76 @@ +// Copyright (c) .NET Foundation. All rights reserved. +// Licensed under the MIT License. See LICENSE in the project root for license information. + +namespace DurableTask.SqlServer.Tests.Integration +{ + using System; + using System.Collections.Generic; + using System.Threading.Tasks; + using DurableTask.SqlServer.Tests.Utils; + using Xunit; + using Xunit.Abstractions; + + /// + /// Integration tests that are intended to reveal issues related to load or concurrency. + /// These tests are expected to take longer to complete compared to functional integration + /// tests and therefore may not be appropriate for all CI or rapid testing scenarios. 
+    ///
+    [Trait("Category", "Stress")]
+    public class StressTests : IAsyncLifetime
+    {
+        readonly TestService testService;
+
+        public StressTests(ITestOutputHelper output)
+        {
+            this.testService = new TestService(output);
+        }
+
+        Task IAsyncLifetime.InitializeAsync() => this.testService.InitializeAsync();
+
+        Task IAsyncLifetime.DisposeAsync() => this.testService.DisposeAsync();
+
+        // This test has previously been used to uncover various deadlock issues by stressing the code paths
+        // related to foreign keys that point to the Instances and Payloads tables.
+        // Example: https://github.com/microsoft/durabletask-mssql/issues/45
+        [Theory]
+        [InlineData(10)]
+        [InlineData(2000)]
+        public async Task ParallelSubOrchestrations(int subOrchestrationCount)
+        {
+            const string SubOrchestrationName = "SubOrchestration";
+
+            this.testService.RegisterInlineOrchestration(
+                orchestrationName: SubOrchestrationName,
+                version: "",
+                implementation: async (ctx, input) =>
+                {
+                    await ctx.CreateTimer(DateTime.MinValue, input);
+                    return ctx.CurrentUtcDateTime;
+                });
+
+            TestInstance testInstance = await this.testService.RunOrchestration(
+                input: 1,
+                orchestrationName: nameof(ParallelSubOrchestrations),
+                implementation: async (ctx, input) =>
+                {
+                    var listInstances = new List<Task<DateTime>>();
+                    for (int i = 0; i < subOrchestrationCount; i++)
+                    {
+                        Task<DateTime> instance = ctx.CreateSubOrchestrationInstance<DateTime>(
+                            name: SubOrchestrationName,
+                            version: "",
+                            instanceId: $"suborchestration[{i}]",
+                            input: $"{i}");
+                        listInstances.Add(instance);
+                    }
+
+                    DateTime[] results = await Task.WhenAll(listInstances);
+                    return new List<DateTime>(results);
+                });
+
+            // On a fast Windows desktop machine, a 2000 sub-orchestration test should complete in 30-40 seconds.
+            // On slower CI machines, this test could take several minutes to complete.
+            await testInstance.WaitForCompletion(TimeSpan.FromMinutes(5));
+        }
+    }
+}