Skip to content

Commit

Permalink
Fix: the failover Master might not release the taskGroup (apache#15287)
Browse files Browse the repository at this point in the history
  • Loading branch information
ruanwenjun authored Dec 6, 2023
1 parent 2119e41 commit 14272da
Showing 1 changed file with 11 additions and 6 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -465,12 +465,13 @@ public void taskFinished(TaskInstance taskInstance) throws StateEventHandleExcep
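* Release the task group held by the given task instance; does nothing when the task instance does not use a task group.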
*
*/
public void releaseTaskGroup(TaskInstance taskInstance) throws InterruptedException {
public void releaseTaskGroup(TaskInstance taskInstance) {
ProcessInstance workflowInstance = workflowExecuteContext.getWorkflowInstance();
// todo: use Integer
if (taskInstance.getTaskGroupId() <= 0) {
log.info("The current TaskInstance: {} doesn't use taskGroup, no need to release taskGroup",
taskInstance.getName());
return;
}
TaskInstance nextTaskInstance = processService.releaseTaskGroup(taskInstance);
if (nextTaskInstance == null) {
Expand Down Expand Up @@ -1347,16 +1348,20 @@ private void submitPostNode(Long parentNodeCode) throws StateEventHandleExceptio
TaskExecutionStatus state = existTaskInstance.getState();
if (state == TaskExecutionStatus.RUNNING_EXECUTION
|| state == TaskExecutionStatus.DISPATCH
|| state == TaskExecutionStatus.SUBMITTED_SUCCESS) {
|| state == TaskExecutionStatus.SUBMITTED_SUCCESS
|| state == TaskExecutionStatus.DELAY_EXECUTION) {
// try to take over task instance
if (state != TaskExecutionStatus.SUBMITTED_SUCCESS
&& state != TaskExecutionStatus.DELAY_EXECUTION
&& tryToTakeOverTaskInstance(existTaskInstance)) {
log.info("Success take over task {}", existTaskInstance.getName());
continue;
} else {
// set the task instance state to fault tolerance
existTaskInstance.setFlag(Flag.NO);
existTaskInstance.setState(TaskExecutionStatus.NEED_FAULT_TOLERANCE);
releaseTaskGroup(existTaskInstance);

validTaskMap.remove(existTaskInstance.getTaskCode());
taskInstanceDao.updateById(existTaskInstance);
existTaskInstance = cloneTolerantTaskInstance(existTaskInstance);
Expand Down Expand Up @@ -1444,12 +1449,12 @@ private boolean tryToTakeOverTaskInstance(TaskInstance taskInstance) {
ITaskInstanceOperator iTaskInstanceOperator =
SingletonJdkDynamicRpcClientProxyFactory
.getProxyClient(taskInstance.getHost(), ITaskInstanceOperator.class);
UpdateWorkflowHostResponse updateWorkflowHostResponse = iTaskInstanceOperator.updateWorkflowInstanceHost(
UpdateWorkflowHostResponse response = iTaskInstanceOperator.updateWorkflowInstanceHost(
new UpdateWorkflowHostRequest(taskInstance.getId(), masterConfig.getMasterAddress()));
if (!updateWorkflowHostResponse.isSuccess()) {
if (!response.isSuccess()) {
log.error(
"Takeover TaskInstance failed, receive a failed response from worker: {}, will try to create a new TaskInstance",
taskInstance.getHost());
"Takeover TaskInstance failed, receive a failed response: {} from worker: {}, will try to create a new TaskInstance",
response, taskInstance.getHost());
return false;
}

Expand Down

0 comments on commit 14272da

Please sign in to comment.