From 4d15932ede770bb65786c697bf16cdd4f2cc02be Mon Sep 17 00:00:00 2001 From: Wenjun Ruan Date: Sat, 3 Feb 2024 18:00:33 +0800 Subject: [PATCH 01/10] Fix createFile with permission will not work (#15556) --- .../common/utils/FileUtils.java | 139 +++++------------- .../common/utils/FileUtilsTest.java | 24 ++- .../service/utils/ProcessUtils.java | 3 +- .../storage/abs/AbsStorageOperator.java | 3 +- .../storage/gcs/GcsStorageOperator.java | 3 +- .../storage/obs/ObsStorageOperator.java | 3 +- .../storage/oss/OssStorageOperator.java | 3 +- .../plugin/storage/s3/S3StorageOperator.java | 3 +- .../BaseLinuxShellInterceptorBuilder.java | 8 +- .../plugin/task/java/JavaTaskTest.java | 3 +- .../utils/TaskExecutionContextUtils.java | 3 +- .../utils/TaskExecutionContextUtilsTest.java | 2 +- 12 files changed, 72 insertions(+), 125 deletions(-) diff --git a/dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/utils/FileUtils.java b/dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/utils/FileUtils.java index c71c5f2e47842..1f3ccfed3da9f 100644 --- a/dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/utils/FileUtils.java +++ b/dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/utils/FileUtils.java @@ -25,8 +25,6 @@ import static org.apache.dolphinscheduler.common.constants.Constants.UTF_8; import static org.apache.dolphinscheduler.common.constants.DateConstants.YYYYMMDDHHMMSS; -import org.apache.dolphinscheduler.common.exception.FileOperateException; - import org.apache.commons.io.IOUtils; import org.apache.commons.lang3.SystemUtils; @@ -38,9 +36,7 @@ import java.io.InputStream; import java.nio.charset.StandardCharsets; import java.nio.file.Files; -import java.nio.file.NoSuchFileException; import java.nio.file.Path; -import java.nio.file.attribute.FileAttribute; import java.nio.file.attribute.PosixFilePermission; import java.nio.file.attribute.PosixFilePermissions; import java.util.Set; @@ -48,11 +44,10 @@ import java.util.zip.CheckedInputStream; import lombok.NonNull; +import lombok.experimental.UtilityClass; import lombok.extern.slf4j.Slf4j; -/** - * file utils - */ +@UtilityClass @Slf4j public class FileUtils { @@ -62,14 +57,7 @@ public class FileUtils { public static final String KUBE_CONFIG_FILE = "config"; - private static final String RWXR_XR_X = "rwxr-xr-x"; - - private static final FileAttribute> PERMISSION_755 = - PosixFilePermissions.asFileAttribute(PosixFilePermissions.fromString(RWXR_XR_X)); - - private FileUtils() { - throw new UnsupportedOperationException("Construct FileUtils"); - } + private static final Set PERMISSION_755 = PosixFilePermissions.fromString("rwxr-xr-x"); /** * get download file absolute path and name @@ -161,34 +149,6 @@ public static String getResourceViewSuffixes() { return PropertyUtils.getString(RESOURCE_VIEW_SUFFIXES, RESOURCE_VIEW_SUFFIXES_DEFAULT_VALUE); } - /** - * create directory if absent - * - * @param execLocalPath execute local path - * @throws IOException errors - */ - public static void createWorkDirIfAbsent(String execLocalPath) throws IOException { - // if work dir exists, first delete - File execLocalPathFile = new File(execLocalPath); - - if (execLocalPathFile.exists()) { - try { - org.apache.commons.io.FileUtils.forceDelete(execLocalPathFile); - } catch (Exception ex) { - if (ex instanceof NoSuchFileException || ex.getCause() instanceof NoSuchFileException) { - // this file is already be deleted. - } else { - throw ex; - } - } - } - - // create work dir - org.apache.commons.io.FileUtils.forceMkdir(execLocalPathFile); - String mkdirLog = "create dir success " + execLocalPath; - log.info(mkdirLog); - } - /** * write content to file ,if parent path not exists, it will do one's utmost to mkdir * @@ -231,25 +191,6 @@ public static void deleteFile(String filename) { org.apache.commons.io.FileUtils.deleteQuietly(new File(filename)); } - /** - * Gets all the parent subdirectories of the parentDir directory - * - * @param parentDir parent dir - * @return all dirs - */ - public static File[] getAllDir(String parentDir) { - if (parentDir == null || "".equals(parentDir)) { - throw new RuntimeException("parentDir can not be empty"); - } - - File file = new File(parentDir); - if (!file.exists() || !file.isDirectory()) { - throw new RuntimeException("parentDir not exist, or is not a directory:" + parentDir); - } - - return file.listFiles(File::isDirectory); - } - /** * Get Content * @@ -325,59 +266,47 @@ public static String getFileChecksum(String pathName) throws IOException { return crcString; } - public static void setFileOwner(Path filePath, String fileOwner) throws FileOperateException { - try { - // We use linux command to set the file owner, since jdk api will not use sudo. - String command = String.format("sudo chown %s %s", fileOwner, filePath.toString()); - Runtime.getRuntime().exec(command); - Process process = Runtime.getRuntime().exec(command); - int exitCode = process.waitFor(); - if (0 != exitCode) { - throw new FileOperateException( - "Set file: " + filePath + " to owner: " + fileOwner + " failed, existCode(" + exitCode + ")"); - } - } catch (FileOperateException ex) { - throw ex; - } catch (Exception ex) { - throw new FileOperateException("Set directory: " + filePath + " to owner: " + fileOwner + " failed"); - + public static void createFileWith755(@NonNull Path path) throws IOException { + if (SystemUtils.IS_OS_WINDOWS) { + Files.createFile(path); + } else { + Files.createFile(path); + Files.setPosixFilePermissions(path, PERMISSION_755); } } - public static void setDirectoryOwner(Path filePath, String fileOwner) throws FileOperateException { - try { - // We use linux command to set the file owner, since jdk api will not use sudo. - String command = String.format("sudo chown -R %s %s", fileOwner, filePath.toString()); - Runtime.getRuntime().exec(command); - Process process = Runtime.getRuntime().exec(command); - int exitCode = process.waitFor(); - if (0 != exitCode) { - throw new FileOperateException("Set directory: " + filePath + " to owner: " + fileOwner - + " failed, existCode(" + exitCode + ")"); + public static void createDirectoryWith755(@NonNull Path path) throws IOException { + if (path.toFile().exists()) { + return; + } + if (OSUtils.isWindows()) { + Files.createDirectories(path); + } else { + Path parent = path.getParent(); + if (parent != null && !parent.toFile().exists()) { + createDirectoryWith755(parent); } - } catch (FileOperateException ex) { - throw ex; - } catch (Exception ex) { - throw new FileOperateException("Set directory: " + filePath + " to owner: " + fileOwner + " failed"); + + Files.createDirectory(path); + Files.setPosixFilePermissions(path, PERMISSION_755); } } - public static void createDirectoryIfNotPresent(Path path) throws IOException { - if (Files.exists(path)) { + public static void setFileTo755(File file) throws IOException { + if (OSUtils.isWindows()) { return; } - Files.createDirectories(path); - } - - /** - * Create a file with '755'. - */ - public static void createFileWith755(@NonNull Path path) throws IOException { - if (SystemUtils.IS_OS_WINDOWS) { - Files.createFile(path); - } else { - Files.createFile(path, PERMISSION_755); + if (file.isFile()) { + Files.setPosixFilePermissions(file.toPath(), PERMISSION_755); + return; + } + Files.setPosixFilePermissions(file.toPath(), PERMISSION_755); + File[] files = file.listFiles(); + if (files != null) { + for (File f : files) { + setFileTo755(f); + } } } diff --git a/dolphinscheduler-common/src/test/java/org/apache/dolphinscheduler/common/utils/FileUtilsTest.java b/dolphinscheduler-common/src/test/java/org/apache/dolphinscheduler/common/utils/FileUtilsTest.java index 0ce33bef52ada..f06ece2a566a5 100644 --- a/dolphinscheduler-common/src/test/java/org/apache/dolphinscheduler/common/utils/FileUtilsTest.java +++ b/dolphinscheduler-common/src/test/java/org/apache/dolphinscheduler/common/utils/FileUtilsTest.java @@ -19,8 +19,13 @@ import static org.apache.dolphinscheduler.common.constants.DateConstants.YYYYMMDDHHMMSS; +import java.io.File; import java.io.FileInputStream; import java.io.FileNotFoundException; +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.Assertions; @@ -55,12 +60,23 @@ public void testGetProcessExecDir() { } @Test - public void testCreateWorkDirIfAbsent() { + public void createDirectoryWith755() throws IOException { + Path path = Paths.get("/tmp/createWorkDirAndUserIfAbsent"); try { - FileUtils.createWorkDirIfAbsent("/tmp/createWorkDirAndUserIfAbsent"); - Assertions.assertTrue(true); + FileUtils.createDirectoryWith755(path); + File file = path.toFile(); + Assertions.assertTrue(file.exists()); + Assertions.assertTrue(file.isDirectory()); + Assertions.assertTrue(file.canExecute()); + Assertions.assertTrue(file.canRead()); + Assertions.assertTrue(file.canWrite()); + + FileUtils.createDirectoryWith755(Paths.get("/")); } catch (Exception e) { - Assertions.fail(); + e.printStackTrace(); + Assertions.fail(e.getMessage()); + } finally { + Files.deleteIfExists(path); } } diff --git a/dolphinscheduler-service/src/main/java/org/apache/dolphinscheduler/service/utils/ProcessUtils.java b/dolphinscheduler-service/src/main/java/org/apache/dolphinscheduler/service/utils/ProcessUtils.java index 6b97590c8993d..22d17d0fd3fe4 100644 --- a/dolphinscheduler-service/src/main/java/org/apache/dolphinscheduler/service/utils/ProcessUtils.java +++ b/dolphinscheduler-service/src/main/java/org/apache/dolphinscheduler/service/utils/ProcessUtils.java @@ -30,6 +30,7 @@ import java.io.File; import java.nio.charset.StandardCharsets; +import java.nio.file.Paths; import java.util.ArrayList; import java.util.Collections; import java.util.List; @@ -176,7 +177,7 @@ public static String getPidsStr(int processId) throws Exception { taskExecutionContext.getProcessInstanceId(), taskExecutionContext.getTaskInstanceId())); } - FileUtils.createWorkDirIfAbsent(taskExecutionContext.getExecutePath()); + FileUtils.createDirectoryWith755(Paths.get(taskExecutionContext.getExecutePath())); org.apache.dolphinscheduler.plugin.task.api.utils.ProcessUtils.cancelApplication(taskExecutionContext); return appIds; } else { diff --git a/dolphinscheduler-storage-plugin/dolphinscheduler-storage-abs/src/main/java/org/apache/dolphinscheduler/plugin/storage/abs/AbsStorageOperator.java b/dolphinscheduler-storage-plugin/dolphinscheduler-storage-abs/src/main/java/org/apache/dolphinscheduler/plugin/storage/abs/AbsStorageOperator.java index bb899030c57a0..d470545babd4b 100644 --- a/dolphinscheduler-storage-plugin/dolphinscheduler-storage-abs/src/main/java/org/apache/dolphinscheduler/plugin/storage/abs/AbsStorageOperator.java +++ b/dolphinscheduler-storage-plugin/dolphinscheduler-storage-abs/src/main/java/org/apache/dolphinscheduler/plugin/storage/abs/AbsStorageOperator.java @@ -25,6 +25,7 @@ import org.apache.dolphinscheduler.common.constants.Constants; import org.apache.dolphinscheduler.common.enums.ResUploadType; +import org.apache.dolphinscheduler.common.utils.FileUtils; import org.apache.dolphinscheduler.common.utils.PropertyUtils; import org.apache.dolphinscheduler.plugin.storage.api.StorageEntity; import org.apache.dolphinscheduler.plugin.storage.api.StorageOperate; @@ -170,7 +171,7 @@ public void download(String srcFilePath, String dstFilePath, boolean overwrite) if (dstFile.isDirectory()) { Files.delete(dstFile.toPath()); } else { - Files.createDirectories(dstFile.getParentFile().toPath()); + FileUtils.createDirectoryWith755(dstFile.getParentFile().toPath()); } BlobClient blobClient = blobContainerClient.getBlobClient(srcFilePath); diff --git a/dolphinscheduler-storage-plugin/dolphinscheduler-storage-gcs/src/main/java/org/apache/dolphinscheduler/plugin/storage/gcs/GcsStorageOperator.java b/dolphinscheduler-storage-plugin/dolphinscheduler-storage-gcs/src/main/java/org/apache/dolphinscheduler/plugin/storage/gcs/GcsStorageOperator.java index 9d09c2c9bb783..e4176dc58eac5 100644 --- a/dolphinscheduler-storage-plugin/dolphinscheduler-storage-gcs/src/main/java/org/apache/dolphinscheduler/plugin/storage/gcs/GcsStorageOperator.java +++ b/dolphinscheduler-storage-plugin/dolphinscheduler-storage-gcs/src/main/java/org/apache/dolphinscheduler/plugin/storage/gcs/GcsStorageOperator.java @@ -25,6 +25,7 @@ import org.apache.dolphinscheduler.common.constants.Constants; import org.apache.dolphinscheduler.common.enums.ResUploadType; +import org.apache.dolphinscheduler.common.utils.FileUtils; import org.apache.dolphinscheduler.common.utils.PropertyUtils; import org.apache.dolphinscheduler.plugin.storage.api.StorageEntity; import org.apache.dolphinscheduler.plugin.storage.api.StorageOperate; @@ -143,7 +144,7 @@ public void download(String srcFilePath, String dstFilePath, boolean overwrite) if (dstFile.isDirectory()) { Files.delete(dstFile.toPath()); } else { - Files.createDirectories(dstFile.getParentFile().toPath()); + FileUtils.createDirectoryWith755(dstFile.getParentFile().toPath()); } Blob blob = gcsStorage.get(BlobId.of(bucketName, srcFilePath)); diff --git a/dolphinscheduler-storage-plugin/dolphinscheduler-storage-obs/src/main/java/org/apache/dolphinscheduler/plugin/storage/obs/ObsStorageOperator.java b/dolphinscheduler-storage-plugin/dolphinscheduler-storage-obs/src/main/java/org/apache/dolphinscheduler/plugin/storage/obs/ObsStorageOperator.java index b660c59ab46fc..b644210048ef2 100644 --- a/dolphinscheduler-storage-plugin/dolphinscheduler-storage-obs/src/main/java/org/apache/dolphinscheduler/plugin/storage/obs/ObsStorageOperator.java +++ b/dolphinscheduler-storage-plugin/dolphinscheduler-storage-obs/src/main/java/org/apache/dolphinscheduler/plugin/storage/obs/ObsStorageOperator.java @@ -24,6 +24,7 @@ import org.apache.dolphinscheduler.common.constants.Constants; import org.apache.dolphinscheduler.common.enums.ResUploadType; +import org.apache.dolphinscheduler.common.utils.FileUtils; import org.apache.dolphinscheduler.common.utils.PropertyUtils; import org.apache.dolphinscheduler.plugin.storage.api.StorageEntity; import org.apache.dolphinscheduler.plugin.storage.api.StorageOperate; @@ -187,7 +188,7 @@ public void download(String srcFilePath, String dstFilePath, boolean overwrite) if (dstFile.isDirectory()) { Files.delete(dstFile.toPath()); } else { - Files.createDirectories(dstFile.getParentFile().toPath()); + FileUtils.createDirectoryWith755(dstFile.getParentFile().toPath()); } ObsObject obsObject = obsClient.getObject(bucketName, srcFilePath); try ( diff --git a/dolphinscheduler-storage-plugin/dolphinscheduler-storage-oss/src/main/java/org/apache/dolphinscheduler/plugin/storage/oss/OssStorageOperator.java b/dolphinscheduler-storage-plugin/dolphinscheduler-storage-oss/src/main/java/org/apache/dolphinscheduler/plugin/storage/oss/OssStorageOperator.java index 2b2d18fd277c5..61754b52a7388 100644 --- a/dolphinscheduler-storage-plugin/dolphinscheduler-storage-oss/src/main/java/org/apache/dolphinscheduler/plugin/storage/oss/OssStorageOperator.java +++ b/dolphinscheduler-storage-plugin/dolphinscheduler-storage-oss/src/main/java/org/apache/dolphinscheduler/plugin/storage/oss/OssStorageOperator.java @@ -26,6 +26,7 @@ import org.apache.dolphinscheduler.common.enums.ResUploadType; import org.apache.dolphinscheduler.common.factory.OssClientFactory; import org.apache.dolphinscheduler.common.model.OssConnection; +import org.apache.dolphinscheduler.common.utils.FileUtils; import org.apache.dolphinscheduler.common.utils.PropertyUtils; import org.apache.dolphinscheduler.plugin.storage.api.StorageEntity; import org.apache.dolphinscheduler.plugin.storage.api.StorageOperate; @@ -213,7 +214,7 @@ public void download(String srcFilePath, String dstFilePath, if (dstFile.isDirectory()) { Files.delete(dstFile.toPath()); } else { - Files.createDirectories(dstFile.getParentFile().toPath()); + FileUtils.createDirectoryWith755(dstFile.getParentFile().toPath()); } OSSObject ossObject = ossClient.getObject(bucketName, srcFilePath); try ( diff --git a/dolphinscheduler-storage-plugin/dolphinscheduler-storage-s3/src/main/java/org/apache/dolphinscheduler/plugin/storage/s3/S3StorageOperator.java b/dolphinscheduler-storage-plugin/dolphinscheduler-storage-s3/src/main/java/org/apache/dolphinscheduler/plugin/storage/s3/S3StorageOperator.java index 884c672d53045..a13611316edb2 100644 --- a/dolphinscheduler-storage-plugin/dolphinscheduler-storage-s3/src/main/java/org/apache/dolphinscheduler/plugin/storage/s3/S3StorageOperator.java +++ b/dolphinscheduler-storage-plugin/dolphinscheduler-storage-s3/src/main/java/org/apache/dolphinscheduler/plugin/storage/s3/S3StorageOperator.java @@ -25,6 +25,7 @@ import org.apache.dolphinscheduler.common.constants.Constants; import org.apache.dolphinscheduler.common.enums.ResUploadType; +import org.apache.dolphinscheduler.common.utils.FileUtils; import org.apache.dolphinscheduler.common.utils.PropertyUtils; import org.apache.dolphinscheduler.plugin.storage.api.StorageEntity; import org.apache.dolphinscheduler.plugin.storage.api.StorageOperate; @@ -200,7 +201,7 @@ public void download(String srcFilePath, String dstFilePath, if (dstFile.isDirectory()) { Files.delete(dstFile.toPath()); } else { - Files.createDirectories(dstFile.getParentFile().toPath()); + FileUtils.createDirectoryWith755(dstFile.getParentFile().toPath()); } S3Object o = s3Client.getObject(bucketName, srcFilePath); try ( diff --git a/dolphinscheduler-task-plugin/dolphinscheduler-task-api/src/main/java/org/apache/dolphinscheduler/plugin/task/api/shell/BaseLinuxShellInterceptorBuilder.java b/dolphinscheduler-task-plugin/dolphinscheduler-task-api/src/main/java/org/apache/dolphinscheduler/plugin/task/api/shell/BaseLinuxShellInterceptorBuilder.java index c391ca87a7b44..afc2edc586ff6 100644 --- a/dolphinscheduler-task-plugin/dolphinscheduler-task-api/src/main/java/org/apache/dolphinscheduler/plugin/task/api/shell/BaseLinuxShellInterceptorBuilder.java +++ b/dolphinscheduler-task-plugin/dolphinscheduler-task-api/src/main/java/org/apache/dolphinscheduler/plugin/task/api/shell/BaseLinuxShellInterceptorBuilder.java @@ -17,8 +17,6 @@ package org.apache.dolphinscheduler.plugin.task.api.shell; -import org.apache.dolphinscheduler.common.constants.TenantConstants; -import org.apache.dolphinscheduler.common.exception.FileOperateException; import org.apache.dolphinscheduler.common.utils.FileUtils; import org.apache.dolphinscheduler.common.utils.PropertyUtils; import org.apache.dolphinscheduler.plugin.task.api.utils.AbstractCommandExecutorConstants; @@ -71,12 +69,8 @@ protected void generateShellScript() throws IOException { "****************************** Script Content *****************************************************************"); } - protected List generateBootstrapCommand() throws FileOperateException { + protected List generateBootstrapCommand() { if (sudoEnable) { - if (!TenantConstants.BOOTSTRAPT_SYSTEM_USER.equals(runUser)) { - // Set the tenant owner as the working directory - FileUtils.setDirectoryOwner(Paths.get(shellDirectory), runUser); - } return bootstrapCommandInSudoMode(); } return bootstrapCommandInNormalMode(); diff --git a/dolphinscheduler-task-plugin/dolphinscheduler-task-java/src/test/java/org/apache/dolphinscheduler/plugin/task/java/JavaTaskTest.java b/dolphinscheduler-task-plugin/dolphinscheduler-task-java/src/test/java/org/apache/dolphinscheduler/plugin/task/java/JavaTaskTest.java index 7ea5a792b77ee..55756241ce100 100644 --- a/dolphinscheduler-task-plugin/dolphinscheduler-task-java/src/test/java/org/apache/dolphinscheduler/plugin/task/java/JavaTaskTest.java +++ b/dolphinscheduler-task-plugin/dolphinscheduler-task-java/src/test/java/org/apache/dolphinscheduler/plugin/task/java/JavaTaskTest.java @@ -22,6 +22,7 @@ import static org.apache.dolphinscheduler.plugin.task.java.JavaConstants.RUN_TYPE_JAR; import static org.apache.dolphinscheduler.plugin.task.java.JavaConstants.RUN_TYPE_JAVA; +import org.apache.dolphinscheduler.common.utils.FileUtils; import org.apache.dolphinscheduler.common.utils.JSONUtils; import org.apache.dolphinscheduler.plugin.task.api.TaskCallBack; import org.apache.dolphinscheduler.plugin.task.api.TaskExecutionContext; @@ -155,7 +156,7 @@ public void coverJavaSourceFileExistException() throws IOException { try { Path path = Paths.get(fileName); if (!Files.exists(path)) { - Files.createDirectories(path); + FileUtils.createDirectoryWith755(path); } javaTask.createJavaSourceFileIfNotExists(sourceCode, fileName); } finally { diff --git a/dolphinscheduler-worker/src/main/java/org/apache/dolphinscheduler/server/worker/utils/TaskExecutionContextUtils.java b/dolphinscheduler-worker/src/main/java/org/apache/dolphinscheduler/server/worker/utils/TaskExecutionContextUtils.java index ddd489adeb293..3cda6ac099e02 100644 --- a/dolphinscheduler-worker/src/main/java/org/apache/dolphinscheduler/server/worker/utils/TaskExecutionContextUtils.java +++ b/dolphinscheduler-worker/src/main/java/org/apache/dolphinscheduler/server/worker/utils/TaskExecutionContextUtils.java @@ -97,7 +97,7 @@ public static void createTaskInstanceWorkingDirectory(TaskExecutionContext taskE log.warn("The TaskInstance WorkingDirectory: {} is exist, will recreate again", taskInstanceWorkingDirectory); } - Files.createDirectories(Paths.get(taskInstanceWorkingDirectory)); + FileUtils.createDirectoryWith755(Paths.get(taskInstanceWorkingDirectory)); taskExecutionContext.setExecutePath(taskInstanceWorkingDirectory); taskExecutionContext.setAppInfoPath(FileUtils.getAppInfoPath(taskInstanceWorkingDirectory)); @@ -137,6 +137,7 @@ public static ResourceContext downloadResourcesIfNeeded(String tenant, storageOperate.download(resourceAbsolutePathInStorage, resourceAbsolutePathInLocal, true); log.debug("Download resource file {} under: {} successfully", resourceAbsolutePathInStorage, resourceAbsolutePathInLocal); + FileUtils.setFileTo755(file); WorkerServerMetrics .recordWorkerResourceDownloadTime(System.currentTimeMillis() - resourceDownloadStartTime); WorkerServerMetrics diff --git a/dolphinscheduler-worker/src/test/java/org/apache/dolphinscheduler/server/worker/utils/TaskExecutionContextUtilsTest.java b/dolphinscheduler-worker/src/test/java/org/apache/dolphinscheduler/server/worker/utils/TaskExecutionContextUtilsTest.java index b73d073ef6d50..ff1eb26e12090 100644 --- a/dolphinscheduler-worker/src/test/java/org/apache/dolphinscheduler/server/worker/utils/TaskExecutionContextUtilsTest.java +++ b/dolphinscheduler-worker/src/test/java/org/apache/dolphinscheduler/server/worker/utils/TaskExecutionContextUtilsTest.java @@ -49,7 +49,7 @@ void createTaskInstanceWorkingDirectory() throws IOException { try { // Test if the working directory is exist // will delete it and recreate - Files.createDirectories(Paths.get(taskWorkingDirectory)); + FileUtils.createDirectoryWith755(Paths.get(taskWorkingDirectory)); Files.createFile(Paths.get(taskWorkingDirectory, "text.txt")); Assertions.assertTrue(Files.exists(Paths.get(taskWorkingDirectory, "text.txt"))); From 8efaa9fa1f1a5ab70dad0137df13da6480be59e9 Mon Sep 17 00:00:00 2001 From: Jay Chung Date: Mon, 5 Feb 2024 09:54:06 +0800 Subject: [PATCH 02/10] fix: data quality can not use (#15551) * fix: data quality can not use fix: #15468, #15249, #14858 --------- Co-authored-by: Rick Cheng Co-authored-by: Eric Gao --- docs/docs/en/guide/data-quality.md | 10 +-- docs/docs/en/guide/resource/configuration.md | 5 +- docs/docs/en/guide/upgrade/incompatible.md | 1 + docs/docs/zh/guide/data-quality.md | 9 +-- docs/docs/zh/guide/resource/configuration.md | 5 +- .../docker/file-manage/common.properties | 5 +- .../common/constants/DataSourceConstants.java | 2 +- .../src/main/resources/common.properties | 5 +- .../log/SensitiveDataConverterTest.java | 28 +++++++ .../src/test/resources/common.properties | 5 +- .../datasource/api/utils/CommonUtils.java | 61 ++++++++++++++-- .../mysql/param/MySQLDataSourceProcessor.java | 14 ++++ .../docker/file-manage/common.properties | 5 +- .../runner/TaskExecutionContextFactory.java | 27 +++---- .../datasource/DefaultConnectionParam.java | 36 +++++++++ .../plugin/task/api/model/JdbcInfo.java | 73 ++++--------------- .../plugin/task/api/utils/JdbcUrlParser.java | 24 +++++- .../task/api/utils/JdbcUrlParserTest.java | 18 ++++- .../src/test/resources/common.properties | 5 +- .../plugin/task/dq/DataQualityTask.java | 6 +- 20 files changed, 221 insertions(+), 123 deletions(-) create mode 100644 dolphinscheduler-spi/src/main/java/org/apache/dolphinscheduler/spi/datasource/DefaultConnectionParam.java diff --git a/docs/docs/en/guide/data-quality.md b/docs/docs/en/guide/data-quality.md index 2ed49b5f86ce4..f6aa7a06b27b5 100644 --- a/docs/docs/en/guide/data-quality.md +++ b/docs/docs/en/guide/data-quality.md @@ -12,15 +12,7 @@ The execution logic of the data quality task is as follows: - The current data quality task result is stored in the `t_ds_dq_execute_result` table of `dolphinscheduler` `Worker` sends the task result to `Master`, after `Master` receives `TaskResponse`, it will judge whether the task type is `DataQualityTask`, if so, it will read the corresponding result from `t_ds_dq_execute_result` according to `taskInstanceId`, and then The result is judged according to the check mode, operator and threshold configured by the user. - If the result is a failure, the corresponding operation, alarm or interruption will be performed according to the failure policy configured by the user. -- Add config : `/conf/common.properties` - -```properties -# Change to specific version if you not use dev branch -data-quality.jar.name=dolphinscheduler-data-quality-dev-SNAPSHOT.jar -``` - -- Please fill in `data-quality.jar.name` according to the actual package name. -- If you package `data-quality` separately, remember to modify the package name to be consistent with `data-quality.jar.name`. +- If you package `data-quality` separately, remember to modify the package name to be consistent with `data-quality.jar.name` in `common.properties` with attribute name `data-quality.jar.name` - If the old version is upgraded and used, you need to execute the `sql` update script to initialize the database before running. - `dolphinscheduler-data-quality-dev-SNAPSHOT.jar` was built with no dependencies. If a `JDBC` driver is required, you can set the `-jars` parameter in the `node settings` `Option Parameters`, e.g. `--jars /lib/jars/mysql-connector-java-8.0.16.jar`. - Currently only `MySQL`, `PostgreSQL` and `HIVE` data sources have been tested, other data sources have not been tested yet. diff --git a/docs/docs/en/guide/resource/configuration.md b/docs/docs/en/guide/resource/configuration.md index 112b5458cc675..42e1925e89d4e 100644 --- a/docs/docs/en/guide/resource/configuration.md +++ b/docs/docs/en/guide/resource/configuration.md @@ -152,8 +152,9 @@ datasource.encryption.enable=false # datasource encryption salt datasource.encryption.salt=!@#$%^&* -# data quality option -data-quality.jar.name=dolphinscheduler-data-quality-dev-SNAPSHOT.jar +# data quality absolute path, it would auto discovery from libs directory. You can also specific the jar name in libs directory +# if you re-build it alone, or auto discovery mechanism fail +data-quality.jar.name= #data-quality.error.output.path=/tmp/data-quality-error-data diff --git a/docs/docs/en/guide/upgrade/incompatible.md b/docs/docs/en/guide/upgrade/incompatible.md index 0ecd94c8cb8d3..4580a7d13d387 100644 --- a/docs/docs/en/guide/upgrade/incompatible.md +++ b/docs/docs/en/guide/upgrade/incompatible.md @@ -10,6 +10,7 @@ This document records the incompatible updates between each version. You need to * Remove the spark version of spark task ([#11860](https://github.com/apache/dolphinscheduler/pull/11860)). * Change the default unix shell executor from sh to bash ([#12180](https://github.com/apache/dolphinscheduler/pull/12180)). * Remove `deleteSource` in `download()` of `StorageOperate` ([#14084](https://github.com/apache/dolphinscheduler/pull/14084)) +* Remove default key for attribute `data-quality.jar.name` in `common.properties` ([#15551](https://github.com/apache/dolphinscheduler/pull/15551)) ## 3.2.0 diff --git a/docs/docs/zh/guide/data-quality.md b/docs/docs/zh/guide/data-quality.md index 95ca5e2d689af..2a098a3216821 100644 --- a/docs/docs/zh/guide/data-quality.md +++ b/docs/docs/zh/guide/data-quality.md @@ -13,14 +13,7 @@ > ## 注意事项 -添加配置信息:`/conf/common.properties` - -```properties -data-quality.jar.name=dolphinscheduler-data-quality-dev-SNAPSHOT.jar -``` - -- 这里的`data-quality.jar.name`请根据实际打包的名称来填写。 -- 如果单独打包`data-quality`的话,记得修改包名和`data-quality.jar.name`一致。 +- 如果单独打包`data-quality`的话,记得修改包名和`data-quality.jar.name`一致,配置内容在 `common.properties` 中的 `data-quality.jar.name` - 如果是老版本升级使用,运行之前需要先执行`SQL`更新脚本进行数据库初始化。 - 当前 `dolphinscheduler-data-quality-dev-SNAPSHOT.jar` 是瘦包,不包含任何 `JDBC` 驱动。 如果有 `JDBC` 驱动需要,可以在`节点设置` `选项参数`处设置 `--jars` 参数, diff --git a/docs/docs/zh/guide/resource/configuration.md b/docs/docs/zh/guide/resource/configuration.md index c5c7f856201db..57d0935e09290 100644 --- a/docs/docs/zh/guide/resource/configuration.md +++ b/docs/docs/zh/guide/resource/configuration.md @@ -156,8 +156,9 @@ datasource.encryption.enable=false # datasource encryption salt datasource.encryption.salt=!@#$%^&* -# data quality option -data-quality.jar.name=dolphinscheduler-data-quality-dev-SNAPSHOT.jar +# data quality absolute path, it would auto discovery from libs directory. You can also specific the jar name in libs directory +# if you re-build it alone, or auto discovery mechanism fail +data-quality.jar.name= #data-quality.error.output.path=/tmp/data-quality-error-data diff --git a/dolphinscheduler-api-test/dolphinscheduler-api-test-case/src/test/resources/docker/file-manage/common.properties b/dolphinscheduler-api-test/dolphinscheduler-api-test-case/src/test/resources/docker/file-manage/common.properties index 000341f15353a..d43e55e822cbf 100644 --- a/dolphinscheduler-api-test/dolphinscheduler-api-test-case/src/test/resources/docker/file-manage/common.properties +++ b/dolphinscheduler-api-test/dolphinscheduler-api-test-case/src/test/resources/docker/file-manage/common.properties @@ -84,8 +84,9 @@ datasource.encryption.enable=false # datasource encryption salt datasource.encryption.salt=!@#$%^&* -# data quality option -data-quality.jar.name=dolphinscheduler-data-quality-dev-SNAPSHOT.jar +# data quality absolute path, it would auto discovery from libs directory. You can also specific the jar name in libs directory +# if you re-build it alone, or auto discovery mechanism fail +data-quality.jar.name= #data-quality.error.output.path=/tmp/data-quality-error-data diff --git a/dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/constants/DataSourceConstants.java b/dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/constants/DataSourceConstants.java index 568eb8c6f5729..11347942bde4c 100644 --- a/dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/constants/DataSourceConstants.java +++ b/dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/constants/DataSourceConstants.java @@ -102,7 +102,7 @@ public class DataSourceConstants { * dataSource sensitive param */ public static final String DATASOURCE_PASSWORD_REGEX = - "(?<=((?i)password((\":\")|(\\\\\":\\\\\")|(=')))).*?(?=((\")|(\\\\\")|(')))"; + "(?<=((?i)password((\" : \")|(\":\")|(\\\\\":\\\\\")|(=')))).*?(?=((\")|(\\\\\")|(')))"; /** * datasource encryption salt diff --git a/dolphinscheduler-common/src/main/resources/common.properties b/dolphinscheduler-common/src/main/resources/common.properties index 28ebf4571d442..451a0f734c979 100644 --- a/dolphinscheduler-common/src/main/resources/common.properties +++ b/dolphinscheduler-common/src/main/resources/common.properties @@ -120,8 +120,9 @@ datasource.encryption.enable=false # datasource encryption salt datasource.encryption.salt=!@#$%^&* -# data quality option -data-quality.jar.name=dolphinscheduler-data-quality-dev-SNAPSHOT.jar +# data quality absolute path, it would auto discovery from libs directory. You can also specific the jar name in libs directory +# if you re-build it alone, or auto discovery mechanism fail +data-quality.jar.name= #data-quality.error.output.path=/tmp/data-quality-error-data diff --git a/dolphinscheduler-common/src/test/java/org/apache/dolphinscheduler/common/log/SensitiveDataConverterTest.java b/dolphinscheduler-common/src/test/java/org/apache/dolphinscheduler/common/log/SensitiveDataConverterTest.java index c641c296b8fb9..e6078ae95fff9 100644 --- a/dolphinscheduler-common/src/test/java/org/apache/dolphinscheduler/common/log/SensitiveDataConverterTest.java +++ b/dolphinscheduler-common/src/test/java/org/apache/dolphinscheduler/common/log/SensitiveDataConverterTest.java @@ -79,6 +79,34 @@ public void testPwdLogMsgConverter() { " }\n" + "}"); + // data quality + tcs.put("\"readers\" : [ {\n" + + " \"type\" : \"JDBC\",\n" + + " \"config\" : {\n" + + " \"database\" : \"dolphinscheduler\",\n" + + " \"password\" : \"view1\",\n" + + " \"driver\" : \"com.mysql.cj.jdbc.Driver\",\n" + + " \"user\" : \"root\",\n" + + " \"output_table\" : \"dolphinscheduler_users\",\n" + + " \"table\" : \"users\",\n" + + " \"url\" : \"jdbc:mysql://127.0.0.1:3307/dolphinscheduler?userSSL=true&enabledTLSProtocols=TLSv1.2\"\n" + + + " }\n" + + " } ]", + "\"readers\" : [ {\n" + + " \"type\" : \"JDBC\",\n" + + " \"config\" : {\n" + + " \"database\" : \"dolphinscheduler\",\n" + + " \"password\" : \"*****\",\n" + + " \"driver\" : \"com.mysql.cj.jdbc.Driver\",\n" + + " \"user\" : \"root\",\n" + + " \"output_table\" : \"dolphinscheduler_users\",\n" + + " \"table\" : \"users\",\n" + + " \"url\" : \"jdbc:mysql://127.0.0.1:3307/dolphinscheduler?userSSL=true&enabledTLSProtocols=TLSv1.2\"\n" + + + " }\n" + + " } ]"); + for (String logMsg : tcs.keySet()) { String maskedLog = SensitiveDataConverter.maskSensitiveData(logMsg); logger.info("original parameter : {}", logMsg); diff --git a/dolphinscheduler-common/src/test/resources/common.properties b/dolphinscheduler-common/src/test/resources/common.properties index ef6cc3710e67f..107977df7fd6b 100644 --- a/dolphinscheduler-common/src/test/resources/common.properties +++ b/dolphinscheduler-common/src/test/resources/common.properties @@ -115,8 +115,9 @@ datasource.encryption.enable=false # datasource encryption salt datasource.encryption.salt=!@#$%^&* -# data quality option -data-quality.jar.name=dolphinscheduler-data-quality-dev-SNAPSHOT.jar +# data quality absolute path, it would auto discovery from libs directory. You can also specific the jar name in libs directory +# if you re-build it alone, or auto discovery mechanism fail +data-quality.jar.name= #data-quality.error.output.path=/tmp/data-quality-error-data diff --git a/dolphinscheduler-datasource-plugin/dolphinscheduler-datasource-api/src/main/java/org/apache/dolphinscheduler/plugin/datasource/api/utils/CommonUtils.java b/dolphinscheduler-datasource-plugin/dolphinscheduler-datasource-api/src/main/java/org/apache/dolphinscheduler/plugin/datasource/api/utils/CommonUtils.java index e6ecef287ae6c..a4e64594c0c23 100644 --- a/dolphinscheduler-datasource-plugin/dolphinscheduler-datasource-api/src/main/java/org/apache/dolphinscheduler/plugin/datasource/api/utils/CommonUtils.java +++ b/dolphinscheduler-datasource-plugin/dolphinscheduler-datasource-api/src/main/java/org/apache/dolphinscheduler/plugin/datasource/api/utils/CommonUtils.java @@ -36,17 +36,26 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.security.UserGroupInformation; +import java.io.File; import java.io.IOException; +import java.util.Optional; + +import lombok.extern.slf4j.Slf4j; + +import org.springframework.core.io.ClassPathResource; /** * common utils */ +@Slf4j public class CommonUtils { private CommonUtils() { throw new UnsupportedOperationException("Construct CommonUtils"); } + private static String DEFAULT_DATA_QUALITY_JAR_PATH = null; + private static final boolean IS_DEVELOP_MODE = PropertyUtils.getBoolean(Constants.DEVELOPMENT_STATE, true); /** @@ -123,14 +132,56 @@ public static boolean loadKerberosConf(String javaSecurityKrb5Conf, String login return false; } - public static String getDataQualityJarName() { - String dqsJarName = PropertyUtils.getString(DATA_QUALITY_JAR_NAME); + public static String getDataQualityJarPath() { + String dqsJarPath = PropertyUtils.getString(DATA_QUALITY_JAR_NAME); - if (StringUtils.isEmpty(dqsJarName)) { - return "dolphinscheduler-data-quality.jar"; + if (StringUtils.isEmpty(dqsJarPath)) { + log.info("data quality jar path is empty, will try to get it from data quality jar name"); + return getDefaultDataQualityJarPath(); } - return dqsJarName; + return dqsJarPath; + } + + private static String getDefaultDataQualityJarPath() { + if (StringUtils.isNotEmpty(DEFAULT_DATA_QUALITY_JAR_PATH)) { + return DEFAULT_DATA_QUALITY_JAR_PATH; + } + try { + // not standalone mode + String currentAbsolutePath = new ClassPathResource("./").getFile().getAbsolutePath(); + String currentLibPath = currentAbsolutePath + "/../libs"; + getDataQualityJarPathFromPath(currentLibPath).ifPresent(jarName -> DEFAULT_DATA_QUALITY_JAR_PATH = jarName); + + // standalone mode + if (StringUtils.isEmpty(DEFAULT_DATA_QUALITY_JAR_PATH)) { + log.info( + "Can not get data quality jar from path {}, maybe service running in standalone mode, will try to find another path", + currentLibPath); + currentLibPath = currentAbsolutePath + "/../../worker-server/libs"; + getDataQualityJarPathFromPath(currentLibPath) + .ifPresent(jarName -> DEFAULT_DATA_QUALITY_JAR_PATH = jarName); + } + } catch (IOException e) { + throw new RuntimeException("get default data quality jar path error", e); + } + log.info("get default data quality jar name: {}", DEFAULT_DATA_QUALITY_JAR_PATH); + return DEFAULT_DATA_QUALITY_JAR_PATH; + } + + private static Optional getDataQualityJarPathFromPath(String path) { + log.info("Try to get data quality jar from path {}", path); + File[] jars = new File(path).listFiles(); + if (jars == null) { + log.warn("No data quality related jar found from path {}", path); + return Optional.empty(); + } + for (File jar : jars) { + if (jar.getName().startsWith("dolphinscheduler-data-quality")) { + return Optional.of(jar.getAbsolutePath()); + } + } + return Optional.empty(); } /** diff --git a/dolphinscheduler-datasource-plugin/dolphinscheduler-datasource-mysql/src/main/java/org/apache/dolphinscheduler/plugin/datasource/mysql/param/MySQLDataSourceProcessor.java b/dolphinscheduler-datasource-plugin/dolphinscheduler-datasource-mysql/src/main/java/org/apache/dolphinscheduler/plugin/datasource/mysql/param/MySQLDataSourceProcessor.java index c1b91e5930d67..b954defdd1c9b 100644 --- a/dolphinscheduler-datasource-plugin/dolphinscheduler-datasource-mysql/src/main/java/org/apache/dolphinscheduler/plugin/datasource/mysql/param/MySQLDataSourceProcessor.java +++ b/dolphinscheduler-datasource-plugin/dolphinscheduler-datasource-mysql/src/main/java/org/apache/dolphinscheduler/plugin/datasource/mysql/param/MySQLDataSourceProcessor.java @@ -33,6 +33,7 @@ import java.sql.Connection; import java.sql.DriverManager; import java.sql.SQLException; +import java.util.ArrayList; import java.util.List; import java.util.Map; import java.util.Properties; @@ -115,6 +116,10 @@ public String getValidationQuery() { @Override public String getJdbcUrl(ConnectionParam connectionParam) { MySQLConnectionParam mysqlConnectionParam = (MySQLConnectionParam) connectionParam; + if (MapUtils.isNotEmpty(mysqlConnectionParam.getOther())) { + return String.format("%s?%s", mysqlConnectionParam.getJdbcUrl(), + transformOther(mysqlConnectionParam.getOther())); + } return mysqlConnectionParam.getJdbcUrl(); } @@ -182,4 +187,13 @@ private static boolean checkKeyIsLegitimate(String key) { && !key.contains(ALLOW_URL_IN_LOCAL_IN_FILE_NAME); } + private String transformOther(Map otherMap) { + if (MapUtils.isNotEmpty(otherMap)) { + List list = new ArrayList<>(otherMap.size()); + otherMap.forEach((key, value) -> list.add(String.format("%s=%s", key, value))); + return String.join("&", list); + } + return null; + } + } diff --git a/dolphinscheduler-e2e/dolphinscheduler-e2e-case/src/test/resources/docker/file-manage/common.properties b/dolphinscheduler-e2e/dolphinscheduler-e2e-case/src/test/resources/docker/file-manage/common.properties index f704bb60fcce2..b5f61011b3bbc 100644 --- a/dolphinscheduler-e2e/dolphinscheduler-e2e-case/src/test/resources/docker/file-manage/common.properties +++ b/dolphinscheduler-e2e/dolphinscheduler-e2e-case/src/test/resources/docker/file-manage/common.properties @@ -95,8 +95,9 @@ datasource.encryption.enable=false # datasource encryption salt datasource.encryption.salt=!@#$%^&* -# data quality option -data-quality.jar.name=dolphinscheduler-data-quality-dev-SNAPSHOT.jar +# data quality option, it would auto discovery from libs directory. You can also specific the jar name in libs directory +# if you re-build it alone, or auto discovery mechanism fail +data-quality.jar.name= #data-quality.error.output.path=/tmp/data-quality-error-data diff --git a/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/runner/TaskExecutionContextFactory.java b/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/runner/TaskExecutionContextFactory.java index d436004719a58..ab1806ff67202 100644 --- a/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/runner/TaskExecutionContextFactory.java +++ b/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/runner/TaskExecutionContextFactory.java @@ -17,13 +17,6 @@ package org.apache.dolphinscheduler.server.master.runner; -import static org.apache.dolphinscheduler.common.constants.Constants.ADDRESS; -import static org.apache.dolphinscheduler.common.constants.Constants.DATABASE; -import static org.apache.dolphinscheduler.common.constants.Constants.JDBC_URL; -import static org.apache.dolphinscheduler.common.constants.Constants.OTHER; -import static org.apache.dolphinscheduler.common.constants.Constants.PASSWORD; -import static org.apache.dolphinscheduler.common.constants.Constants.SINGLE_SLASH; -import static org.apache.dolphinscheduler.common.constants.Constants.USER; import static org.apache.dolphinscheduler.plugin.task.api.TaskConstants.CLUSTER; import static org.apache.dolphinscheduler.plugin.task.api.TaskConstants.NAMESPACE_NAME; import static org.apache.dolphinscheduler.plugin.task.api.TaskConstants.TASK_TYPE_DATA_QUALITY; @@ -70,6 +63,8 @@ import org.apache.dolphinscheduler.server.master.exception.TaskExecutionContextCreateException; import org.apache.dolphinscheduler.service.expand.CuringParamsService; import org.apache.dolphinscheduler.service.process.ProcessService; +import org.apache.dolphinscheduler.spi.datasource.BaseConnectionParam; +import org.apache.dolphinscheduler.spi.datasource.DefaultConnectionParam; import org.apache.dolphinscheduler.spi.enums.DbType; import org.apache.commons.collections4.CollectionUtils; @@ -80,7 +75,6 @@ import java.util.Map; import java.util.Objects; import java.util.Optional; -import java.util.Properties; import lombok.extern.slf4j.Slf4j; @@ -400,15 +394,16 @@ public DataSource getDefaultDataSource() { dataSource.setUserName(hikariDataSource.getUsername()); JdbcInfo jdbcInfo = JdbcUrlParser.getJdbcInfo(hikariDataSource.getJdbcUrl()); if (jdbcInfo != null) { - Properties properties = new Properties(); - properties.setProperty(USER, hikariDataSource.getUsername()); - properties.setProperty(PASSWORD, hikariDataSource.getPassword()); - properties.setProperty(DATABASE, jdbcInfo.getDatabase()); - properties.setProperty(ADDRESS, jdbcInfo.getAddress()); - properties.setProperty(OTHER, jdbcInfo.getParams()); - properties.setProperty(JDBC_URL, jdbcInfo.getAddress() + SINGLE_SLASH + jdbcInfo.getDatabase()); + // + BaseConnectionParam baseConnectionParam = new DefaultConnectionParam(); + baseConnectionParam.setUser(hikariDataSource.getUsername()); + baseConnectionParam.setPassword(hikariDataSource.getPassword()); + baseConnectionParam.setDatabase(jdbcInfo.getDatabase()); + baseConnectionParam.setAddress(jdbcInfo.getAddress()); + baseConnectionParam.setJdbcUrl(jdbcInfo.getJdbcUrl()); + baseConnectionParam.setOther(jdbcInfo.getParams()); dataSource.setType(DbType.of(JdbcUrlParser.getDbType(jdbcInfo.getDriverName()).getCode())); - dataSource.setConnectionParams(JSONUtils.toJsonString(properties)); + dataSource.setConnectionParams(JSONUtils.toJsonString(baseConnectionParam)); } return dataSource; diff --git a/dolphinscheduler-spi/src/main/java/org/apache/dolphinscheduler/spi/datasource/DefaultConnectionParam.java b/dolphinscheduler-spi/src/main/java/org/apache/dolphinscheduler/spi/datasource/DefaultConnectionParam.java new file mode 100644 index 0000000000000..a681ca6230e7d --- /dev/null +++ b/dolphinscheduler-spi/src/main/java/org/apache/dolphinscheduler/spi/datasource/DefaultConnectionParam.java @@ -0,0 +1,36 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.dolphinscheduler.spi.datasource; + +public class DefaultConnectionParam extends BaseConnectionParam { + + @Override + public String toString() { + return "DefaultConnectionParam{" + + "user='" + user + '\'' + + ", password='" + password + '\'' + + ", address='" + address + '\'' + + ", database='" + database + '\'' + + ", jdbcUrl='" + jdbcUrl + '\'' + + ", driverLocation='" + driverLocation + '\'' + + ", driverClassName='" + driverClassName + '\'' + + ", validationQuery='" + validationQuery + '\'' + + ", other='" + other + '\'' + + '}'; + } +} diff --git a/dolphinscheduler-task-plugin/dolphinscheduler-task-api/src/main/java/org/apache/dolphinscheduler/plugin/task/api/model/JdbcInfo.java b/dolphinscheduler-task-plugin/dolphinscheduler-task-api/src/main/java/org/apache/dolphinscheduler/plugin/task/api/model/JdbcInfo.java index 3e8f47ba7bf6e..5e90dffbfebae 100644 --- a/dolphinscheduler-task-plugin/dolphinscheduler-task-api/src/main/java/org/apache/dolphinscheduler/plugin/task/api/model/JdbcInfo.java +++ b/dolphinscheduler-task-plugin/dolphinscheduler-task-api/src/main/java/org/apache/dolphinscheduler/plugin/task/api/model/JdbcInfo.java @@ -17,9 +17,20 @@ package org.apache.dolphinscheduler.plugin.task.api.model; +import java.util.Map; + +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; + /** * JdbcInfo */ +@Data +@Builder +@AllArgsConstructor +@NoArgsConstructor public class JdbcInfo { private String host; @@ -30,67 +41,9 @@ public class JdbcInfo { private String database; - private String params; + private Map params; private String address; - public String getHost() { - return host; - } - - public void setHost(String host) { - this.host = host; - } - - public String getPort() { - return port; - } - - public void setPort(String port) { - this.port = port; - } - - public String getDriverName() { - return driverName; - } - - public void setDriverName(String driverName) { - this.driverName = driverName; - } - - public String getDatabase() { - return database; - } - - public void setDatabase(String database) { - this.database = database; - } - - public String getParams() { - return params; - } - - public void setParams(String params) { - this.params = params; - } - - public String getAddress() { - return address; - } - - public void setAddress(String address) { - this.address = address; - } - - @Override - public String toString() { - return "JdbcInfo{" - + "host='" + host + '\'' - + ", port='" + port + '\'' - + ", driverName='" + driverName + '\'' - + ", database='" + database + '\'' - + ", params='" + params + '\'' - + ", address='" + address + '\'' - + '}'; - } + private String jdbcUrl; } diff --git a/dolphinscheduler-task-plugin/dolphinscheduler-task-api/src/main/java/org/apache/dolphinscheduler/plugin/task/api/utils/JdbcUrlParser.java b/dolphinscheduler-task-plugin/dolphinscheduler-task-api/src/main/java/org/apache/dolphinscheduler/plugin/task/api/utils/JdbcUrlParser.java index 2ab0d69e23139..e8e5ec299cf6d 100644 --- a/dolphinscheduler-task-plugin/dolphinscheduler-task-api/src/main/java/org/apache/dolphinscheduler/plugin/task/api/utils/JdbcUrlParser.java +++ b/dolphinscheduler-task-plugin/dolphinscheduler-task-api/src/main/java/org/apache/dolphinscheduler/plugin/task/api/utils/JdbcUrlParser.java @@ -19,6 +19,7 @@ import static org.apache.dolphinscheduler.plugin.task.api.TaskConstants.COLON; import static org.apache.dolphinscheduler.plugin.task.api.TaskConstants.DOUBLE_SLASH; +import static org.apache.dolphinscheduler.plugin.task.api.TaskConstants.EQUAL_SIGN; import static org.apache.dolphinscheduler.plugin.task.api.TaskConstants.QUESTION; import static org.apache.dolphinscheduler.plugin.task.api.TaskConstants.SEMICOLON; import static org.apache.dolphinscheduler.plugin.task.api.TaskConstants.SINGLE_SLASH; @@ -30,6 +31,9 @@ import org.apache.commons.lang3.StringUtils; +import java.util.HashMap; +import java.util.Map; + /** * JdbcUrlParser */ @@ -105,8 +109,24 @@ public static JdbcInfo getJdbcInfo(String jdbcUrl) { jdbcInfo.setHost(host); jdbcInfo.setPort(port); jdbcInfo.setDatabase(database); - jdbcInfo.setParams(params); - jdbcInfo.setAddress("jdbc:" + driverName + "://" + host + COLON + port); + + if (StringUtils.isNotEmpty(params)) { + Map others = new HashMap<>(); + String[] paramList = params.split("&"); + for (String param : paramList) { + // handle bad params + if (StringUtils.isEmpty(param) || !param.contains(EQUAL_SIGN)) { + continue; + } + String[] kv = param.split(EQUAL_SIGN); + others.put(kv[0], kv[1]); + } + jdbcInfo.setParams(others); + } + + String address = "jdbc:" + driverName + "://" + host + COLON + port; + jdbcInfo.setAddress(address); + jdbcInfo.setJdbcUrl(address + SINGLE_SLASH + database); return jdbcInfo; } diff --git a/dolphinscheduler-task-plugin/dolphinscheduler-task-api/src/test/java/org/apache/dolphinscheduler/plugin/task/api/utils/JdbcUrlParserTest.java b/dolphinscheduler-task-plugin/dolphinscheduler-task-api/src/test/java/org/apache/dolphinscheduler/plugin/task/api/utils/JdbcUrlParserTest.java index 9cef20818717f..bad9171bacb0d 100644 --- a/dolphinscheduler-task-plugin/dolphinscheduler-task-api/src/test/java/org/apache/dolphinscheduler/plugin/task/api/utils/JdbcUrlParserTest.java +++ b/dolphinscheduler-task-plugin/dolphinscheduler-task-api/src/test/java/org/apache/dolphinscheduler/plugin/task/api/utils/JdbcUrlParserTest.java @@ -34,10 +34,20 @@ public void testGetJdbcInfo() { + "useUnicode=true&characterEncoding=UTF-8"); if (jdbcInfo != null) { String jdbcInfoStr = jdbcInfo.toString(); - String expected = "JdbcInfo{host='localhost', port='3306', " - + "driverName='mysql', database='dolphinscheduler', " - + "params='useUnicode=true&characterEncoding=UTF-8', " - + "address='jdbc:mysql://localhost:3306'}"; + String expected = + "JdbcInfo(host=localhost, port=3306, driverName=mysql, database=dolphinscheduler, " + + "params={useUnicode=true, characterEncoding=UTF-8}, address=jdbc:mysql://localhost:3306, jdbcUrl=jdbc:mysql://localhost:3306/dolphinscheduler)"; + Assertions.assertEquals(expected, jdbcInfoStr); + } + + // bad jdbc url case + jdbcInfo = JdbcUrlParser.getJdbcInfo("jdbc:mysql://localhost:3306/dolphinscheduler?" + + "useUnicode=true&&characterEncoding=UTF-8"); + if (jdbcInfo != null) { + String jdbcInfoStr = jdbcInfo.toString(); + String expected = + "JdbcInfo(host=localhost, port=3306, driverName=mysql, database=dolphinscheduler, " + + "params={useUnicode=true, characterEncoding=UTF-8}, address=jdbc:mysql://localhost:3306, jdbcUrl=jdbc:mysql://localhost:3306/dolphinscheduler)"; Assertions.assertEquals(expected, jdbcInfoStr); } } diff --git a/dolphinscheduler-task-plugin/dolphinscheduler-task-api/src/test/resources/common.properties b/dolphinscheduler-task-plugin/dolphinscheduler-task-api/src/test/resources/common.properties index 5fab54a1435d2..9855d855e9f28 100644 --- a/dolphinscheduler-task-plugin/dolphinscheduler-task-api/src/test/resources/common.properties +++ b/dolphinscheduler-task-plugin/dolphinscheduler-task-api/src/test/resources/common.properties @@ -84,8 +84,9 @@ datasource.encryption.enable=false # datasource encryption salt datasource.encryption.salt=!@#$%^&* -# data quality option -data-quality.jar.name=dolphinscheduler-data-quality-dev-SNAPSHOT.jar +# data quality absolute path, it would auto discovery from libs directory. You can also specific the jar name in libs directory +# if you re-build it alone, or auto discovery mechanism fail +data-quality.jar.name= #data-quality.error.output.path=/tmp/data-quality-error-data diff --git a/dolphinscheduler-task-plugin/dolphinscheduler-task-dataquality/src/main/java/org/apache/dolphinscheduler/plugin/task/dq/DataQualityTask.java b/dolphinscheduler-task-plugin/dolphinscheduler-task-dataquality/src/main/java/org/apache/dolphinscheduler/plugin/task/dq/DataQualityTask.java index 1bf1a6454c562..ec8adc3eefcf9 100644 --- a/dolphinscheduler-task-plugin/dolphinscheduler-task-dataquality/src/main/java/org/apache/dolphinscheduler/plugin/task/dq/DataQualityTask.java +++ b/dolphinscheduler-task-plugin/dolphinscheduler-task-dataquality/src/main/java/org/apache/dolphinscheduler/plugin/task/dq/DataQualityTask.java @@ -50,7 +50,6 @@ import org.apache.commons.lang.StringEscapeUtils; import org.apache.commons.lang3.StringUtils; -import java.io.File; import java.time.LocalDateTime; import java.time.format.DateTimeFormatter; import java.util.ArrayList; @@ -117,6 +116,7 @@ public void init() { DataQualityConfiguration dataQualityConfiguration = ruleManager.generateDataQualityParameter(); + log.info("data quality configuration: {}", JSONUtils.toPrettyJsonString(dataQualityConfiguration)); dataQualityParameters .getSparkParameters() .setMainArgs("\"" @@ -177,9 +177,7 @@ protected Map getProperties() { protected void setMainJarName() { ResourceInfo mainJar = new ResourceInfo(); - String basePath = System.getProperty("user.dir").replace(File.separator + "bin", ""); - mainJar.setResourceName( - basePath + File.separator + "libs" + File.separator + CommonUtils.getDataQualityJarName()); + mainJar.setResourceName(CommonUtils.getDataQualityJarPath()); dataQualityParameters.getSparkParameters().setMainJar(mainJar); } From ef9ed3db55cb1647886b06c2b2c6a5cfcdccfb5c Mon Sep 17 00:00:00 2001 From: caishunfeng Date: Mon, 5 Feb 2024 10:23:47 +0800 Subject: [PATCH 03/10] fix switch js (#15487) Co-authored-by: Rick Cheng Co-authored-by: Eric Gao --- .../server/master/utils/SwitchTaskUtils.java | 17 +++++++++++++++++ .../master/utils/SwitchTaskUtilsTest.java | 14 ++++++++++++++ 2 files changed, 31 insertions(+) diff --git a/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/utils/SwitchTaskUtils.java b/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/utils/SwitchTaskUtils.java index 4e1c30313865f..f4ebd0c60cab3 100644 --- a/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/utils/SwitchTaskUtils.java +++ b/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/utils/SwitchTaskUtils.java @@ -23,6 +23,7 @@ import org.apache.commons.collections4.MapUtils; import java.util.Map; +import java.util.Set; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -33,6 +34,7 @@ import lombok.extern.slf4j.Slf4j; import com.google.common.collect.Maps; +import com.google.common.collect.Sets; @Slf4j public class SwitchTaskUtils { @@ -41,6 +43,15 @@ public class SwitchTaskUtils { private static final ScriptEngine engine; private static final String rgex = "['\"]*\\$\\{(.*?)\\}['\"]*"; + private static final Set blackKeySet = Sets.newHashSet( + "java", + "invoke", + "new", + "eval", + "function", + "import", + "\\\\"); + static { manager = new ScriptEngineManager(); engine = manager.getEngineByName("js"); @@ -83,6 +94,12 @@ public static String generateContentWithTaskParams(String condition, Map { SwitchTaskUtils.generateContentWithTaskParams(content, globalParams, varParams); }); + + String cmd = "bash /tmp/shell"; + String cmdContent = "java.lang.Runtime.getRuntime().exec(\"${cmd}\")"; + globalParams.put("cmd", new Property("cmd", Direct.IN, DataType.VARCHAR, cmd)); + Assertions.assertThrowsExactly(IllegalArgumentException.class, () -> { + SwitchTaskUtils.generateContentWithTaskParams(cmdContent, globalParams, varParams); + }); + + String contentWithUnicode = + "\\\\u006a\\\\u0061\\\\u0076\\\\u0061\\\\u002e\\\\u006c\\\\u0061\\\\u006e\\\\u0067\\\\u002e\\\\u0052\\\\u0075\\\\u006e\\\\u0074\\\\u0069\\\\u006d\\\\u0065.getRuntime().exec(\\\"open -a Calculator.app\\"; + Assertions.assertThrowsExactly(IllegalArgumentException.class, () -> { + SwitchTaskUtils.generateContentWithTaskParams(contentWithUnicode, globalParams, varParams); + }); + } } From 4d6af516b9bbc1587318f9a305d82b461c124a30 Mon Sep 17 00:00:00 2001 From: dada Sun <707509803@qq.com> Date: Mon, 5 Feb 2024 11:41:21 +0800 Subject: [PATCH 04/10] [Bug][Task Api] fix 'MACPATTERN' in ProcessUtils and cover all cases on MacOS in ProcessUtilsTest (#15480) (#15529) Co-authored-by: Rick Cheng --- .../dolphinscheduler/plugin/task/api/utils/ProcessUtils.java | 2 +- .../plugin/task/api/utils/ProcessUtilsTest.java | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/dolphinscheduler-task-plugin/dolphinscheduler-task-api/src/main/java/org/apache/dolphinscheduler/plugin/task/api/utils/ProcessUtils.java b/dolphinscheduler-task-plugin/dolphinscheduler-task-api/src/main/java/org/apache/dolphinscheduler/plugin/task/api/utils/ProcessUtils.java index 46b8ff4c7bc04..7b61a1eaec40e 100644 --- a/dolphinscheduler-task-plugin/dolphinscheduler-task-api/src/main/java/org/apache/dolphinscheduler/plugin/task/api/utils/ProcessUtils.java +++ b/dolphinscheduler-task-plugin/dolphinscheduler-task-api/src/main/java/org/apache/dolphinscheduler/plugin/task/api/utils/ProcessUtils.java @@ -71,7 +71,7 @@ private ProcessUtils() { * Initialization regularization, solve the problem of pre-compilation performance, * avoid the thread safety problem of multi-thread operation */ - private static final Pattern MACPATTERN = Pattern.compile("-[+|-]-\\s(\\d+)"); + private static final Pattern MACPATTERN = Pattern.compile("-[+|-][-|=]\\s(\\d+)"); /** * Expression of PID recognition in Windows scene diff --git a/dolphinscheduler-task-plugin/dolphinscheduler-task-api/src/test/java/org/apache/dolphinscheduler/plugin/task/api/utils/ProcessUtilsTest.java b/dolphinscheduler-task-plugin/dolphinscheduler-task-api/src/test/java/org/apache/dolphinscheduler/plugin/task/api/utils/ProcessUtilsTest.java index ef10e73325714..14caf470d7aaa 100644 --- a/dolphinscheduler-task-plugin/dolphinscheduler-task-api/src/test/java/org/apache/dolphinscheduler/plugin/task/api/utils/ProcessUtilsTest.java +++ b/dolphinscheduler-task-plugin/dolphinscheduler-task-api/src/test/java/org/apache/dolphinscheduler/plugin/task/api/utils/ProcessUtilsTest.java @@ -38,6 +38,7 @@ public void testGetPidsStr() throws Exception { String command; MockedStatic osUtilsMockedStatic = Mockito.mockStatic(OSUtils.class); if (SystemUtils.IS_OS_MAC) { + pids = "-+= 6279 sudo -+- 6282 558_1497.sh --- 6354 sleep"; command = String.format("%s -sp %d", TaskConstants.PSTREE, processId); } else if (SystemUtils.IS_OS_LINUX) { command = String.format("%s -p %d", TaskConstants.PSTREE, processId); @@ -54,6 +55,7 @@ public void testGetPidsStr() throws Exception { String exceptPidsStr2 = "2000 2100 2101"; String command2; if (SystemUtils.IS_OS_MAC) { + pids2 = "-+= 2000 apache2 -+- 2100 222332-apache2-submit_task.py --- 2101 apache2"; command2 = String.format("%s -sp %d", TaskConstants.PSTREE, processId2); } else if (SystemUtils.IS_OS_LINUX) { command2 = String.format("%s -p %d", TaskConstants.PSTREE, processId2); @@ -70,6 +72,7 @@ public void testGetPidsStr() throws Exception { String exceptPidsStr3 = "5000 6000 7000 7100"; String command3; if (SystemUtils.IS_OS_MAC) { + pids3 = "-+= 5000 sshd -+- 6000 sshd --= 7000 bash --- 7100 python"; command3 = String.format("%s -sp %d", TaskConstants.PSTREE, processId3); } else if (SystemUtils.IS_OS_LINUX) { command3 = String.format("%s -p %d", TaskConstants.PSTREE, processId3); From d8e820c4b9fa70e6d7c8368a8dc17082f8b15db0 Mon Sep 17 00:00:00 2001 From: Jay Chung Date: Mon, 5 Feb 2024 12:20:07 +0800 Subject: [PATCH 05/10] fix: ddl without drop exists (#14128) Co-authored-by: Rick Cheng --- .../src/main/resources/sql/dolphinscheduler_mysql.sql | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/dolphinscheduler-dao/src/main/resources/sql/dolphinscheduler_mysql.sql b/dolphinscheduler-dao/src/main/resources/sql/dolphinscheduler_mysql.sql index 0d3e8b190a2b1..9dcdc02203148 100644 --- a/dolphinscheduler-dao/src/main/resources/sql/dolphinscheduler_mysql.sql +++ b/dolphinscheduler-dao/src/main/resources/sql/dolphinscheduler_mysql.sql @@ -2092,6 +2092,10 @@ CREATE TABLE `t_ds_fav_task` AUTO_INCREMENT = 1 DEFAULT CHARSET = utf8 COLLATE = utf8_bin; +-- ---------------------------- +-- Table structure for t_ds_trigger_relation +-- ---------------------------- +DROP TABLE IF EXISTS `t_ds_trigger_relation`; CREATE TABLE `t_ds_trigger_relation` ( `id` bigint(20) NOT NULL AUTO_INCREMENT, `trigger_type` int(11) NOT NULL DEFAULT '0' COMMENT '0 process 1 task', From e5a208f363abcb31bb7e8ffee16c7a142671c667 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=B0=8F=E5=8F=AF=E8=80=90?= <46134044+sdhzwc@users.noreply.github.com> Date: Mon, 5 Feb 2024 14:23:16 +0800 Subject: [PATCH 06/10] [Bug][force-success] force success add end time (#15144) Co-authored-by: Rick Cheng --- .../api/service/impl/TaskInstanceServiceImpl.java | 1 + 1 file changed, 1 insertion(+) diff --git a/dolphinscheduler-api/src/main/java/org/apache/dolphinscheduler/api/service/impl/TaskInstanceServiceImpl.java b/dolphinscheduler-api/src/main/java/org/apache/dolphinscheduler/api/service/impl/TaskInstanceServiceImpl.java index d3426dfa3e6c9..f06f8115a9249 100644 --- a/dolphinscheduler-api/src/main/java/org/apache/dolphinscheduler/api/service/impl/TaskInstanceServiceImpl.java +++ b/dolphinscheduler-api/src/main/java/org/apache/dolphinscheduler/api/service/impl/TaskInstanceServiceImpl.java @@ -255,6 +255,7 @@ public Result forceTaskSuccess(User loginUser, long projectCode, Integer taskIns // change the state of the task instance task.setState(TaskExecutionStatus.FORCED_SUCCESS); + task.setEndTime(new Date()); int changedNum = taskInstanceMapper.updateById(task); if (changedNum > 0) { processService.forceProcessInstanceSuccessByTaskInstanceId(taskInstanceId); From 01eb8f834ff4abf662b704379f1bcd019f4d2d74 Mon Sep 17 00:00:00 2001 From: Jay Chung Date: Mon, 5 Feb 2024 16:17:30 +0800 Subject: [PATCH 07/10] fix: start param for wf not work (#15544) * fix: start param for wf not work fix: #15280 * fix test --- .../runner/WorkflowExecuteRunnable.java | 39 +--------------- .../service/expand/CuringParamsService.java | 12 +++++ .../expand/CuringParamsServiceImpl.java | 27 +++++++++++- .../service/process/ProcessService.java | 3 ++ .../service/process/ProcessServiceImpl.java | 31 ++++++------- .../expand/CuringParamsServiceTest.java | 44 +++++++++++++++++++ 6 files changed, 99 insertions(+), 57 deletions(-) diff --git a/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/runner/WorkflowExecuteRunnable.java b/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/runner/WorkflowExecuteRunnable.java index 2db49c6f09032..fa658c04f1095 100644 --- a/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/runner/WorkflowExecuteRunnable.java +++ b/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/runner/WorkflowExecuteRunnable.java @@ -20,7 +20,6 @@ import static org.apache.dolphinscheduler.common.constants.CommandKeyConstants.CMD_PARAM_COMPLEMENT_DATA_END_DATE; import static org.apache.dolphinscheduler.common.constants.CommandKeyConstants.CMD_PARAM_COMPLEMENT_DATA_SCHEDULE_DATE_LIST; import static org.apache.dolphinscheduler.common.constants.CommandKeyConstants.CMD_PARAM_COMPLEMENT_DATA_START_DATE; -import static org.apache.dolphinscheduler.common.constants.CommandKeyConstants.CMD_PARAM_FATHER_PARAMS; import static org.apache.dolphinscheduler.common.constants.CommandKeyConstants.CMD_PARAM_RECOVERY_START_NODE_STRING; import static org.apache.dolphinscheduler.common.constants.CommandKeyConstants.CMD_PARAM_RECOVER_PROCESS_ID_STRING; import static org.apache.dolphinscheduler.common.constants.CommandKeyConstants.CMD_PARAM_START_NODES; @@ -29,8 +28,6 @@ import static org.apache.dolphinscheduler.common.constants.Constants.DEFAULT_WORKER_GROUP; import static org.apache.dolphinscheduler.common.constants.DateConstants.YYYY_MM_DD_HH_MM_SS; import static org.apache.dolphinscheduler.plugin.task.api.TaskConstants.TASK_TYPE_BLOCKING; -import static org.apache.dolphinscheduler.plugin.task.api.enums.DataType.VARCHAR; -import static org.apache.dolphinscheduler.plugin.task.api.enums.Direct.IN; import org.apache.dolphinscheduler.common.constants.Constants; import org.apache.dolphinscheduler.common.enums.CommandType; @@ -860,7 +857,7 @@ private void initTaskQueue() throws StateEventHandleException, CronParseExceptio Map cmdParam = JSONUtils.toMap(workflowInstance.getCommandParam()); if (cmdParam != null) { // reset global params while there are start parameters - setGlobalParamIfCommanded(workflowDefinition, cmdParam); + processService.setGlobalParamIfCommanded(workflowDefinition, cmdParam); Date start = null; Date end = null; @@ -2057,40 +2054,6 @@ public Map getWaitToRetryTaskInstanceMap() { return waitToRetryTaskInstanceMap; } - private void setGlobalParamIfCommanded(ProcessDefinition processDefinition, Map cmdParam) { - // get start params from command param - Map startParamMap = new HashMap<>(); - if (cmdParam.containsKey(CMD_PARAM_START_PARAMS)) { - String startParamJson = cmdParam.get(CMD_PARAM_START_PARAMS); - startParamMap = JSONUtils.toMap(startParamJson); - } - Map fatherParamMap = new HashMap<>(); - if (cmdParam.containsKey(CMD_PARAM_FATHER_PARAMS)) { - String fatherParamJson = cmdParam.get(CMD_PARAM_FATHER_PARAMS); - fatherParamMap = JSONUtils.toMap(fatherParamJson); - } - startParamMap.putAll(fatherParamMap); - // set start param into global params - Map globalMap = processDefinition.getGlobalParamMap(); - List globalParamList = processDefinition.getGlobalParamList(); - if (startParamMap.size() > 0 && globalMap != null) { - // start param to overwrite global param - for (Map.Entry param : globalMap.entrySet()) { - String val = startParamMap.get(param.getKey()); - if (val != null) { - param.setValue(val); - } - } - // start param to create new global param if global not exist - for (Map.Entry startParam : startParamMap.entrySet()) { - if (!globalMap.containsKey(startParam.getKey())) { - globalMap.put(startParam.getKey(), startParam.getValue()); - globalParamList.add(new Property(startParam.getKey(), IN, VARCHAR, startParam.getValue())); - } - } - } - } - /** * clear related data if command of process instance is EXECUTE_TASK * 1. find all task code from sub dag (only contains related task) diff --git a/dolphinscheduler-service/src/main/java/org/apache/dolphinscheduler/service/expand/CuringParamsService.java b/dolphinscheduler-service/src/main/java/org/apache/dolphinscheduler/service/expand/CuringParamsService.java index d6d50ace80a38..eff0a3071f70b 100644 --- a/dolphinscheduler-service/src/main/java/org/apache/dolphinscheduler/service/expand/CuringParamsService.java +++ b/dolphinscheduler-service/src/main/java/org/apache/dolphinscheduler/service/expand/CuringParamsService.java @@ -27,6 +27,8 @@ import java.util.List; import java.util.Map; +import javax.annotation.Nullable; + import lombok.NonNull; public interface CuringParamsService { @@ -80,6 +82,16 @@ Map paramParsingPreparation(@NonNull TaskInstance taskInstance @NonNull AbstractParameters parameters, @NonNull ProcessInstance processInstance); + /** + * Parse workflow star parameter + */ + Map parseWorkflowStartParam(@Nullable Map cmdParam); + + /** + * Parse workflow father parameter + */ + Map parseWorkflowFatherParam(@Nullable Map cmdParam); + /** * preBuildBusinessParams * @param processInstance diff --git a/dolphinscheduler-service/src/main/java/org/apache/dolphinscheduler/service/expand/CuringParamsServiceImpl.java b/dolphinscheduler-service/src/main/java/org/apache/dolphinscheduler/service/expand/CuringParamsServiceImpl.java index e23c07b1b709e..afcfae3fd6774 100644 --- a/dolphinscheduler-service/src/main/java/org/apache/dolphinscheduler/service/expand/CuringParamsServiceImpl.java +++ b/dolphinscheduler-service/src/main/java/org/apache/dolphinscheduler/service/expand/CuringParamsServiceImpl.java @@ -28,6 +28,7 @@ import static org.apache.dolphinscheduler.plugin.task.api.TaskConstants.PARAMETER_WORKFLOW_DEFINITION_NAME; import static org.apache.dolphinscheduler.plugin.task.api.TaskConstants.PARAMETER_WORKFLOW_INSTANCE_ID; +import org.apache.dolphinscheduler.common.constants.CommandKeyConstants; import org.apache.dolphinscheduler.common.constants.Constants; import org.apache.dolphinscheduler.common.constants.DateConstants; import org.apache.dolphinscheduler.common.enums.CommandType; @@ -55,6 +56,8 @@ import java.util.Set; import java.util.stream.Collectors; +import javax.annotation.Nullable; + import lombok.NonNull; import org.springframework.beans.factory.annotation.Autowired; @@ -141,6 +144,28 @@ public String curingGlobalParams(Integer processInstanceId, Map return JSONUtils.toJsonString(globalParamList); } + @Override + public Map parseWorkflowStartParam(@Nullable Map cmdParam) { + if (cmdParam == null || !cmdParam.containsKey(CommandKeyConstants.CMD_PARAM_START_PARAMS)) { + return new HashMap<>(); + } + String startParamJson = cmdParam.get(CommandKeyConstants.CMD_PARAM_START_PARAMS); + Map startParamMap = JSONUtils.toMap(startParamJson); + return startParamMap.entrySet().stream().collect(Collectors.toMap(Map.Entry::getKey, + entry -> new Property(entry.getKey(), Direct.IN, DataType.VARCHAR, entry.getValue()))); + } + + @Override + public Map parseWorkflowFatherParam(@Nullable Map cmdParam) { + if (cmdParam == null || !cmdParam.containsKey(CommandKeyConstants.CMD_PARAM_FATHER_PARAMS)) { + return new HashMap<>(); + } + String startParamJson = cmdParam.get(CommandKeyConstants.CMD_PARAM_FATHER_PARAMS); + Map startParamMap = JSONUtils.toMap(startParamJson); + return startParamMap.entrySet().stream().collect(Collectors.toMap(Map.Entry::getKey, + entry -> new Property(entry.getKey(), Direct.IN, DataType.VARCHAR, entry.getValue()))); + } + /** * the global parameters and local parameters used in the worker will be prepared here, and built-in parameters. * @@ -199,7 +224,7 @@ public Map paramParsingPreparation(@NonNull TaskInstance taskI } if (MapUtils.isNotEmpty(cmdParam)) { - prepareParamsMap.putAll(ParameterUtils.getUserDefParamsMap(cmdParam)); + prepareParamsMap.putAll(parseWorkflowStartParam(cmdParam)); } Iterator> iter = prepareParamsMap.entrySet().iterator(); diff --git a/dolphinscheduler-service/src/main/java/org/apache/dolphinscheduler/service/process/ProcessService.java b/dolphinscheduler-service/src/main/java/org/apache/dolphinscheduler/service/process/ProcessService.java index 091d6353fb373..ba4def7e0634a 100644 --- a/dolphinscheduler-service/src/main/java/org/apache/dolphinscheduler/service/process/ProcessService.java +++ b/dolphinscheduler-service/src/main/java/org/apache/dolphinscheduler/service/process/ProcessService.java @@ -50,6 +50,7 @@ import org.apache.dolphinscheduler.service.model.TaskNode; import java.util.List; +import java.util.Map; import java.util.Optional; import javax.annotation.Nullable; @@ -194,4 +195,6 @@ TaskGroupQueue insertIntoTaskGroupQueue(Integer taskId, void forceProcessInstanceSuccessByTaskInstanceId(Integer taskInstanceId); void saveCommandTrigger(Integer commandId, Integer processInstanceId); + + void setGlobalParamIfCommanded(ProcessDefinition processDefinition, Map cmdParam); } diff --git a/dolphinscheduler-service/src/main/java/org/apache/dolphinscheduler/service/process/ProcessServiceImpl.java b/dolphinscheduler-service/src/main/java/org/apache/dolphinscheduler/service/process/ProcessServiceImpl.java index 93b257ba27ace..130a1c0993fde 100644 --- a/dolphinscheduler-service/src/main/java/org/apache/dolphinscheduler/service/process/ProcessServiceImpl.java +++ b/dolphinscheduler-service/src/main/java/org/apache/dolphinscheduler/service/process/ProcessServiceImpl.java @@ -28,8 +28,6 @@ import static org.apache.dolphinscheduler.common.constants.CommandKeyConstants.CMD_PARAM_SUB_PROCESS_DEFINE_CODE; import static org.apache.dolphinscheduler.common.constants.CommandKeyConstants.CMD_PARAM_SUB_PROCESS_PARENT_INSTANCE_ID; import static org.apache.dolphinscheduler.common.constants.Constants.LOCAL_PARAMS; -import static org.apache.dolphinscheduler.plugin.task.api.enums.DataType.VARCHAR; -import static org.apache.dolphinscheduler.plugin.task.api.enums.Direct.IN; import static org.apache.dolphinscheduler.plugin.task.api.utils.DataQualityConstants.TASK_INSTANCE_ID; import org.apache.dolphinscheduler.common.constants.CommandKeyConstants; @@ -587,35 +585,32 @@ private ProcessInstance generateNewProcessInstance(ProcessDefinition processDefi return processInstance; } - private void setGlobalParamIfCommanded(ProcessDefinition processDefinition, Map cmdParam) { + @Override + public void setGlobalParamIfCommanded(ProcessDefinition processDefinition, Map cmdParam) { + // get start params from command param - Map startParamMap = new HashMap<>(); - if (cmdParam != null && cmdParam.containsKey(CommandKeyConstants.CMD_PARAM_START_PARAMS)) { - String startParamJson = cmdParam.get(CommandKeyConstants.CMD_PARAM_START_PARAMS); - startParamMap = JSONUtils.toMap(startParamJson); - } - Map fatherParamMap = new HashMap<>(); - if (cmdParam != null && cmdParam.containsKey(CommandKeyConstants.CMD_PARAM_FATHER_PARAMS)) { - String fatherParamJson = cmdParam.get(CommandKeyConstants.CMD_PARAM_FATHER_PARAMS); - fatherParamMap = JSONUtils.toMap(fatherParamJson); - } - startParamMap.putAll(fatherParamMap); + Map fatherParam = curingGlobalParamsService.parseWorkflowFatherParam(cmdParam); + Map startParamMap = new HashMap<>(fatherParam); + + Map currentStartParamMap = curingGlobalParamsService.parseWorkflowStartParam(cmdParam); + startParamMap.putAll(currentStartParamMap); + // set start param into global params Map globalMap = processDefinition.getGlobalParamMap(); List globalParamList = processDefinition.getGlobalParamList(); if (MapUtils.isNotEmpty(startParamMap) && globalMap != null) { // start param to overwrite global param for (Map.Entry param : globalMap.entrySet()) { - String val = startParamMap.get(param.getKey()); + String val = startParamMap.get(param.getKey()).getValue(); if (val != null) { param.setValue(val); } } // start param to create new global param if global not exist - for (Entry startParam : startParamMap.entrySet()) { + for (Entry startParam : startParamMap.entrySet()) { if (!globalMap.containsKey(startParam.getKey())) { - globalMap.put(startParam.getKey(), startParam.getValue()); - globalParamList.add(new Property(startParam.getKey(), IN, VARCHAR, startParam.getValue())); + globalMap.put(startParam.getKey(), startParam.getValue().getValue()); + globalParamList.add(startParam.getValue()); } } } diff --git a/dolphinscheduler-service/src/test/java/org/apache/dolphinscheduler/service/expand/CuringParamsServiceTest.java b/dolphinscheduler-service/src/test/java/org/apache/dolphinscheduler/service/expand/CuringParamsServiceTest.java index 29b3ed58c1031..96c950324943d 100644 --- a/dolphinscheduler-service/src/test/java/org/apache/dolphinscheduler/service/expand/CuringParamsServiceTest.java +++ b/dolphinscheduler-service/src/test/java/org/apache/dolphinscheduler/service/expand/CuringParamsServiceTest.java @@ -33,6 +33,8 @@ import org.apache.dolphinscheduler.plugin.task.api.parameters.AbstractParameters; import org.apache.dolphinscheduler.plugin.task.api.parameters.SubProcessParameters; +import org.apache.commons.collections4.MapUtils; + import java.util.ArrayList; import java.util.Collections; import java.util.Date; @@ -234,4 +236,46 @@ public void testParamParsingPreparation() { Assertions.assertEquals(propertyMap.get(TaskConstants.PARAMETER_WORKFLOW_DEFINITION_CODE).getValue(), String.valueOf(processDefinition.getCode())); } + + @Test + public void testParseWorkflowStartParam() { + Map result = new HashMap<>(); + // empty cmd param + Map startParamMap = new HashMap<>(); + result = dolphinSchedulerCuringGlobalParams.parseWorkflowStartParam(startParamMap); + Assertions.assertTrue(MapUtils.isEmpty(result)); + + // without key + startParamMap.put("testStartParam", "$[yyyyMMdd]"); + result = dolphinSchedulerCuringGlobalParams.parseWorkflowStartParam(startParamMap); + Assertions.assertTrue(MapUtils.isEmpty(result)); + + startParamMap.put("StartParams", "{\"param1\":\"11111\", \"param2\":\"22222\"}"); + result = dolphinSchedulerCuringGlobalParams.parseWorkflowStartParam(startParamMap); + Assertions.assertTrue(MapUtils.isNotEmpty(result)); + Assertions.assertEquals(2, result.keySet().size()); + Assertions.assertEquals("11111", result.get("param1").getValue()); + Assertions.assertEquals("22222", result.get("param2").getValue()); + } + + @Test + public void testParseWorkflowFatherParam() { + Map result = new HashMap<>(); + // empty cmd param + Map startParamMap = new HashMap<>(); + result = dolphinSchedulerCuringGlobalParams.parseWorkflowFatherParam(startParamMap); + Assertions.assertTrue(MapUtils.isEmpty(result)); + + // without key + startParamMap.put("testfatherParams", "$[yyyyMMdd]"); + result = dolphinSchedulerCuringGlobalParams.parseWorkflowFatherParam(startParamMap); + Assertions.assertTrue(MapUtils.isEmpty(result)); + + startParamMap.put("fatherParams", "{\"param1\":\"11111\", \"param2\":\"22222\"}"); + result = dolphinSchedulerCuringGlobalParams.parseWorkflowFatherParam(startParamMap); + Assertions.assertTrue(MapUtils.isNotEmpty(result)); + Assertions.assertEquals(2, result.keySet().size()); + Assertions.assertEquals("11111", result.get("param1").getValue()); + Assertions.assertEquals("22222", result.get("param2").getValue()); + } } From 91d56f48601279eeb7de1a057e5c4425f86f2198 Mon Sep 17 00:00:00 2001 From: Jay Chung Date: Mon, 5 Feb 2024 17:14:59 +0800 Subject: [PATCH 08/10] fix: data quality may fail in docker mode (#15563) --- deploy/kubernetes/dolphinscheduler/README.md | 2 +- .../kubernetes/dolphinscheduler/values.yaml | 2 +- docs/docs/en/architecture/configuration.md | 2 +- docs/docs/en/guide/data-quality.md | 2 +- docs/docs/en/guide/resource/configuration.md | 7 +++-- docs/docs/en/guide/upgrade/incompatible.md | 1 + docs/docs/zh/architecture/configuration.md | 2 +- docs/docs/zh/guide/data-quality.md | 2 +- docs/docs/zh/guide/resource/configuration.md | 7 +++-- .../docker/file-manage/common.properties | 7 +++-- .../src/main/resources/common.properties | 7 +++-- .../src/test/resources/common.properties | 7 +++-- .../datasource/api/utils/CommonUtils.java | 29 ++++++++++++++----- .../docker/file-manage/common.properties | 7 +++-- .../src/main/docker/Dockerfile | 1 + .../plugin/task/api/TaskConstants.java | 4 +-- .../src/test/resources/common.properties | 7 +++-- 17 files changed, 60 insertions(+), 36 deletions(-) diff --git a/deploy/kubernetes/dolphinscheduler/README.md b/deploy/kubernetes/dolphinscheduler/README.md index 6c04c4085d20f..5659605b957ad 100644 --- a/deploy/kubernetes/dolphinscheduler/README.md +++ b/deploy/kubernetes/dolphinscheduler/README.md @@ -121,7 +121,7 @@ Please refer to the [Quick Start in Kubernetes](../../../docs/docs/en/guide/inst | conf.common."alert.rpc.port" | int | `50052` | rpc port | | conf.common."appId.collect" | string | `"log"` | way to collect applicationId: log, aop | | conf.common."conda.path" | string | `"/opt/anaconda3/etc/profile.d/conda.sh"` | set path of conda.sh | -| conf.common."data-quality.jar.name" | string | `"dolphinscheduler-data-quality-dev-SNAPSHOT.jar"` | data quality option | +| conf.common."data-quality.jar.dir" | string | `nil` | data quality option | | conf.common."data.basedir.path" | string | `"/tmp/dolphinscheduler"` | user data local directory path, please make sure the directory exists and have read write permissions | | conf.common."datasource.encryption.enable" | bool | `false` | datasource encryption enable | | conf.common."datasource.encryption.salt" | string | `"!@#$%^&*"` | datasource encryption salt | diff --git a/deploy/kubernetes/dolphinscheduler/values.yaml b/deploy/kubernetes/dolphinscheduler/values.yaml index 6effdf15ac311..a8d9a34875ca0 100644 --- a/deploy/kubernetes/dolphinscheduler/values.yaml +++ b/deploy/kubernetes/dolphinscheduler/values.yaml @@ -328,7 +328,7 @@ conf: datasource.encryption.salt: '!@#$%^&*' # -- data quality option - data-quality.jar.name: dolphinscheduler-data-quality-dev-SNAPSHOT.jar + data-quality.jar.dir: # -- Whether hive SQL is executed in the same session support.hive.oneSession: false diff --git a/docs/docs/en/architecture/configuration.md b/docs/docs/en/architecture/configuration.md index f4ab1435d5a4a..b9a26b865c773 100644 --- a/docs/docs/en/architecture/configuration.md +++ b/docs/docs/en/architecture/configuration.md @@ -226,7 +226,7 @@ The default configuration is as follows: | yarn.job.history.status.address | http://ds1:19888/ws/v1/history/mapreduce/jobs/%s | job history status url of yarn | | datasource.encryption.enable | false | whether to enable datasource encryption | | datasource.encryption.salt | !@#$%^&* | the salt of the datasource encryption | -| data-quality.jar.name | dolphinscheduler-data-quality-dev-SNAPSHOT.jar | the jar of data quality | +| data-quality.jar.dir | | the jar of data quality | | support.hive.oneSession | false | specify whether hive SQL is executed in the same session | | sudo.enable | true | whether to enable sudo | | alert.rpc.port | 50052 | the RPC port of Alert Server | diff --git a/docs/docs/en/guide/data-quality.md b/docs/docs/en/guide/data-quality.md index f6aa7a06b27b5..dca777d76fb8a 100644 --- a/docs/docs/en/guide/data-quality.md +++ b/docs/docs/en/guide/data-quality.md @@ -12,7 +12,7 @@ The execution logic of the data quality task is as follows: - The current data quality task result is stored in the `t_ds_dq_execute_result` table of `dolphinscheduler` `Worker` sends the task result to `Master`, after `Master` receives `TaskResponse`, it will judge whether the task type is `DataQualityTask`, if so, it will read the corresponding result from `t_ds_dq_execute_result` according to `taskInstanceId`, and then The result is judged according to the check mode, operator and threshold configured by the user. - If the result is a failure, the corresponding operation, alarm or interruption will be performed according to the failure policy configured by the user. -- If you package `data-quality` separately, remember to modify the package name to be consistent with `data-quality.jar.name` in `common.properties` with attribute name `data-quality.jar.name` +- If you package `data-quality` separately, remember to modify the package name to be consistent with `data-quality.jar.dir` in `common.properties` with attribute name `data-quality.jar.dir` - If the old version is upgraded and used, you need to execute the `sql` update script to initialize the database before running. - `dolphinscheduler-data-quality-dev-SNAPSHOT.jar` was built with no dependencies. If a `JDBC` driver is required, you can set the `-jars` parameter in the `node settings` `Option Parameters`, e.g. `--jars /lib/jars/mysql-connector-java-8.0.16.jar`. - Currently only `MySQL`, `PostgreSQL` and `HIVE` data sources have been tested, other data sources have not been tested yet. diff --git a/docs/docs/en/guide/resource/configuration.md b/docs/docs/en/guide/resource/configuration.md index 42e1925e89d4e..67c68a22c2249 100644 --- a/docs/docs/en/guide/resource/configuration.md +++ b/docs/docs/en/guide/resource/configuration.md @@ -152,9 +152,10 @@ datasource.encryption.enable=false # datasource encryption salt datasource.encryption.salt=!@#$%^&* -# data quality absolute path, it would auto discovery from libs directory. You can also specific the jar name in libs directory -# if you re-build it alone, or auto discovery mechanism fail -data-quality.jar.name= +# data quality jar directory path, it would auto discovery data quality jar from this given dir. You should keep it empty if you do not change anything in +# data-quality, it will auto discovery by dolphinscheduler itself. Change it only if you want to use your own data-quality jar and it is not in worker-server +# libs directory(but may sure your jar name start with `dolphinscheduler-data-quality`). +data-quality.jar.dir= #data-quality.error.output.path=/tmp/data-quality-error-data diff --git a/docs/docs/en/guide/upgrade/incompatible.md b/docs/docs/en/guide/upgrade/incompatible.md index 4580a7d13d387..f45af712c3a9d 100644 --- a/docs/docs/en/guide/upgrade/incompatible.md +++ b/docs/docs/en/guide/upgrade/incompatible.md @@ -11,6 +11,7 @@ This document records the incompatible updates between each version. You need to * Change the default unix shell executor from sh to bash ([#12180](https://github.com/apache/dolphinscheduler/pull/12180)). * Remove `deleteSource` in `download()` of `StorageOperate` ([#14084](https://github.com/apache/dolphinscheduler/pull/14084)) * Remove default key for attribute `data-quality.jar.name` in `common.properties` ([#15551](https://github.com/apache/dolphinscheduler/pull/15551)) +* Rename attribute `data-quality.jar.name` to `data-quality.jar.dir` in `common.properties` and represent for directory ([#15563](https://github.com/apache/dolphinscheduler/pull/15563)) ## 3.2.0 diff --git a/docs/docs/zh/architecture/configuration.md b/docs/docs/zh/architecture/configuration.md index 65113b76a18e8..0b3ea9bc5bd1c 100644 --- a/docs/docs/zh/architecture/configuration.md +++ b/docs/docs/zh/architecture/configuration.md @@ -226,7 +226,7 @@ common.properties配置文件目前主要是配置hadoop/s3/yarn/applicationId | yarn.job.history.status.address | http://ds1:19888/ws/v1/history/mapreduce/jobs/%s | yarn的作业历史状态URL | | datasource.encryption.enable | false | 是否启用datasource 加密 | | datasource.encryption.salt | !@#$%^&* | datasource加密使用的salt | -| data-quality.jar.name | dolphinscheduler-data-quality-dev-SNAPSHOT.jar | 配置数据质量使用的jar包 | +| data-quality.jar.dir | | 配置数据质量使用的jar包 | | support.hive.oneSession | false | 设置hive SQL是否在同一个session中执行 | | sudo.enable | true | 是否开启sudo | | alert.rpc.port | 50052 | Alert Server的RPC端口 | diff --git a/docs/docs/zh/guide/data-quality.md b/docs/docs/zh/guide/data-quality.md index 2a098a3216821..17b2a55cb2bf8 100644 --- a/docs/docs/zh/guide/data-quality.md +++ b/docs/docs/zh/guide/data-quality.md @@ -13,7 +13,7 @@ > ## 注意事项 -- 如果单独打包`data-quality`的话,记得修改包名和`data-quality.jar.name`一致,配置内容在 `common.properties` 中的 `data-quality.jar.name` +- 如果单独打包`data-quality`的话,记得修改包路径和`data-quality.jar.dir`一致,配置内容在 `common.properties` 中的 `data-quality.jar.dir` - 如果是老版本升级使用,运行之前需要先执行`SQL`更新脚本进行数据库初始化。 - 当前 `dolphinscheduler-data-quality-dev-SNAPSHOT.jar` 是瘦包,不包含任何 `JDBC` 驱动。 如果有 `JDBC` 驱动需要,可以在`节点设置` `选项参数`处设置 `--jars` 参数, diff --git a/docs/docs/zh/guide/resource/configuration.md b/docs/docs/zh/guide/resource/configuration.md index 57d0935e09290..739d6fb30cf6c 100644 --- a/docs/docs/zh/guide/resource/configuration.md +++ b/docs/docs/zh/guide/resource/configuration.md @@ -156,9 +156,10 @@ datasource.encryption.enable=false # datasource encryption salt datasource.encryption.salt=!@#$%^&* -# data quality absolute path, it would auto discovery from libs directory. You can also specific the jar name in libs directory -# if you re-build it alone, or auto discovery mechanism fail -data-quality.jar.name= +# data quality jar directory path, it would auto discovery data quality jar from this given dir. You should keep it empty if you do not change anything in +# data-quality, it will auto discovery by dolphinscheduler itself. Change it only if you want to use your own data-quality jar and it is not in worker-server +# libs directory(but may sure your jar name start with `dolphinscheduler-data-quality`). +data-quality.jar.dir= #data-quality.error.output.path=/tmp/data-quality-error-data diff --git a/dolphinscheduler-api-test/dolphinscheduler-api-test-case/src/test/resources/docker/file-manage/common.properties b/dolphinscheduler-api-test/dolphinscheduler-api-test-case/src/test/resources/docker/file-manage/common.properties index d43e55e822cbf..96879cc2721c4 100644 --- a/dolphinscheduler-api-test/dolphinscheduler-api-test-case/src/test/resources/docker/file-manage/common.properties +++ b/dolphinscheduler-api-test/dolphinscheduler-api-test-case/src/test/resources/docker/file-manage/common.properties @@ -84,9 +84,10 @@ datasource.encryption.enable=false # datasource encryption salt datasource.encryption.salt=!@#$%^&* -# data quality absolute path, it would auto discovery from libs directory. You can also specific the jar name in libs directory -# if you re-build it alone, or auto discovery mechanism fail -data-quality.jar.name= +# data quality jar directory path, it would auto discovery data quality jar from this given dir. You should keep it empty if you do not change anything in +# data-quality, it will auto discovery by dolphinscheduler itself. Change it only if you want to use your own data-quality jar and it is not in worker-server +# libs directory(but may sure your jar name start with `dolphinscheduler-data-quality`). +data-quality.jar.dir= #data-quality.error.output.path=/tmp/data-quality-error-data diff --git a/dolphinscheduler-common/src/main/resources/common.properties b/dolphinscheduler-common/src/main/resources/common.properties index 451a0f734c979..669d3dfef3488 100644 --- a/dolphinscheduler-common/src/main/resources/common.properties +++ b/dolphinscheduler-common/src/main/resources/common.properties @@ -120,9 +120,10 @@ datasource.encryption.enable=false # datasource encryption salt datasource.encryption.salt=!@#$%^&* -# data quality absolute path, it would auto discovery from libs directory. You can also specific the jar name in libs directory -# if you re-build it alone, or auto discovery mechanism fail -data-quality.jar.name= +# data quality jar directory path, it would auto discovery data quality jar from this given dir. You should keep it empty if you do not change anything in +# data-quality, it will auto discovery by dolphinscheduler itself. Change it only if you want to use your own data-quality jar and it is not in worker-server +# libs directory(but may sure your jar name start with `dolphinscheduler-data-quality`). +data-quality.jar.dir= #data-quality.error.output.path=/tmp/data-quality-error-data diff --git a/dolphinscheduler-common/src/test/resources/common.properties b/dolphinscheduler-common/src/test/resources/common.properties index 107977df7fd6b..7f66a32a23d49 100644 --- a/dolphinscheduler-common/src/test/resources/common.properties +++ b/dolphinscheduler-common/src/test/resources/common.properties @@ -115,9 +115,10 @@ datasource.encryption.enable=false # datasource encryption salt datasource.encryption.salt=!@#$%^&* -# data quality absolute path, it would auto discovery from libs directory. You can also specific the jar name in libs directory -# if you re-build it alone, or auto discovery mechanism fail -data-quality.jar.name= +# data quality jar directory path, it would auto discovery data quality jar from this given dir. You should keep it empty if you do not change anything in +# data-quality, it will auto discovery by dolphinscheduler itself. Change it only if you want to use your own data-quality jar and it is not in worker-server +# libs directory(but may sure your jar name start with `dolphinscheduler-data-quality`). +data-quality.jar.dir= #data-quality.error.output.path=/tmp/data-quality-error-data diff --git a/dolphinscheduler-datasource-plugin/dolphinscheduler-datasource-api/src/main/java/org/apache/dolphinscheduler/plugin/datasource/api/utils/CommonUtils.java b/dolphinscheduler-datasource-plugin/dolphinscheduler-datasource-api/src/main/java/org/apache/dolphinscheduler/plugin/datasource/api/utils/CommonUtils.java index a4e64594c0c23..1c24785c2f903 100644 --- a/dolphinscheduler-datasource-plugin/dolphinscheduler-datasource-api/src/main/java/org/apache/dolphinscheduler/plugin/datasource/api/utils/CommonUtils.java +++ b/dolphinscheduler-datasource-plugin/dolphinscheduler-datasource-api/src/main/java/org/apache/dolphinscheduler/plugin/datasource/api/utils/CommonUtils.java @@ -18,7 +18,7 @@ package org.apache.dolphinscheduler.plugin.datasource.api.utils; import static org.apache.dolphinscheduler.common.constants.Constants.RESOURCE_STORAGE_TYPE; -import static org.apache.dolphinscheduler.plugin.task.api.TaskConstants.DATA_QUALITY_JAR_NAME; +import static org.apache.dolphinscheduler.plugin.task.api.TaskConstants.DATA_QUALITY_JAR_DIR; import static org.apache.dolphinscheduler.plugin.task.api.TaskConstants.HADOOP_SECURITY_AUTHENTICATION; import static org.apache.dolphinscheduler.plugin.task.api.TaskConstants.HADOOP_SECURITY_AUTHENTICATION_STARTUP_STATE; import static org.apache.dolphinscheduler.plugin.task.api.TaskConstants.JAVA_SECURITY_KRB5_CONF; @@ -133,14 +133,28 @@ public static boolean loadKerberosConf(String javaSecurityKrb5Conf, String login } public static String getDataQualityJarPath() { - String dqsJarPath = PropertyUtils.getString(DATA_QUALITY_JAR_NAME); + log.info("Trying to get data quality jar in path"); + String dqJarDir = PropertyUtils.getString(DATA_QUALITY_JAR_DIR); + + if (StringUtils.isNotEmpty(dqJarDir)) { + log.info( + "Configuration data-quality.jar.dir is not empty, will try to get data quality jar from directory {}", + dqJarDir); + getDataQualityJarPathFromPath(dqJarDir).ifPresent(jarName -> DEFAULT_DATA_QUALITY_JAR_PATH = jarName); + } + + if (StringUtils.isEmpty(DEFAULT_DATA_QUALITY_JAR_PATH)) { + log.info("data quality jar path is empty, will try to auto discover it from build-in rules."); + getDefaultDataQualityJarPath(); + } - if (StringUtils.isEmpty(dqsJarPath)) { - log.info("data quality jar path is empty, will try to get it from data quality jar name"); - return getDefaultDataQualityJarPath(); + if (StringUtils.isEmpty(DEFAULT_DATA_QUALITY_JAR_PATH)) { + log.error( + "Can not find data quality jar in both configuration and auto discover, please check your configuration or report a bug."); + throw new RuntimeException("data quality jar path is empty"); } - return dqsJarPath; + return DEFAULT_DATA_QUALITY_JAR_PATH; } private static String getDefaultDataQualityJarPath() { @@ -173,7 +187,7 @@ private static Optional getDataQualityJarPathFromPath(String path) { log.info("Try to get data quality jar from path {}", path); File[] jars = new File(path).listFiles(); if (jars == null) { - log.warn("No data quality related jar found from path {}", path); + log.warn("No any files find given path {}", path); return Optional.empty(); } for (File jar : jars) { @@ -181,6 +195,7 @@ private static Optional getDataQualityJarPathFromPath(String path) { return Optional.of(jar.getAbsolutePath()); } } + log.warn("No data quality related jar found from path {}", path); return Optional.empty(); } diff --git a/dolphinscheduler-e2e/dolphinscheduler-e2e-case/src/test/resources/docker/file-manage/common.properties b/dolphinscheduler-e2e/dolphinscheduler-e2e-case/src/test/resources/docker/file-manage/common.properties index b5f61011b3bbc..7583b3293a6f5 100644 --- a/dolphinscheduler-e2e/dolphinscheduler-e2e-case/src/test/resources/docker/file-manage/common.properties +++ b/dolphinscheduler-e2e/dolphinscheduler-e2e-case/src/test/resources/docker/file-manage/common.properties @@ -95,9 +95,10 @@ datasource.encryption.enable=false # datasource encryption salt datasource.encryption.salt=!@#$%^&* -# data quality option, it would auto discovery from libs directory. You can also specific the jar name in libs directory -# if you re-build it alone, or auto discovery mechanism fail -data-quality.jar.name= +# data quality jar directory path, it would auto discovery data quality jar from this given dir. You should keep it empty if you do not change anything in +# data-quality, it will auto discovery by dolphinscheduler itself. Change it only if you want to use your own data-quality jar and it is not in worker-server +# libs directory(but may sure your jar name start with `dolphinscheduler-data-quality`). +data-quality.jar.dir= #data-quality.error.output.path=/tmp/data-quality-error-data diff --git a/dolphinscheduler-standalone-server/src/main/docker/Dockerfile b/dolphinscheduler-standalone-server/src/main/docker/Dockerfile index 40158ff8e17ed..6eebf041622c6 100644 --- a/dolphinscheduler-standalone-server/src/main/docker/Dockerfile +++ b/dolphinscheduler-standalone-server/src/main/docker/Dockerfile @@ -20,6 +20,7 @@ FROM eclipse-temurin:8-jdk ENV DOCKER true ENV TZ Asia/Shanghai ENV DOLPHINSCHEDULER_HOME /opt/dolphinscheduler +ENV DATA_QUALITY_JAR_DIR /opt/dolphinscheduler/libs/worker-server RUN apt update ; \ apt install -y sudo ; \ diff --git a/dolphinscheduler-task-plugin/dolphinscheduler-task-api/src/main/java/org/apache/dolphinscheduler/plugin/task/api/TaskConstants.java b/dolphinscheduler-task-plugin/dolphinscheduler-task-api/src/main/java/org/apache/dolphinscheduler/plugin/task/api/TaskConstants.java index fe8d9a77bf668..43734416e7e43 100644 --- a/dolphinscheduler-task-plugin/dolphinscheduler-task-api/src/main/java/org/apache/dolphinscheduler/plugin/task/api/TaskConstants.java +++ b/dolphinscheduler-task-plugin/dolphinscheduler-task-api/src/main/java/org/apache/dolphinscheduler/plugin/task/api/TaskConstants.java @@ -358,9 +358,9 @@ private TaskConstants() { public static final String RESOURCE_UPLOAD_PATH = "resource.storage.upload.base.path"; /** - * data.quality.jar.name + * data.quality.jar.dir */ - public static final String DATA_QUALITY_JAR_NAME = "data-quality.jar.name"; + public static final String DATA_QUALITY_JAR_DIR = "data-quality.jar.dir"; public static final String TASK_TYPE_CONDITIONS = "CONDITIONS"; diff --git a/dolphinscheduler-task-plugin/dolphinscheduler-task-api/src/test/resources/common.properties b/dolphinscheduler-task-plugin/dolphinscheduler-task-api/src/test/resources/common.properties index 9855d855e9f28..402112263f9d0 100644 --- a/dolphinscheduler-task-plugin/dolphinscheduler-task-api/src/test/resources/common.properties +++ b/dolphinscheduler-task-plugin/dolphinscheduler-task-api/src/test/resources/common.properties @@ -84,9 +84,10 @@ datasource.encryption.enable=false # datasource encryption salt datasource.encryption.salt=!@#$%^&* -# data quality absolute path, it would auto discovery from libs directory. You can also specific the jar name in libs directory -# if you re-build it alone, or auto discovery mechanism fail -data-quality.jar.name= +# data quality jar directory path, it would auto discovery data quality jar from this given dir. You should keep it empty if you do not change anything in +# data-quality, it will auto discovery by dolphinscheduler itself. Change it only if you want to use your own data-quality jar and it is not in worker-server +# libs directory(but may sure your jar name start with `dolphinscheduler-data-quality`). +data-quality.jar.dir= #data-quality.error.output.path=/tmp/data-quality-error-data From 43a06525a29e01f6769aa0057512884d7441b438 Mon Sep 17 00:00:00 2001 From: Wenjun Ruan Date: Tue, 6 Feb 2024 10:09:30 +0800 Subject: [PATCH 09/10] Fix Recover WorkflowInstance will casue workflow Instance state is success but task insatnce is killed/paused (#15574) --- .../service/ProcessInstanceServiceTest.java | 1 - .../dao/mapper/TaskInstanceMapper.java | 33 ------ .../dao/repository/TaskInstanceDao.java | 5 +- .../dao/mapper/TaskInstanceMapper.xml | 99 +--------------- .../dao/mapper/ProcessInstanceMapperTest.java | 24 ---- .../dao/mapper/TaskInstanceMapperTest.java | 110 ------------------ .../service/process/ProcessService.java | 3 - .../service/process/ProcessServiceImpl.java | 61 ++++------ 8 files changed, 26 insertions(+), 310 deletions(-) diff --git a/dolphinscheduler-api/src/test/java/org/apache/dolphinscheduler/api/service/ProcessInstanceServiceTest.java b/dolphinscheduler-api/src/test/java/org/apache/dolphinscheduler/api/service/ProcessInstanceServiceTest.java index 800f8bda18947..9fb4d830528d7 100644 --- a/dolphinscheduler-api/src/test/java/org/apache/dolphinscheduler/api/service/ProcessInstanceServiceTest.java +++ b/dolphinscheduler-api/src/test/java/org/apache/dolphinscheduler/api/service/ProcessInstanceServiceTest.java @@ -769,7 +769,6 @@ public void testViewGantt() throws Exception { processInstance.getProcessDefinitionCode(), processInstance.getProcessDefinitionVersion())).thenReturn(new ProcessDefinitionLog()); when(processInstanceMapper.queryDetailById(1)).thenReturn(processInstance); - when(taskInstanceMapper.queryByInstanceIdAndName(Mockito.anyInt(), Mockito.any())).thenReturn(taskInstance); DAG graph = new DAG<>(); for (long i = 1; i <= 7; ++i) { graph.addNode(i, new TaskNode()); diff --git a/dolphinscheduler-dao/src/main/java/org/apache/dolphinscheduler/dao/mapper/TaskInstanceMapper.java b/dolphinscheduler-dao/src/main/java/org/apache/dolphinscheduler/dao/mapper/TaskInstanceMapper.java index 04e818f5c3ae0..f80dac826c6e1 100644 --- a/dolphinscheduler-dao/src/main/java/org/apache/dolphinscheduler/dao/mapper/TaskInstanceMapper.java +++ b/dolphinscheduler-dao/src/main/java/org/apache/dolphinscheduler/dao/mapper/TaskInstanceMapper.java @@ -39,23 +39,10 @@ */ public interface TaskInstanceMapper extends BaseMapper { - List queryTaskByProcessIdAndState(@Param("processInstanceId") Integer processInstanceId, - @Param("state") Integer state); - List findValidTaskListByProcessId(@Param("processInstanceId") Integer processInstanceId, @Param("flag") Flag flag, @Param("testFlag") int testFlag); - List queryByHostAndStatus(@Param("host") String host, - @Param("states") int[] stateArray); - - int setFailoverByHostAndStateArray(@Param("host") String host, - @Param("states") int[] stateArray, - @Param("destStatus") TaskExecutionStatus destStatus); - - TaskInstance queryByInstanceIdAndName(@Param("processInstanceId") int processInstanceId, - @Param("name") String name); - TaskInstance queryByInstanceIdAndCode(@Param("processInstanceId") int processInstanceId, @Param("taskCode") Long taskCode); @@ -66,9 +53,6 @@ TaskInstance queryByInstanceIdAndCode(@Param("processInstanceId") int processIns List queryByProcessInstanceIdsAndTaskCodes(@Param("processInstanceIds") List processInstanceIds, @Param("taskCodes") List taskCodes); - Integer countTask(@Param("projectCodes") Long[] projectCodes, - @Param("taskIds") int[] taskIds); - /** * Statistics task instance group by given project codes list by start time *

@@ -97,20 +81,6 @@ List countTaskInstanceStateByProjectIdsV2(@Param("startTime" @Param("endTime") Date endTime, @Param("projectIds") Set projectIds); - /** - * Statistics task instance group by given project codes list by submit time - *

- * We only need project codes to determine whether the task instance belongs to the user or not. - * - * @param startTime Statistics start time - * @param endTime Statistics end time - * @param projectCodes Project codes list to filter - * @return List of ExecuteStatusCount - */ - List countTaskInstanceStateByProjectCodesAndStatesBySubmitTime(@Param("startTime") Date startTime, - @Param("endTime") Date endTime, - @Param("projectCodes") Long[] projectCodes, - @Param("states") List states); /** * Statistics task instance group by given project codes list by submit time *

@@ -159,9 +129,6 @@ IPage queryStreamTaskInstanceListPaging(IPage page, @Param("startTime") Date startTime, @Param("endTime") Date endTime); - List loadAllInfosNoRelease(@Param("processInstanceId") int processInstanceId, - @Param("status") int status); - void deleteByWorkflowInstanceId(@Param("workflowInstanceId") int workflowInstanceId); List findByWorkflowInstanceId(@Param("workflowInstanceId") Integer workflowInstanceId); diff --git a/dolphinscheduler-dao/src/main/java/org/apache/dolphinscheduler/dao/repository/TaskInstanceDao.java b/dolphinscheduler-dao/src/main/java/org/apache/dolphinscheduler/dao/repository/TaskInstanceDao.java index 0156416fd33b6..27a0dbb0f6532 100644 --- a/dolphinscheduler-dao/src/main/java/org/apache/dolphinscheduler/dao/repository/TaskInstanceDao.java +++ b/dolphinscheduler-dao/src/main/java/org/apache/dolphinscheduler/dao/repository/TaskInstanceDao.java @@ -100,9 +100,10 @@ List queryLastTaskInstanceListIntervalInProcessInstance(Integer pr /** * find last task instance corresponding to taskCode in the date interval + * * @param processInstanceId Task's parent process instance id - * @param depTaskCode taskCode - * @param testFlag test flag + * @param depTaskCode taskCode + * @param testFlag test flag * @return task instance */ TaskInstance queryLastTaskInstanceIntervalInProcessInstance(Integer processInstanceId, diff --git a/dolphinscheduler-dao/src/main/resources/org/apache/dolphinscheduler/dao/mapper/TaskInstanceMapper.xml b/dolphinscheduler-dao/src/main/resources/org/apache/dolphinscheduler/dao/mapper/TaskInstanceMapper.xml index 1544d0ed8fb61..548baae566006 100644 --- a/dolphinscheduler-dao/src/main/resources/org/apache/dolphinscheduler/dao/mapper/TaskInstanceMapper.xml +++ b/dolphinscheduler-dao/src/main/resources/org/apache/dolphinscheduler/dao/mapper/TaskInstanceMapper.xml @@ -30,24 +30,6 @@ ${alias}.flag, ${alias}.is_cache, ${alias}.cache_key, ${alias}.retry_interval, ${alias}.max_retry_times, ${alias}.task_instance_priority, ${alias}.worker_group,${alias}.environment_code , ${alias}.executor_id, ${alias}.first_submit_time, ${alias}.delay_time, ${alias}.task_params, ${alias}.var_pool, ${alias}.dry_run, ${alias}.test_flag, ${alias}.task_group_id, ${alias}.task_execute_type - - update t_ds_task_instance - set state = #{destStatus} - where host = #{host} - - and state in - - #{i} - - - - - - + - + - + -