diff --git a/src/main/java/org/opensearch/flowframework/common/DefaultUseCases.java b/src/main/java/org/opensearch/flowframework/common/DefaultUseCases.java index f4b4ce49d..126ef0731 100644 --- a/src/main/java/org/opensearch/flowframework/common/DefaultUseCases.java +++ b/src/main/java/org/opensearch/flowframework/common/DefaultUseCases.java @@ -28,7 +28,7 @@ public enum DefaultUseCases { COHERE_EMBEDDING_MODEL_DEPLOY( "cohere-embedding_model_deploy", "defaults/cohere-embedding-defaults.json", - "substitutionTemplates/deploy-remote-model-template-extra-params.json" + "substitutionTemplates/deploy-remote-model-extra-params-template.json" ), /** defaults file and substitution ready template for Bedrock Titan embedding model */ BEDROCK_TITAN_EMBEDDING_MODEL_DEPLOY( diff --git a/src/main/java/org/opensearch/flowframework/util/ParseUtils.java b/src/main/java/org/opensearch/flowframework/util/ParseUtils.java index a991904b0..40b4ed43e 100644 --- a/src/main/java/org/opensearch/flowframework/util/ParseUtils.java +++ b/src/main/java/org/opensearch/flowframework/util/ParseUtils.java @@ -59,6 +59,7 @@ public class ParseUtils { // Matches ${{ foo.bar }} (whitespace optional) with capturing groups 1=foo, 2=bar // private static final Pattern SUBSTITUTION_PATTERN = Pattern.compile("\\$\\{\\{\\s*(.+)\\.(.+?)\\s*\\}\\}"); private static final Pattern SUBSTITUTION_PATTERN = Pattern.compile("\\$\\{\\{\\s*([\\w_]+)\\.([\\w_]+)\\s*\\}\\}"); + private static final Pattern JSON_ARRAY_DOUBLE_QUOTES_PATTERN = Pattern.compile("\"\\[(.*?)]\""); private ParseUtils() {} @@ -70,7 +71,7 @@ private ParseUtils() {} * @param json the json string * @return The XContent parser for the json string * @throws IOException on failure to create the parser - */ + */ public static XContentParser jsonToParser(String json) throws IOException { XContentParser parser = JsonXContent.jsonXContent.createParser( NamedXContentRegistry.EMPTY, @@ -104,7 +105,7 @@ public static String resourceToString(String path) throws IOException { * Builds an XContent object representing a map of String keys to String values. * * @param xContentBuilder An XContent builder whose position is at the start of the map object to build - * @param map A map as key-value String pairs. + * @param map A map as key-value String pairs. * @throws IOException on a build failure */ public static void buildStringToStringMap(XContentBuilder xContentBuilder, Map map) throws IOException { @@ -119,7 +120,7 @@ public static void buildStringToStringMap(XContentBuilder xContentBuilder, Map map) throws IOException { @@ -138,7 +139,7 @@ public static void buildStringToObjectMap(XContentBuilder xContentBuilder, Map parseStringToStringMap(XContentParser parser) * Parses an XContent object representing a map of String keys to Object values. * The Object value here can either be a string or a map * If an array is found in the given parser we conver the array to a string representation of the array + * * @param parser An XContent parser whose position is at the start of the map object to parse * @return A map as identified by the key-value pairs in the XContent * @throws IOException on a parse failure @@ -189,10 +191,13 @@ public static Map parseStringToObjectMap(XContentParser parser) // Handle array: convert it to a string representation List elements = new ArrayList<>(); while (parser.nextToken() != XContentParser.Token.END_ARRAY) { - elements.add("\"" + parser.text() + "\""); // Adding escaped quotes around each element + if (parser.currentToken().equals(XContentParser.Token.VALUE_NUMBER)) { + elements.add(String.valueOf(parser.numberValue())); // If number value don't add escaping quotes + } else { + elements.add("\"" + parser.text() + "\""); // Adding escaped quotes around each element + } } - String arrayString = "[" + String.join(", ", elements) + "]"; - map.put(fieldName, arrayString); + map.put(fieldName, elements.toString()); } else { // Otherwise, parse it as a string map.put(fieldName, parser.text()); @@ -220,6 +225,7 @@ public static Instant parseInstant(XContentParser parser) throws IOException { * (e.g., john||own_index,testrole|__user__, no backend role so you see two verticle line after john.). * This is the user string format used internally in the OPENSEARCH_SECURITY_USER_INFO_THREAD_CONTEXT and may be * parsed using User.parse(string). + * * @param client Client containing user info. A public API request will fill in the user info in the thread context. * @return parsed user object */ @@ -233,7 +239,7 @@ public static User getUserContext(Client client) { * Creates a XContentParser from a given Registry * * @param xContentRegistry main registry for serializable content - * @param bytesReference given bytes to be parsed + * @param bytesReference given bytes to be parsed * @return bytesReference of {@link java.time.Instant} * @throws IOException IOException if content can't be parsed correctly */ @@ -244,7 +250,8 @@ public static XContentParser createXContentParserFromRegistry(NamedXContentRegis /** * Generates a string to string Map - * @param map content map + * + * @param map content map * @param fieldName fieldName * @return instance of the map */ @@ -260,15 +267,15 @@ public static Map getStringToStringMap(Object map, String fieldN * Creates a map containing the specified input keys, with values derived from template data or previous node * output. * - * @param requiredInputKeys A set of keys that must be present, or will cause an exception to be thrown - * @param optionalInputKeys A set of keys that may be present, or will be absent in the returned map - * @param currentNodeInputs Input params and content for this node, from workflow parsing - * @param outputs WorkflowData content of previous steps + * @param requiredInputKeys A set of keys that must be present, or will cause an exception to be thrown + * @param optionalInputKeys A set of keys that may be present, or will be absent in the returned map + * @param currentNodeInputs Input params and content for this node, from workflow parsing + * @param outputs WorkflowData content of previous steps * @param previousNodeInputs Input params for this node that come from previous steps - * @param params Params that came from REST path + * @param params Params that came from REST path * @return A map containing the requiredInputKeys with their corresponding values, - * and optionalInputKeys with their corresponding values if present. - * Throws a {@link FlowFrameworkException} if a required key is not present. + * and optionalInputKeys with their corresponding values if present. + * Throws a {@link FlowFrameworkException} if a required key is not present. */ public static Map getInputsFromPreviousSteps( Set requiredInputKeys, @@ -357,9 +364,10 @@ public static Map getInputsFromPreviousSteps( /** * Executes substitution on the given value by looking at any matching values in either the ouputs or params map - * @param value the Object that will have the substitution done on + * + * @param value the Object that will have the substitution done on * @param outputs potential location of values to be substituted in - * @param params potential location of values to be subsituted in + * @param params potential location of values to be subsituted in * @return the substituted object back */ public static Object conditionallySubstitute(Object value, Map outputs, Map params) { @@ -403,6 +411,7 @@ public static Object conditionallySubstitute(Object value, Map /** * Generates a String to String map based on a Json File + * * @param path file path * @return instance of the string * @throws JsonProcessingException JsonProcessingException from Jackson for issues processing map @@ -430,15 +440,21 @@ public static Map parseJsonFileToStringToStringMap(String path) * (e.g. "[\"text\", \"hello\"]" to "["text", "hello"]"), this is needed for processors that take in string arrays, * This also removes the quotations around the array making the array valid to consume * (e.g. "weights": "[0.7, 0.3]" to "weights": [0.7, 0.3]) + * * @param input The inputString given to be transformed * @return the transformed string */ public static String removingBackslashesAndQuotesInArrayInJsonString(String input) { - return Pattern.compile("\"\\[(.*?)]\"").matcher(input).replaceAll(matchResult -> { + Matcher matcher = JSON_ARRAY_DOUBLE_QUOTES_PATTERN.matcher(input); + StringBuffer result = new StringBuffer(); + while (matcher.find()) { // Extract matched content and remove backslashes before quotes - String withoutEscapes = matchResult.group(1).replaceAll("\\\\\"", "\""); + String withoutEscapes = matcher.group(1).replaceAll("\\\\\"", "\""); // Return the transformed string with the brackets but without the outer quotes - return "[" + withoutEscapes + "]"; - }); + matcher.appendReplacement(result, "[" + withoutEscapes + "]"); + } + // Append remaining input after the last match + matcher.appendTail(result); + return result.toString(); } } diff --git a/src/main/resources/defaults/bedrock-titan-embedding-defaults.json b/src/main/resources/defaults/bedrock-titan-embedding-defaults.json index 20baf867b..401924ea2 100644 --- a/src/main/resources/defaults/bedrock-titan-embedding-defaults.json +++ b/src/main/resources/defaults/bedrock-titan-embedding-defaults.json @@ -4,7 +4,6 @@ "create_connector.name": "Amazon Bedrock Connector: embedding", "create_connector.description": "The connector to bedrock Titan embedding model", "create_connector.region": "us-east-1", - "create_connector.endpoint": "api.openai.com", "create_connector.credential.access_key": "123", "create_connector.credential.secret_key": "123", "create_connector.credential.session_token": "123", diff --git a/src/main/resources/defaults/bedrock-titan-mulitmodal-defaults.json b/src/main/resources/defaults/bedrock-titan-multimodal-defaults.json similarity index 93% rename from src/main/resources/defaults/bedrock-titan-mulitmodal-defaults.json rename to src/main/resources/defaults/bedrock-titan-multimodal-defaults.json index b1666bec5..65f3b44d9 100644 --- a/src/main/resources/defaults/bedrock-titan-mulitmodal-defaults.json +++ b/src/main/resources/defaults/bedrock-titan-multimodal-defaults.json @@ -1,11 +1,10 @@ { "template.name": "deploy-bedrock-titan-multimodal-embedding-model", - "template.description": "deploying Amazon Bedrock Titan multimodal embedding model ", + "template.description": "Deploying Amazon Bedrock Titan multimodal embedding model ", "create_connector.name": "Amazon Bedrock Connector: multi-modal embedding", "create_connector.description": "The connector to bedrock Titan multi-modal embedding model", "create_connector.region": "us-east-1", - "create_connector.input_docs_processed_step_size": 2, - "create_connector.endpoint": "api.openai.com", + "create_connector.input_docs_processed_step_size": "2", "create_connector.credential.access_key": "123", "create_connector.credential.secret_key": "123", "create_connector.credential.session_token": "123", diff --git a/src/main/resources/defaults/cohere-chat-defaults.json b/src/main/resources/defaults/cohere-chat-defaults.json index bc200d335..7ac6eb0ef 100644 --- a/src/main/resources/defaults/cohere-chat-defaults.json +++ b/src/main/resources/defaults/cohere-chat-defaults.json @@ -1,6 +1,6 @@ { "template.name": "deploy-cohere-chat-model", - "template.description": "deploying cohere chat model", + "template.description": "Deploying a Cohere chat model", "create_connector.name": "Cohere Chat Model", "create_connector.description": "The connector to Cohere's public chat API", "create_connector.protocol": "http", diff --git a/src/main/resources/defaults/cohere-embedding-defaults.json b/src/main/resources/defaults/cohere-embedding-defaults.json index 53a402f60..3a6c7d043 100644 --- a/src/main/resources/defaults/cohere-embedding-defaults.json +++ b/src/main/resources/defaults/cohere-embedding-defaults.json @@ -1,6 +1,6 @@ { "template.name": "deploy-cohere-model", - "template.description": "deploying cohere embedding model", + "template.description": "Deploying a Cohere embedding model", "create_connector.name": "cohere-embedding-connector", "create_connector.description": "The connector to Cohere's public embed API", "create_connector.protocol": "http", diff --git a/src/main/resources/defaults/cohere-embedding-semantic-search-defaults.json b/src/main/resources/defaults/cohere-embedding-semantic-search-defaults.json index 439f905f2..56fc2ba73 100644 --- a/src/main/resources/defaults/cohere-embedding-semantic-search-defaults.json +++ b/src/main/resources/defaults/cohere-embedding-semantic-search-defaults.json @@ -1,6 +1,6 @@ { "template.name": "semantic search with cohere embedding", - "template.description": "Setting up semantic search, with cohere embedding model", + "template.description": "Setting up semantic search, with a Cohere embedding model", "create_connector.name": "cohere-embedding-connector", "create_connector.description": "The connector to Cohere's public embed API", "create_connector.protocol": "http", diff --git a/src/main/resources/defaults/conversational-search-defaults.json b/src/main/resources/defaults/conversational-search-defaults.json index 5f1072d82..a983fec95 100644 --- a/src/main/resources/defaults/conversational-search-defaults.json +++ b/src/main/resources/defaults/conversational-search-defaults.json @@ -1,6 +1,6 @@ { "template.name": "deploy-cohere-chat-model", - "template.description": "deploying cohere chat model", + "template.description": "A template to deploy a Cohere chat model", "create_connector.name": "Cohere Chat Model", "create_connector.description": "The connector to Cohere's public chat API", "create_connector.protocol": "http", @@ -13,7 +13,7 @@ "register_remote_model.description": "cohere-chat-model", "create_search_pipeline.pipeline_id": "rag-pipeline", "create_search_pipeline.retrieval_augmented_generation.tag": "openai_pipeline_demo", - "create_search_pipeline.retrieval_augmented_generation.description": "Demo pipeline Using cohere Connector", + "create_search_pipeline.retrieval_augmented_generation.description": "Demo pipeline using a Cohere chat model", "create_search_pipeline.retrieval_augmented_generation.context_field_list": "[\"text\"]", "create_search_pipeline.retrieval_augmented_generation.system_prompt": "You are a helpful assistant", "create_search_pipeline.retrieval_augmented_generation.user_instructions": "Generate a concise and informative answer in less than 100 words for the given question" diff --git a/src/main/resources/defaults/local-sparse-search-biencoder-defaults.json b/src/main/resources/defaults/local-sparse-search-biencoder-defaults.json index aa4ae0512..f39dde616 100644 --- a/src/main/resources/defaults/local-sparse-search-biencoder-defaults.json +++ b/src/main/resources/defaults/local-sparse-search-biencoder-defaults.json @@ -1,6 +1,6 @@ { "template.name": "local-model-neural-sparse-search", - "template.description": "setting up neural sparse search with local model", + "template.description": "Setting up neural sparse search with pretrained local model", "register_local_sparse_encoding_model.name": "amazon/neural-sparse/opensearch-neural-sparse-encoding-v1", "register_local_sparse_encoding_model.description": "This is a neural sparse encoding model", "register_local_sparse_encoding_model.model_format": "TORCH_SCRIPT", diff --git a/src/main/resources/defaults/multi-modal-search-defaults.json b/src/main/resources/defaults/multi-modal-search-defaults.json index 3bd47f625..2a05cc5bf 100644 --- a/src/main/resources/defaults/multi-modal-search-defaults.json +++ b/src/main/resources/defaults/multi-modal-search-defaults.json @@ -4,12 +4,12 @@ "create_ingest_pipeline.pipeline_id": "nlp-multimodal-ingest-pipeline", "create_ingest_pipeline.description": "A text/image embedding pipeline", "create_ingest_pipeline.model_id": "123", - "create_ingest_pipeline.embedding": "vector_embedding", + "text_image_embedding.embedding": "vector_embedding", "text_image_embedding.field_map.text": "image_description", "text_image_embedding.field_map.image": "image_binary", "create_index.name": "my-multimodal-nlp-index", - "create_index.settings.number_of_shards": 2, - "text_image_embedding.field_map.output.dimension": 1024, + "create_index.settings.number_of_shards": "2", + "text_image_embedding.field_map.output.dimension": "1024", "create_index.mappings.method.engine": "lucene", "create_index.mappings.method.name": "hnsw" } diff --git a/src/main/resources/defaults/multimodal-search-bedrock-titan-defaults.json b/src/main/resources/defaults/multimodal-search-bedrock-titan-defaults.json index f7656d967..afd3b5da5 100644 --- a/src/main/resources/defaults/multimodal-search-bedrock-titan-defaults.json +++ b/src/main/resources/defaults/multimodal-search-bedrock-titan-defaults.json @@ -17,7 +17,7 @@ "register_remote_model.description": "bedrock-multi-modal-embedding-model", "create_ingest_pipeline.pipeline_id": "nlp-multimodal-ingest-pipeline", "create_ingest_pipeline.description": "A text/image embedding pipeline", - "text_image_embedding.create_ingest_pipeline.embedding": "vector_embedding", + "text_image_embedding.embedding": "vector_embedding", "text_image_embedding.field_map.text": "image_description", "text_image_embedding.field_map.image": "image_binary", "create_index.name": "my-multimodal-nlp-index", diff --git a/src/main/resources/defaults/openai-chat-defaults.json b/src/main/resources/defaults/openai-chat-defaults.json index 2b28088db..e495079f6 100644 --- a/src/main/resources/defaults/openai-chat-defaults.json +++ b/src/main/resources/defaults/openai-chat-defaults.json @@ -1,6 +1,6 @@ { "template.name": "deploy-openai-chat-model", - "template.description": "deploying openAI chat model", + "template.description": "Deploying an OpenAI chat model", "create_connector.name": "OpenAI Chat Connector", "create_connector.description": "Connector to public OpenAI model", "create_connector.protocol": "http", diff --git a/src/main/resources/defaults/openai-embedding-defaults.json b/src/main/resources/defaults/openai-embedding-defaults.json index 4775e1c27..6951cedaa 100644 --- a/src/main/resources/defaults/openai-embedding-defaults.json +++ b/src/main/resources/defaults/openai-embedding-defaults.json @@ -1,6 +1,6 @@ { "template.name": "deploy-openai-model", - "template.description": "deploying openAI embedding model", + "template.description": "Deploying an OpenAI embedding model", "create_connector.name": "OpenAI-embedding-connector", "create_connector.description": "Connector to public OpenAI model", "create_connector.protocol": "http", diff --git a/src/main/resources/substitutionTemplates/deploy-remote-bedrock-model-template.json b/src/main/resources/substitutionTemplates/deploy-remote-bedrock-model-template.json index 4f9ea5911..bf073b2b8 100644 --- a/src/main/resources/substitutionTemplates/deploy-remote-bedrock-model-template.json +++ b/src/main/resources/substitutionTemplates/deploy-remote-bedrock-model-template.json @@ -26,7 +26,7 @@ "input_docs_processed_step_size": "${{create_connector.input_docs_processed_step_size}}" }, "credential": { - "access_ key": "${{create_connector.credential.access_key}}", + "access_key": "${{create_connector.credential.access_key}}", "secret_key": "${{create_connector.credential.secret_key}}", "session_token": "${{create_connector.credential.session_token}}" }, diff --git a/src/main/resources/substitutionTemplates/deploy-remote-model-chat-template.json b/src/main/resources/substitutionTemplates/deploy-remote-model-chat-template.json index 6c2f7cc05..f03f8e6c6 100644 --- a/src/main/resources/substitutionTemplates/deploy-remote-model-chat-template.json +++ b/src/main/resources/substitutionTemplates/deploy-remote-model-chat-template.json @@ -44,7 +44,7 @@ "id": "register_model", "type": "register_remote_model", "previous_node_inputs": { - "create_connector_step_1": "parameters" + "create_connector": "parameters" }, "user_inputs": { "name": "${{register_remote_model.name}}", @@ -56,7 +56,7 @@ "id": "deploy_model", "type": "deploy_model", "previous_node_inputs": { - "register_model_1": "model_id" + "register_model": "model_id" } } ], diff --git a/src/main/resources/substitutionTemplates/deploy-remote-model-template-extra-params.json b/src/main/resources/substitutionTemplates/deploy-remote-model-extra-params-template.json similarity index 100% rename from src/main/resources/substitutionTemplates/deploy-remote-model-template-extra-params.json rename to src/main/resources/substitutionTemplates/deploy-remote-model-extra-params-template.json diff --git a/src/main/resources/substitutionTemplates/deploy-remote-model-template.json b/src/main/resources/substitutionTemplates/deploy-remote-model-template.json index bc1c9eebc..4e74a9d2c 100644 --- a/src/main/resources/substitutionTemplates/deploy-remote-model-template.json +++ b/src/main/resources/substitutionTemplates/deploy-remote-model-template.json @@ -46,7 +46,7 @@ "id": "register_model", "type": "register_remote_model", "previous_node_inputs": { - "create_connector_step_1": "parameters" + "create_connector": "parameters" }, "user_inputs": { "name": "${{register_remote_model.name}}", @@ -58,7 +58,7 @@ "id": "deploy_model", "type": "deploy_model", "previous_node_inputs": { - "register_model_1": "model_id" + "register_model": "model_id" } } ], diff --git a/src/main/resources/substitutionTemplates/multi-modal-search-template.json b/src/main/resources/substitutionTemplates/multi-modal-search-template.json index afd85c0df..b8acfed47 100644 --- a/src/main/resources/substitutionTemplates/multi-modal-search-template.json +++ b/src/main/resources/substitutionTemplates/multi-modal-search-template.json @@ -23,7 +23,7 @@ { "text_image_embedding": { "model_id": "${{create_ingest_pipeline.model_id}}", - "embedding": "${{create_ingest_pipeline.embedding}}", + "embedding": "${{text_image_embedding.embedding}}", "field_map": { "text": "${{text_image_embedding.field_map.text}}", "image": "${{text_image_embedding.field_map.image}}" @@ -53,7 +53,7 @@ "id": { "type": "text" }, - "${{text_embedding.field_map.output}}": { + "${{text_image_embedding.embedding}}": { "type": "knn_vector", "dimension": "${{text_image_embedding.field_map.output.dimension}}", "method": { diff --git a/src/main/resources/substitutionTemplates/multi-modal-search-with-bedrock-titan-template.json b/src/main/resources/substitutionTemplates/multi-modal-search-with-bedrock-titan-template.json index 2c5d1efd2..a19965aa3 100644 --- a/src/main/resources/substitutionTemplates/multi-modal-search-with-bedrock-titan-template.json +++ b/src/main/resources/substitutionTemplates/multi-modal-search-with-bedrock-titan-template.json @@ -73,7 +73,7 @@ { "text_image_embedding": { "model_id": "${{register_model.model_id}}", - "embedding": "${{text_image_embedding.create_ingest_pipeline.embedding}}", + "embedding": "${{text_image_embedding.embedding}}", "field_map": { "text": "${{text_image_embedding.field_map.text}}", "image": "${{text_image_embedding.field_map.image}}" @@ -103,7 +103,7 @@ "id": { "type": "text" }, - "${{text_image_embedding.create_ingest_pipeline.embedding}}": { + "${{text_image_embedding.embedding}}": { "type": "knn_vector", "dimension": "${{text_image_embedding.field_map.output.dimension}}", "method": { diff --git a/src/test/java/org/opensearch/flowframework/rest/FlowFrameworkRestApiIT.java b/src/test/java/org/opensearch/flowframework/rest/FlowFrameworkRestApiIT.java index f14dfe6d8..65c3cd6d5 100644 --- a/src/test/java/org/opensearch/flowframework/rest/FlowFrameworkRestApiIT.java +++ b/src/test/java/org/opensearch/flowframework/rest/FlowFrameworkRestApiIT.java @@ -18,6 +18,7 @@ import org.opensearch.core.rest.RestStatus; import org.opensearch.flowframework.FlowFrameworkRestTestCase; import org.opensearch.flowframework.TestHelpers; +import org.opensearch.flowframework.common.DefaultUseCases; import org.opensearch.flowframework.model.ProvisioningProgress; import org.opensearch.flowframework.model.ResourceCreated; import org.opensearch.flowframework.model.State; @@ -32,6 +33,7 @@ import java.nio.charset.StandardCharsets; import java.time.Instant; import java.util.Collections; +import java.util.EnumSet; import java.util.HashSet; import java.util.List; import java.util.Map; @@ -465,7 +467,21 @@ public void testDefaultSemanticSearchUseCaseWithFailureExpected() throws Excepti "org.opensearch.flowframework.exception.WorkflowStepException during step create_ingest_pipeline, restStatus: BAD_REQUEST" ) ); - } + public void testAllDefaultUseCasesCreation() throws Exception { + Set allUseCaseNames = EnumSet.allOf(DefaultUseCases.class) + .stream() + .map(DefaultUseCases::getUseCaseName) + .collect(Collectors.toSet()); + + for (String useCaseName : allUseCaseNames) { + Response response = createWorkflowWithUseCase(client(), useCaseName); + assertEquals(RestStatus.CREATED, TestHelpers.restStatus(response)); + + Map responseMap = entityAsMap(response); + String workflowId = (String) responseMap.get(WORKFLOW_ID); + getAndAssertWorkflowStatus(client(), workflowId, State.NOT_STARTED, ProvisioningProgress.NOT_STARTED); + } + } }