diff --git a/instill/helpers/__init__.py b/instill/helpers/__init__.py
index bc52916e..c34038e9 100644
--- a/instill/helpers/__init__.py
+++ b/instill/helpers/__init__.py
@@ -1,15 +1,26 @@
-from instill.helpers.protobufs.parse import (
-    Metadata,
-    construct_image_to_image_infer_response,
-    construct_image_to_image_metadata_response,
-    construct_infer_response,
-    construct_metadata_response,
-    construct_text_generation_chat_infer_response,
-    construct_text_generation_chat_metadata_response,
-    construct_text_generation_infer_response,
-    construct_text_generation_metadata_response,
-    construct_text_to_image_infer_response,
-    construct_text_to_image_metadata_response,
-    construct_visual_question_answering_infer_response,
-    construct_visual_question_answering_metadata_response,
+# pylint: disable=no-name-in-module
+from instill.helpers.protobufs.ray_pb2 import TriggerRequest, TriggerResponse
+from instill.helpers.ray_io import (
+    construct_task_classification_output,
+    construct_task_detection_output,
+    construct_task_image_to_image_output,
+    construct_task_instance_segmentation_output,
+    construct_task_keypoint_output,
+    construct_task_ocr_output,
+    construct_task_semantic_segmentation_output,
+    construct_task_text_generation_chat_output,
+    construct_task_text_generation_output,
+    construct_task_text_to_image_output,
+    construct_task_visual_question_answering_output,
+    parse_task_classification_to_vision_input,
+    parse_task_detection_to_vision_input,
+    parse_task_image_to_image_input,
+    parse_task_instance_segmentation_to_vision_input,
+    parse_task_keypoint_to_vision_input,
+    parse_task_ocr_to_vision_input,
+    parse_task_semantic_segmentation_to_vision_input,
+    parse_task_text_generation_chat_to_conversation_input,
+    parse_task_text_generation_to_conversation_input,
+    parse_task_text_to_image_input,
+    parse_task_visual_question_answering_to_conversation_multimodal_input,
 )
diff --git a/instill/helpers/const.py b/instill/helpers/const.py
index 644ba03b..e1bedf13 100644
--- a/instill/helpers/const.py
+++ b/instill/helpers/const.py
@@ -4,6 +4,8 @@
 import numpy as np
 
+PROMPT_ROLES = ["user", "assistant", "system"]
+
 
 class DataType(Enum):
     TYPE_BOOL = 1
@@ -21,63 +23,49 @@ class DataType(Enum):
     TYPE_STRING = 13
 
 
-class TextGenerationInput:
-    prompt = ""
+class VisionInput:
+    image: np.ndarray
+
+
+class ConversationInput:
+    conversation: List[Dict[str, str]]
+    max_new_tokens = 100
+    temperature = 0.8
+    top_k = 1
+    seed = 0
+    stop_words: Any = ""  # Optional
+    extra_params: Dict[str, str] = {}
+
+
+class ConversationMultiModelInput:
+    conversation: List[Union[Dict[str, Union[str, Dict[str, str]]]]]
     prompt_images: Union[List[np.ndarray], None] = None
-    chat_history: Union[List[str], None] = None
-    system_message: Union[str, None] = None
     max_new_tokens = 100
     temperature = 0.8
     top_k = 1
-    random_seed = 0
+    seed = 0
     stop_words: Any = ""  # Optional
     extra_params: Dict[str, str] = {}
 
 
 class TextToImageInput:
-    prompt_image: Union[np.ndarray, None] = None
     prompt = ""
-    negative_prompt = ""
     steps = 5
-    guidance_scale = 7.5
+    cfg_scale = 7.5
     seed = 0
     samples = 1
     extra_params: Dict[str, str] = {}
 
 
 class ImageToImageInput:
-    prompt_image: Union[np.ndarray, None] = None
-    prompt = ""
+    prompt = ""
+    prompt_image: Union[np.ndarray, None] = None
     steps = 5
-    guidance_scale = 7.5
+    cfg_scale = 7.5
     seed = 0
     samples = 1
-    extra_params: Dict[str, str] = {}
-
-
-class TextGenerationChatInput:
-    prompt = ""
-    prompt_images: Union[List[np.ndarray], None] = None
-    chat_history: Union[List[str], None] = None
-    system_message: Union[str, None] = None
-    max_new_tokens = 100
-    temperature = 0.8
-    top_k = 1
-    random_seed = 0
-    stop_words: Any = ""  # Optional
-    extra_params: Dict[str, str] = {}
-
-
-class VisualQuestionAnsweringInput:
-    prompt = ""
-    prompt_images: Union[List[np.ndarray], None] = None
-    chat_history: Union[List[str], None] = None
-    system_message: Union[str, None] = None
-    max_new_tokens = 100
-    temperature = 0.8
-    top_k = 1
-    random_seed = 0
-    stop_words: Any = ""  # Optional
+    low_threshold = 100
+    high_threshold = 200
     extra_params: Dict[str, str] = {}
diff --git a/instill/helpers/errors.py b/instill/helpers/errors.py
index 03023bc2..e6ade39f 100644
--- a/instill/helpers/errors.py
+++ b/instill/helpers/errors.py
@@ -16,3 +16,13 @@
     def __str__(
         self,
     ) -> str:
         return f"model config file `instill.yaml` is missing {self.field} field"
+
+
+class InvalidInputException(Exception):
+    def __str__(self) -> str:
+        return "trigger request input error"
+
+
+class InvalidOutputShapeException(Exception):
+    def __str__(self) -> str:
+        return "outputs length not matched"
diff --git a/instill/helpers/protobufs/parse.py b/instill/helpers/protobufs/parse.py
deleted file mode 100644
index 07ff568d..00000000
--- a/instill/helpers/protobufs/parse.py
+++ /dev/null
@@ -1,479 +0,0 @@
-from dataclasses import dataclass
-from typing import List
-
-from instill.helpers.protobufs.ray_pb2 import (
-    ModelMetadataRequest,
-    ModelMetadataResponse,
-    RayServiceCallRequest,
-    RayServiceCallResponse,
-    InferTensor,
-)
-
-from instill.helpers.const import DataType
-
-
-@dataclass
-class Metadata:
-    name: str
-    datatype: str
-    shape: list
-
-
-def construct_metadata_response(
-    req: ModelMetadataRequest,
-    inputs: List[Metadata],
-    outputs: List[Metadata],
-) -> ModelMetadataResponse:
-    resp = ModelMetadataResponse(
-        name=req.name,
-        versions=req.version,
-        framework="python",
-        inputs=[],
-        outputs=[],
-    )
-
-    for i in inputs:
-        resp.inputs.append(
-            ModelMetadataResponse.TensorMetadata(
-                name=i.name,
-                datatype=i.datatype,
-                shape=i.shape,
-            )
-        )
-
-    for o in outputs:
-        resp.outputs.append(
-            ModelMetadataResponse.TensorMetadata(
-                name=o.name,
-                datatype=o.datatype,
-                shape=o.shape,
-            )
-        )
-
-    return resp
-
-
-def construct_infer_response(
-    req: RayServiceCallRequest,
-    outputs: List[Metadata],
-    raw_outputs: List[bytes],
-) -> RayServiceCallResponse:
-    resp = RayServiceCallResponse(
-        model_name=req.model_name,
-        model_version=req.model_version,
-        outputs=[],
-        raw_output_contents=[],
-    )
-
-    for o in outputs:
-        resp.outputs.append(
-            InferTensor(
-                name=o.name,
-                datatype=o.datatype,
-                shape=o.shape,
-            )
-        )
-
-    resp.raw_output_contents.extend(raw_outputs)
-
-    return resp
-
-
-def construct_text_generation_metadata_response(
-    req: ModelMetadataRequest,
-) -> ModelMetadataResponse:
-    return construct_metadata_response(
-        req=req,
-        inputs=[
-            Metadata(
-                name="prompt",
-                datatype=str(DataType.TYPE_STRING.name),
-                shape=[1],
-            ),
-            Metadata(
-                name="prompt_images",
-                datatype=str(DataType.TYPE_STRING.name),
-                shape=[1],
-            ),
-            Metadata(
-                name="chat_history",
-                datatype=str(DataType.TYPE_STRING.name),
-                shape=[1],
-            ),
-            Metadata(
-                name="system_message",
-                datatype=str(DataType.TYPE_STRING.name),
-                shape=[1],
-            ),
-            Metadata(
-                name="max_new_tokens",
-                datatype=str(DataType.TYPE_UINT32.name),
-                shape=[1],
-            ),
-            Metadata(
-                name="temperature",
-                datatype=str(DataType.TYPE_FP32.name),
-                shape=[1],
-            ),
-            Metadata(
-                name="top_k",
-                datatype=str(DataType.TYPE_UINT32.name),
-                shape=[1],
-            ),
-            Metadata(
-                name="random_seed",
-                datatype=str(DataType.TYPE_UINT64.name),
-                shape=[1],
-            ),
-            Metadata(
-                name="extra_params",
-                datatype=str(DataType.TYPE_STRING.name),
-                shape=[1],
-            ),
-        ],
-        outputs=[
-            Metadata(
-                name="text",
-                datatype=str(DataType.TYPE_STRING.name),
-                shape=[-1, -1],
-            ),
-        ],
-    )
-
-
-def construct_text_generation_infer_response(
-    req: RayServiceCallRequest,
-    shape: list,
-    raw_outputs: List[bytes],
-):
-    return construct_infer_response(
-        req=req,
-        outputs=[
-            Metadata(
-                name="text",
-                datatype=str(DataType.TYPE_STRING.name),
-                shape=shape,
-            )
-        ],
-        raw_outputs=raw_outputs,
-    )
-
-
-def construct_text_generation_chat_metadata_response(
-    req: ModelMetadataRequest,
-) -> ModelMetadataResponse:
-    return construct_metadata_response(
-        req=req,
-        inputs=[
-            Metadata(
-                name="prompt",
-                datatype=str(DataType.TYPE_STRING.name),
-                shape=[1],
-            ),
-            Metadata(
-                name="prompt_images",
-                datatype=str(DataType.TYPE_STRING.name),
-                shape=[1],
-            ),
-            Metadata(
-                name="chat_history",
-                datatype=str(DataType.TYPE_STRING.name),
-                shape=[1],
-            ),
-            Metadata(
-                name="system_message",
-                datatype=str(DataType.TYPE_STRING.name),
-                shape=[1],
-            ),
-            Metadata(
-                name="max_new_tokens",
-                datatype=str(DataType.TYPE_UINT32.name),
-                shape=[1],
-            ),
-            Metadata(
-                name="temperature",
-                datatype=str(DataType.TYPE_FP32.name),
-                shape=[1],
-            ),
-            Metadata(
-                name="top_k",
-                datatype=str(DataType.TYPE_UINT32.name),
-                shape=[1],
-            ),
-            Metadata(
-                name="random_seed",
-                datatype=str(DataType.TYPE_UINT64.name),
-                shape=[1],
-            ),
-            Metadata(
-                name="extra_params",
-                datatype=str(DataType.TYPE_STRING.name),
-                shape=[1],
-            ),
-        ],
-        outputs=[
-            Metadata(
-                name="text",
-                datatype=str(DataType.TYPE_STRING.name),
-                shape=[-1, -1],
-            ),
-        ],
-    )
-
-
-def construct_text_generation_chat_infer_response(
-    req: RayServiceCallRequest,
-    shape: list,
-    raw_outputs: List[bytes],
-):
-    return construct_infer_response(
-        req=req,
-        outputs=[
-            Metadata(
-                name="text",
-                datatype=str(DataType.TYPE_STRING.name),
-                shape=shape,
-            )
-        ],
-        raw_outputs=raw_outputs,
-    )
-
-
-def construct_text_to_image_metadata_response(
-    req: ModelMetadataRequest,
-):
-    return construct_metadata_response(
-        req=req,
-        inputs=[
-            Metadata(
-                name="prompt",
-                datatype=str(DataType.TYPE_STRING.name),
-                shape=[1],
-            ),
-            Metadata(
-                name="negative_prompt",
-                datatype=str(DataType.TYPE_STRING.name),
-                shape=[1],
-            ),
-            Metadata(
-                name="prompt_image",
-                datatype=str(DataType.TYPE_STRING.name),
-                shape=[1],
-            ),
-            Metadata(
-                name="samples",
-                datatype=str(DataType.TYPE_INT32.name),
-                shape=[1],
-            ),
-            Metadata(
-                name="scheduler",
-                datatype=str(DataType.TYPE_STRING.name),
-                shape=[1],
-            ),
-            Metadata(
-                name="steps",
-                datatype=str(DataType.TYPE_INT32.name),
-                shape=[1],
-            ),
-            Metadata(
-                name="guidance_scale",
-                datatype=str(DataType.TYPE_FP32.name),
-                shape=[1],
-            ),
-            Metadata(
-                name="seed",
-                datatype=str(DataType.TYPE_INT64.name),
-                shape=[1],
-            ),
-            Metadata(
-                name="extra_params",
-                datatype=str(DataType.TYPE_STRING.name),
-                shape=[1],
-            ),
-        ],
-        outputs=[
-            Metadata(
-                name="images",
-                datatype=str(DataType.TYPE_FP32.name),
-                shape=[-1, -1, -1, -1],
-            ),
-        ],
-    )
-
-
-def construct_text_to_image_infer_response(
-    req: RayServiceCallRequest,
-    shape: list,
-    raw_outputs: List[bytes],
-):
-    return construct_infer_response(
-        req=req,
-        outputs=[
-            Metadata(
-                name="images",
-                datatype=str(DataType.TYPE_FP32.name),
-                shape=shape,
-            )
-        ],
-        raw_outputs=raw_outputs,
-    )
-
-
-def construct_image_to_image_metadata_response(
-    req: ModelMetadataRequest,
-):
-    return construct_metadata_response(
-        req=req,
-        inputs=[
-            Metadata(
-                name="prompt",
-                datatype=str(DataType.TYPE_STRING.name),
-                shape=[1],
-            ),
-            Metadata(
-                name="negative_prompt",
-                datatype=str(DataType.TYPE_STRING.name),
-                shape=[1],
-            ),
-            Metadata(
-                name="prompt_image",
-                datatype=str(DataType.TYPE_STRING.name),
-                shape=[1],
-            ),
-            Metadata(
-                name="samples",
-                datatype=str(DataType.TYPE_INT32.name),
-                shape=[1],
-            ),
-            Metadata(
-                name="scheduler",
-                datatype=str(DataType.TYPE_STRING.name),
-                shape=[1],
-            ),
-            Metadata(
-                name="steps",
-                datatype=str(DataType.TYPE_INT32.name),
-                shape=[1],
-            ),
-            Metadata(
-                name="guidance_scale",
-                datatype=str(DataType.TYPE_FP32.name),
-                shape=[1],
-            ),
-            Metadata(
-                name="seed",
-                datatype=str(DataType.TYPE_INT64.name),
-                shape=[1],
-            ),
-            Metadata(
-                name="extra_params",
-                datatype=str(DataType.TYPE_STRING.name),
-                shape=[1],
-            ),
-        ],
-        outputs=[
-            Metadata(
-                name="images",
-                datatype=str(DataType.TYPE_FP32.name),
-                shape=[-1, -1, -1, -1],
-            ),
-        ],
-    )
-
-
-def construct_image_to_image_infer_response(
-    req: RayServiceCallRequest,
-    shape: list,
-    raw_outputs: List[bytes],
-):
-    return construct_infer_response(
-        req=req,
-        outputs=[
-            Metadata(
-                name="images",
-                datatype=str(DataType.TYPE_FP32.name),
-                shape=shape,
-            )
-        ],
-        raw_outputs=raw_outputs,
-    )
-
-
-def construct_visual_question_answering_metadata_response(
-    req: ModelMetadataRequest,
-):
-    return construct_metadata_response(
-        req=req,
-        inputs=[
-            Metadata(
-                name="prompt",
-                datatype=str(DataType.TYPE_STRING.name),
-                shape=[1],
-            ),
-            Metadata(
-                name="prompt_images",
-                datatype=str(DataType.TYPE_STRING.name),
-                shape=[1],
-            ),
-            Metadata(
-                name="chat_history",
-                datatype=str(DataType.TYPE_STRING.name),
-                shape=[1],
-            ),
-            Metadata(
-                name="system_message",
-                datatype=str(DataType.TYPE_STRING.name),
-                shape=[1],
-            ),
-            Metadata(
-                name="max_new_tokens",
-                datatype=str(DataType.TYPE_UINT32.name),
-                shape=[1],
-            ),
-            Metadata(
-                name="temperature",
-                datatype=str(DataType.TYPE_FP32.name),
-                shape=[1],
-            ),
-            Metadata(
-                name="top_k",
-                datatype=str(DataType.TYPE_UINT32.name),
-                shape=[1],
-            ),
-            Metadata(
-                name="random_seed",
-                datatype=str(DataType.TYPE_UINT64.name),
-                shape=[1],
-            ),
-            Metadata(
-                name="extra_params",
-                datatype=str(DataType.TYPE_STRING.name),
-                shape=[1],
-            ),
-        ],
-        outputs=[
-            Metadata(
-                name="text",
-                datatype=str(DataType.TYPE_STRING.name),
-                shape=[-1, -1],
-            ),
-        ],
-    )
-
-
-def construct_visual_question_answering_infer_response(
-    req: RayServiceCallRequest,
-    shape: list,
-    raw_outputs: List[bytes],
-):
-    return construct_infer_response(
-        req=req,
-        outputs=[
-            Metadata(
-                name="text",
-                datatype=str(DataType.TYPE_STRING.name),
-                shape=shape,
-            )
-        ],
-        raw_outputs=raw_outputs,
-    )
diff --git a/instill/helpers/protobufs/ray_pb2.py b/instill/helpers/protobufs/ray_pb2.py
index 0fbd8150..db5a5a82 100644
--- a/instill/helpers/protobufs/ray_pb2.py
+++ b/instill/helpers/protobufs/ray_pb2.py
@@ -1,6 +1,7 @@
 # -*- coding: utf-8 -*-
 # Generated by the protocol buffer compiler. DO NOT EDIT!
-# source: ray.proto
+# source: ray/v1/ray.proto
+# Protobuf Python Version: 4.25.1
 """Generated protocol buffer code."""
 from google.protobuf import descriptor as _descriptor
 from google.protobuf import descriptor_pool as _descriptor_pool
@@ -11,30 +12,21 @@
 _sym_db = _symbol_database.Default()
 
+from google.protobuf import struct_pb2 as google_dot_protobuf_dot_struct__pb2
 
-DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\tray.proto\x12\tray.serve\"5\n\x14ModelMetadataRequest\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x0f\n\x07version\x18\x02 \x01(\t\"<\n\x0bInferTensor\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x10\n\x08\x64\x61tatype\x18\x02 \x01(\t\x12\r\n\x05shape\x18\x03 \x03(\x03\"\x8e\x02\n\x15ModelMetadataResponse\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x10\n\x08versions\x18\x02 \x03(\t\x12\x11\n\tframework\x18\x03 \x01(\t\x12?\n\x06inputs\x18\x04 \x03(\x0b\x32/.ray.serve.ModelMetadataResponse.TensorMetadata\x12@\n\x07outputs\x18\x05 \x03(\x0b\x32/.ray.serve.ModelMetadataResponse.TensorMetadata\x1a?\n\x0eTensorMetadata\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x10\n\x08\x64\x61tatype\x18\x02 \x01(\t\x12\r\n\x05shape\x18\x03 \x03(\x03\"\x80\x02\n\x15RayServiceCallRequest\x12\x12\n\nmodel_name\x18\x01 \x01(\t\x12\x15\n\rmodel_version\x18\x02 \x01(\t\x12&\n\x06inputs\x18\x03 \x03(\x0b\x32\x16.ray.serve.InferTensor\x12L\n\x07outputs\x18\x06 \x03(\x0b\x32;.ray.serve.RayServiceCallRequest.InferRequestedOutputTensor\x12\x1a\n\x12raw_input_contents\x18\x07 \x03(\x0c\x1a*\n\x1aInferRequestedOutputTensor\x12\x0c\n\x04name\x18\x01 \x01(\t\"\x89\x01\n\x16RayServiceCallResponse\x12\x12\n\nmodel_name\x18\x01 \x01(\t\x12\x15\n\rmodel_version\x18\x02 \x01(\t\x12\'\n\x07outputs\x18\x05 \x03(\x0b\x32\x16.ray.serve.InferTensor\x12\x1b\n\x13raw_output_contents\x18\x06 \x03(\x0c\x32\xb5\x01\n\nRayService\x12T\n\rModelMetadata\x12\x1f.ray.serve.ModelMetadataRequest\x1a .ray.serve.ModelMetadataResponse\"\x00\x12Q\n\x08__call__\x12 .ray.serve.RayServiceCallRequest\x1a!.ray.serve.RayServiceCallResponse\"\x00\x42\x07Z\x05./rayb\x06proto3')
+DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x10ray/v1/ray.proto\x12\x06ray.v1\x1a\x1cgoogle/protobuf/struct.proto\">\n\x0eTriggerRequest\x12,\n\x0btask_inputs\x18\x01 \x03(\x0b\x32\x17.google.protobuf.Struct\"@\n\x0fTriggerResponse\x12-\n\x0ctask_outputs\x18\x01 \x03(\x0b\x32\x17.google.protobuf.Struct2J\n\nRayService\x12<\n\x07Trigger\x12\x16.ray.v1.TriggerRequest\x1a\x17.ray.v1.TriggerResponse\"\x00\x42\rZ\x0b./rayserverb\x06proto3')
 
 _globals = globals()
 _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals)
-_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'ray_pb2', _globals)
+_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'ray.v1.ray_pb2', _globals)
 if _descriptor._USE_C_DESCRIPTORS == False:
-  DESCRIPTOR._options = None
-  DESCRIPTOR._serialized_options = b'Z\005./ray'
-  _globals['_MODELMETADATAREQUEST']._serialized_start=24
-  _globals['_MODELMETADATAREQUEST']._serialized_end=77
-  _globals['_INFERTENSOR']._serialized_start=79
-  _globals['_INFERTENSOR']._serialized_end=139
-  _globals['_MODELMETADATARESPONSE']._serialized_start=142
-  _globals['_MODELMETADATARESPONSE']._serialized_end=412
-  _globals['_MODELMETADATARESPONSE_TENSORMETADATA']._serialized_start=349
-  _globals['_MODELMETADATARESPONSE_TENSORMETADATA']._serialized_end=412
-  _globals['_RAYSERVICECALLREQUEST']._serialized_start=415
-  _globals['_RAYSERVICECALLREQUEST']._serialized_end=671
-  _globals['_RAYSERVICECALLREQUEST_INFERREQUESTEDOUTPUTTENSOR']._serialized_start=629
-  _globals['_RAYSERVICECALLREQUEST_INFERREQUESTEDOUTPUTTENSOR']._serialized_end=671
-  _globals['_RAYSERVICECALLRESPONSE']._serialized_start=674
-  _globals['_RAYSERVICECALLRESPONSE']._serialized_end=811
-  _globals['_RAYSERVICE']._serialized_start=814
-  _globals['_RAYSERVICE']._serialized_end=995
+  _globals['DESCRIPTOR']._options = None
+  _globals['DESCRIPTOR']._serialized_options = b'Z\013./rayserver'
+  _globals['_TRIGGERREQUEST']._serialized_start=58
+  _globals['_TRIGGERREQUEST']._serialized_end=120
+  _globals['_TRIGGERRESPONSE']._serialized_start=122
+  _globals['_TRIGGERRESPONSE']._serialized_end=186
+  _globals['_RAYSERVICE']._serialized_start=188
+  _globals['_RAYSERVICE']._serialized_end=262
 # @@protoc_insertion_point(module_scope)
diff --git a/instill/helpers/protobufs/ray_pb2.pyi b/instill/helpers/protobufs/ray_pb2.pyi
index 4a67105b..cab0b3ac 100644
--- a/instill/helpers/protobufs/ray_pb2.pyi
+++ b/instill/helpers/protobufs/ray_pb2.pyi
@@ -1,3 +1,4 @@
+from google.protobuf import struct_pb2 as _struct_pb2
 from google.protobuf.internal import containers as _containers
 from google.protobuf import descriptor as _descriptor
 from google.protobuf import message as _message
@@ -5,74 +6,14 @@ from typing import ClassVar as _ClassVar, Iterable as _Iterable, Mapping as _Map
 
 DESCRIPTOR: _descriptor.FileDescriptor
 
-class ModelMetadataRequest(_message.Message):
-    __slots__ = ["name", "version"]
-    NAME_FIELD_NUMBER: _ClassVar[int]
-    VERSION_FIELD_NUMBER: _ClassVar[int]
-    name: str
-    version: str
-    def __init__(self, name: _Optional[str] = ..., version: _Optional[str] = ...) -> None: ...
+class TriggerRequest(_message.Message):
+    __slots__ = ("task_inputs",)
+    TASK_INPUTS_FIELD_NUMBER: _ClassVar[int]
+    task_inputs: _containers.RepeatedCompositeFieldContainer[_struct_pb2.Struct]
+    def __init__(self, task_inputs: _Optional[_Iterable[_Union[_struct_pb2.Struct, _Mapping]]] = ...) -> None: ...
 
-class InferTensor(_message.Message):
-    __slots__ = ["name", "datatype", "shape"]
-    NAME_FIELD_NUMBER: _ClassVar[int]
-    DATATYPE_FIELD_NUMBER: _ClassVar[int]
-    SHAPE_FIELD_NUMBER: _ClassVar[int]
-    name: str
-    datatype: str
-    shape: _containers.RepeatedScalarFieldContainer[int]
-    def __init__(self, name: _Optional[str] = ..., datatype: _Optional[str] = ..., shape: _Optional[_Iterable[int]] = ...) -> None: ...
-
-class ModelMetadataResponse(_message.Message):
-    __slots__ = ["name", "versions", "framework", "inputs", "outputs"]
-    class TensorMetadata(_message.Message):
-        __slots__ = ["name", "datatype", "shape"]
-        NAME_FIELD_NUMBER: _ClassVar[int]
-        DATATYPE_FIELD_NUMBER: _ClassVar[int]
-        SHAPE_FIELD_NUMBER: _ClassVar[int]
-        name: str
-        datatype: str
-        shape: _containers.RepeatedScalarFieldContainer[int]
-        def __init__(self, name: _Optional[str] = ..., datatype: _Optional[str] = ..., shape: _Optional[_Iterable[int]] = ...) -> None: ...
-    NAME_FIELD_NUMBER: _ClassVar[int]
-    VERSIONS_FIELD_NUMBER: _ClassVar[int]
-    FRAMEWORK_FIELD_NUMBER: _ClassVar[int]
-    INPUTS_FIELD_NUMBER: _ClassVar[int]
-    OUTPUTS_FIELD_NUMBER: _ClassVar[int]
-    name: str
-    versions: _containers.RepeatedScalarFieldContainer[str]
-    framework: str
-    inputs: _containers.RepeatedCompositeFieldContainer[ModelMetadataResponse.TensorMetadata]
-    outputs: _containers.RepeatedCompositeFieldContainer[ModelMetadataResponse.TensorMetadata]
-    def __init__(self, name: _Optional[str] = ..., versions: _Optional[_Iterable[str]] = ..., framework: _Optional[str] = ..., inputs: _Optional[_Iterable[_Union[ModelMetadataResponse.TensorMetadata, _Mapping]]] = ..., outputs: _Optional[_Iterable[_Union[ModelMetadataResponse.TensorMetadata, _Mapping]]] = ...) -> None: ...
-
-class RayServiceCallRequest(_message.Message):
-    __slots__ = ["model_name", "model_version", "inputs", "outputs", "raw_input_contents"]
-    class InferRequestedOutputTensor(_message.Message):
-        __slots__ = ["name"]
-        NAME_FIELD_NUMBER: _ClassVar[int]
-        name: str
-        def __init__(self, name: _Optional[str] = ...) -> None: ...
-    MODEL_NAME_FIELD_NUMBER: _ClassVar[int]
-    MODEL_VERSION_FIELD_NUMBER: _ClassVar[int]
-    INPUTS_FIELD_NUMBER: _ClassVar[int]
-    OUTPUTS_FIELD_NUMBER: _ClassVar[int]
-    RAW_INPUT_CONTENTS_FIELD_NUMBER: _ClassVar[int]
-    model_name: str
-    model_version: str
-    inputs: _containers.RepeatedCompositeFieldContainer[InferTensor]
-    outputs: _containers.RepeatedCompositeFieldContainer[RayServiceCallRequest.InferRequestedOutputTensor]
-    raw_input_contents: _containers.RepeatedScalarFieldContainer[bytes]
-    def __init__(self, model_name: _Optional[str] = ..., model_version: _Optional[str] = ..., inputs: _Optional[_Iterable[_Union[InferTensor, _Mapping]]] = ..., outputs: _Optional[_Iterable[_Union[RayServiceCallRequest.InferRequestedOutputTensor, _Mapping]]] = ..., raw_input_contents: _Optional[_Iterable[bytes]] = ...) -> None: ...
-
-class RayServiceCallResponse(_message.Message):
-    __slots__ = ["model_name", "model_version", "outputs", "raw_output_contents"]
-    MODEL_NAME_FIELD_NUMBER: _ClassVar[int]
-    MODEL_VERSION_FIELD_NUMBER: _ClassVar[int]
-    OUTPUTS_FIELD_NUMBER: _ClassVar[int]
-    RAW_OUTPUT_CONTENTS_FIELD_NUMBER: _ClassVar[int]
-    model_name: str
-    model_version: str
-    outputs: _containers.RepeatedCompositeFieldContainer[InferTensor]
-    raw_output_contents: _containers.RepeatedScalarFieldContainer[bytes]
-    def __init__(self, model_name: _Optional[str] = ..., model_version: _Optional[str] = ..., outputs: _Optional[_Iterable[_Union[InferTensor, _Mapping]]] = ..., raw_output_contents: _Optional[_Iterable[bytes]] = ...) -> None: ...
+class TriggerResponse(_message.Message):
+    __slots__ = ("task_outputs",)
+    TASK_OUTPUTS_FIELD_NUMBER: _ClassVar[int]
+    task_outputs: _containers.RepeatedCompositeFieldContainer[_struct_pb2.Struct]
+    def __init__(self, task_outputs: _Optional[_Iterable[_Union[_struct_pb2.Struct, _Mapping]]] = ...) -> None: ...
diff --git a/instill/helpers/protobufs/ray_pb2_grpc.py b/instill/helpers/protobufs/ray_pb2_grpc.py
index cb1dec7b..7206243f 100644
--- a/instill/helpers/protobufs/ray_pb2_grpc.py
+++ b/instill/helpers/protobufs/ray_pb2_grpc.py
@@ -2,7 +2,7 @@
 """Client and server classes corresponding to protobuf-defined services."""
 import grpc
 
-import ray_pb2 as ray__pb2
+import ray_pb2 as ray_dot_v1_dot_ray__pb2
 
 
 class RayServiceStub(object):
@@ -15,15 +15,10 @@ def __init__(self, channel):
         Args:
            channel: A grpc.Channel.
""" - self.ModelMetadata = channel.unary_unary( - '/ray.serve.RayService/ModelMetadata', - request_serializer=ray__pb2.ModelMetadataRequest.SerializeToString, - response_deserializer=ray__pb2.ModelMetadataResponse.FromString, - ) - self.__call__ = channel.unary_unary( - '/ray.serve.RayService/__call__', - request_serializer=ray__pb2.RayServiceCallRequest.SerializeToString, - response_deserializer=ray__pb2.RayServiceCallResponse.FromString, + self.Trigger = channel.unary_unary( + '/ray.v1.RayService/Trigger', + request_serializer=ray_dot_v1_dot_ray__pb2.TriggerRequest.SerializeToString, + response_deserializer=ray_dot_v1_dot_ray__pb2.TriggerResponse.FromString, ) @@ -31,16 +26,8 @@ class RayServiceServicer(object): """Ray service for internal process """ - def ModelMetadata(self, request, context): - """ModelMetadata method receives a ModelMetadataRequest message and - returns a ModelMetadataResponse - """ - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def __call__(self, request, context): - """__call__ method is the defaut trigger entry for ray deployment + def Trigger(self, request, context): + """Trigger method is the defaut trigger entry for ray deployment """ context.set_code(grpc.StatusCode.UNIMPLEMENTED) context.set_details('Method not implemented!') @@ -49,19 +36,14 @@ def __call__(self, request, context): def add_RayServiceServicer_to_server(servicer, server): rpc_method_handlers = { - 'ModelMetadata': grpc.unary_unary_rpc_method_handler( - servicer.ModelMetadata, - request_deserializer=ray__pb2.ModelMetadataRequest.FromString, - response_serializer=ray__pb2.ModelMetadataResponse.SerializeToString, - ), - '__call__': grpc.unary_unary_rpc_method_handler( - servicer.__call__, - request_deserializer=ray__pb2.RayServiceCallRequest.FromString, - response_serializer=ray__pb2.RayServiceCallResponse.SerializeToString, + 'Trigger': grpc.unary_unary_rpc_method_handler( + servicer.Trigger, + request_deserializer=ray_dot_v1_dot_ray__pb2.TriggerRequest.FromString, + response_serializer=ray_dot_v1_dot_ray__pb2.TriggerResponse.SerializeToString, ), } generic_handler = grpc.method_handlers_generic_handler( - 'ray.serve.RayService', rpc_method_handlers) + 'ray.v1.RayService', rpc_method_handlers) server.add_generic_rpc_handlers((generic_handler,)) @@ -71,24 +53,7 @@ class RayService(object): """ @staticmethod - def ModelMetadata(request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/ray.serve.RayService/ModelMetadata', - ray__pb2.ModelMetadataRequest.SerializeToString, - ray__pb2.ModelMetadataResponse.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) - - @staticmethod - def __call__(request, + def Trigger(request, target, options=(), channel_credentials=None, @@ -98,8 +63,8 @@ def __call__(request, wait_for_ready=None, timeout=None, metadata=None): - return grpc.experimental.unary_unary(request, target, '/ray.serve.RayService/__call__', - ray__pb2.RayServiceCallRequest.SerializeToString, - ray__pb2.RayServiceCallResponse.FromString, + return grpc.experimental.unary_unary(request, target, '/ray.v1.RayService/Trigger', + ray_dot_v1_dot_ray__pb2.TriggerRequest.SerializeToString, + ray_dot_v1_dot_ray__pb2.TriggerResponse.FromString, options, 
            options, channel_credentials,
            insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
diff --git a/instill/helpers/ray_io.py b/instill/helpers/ray_io.py
index c63f7888..5de4ba66 100644
--- a/instill/helpers/ray_io.py
+++ b/instill/helpers/ray_io.py
@@ -1,706 +1,1035 @@
+# pylint: disable=no-member,no-name-in-module, inconsistent-return-statements
 import base64
 import io
-import json
-import struct
-from json.decoder import JSONDecodeError
-from typing import List
+import re
+from typing import Dict, List, Union
 
-import numpy as np
+import requests
+from google.protobuf import json_format, struct_pb2
 from PIL import Image
 
+import instill.protogen.model.model.v1alpha.common_pb2 as commonpb
+import instill.protogen.model.model.v1alpha.model_pb2 as modelpb
+import instill.protogen.model.model.v1alpha.task_classification_pb2 as classificationpb
+import instill.protogen.model.model.v1alpha.task_detection_pb2 as detectionpb
+import instill.protogen.model.model.v1alpha.task_image_to_image_pb2 as imagetoimagepb
+import instill.protogen.model.model.v1alpha.task_instance_segmentation_pb2 as instancesegmentationpb
+import instill.protogen.model.model.v1alpha.task_keypoint_pb2 as keypointpb
+import instill.protogen.model.model.v1alpha.task_ocr_pb2 as ocrpb
+import instill.protogen.model.model.v1alpha.task_semantic_segmentation_pb2 as semanticsegmentationpb
+import instill.protogen.model.model.v1alpha.task_text_generation_chat_pb2 as textgenerationchatpb
+import instill.protogen.model.model.v1alpha.task_text_generation_pb2 as textgenerationpb
+import instill.protogen.model.model.v1alpha.task_text_to_image_pb2 as texttoimagepb
+import instill.protogen.model.model.v1alpha.task_visual_question_answering_pb2 as visualquestionansweringpb
 from instill.helpers.const import (
+    PROMPT_ROLES,
+    ConversationInput,
+    ConversationMultiModelInput,
     ImageToImageInput,
-    TextGenerationChatInput,
-    TextGenerationInput,
     TextToImageInput,
-    VisualQuestionAnsweringInput,
+    VisionInput,
 )
+from instill.helpers.errors import InvalidInputException, InvalidOutputShapeException
+from instill.helpers.protobufs.ray_pb2 import TriggerRequest, TriggerResponse
+
+
+def base64_to_pil_image(base64_str):
+    return Image.open(
+        io.BytesIO(
+            base64.b64decode(
+                re.sub(
+                    "^data:image/.+;base64,",
+                    "",
+                    base64_str,
+                )
+            )
+        )
+    )
+
+
+def url_to_pil_image(url):
+    resp = requests.get(url, timeout=10)
+    resp.raise_for_status()
+    return Image.open(io.BytesIO(resp.content))
+
+
+def protobuf_to_struct(pb_msg):
+    """Convert Protobuf message to Struct"""
+    dict_data = json_format.MessageToDict(pb_msg)
+
+    # Convert dictionary to struct_pb2.Struct
+    struct_pb = struct_pb2.Struct()
+    json_format.ParseDict(dict_data, struct_pb)
+
+    return struct_pb
+
+
+def struct_to_protobuf(struct_pb, pb_message_type):
+    """Convert Struct to Protobuf message"""
+    dict_data = json_format.MessageToDict(struct_pb)
+
+    # Parse dictionary to Protobuf message
+    pb_msg = pb_message_type()
+    json_format.ParseDict(dict_data, pb_msg)
+
+    return pb_msg
+
+
+def struct_to_dict(struct_obj):
+    """Convert Protobuf Struct to dictionary"""
+    if isinstance(struct_obj, struct_pb2.Struct):
+        return {k: struct_to_dict(v) for k, v in struct_obj.fields.items()}
+    if isinstance(struct_obj, struct_pb2.ListValue):
+        return [struct_to_dict(v) for v in struct_obj.values]
+    if isinstance(struct_obj, struct_pb2.Value):
+        kind = struct_obj.WhichOneof("kind")
+        if kind == "null_value":
+            return None
+        if kind == "number_value":
+            return struct_obj.number_value
+        if kind == "string_value":
+            return struct_obj.string_value
+        if kind == "bool_value":
+            return struct_obj.bool_value
+        if kind == "struct_value":
+            return struct_to_dict(struct_obj.struct_value)
+        if kind == "list_value":
+            return struct_to_dict(struct_obj.list_value)
+    else:
+        return struct_obj
+
+
+def parse_task_classification_to_vision_input(
+    request: TriggerRequest,
+) -> List[VisionInput]:
+    input_list = []
+    for task_input in request.task_inputs:
+        task_input_pb = struct_to_protobuf(task_input, modelpb.TaskInput)
+
+        classification_pb: classificationpb.ClassificationInput = (
+            task_input_pb.classification
+        )
+
+        inp = VisionInput()
+        if (
+            classification_pb.image_base64 != "" and classification_pb.image_url != ""
+        ) or (
+            classification_pb.image_base64 == "" and classification_pb.image_url == ""
+        ):
+            raise InvalidInputException
+        if classification_pb.image_base64 != "":
+            inp.image = base64_to_pil_image(classification_pb.image_base64)
+        elif classification_pb.image_url != "":
+            inp.image = url_to_pil_image(classification_pb.image_url)
+
+        input_list.append(inp)
+
+    return input_list
+
+
+def construct_task_classification_output(
+    categories: List[str],
+    scores: List[float],
+) -> TriggerResponse:
+    if not len(categories) == len(scores):
+        raise InvalidOutputShapeException
+
+    task_outputs = []
+    for category, score in zip(categories, scores):
+
+        task_outputs.append(
+            protobuf_to_struct(
+                modelpb.TaskOutput(
+                    classification=classificationpb.ClassificationOutput(
+                        category=category, score=score
+                    )
+                )
+            )
+        )
+    return TriggerResponse(task_outputs=task_outputs)
 
-def serialize_byte_tensor(input_tensor):
-    """
-    Serializes a bytes tensor into a flat numpy array of length prepended
-    bytes. The numpy array should use dtype of np.object_. For np.bytes_,
-    numpy will remove trailing zeros at the end of byte sequence and because
-    of this it should be avoided.
-    Parameters
-    ----------
-    input_tensor : np.array
-        The bytes tensor to serialize.
-    Returns
-    -------
-    serialized_bytes_tensor : np.array
-        The 1-D numpy array of type uint8 containing the serialized bytes in 'C' order.
-    Raises
-    ------
-    InferenceServerException
-        If unable to serialize the given tensor.
-    """
-    if input_tensor.size == 0:
-        return ()
-
-    # If the input is a tensor of string/bytes objects, then must flatten those
-    # into a 1-dimensional array containing the 4-byte byte size followed by the
-    # actual element bytes. All elements are concatenated together in "C" order.
-    if (input_tensor.dtype == np.object_) or (input_tensor.dtype.type == np.bytes_):
-        flattened_ls: list = []
-        for obj in np.nditer(input_tensor, flags=["refs_ok"], order="C"):
-            # If directly passing bytes to BYTES type,
-            # don't convert it to str as Python will encode the
-            # bytes which may distort the meaning
-            assert isinstance(obj, np.ndarray)
-            if input_tensor.dtype == np.object_:
-                if isinstance(obj.item(), bytes):
-                    s = obj.item()
-                else:
-                    s = str(obj.item()).encode("utf-8")
-            else:
-                s = obj.item()
-            flattened_ls.append(struct.pack("<I", len(s)))
+def parse_task_detection_to_vision_input(
+    request: TriggerRequest,
+) -> List[VisionInput]:
+    input_list = []
+    for task_input in request.task_inputs:
+        task_input_pb = struct_to_protobuf(task_input, modelpb.TaskInput)
 
+        detection_pb: detectionpb.DetectionInput = task_input_pb.detection
 
-def deserialize_bytes_tensor(encoded_tensor):
-    """
-    Deserializes an encoded bytes tensor into an
-    numpy array of dtype of python objects
-
-    Parameters
-    ----------
-    encoded_tensor : bytes
-        The encoded bytes tensor where each element
-        has its length in first 4 bytes followed by
-        the content
-    Returns
-    -------
-    string_tensor : np.array
-        The 1-D numpy array of type object containing the
-        deserialized bytes in 'C' order.
+        inp = VisionInput()
+        if (detection_pb.image_base64 != "" and detection_pb.image_url != "") or (
+            detection_pb.image_base64 == "" and detection_pb.image_url == ""
+        ):
+            raise InvalidInputException
+        if detection_pb.image_base64 != "":
+            inp.image = base64_to_pil_image(detection_pb.image_base64)
+        elif detection_pb.image_url != "":
+            inp.image = url_to_pil_image(detection_pb.image_url)
 
-    """
-    strs = []
-    offset = 0
-    val_buf = encoded_tensor
-    while offset < len(val_buf):
-        l = struct.unpack_from("<I", val_buf, offset)[0]
 
-    def parse_task_text_generation_input(request) -> TextGenerationInput:
-        text_generation_input = TextGenerationInput()
-
-        for i, b_input_tensor in zip(request.inputs, request.raw_input_contents):
-            input_name = i.name
-
-            if input_name == "prompt":
-                input_tensor = deserialize_bytes_tensor(b_input_tensor)
-                text_generation_input.prompt = str(input_tensor[0].decode("utf-8"))
-                print(
-                    f"[DEBUG] input `prompt` type\
-                    ({type(text_generation_input.prompt)}): {text_generation_input.prompt}"
-                )
+        input_list.append(inp)
 
-            if input_name == "prompt_images":
-                input_tensors = deserialize_bytes_tensor(b_input_tensor)
-                images = []
-                for enc in input_tensors:
-                    if len(enc) == 0:
-                        continue
-                    try:
-                        enc_json = json.loads(str(enc.decode("utf-8")))
-                        if len(enc_json) == 0:
-                            continue
-                        decoded_enc = enc_json[0]
-                    except JSONDecodeError:
-                        print("[DEBUG] WARNING `enc_json` parsing faield!")
-                    # pil_img = Image.open(io.BytesIO(enc.astype(bytes)))  # RGB
-                    pil_img = Image.open(io.BytesIO(base64.b64decode(decoded_enc)))
-
-                    image = np.array(pil_img)
-                    if len(image.shape) == 2:  # gray image
-                        raise ValueError(
-                            f"The image shape with {image.shape} is "
-                            f"not in acceptable"
-                        )
-                    images.append(image)
-                # TODO: check wethere there are issues in batch size dimention
-                text_generation_input.prompt_images = images
-                print(
-                    "[DEBUG] input `prompt_images` type"
-                    f"({type(text_generation_input.prompt_images)}): "
-                    f"{text_generation_input.prompt_images}"
-                )
+    return input_list
 
-            if input_name == "chat_history":
-                input_tensor = deserialize_bytes_tensor(b_input_tensor)
-                chat_history_str = str(input_tensor[0].decode("utf-8"))
-                print(
-                    "[DEBUG] input `chat_history_str` type"
-                    f"({type(chat_history_str)}): "
-                    f"{chat_history_str}"
-                )
-                try:
-                    text_generation_input.chat_history = json.loads(chat_history_str)
-                except JSONDecodeError:
-                    print("[DEBUG] WARNING `extra_params` parsing faield!")
-                    continue
 
-            if input_name == "system_message":
-                input_tensor = deserialize_bytes_tensor(b_input_tensor)
-                text_generation_input.system_message = str(
-                    input_tensor[0].decode("utf-8")
-                )
-                print(
-                    "[DEBUG] input `system_message` type"
-                    f"({type(text_generation_input.system_message)}): "
-                    f"{text_generation_input.system_message}"
-                )
+def construct_task_detection_output(
+    categories: List[List[str]],
+    scores: List[List[float]],
+    bounding_boxes: List[List[tuple]],
+) -> TriggerResponse:
+    """Construct trigger output for detection task
 
-            if input_name == "max_new_tokens":
-                text_generation_input.max_new_tokens = int.from_bytes(
-                    b_input_tensor, "little"
-                )
-                print(
-                    "[DEBUG] input `max_new_tokens` type"
-                    f"({type(text_generation_input.max_new_tokens)}): "
-                    f"{text_generation_input.max_new_tokens}"
+    Args:
+        categories (List[List[str]]): for each image input, the list of detected object's category
+        scores (List[List[float]]): for each image input, the list of detected object's score
+        bounding_boxes (List[List[tuple]]): for each image input, the list of detected object's bbox, with the format
+            (top, left, width, height)
+    """
+    if not len(categories) == len(scores) == len(bounding_boxes):
+        raise InvalidOutputShapeException
+
+    task_outputs = []
+    for category, score, bbox in zip(categories, scores, bounding_boxes):
+        objects = []
+        for cat, sc, bb in zip(category, score, bbox):
+            objects.append(
+                detectionpb.DetectionObject(
+                    category=cat,
+                    score=sc,
+                    bounding_box=commonpb.BoundingBox(
+                        top=bb[0],
+                        left=bb[1],
+                        width=bb[2],
+                        height=bb[3],
+                    ),
                 )
-
-            if input_name == "top_k":
-                text_generation_input.top_k = int.from_bytes(b_input_tensor, "little")
-                print(
-                    "[DEBUG] input `top_k` type"
-                    f"({type(text_generation_input.top_k)}): "
-                    f"{text_generation_input.top_k}"
+            )
+        task_outputs.append(
+            protobuf_to_struct(
+                modelpb.TaskOutput(
+                    detection=detectionpb.DetectionOutput(objects=objects)
                 )
+            )
+        )
 
-            if input_name == "temperature":
-                text_generation_input.temperature = struct.unpack("f", b_input_tensor)[
-                    0
-                ]
-                print(
-                    "[DEBUG] input `temperature` type"
-                    f"({type(text_generation_input.temperature)}): "
-                    f"{text_generation_input.temperature}"
-                )
-                text_generation_input.temperature = round(
-                    text_generation_input.temperature, 2
-                )
+    return TriggerResponse(task_outputs=task_outputs)
 
-            if input_name == "random_seed":
-                text_generation_input.random_seed = int.from_bytes(
-                    b_input_tensor, "little"
-                )
-                print(
-                    "[DEBUG] input `random_seed` type"
-                    f"({type(text_generation_input.random_seed)}): "
-                    f"{text_generation_input.random_seed}"
-                )
 
-            if input_name == "extra_params":
-                input_tensor = deserialize_bytes_tensor(b_input_tensor)
-                extra_params_str = str(input_tensor[0].decode("utf-8"))
-                print(
-                    "[DEBUG] input `extra_params` type"
-                    f"({type(extra_params_str)}): "
-                    f"{extra_params_str}"
-                )
+def parse_task_ocr_to_vision_input(
+    request: TriggerRequest,
+) -> List[VisionInput]:
+    input_list = []
+    for task_input in request.task_inputs:
+        task_input_pb = struct_to_protobuf(task_input, modelpb.TaskInput)
 
-                try:
-                    text_generation_input.extra_params = json.loads(extra_params_str)
-                except JSONDecodeError:
-                    print("[DEBUG] WARNING `extra_params` parsing faield!")
-                    continue
+        ocr_pb: ocrpb.OcrInput = task_input_pb.ocr
 
-        return text_generation_input
+        inp = VisionInput()
+        if (ocr_pb.image_base64 != "" and ocr_pb.image_url != "") or (
+            ocr_pb.image_base64 == "" and ocr_pb.image_url == ""
+        ):
+            raise InvalidInputException
+        if ocr_pb.image_base64 != "":
+            inp.image = base64_to_pil_image(ocr_pb.image_base64)
+        elif ocr_pb.image_url != "":
+            inp.image = url_to_pil_image(ocr_pb.image_url)
 
-    @staticmethod
-    def parse_task_text_generation_output(sequences: list):
-        text_outputs = [seq["generated_text"].encode("utf-8") for seq in sequences]
+        input_list.append(inp)
 
-        return serialize_byte_tensor(np.asarray(text_outputs))
+    return input_list
 
-    @staticmethod
-    def parse_task_text_to_image_input(request) -> TextToImageInput:
-        text_to_image_input = TextToImageInput()
 
-        for i, b_input_tensor in zip(request.inputs, request.raw_input_contents):
-            input_name = i.name
+def construct_task_ocr_output(
+    texts: List[List[str]],
+    scores: List[List[float]],
+    bounding_boxes: List[List[tuple]],
+) -> TriggerResponse:
+    """Construct trigger output for ocr task
 
-            if input_name == "prompt":
-                input_tensor = deserialize_bytes_tensor(b_input_tensor)
-                text_to_image_input.prompt = str(input_tensor[0].decode("utf-8"))
-                print(
-                    f"[DEBUG] input `prompt` type\
-                    ({type(text_to_image_input.prompt)}): {text_to_image_input.prompt}"
+    Args:
+        texts (List[List[str]]): for each image input, the list of detected text
+        scores (List[List[float]]): for each image input, the list of detected text's score
+        bounding_boxes (List[List[tuple]]): for each image input, the list of detected text's bbox, with the format
+            (top, left, width, height)
+    """
+    if not len(texts) == len(scores) == len(bounding_boxes):
+        raise InvalidOutputShapeException
+
+    task_outputs = []
+    for text, score, bbox in zip(texts, scores, bounding_boxes):
+        objects = []
+        for txt, sc, bb in zip(text, score, bbox):
+            objects.append(
+                ocrpb.OcrObject(
+                    text=txt,
+                    score=sc,
+                    bounding_box=commonpb.BoundingBox(
+                        top=bb[0],
+                        left=bb[1],
+                        width=bb[2],
+                        height=bb[3],
+                    ),
                 )
-
-            if input_name == "negative_prompt":
-                input_tensor = deserialize_bytes_tensor(b_input_tensor)
-                text_to_image_input.negative_prompt = str(
-                    input_tensor[0].decode("utf-8")
+            )
+        task_outputs.append(
+            protobuf_to_struct(modelpb.TaskOutput(ocr=ocrpb.OcrOutput(objects=objects)))
+        )
+
+    return TriggerResponse(task_outputs=task_outputs)
+
+
+def parse_task_instance_segmentation_to_vision_input(
+    request: TriggerRequest,
+) -> List[VisionInput]:
+    input_list = []
+    for task_input in request.task_inputs:
+        task_input_pb = struct_to_protobuf(task_input, modelpb.TaskInput)
+
+        instance_segmentation_pb: instancesegmentationpb.InstanceSegmentationInput = (
+            task_input_pb.instance_segmentation
+        )
+
+        inp = VisionInput()
+        if (
+            instance_segmentation_pb.image_base64 != ""
+            and instance_segmentation_pb.image_url != ""
+        ) or (
+            instance_segmentation_pb.image_base64 == ""
+            and instance_segmentation_pb.image_url == ""
+        ):
+            raise InvalidInputException
+        if instance_segmentation_pb.image_base64 != "":
+            inp.image = base64_to_pil_image(
+                instance_segmentation_pb.image_base64,
+            )
+        elif instance_segmentation_pb.image_url != "":
+            inp.image = url_to_pil_image(instance_segmentation_pb.image_url)
+
+        input_list.append(inp)
+
+    return input_list
+
+
+def construct_task_instance_segmentation_output(
+    rles: List[List[str]],
+    categories: List[List[str]],
+    scores: List[List[float]],
+    bounding_boxes: List[List[tuple]],
+) -> TriggerResponse:
+    """Construct trigger output for instance segmentation task
+
+    Args:
+        rles (List[List[str]]): for each image input, the list of detected object's rle
+        categories (List[List[str]]): for each image input, the list of detected object's category
+        scores (List[List[float]]): for each image input, the list of detected object's score
+        bounding_boxes (List[List[tuple]]): for each image input, the list of detected object's bbox, with the format
+            (top, left, width, height)
+    """
+    if not len(rles) == len(categories) == len(scores) == len(bounding_boxes):
+        raise InvalidOutputShapeException
+
+    task_outputs = []
+    for rle, category, score, bbox in zip(rles, categories, scores, bounding_boxes):
+        objects = []
+        for r, cat, sc, bb in zip(rle, category, score, bbox):
+            objects.append(
+                instancesegmentationpb.InstanceSegmentationObject(
+                    rle=r,
+                    category=cat,
+                    score=sc,
+                    bounding_box=commonpb.BoundingBox(
+                        top=bb[0],
+                        left=bb[1],
+                        width=bb[2],
+                        height=bb[3],
+                    ),
                 )
-                print(
-                    f"[DEBUG] input `negative_prompt` type\
-                    ({type(text_to_image_input.negative_prompt)}): {text_to_image_input.negative_prompt}"
+            )
+        task_outputs.append(
+            protobuf_to_struct(
+                modelpb.TaskOutput(
+                    instance_segmentation=instancesegmentationpb.InstanceSegmentationOutput(
+                        objects=objects
+                    )
                 )
-
-            if input_name == "steps":
-                text_to_image_input.steps = int.from_bytes(b_input_tensor, "little")
-                print(
-                    f"[DEBUG] input `steps` type\
-                    ({type(text_to_image_input.steps)}): {text_to_image_input.steps}"
+            )
+        )
+
+    return TriggerResponse(task_outputs=task_outputs)
+
+
+def parse_task_semantic_segmentation_to_vision_input(
+    request: TriggerRequest,
+) -> List[VisionInput]:
+    input_list = []
+    for task_input in request.task_inputs:
+        task_input_pb = struct_to_protobuf(task_input, modelpb.TaskInput)
+
+        semantic_segmentation_pb: semanticsegmentationpb.SemanticSegmentationInput = (
+            task_input_pb.semantic_segmentation
+        )
+
+        inp = VisionInput()
+        if (
+            semantic_segmentation_pb.image_base64 != ""
+            and semantic_segmentation_pb.image_url != ""
+        ) or (
+            semantic_segmentation_pb.image_base64 == ""
+            and semantic_segmentation_pb.image_url == ""
+        ):
+            raise InvalidInputException
+        if semantic_segmentation_pb.image_base64 != "":
+            inp.image = base64_to_pil_image(
+                semantic_segmentation_pb.image_base64,
+            )
+        elif semantic_segmentation_pb.image_url != "":
+            inp.image = url_to_pil_image(semantic_segmentation_pb.image_url)
+
+        input_list.append(inp)
+
+    return input_list
+
+
+def construct_task_semantic_segmentation_output(
+    rles: List[List[str]],
+    categories: List[List[str]],
+) -> TriggerResponse:
+    """Construct trigger output for semantic segmentation task
+
+    Args:
+        rles (List[List[str]]): for each image input, the list of detected object's rle
+        categories (List[List[str]]): for each image input, the list of detected object's category
+    """
+    if not len(rles) == len(categories):
+        raise InvalidOutputShapeException
+
+    task_outputs = []
+    for rle, category in zip(rles, categories):
+        objects = []
+        for r, cat in zip(rle, category):
+            objects.append(
+                semanticsegmentationpb.SemanticSegmentationStuff(
+                    rle=r,
+                    category=cat,
                 )
-
-            if input_name == "seed":
-                text_to_image_input.seed = int.from_bytes(b_input_tensor, "little")
-                print(
-                    f"[DEBUG] input `seed` type\
-                    ({type(text_to_image_input.seed)}): {text_to_image_input.seed}"
+            )
+        task_outputs.append(
+            protobuf_to_struct(
+                modelpb.TaskOutput(
+                    semantic_segmentation=semanticsegmentationpb.SemanticSegmentationOutput(
+                        stuffs=objects
+                    )
                 )
+            )
+        )
 
-            if input_name == "guidance_scale":
-                text_to_image_input.guidance_scale = struct.unpack("f", b_input_tensor)[
-                    0
-                ]
-                print(
-                    f"[DEBUG] input `guidance_scale` type\
-                    ({type(text_to_image_input.guidance_scale)}): {text_to_image_input.guidance_scale}"
-                )
-                text_to_image_input.guidance_scale = round(
-                    text_to_image_input.guidance_scale, 2
-                )
+    return TriggerResponse(task_outputs=task_outputs)
 
-            if input_name == "samples":
-                text_to_image_input.samples = int.from_bytes(b_input_tensor, "little")
-                print(
-                    f"[DEBUG] input `samples` type\
-                    ({type(text_to_image_input.samples)}): {text_to_image_input.samples}"
-                )
 
-            if input_name == "extra_params":
-                input_tensor = deserialize_bytes_tensor(b_input_tensor)
-                extra_params_str = str(input_tensor[0].decode("utf-8"))
-                print(
-                    f"[DEBUG] input `extra_params` type\
-                    ({type(extra_params_str)}): {extra_params_str}"
-                )
+def parse_task_keypoint_to_vision_input(
+    request: TriggerRequest,
+) -> List[VisionInput]:
+    input_list = []
+    for task_input in request.task_inputs:
+        task_input_pb = struct_to_protobuf(task_input, modelpb.TaskInput)
 
-                try:
-                    text_to_image_input.extra_params = json.loads(extra_params_str)
-                except JSONDecodeError:
-                    print("[DEBUG] WARNING `extra_params` parsing faield!")
-                    continue
+        keypoint_pb: keypointpb.KeypointInput = task_input_pb.keypoint
+
+        inp = VisionInput()
+        if (keypoint_pb.image_base64 != "" and keypoint_pb.image_url != "") or (
+            keypoint_pb.image_base64 == "" and keypoint_pb.image_url == ""
+        ):
+            raise InvalidInputException
+        if keypoint_pb.image_base64 != "":
+            inp.image = base64_to_pil_image(keypoint_pb.image_base64)
+        elif keypoint_pb.image_url != "":
+            inp.image = url_to_pil_image(keypoint_pb.image_url)
 
-        return text_to_image_input
+        input_list.append(inp)
 
-    @staticmethod
-    def parse_task_text_to_image_output(image):
-        return np.asarray(image).tobytes()
+    return input_list
 
-    @staticmethod
-    def parse_task_image_to_image_input(request) -> ImageToImageInput:
-        image_to_image_input = ImageToImageInput()
 
-        for i, b_input_tensor in zip(request.inputs, request.raw_input_contents):
-            input_name = i.name
+def construct_task_keypoint_output(
+    keypoints: List[List[List[tuple]]],
+    scores: List[List[float]],
+    bounding_boxes: List[List[tuple]],
+) -> TriggerResponse:
+    """Construct trigger output for keypoint task
 
-            if input_name == "prompt_image":
-                input_tensors = deserialize_bytes_tensor(b_input_tensor)
-                images = []
-                for enc in input_tensors:
-                    pil_img = Image.open(io.BytesIO(enc.astype(bytes)))  # RGB
-                    image = np.array(pil_img)
-                    if len(image.shape) == 2:  # gray image
+    Args:
+        keypoints (List[List[List[tuple]]]): for each image input, the list of detected object's keypoints,
+            with the format (x_coordinate, y_coordinate, visibility)
+        scores (List[List[float]]): for each image input, the list of detected object's score
+        bounding_boxes (List[List[tuple]]): for each image input, the list of detected object's bbox, with the format
+            (top, left, width, height)
+    """
+    if not len(keypoints) == len(scores) == len(bounding_boxes):
+        raise InvalidOutputShapeException
+
+    task_outputs = []
+    for keypoint, score, bbox in zip(keypoints, scores, bounding_boxes):
+        objects = []
+        for kps, sc, bb in zip(keypoint, score, bbox):
+            point_list = []
+            for kp in kps:
+                point_list.append(
+                    keypointpb.Keypoint(
+                        x=kp[0],
+                        y=kp[1],
+                        v=kp[2],
+                    )
+                )
+            objects.append(
+                keypointpb.KeypointObject(
+                    keypoints=point_list,
+                    score=sc,
+                    bounding_box=commonpb.BoundingBox(
+                        top=bb[0],
+                        left=bb[1],
+                        width=bb[2],
+                        height=bb[3],
+                    ),
+                )
+            )
+        task_outputs.append(
+            protobuf_to_struct(
+                modelpb.TaskOutput(keypoint=keypointpb.KeypointOutput(objects=objects))
+            )
+        )
+
+    return TriggerResponse(task_outputs=task_outputs)
+
+
+def parse_task_text_generation_to_conversation_input(
+    request: TriggerRequest,
+) -> List[ConversationInput]:
+
+    input_list = []
+
+    for task_input in request.task_inputs:
+        task_input_pb = struct_to_protobuf(task_input, modelpb.TaskInput)
+
+        text_generation_pb: textgenerationpb.TextGenerationInput = (
+            task_input_pb.text_generation
+        )
+
+        inp = ConversationInput()
+
+        conversation: List[Dict[str, str]] = []
+
+        # system message
+        if (
+            text_generation_pb.system_message is not None
+            and len(text_generation_pb.system_message) > 0
+        ):
+            conversation.append(
+                {"role": "system", "content": text_generation_pb.system_message}
+            )
+
+        # conversation history
+        if (
+            text_generation_pb.chat_history is not None
+            and len(text_generation_pb.chat_history) > 0
+        ):
+            for chat_entity in text_generation_pb.chat_history:
+                chat_message = None
+                if len(chat_entity["content"]) > 1:
+                    raise ValueError(
+                        "Multiple text messages detected"
+                        " in a single chat history entity"
+                    )
+
+                if chat_entity["content"][0]["type"] == "text":
+                    if "Content" in chat_entity["content"][0]:
+                        chat_message = chat_entity["content"][0]["Content"]["Text"]
+                    elif "Text" in chat_entity["content"][0]:
+                        chat_message = chat_entity["content"][0]["Text"]
+                    elif "text" in chat_entity["content"][0]:
+                        chat_message = chat_entity["content"][0]["text"]
+                    else:
                         raise ValueError(
-                            f"The image shape with {image.shape} is "
-                            f"not in acceptable"
+                            f"Unknown structure of chat_history: {text_generation_pb.chat_history}"
                         )
-                    images.append(image)
-                image_to_image_input.prompt_image = images[0]
-                print(
-                    f"[DEBUG] input `prompt_image` type\
-                    ({type(image_to_image_input.prompt_image)}): {image_to_image_input.prompt_image}"
-                )
+                else:
+                    raise ValueError(
+                        "Unsupported chat_history message type"
+                        ", expected 'text'"
+                        f" but got {chat_entity['content'][0]['type']}"
+                    )
 
-            if input_name == "prompt":
-                input_tensor = deserialize_bytes_tensor(b_input_tensor)
-                image_to_image_input.prompt = str(input_tensor[0].decode("utf-8"))
-                print(
-                    f"[DEBUG] input `prompt` type\
-                    ({type(image_to_image_input.prompt)}): {image_to_image_input.prompt}"
-                )
+                if chat_entity["role"] not in PROMPT_ROLES:
+                    raise ValueError(
+                        f"Role `{chat_entity['role']}` is not in supported "
+                        f"role list ({','.join(PROMPT_ROLES)})"
+                    )
+                if (
+                    chat_entity["role"] == PROMPT_ROLES[-1]
+                    and text_generation_pb.system_message is not None
+                    and len(text_generation_pb.system_message) > 0
+                ):
+                    continue
+                if chat_message is None:
+                    raise ValueError(
+                        f"No message found in chat_history. {chat_message}"
+                    )
 
-            if input_name == "steps":
-                image_to_image_input.steps = int.from_bytes(b_input_tensor, "little")
-                print(
-                    f"[DEBUG] input `steps` type\
-                    ({type(image_to_image_input.steps)}): {image_to_image_input.steps}"
-                )
+                if len(conversation) == 1 and chat_entity["role"] != PROMPT_ROLES[0]:
+                    conversation.append({"role": "user", "content": " "})
 
-            if input_name == "seed":
-                image_to_image_input.seed = int.from_bytes(b_input_tensor, "little")
-                print(
-                    f"[DEBUG] input `seed` type\
-                    ({type(image_to_image_input.seed)}): {image_to_image_input.seed}"
-                )
+                if (
+                    len(conversation) > 0
+                    and conversation[-1]["role"] == chat_entity["role"]
+                ):
+                    last_conversation = conversation.pop()
+                    chat_message = f"{last_conversation['content']}\n\n{chat_message}"
 
-            if input_name == "guidance_scale":
-                image_to_image_input.guidance_scale = struct.unpack(
-                    "f", b_input_tensor
-                )[0]
-                print(
-                    f"[DEBUG] input `guidance_scale` type\
-                    ({type(image_to_image_input.guidance_scale)}): {image_to_image_input.guidance_scale}"
-                )
-                image_to_image_input.guidance_scale = round(
-                    image_to_image_input.guidance_scale, 2
+                conversation.append(
+                    {"role": chat_entity["role"], "content": chat_message}
                 )
 
-            if input_name == "samples":
-                image_to_image_input.samples = int.from_bytes(b_input_tensor, "little")
-                print(
-                    f"[DEBUG] input `samples` type\
-                    ({type(image_to_image_input.samples)}): {image_to_image_input.samples}"
-                )
+        # conversation
+        prompt = text_generation_pb.prompt
+        if len(conversation) > 0 and conversation[-1]["role"] == PROMPT_ROLES[0]:
+            last_conversation = conversation.pop()
+            prompt = f"{last_conversation['content']}\n\n{prompt}"
 
-            if input_name == "extra_params":
-                input_tensor = deserialize_bytes_tensor(b_input_tensor)
-                extra_params_str = str(input_tensor[0].decode("utf-8"))
-                print(
-                    f"[DEBUG] input `extra_params` type\
-                    ({type(extra_params_str)}): {extra_params_str}"
-                )
+        conversation.append({"role": "user", "content": prompt})
 
-                try:
-                    image_to_image_input.extra_params = json.loads(extra_params_str)
-                except JSONDecodeError:
-                    print("[DEBUG] WARNING `extra_params` parsing faield!")
-                    continue
+        inp.conversation = conversation
 
-        return image_to_image_input
+        # max new tokens
+        if text_generation_pb.max_new_tokens is not None:
+            inp.max_new_tokens = text_generation_pb.max_new_tokens
 
-    @staticmethod
-    def parse_task_image_to_image_output(image):
-        return np.asarray(image).tobytes()
+        # temperature
+        if text_generation_pb.temperature is not None:
+            inp.temperature = text_generation_pb.temperature
 
-    @staticmethod
-    def parse_task_text_generation_chat_input(request) -> TextGenerationChatInput:
-        text_generation_chat_input = TextGenerationChatInput()
+        # top k
+        if text_generation_pb.top_k is not None:
+            inp.top_k = text_generation_pb.top_k
 
-        for i, b_input_tensor in zip(request.inputs, request.raw_input_contents):
-            input_name = i.name
+        # seed
+        if text_generation_pb.seed is not None:
+            inp.seed = text_generation_pb.seed
 
-            if input_name == "prompt":
-                input_tensor = deserialize_bytes_tensor(b_input_tensor)
-                text_generation_chat_input.prompt = str(input_tensor[0].decode("utf-8"))
-                print(
-                    f"[DEBUG] input `prompt` type\
-                    ({type(text_generation_chat_input.prompt)}): {text_generation_chat_input.prompt}"
-                )
+        input_list.append(inp)
+
+    return input_list
+
+
+def construct_task_text_generation_output(texts: List[str]) -> TriggerResponse:
+    task_outputs = []
+
+    for text in texts:
+        task_outputs.append(
+            protobuf_to_struct(
+                modelpb.TaskOutput(
+                    text_generation=textgenerationpb.TextGenerationOutput(text=text)
                 )
+            )
+        )
+
+    return TriggerResponse(task_outputs=task_outputs)
+
+
+def parse_task_text_generation_chat_to_conversation_input(
+    request: TriggerRequest,
+) -> List[ConversationInput]:
+
+    input_list = []
+
+    for task_input in request.task_inputs:
+        task_input_pb = struct_to_protobuf(task_input, modelpb.TaskInput)
+
+        text_generation_chat_pb: textgenerationchatpb.TextGenerationChatInput = (
+            task_input_pb.text_generation_chat
+        )
+
+        inp = ConversationInput()
+
+        conversation = []
+
+        # system message
+        if (
+            text_generation_chat_pb.system_message is not None
+            and len(text_generation_chat_pb.system_message) > 0
+        ):
+            conversation.append(
+                {
+                    "role": "system",
+                    "content": text_generation_chat_pb.system_message,
+                }
+            )
+
+        # conversation history
+        if (
+            text_generation_chat_pb.chat_history is not None
+            and len(text_generation_chat_pb.chat_history) > 0
+        ):
+            for chat_entity in text_generation_chat_pb.chat_history:
+                chat_message = None
+                if len(chat_entity["content"]) > 1:
+                    raise ValueError(
+                        "Multiple text messages detected"
+                        " in a single chat history entity"
+                    )

-            if input_name == "prompt_images":
-                input_tensors = deserialize_bytes_tensor(b_input_tensor)
-                images = []
-                for enc in input_tensors:
-                    if len(enc) == 0:
-                        continue
-                    try:
-                        enc_json = json.loads(str(enc.decode("utf-8")))
-                        if len(enc_json) == 0:
-                            continue
-                        decoded_enc = enc_json[0]
-                    except JSONDecodeError:
-                        print("[DEBUG] WARNING `enc_json` parsing faield!")
-                    # pil_img = Image.open(io.BytesIO(enc.astype(bytes)))  # RGB
-                    pil_img = Image.open(io.BytesIO(base64.b64decode(decoded_enc)))
-
-                    image = np.array(pil_img)
-                    if len(image.shape) == 2:  # gray image
+                if chat_entity["content"][0]["type"] == "text":
+                    if "Content" in chat_entity["content"][0]:
+                        chat_message = chat_entity["content"][0]["Content"]["Text"]
+                    elif "Text" in chat_entity["content"][0]:
+                        chat_message = chat_entity["content"][0]["Text"]
+                    elif "text" in chat_entity["content"][0]:
+                        chat_message = chat_entity["content"][0]["text"]
+                    else:
                         raise ValueError(
-                            f"The image shape with {image.shape} is "
-                            f"not in acceptable"
+                            f"Unknown structure of chat_history: {text_generation_chat_pb.chat_history}"
                         )
-                    images.append(image)
-                text_generation_chat_input.prompt_images = images
-                print(
-                    "[DEBUG] input `prompt_images` type"
-                    f"({type(text_generation_chat_input.prompt_images)}): "
-                    f"{text_generation_chat_input.prompt_images}"
-                )
+                else:
+                    raise ValueError(
+                        "Unsupported chat_history message type"
+                        ", expected 'text'"
+                        f" but got {chat_entity['content'][0]['type']}"
+                    )

-            if input_name == "chat_history":
-                input_tensor = deserialize_bytes_tensor(b_input_tensor)
-                chat_history_str = str(input_tensor[0].decode("utf-8"))
-                print(
-                    "[DEBUG] input `chat_history_str` type"
-                    f"({type(chat_history_str)}): "
-                    f"{chat_history_str}"
-                )
-                try:
-                    text_generation_chat_input.chat_history = json.loads(
-                        chat_history_str
+                if chat_entity["role"] not in PROMPT_ROLES:
+                    raise ValueError(
+                        f"Role `{chat_entity['role']}` is not in the supported"
+                        f" role list ({','.join(PROMPT_ROLES)})"
                     )
-                except JSONDecodeError:
-                    print("[DEBUG] WARNING `extra_params` parsing faield!")
+                if (
+                    chat_entity["role"] == PROMPT_ROLES[-1]
+                    and text_generation_chat_pb.system_message is not None
+                    and len(text_generation_chat_pb.system_message) > 0
+                ):
                     continue
+                if chat_message is None:
+                    raise ValueError(
+                        f"No message found in chat_history. {chat_entity}"
+                    )

-            if input_name == "system_message":
-                input_tensor = deserialize_bytes_tensor(b_input_tensor)
-                text_generation_chat_input.system_message = str(
-                    input_tensor[0].decode("utf-8")
-                )
-                print(
-                    "[DEBUG] input `system_message` type"
-                    f"({type(text_generation_chat_input.system_message)}): "
-                    f"{text_generation_chat_input.system_message}"
-                )
+                if len(conversation) == 1 and chat_entity["role"] != PROMPT_ROLES[0]:
+                    conversation.append({"role": "user", "content": " "})

-            if input_name == "max_new_tokens":
-                text_generation_chat_input.max_new_tokens = int.from_bytes(
-                    b_input_tensor, "little"
-                )
-                print(
-                    "[DEBUG] input `max_new_tokens` type"
-                    f"({type(text_generation_chat_input.max_new_tokens)}): "
-                    f"{text_generation_chat_input.max_new_tokens}"
-                )
+                if (
+                    len(conversation) > 0
+                    and conversation[-1]["role"] == chat_entity["role"]
+                ):
+                    last_conversation = conversation.pop()
+                    chat_message = f"{last_conversation['content']}\n\n{chat_message}"

-            if input_name == "top_k":
-                text_generation_chat_input.top_k = int.from_bytes(
-                    b_input_tensor, "little"
-                )
-                print(
-                    "[DEBUG] input `top_k` type"
-                    f"({type(text_generation_chat_input.top_k)}): "
-                    f"{text_generation_chat_input.top_k}"
+                conversation.append(
+                    {"role": chat_entity["role"], "content": chat_message}
                 )

-            if input_name == "temperature":
-                text_generation_chat_input.temperature = struct.unpack(
-                    "f", b_input_tensor
-                )[0]
-                print(
-                    "[DEBUG] input `temperature` type"
-                    f"({type(text_generation_chat_input.temperature)}): "
-                    f"{text_generation_chat_input.temperature}"
-                )
-                text_generation_chat_input.temperature = round(
-                    text_generation_chat_input.temperature, 2
-                )
+        # conversation
+        prompt = text_generation_chat_pb.prompt
+        if len(conversation) > 0 and conversation[-1]["role"] == PROMPT_ROLES[0]:
+            last_conversation = conversation.pop()
+            prompt = f"{last_conversation['content']}\n\n{prompt}"

-            if input_name == "random_seed":
-                text_generation_chat_input.random_seed = int.from_bytes(
-                    b_input_tensor, "little"
-                )
-                print(
-                    "[DEBUG] input `random_seed` type"
-                    f"({type(text_generation_chat_input.random_seed)}): "
-                    f"{text_generation_chat_input.random_seed}"
-                )
+        conversation.append({"role": "user", "content": prompt})

-            if input_name == "extra_params":
-                input_tensor = deserialize_bytes_tensor(b_input_tensor)
-                extra_params_str = str(input_tensor[0].decode("utf-8"))
-                print(
-                    "[DEBUG] input `extra_params` type"
-                    f"({type(extra_params_str)}): "
-                    f"{extra_params_str}"
-                )
+        inp.conversation = conversation

-                try:
-                    text_generation_chat_input.extra_params = json.loads(
-                        extra_params_str
-                    )
-                except JSONDecodeError:
-                    print("[DEBUG] WARNING `extra_params` parsing faield!")
-                    continue
+        # max new tokens
+        if text_generation_chat_pb.max_new_tokens is not None:
+            inp.max_new_tokens = text_generation_chat_pb.max_new_tokens

-        return text_generation_chat_input
+        # temperature
+        if text_generation_chat_pb.temperature is not None:
+            inp.temperature = text_generation_chat_pb.temperature

-    @staticmethod
-    def parse_task_text_generation_chat_output(sequences: list):
-        text_outputs = [seq["generated_text"].encode("utf-8") for seq in sequences]
+        # top k
+        if text_generation_chat_pb.top_k is not None:
+            inp.top_k = text_generation_chat_pb.top_k

-        return serialize_byte_tensor(np.asarray(text_outputs))
+        # seed
+        if text_generation_chat_pb.seed is not None:
+            inp.seed = text_generation_chat_pb.seed

-    @staticmethod
-    def parse_task_visual_question_answering_input(
-        request,
-    ) -> VisualQuestionAnsweringInput:
-        text_visual_question_answering_input = VisualQuestionAnsweringInput()
+        input_list.append(inp)

-        for i, b_input_tensor in zip(request.inputs, request.raw_input_contents):
-            input_name = i.name
+    return input_list

-            if input_name == "prompt":
-                input_tensor = deserialize_bytes_tensor(b_input_tensor)
-                text_visual_question_answering_input.prompt = str(
-                    input_tensor[0].decode("utf-8")
-                )
-                print(
-                    "[DEBUG] input `prompt` type"
-                    f"({type(text_visual_question_answering_input.prompt)}): "
-                    f"{text_visual_question_answering_input.prompt}"
-                )

-            if input_name == "prompt_images":
-                input_tensors = deserialize_bytes_tensor(b_input_tensor)
-                images = []
-                for enc in input_tensors:
-                    if len(enc) == 0:
-                        continue
-                    try:
-                        enc_json = json.loads(str(enc.decode("utf-8")))
-                        if len(enc_json) == 0:
-                            continue
-                        decoded_enc = enc_json[0]
-                    except JSONDecodeError:
-                        print("[DEBUG] WARNING `enc_json` parsing faield!")
-                    # pil_img = Image.open(io.BytesIO(enc.astype(bytes)))  # RGB
-                    pil_img = Image.open(io.BytesIO(base64.b64decode(decoded_enc)))
-
-                    image = np.array(pil_img)
-                    if len(image.shape) == 2:  # gray image
-                        raise ValueError(
-                            f"The image shape with {image.shape} is "
-                            f"not in acceptable"
-                        )
-                    images.append(image)
-                # TODO: check wethere there are issues in batch size dimention
-                text_visual_question_answering_input.prompt_images = images
-                print(
-                    "[DEBUG] input `prompt_images` type"
-                    f"({type(text_visual_question_answering_input.prompt_images)}): "
-                    f"{text_visual_question_answering_input.prompt_images}"
-                )
+def construct_task_text_generation_chat_output(texts: List[str]) -> TriggerResponse:
+    task_outputs = []

-            if input_name == "chat_history":
-                input_tensor = deserialize_bytes_tensor(b_input_tensor)
-                chat_history_str = str(input_tensor[0].decode("utf-8"))
-                print(
-                    "[DEBUG] input `chat_history_str` type"
-                    f"({type(chat_history_str)}): "
-                    f"{chat_history_str}"
+    for text in texts:
+        task_outputs.append(
+            protobuf_to_struct(
+                modelpb.TaskOutput(
+                    text_generation_chat=textgenerationchatpb.TextGenerationChatOutput(
+                        text=text
+                    )
                 )
-                try:
-                    text_visual_question_answering_input.chat_history = json.loads(
-                        chat_history_str
+            )
+        )
+
+    return TriggerResponse(task_outputs=task_outputs)
+
+
+def parse_task_visual_question_answering_to_conversation_multimodal_input(
+    request: TriggerRequest,
+) -> List[ConversationMultiModelInput]:
+
+    input_list = []
+
+    for task_input in request.task_inputs:
+        task_input_pb = struct_to_protobuf(task_input, modelpb.TaskInput)
+
+        visual_question_answering_pb: (
+            visualquestionansweringpb.VisualQuestionAnsweringInput
+        ) = task_input_pb.visual_question_answering
+
+        inp = ConversationMultiModelInput()
+
+        conversation: List[Union[Dict[str, Union[str, Dict[str, str]]]]] = []
+
+        # system message
+        if (
+            visual_question_answering_pb.system_message is not None
+            and len(visual_question_answering_pb.system_message) > 0
+        ):
+            conversation.append(
+                {
+                    "role": "system",
+                    "content": {
+                        "type": "text",
+                        "content": visual_question_answering_pb.system_message,
+                    },
+                }
+            )
+
+        # conversation history
+        if (
+            visual_question_answering_pb.chat_history is not None
+            and len(visual_question_answering_pb.chat_history) > 0
+        ):
+            for chat_entity in visual_question_answering_pb.chat_history:
+                chat_dict = json_format.MessageToDict(chat_entity)
+                conversation.append(chat_dict)
+
+        # conversation
+        prompt = visual_question_answering_pb.prompt
+        if len(conversation) > 0 and conversation[-1]["role"] == PROMPT_ROLES[0]:
+            last_conversation = conversation.pop()
+            prompt = f"{last_conversation['content']['content']}\n\n{prompt}"  # type: ignore
+
+        conversation.append(
+            {
+                "role": "user",
+                "content": {
+                    "type": "text",
+                    "content": prompt,
+                },
+            }
+        )
+
+        inp.conversation = conversation
+
+        # prompt images
+        prompt_image_list = []
+        if (
+            visual_question_answering_pb.prompt_images is not None
+            and len(visual_question_answering_pb.prompt_images) > 0
+        ):
+            for prompt_image in visual_question_answering_pb.prompt_images:
+                if (
+                    prompt_image.prompt_image_base64 != ""
+                    and prompt_image.prompt_image_url != ""
+                ) or (
+                    prompt_image.prompt_image_base64 == ""
+                    and prompt_image.prompt_image_url == ""
+                ):
+                    raise InvalidInputException
+                if prompt_image.prompt_image_base64 != "":
+                    prompt_image_list.append(
+                        base64_to_pil_image(prompt_image.prompt_image_base64)
                     )
-                except JSONDecodeError:
-                    print("[DEBUG] WARNING `extra_params` parsing faield!")
-                    continue
+                elif prompt_image.prompt_image_url != "":
+                    prompt_image_list.append(
+                        url_to_pil_image(prompt_image.prompt_image_url)
+                    )
+        inp.prompt_images = prompt_image_list

-            if input_name == "system_message":
-                input_tensor = deserialize_bytes_tensor(b_input_tensor)
-                text_visual_question_answering_input.system_message = str(
-                    input_tensor[0].decode("utf-8")
-                )
-                print(
-                    "[DEBUG] input `system_message` type"
-                    f"({type(text_visual_question_answering_input.system_message)}): "
-                    f"{text_visual_question_answering_input.system_message}"
-                )
+        # max new tokens
+        if visual_question_answering_pb.max_new_tokens is not None:
+            inp.max_new_tokens = visual_question_answering_pb.max_new_tokens

-            if input_name == "max_new_tokens":
-                text_visual_question_answering_input.max_new_tokens = int.from_bytes(
-                    b_input_tensor, "little"
-                )
-                print(
-                    "[DEBUG] input `max_new_tokens` type"
-                    f"({type(text_visual_question_answering_input.max_new_tokens)}): "
-                    f"{text_visual_question_answering_input.max_new_tokens}"
-                )
+        # temperature
+        if visual_question_answering_pb.temperature is not None:
+            inp.temperature = visual_question_answering_pb.temperature

-            if input_name == "top_k":
-                text_visual_question_answering_input.top_k = int.from_bytes(
-                    b_input_tensor, "little"
-                )
-                print(
-                    "[DEBUG] input `top_k` type"
-                    f"({type(text_visual_question_answering_input.top_k)}): "
-                    f"{text_visual_question_answering_input.top_k}"
-                )
+        # top k
+        if visual_question_answering_pb.top_k is not None:
+            inp.top_k = visual_question_answering_pb.top_k

-            if input_name == "temperature":
-                text_visual_question_answering_input.temperature = struct.unpack(
-                    "f", b_input_tensor
-                )[0]
-                print(
-                    "[DEBUG] input `temperature` type"
-                    f"({type(text_visual_question_answering_input.temperature)}): "
-                    f"{text_visual_question_answering_input.temperature}"
-                )
-                text_visual_question_answering_input.temperature = round(
-                    text_visual_question_answering_input.temperature, 2
-                )
+        # seed
+        if visual_question_answering_pb.seed is not None:
+            inp.seed = visual_question_answering_pb.seed

-            if input_name == "random_seed":
-                text_visual_question_answering_input.random_seed = int.from_bytes(
-                    b_input_tensor, "little"
-                )
-                print(
-                    "[DEBUG] input `random_seed` type"
-                    f"({type(text_visual_question_answering_input.random_seed)}): "
-                    f"{text_visual_question_answering_input.random_seed}"
+        input_list.append(inp)
+
+    return input_list
+
+
+def construct_task_visual_question_answering_output(
+    texts: List[str],
+) -> TriggerResponse:
+    task_outputs = []
+
+    for text in texts:
+        task_outputs.append(
+            protobuf_to_struct(
+                modelpb.TaskOutput(
+                    visual_question_answering=visualquestionansweringpb.VisualQuestionAnsweringOutput(
+                        text=text
+                    )
                 )
+            )
+        )
+
+    return TriggerResponse(task_outputs=task_outputs)
+
+
+def parse_task_text_to_image_input(
+    request: TriggerRequest,
+) -> List[TextToImageInput]:
+
+    input_list = []
+
+    for task_input in request.task_inputs:
+        task_input_pb = struct_to_protobuf(task_input, modelpb.TaskInput)
+
+        text_to_image_pb: texttoimagepb.TextToImageInput = task_input_pb.text_to_image
+
+        inp = TextToImageInput()
+
+        # prompt
+        inp.prompt = text_to_image_pb.prompt
+
+        # steps
+        if text_to_image_pb.steps is not None:
+            inp.steps = text_to_image_pb.steps
+
+        # cfg scale
+        if text_to_image_pb.cfg_scale is not None:
+            inp.cfg_scale = text_to_image_pb.cfg_scale
+
+        # samples
+        if text_to_image_pb.samples is not None:
+            inp.samples = text_to_image_pb.samples
+
+        # seed
+        if text_to_image_pb.seed is not None:
+            inp.seed = text_to_image_pb.seed
+
+        input_list.append(inp)
+
+    return input_list
+

-            if input_name == "extra_params":
-                input_tensor = deserialize_bytes_tensor(b_input_tensor)
-                extra_params_str = str(input_tensor[0].decode("utf-8"))
-                print(
-                    "[DEBUG] input `extra_params` type"
-                    f"({type(extra_params_str)}): "
-                    f"{extra_params_str}"
+def construct_task_text_to_image_output(
+    images: List[List[str]],
+) -> TriggerResponse:
+    """Construct trigger output for the text to image task
+
+    Args:
+        images (List[List[str]]): for each input prompt, the list of generated images, `samples` images per prompt
+    """
+    task_outputs = []
+
+    for imgs in images:
+        task_outputs.append(
+            protobuf_to_struct(
+                modelpb.TaskOutput(
+                    text_to_image=texttoimagepb.TextToImageOutput(images=imgs)
                 )
+            )
+        )

-                try:
-                    text_visual_question_answering_input.extra_params = json.loads(
-                        extra_params_str
-                    )
-                except JSONDecodeError:
-                    print("[DEBUG] WARNING `extra_params` parsing faield!")
-                    continue
+    return TriggerResponse(task_outputs=task_outputs)
+
+
+def parse_task_image_to_image_input(
+    request: TriggerRequest,
+) -> List[ImageToImageInput]:
+
+    input_list = []

-        return text_visual_question_answering_input
+    for task_input in request.task_inputs:
+        task_input_pb = struct_to_protobuf(task_input, modelpb.TaskInput)

-    @staticmethod
-    def parse_task_visual_question_answering_output(sequences: list):
-        text_outputs = [seq["generated_text"].encode("utf-8") for seq in sequences]
+        image_to_image_pb: imagetoimagepb.ImageToImageInput = (
+            task_input_pb.image_to_image
+        )

-        return serialize_byte_tensor(np.asarray(text_outputs))
+        inp = ImageToImageInput()

+        # prompt
+        inp.prompt = image_to_image_pb.prompt

-class RawIO:
-    @staticmethod
-    def parse_byte_tensor(byte_tensor) -> List[str]:
-        input_tensors = deserialize_bytes_tensor(byte_tensor)
-        outs = [str(tensor.decode("utf-8")) for tensor in input_tensors]
+        # prompt images
+        if (
+            image_to_image_pb.prompt_image_base64 != ""
+            and image_to_image_pb.prompt_image_url != ""
+        ) or (
+            image_to_image_pb.prompt_image_base64 == ""
+            and image_to_image_pb.prompt_image_url == ""
+        ):
+            raise InvalidInputException
+        if image_to_image_pb.prompt_image_base64 != "":
+            inp.prompt_image = base64_to_pil_image(
+                image_to_image_pb.prompt_image_base64
+            )
+        elif image_to_image_pb.prompt_image_url != "":
+            inp.prompt_image = url_to_pil_image(image_to_image_pb.prompt_image_url)

-        return outs
+        # steps
+        if image_to_image_pb.steps is not None:
+            inp.steps = image_to_image_pb.steps

-    @staticmethod
-    def parse_unsigned_int_tensor(int_tensor) -> int:
-        return int.from_bytes(int_tensor, "little")
+        # cfg scale
+        if image_to_image_pb.cfg_scale is not None:
+            inp.cfg_scale = image_to_image_pb.cfg_scale

-    @staticmethod
-    def parse_signed_int_tensor(int_tensor) -> int:
-        return int.from_bytes(int_tensor, "little", signed=True)
+        # samples
+        if image_to_image_pb.samples is not None:
+            inp.samples = image_to_image_pb.samples

-    @staticmethod
-    def parse_float_tensor(float_tensor) -> float:
-        return struct.unpack("f", float_tensor)[0]
+        # seed
+        if image_to_image_pb.seed is not None:
+            inp.seed = image_to_image_pb.seed
+
+        input_list.append(inp)
+
+    return input_list
+
+
+def construct_task_image_to_image_output(
+    images: List[List[str]],
+) -> TriggerResponse:
+    """Construct trigger output for the image to image task
+
+    Args:
+        images (List[List[str]]): for each input prompt, the list of generated images, `samples` images per prompt
+    """
+    task_outputs = []
+
+    for imgs in images:
+        task_outputs.append(
+            protobuf_to_struct(
+                modelpb.TaskOutput(
+                    image_to_image=imagetoimagepb.ImageToImageOutput(images=imgs)
+                )
+            )
+        )

-    @staticmethod
-    def parse_boolean_tensor(bool_tensor) -> bool:
-        return struct.unpack("?", bool_tensor)[0]
+    return TriggerResponse(task_outputs=task_outputs)
diff --git a/poetry.lock b/poetry.lock
index 025cc837..72668b2a 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -211,8 +211,8 @@ files = [
 lazy-object-proxy = ">=1.4.0"
 typing-extensions = {version = ">=4.0.0", markers = "python_version < \"3.11\""}
 wrapt = [
-    {version = ">=1.11,<2", markers = "python_version < \"3.11\""},
     {version = ">=1.14,<2", markers = "python_version >= \"3.11\""},
+    {version = ">=1.11,<2", markers = "python_version < \"3.11\""},
 ]
 
 [[package]]
@@ -720,10 +720,10 @@ isort = ">=4.3.21,<6.0"
 jinja2 = ">=2.10.1,<4.0"
 packaging = "*"
 pydantic = [
-    {version = ">=1.5.1,<2.4.0 || >2.4.0,<3.0", extras = ["email"], markers = "python_version < \"3.10\""},
-    {version = ">=1.9.0,<2.4.0 || >2.4.0,<3.0", extras = ["email"], markers = "python_version >= \"3.10\" and python_version < \"3.11\""},
     {version = ">=1.10.0,<2.0.0 || >2.0.0,<2.0.1 || >2.0.1,<2.4.0 || >2.4.0,<3.0", extras = ["email"], markers = "python_version >= \"3.12\" and python_version < \"4.0\""},
     {version = ">=1.10.0,<2.4.0 || >2.4.0,<3.0", extras = ["email"], markers = "python_version >= \"3.11\" and python_version < \"3.12\""},
+    {version = ">=1.5.1,<2.4.0 || >2.4.0,<3.0", extras = ["email"], markers = "python_version < \"3.10\""},
+    {version = ">=1.9.0,<2.4.0 || >2.4.0,<3.0", extras = ["email"], markers = "python_version >= \"3.10\" and python_version < \"3.11\""},
 ]
 pyyaml = ">=6.0.1"
 toml = {version = ">=0.10.0,<1.0.0", markers = "python_version < \"3.11\""}
@@ -888,24 +888,22 @@ tests = ["asttokens (>=2.1.0)", "coverage", "coverage-enable-subprocess", "ipyth
 
 [[package]]
 name = "fastapi"
-version = "0.1.17"
+version = "0.109.2"
 description = "FastAPI framework, high performance, easy to learn, fast to code, ready for production"
 optional = false
-python-versions = ">=3.6"
+python-versions = ">=3.8"
 files = [
-    {file = "fastapi-0.1.17-py3-none-any.whl", hash = "sha256:a6aaad2f60684477480ac9d7a1c95e67f4696a722f184db467494bfdd5b8f29d"},
-    {file = "fastapi-0.1.17.tar.gz", hash = "sha256:a9a9b6cc32c38bab27a6549b94c44a30c70b485bc789d03de3aa8725f3394be5"},
+    {file = "fastapi-0.109.2-py3-none-any.whl", hash = "sha256:2c9bab24667293b501cad8dd388c05240c850b58ec5876ee3283c47d6e1e3a4d"},
+    {file = "fastapi-0.109.2.tar.gz", hash = "sha256:f3817eac96fe4f65a2ebb4baa000f394e55f5fccdaf7f75250804bc58f354f73"},
 ]
 
 [package.dependencies]
-pydantic = ">=0.17"
-starlette = ">=0.9.7"
+pydantic = ">=1.7.4,<1.8 || >1.8,<1.8.1 || >1.8.1,<2.0.0 || >2.0.0,<2.0.1 || >2.0.1,<2.1.0 || >2.1.0,<3.0.0"
">=1.7.4,<1.8 || >1.8,<1.8.1 || >1.8.1,<2.0.0 || >2.0.0,<2.0.1 || >2.0.1,<2.1.0 || >2.1.0,<3.0.0" +starlette = ">=0.36.3,<0.37.0" +typing-extensions = ">=4.8.0" [package.extras] -all = ["aiofiles", "email_validator", "graphene", "itsdangerous", "jinja2", "python-multipart", "pyyaml", "requests", "ujson", "ujson", "uvicorn"] -dev = ["passlib[bcrypt]", "pyjwt"] -doc = ["markdown-include", "mkdocs", "mkdocs-material"] -test = ["black", "email_validator", "isort", "mypy", "pytest (>=4.0.0)", "pytest-cov", "requests"] +all = ["email-validator (>=2.0.0)", "httpx (>=0.23.0)", "itsdangerous (>=1.1.0)", "jinja2 (>=2.11.2)", "orjson (>=3.2.1)", "pydantic-extra-types (>=2.0.0)", "pydantic-settings (>=2.0.0)", "python-multipart (>=0.0.7)", "pyyaml (>=5.3.1)", "ujson (>=4.0.1,!=4.0.2,!=4.1.0,!=4.2.0,!=4.3.0,!=5.0.0,!=5.1.0)", "uvicorn[standard] (>=0.12.0)"] [[package]] name = "filelock" @@ -2962,8 +2960,8 @@ files = [ astroid = ">=2.12.13,<=2.14.0-dev0" colorama = {version = ">=0.4.5", markers = "sys_platform == \"win32\""} dill = [ - {version = ">=0.2", markers = "python_version < \"3.11\""}, {version = ">=0.3.6", markers = "python_version >= \"3.11\""}, + {version = ">=0.2", markers = "python_version < \"3.11\""}, ] isort = ">=4.2.5,<6" mccabe = ">=0.6,<0.8" @@ -3168,7 +3166,6 @@ files = [ {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"}, {file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"}, {file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"}, - {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a08c6f0fe150303c1c6b71ebcd7213c2858041a7e01975da3a99aed1e7a378ef"}, {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"}, {file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"}, {file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"}, @@ -3350,8 +3347,8 @@ fastapi = {version = "*", optional = true, markers = "extra == \"serve\""} filelock = "*" frozenlist = "*" grpcio = [ - {version = ">=1.32.0", optional = true, markers = "python_version < \"3.10\" and extra == \"serve\""}, {version = ">=1.42.0", optional = true, markers = "python_version >= \"3.10\" and extra == \"serve\""}, + {version = ">=1.32.0", optional = true, markers = "python_version < \"3.10\" and extra == \"serve\""}, ] jsonschema = "*" memray = {version = "*", optional = true, markers = "sys_platform != \"win32\" and extra == \"serve\""} @@ -3420,13 +3417,13 @@ rpds-py = ">=0.7.0" [[package]] name = "requests" -version = "2.32.0" +version = "2.32.3" description = "Python HTTP for Humans." 
optional = false python-versions = ">=3.8" files = [ - {file = "requests-2.32.0-py3-none-any.whl", hash = "sha256:f2c3881dddb70d056c5bd7600a4fae312b2a300e39be6a118d30b90bd27262b5"}, - {file = "requests-2.32.0.tar.gz", hash = "sha256:fa5490319474c82ef1d2c9bc459d3652e3ae4ef4c4ebdd18a21145a47ca4b6b8"}, + {file = "requests-2.32.3-py3-none-any.whl", hash = "sha256:70761cfe03c773ceb22aa2f671b4757976145175cdfca038c02654d061d6dcc6"}, + {file = "requests-2.32.3.tar.gz", hash = "sha256:55365417734eb18255590a9ff9eb97e9e1da868d4ccd6402399eaf68af20a760"}, ] [package.dependencies] @@ -3740,13 +3737,13 @@ tests = ["cython", "littleutils", "pygments", "pytest", "typeguard"] [[package]] name = "starlette" -version = "0.36.2" +version = "0.36.3" description = "The little ASGI library that shines." optional = false python-versions = ">=3.8" files = [ - {file = "starlette-0.36.2-py3-none-any.whl", hash = "sha256:e53e086e620ba715f0c187daca92927b722484d148e70921f0de075936119536"}, - {file = "starlette-0.36.2.tar.gz", hash = "sha256:4134757b950f027c8f16028ec81787751bb44145df17c8b0fa84087b9851b42d"}, + {file = "starlette-0.36.3-py3-none-any.whl", hash = "sha256:13d429aa93a61dc40bf503e8c801db1f1bca3dc706b10ef2434a36123568f044"}, + {file = "starlette-0.36.3.tar.gz", hash = "sha256:90a671733cfb35771d8cc605e0b679d23b992f8dcfad48cc60b38cb29aeb7080"}, ] [package.dependencies] @@ -3887,6 +3884,20 @@ files = [ {file = "types_PyYAML-6.0.12.12-py3-none-any.whl", hash = "sha256:c05bc6c158facb0676674b7f11fe3960db4f389718e19e62bd2b84d6205cfd24"}, ] +[[package]] +name = "types-requests" +version = "2.32.0.20240712" +description = "Typing stubs for requests" +optional = false +python-versions = ">=3.8" +files = [ + {file = "types-requests-2.32.0.20240712.tar.gz", hash = "sha256:90c079ff05e549f6bf50e02e910210b98b8ff1ebdd18e19c873cd237737c1358"}, + {file = "types_requests-2.32.0.20240712-py3-none-any.whl", hash = "sha256:f754283e152c752e46e70942fa2a146b5bc70393522257bb85bd1ef7e019dcc3"}, +] + +[package.dependencies] +urllib3 = ">=2" + [[package]] name = "typing-extensions" version = "4.10.0" @@ -4439,4 +4450,4 @@ test = ["big-O", "jaraco.functools", "jaraco.itertools", "jaraco.test", "more-it [metadata] lock-version = "2.0" python-versions = ">=3.8,<3.13" -content-hash = "1876de5ff065c34c43e4b345f4ba8963d82300262105ddfd30c7dd59438ffe35" +content-hash = "025e3ea2f8e21e4bf117fe6acfd61ff0782d73cf182a19568b095a85ed2ada29" diff --git a/pyproject.toml b/pyproject.toml index d7c561db..9e2fccf0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -32,15 +32,18 @@ grpcio = "^1.59.0" pyyaml = "^6.0.1" numpy = "^1.21.0" protobuf = "^4.24.2" -types-protobuf = "^4.24.0.1" -types-pyyaml = "^6.0.12.11" +requests = "^2.32.3" +fastapi = "^0.109.2" google-api-core = "^2.11.1" googleapis-common-protos = "^1.60.0" protoc-gen-openapiv2 = "^0.0.1" pydantic = ">=1.10.13" pillow = "^10.1.0" -ray = {version = "2.21.0", extras = ["serve"]} +ray = { version = "2.21.0", extras = ["serve"] } jsonschema = "^4.20.0" +types-requests = "^2.32.0.20240712" +types-protobuf = "^4.24.0.1" +types-pyyaml = "^6.0.12.11" [tool.poetry.dev-dependencies]
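
# ----------------------------------------------------------------------------
# Usage sketch (illustrative only, not part of the diff above): one way a model
# implementation might call the new TEXT_GENERATION helpers. `MyLLM`, its
# `self.pipeline` callable, and the flat prompt formatting are hypothetical;
# only TriggerRequest/TriggerResponse and the parse/construct helpers come from
# the instill.helpers exports introduced in this change.
# ----------------------------------------------------------------------------
from instill.helpers import (
    TriggerRequest,
    TriggerResponse,
    construct_task_text_generation_output,
    parse_task_text_generation_to_conversation_input,
)


class MyLLM:  # hypothetical model wrapper
    def ModelTrigger(self, request: TriggerRequest) -> TriggerResponse:
        # The parser yields one ConversationInput per TaskInput in the request.
        inputs = parse_task_text_generation_to_conversation_input(request)

        texts = []
        for inp in inputs:
            # inp.conversation is an ordered list of {"role", "content"} dicts:
            # system message, merged chat history, then the final user prompt.
            prompt = "\n".join(
                f"{m['role']}: {m['content']}" for m in inp.conversation
            )
            texts.append(self.pipeline(prompt, max_new_tokens=inp.max_new_tokens))

        # One TaskOutput per generated string, wrapped into a TriggerResponse.
        return construct_task_text_generation_output(texts=texts)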
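
# ----------------------------------------------------------------------------
# A second sketch (also illustrative) for the IMAGE_TO_IMAGE round trip.
# `MyDiffusionModel` and its `self.pipe` callable are hypothetical, and the
# base64 encoder is defined inline because the diff only shows the decoding
# direction (base64_to_pil_image / url_to_pil_image).
# ----------------------------------------------------------------------------
import base64
import io

from instill.helpers import (
    TriggerRequest,
    TriggerResponse,
    construct_task_image_to_image_output,
    parse_task_image_to_image_input,
)


def pil_to_base64(img) -> str:
    # Naive PNG/base64 encoder for a PIL image (illustrative only).
    buf = io.BytesIO()
    img.save(buf, format="PNG")
    return base64.b64encode(buf.getvalue()).decode("utf-8")


class MyDiffusionModel:  # hypothetical model wrapper
    def ModelTrigger(self, request: TriggerRequest) -> TriggerResponse:
        images = []
        for inp in parse_task_image_to_image_input(request):
            # inp.prompt_image arrives as an already-decoded PIL image; the
            # parser raises InvalidInputException unless exactly one of the
            # base64 payload or the URL was provided.
            generated = self.pipe(
                prompt=inp.prompt,
                image=inp.prompt_image,
                num_inference_steps=inp.steps,
                guidance_scale=inp.cfg_scale,
                num_images_per_prompt=inp.samples,
            )
            # construct_task_image_to_image_output expects List[List[str]]:
            # per input, a list of `samples` base64-encoded images.
            images.append([pil_to_base64(img) for img in generated])
        return construct_task_image_to_image_output(images=images)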