From 8a5c4c8ce556ca45d19a0563f98fab0b9c327958 Mon Sep 17 00:00:00 2001
From: Alex W <alex@lightsonsoftware.com>
Date: Thu, 19 Dec 2024 21:43:37 -0500
Subject: [PATCH] frontend: Add special handling for HTTP errors when trying to
 begin a streaming chat (#873)

* Add special handling for HTTP errors when trying to begin a streaming chat

* Fix formatting

* Fix build

* Fix build

* Apply the improved stream error handling to Coral
---
 .../src/cohere-client/client.ts               |  19 +-
 .../components/MessageRow/MessageContent.tsx  |   2 +-
 .../assistants_web/src/hooks/use-chat.ts      | 516 +++++++++---------
 .../coral_web/src/cohere-client/client.ts     |  19 +-
 .../src/components/MessageContent.tsx         |   2 +-
 src/interfaces/coral_web/src/hooks/chat.ts    | 516 +++++++++---------
 6 files changed, 536 insertions(+), 538 deletions(-)
diff --git a/src/interfaces/assistants_web/src/cohere-client/client.ts b/src/interfaces/assistants_web/src/cohere-client/client.ts
index c5372f6fa5..362e0ed8cc 100644
--- a/src/interfaces/assistants_web/src/cohere-client/client.ts
+++ b/src/interfaces/assistants_web/src/cohere-client/client.ts
@@ -134,7 +134,24 @@ export class CohereClient {
       body: requestBody,
       signal,
       openWhenHidden: true, // When false, the requests will be paused when the tab is hidden and resume/retry when the tab is visible again
-      onopen: onOpen,
+      onopen: async (response: Response) => {
+        // A non-200 reply with a JSON body is an error payload, not an event stream: raise it
+        // as a CohereNetworkError instead of handing the response on to `onOpen`.
+        const isJson = response.headers.get('content-type')?.includes('application/json');
+        if (response.status !== 200 && isJson) {
+          const data: unknown = await response.json().catch(() => {
+            throw new CohereNetworkError('Failed to decode error message JSON', response.status);
+          });
+          // `detail` may be absent or structured rather than a string; serialise those cases
+          // so the error never carries an empty or "[object Object]" message.
+          const detail = (data as { detail?: unknown } | null)?.detail;
+          const message = typeof detail === 'string' ? detail : JSON.stringify(detail ?? data);
+          throw new CohereNetworkError(message, response.status);
+        }
+        if (onOpen) {
+          onOpen(response);
+        }
+      },
       onmessage: onMessage,
       onclose: onClose,
       onerror: onError,
diff --git a/src/interfaces/assistants_web/src/components/MessageRow/MessageContent.tsx b/src/interfaces/assistants_web/src/components/MessageRow/MessageContent.tsx
index a88dc0b91c..32493794e4 100644
--- a/src/interfaces/assistants_web/src/components/MessageRow/MessageContent.tsx
+++ b/src/interfaces/assistants_web/src/components/MessageRow/MessageContent.tsx
@@ -39,7 +39,7 @@ export const MessageContent: React.FC<Props> = ({ isLast, message, onRetry }) =>
         <MessageInfo type="error">
           {message.error}
           {isLast && (
-            <button className="underline underline-offset-1" type="button" onClick={onRetry}>
+            <button className="ml-2 underline underline-offset-1" type="button" onClick={onRetry}>
               Retry?
             </button>
           )}
diff --git a/src/interfaces/assistants_web/src/hooks/use-chat.ts b/src/interfaces/assistants_web/src/hooks/use-chat.ts
index e1ed63964d..18af4511c2 100644
--- a/src/interfaces/assistants_web/src/hooks/use-chat.ts
+++ b/src/interfaces/assistants_web/src/hooks/use-chat.ts
@@ -216,300 +216,282 @@ export const useChat = (config?: { onSend?: (msg: string) => void }) => {
     // Temporarily store the streaming `parameters` partial JSON string for a tool call
     let toolCallParamaterStr = '';
 
-    try {
-      clearComposerFiles();
-      clearUploadingErrors();
-
-      await streamConverse({
-        request,
-        headers,
-        onRead: (eventData) => {
-          switch (eventData.event) {
-            case StreamEvent.STREAM_START: {
-              const data = eventData.data;
-              setIsStreaming(true);
-              conversationId = data?.conversation_id ?? '';
-              generationId = data?.generation_id ?? '';
-              break;
-            }
+    clearComposerFiles();
+    clearUploadingErrors();
 
-            case StreamEvent.TEXT_GENERATION: {
-              setIsStreamingToolEvents(false);
-              const data = eventData.data;
-              botResponse += data?.text ?? '';
-              setStreamingMessage({
-                type: MessageType.BOT,
-                state: BotState.TYPING,
-                text: botResponse,
-                generationId,
-                isRAGOn,
-                originalText: botResponse,
-                toolEvents,
-              });
-              break;
-            }
+    await streamConverse({
+      request,
+      headers,
+      onRead: (eventData) => {
+        switch (eventData.event) {
+          case StreamEvent.STREAM_START: {
+            const data = eventData.data;
+            setIsStreaming(true);
+            conversationId = data?.conversation_id ?? '';
+            generationId = data?.generation_id ?? '';
+            break;
+          }
 
-            // This event only occurs when we use tools.
-            case StreamEvent.SEARCH_RESULTS: {
-              const data = eventData.data;
-              const documents = data?.documents ?? [];
-
-              const { documentsMap: newDocumentsMap, outputFilesMap: newOutputFilesMap } =
-                mapDocuments(documents);
-              documentsMap = { ...documentsMap, ...newDocumentsMap };
-              outputFiles = { ...outputFiles, ...newOutputFilesMap };
-              saveOutputFiles({ ...savedOutputFiles, ...outputFiles });
-
-              toolEvents.push({
-                text: '',
-                stream_search_results: data,
-                tool_calls: [],
-              } as StreamToolCallsGeneration);
-              currentToolEventIndex += 1;
+          case StreamEvent.TEXT_GENERATION: {
+            setIsStreamingToolEvents(false);
+            const data = eventData.data;
+            botResponse += data?.text ?? '';
+            setStreamingMessage({
+              type: MessageType.BOT,
+              state: BotState.TYPING,
+              text: botResponse,
+              generationId,
+              isRAGOn,
+              originalText: botResponse,
+              toolEvents,
+            });
+            break;
+          }
 
-              break;
-            }
+          // This event only occurs when we use tools.
+          case StreamEvent.SEARCH_RESULTS: {
+            const data = eventData.data;
+            const documents = data?.documents ?? [];
+
+            const { documentsMap: newDocumentsMap, outputFilesMap: newOutputFilesMap } =
+              mapDocuments(documents);
+            documentsMap = { ...documentsMap, ...newDocumentsMap };
+            outputFiles = { ...outputFiles, ...newOutputFilesMap };
+            saveOutputFiles({ ...savedOutputFiles, ...outputFiles });
+
+            toolEvents.push({
+              text: '',
+              stream_search_results: data,
+              tool_calls: [],
+            } as StreamToolCallsGeneration);
+            currentToolEventIndex += 1;
+
+            break;
+          }
 
-            case StreamEvent.TOOL_CALLS_CHUNK: {
-              setIsStreamingToolEvents(true);
-              const data = eventData.data;
-
-              // Initiate an empty tool event if one doesn't already exist at the current index
-              const toolEvent: StreamToolCallsGeneration = toolEvents[currentToolEventIndex] ?? {
-                text: '',
-                tool_calls: [],
-              };
-              toolEvent.text += data?.text ?? '';
-
-              // A tool call needs to be added/updated if a tool call delta is present in the event
-              if (data?.tool_call_delta) {
-                const currentToolCallsIndex = data.tool_call_delta.index ?? 0;
-                let toolCall = toolEvent.tool_calls?.[currentToolCallsIndex];
-                if (!toolCall) {
-                  toolCall = {
-                    name: '',
-                    parameters: {},
-                  };
-                  toolCallParamaterStr = '';
-                }
+          case StreamEvent.TOOL_CALLS_CHUNK: {
+            setIsStreamingToolEvents(true);
+            const data = eventData.data;
 
-                if (data?.tool_call_delta?.name) {
-                  toolCall.name = data.tool_call_delta.name;
-                }
-                if (data?.tool_call_delta?.parameters) {
-                  toolCallParamaterStr += data?.tool_call_delta?.parameters;
-
-                  // Attempt to parse the partial parameter string as valid JSON to show that the parameters
-                  // are streaming in. To make the partial JSON string valid JSON after the object key comes in,
-                  // we naively try to add `"}` to the end.
-                  try {
-                    const partialParams = JSON.parse(toolCallParamaterStr + `"}`);
-                    toolCall.parameters = partialParams;
-                  } catch (e) {
-                    // Ignore parsing error
-                  }
-                }
+            // Initiate an empty tool event if one doesn't already exist at the current index
+            const toolEvent: StreamToolCallsGeneration = toolEvents[currentToolEventIndex] ?? {
+              text: '',
+              tool_calls: [],
+            };
+            toolEvent.text += data?.text ?? '';
+
+            // A tool call needs to be added/updated if a tool call delta is present in the event
+            if (data?.tool_call_delta) {
+              const currentToolCallsIndex = data.tool_call_delta.index ?? 0;
+              let toolCall = toolEvent.tool_calls?.[currentToolCallsIndex];
+              if (!toolCall) {
+                toolCall = {
+                  name: '',
+                  parameters: {},
+                };
+                toolCallParamaterStr = '';
+              }
 
-                // Update the tool call list with the new/updated tool call
-                if (toolEvent.tool_calls?.[currentToolCallsIndex]) {
-                  toolEvent.tool_calls[currentToolCallsIndex] = toolCall;
-                } else {
-                  toolEvent.tool_calls?.push(toolCall);
+              if (data?.tool_call_delta?.name) {
+                toolCall.name = data.tool_call_delta.name;
+              }
+              if (data?.tool_call_delta?.parameters) {
+                toolCallParamaterStr += data?.tool_call_delta?.parameters;
+
+                // Attempt to parse the partial parameter string as valid JSON to show that the parameters
+                // are streaming in. To make the partial JSON string valid JSON after the object key comes in,
+                // we naively try to add `"}` to the end.
+                try {
+                  const partialParams = JSON.parse(toolCallParamaterStr + `"}`);
+                  toolCall.parameters = partialParams;
+                } catch (e) {
+                  // Ignore parsing error
                 }
               }
 
-              // Update the tool event list with the new/updated tool event
-              if (toolEvents[currentToolEventIndex]) {
-                toolEvents[currentToolEventIndex] = toolEvent;
+              // Update the tool call list with the new/updated tool call
+              if (toolEvent.tool_calls?.[currentToolCallsIndex]) {
+                toolEvent.tool_calls[currentToolCallsIndex] = toolCall;
               } else {
-                toolEvents.push(toolEvent);
+                toolEvent.tool_calls?.push(toolCall);
               }
+            }
 
-              setStreamingMessage({
-                type: MessageType.BOT,
-                state: BotState.TYPING,
-                text: botResponse,
-                isRAGOn,
-                generationId,
-                originalText: botResponse,
-                toolEvents,
-              });
-              break;
+            // Update the tool event list with the new/updated tool event
+            if (toolEvents[currentToolEventIndex]) {
+              toolEvents[currentToolEventIndex] = toolEvent;
+            } else {
+              toolEvents.push(toolEvent);
             }
 
-            case StreamEvent.TOOL_CALLS_GENERATION: {
-              const data = eventData.data;
+            setStreamingMessage({
+              type: MessageType.BOT,
+              state: BotState.TYPING,
+              text: botResponse,
+              isRAGOn,
+              generationId,
+              originalText: botResponse,
+              toolEvents,
+            });
+            break;
+          }
 
-              if (toolEvents[currentToolEventIndex]) {
-                toolEvents[currentToolEventIndex] = data;
-                currentToolEventIndex += 1;
-              } else {
-                toolEvents.push(data);
-                currentToolEventIndex = toolEvents.length; // double check this is right
-              }
-              break;
-            }
+          case StreamEvent.TOOL_CALLS_GENERATION: {
+            const data = eventData.data;
 
-            case StreamEvent.CITATION_GENERATION: {
-              const data = eventData.data;
-              const newCitations = [...(data?.citations ?? [])];
-              const fixedCitations = fixInlineCitationsForMarkdown(newCitations, botResponse);
-              citations.push(...fixedCitations);
-              citations.sort((a, b) => (a.start ?? 0) - (b.start ?? 0));
-              saveCitations(generationId, fixedCitations, documentsMap);
-
-              setStreamingMessage({
-                type: MessageType.BOT,
-                state: BotState.TYPING,
-                text: replaceTextWithCitations(botResponse, citations, generationId),
-                citations,
-                isRAGOn,
-                generationId,
-                originalText: botResponse,
-                toolEvents,
-              });
-              break;
+            if (toolEvents[currentToolEventIndex]) {
+              toolEvents[currentToolEventIndex] = data;
+              currentToolEventIndex += 1;
+            } else {
+              toolEvents.push(data);
+              currentToolEventIndex = toolEvents.length; // double check this is right
             }
+            break;
+          }
 
-            case StreamEvent.STREAM_END: {
-              const data = eventData.data;
+          case StreamEvent.CITATION_GENERATION: {
+            const data = eventData.data;
+            const newCitations = [...(data?.citations ?? [])];
+            const fixedCitations = fixInlineCitationsForMarkdown(newCitations, botResponse);
+            citations.push(...fixedCitations);
+            citations.sort((a, b) => (a.start ?? 0) - (b.start ?? 0));
+            saveCitations(generationId, fixedCitations, documentsMap);
+
+            setStreamingMessage({
+              type: MessageType.BOT,
+              state: BotState.TYPING,
+              text: replaceTextWithCitations(botResponse, citations, generationId),
+              citations,
+              isRAGOn,
+              generationId,
+              originalText: botResponse,
+              toolEvents,
+            });
+            break;
+          }
 
-              conversationId = data?.conversation_id ?? '';
+          case StreamEvent.STREAM_END: {
+            const data = eventData.data;
 
-              if (currentConversationId !== conversationId) {
-                setConversation({ id: conversationId });
-              }
-              // Make sure our URL is up to date with the conversationId
-              if (!window.location.pathname.includes(`c/${conversationId}`) && conversationId) {
-                const newUrl =
-                  window.location.pathname === '/'
-                    ? `c/${conversationId}`
-                    : window.location.pathname + `/c/${conversationId}`;
-                window?.history?.replaceState(null, '', newUrl);
-                queryClient.invalidateQueries({ queryKey: ['conversations'] });
-              }
-
-              const responseText = data.text ?? '';
-
-              addSearchResults(data?.search_results ?? []);
-
-              // When we use documents for RAG, we don't get the documents split up by snippet
-              // and their new ids until the final response. In the future, we will potentially
-              // get the snippets in the citation-generation event and we can inject them there.
-              const { documentsMap: newDocumentsMap, outputFilesMap: newOutputFilesMap } =
-                mapDocuments(data.documents ?? []);
-              documentsMap = { ...documentsMap, ...newDocumentsMap };
-              outputFiles = { ...outputFiles, ...newOutputFilesMap };
-
-              saveCitations(generationId, citations, documentsMap);
-              saveOutputFiles({ ...savedOutputFiles, ...outputFiles });
-
-              const outputText =
-                data?.finish_reason === FinishReason.MAX_TOKENS ? botResponse : responseText;
-
-              // Replace HTML code blocks with iframes
-              const transformedText = replaceCodeBlockWithIframe(outputText);
-
-              const finalText = isRAGOn
-                ? replaceTextWithCitations(
-                    // TODO(@wujessica): temporarily use the text generated from the stream when MAX_TOKENS
-                    // because the final response doesn't give us the full text yet. Note - this means that
-                    // citations will only appear for the first 'block' of text generated.
-                    transformedText,
-                    citations,
-                    generationId
-                  )
-                : botResponse;
-
-              const finalMessage: FulfilledMessage = {
-                id: data.message_id,
-                type: MessageType.BOT,
-                state: BotState.FULFILLED,
-                generationId,
-                text: citations.length > 0 ? finalText : fixMarkdownImagesInText(transformedText),
-                citations,
-                isRAGOn,
-                originalText: isRAGOn ? responseText : botResponse,
-                toolEvents,
-              };
-
-              setConversation({ messages: [...newMessages, finalMessage] });
-              setStreamingMessage(null);
-
-              if (shouldUpdateConversationTitle(newMessages)) {
-                handleUpdateConversationTitle(conversationId);
-              }
+            conversationId = data?.conversation_id ?? '';
 
-              break;
+            if (currentConversationId !== conversationId) {
+              setConversation({ id: conversationId });
+            }
+            // Make sure our URL is up to date with the conversationId
+            if (!window.location.pathname.includes(`c/${conversationId}`) && conversationId) {
+              const newUrl =
+                window.location.pathname === '/'
+                  ? `c/${conversationId}`
+                  : window.location.pathname + `/c/${conversationId}`;
+              window?.history?.replaceState(null, '', newUrl);
+              queryClient.invalidateQueries({ queryKey: ['conversations'] });
             }
-          }
-        },
-        onHeaders: () => {},
-        onFinish: () => {
-          setIsStreaming(false);
-        },
-        onError: (e) => {
-          citations = [];
-          if (isCohereNetworkError(e)) {
-            const networkError = e;
-            let errorMessage = USER_ERROR_MESSAGE;
-
-            setConversation({
-              messages: newMessages.map((m, i) =>
-                i < newMessages.length - 1
-                  ? m
-                  : { ...m, error: `[${networkError.status}] ${errorMessage}` }
-              ),
-            });
-          } else if (isStreamError(e)) {
-            const streamError = e;
 
-            const lastMessage: ErrorMessage = createErrorMessage({
-              text: botResponse,
-              error: `[${streamError.code}] ${USER_ERROR_MESSAGE}`,
-            });
+            const responseText = data.text ?? '';
+
+            addSearchResults(data?.search_results ?? []);
+
+            // When we use documents for RAG, we don't get the documents split up by snippet
+            // and their new ids until the final response. In the future, we will potentially
+            // get the snippets in the citation-generation event and we can inject them there.
+            const { documentsMap: newDocumentsMap, outputFilesMap: newOutputFilesMap } =
+              mapDocuments(data.documents ?? []);
+            documentsMap = { ...documentsMap, ...newDocumentsMap };
+            outputFiles = { ...outputFiles, ...newOutputFilesMap };
+
+            saveCitations(generationId, citations, documentsMap);
+            saveOutputFiles({ ...savedOutputFiles, ...outputFiles });
+
+            const outputText =
+              data?.finish_reason === FinishReason.MAX_TOKENS ? botResponse : responseText;
+
+            // Replace HTML code blocks with iframes
+            const transformedText = replaceCodeBlockWithIframe(outputText);
+
+            const finalText = isRAGOn
+              ? replaceTextWithCitations(
+                  // TODO(@wujessica): temporarily use the text generated from the stream when MAX_TOKENS
+                  // because the final response doesn't give us the full text yet. Note - this means that
+                  // citations will only appear for the first 'block' of text generated.
+                  transformedText,
+                  citations,
+                  generationId
+                )
+              : botResponse;
+
+            const finalMessage: FulfilledMessage = {
+              id: data.message_id,
+              type: MessageType.BOT,
+              state: BotState.FULFILLED,
+              generationId,
+              text: citations.length > 0 ? finalText : fixMarkdownImagesInText(transformedText),
+              citations,
+              isRAGOn,
+              originalText: isRAGOn ? responseText : botResponse,
+              toolEvents,
+            };
 
-            setConversation({ messages: [...newMessages, lastMessage] });
-          } else {
-            let error =
-              (e as CohereNetworkError)?.message ||
-              'Unable to generate a response since an error was encountered.';
+            setConversation({ messages: [...newMessages, finalMessage] });
+            setStreamingMessage(null);
 
-            if (error === 'network error' && deployment === DEPLOYMENT_COHERE_PLATFORM) {
-              error += ' (Ensure a COHERE_API_KEY is configured correctly)';
+            if (shouldUpdateConversationTitle(newMessages)) {
+              handleUpdateConversationTitle(conversationId);
             }
-            setConversation({
-              messages: [
-                ...newMessages,
-                createErrorMessage({
-                  text: botResponse,
-                  error,
-                }),
-              ],
-            });
-          }
-          setIsStreaming(false);
-          setStreamingMessage(null);
-          setPendingMessage(null);
-        },
-      });
-    } catch (e) {
-      if (isCohereNetworkError(e) && e?.status) {
-        let errorMessage = USER_ERROR_MESSAGE;
-
-        setConversation({
-          messages: newMessages.map((m, i) =>
-            i < newMessages.length - 1
-              ? m
-              : { ...m, error: `[${(e as CohereNetworkError)?.status}] ${errorMessage}` }
-          ),
-        });
-      }
 
-      setIsStreaming(false);
-      setStreamingMessage(null);
-      setPendingMessage(null);
-    }
+            break;
+          }
+        }
+      },
+      onHeaders: () => {},
+      onFinish: () => {
+        setIsStreaming(false);
+      },
+      // Reports a failed stream in the conversation, then resets the streaming state.
+      onError: (e) => {
+        citations = [];
+        if (isCohereNetworkError(e)) {
+          const networkError = e;
+          // `||` rather than `??`: an Error's `message` is a string (possibly empty), never
+          // nullish, so `??` could not fall back to the generic text.
+          // NOTE(review): assumes CohereNetworkError extends Error - confirm.
+          const errorMessage = networkError.message || USER_ERROR_MESSAGE;
+
+          // Attach the error to the last entry of `newMessages` instead of appending a message.
+          setConversation({
+            messages: newMessages.map((m, i) =>
+              i < newMessages.length - 1
+                ? m
+                : { ...m, error: `[${networkError.status}] ${errorMessage}` }
+            ),
+          });
+        } else if (isStreamError(e)) {
+          const streamError = e;
+          // Keep whatever text was streamed so far and append it as an error message.
+          const lastMessage: ErrorMessage = createErrorMessage({
+            text: botResponse,
+            error: `[${streamError.code}] ${USER_ERROR_MESSAGE}`,
+          });
+
+          setConversation({ messages: [...newMessages, lastMessage] });
+        } else {
+          // Anything else: use the error's own message when it has one.
+          let error =
+            (e as CohereNetworkError)?.message ||
+            'Unable to generate a response since an error was encountered.';
+
+          if (error === 'network error' && deployment === DEPLOYMENT_COHERE_PLATFORM) {
+            error += ' (Ensure a COHERE_API_KEY is configured correctly)';
+          }
+          setConversation({
+            messages: [...newMessages, createErrorMessage({ text: botResponse, error })],
+          });
+        }
+        setIsStreaming(false);
+        setStreamingMessage(null);
+        setPendingMessage(null);
+      },
+    });
   };
 
   const getChatRequest = (message: string, overrides?: ChatRequestOverrides): CohereChatRequest => {
diff --git a/src/interfaces/coral_web/src/cohere-client/client.ts b/src/interfaces/coral_web/src/cohere-client/client.ts
index d0be521b42..fa1982299d 100644
--- a/src/interfaces/coral_web/src/cohere-client/client.ts
+++ b/src/interfaces/coral_web/src/cohere-client/client.ts
@@ -109,7 +109,24 @@ export class CohereClient {
       body: requestBody,
       signal,
       openWhenHidden: true, // When false, the requests will be paused when the tab is hidden and resume/retry when the tab is visible again
-      onopen: onOpen,
+      onopen: async (response: Response) => {
+        // A non-200 reply with a JSON body is an error payload, not an event stream: raise it
+        // as a CohereNetworkError instead of handing the response on to `onOpen`.
+        const isJson = response.headers.get('content-type')?.includes('application/json');
+        if (response.status !== 200 && isJson) {
+          const data: unknown = await response.json().catch(() => {
+            throw new CohereNetworkError('Failed to decode error message JSON', response.status);
+          });
+          // `detail` may be absent or structured rather than a string; serialise those cases
+          // so the error never carries an empty or "[object Object]" message.
+          const detail = (data as { detail?: unknown } | null)?.detail;
+          const message = typeof detail === 'string' ? detail : JSON.stringify(detail ?? data);
+          throw new CohereNetworkError(message, response.status);
+        }
+        if (onOpen) {
+          onOpen(response);
+        }
+      },
       onmessage: onMessage,
       onclose: onClose,
       onerror: onError,
diff --git a/src/interfaces/coral_web/src/components/MessageContent.tsx b/src/interfaces/coral_web/src/components/MessageContent.tsx
index 73e23bad43..5025a9a677 100644
--- a/src/interfaces/coral_web/src/components/MessageContent.tsx
+++ b/src/interfaces/coral_web/src/components/MessageContent.tsx
@@ -45,7 +45,7 @@ export const MessageContent: React.FC<Props> = ({ isLast, message, onRetry }) =>
         <MessageInfo type="error">
           {message.error}
           {isLast && (
-            <button className="underline underline-offset-1" type="button" onClick={onRetry}>
+            <button className="ml-2 underline underline-offset-1" type="button" onClick={onRetry}>
               {STRINGS.retryQuestion}
             </button>
           )}
diff --git a/src/interfaces/coral_web/src/hooks/chat.ts b/src/interfaces/coral_web/src/hooks/chat.ts
index 7332418b3d..17d5bb2aa2 100644
--- a/src/interfaces/coral_web/src/hooks/chat.ts
+++ b/src/interfaces/coral_web/src/hooks/chat.ts
@@ -235,301 +235,283 @@ export const useChat = (config?: { onSend?: (msg: string) => void }) => {
     // Temporarily store the streaming `parameters` partial JSON string for a tool call
     let toolCallParamaterStr = '';
 
-    try {
-      clearComposerFiles();
-      clearUploadingErrors();
-
-      await streamConverse({
-        request,
-        headers,
-        onRead: (eventData: ChatResponseEvent) => {
-          switch (eventData.event) {
-            case StreamEvent.STREAM_START: {
-              const data = eventData.data as StreamStart;
-              setIsStreaming(true);
-              conversationId = data?.conversation_id ?? '';
-              generationId = data?.generation_id ?? '';
-              break;
-            }
-
-            case StreamEvent.TEXT_GENERATION: {
-              setIsStreamingToolEvents(false);
-              const data = eventData.data as StreamTextGeneration;
-              botResponse += data?.text ?? '';
-              setStreamingMessage({
-                type: MessageType.BOT,
-                state: BotState.TYPING,
-                text: botResponse,
-                generationId,
-                isRAGOn,
-                originalText: botResponse,
-                toolEvents,
-              });
-              break;
-            }
+    clearComposerFiles();
+    clearUploadingErrors();
 
-            // This event only occurs when we use tools.
-            case StreamEvent.SEARCH_RESULTS: {
-              const data = eventData.data as StreamSearchResults;
-              const documents = data?.documents ?? [];
-
-              const { documentsMap: newDocumentsMap, outputFilesMap: newOutputFilesMap } =
-                mapDocuments(documents);
-              documentsMap = { ...documentsMap, ...newDocumentsMap };
-              outputFiles = { ...outputFiles, ...newOutputFilesMap };
-              // we are only interested in web_search results
-              // ignore search results of pyhton interpreter tool
-              if (
-                toolEvents[currentToolEventIndex - 1]?.tool_calls?.[0]?.name !==
-                TOOL_PYTHON_INTERPRETER_ID
-              ) {
-                toolEvents.push({
-                  text: '',
-                  stream_search_results: data,
-                  tool_calls: [],
-                } as StreamToolCallsGeneration);
-                currentToolEventIndex += 1;
-              }
-              break;
-            }
+    await streamConverse({
+      request,
+      headers,
+      onRead: (eventData: ChatResponseEvent) => {
+        switch (eventData.event) {
+          case StreamEvent.STREAM_START: {
+            const data = eventData.data as StreamStart;
+            setIsStreaming(true);
+            conversationId = data?.conversation_id ?? '';
+            generationId = data?.generation_id ?? '';
+            break;
+          }
 
-            case StreamEvent.TOOL_CALLS_CHUNK: {
-              setIsStreamingToolEvents(true);
-              const data = eventData.data as StreamToolCallsChunk;
+          case StreamEvent.TEXT_GENERATION: {
+            setIsStreamingToolEvents(false);
+            const data = eventData.data as StreamTextGeneration;
+            botResponse += data?.text ?? '';
+            setStreamingMessage({
+              type: MessageType.BOT,
+              state: BotState.TYPING,
+              text: botResponse,
+              generationId,
+              isRAGOn,
+              originalText: botResponse,
+              toolEvents,
+            });
+            break;
+          }
 
-              // Initiate an empty tool event if one doesn't already exist at the current index
-              const toolEvent: StreamToolCallsGeneration = toolEvents[currentToolEventIndex] ?? {
+          // This event only occurs when we use tools.
+          case StreamEvent.SEARCH_RESULTS: {
+            const data = eventData.data as StreamSearchResults;
+            const documents = data?.documents ?? [];
+
+            const { documentsMap: newDocumentsMap, outputFilesMap: newOutputFilesMap } =
+              mapDocuments(documents);
+            documentsMap = { ...documentsMap, ...newDocumentsMap };
+            outputFiles = { ...outputFiles, ...newOutputFilesMap };
+            // we are only interested in web_search results
+            // ignore search results of python interpreter tool
+            if (
+              toolEvents[currentToolEventIndex - 1]?.tool_calls?.[0]?.name !==
+              TOOL_PYTHON_INTERPRETER_ID
+            ) {
+              toolEvents.push({
                 text: '',
+                stream_search_results: data,
                 tool_calls: [],
-              };
-              toolEvent.text += data?.text ?? '';
-
-              // A tool call needs to be added/updated if a tool call delta is present in the event
-              if (data?.tool_call_delta) {
-                const currentToolCallsIndex = data.tool_call_delta.index ?? 0;
-                let toolCall = toolEvent.tool_calls?.[currentToolCallsIndex];
-                if (!toolCall) {
-                  toolCall = {
-                    name: '',
-                    parameters: {},
-                  };
-                  toolCallParamaterStr = '';
-                }
+              } as StreamToolCallsGeneration);
+              currentToolEventIndex += 1;
+            }
+            break;
+          }
 
-                if (data?.tool_call_delta?.name) {
-                  toolCall.name = data.tool_call_delta.name;
-                }
-                if (data?.tool_call_delta?.parameters) {
-                  toolCallParamaterStr += data?.tool_call_delta?.parameters;
-
-                  // Attempt to parse the partial parameter string as valid JSON to show that the parameters
-                  // are streaming in. To make the partial JSON string valid JSON after the object key comes in,
-                  // we naively try to add `"}` to the end.
-                  try {
-                    const partialParams = JSON.parse(toolCallParamaterStr + `"}`);
-                    toolCall.parameters = partialParams;
-                  } catch (e) {
-                    // Ignore parsing error
-                  }
-                }
+          case StreamEvent.TOOL_CALLS_CHUNK: {
+            setIsStreamingToolEvents(true);
+            const data = eventData.data as StreamToolCallsChunk;
 
-                // Update the tool call list with the new/updated tool call
-                if (toolEvent.tool_calls?.[currentToolCallsIndex]) {
-                  toolEvent.tool_calls[currentToolCallsIndex] = toolCall;
-                } else {
-                  toolEvent.tool_calls?.push(toolCall);
+            // Initiate an empty tool event if one doesn't already exist at the current index
+            const toolEvent: StreamToolCallsGeneration = toolEvents[currentToolEventIndex] ?? {
+              text: '',
+              tool_calls: [],
+            };
+            toolEvent.text += data?.text ?? '';
+
+            // A tool call needs to be added/updated if a tool call delta is present in the event
+            if (data?.tool_call_delta) {
+              const currentToolCallsIndex = data.tool_call_delta.index ?? 0;
+              let toolCall = toolEvent.tool_calls?.[currentToolCallsIndex];
+              if (!toolCall) {
+                toolCall = {
+                  name: '',
+                  parameters: {},
+                };
+                toolCallParamaterStr = '';
+              }
+
+              if (data?.tool_call_delta?.name) {
+                toolCall.name = data.tool_call_delta.name;
+              }
+              if (data?.tool_call_delta?.parameters) {
+                toolCallParamaterStr += data?.tool_call_delta?.parameters;
+
+                // Attempt to parse the partial parameter string as valid JSON to show that the parameters
+                // are streaming in. To make the partial JSON string valid JSON after the object key comes in,
+                // we naively try to add `"}` to the end.
+                try {
+                  const partialParams = JSON.parse(toolCallParamaterStr + `"}`);
+                  toolCall.parameters = partialParams;
+                } catch (e) {
+                  // Ignore parsing error
                 }
               }
 
-              // Update the tool event list with the new/updated tool event
-              if (toolEvents[currentToolEventIndex]) {
-                toolEvents[currentToolEventIndex] = toolEvent;
+              // Update the tool call list with the new/updated tool call
+              if (toolEvent.tool_calls?.[currentToolCallsIndex]) {
+                toolEvent.tool_calls[currentToolCallsIndex] = toolCall;
               } else {
-                toolEvents.push(toolEvent);
+                toolEvent.tool_calls?.push(toolCall);
               }
+            }
 
-              setStreamingMessage({
-                type: MessageType.BOT,
-                state: BotState.TYPING,
-                text: botResponse,
-                isRAGOn,
-                generationId,
-                originalText: botResponse,
-                toolEvents,
-              });
-              break;
+            // Update the tool event list with the new/updated tool event
+            if (toolEvents[currentToolEventIndex]) {
+              toolEvents[currentToolEventIndex] = toolEvent;
+            } else {
+              toolEvents.push(toolEvent);
             }
 
-            case StreamEvent.TOOL_CALLS_GENERATION: {
-              const data = eventData.data as StreamToolCallsGeneration;
+            setStreamingMessage({
+              type: MessageType.BOT,
+              state: BotState.TYPING,
+              text: botResponse,
+              isRAGOn,
+              generationId,
+              originalText: botResponse,
+              toolEvents,
+            });
+            break;
+          }
 
-              if (toolEvents[currentToolEventIndex]) {
-                toolEvents[currentToolEventIndex] = data;
-                currentToolEventIndex += 1;
-              } else {
-                toolEvents.push(data);
-                currentToolEventIndex = toolEvents.length; // double check this is right
-              }
-              break;
-            }
+          case StreamEvent.TOOL_CALLS_GENERATION: {
+            const data = eventData.data as StreamToolCallsGeneration;
 
-            case StreamEvent.CITATION_GENERATION: {
-              const data = eventData.data as StreamCitationGeneration;
-              const newCitations = [...(data?.citations ?? [])];
-              newCitations.sort((a, b) => (a.start ?? 0) - (b.start ?? 0));
-              citations.push(...newCitations);
-              citations.sort((a, b) => (a.start ?? 0) - (b.start ?? 0));
-              saveCitations(generationId, newCitations, documentsMap);
-
-              setStreamingMessage({
-                type: MessageType.BOT,
-                state: BotState.TYPING,
-                text: replaceTextWithCitations(botResponse, citations, generationId),
-                citations,
-                isRAGOn,
-                generationId,
-                originalText: botResponse,
-                toolEvents,
-              });
-              break;
+            if (toolEvents[currentToolEventIndex]) {
+              toolEvents[currentToolEventIndex] = data;
+              currentToolEventIndex += 1;
+            } else {
+              toolEvents.push(data);
+              currentToolEventIndex = toolEvents.length; // double check this is right
             }
+            break;
+          }
 
-            case StreamEvent.STREAM_END: {
-              const data = eventData.data as StreamEnd;
-
-              conversationId = data?.conversation_id ?? '';
+          case StreamEvent.CITATION_GENERATION: {
+            const data = eventData.data as StreamCitationGeneration;
+            const newCitations = [...(data?.citations ?? [])];
+            newCitations.sort((a, b) => (a.start ?? 0) - (b.start ?? 0));
+            citations.push(...newCitations);
+            citations.sort((a, b) => (a.start ?? 0) - (b.start ?? 0));
+            saveCitations(generationId, newCitations, documentsMap);
+
+            setStreamingMessage({
+              type: MessageType.BOT,
+              state: BotState.TYPING,
+              text: replaceTextWithCitations(botResponse, citations, generationId),
+              citations,
+              isRAGOn,
+              generationId,
+              originalText: botResponse,
+              toolEvents,
+            });
+            break;
+          }
 
-              if (id !== conversationId) {
-                setConversation({ id: conversationId });
-              }
-              // Make sure our URL is up to date with the conversationId
-              if (!window.location.pathname.includes(`c/${conversationId}`) && conversationId) {
-                const newUrl =
-                  window.location.pathname === '/'
-                    ? `c/${conversationId}`
-                    : window.location.pathname + `/c/${conversationId}`;
-                window?.history?.replaceState(null, '', newUrl);
-                queryClient.invalidateQueries({ queryKey: ['conversations'] });
-              }
+          case StreamEvent.STREAM_END: {
+            const data = eventData.data as StreamEnd;
 
-              const responseText = data.text ?? '';
-
-              addSearchResults(data?.search_results ?? []);
-
-              // When we use documents for RAG, we don't get the documents split up by snippet
-              // and their new ids until the final response. In the future, we will potentially
-              // get the snippets in the citation-generation event and we can inject them there.
-              const { documentsMap: newDocumentsMap, outputFilesMap: newOutputFilesMap } =
-                mapDocuments(data.documents ?? []);
-              documentsMap = { ...documentsMap, ...newDocumentsMap };
-              outputFiles = { ...outputFiles, ...newOutputFilesMap };
-
-              saveCitations(generationId, citations, documentsMap);
-              saveOutputFiles({ ...savedOutputFiles, ...outputFiles });
-
-              const outputText =
-                data?.finish_reason === FinishReason.MAX_TOKENS ? botResponse : responseText;
-
-              // Replace HTML code blocks with iframes
-              const transformedText = replaceCodeBlockWithIframe(outputText);
-
-              const finalText = isRAGOn
-                ? replaceTextWithCitations(
-                    // TODO(@wujessica): temporarily use the text generated from the stream when MAX_TOKENS
-                    // because the final response doesn't give us the full text yet. Note - this means that
-                    // citations will only appear for the first 'block' of text generated.
-                    transformedText,
-                    citations,
-                    generationId
-                  )
-                : botResponse;
-
-              const finalMessage: FulfilledMessage = {
-                type: MessageType.BOT,
-                state: BotState.FULFILLED,
-                generationId,
-                text: citations.length > 0 ? finalText : fixMarkdownImagesInText(transformedText),
-                citations,
-                isRAGOn,
-                originalText: isRAGOn ? responseText : botResponse,
-                toolEvents,
-              };
-
-              setConversation({ messages: [...newMessages, finalMessage] });
-              setStreamingMessage(null);
-
-              if (shouldUpdateConversationTitle(newMessages)) {
-                handleUpdateConversationTitle(conversationId);
-              }
+            conversationId = data?.conversation_id ?? '';
 
-              break;
+            if (id !== conversationId) {
+              setConversation({ id: conversationId });
+            }
+            // Make sure our URL is up to date with the conversationId
+            if (!window.location.pathname.includes(`c/${conversationId}`) && conversationId) {
+              const newUrl =
+                window.location.pathname === '/'
+                  ? `c/${conversationId}`
+                  : window.location.pathname + `/c/${conversationId}`;
+              window?.history?.replaceState(null, '', newUrl);
+              queryClient.invalidateQueries({ queryKey: ['conversations'] });
             }
-          }
-        },
-        onHeaders: () => {},
-        onFinish: () => {
-          setIsStreaming(false);
-        },
-        onError: (e) => {
-          citations = [];
-          if (isCohereNetworkError(e)) {
-            const networkError = e;
-            let errorMessage = USER_ERROR_MESSAGE;
-
-            setConversation({
-              messages: newMessages.map((m, i) =>
-                i < newMessages.length - 1
-                  ? m
-                  : { ...m, error: `[${networkError.status}] ${errorMessage}` }
-              ),
-            });
-          } else if (isStreamError(e)) {
-            const streamError = e;
 
-            const lastMessage: ErrorMessage = createErrorMessage({
-              text: botResponse,
-              error: `[${streamError.code}] ${USER_ERROR_MESSAGE}`,
-            });
+            const responseText = data.text ?? '';
+
+            addSearchResults(data?.search_results ?? []);
+
+            // When we use documents for RAG, we don't get the documents split up by snippet
+            // and their new ids until the final response. In the future, we will potentially
+            // get the snippets in the citation-generation event and we can inject them there.
+            const { documentsMap: newDocumentsMap, outputFilesMap: newOutputFilesMap } =
+              mapDocuments(data.documents ?? []);
+            documentsMap = { ...documentsMap, ...newDocumentsMap };
+            outputFiles = { ...outputFiles, ...newOutputFilesMap };
+
+            saveCitations(generationId, citations, documentsMap);
+            saveOutputFiles({ ...savedOutputFiles, ...outputFiles });
+
+            const outputText =
+              data?.finish_reason === FinishReason.MAX_TOKENS ? botResponse : responseText;
+
+            // Replace HTML code blocks with iframes
+            const transformedText = replaceCodeBlockWithIframe(outputText);
+
+            const finalText = isRAGOn
+              ? replaceTextWithCitations(
+                  // TODO(@wujessica): temporarily use the text generated from the stream when MAX_TOKENS
+                  // because the final response doesn't give us the full text yet. Note - this means that
+                  // citations will only appear for the first 'block' of text generated.
+                  transformedText,
+                  citations,
+                  generationId
+                )
+              : botResponse;
+
+            const finalMessage: FulfilledMessage = {
+              type: MessageType.BOT,
+              state: BotState.FULFILLED,
+              generationId,
+              text: citations.length > 0 ? finalText : fixMarkdownImagesInText(transformedText),
+              citations,
+              isRAGOn,
+              originalText: isRAGOn ? responseText : botResponse,
+              toolEvents,
+            };
 
-            setConversation({ messages: [...newMessages, lastMessage] });
-          } else {
-            let error = (e as CohereNetworkError)?.message || STRINGS.generationError;
+            setConversation({ messages: [...newMessages, finalMessage] });
+            setStreamingMessage(null);
 
-            if (error === 'network error' && deployment === DEPLOYMENT_COHERE_PLATFORM) {
-              error += ` (${STRINGS.networkErrorSuggestion})`;
+            if (shouldUpdateConversationTitle(newMessages)) {
+              handleUpdateConversationTitle(conversationId);
             }
-            setConversation({
-              messages: [
-                ...newMessages,
-                createErrorMessage({
-                  text: botResponse,
-                  error,
-                }),
-              ],
-            });
-          }
-          setIsStreaming(false);
-          setStreamingMessage(null);
-          setPendingMessage(null);
-        },
-      });
-    } catch (e) {
-      if (isCohereNetworkError(e) && e?.status) {
-        let errorMessage = USER_ERROR_MESSAGE;
-
-        setConversation({
-          messages: newMessages.map((m, i) =>
-            i < newMessages.length - 1
-              ? m
-              : { ...m, error: `[${(e as CohereNetworkError)?.status}] ${errorMessage}` }
-          ),
-        });
-      }
 
-      setIsStreaming(false);
-      setStreamingMessage(null);
-      setPendingMessage(null);
-    }
+            break;
+          }
+        }
+      },
+      onHeaders: () => {},
+      onFinish: () => {
+        setIsStreaming(false);
+      },
+      // Stream failure handler: record the error on the conversation, then reset streaming state.
+      onError: (e) => {
+        citations = [];
+        if (isCohereNetworkError(e)) {
+          const networkError = e;
+          // An Error's `message` is always a string, so `??` could never reach the fallback;
+          // `||` also covers the empty message produced when the server sent no usable detail.
+          const errorMessage = networkError.message || USER_ERROR_MESSAGE;
+          setConversation({
+            messages: newMessages.map((m, i) =>
+              i < newMessages.length - 1
+                ? m
+                : { ...m, error: `[${networkError.status}] ${errorMessage}` }
+            ),
+          });
+        } else if (isStreamError(e)) {
+          const streamError = e;
+          const lastMessage: ErrorMessage = createErrorMessage({
+            text: botResponse,
+            error: `[${streamError.code}] ${USER_ERROR_MESSAGE}`,
+          });
+          setConversation({ messages: [...newMessages, lastMessage] });
+        } else {
+          let error = (e as CohereNetworkError)?.message || STRINGS.generationError;
+
+          if (error === 'network error' && deployment === DEPLOYMENT_COHERE_PLATFORM) {
+            error += ` (${STRINGS.networkErrorSuggestion})`;
+          }
+          setConversation({
+            messages: [
+              ...newMessages,
+              createErrorMessage({
+                text: botResponse,
+                error,
+              }),
+            ],
+          });
+        }
+        setIsStreaming(false);
+        setStreamingMessage(null);
+        setPendingMessage(null);
+      },
+    });
   };
 
   const getChatRequest = (message: string, overrides?: ChatRequestOverrides): CohereChatRequest => {