From 6aefee8571737933367e7f5f3b7718d8d8ef71f3 Mon Sep 17 00:00:00 2001 From: Teresa Hoang <125500434+teresaqhoang@users.noreply.github.com> Date: Fri, 28 Jul 2023 16:46:53 -0700 Subject: [PATCH] Token Usage (#39) ### Motivation and Context This PR adds the token usage feature, in which token usage is calculated and shown per prompt and per session. Each token usage calculation will be split into two values: 1. total tokens used in chat completion of the bot response prompt 2. total tokens used in dependencies used to generate prompt ### Description webapi - Token usage per prompt persists as part of ChatMessage object - Add initial bot message and token usage tracking to ChatHistoryController, and track token usage for the bot response in the ChatMessage and ChatSession models. - Update ChatSkill to save token usage to context variables and return with bot response. - Added token usage calculation in ChatSkill by calculating total token usage for dependency functions and chat completion and sending updated response to client. Copy token usage into original chat context. - Calculate memory extraction token usage by taking into account cumulative semantic memory token usage. - Add TokenUtilities class with a GetFunctionTokenUsage method (token counting moves out of Utilities). Webapp - AppState: Added a new TokenUsage field to track total usage across all chats by app session, and appSlice has been updated to accumulate session token usage. - Update SignalRMiddleware to handle token usage when receiving message updates from server. Update message property to tokenUsage if tokenUsage is defined, otherwise update content. - Fix ChatHistoryTextContent to include TypingIndicator when bot response is generating. - Changed PromptDetails -> PromptDialog component to show prompt details and token usage graph. 
- Removed TypingIndicatorRenderer Token usage shown for ChatMessages of type Message and Plan ![image](https://github.com/microsoft/chat-copilot/assets/125500434/9ae5a262-67ed-400c-8e26-b486f0e307c8) Hardcoded bot responses default to 0 ![image](https://github.com/microsoft/chat-copilot/assets/125500434/0240695a-14ea-4a53-90f7-e2c3f64df0fe) Loading state ![image](https://github.com/microsoft/chat-copilot/assets/125500434/cb3b0ab7-d76e-4404-8a09-7fa8078fbbf1) Info ![image](https://github.com/microsoft/chat-copilot/assets/125500434/9747b239-daa8-4553-ab57-9210c5553211) This is what it will look like in settings dialog once changes go in ![image](https://github.com/microsoft/chat-copilot/assets/125500434/aca7d038-96f9-4d3e-95a7-339bede3ebc7) ### Contribution Checklist - [x] The code builds clean without any errors or warnings - [x] The PR follows the [Contribution Guidelines](https://github.com/microsoft/copilot-chat/blob/main/CONTRIBUTING.md) and the [pre-submission formatting script](https://github.com/microsoft/copilot-chat/blob/main/CONTRIBUTING.md#development-scripts) raises no violations ~~[ ] All unit tests pass, and I have added new tests where possible~~ - [x] I didn't break anyone :smile: --------- Co-authored-by: GitHub Actions --- .../Controllers/ChatHistoryController.cs | 14 +- .../Controllers/ChatMemoryController.cs | 2 +- .../Controllers/DocumentImportController.cs | 2 +- webapi/CopilotChat/Models/ChatMessage.cs | 17 +- .../CopilotChat/Models/CreateChatResponse.cs | 36 ++++ .../Skills/ChatSkills/ChatSkill.cs | 97 +++++++++-- .../Skills/ChatSkills/DocumentMemorySkill.cs | 2 +- .../ChatSkills/ExternalInformationSkill.cs | 13 +- .../ChatSkills/SemanticChatMemoryExtractor.cs | 6 +- .../ChatSkills/SemanticChatMemorySkill.cs | 2 +- webapi/CopilotChat/Skills/TokenUtilities.cs | 89 ++++++++++ webapi/CopilotChat/Skills/Utilities.cs | 6 - webapi/Services/IOcrEngine.cs | 1 - webapp/src/Constants.ts | 4 +- webapp/src/components/chat/ChatInput.tsx | 6 +- 
webapp/src/components/chat/ChatRoom.tsx | 5 +- webapp/src/components/chat/ChatStatus.tsx | 37 +++- .../chat/chat-history/ChatHistoryItem.tsx | 4 +- .../chat-history/ChatHistoryTextContent.tsx | 23 ++- .../chat/prompt-details/PromptDetails.tsx | 51 ------ .../chat/prompt-dialog/PromptDialog.tsx | 73 ++++++++ .../TypingIndicatorRenderer.tsx | 51 ------ .../header/settings-dialog/SettingsDialog.tsx | 4 +- webapp/src/components/shared/Alerts.tsx | 11 +- .../components/token-usage/TokenUsageBar.tsx | 36 ++++ .../token-usage/TokenUsageGraph.tsx | 164 ++++++++++++++++++ .../token-usage/TokenUsageLegendItem.tsx | 69 ++++++++ .../token-usage/TokenUsageLegendLabel.tsx | 23 +++ webapp/src/libs/hooks/useChat.ts | 31 ++-- webapp/src/libs/models/ChatMessage.ts | 3 + webapp/src/libs/models/ChatSession.ts | 3 + webapp/src/libs/models/TokenUsage.ts | 25 +++ .../libs/semantic-kernel/model/AskResult.ts | 8 +- webapp/src/libs/services/ChatService.ts | 6 +- webapp/src/redux/features/app/AppState.ts | 3 + webapp/src/redux/features/app/appSlice.ts | 33 +++- .../message-relay/signalRMiddleware.ts | 25 +-- webapp/src/styles.tsx | 2 +- 38 files changed, 776 insertions(+), 211 deletions(-) create mode 100644 webapi/CopilotChat/Models/CreateChatResponse.cs create mode 100644 webapi/CopilotChat/Skills/TokenUtilities.cs delete mode 100644 webapp/src/components/chat/prompt-details/PromptDetails.tsx create mode 100644 webapp/src/components/chat/prompt-dialog/PromptDialog.tsx delete mode 100644 webapp/src/components/chat/typing-indicator/TypingIndicatorRenderer.tsx create mode 100644 webapp/src/components/token-usage/TokenUsageBar.tsx create mode 100644 webapp/src/components/token-usage/TokenUsageGraph.tsx create mode 100644 webapp/src/components/token-usage/TokenUsageLegendItem.tsx create mode 100644 webapp/src/components/token-usage/TokenUsageLegendLabel.tsx create mode 100644 webapp/src/libs/models/TokenUsage.ts diff --git a/webapi/CopilotChat/Controllers/ChatHistoryController.cs 
b/webapi/CopilotChat/Controllers/ChatHistoryController.cs index b01799eed..c9b930fdb 100644 --- a/webapi/CopilotChat/Controllers/ChatHistoryController.cs +++ b/webapi/CopilotChat/Controllers/ChatHistoryController.cs @@ -14,6 +14,7 @@ using SemanticKernel.Service.CopilotChat.Hubs; using SemanticKernel.Service.CopilotChat.Models; using SemanticKernel.Service.CopilotChat.Options; +using SemanticKernel.Service.CopilotChat.Skills; using SemanticKernel.Service.CopilotChat.Storage; namespace SemanticKernel.Service.CopilotChat.Controllers; @@ -80,19 +81,22 @@ public async Task CreateChatSessionAsync([FromBody] CreateChatPar var newChat = new ChatSession(chatParameter.Title, this._promptOptions.SystemDescription); await this._sessionRepository.CreateAsync(newChat); - var initialBotMessage = this._promptOptions.InitialBotMessage; - // The initial bot message doesn't need a prompt. + // Create initial bot message var chatMessage = ChatMessage.CreateBotResponseMessage( newChat.Id, - initialBotMessage, - string.Empty); + this._promptOptions.InitialBotMessage, + string.Empty, // The initial bot message doesn't need a prompt. 
+ TokenUtilities.EmptyTokenUsages()); await this._messageRepository.CreateAsync(chatMessage); // Add the user to the chat session await this._participantRepository.CreateAsync(new ChatParticipant(chatParameter.UserId, newChat.Id)); this._logger.LogDebug("Created chat session with id {0}.", newChat.Id); - return this.CreatedAtAction(nameof(this.GetChatSessionByIdAsync), new { chatId = newChat.Id }, newChat); + return this.CreatedAtAction( + nameof(this.GetChatSessionByIdAsync), + new { chatId = newChat.Id }, + new CreateChatResponse(newChat, chatMessage)); } /// diff --git a/webapi/CopilotChat/Controllers/ChatMemoryController.cs b/webapi/CopilotChat/Controllers/ChatMemoryController.cs index 51d309310..a2867bc5b 100644 --- a/webapi/CopilotChat/Controllers/ChatMemoryController.cs +++ b/webapi/CopilotChat/Controllers/ChatMemoryController.cs @@ -103,4 +103,4 @@ private bool ValidateMemoryName(string memoryName) } # endregion -} \ No newline at end of file +} diff --git a/webapi/CopilotChat/Controllers/DocumentImportController.cs b/webapi/CopilotChat/Controllers/DocumentImportController.cs index 7b665d036..0147eb21d 100644 --- a/webapi/CopilotChat/Controllers/DocumentImportController.cs +++ b/webapi/CopilotChat/Controllers/DocumentImportController.cs @@ -548,7 +548,7 @@ await kernel.Memory.SaveInformationAsync( id: key, description: $"Document: {documentName}"); importResult.AddKey(key); - importResult.Tokens += Utilities.TokenCount(paragraph); + importResult.Tokens += TokenUtilities.TokenCount(paragraph); } this._logger.LogInformation( diff --git a/webapi/CopilotChat/Models/ChatMessage.cs b/webapi/CopilotChat/Models/ChatMessage.cs index 16d3c5d2a..86c218ab6 100644 --- a/webapi/CopilotChat/Models/ChatMessage.cs +++ b/webapi/CopilotChat/Models/ChatMessage.cs @@ -1,6 +1,7 @@ // Copyright (c) Microsoft. All rights reserved. 
using System; +using System.Collections.Generic; using System.Globalization; using System.Text.Json; using System.Text.Json.Serialization; @@ -110,6 +111,12 @@ public enum ChatMessageType [JsonPropertyName("type")] public ChatMessageType Type { get; set; } + /// + /// Counts of total token usage used to generate bot response. + /// + [JsonPropertyName("tokenUsage")] + public Dictionary? TokenUsage { get; set; } + /// /// Create a new chat message. Timestamp is automatically generated. /// @@ -120,6 +127,7 @@ public enum ChatMessageType /// The prompt used to generate the message /// Role of the author /// Type of the message + /// Total token usages used to generate bot response public ChatMessage( string userId, string userName, @@ -127,7 +135,8 @@ public ChatMessage( string content, string prompt = "", AuthorRoles authorRole = AuthorRoles.User, - ChatMessageType type = ChatMessageType.Message) + ChatMessageType type = ChatMessageType.Message, + Dictionary? tokenUsage = null) { this.Timestamp = DateTimeOffset.Now; this.UserId = userId; @@ -138,6 +147,7 @@ public ChatMessage( this.Prompt = prompt; this.AuthorRole = authorRole; this.Type = type; + this.TokenUsage = tokenUsage; } /// @@ -146,9 +156,10 @@ public ChatMessage( /// The chat ID that this message belongs to /// The message /// The prompt used to generate the message - public static ChatMessage CreateBotResponseMessage(string chatId, string content, string prompt) + /// Total token usage of response completion + public static ChatMessage CreateBotResponseMessage(string chatId, string content, string prompt, Dictionary? tokenUsage = null) { - return new ChatMessage("bot", "bot", chatId, content, prompt, AuthorRoles.Bot, IsPlan(content) ? ChatMessageType.Plan : ChatMessageType.Message); + return new ChatMessage("bot", "bot", chatId, content, prompt, AuthorRoles.Bot, IsPlan(content) ? 
ChatMessageType.Plan : ChatMessageType.Message, tokenUsage); } /// diff --git a/webapi/CopilotChat/Models/CreateChatResponse.cs b/webapi/CopilotChat/Models/CreateChatResponse.cs new file mode 100644 index 000000000..61d63f3bf --- /dev/null +++ b/webapi/CopilotChat/Models/CreateChatResponse.cs @@ -0,0 +1,36 @@ +// Copyright (c) Microsoft. All rights reserved. + +using System.Text.Json.Serialization; + +namespace SemanticKernel.Service.CopilotChat.Models; + +/// +/// Response to chatSession/create request. +/// +public class CreateChatResponse +{ + /// + /// ID that is persistent and unique to new chat session. + /// + [JsonPropertyName("id")] + public string Id { get; set; } + + /// + /// Title of the chat. + /// + [JsonPropertyName("title")] + public string Title { get; set; } + + /// + /// Initial bot message. + /// + [JsonPropertyName("initialBotMessage")] + public ChatMessage? InitialBotMessage { get; set; } + + public CreateChatResponse(ChatSession chatSession, ChatMessage initialBotMessage) + { + this.Id = chatSession.Id; + this.Title = chatSession.Title; + this.InitialBotMessage = initialBotMessage; + } +} diff --git a/webapi/CopilotChat/Skills/ChatSkills/ChatSkill.cs b/webapi/CopilotChat/Skills/ChatSkills/ChatSkill.cs index b88d5335f..dedbce976 100644 --- a/webapi/CopilotChat/Skills/ChatSkills/ChatSkill.cs +++ b/webapi/CopilotChat/Skills/ChatSkills/ChatSkill.cs @@ -116,7 +116,7 @@ public async Task ExtractUserIntentAsync(SKContext context) var historyTokenBudget = tokenLimit - this._promptOptions.ResponseTokenLimit - - Utilities.TokenCount(string.Join("\n", new string[] + TokenUtilities.TokenCount(string.Join("\n", new string[] { this._promptOptions.SystemDescription, this._promptOptions.SystemIntent, @@ -139,6 +139,9 @@ public async Task ExtractUserIntentAsync(SKContext context) settings: this.CreateIntentCompletionSettings() ); + // Get token usage from ChatCompletion result and add to context + TokenUtilities.GetFunctionTokenUsage(result, context, 
"SystemIntentExtraction"); + if (result.ErrorOccurred) { context.Log.LogError("{0}: {1}", result.LastErrorDescription, result.LastException); @@ -161,7 +164,7 @@ public async Task ExtractAudienceAsync(SKContext context) var historyTokenBudget = tokenLimit - this._promptOptions.ResponseTokenLimit - - Utilities.TokenCount(string.Join("\n", new string[] + TokenUtilities.TokenCount(string.Join("\n", new string[] { this._promptOptions.SystemAudience, this._promptOptions.SystemAudienceContinuation, @@ -182,6 +185,9 @@ public async Task ExtractAudienceAsync(SKContext context) settings: this.CreateIntentCompletionSettings() ); + // Get token usage from ChatCompletion result and add to context + TokenUtilities.GetFunctionTokenUsage(result, context, "SystemAudienceExtraction"); + if (result.ErrorOccurred) { context.Log.LogError("{0}: {1}", result.LastErrorDescription, result.LastException); @@ -229,7 +235,7 @@ public async Task ExtractChatHistoryAsync( } } - var tokenCount = Utilities.TokenCount(formattedMessage); + var tokenCount = TokenUtilities.TokenCount(formattedMessage); if (remainingToken - tokenCount >= 0) { @@ -262,7 +268,7 @@ public async Task ChatAsync( SKContext context) { // Set the system description in the prompt options - await SetSystemDescriptionAsync(chatId); + await this.SetSystemDescriptionAsync(chatId); // Save this new message to memory such that subsequent chat responses can use it await this.UpdateBotResponseStatusOnClient(chatId, "Saving user message to chat history"); @@ -284,7 +290,7 @@ public async Task ChatAsync( // Save hardcoded response if user cancelled plan if (chatContext.Variables.ContainsKey("userCancelledPlan")) { - await this.SaveNewResponseAsync("I am sorry the plan did not meet your goals.", string.Empty, chatId, userId); + await this.SaveNewResponseAsync("I am sorry the plan did not meet your goals.", string.Empty, chatId, userId, TokenUtilities.EmptyTokenUsages()); return context; } @@ -296,6 +302,7 @@ public async Task ChatAsync( 
return context; } + context.Variables.Set("tokenUsage", JsonSerializer.Serialize(chatMessage.TokenUsage)); return context; } @@ -350,7 +357,11 @@ public async Task ChatAsync( chatContext.Variables.Set("prompt", prompt); // Save a new response to the chat history with the proposed plan content - return await this.SaveNewResponseAsync(JsonSerializer.Serialize(proposedPlan), prompt, chatId, userId); + return await this.SaveNewResponseAsync( + JsonSerializer.Serialize(proposedPlan), prompt, chatId, userId, + // TODO: [Issue #2106] Accommodate plan token usage differently + this.GetTokenUsagesAsync(chatContext) + ); } // Query relevant semantic and document memories @@ -376,7 +387,7 @@ public async Task ChatAsync( // Fill in the chat history if there is any token budget left var chatContextComponents = new List() { chatMemories, documentMemories, planResult }; var chatContextText = string.Join("\n\n", chatContextComponents.Where(c => !string.IsNullOrEmpty(c))); - var chatHistoryTokenLimit = remainingToken - Utilities.TokenCount(chatContextText); + var chatHistoryTokenLimit = remainingToken - TokenUtilities.TokenCount(chatContextText); if (chatHistoryTokenLimit > 0) { await this.UpdateBotResponseStatusOnClient(chatId, "Extracting chat history"); @@ -399,6 +410,7 @@ public async Task ChatAsync( this._promptOptions.SystemChatPrompt, chatContext); chatContext.Variables.Set("prompt", renderedPrompt); + chatContext.Variables.Set(TokenUtilities.GetFunctionKey(chatContext.Log, "SystemMetaPrompt")!, TokenUtilities.TokenCount(renderedPrompt).ToString(CultureInfo.InvariantCulture)); if (chatContext.ErrorOccurred) { @@ -417,9 +429,15 @@ await SemanticChatMemoryExtractor.ExtractSemanticChatMemoryAsync( chatContext, this._promptOptions); - // Save the message + // Calculate total token usage for dependency functions and prompt template and send to client + await this.UpdateBotResponseStatusOnClient(chatId, "Calculating token usage"); + chatMessage.TokenUsage = 
this.GetTokenUsagesAsync(chatContext, chatMessage.Content); + await this.UpdateMessageOnClient(chatMessage); + + // Save the message with final completion token usage await this.UpdateBotResponseStatusOnClient(chatId, "Saving message to chat history"); await this._chatMessageRepository.UpsertAsync(chatMessage); + return chatMessage; } @@ -442,6 +460,13 @@ private async Task GetAudienceAsync(SKContext context) var audience = await this.ExtractAudienceAsync(audienceContext); + // Copy token usage into original chat context + var functionKey = TokenUtilities.GetFunctionKey(context.Log, "SystemAudienceExtraction")!; + if (audienceContext.Variables.TryGetValue(functionKey, out string? tokenUsage)) + { + context.Variables.Set(functionKey, tokenUsage); + } + // Propagate the error if (audienceContext.ErrorOccurred) { @@ -473,6 +498,14 @@ private async Task GetUserIntentAsync(SKContext context) ); userIntent = await this.ExtractUserIntentAsync(intentContext); + + // Copy token usage into original chat context + var functionKey = TokenUtilities.GetFunctionKey(context.Log, "SystemIntentExtraction")!; + if (intentContext.Variables.TryGetValue(functionKey!, out string? tokenUsage)) + { + context.Variables.Set(functionKey!, tokenUsage); + } + // Propagate the error if (intentContext.ErrorOccurred) { @@ -579,8 +612,9 @@ private async Task SaveNewMessageAsync(string message, string userI /// Prompt used to generate the response. /// The chat ID /// The user ID + /// Total token usage of response completion /// The created chat message. - private async Task SaveNewResponseAsync(string response, string prompt, string chatId, string userId) + private async Task SaveNewResponseAsync(string response, string prompt, string chatId, string userId, Dictionary? tokenUsage) { // Make sure the chat exists. 
if (!await this._chatSessionRepository.TryFindByIdAsync(chatId, v => _ = v)) @@ -651,10 +685,10 @@ private int GetChatContextTokenLimit(string audience, string userIntent) var tokenLimit = this._promptOptions.CompletionTokenLimit; var remainingToken = tokenLimit - - Utilities.TokenCount(audience) - - Utilities.TokenCount(userIntent) - + TokenUtilities.TokenCount(audience) - + TokenUtilities.TokenCount(userIntent) - this._promptOptions.ResponseTokenLimit - - Utilities.TokenCount(string.Join("\n", new string[] + TokenUtilities.TokenCount(string.Join("\n", new string[] { this._promptOptions.SystemDescription, this._promptOptions.SystemResponse, @@ -665,6 +699,33 @@ private int GetChatContextTokenLimit(string audience, string userIntent) return remainingToken; } + /// + /// Gets token usage totals for each semantic function if not undefined. + /// + /// Context maintained during response generation. + /// String representing bot response. If null, response is still being generated or was hardcoded. + /// Dictionary containing function to token usage mapping for each total that's defined. + private Dictionary GetTokenUsagesAsync(SKContext chatContext, string? content = null) + { + var tokenUsageDict = new Dictionary(StringComparer.OrdinalIgnoreCase); + + // Total token usage of each semantic function + foreach (string function in TokenUtilities.semanticFunctions.Values) + { + if (chatContext.Variables.TryGetValue($"{function}TokenUsage", out string? tokenUsage)) + { + tokenUsageDict.Add(function, int.Parse(tokenUsage, CultureInfo.InvariantCulture)); + } + } + + if (content != null) + { + tokenUsageDict.Add(TokenUtilities.semanticFunctions["SystemCompletion"]!, TokenUtilities.TokenCount(content)); + } + + return tokenUsageDict; + } + /// /// Stream the response to the client. 
/// @@ -685,7 +746,7 @@ private async Task StreamResponseToClient(string chatId, string use await foreach (string contentPiece in stream) { chatMessage.Content += contentPiece; - await this.UpdateMessageContentOnClient(chatId, chatMessage); + await this.UpdateMessageOnClient(chatMessage); } return chatMessage; @@ -698,10 +759,11 @@ private async Task StreamResponseToClient(string chatId, string use /// The user ID /// Prompt used to generate the message /// Content of the message + /// Total token usage of response completion /// The created chat message - private async Task CreateBotMessageOnClient(string chatId, string userId, string prompt, string content) + private async Task CreateBotMessageOnClient(string chatId, string userId, string prompt, string content, Dictionary? tokenUsage = null) { - var chatMessage = ChatMessage.CreateBotResponseMessage(chatId, content, prompt); + var chatMessage = ChatMessage.CreateBotResponseMessage(chatId, content, prompt, tokenUsage); await this._messageRelayHubContext.Clients.Group(chatId).SendAsync("ReceiveMessage", chatId, userId, chatMessage); return chatMessage; } @@ -709,11 +771,10 @@ private async Task CreateBotMessageOnClient(string chatId, string u /// /// Update the response on the client. 
/// - /// The chat ID /// The message - private async Task UpdateMessageContentOnClient(string chatId, ChatMessage message) + private async Task UpdateMessageOnClient(ChatMessage message) { - await this._messageRelayHubContext.Clients.Group(chatId).SendAsync("ReceiveMessageStream", chatId, message.Id, message.Content); + await this._messageRelayHubContext.Clients.Group(message.ChatId).SendAsync("ReceiveMessageUpdate", message); } /// diff --git a/webapi/CopilotChat/Skills/ChatSkills/DocumentMemorySkill.cs b/webapi/CopilotChat/Skills/ChatSkills/DocumentMemorySkill.cs index ac9f87938..9fce9043f 100644 --- a/webapi/CopilotChat/Skills/ChatSkills/DocumentMemorySkill.cs +++ b/webapi/CopilotChat/Skills/ChatSkills/DocumentMemorySkill.cs @@ -78,7 +78,7 @@ public async Task QueryDocumentsAsync( string documentsText = string.Empty; foreach (var memory in relevantMemories) { - var tokenCount = Utilities.TokenCount(memory.Metadata.Text); + var tokenCount = TokenUtilities.TokenCount(memory.Metadata.Text); if (remainingToken - tokenCount > 0) { documentsText += $"\n\nSnippet from {memory.Metadata.Description}: {memory.Metadata.Text}"; diff --git a/webapi/CopilotChat/Skills/ChatSkills/ExternalInformationSkill.cs b/webapi/CopilotChat/Skills/ChatSkills/ExternalInformationSkill.cs index 38e0a2836..d45861c8b 100644 --- a/webapi/CopilotChat/Skills/ChatSkills/ExternalInformationSkill.cs +++ b/webapi/CopilotChat/Skills/ChatSkills/ExternalInformationSkill.cs @@ -72,6 +72,7 @@ public async Task AcquireExternalInformationAsync( [Description("The intent to whether external information is needed")] string userIntent, SKContext context) { + // TODO: [Issue #2106] Calculate planner and plan token usage FunctionsView functions = this._planner.Kernel.Skills.GetFunctionsView(true, true); if (functions.NativeFunctions.IsEmpty && functions.SemanticFunctions.IsEmpty) { @@ -100,8 +101,8 @@ public async Task AcquireExternalInformationAsync( newPlanContext = await plan.InvokeAsync(newPlanContext); int 
tokenLimit = int.Parse(context["tokenLimit"], new NumberFormatInfo()) - - Utilities.TokenCount(PromptPreamble) - - Utilities.TokenCount(PromptPostamble); + TokenUtilities.TokenCount(PromptPreamble) - + TokenUtilities.TokenCount(PromptPostamble); // The result of the plan may be from an OpenAPI skill. Attempt to extract JSON from the response. bool extractJsonFromOpenApi = @@ -238,7 +239,7 @@ private string OptimizeOpenApiSkillJson(string jsonContent, int tokenLimit, Plan document = JsonDocument.Parse(jsonContent); } - int jsonContentTokenCount = Utilities.TokenCount(jsonContent); + int jsonContentTokenCount = TokenUtilities.TokenCount(jsonContent); // Return the JSON content if it does not exceed the token limit if (jsonContentTokenCount < tokenLimit) @@ -264,7 +265,7 @@ private string OptimizeOpenApiSkillJson(string jsonContent, int tokenLimit, Plan { // Save property name for result interpolation JsonProperty firstProperty = document.RootElement.EnumerateObject().First(); - tokenLimit -= Utilities.TokenCount(firstProperty.Name); + tokenLimit -= TokenUtilities.TokenCount(firstProperty.Name); resultsDescriptor = string.Format(CultureInfo.InvariantCulture, "{0}: ", firstProperty.Name); // Extract object to be truncated @@ -279,7 +280,7 @@ private string OptimizeOpenApiSkillJson(string jsonContent, int tokenLimit, Plan { foreach (JsonProperty property in document.RootElement.EnumerateObject()) { - int propertyTokenCount = Utilities.TokenCount(property.ToString()); + int propertyTokenCount = TokenUtilities.TokenCount(property.ToString()); if (tokenLimit - propertyTokenCount > 0) { @@ -299,7 +300,7 @@ private string OptimizeOpenApiSkillJson(string jsonContent, int tokenLimit, Plan { foreach (JsonElement item in document.RootElement.EnumerateArray()) { - int itemTokenCount = Utilities.TokenCount(item.ToString()); + int itemTokenCount = TokenUtilities.TokenCount(item.ToString()); if (tokenLimit - itemTokenCount > 0) { diff --git 
a/webapi/CopilotChat/Skills/ChatSkills/SemanticChatMemoryExtractor.cs b/webapi/CopilotChat/Skills/ChatSkills/SemanticChatMemoryExtractor.cs index a2a2461c7..14bac7b23 100644 --- a/webapi/CopilotChat/Skills/ChatSkills/SemanticChatMemoryExtractor.cs +++ b/webapi/CopilotChat/Skills/ChatSkills/SemanticChatMemoryExtractor.cs @@ -87,7 +87,7 @@ internal static async Task ExtractCognitiveMemoryAsync( var remainingToken = tokenLimit - options.ResponseTokenLimit - - Utilities.TokenCount(memoryPrompt); ; + TokenUtilities.TokenCount(memoryPrompt); ; var memoryExtractionContext = Utilities.CopyContextWithVariablesClone(context); memoryExtractionContext.Variables.Set("tokenLimit", remainingToken.ToString(new NumberFormatInfo())); @@ -101,6 +101,10 @@ internal static async Task ExtractCognitiveMemoryAsync( settings: CreateMemoryExtractionSettings(options) ); + // Get token usage from ChatCompletion result and add to context + // Since there are multiple memory types, total token usage is calculated by cumulating the token usage of each memory type. 
+ TokenUtilities.GetFunctionTokenUsage(result, context, $"SystemCognitive_{memoryName}"); + SemanticChatMemory memory = SemanticChatMemory.FromJson(result.ToString()); return memory; } diff --git a/webapi/CopilotChat/Skills/ChatSkills/SemanticChatMemorySkill.cs b/webapi/CopilotChat/Skills/ChatSkills/SemanticChatMemorySkill.cs index 8f15d88f5..7998fd6ef 100644 --- a/webapi/CopilotChat/Skills/ChatSkills/SemanticChatMemorySkill.cs +++ b/webapi/CopilotChat/Skills/ChatSkills/SemanticChatMemorySkill.cs @@ -75,7 +75,7 @@ public async Task QueryMemoriesAsync( string memoryText = ""; foreach (var memory in relevantMemories) { - var tokenCount = Utilities.TokenCount(memory.Metadata.Text); + var tokenCount = TokenUtilities.TokenCount(memory.Metadata.Text); if (remainingToken - tokenCount > 0) { memoryText += $"\n[{memory.Metadata.Description}] {memory.Metadata.Text}"; diff --git a/webapi/CopilotChat/Skills/TokenUtilities.cs b/webapi/CopilotChat/Skills/TokenUtilities.cs new file mode 100644 index 000000000..b463cec86 --- /dev/null +++ b/webapi/CopilotChat/Skills/TokenUtilities.cs @@ -0,0 +1,89 @@ +// Copyright (c) Microsoft. All rights reserved. + +using System; +using System.Collections.Generic; +using System.Globalization; +using System.Linq; +using Azure.AI.OpenAI; +using Microsoft.Extensions.Logging; +using Microsoft.SemanticKernel.Connectors.AI.OpenAI.Tokenizers; +using Microsoft.SemanticKernel.Orchestration; + +namespace SemanticKernel.Service.CopilotChat.Skills; + +/// +/// Utility methods for token management. +/// +public static class TokenUtilities +{ + /// + /// Semantic dependencies of ChatSkill. + /// If you add a new semantic dependency, please add it here. + /// + public static readonly Dictionary semanticFunctions = new() + { + // TODO: [Issue #2106] Calculate token usage for planner dependencies. 
+ { "SystemAudienceExtraction", "audienceExtraction" }, + { "SystemIntentExtraction", "userIntentExtraction" }, + { "SystemMetaPrompt", "metaPromptTemplate" }, + { "SystemCompletion", "responseCompletion"}, + { "SystemCognitive_WorkingMemory", "workingMemoryExtraction" }, + { "SystemCognitive_LongTermMemory", "longTermMemoryExtraction" } + }; + + /// + /// Gets dictionary containing empty token usage totals. + /// Use for responses that are hardcoded and/or do not have semantic (token) dependencies. + /// + internal static Dictionary EmptyTokenUsages() + { + return semanticFunctions.Values.ToDictionary(v => v, v => 0, StringComparer.OrdinalIgnoreCase); + } + + /// + /// Gets key used to identify function token usage in context variables. + /// + /// The logger instance to use for logging errors. + /// Name of semantic function. + /// The key corresponding to the semantic function name, or null if the function name is unknown. + internal static string? GetFunctionKey(ILogger logger, string? functionName) + { + if (functionName == null || !semanticFunctions.TryGetValue(functionName, out string? key)) + { + logger.LogError("Unknown token dependency {0}. Please define function as semanticFunctions entry in TokenUtilities.cs", functionName); + return null; + }; + + return $"{key}TokenUsage"; + } + + /// + /// Gets the total token usage from a Chat or Text Completion result context and adds it as a variable to response context. + /// + /// Result context of chat completion + /// Context maintained during response generation. + /// Name of the function that invoked the chat completion. + /// true if token usage is found in result context; otherwise, false. + internal static void GetFunctionTokenUsage(SKContext result, SKContext chatContext, string? 
functionName = null) + { + var functionKey = GetFunctionKey(chatContext.Log, functionName); + if (functionKey == null) + { + return; + } + + if (result.ModelResults == null || result.ModelResults.Count == 0) + { + chatContext.Log.LogError("Unable to determine token usage for {0}", functionKey); + return; + } + + var tokenUsage = result.ModelResults.First().GetResult().Usage.TotalTokens; + chatContext.Variables.Set(functionKey!, tokenUsage.ToString(CultureInfo.InvariantCulture)); + } + + /// + /// Calculate the number of tokens in a string. + /// + internal static int TokenCount(string text) => GPT3Tokenizer.Encode(text).Count; +} diff --git a/webapi/CopilotChat/Skills/Utilities.cs b/webapi/CopilotChat/Skills/Utilities.cs index a11419dd1..25c225d16 100644 --- a/webapi/CopilotChat/Skills/Utilities.cs +++ b/webapi/CopilotChat/Skills/Utilities.cs @@ -1,6 +1,5 @@ //Copyright (c) Microsoft. All rights reserved. -using Microsoft.SemanticKernel.Connectors.AI.OpenAI.Tokenizers; using Microsoft.SemanticKernel.Orchestration; namespace SemanticKernel.Service.CopilotChat.Skills; @@ -24,9 +23,4 @@ internal static SKContext CopyContextWithVariablesClone(SKContext context) context.Skills, context.Log, context.CancellationToken); - - /// - /// Calculate the number of tokens in a string. - /// - internal static int TokenCount(string text) => GPT3Tokenizer.Encode(text).Count; } diff --git a/webapi/Services/IOcrEngine.cs b/webapi/Services/IOcrEngine.cs index 499279bad..bd1527676 100644 --- a/webapi/Services/IOcrEngine.cs +++ b/webapi/Services/IOcrEngine.cs @@ -10,7 +10,6 @@ namespace SemanticKernel.Service.Services; /// public interface IOcrEngine { - /// /// Reads all text from the image file. 
/// diff --git a/webapp/src/Constants.ts b/webapp/src/Constants.ts index 334ac8bad..6f83a901f 100644 --- a/webapp/src/Constants.ts +++ b/webapp/src/Constants.ts @@ -47,6 +47,6 @@ export const Constants = { msGraphScopes: ['Calendars.Read', 'Mail.Read', 'Mail.Send', 'Tasks.ReadWrite', 'User.Read'], // All OpenAI plugin manifest files should be located at this path per OpenAI requirements: "https://platform.openai.com/docs/plugins/getting-started/plugin-manifest MANIFEST_PATH: '/.well-known/ai-plugin.json', - }, - KEYSTROKE_DEBOUNCE_TIME_MS: 250 + }, + KEYSTROKE_DEBOUNCE_TIME_MS: 250, }; diff --git a/webapp/src/components/chat/ChatInput.tsx b/webapp/src/components/chat/ChatInput.tsx index ff606f8a2..8690b9a52 100644 --- a/webapp/src/components/chat/ChatInput.tsx +++ b/webapp/src/components/chat/ChatInput.tsx @@ -8,15 +8,13 @@ import * as speechSdk from 'microsoft-cognitiveservices-speech-sdk'; import React, { useRef } from 'react'; import { Constants } from '../../Constants'; import { AuthHelper } from '../../libs/auth/AuthHelper'; +import { GetResponseOptions, useChat } from '../../libs/hooks/useChat'; import { AlertType } from '../../libs/models/AlertType'; import { ChatMessageType } from '../../libs/models/ChatMessage'; -import { GetResponseOptions, useChat } from '../../libs/hooks/useChat'; import { useAppDispatch, useAppSelector } from '../../redux/app/hooks'; import { RootState } from '../../redux/app/store'; import { addAlert } from '../../redux/features/app/appSlice'; -import { - editConversationInput, updateBotResponseStatus -} from '../../redux/features/conversations/conversationsSlice'; +import { editConversationInput, updateBotResponseStatus } from '../../redux/features/conversations/conversationsSlice'; import { Alerts } from '../shared/Alerts'; import { SpeechService } from './../../libs/services/SpeechService'; import { updateUserIsTyping } from './../../redux/features/conversations/conversationsSlice'; diff --git 
a/webapp/src/components/chat/ChatRoom.tsx b/webapp/src/components/chat/ChatRoom.tsx index 1d23dd47a..86dd8c419 100644 --- a/webapp/src/components/chat/ChatRoom.tsx +++ b/webapp/src/components/chat/ChatRoom.tsx @@ -4,8 +4,8 @@ import { makeStyles, shorthands, tokens } from '@fluentui/react-components'; import debug from 'debug'; import React from 'react'; import { Constants } from '../../Constants'; -import { AuthorRoles, IChatMessage } from '../../libs/models/ChatMessage'; import { GetResponseOptions, useChat } from '../../libs/hooks/useChat'; +import { AuthorRoles, IChatMessage } from '../../libs/models/ChatMessage'; import { useAppDispatch, useAppSelector } from '../../redux/app/hooks'; import { RootState } from '../../redux/app/store'; import { addMessageToConversationFromUser } from '../../redux/features/conversations/conversationsSlice'; @@ -89,6 +89,7 @@ export const ChatRoom: React.FC = () => { log('submitting user chat message'); const chatInput: IChatMessage = { + chatId: selectedId, timestamp: new Date().getTime(), userId: activeUserInfo?.id as string, userName: activeUserInfo?.username as string, @@ -116,4 +117,4 @@ export const ChatRoom: React.FC = () => { ); -}; \ No newline at end of file +}; diff --git a/webapp/src/components/chat/ChatStatus.tsx b/webapp/src/components/chat/ChatStatus.tsx index fb17d3394..9cc7b6737 100644 --- a/webapp/src/components/chat/ChatStatus.tsx +++ b/webapp/src/components/chat/ChatStatus.tsx @@ -1,12 +1,23 @@ // Copyright (c) Microsoft. All rights reserved. 
+import { makeStyles } from '@fluentui/react-components'; +import { Animation } from '@fluentui/react-northstar'; import React from 'react'; import { IChatUser } from '../../libs/models/ChatUser'; import { useAppSelector } from '../../redux/app/hooks'; import { RootState } from '../../redux/app/store'; -import { TypingIndicatorRenderer } from './typing-indicator/TypingIndicatorRenderer'; +import { TypingIndicator } from './typing-indicator/TypingIndicator'; + +const useClasses = makeStyles({ + root: { + display: 'flex', + flexDirection: 'row', + }, +}); export const ChatStatus: React.FC = () => { + const classes = useClasses(); + const { conversations, selectedId } = useAppSelector((state: RootState) => state.conversations); const { users } = conversations[selectedId]; const { activeUserInfo } = useAppSelector((state: RootState) => state.app); @@ -23,10 +34,26 @@ export const ChatStatus: React.FC = () => { checkAreTyping(); }, [activeUserInfo, users]); + let message = conversations[selectedId].botResponseStatus; + const numberOfUsersTyping = typingUserList.length; + if (numberOfUsersTyping === 1) { + message = message ? `${message} and a user is typing` : 'A user is typing'; + } else if (numberOfUsersTyping > 1) { + message = message + ? `${message} and ${numberOfUsersTyping} users are typing` + : `${numberOfUsersTyping} users are typing`; + } + + if (!message) { + return null; + } + return ( - + +
+ + +
+
); }; diff --git a/webapp/src/components/chat/chat-history/ChatHistoryItem.tsx b/webapp/src/components/chat/chat-history/ChatHistoryItem.tsx index 39cbc584c..66d63fdc7 100644 --- a/webapp/src/components/chat/chat-history/ChatHistoryItem.tsx +++ b/webapp/src/components/chat/chat-history/ChatHistoryItem.tsx @@ -11,7 +11,7 @@ import { FeatureKeys } from '../../../redux/features/app/AppState'; import { Breakpoints, customTokens } from '../../../styles'; import { timestampToDateString } from '../../utils/TextUtils'; import { PlanViewer } from '../plan-viewer/PlanViewer'; -import { PromptDetails } from '../prompt-details/PromptDetails'; +import { PromptDialog } from '../prompt-dialog/PromptDialog'; import * as utils from './../../utils/TextUtils'; import { ChatHistoryDocumentContent } from './ChatHistoryDocumentContent'; import { ChatHistoryTextContent } from './ChatHistoryTextContent'; @@ -132,7 +132,7 @@ export const ChatHistoryItem: React.FC = ({ message, getRe
{!isMe && {fullName}} {timestampToDateString(message.timestamp, true)} - {isBot && } + {isBot && }
{content} {showShowRLHFMessage && } diff --git a/webapp/src/components/chat/chat-history/ChatHistoryTextContent.tsx b/webapp/src/components/chat/chat-history/ChatHistoryTextContent.tsx index bdba245b3..9f5028d5f 100644 --- a/webapp/src/components/chat/chat-history/ChatHistoryTextContent.tsx +++ b/webapp/src/components/chat/chat-history/ChatHistoryTextContent.tsx @@ -4,6 +4,7 @@ import { makeStyles } from '@fluentui/react-components'; import React from 'react'; import { IChatMessage } from '../../../libs/models/ChatMessage'; import { convertToAnchorTags } from '../../utils/TextUtils'; +import { TypingIndicator } from '../typing-indicator/TypingIndicator'; import * as utils from './../../utils/TextUtils'; const useClasses = makeStyles({ @@ -19,14 +20,18 @@ interface ChatHistoryTextContentProps { export const ChatHistoryTextContent: React.FC = ({ message }) => { const classes = useClasses(); - let content = message.content.trim() - .replace(/[\u00A0-\u9999<>&]/g, function (i: string) { - return `&#${i.charCodeAt(0)};`; - }); + let content = message.content.trim().replace(/[\u00A0-\u9999<>&]/g, function (i: string) { + return `&#${i.charCodeAt(0)};`; + }); content = utils.formatChatTextContent(content); - content = content - .replace(/\n/g, '
') - .replace(/ {2}/g, '  '); + content = content.replace(/\n/g, '
').replace(/ {2}/g, '  '); - return
; -}; \ No newline at end of file + return ( +
: convertToAnchorTags(content), + }} + /> + ); +}; diff --git a/webapp/src/components/chat/prompt-details/PromptDetails.tsx b/webapp/src/components/chat/prompt-details/PromptDetails.tsx deleted file mode 100644 index 7e9fe7cf9..000000000 --- a/webapp/src/components/chat/prompt-details/PromptDetails.tsx +++ /dev/null @@ -1,51 +0,0 @@ -// Copyright (c) Microsoft. All rights reserved. - -import { Button, Dialog, DialogActions, DialogBody, DialogContent, DialogSurface, DialogTitle, DialogTrigger, Tooltip, makeStyles, shorthands } from '@fluentui/react-components'; -import { Info16Regular } from '@fluentui/react-icons'; -import React from 'react'; -import { IChatMessage } from '../../../libs/models/ChatMessage'; - -const useClasses = makeStyles({ - infoButton: { - ...shorthands.padding(0), - ...shorthands.margin(0), - minWidth: 'auto', - marginLeft: 'auto', // align to right - }, -}); - -interface IPromptDetailsProps { - message: IChatMessage; -} - -export const PromptDetails: React.FC = ({ message }) => { - const classes = useClasses(); - - return ( - - - - - - - - - - ); -}; diff --git a/webapp/src/components/chat/prompt-dialog/PromptDialog.tsx b/webapp/src/components/chat/prompt-dialog/PromptDialog.tsx new file mode 100644 index 000000000..02996f22e --- /dev/null +++ b/webapp/src/components/chat/prompt-dialog/PromptDialog.tsx @@ -0,0 +1,73 @@ +// Copyright (c) Microsoft. All rights reserved. 
+ +import { + Button, + Dialog, + DialogActions, + DialogBody, + DialogContent, + DialogSurface, + DialogTitle, + DialogTrigger, + Label, + Link, + Tooltip, + makeStyles, + shorthands, +} from '@fluentui/react-components'; +import { Info16Regular } from '@fluentui/react-icons'; +import React from 'react'; +import { IChatMessage } from '../../../libs/models/ChatMessage'; +import { useDialogClasses } from '../../../styles'; +import { TokenUsageGraph } from '../../token-usage/TokenUsageGraph'; + +const useClasses = makeStyles({ + infoButton: { + ...shorthands.padding(0), + ...shorthands.margin(0), + minWidth: 'auto', + marginLeft: 'auto', // align to right + }, +}); + +interface IPromptDialogProps { + message: IChatMessage; +} + +export const PromptDialog: React.FC = ({ message }) => { + const classes = useClasses(); + const dialogClasses = useDialogClasses(); + + return ( + + + + + + + + + + ); +}; diff --git a/webapp/src/components/chat/typing-indicator/TypingIndicatorRenderer.tsx b/webapp/src/components/chat/typing-indicator/TypingIndicatorRenderer.tsx deleted file mode 100644 index 36a193808..000000000 --- a/webapp/src/components/chat/typing-indicator/TypingIndicatorRenderer.tsx +++ /dev/null @@ -1,51 +0,0 @@ -// Copyright (c) Microsoft. All rights reserved. - -import { makeStyles } from '@fluentui/react-components'; -import { Animation } from '@fluentui/react-northstar'; -import * as React from 'react'; -import { TypingIndicator } from './TypingIndicator'; - -const useClasses = makeStyles({ - root: { - display: 'flex', - flexDirection: 'row', - }, -}); - -interface TypingIndicatorRendererProps { - botResponseStatus: string | undefined; - numberOfUsersTyping: number; -} - -export const TypingIndicatorRenderer: React.FC = ({ - botResponseStatus, - numberOfUsersTyping, -}) => { - const classes = useClasses(); - - let message = botResponseStatus; - if (numberOfUsersTyping === 1) { - message = message ? 
`${message} and a user is typing` : 'A user is typing'; - } else if (numberOfUsersTyping > 1) { - message = message - ? `${message} and ${numberOfUsersTyping} users are typing` - : `${numberOfUsersTyping} users are typing`; - } - - if (!message) { - return null; - } - - const typingIndicator = ( -
- - -
- ); - - return ( - - {typingIndicator} - - ); -}; diff --git a/webapp/src/components/header/settings-dialog/SettingsDialog.tsx b/webapp/src/components/header/settings-dialog/SettingsDialog.tsx index 7f90b142c..9b82bcc32 100644 --- a/webapp/src/components/header/settings-dialog/SettingsDialog.tsx +++ b/webapp/src/components/header/settings-dialog/SettingsDialog.tsx @@ -25,6 +25,7 @@ import React from 'react'; import { useAppSelector } from '../../../redux/app/hooks'; import { RootState } from '../../../redux/app/store'; import { SharedStyles, useDialogClasses } from '../../../styles'; +import { TokenUsageGraph } from '../../token-usage/TokenUsageGraph'; import { SettingSection } from './SettingSection'; const useClasses = makeStyles({ @@ -55,7 +56,7 @@ interface ISettingsDialogProps { export const SettingsDialog: React.FC = ({ open, closeDialog }) => { const classes = useClasses(); const dialogClasses = useDialogClasses(); - const { settings } = useAppSelector((state: RootState) => state.app); + const { settings, tokenUsage } = useAppSelector((state: RootState) => state.app); return ( = ({ open, closeDial Settings + diff --git a/webapp/src/components/shared/Alerts.tsx b/webapp/src/components/shared/Alerts.tsx index a15a6cb98..eb0386dee 100644 --- a/webapp/src/components/shared/Alerts.tsx +++ b/webapp/src/components/shared/Alerts.tsx @@ -1,9 +1,6 @@ // Copyright (c) Microsoft. All rights reserved. 
-import { - makeStyles, - tokens -} from '@fluentui/react-components'; +import { makeStyles, tokens } from '@fluentui/react-components'; import { Alert } from '@fluentui/react-components/unstable'; import { Dismiss16Regular } from '@fluentui/react-icons'; import React from 'react'; @@ -12,8 +9,6 @@ import { RootState } from '../../redux/app/store'; import { removeAlert } from '../../redux/features/app/appSlice'; const useClasses = makeStyles({ - root: { - }, alert: { fontWeight: tokens.fontWeightRegular, color: tokens.colorNeutralForeground1, @@ -29,7 +24,7 @@ export const Alerts: React.FC = () => { const { alerts } = useAppSelector((state: RootState) => state.app); return ( -
+
{alerts.map(({ type, message }, index) => { return ( { })}
); -}; \ No newline at end of file +}; diff --git a/webapp/src/components/token-usage/TokenUsageBar.tsx b/webapp/src/components/token-usage/TokenUsageBar.tsx new file mode 100644 index 000000000..868b6c023 --- /dev/null +++ b/webapp/src/components/token-usage/TokenUsageBar.tsx @@ -0,0 +1,36 @@ +import { Popover, PopoverSurface, PopoverTrigger, tokens } from '@fluentui/react-components'; +import { TokenUsageViewDetails } from '../../libs/models/TokenUsage'; + +interface ITokenUsageBar { + details: TokenUsageViewDetails; + totalUsage: number; +} + +export const TokenUsageBar: React.FC = ({ details, totalUsage }) => { + const percentage = details.usageCount / totalUsage; + const barWidth = percentage * 500; + + return ( + + +
+ + {`${details.legendLabel} (${details.usageCount})`} + + ); +}; diff --git a/webapp/src/components/token-usage/TokenUsageGraph.tsx b/webapp/src/components/token-usage/TokenUsageGraph.tsx new file mode 100644 index 000000000..d583b1517 --- /dev/null +++ b/webapp/src/components/token-usage/TokenUsageGraph.tsx @@ -0,0 +1,164 @@ +import { + Body1, + Button, + Divider, + makeStyles, + mergeClasses, + Popover, + PopoverSurface, + PopoverTrigger, + shorthands, + Text, + tokens, +} from '@fluentui/react-components'; +import { Brands } from '@fluentui/tokens'; +import { + TokenUsage, + TokenUsageFunctionNameMap, + TokenUsageView, + TokenUsageViewDetails, +} from '../../libs/models/TokenUsage'; +import { useAppSelector } from '../../redux/app/hooks'; +import { RootState } from '../../redux/app/store'; +import { semanticKernelBrandRamp } from '../../styles'; +import { TypingIndicator } from '../chat/typing-indicator/TypingIndicator'; +import { Info16 } from '../shared/BundledIcons'; +import { TokenUsageBar } from './TokenUsageBar'; +import { TokenUsageLegendItem } from './TokenUsageLegendItem'; + +const useClasses = makeStyles({ + horizontal: { + display: 'flex', + ...shorthands.gap(tokens.spacingVerticalSNudge), + alignItems: 'center', + }, + content: { + display: 'flex', + flexDirection: 'column', + ...shorthands.gap(tokens.spacingHorizontalS), + paddingBottom: tokens.spacingHorizontalM, + }, + popover: { + width: '300px', + }, + header: { + marginBlockEnd: tokens.spacingHorizontalM, + }, + legend: { + 'flex-flow': 'wrap', + }, +}); + +interface ITokenUsageGraph { + tokenUsage: TokenUsage; + promptView?: boolean; +} + +const contrastColors = [ + tokens.colorPaletteBlueBackground2, + tokens.colorPaletteBlueForeground2, + tokens.colorPaletteBlueBorderActive, +]; + +export const TokenUsageGraph: React.FC = ({ promptView, tokenUsage }) => { + const classes = useClasses(); + const { conversations, selectedId } = useAppSelector((state: RootState) => state.conversations); + 
const loadingResponse = conversations[selectedId].botResponseStatus; + + const responseGenerationView: TokenUsageView = {}; + const memoryExtractionView: TokenUsageView = {}; + + let memoryExtractionUsage = 0; + let responseGenerationUsage = 0; + let brandColorIndex = 120 as Brands; + const brandStep = 20; + let contrastColorsIndex = 0; + + Object.entries(tokenUsage).forEach(([key, value]) => { + const viewDetails: TokenUsageViewDetails = { + usageCount: value ?? 0, + legendLabel: TokenUsageFunctionNameMap[key], + color: semanticKernelBrandRamp[brandColorIndex], + }; + + if (key.toLocaleUpperCase().includes('MEMORY')) { + memoryExtractionUsage += value ?? 0; + viewDetails.color = contrastColors[contrastColorsIndex++]; + memoryExtractionView[key] = viewDetails; + } else { + responseGenerationUsage += value ?? 0; + brandColorIndex = (brandColorIndex - brandStep < 0 ? 160 : brandColorIndex - brandStep) as Brands; + responseGenerationView[key] = viewDetails; + } + }); + + const totalUsage = memoryExtractionUsage + responseGenerationUsage; + + return ( + <> +

+ Token Usage + + +

+
+ {loadingResponse ? ( + + Final token usage will be available once bot response is generated. + + + ) : ( + <> + {totalUsage > 0 ? ( + <> + {!promptView && Total token usage for current session} +
+ {Object.entries(responseGenerationView).map(([key, details]) => { + return ; + })} + {Object.entries(memoryExtractionView).map(([key, details]) => { + return ; + })} +
+
+ + +
+ + ) : promptView ? ( + No tokens were used. This is a hardcoded response. + ) : ( + No tokens have been used in this session yet. + )} + + )} +
+ + + ); +}; diff --git a/webapp/src/components/token-usage/TokenUsageLegendItem.tsx b/webapp/src/components/token-usage/TokenUsageLegendItem.tsx new file mode 100644 index 000000000..0da042059 --- /dev/null +++ b/webapp/src/components/token-usage/TokenUsageLegendItem.tsx @@ -0,0 +1,69 @@ +import { + Popover, + PopoverSurface, + PopoverTrigger, + Text, + makeStyles, + shorthands, + tokens, +} from '@fluentui/react-components'; +import { TokenUsageView } from '../../libs/models/TokenUsage'; +import { TokenUsageLegendLabel } from './TokenUsageLegendLabel'; + +export const useClasses = makeStyles({ + root: { + display: 'flex', + ...shorthands.gap(tokens.spacingVerticalSNudge), + alignItems: 'center', + }, + colors: { + display: 'flex', + ...shorthands.gap(tokens.spacingVerticalXXS), + }, + legendColor: { + height: tokens.spacingVerticalMNudge, + width: tokens.spacingHorizontalMNudge, + }, +}); + +interface ITokenUsageLegendItem { + name: string; + usageCount: number; + items: TokenUsageView; + color: string; +} + +export const TokenUsageLegendItem: React.FC = ({ name, usageCount, items, color }) => { + const classes = useClasses(); + return ( +
+ + +
+ + + {Object.values(items).map((details) => { + return ; + })} + + + {`${name} (${usageCount})`} +
+ ); +}; diff --git a/webapp/src/components/token-usage/TokenUsageLegendLabel.tsx b/webapp/src/components/token-usage/TokenUsageLegendLabel.tsx new file mode 100644 index 000000000..6618b66f8 --- /dev/null +++ b/webapp/src/components/token-usage/TokenUsageLegendLabel.tsx @@ -0,0 +1,23 @@ +import { Text, tokens } from '@fluentui/react-components'; +import { TokenUsageViewDetails } from '../../libs/models/TokenUsage'; +import { useClasses } from './TokenUsageLegendItem'; + +interface ITokenUsageLegendLabel { + details: TokenUsageViewDetails; +} + +export const TokenUsageLegendLabel: React.FC = ({ details }) => { + const classes = useClasses(); + return ( +
+
+ {`${details.legendLabel} (${details.usageCount})`} +
+ ); +}; diff --git a/webapp/src/libs/hooks/useChat.ts b/webapp/src/libs/hooks/useChat.ts index a61b5cf49..5630c4edd 100644 --- a/webapp/src/libs/hooks/useChat.ts +++ b/webapp/src/libs/hooks/useChat.ts @@ -4,21 +4,23 @@ import { useMsal } from '@azure/msal-react'; import { Constants } from '../../Constants'; import { useAppDispatch, useAppSelector } from '../../redux/app/hooks'; import { RootState } from '../../redux/app/store'; -import { addAlert } from '../../redux/features/app/appSlice'; +import { addAlert, updateTokenUsage } from '../../redux/features/app/appSlice'; import { ChatState } from '../../redux/features/conversations/ChatState'; import { Conversations } from '../../redux/features/conversations/ConversationsState'; import { addConversation, setConversations, setSelectedConversation, + updateBotResponseStatus, } from '../../redux/features/conversations/conversationsSlice'; import { Plugin } from '../../redux/features/plugins/PluginsState'; import { AuthHelper } from '../auth/AuthHelper'; import { AlertType } from '../models/AlertType'; import { Bot } from '../models/Bot'; -import { ChatMessageType } from '../models/ChatMessage'; +import { ChatMessageType, IChatMessage } from '../models/ChatMessage'; import { IChatSession } from '../models/ChatSession'; import { IChatUser } from '../models/ChatUser'; +import { TokenUsage } from '../models/TokenUsage'; import { IAskVariables } from '../semantic-kernel/model/Ask'; import { BotService } from '../services/BotService'; import { ChatService } from '../services/ChatService'; @@ -72,15 +74,13 @@ export const useChat = () => { const chatTitle = `Copilot @ ${new Date().toLocaleString()}`; const accessToken = await AuthHelper.getSKaaSAccessToken(instance, inProgress); try { - await chatService.createChatAsync(userId, chatTitle, accessToken).then(async (result: IChatSession) => { - const chatMessages = await chatService.getChatMessagesAsync(result.id, 0, 1, accessToken); - + await chatService.createChatAsync(userId, 
chatTitle, accessToken).then((result: IChatSession) => { const newChat: ChatState = { id: result.id, title: result.title, systemDescription: result.systemDescription, memoryBalance: result.memoryBalance, - messages: chatMessages, + messages: [result.initialBotMessage as IChatMessage], users: [loggedInUser], botProfilePicture: getBotProfilePicture(Object.keys(conversations).length), input: '', @@ -125,12 +125,21 @@ export const useChat = () => { } try { - await chatService.getBotResponseAsync( - ask, - await AuthHelper.getSKaaSAccessToken(instance, inProgress), - getEnabledPlugins(), - ); + const askResult = await chatService + .getBotResponseAsync( + ask, + await AuthHelper.getSKaaSAccessToken(instance, inProgress), + getEnabledPlugins(), + ) + .catch((e: any) => { + throw e; + }); + + // Update token usage of current session + const responseTokenUsage = askResult.variables.find((v) => v.key === 'tokenUsage')?.value; + if (responseTokenUsage) dispatch(updateTokenUsage(JSON.parse(responseTokenUsage) as TokenUsage)); } catch (e: any) { + dispatch(updateBotResponseStatus({ chatId, status: undefined })); const errorMessage = `Unable to generate bot response. Details: ${getErrorDetails(e)}`; dispatch(addAlert({ message: errorMessage, type: AlertType.Error })); } diff --git a/webapp/src/libs/models/ChatMessage.ts b/webapp/src/libs/models/ChatMessage.ts index 6a975b0e3..f9dc2e80e 100644 --- a/webapp/src/libs/models/ChatMessage.ts +++ b/webapp/src/libs/models/ChatMessage.ts @@ -1,6 +1,7 @@ // Copyright (c) Microsoft. All rights reserved. import { PlanState } from './Plan'; +import { TokenUsage } from './TokenUsage'; /** * Role of the author of a chat message. It's a copy of AuthorRoles in the API C# code. 
@@ -41,6 +42,7 @@ export enum UserFeedback { } export interface IChatMessage { + chatId: string; type: ChatMessageType; timestamp: number; userName: string; @@ -53,4 +55,5 @@ export interface IChatMessage { planState?: PlanState; // TODO: [Issue #42] Persistent RLHF userFeedback?: UserFeedback; + tokenUsage?: TokenUsage; } diff --git a/webapp/src/libs/models/ChatSession.ts b/webapp/src/libs/models/ChatSession.ts index 73f587a1c..932df40ba 100644 --- a/webapp/src/libs/models/ChatSession.ts +++ b/webapp/src/libs/models/ChatSession.ts @@ -1,8 +1,11 @@ // Copyright (c) Microsoft. All rights reserved. +import { IChatMessage } from './ChatMessage'; + export interface IChatSession { id: string; title: string; + initialBotMessage?: IChatMessage; systemDescription: string; memoryBalance: number; } diff --git a/webapp/src/libs/models/TokenUsage.ts b/webapp/src/libs/models/TokenUsage.ts new file mode 100644 index 000000000..01e5c33d1 --- /dev/null +++ b/webapp/src/libs/models/TokenUsage.ts @@ -0,0 +1,25 @@ +/// Information about token usage used to generate bot response. 
+export type TokenUsage = Record; + +export type TokenUsageView = Record; + +export interface TokenUsageViewDetails { + usageCount: number; + legendLabel: string; + color: string; +} + +export interface FunctionDetails { + usageCount: number; + legendLabel: string; + color?: string; +} + +export const TokenUsageFunctionNameMap: Record = { + audienceExtraction: 'Audience Extraction', + userIntentExtraction: 'User Intent Extraction', + metaPromptTemplate: 'Meta Prompt Template', + responseCompletion: 'Response Completion', + workingMemoryExtraction: 'Working Memory Extraction', + longTermMemoryExtraction: 'Long Term Memory Extraction', +}; diff --git a/webapp/src/libs/semantic-kernel/model/AskResult.ts b/webapp/src/libs/semantic-kernel/model/AskResult.ts index 3503020f3..9b2227166 100644 --- a/webapp/src/libs/semantic-kernel/model/AskResult.ts +++ b/webapp/src/libs/semantic-kernel/model/AskResult.ts @@ -1,11 +1,13 @@ // Copyright (c) Microsoft. All rights reserved. +import { IChatMessage } from '../../models/ChatMessage'; + export interface IAskResult { - value: string; - variables: Variable[]; + message: IChatMessage; + variables: ContextVariable[]; } -export interface Variable { +export interface ContextVariable { key: string; value: string; } diff --git a/webapp/src/libs/services/ChatService.ts b/webapp/src/libs/services/ChatService.ts index a9cf21ed4..0b5b91acb 100644 --- a/webapp/src/libs/services/ChatService.ts +++ b/webapp/src/libs/services/ChatService.ts @@ -7,6 +7,7 @@ import { IChatParticipant } from '../models/ChatParticipant'; import { IChatSession } from '../models/ChatSession'; import { IChatUser } from '../models/ChatUser'; import { IAsk, IAskVariables } from '../semantic-kernel/model/Ask'; +import { IAskResult } from '../semantic-kernel/model/AskResult'; import { ICustomPlugin } from '../semantic-kernel/model/CustomPlugin'; import { BaseService } from './BaseService'; @@ -101,7 +102,7 @@ export class ChatService extends BaseService { ask: IAsk, 
accessToken: string, enabledPlugins?: Plugin[], - ): Promise => { + ): Promise => { // If skill requires any additional api properties, append to context if (enabledPlugins && enabledPlugins.length > 0) { const openApiSkillVariables: IAskVariables[] = []; @@ -110,7 +111,6 @@ export class ChatService extends BaseService { const customPlugins: ICustomPlugin[] = []; for (const plugin of enabledPlugins) { - // If user imported a manifest domain, add custom plugin if (plugin.manifestDomain) { customPlugins.push({ @@ -153,7 +153,7 @@ export class ChatService extends BaseService { ask.variables = ask.variables ? ask.variables.concat(openApiSkillVariables) : openApiSkillVariables; } - const result = await this.getResponseAsync( + const result = await this.getResponseAsync( { commandPath: 'chat', method: 'POST', diff --git a/webapp/src/redux/features/app/AppState.ts b/webapp/src/redux/features/app/AppState.ts index 0de60facf..17ea92f47 100644 --- a/webapp/src/redux/features/app/AppState.ts +++ b/webapp/src/redux/features/app/AppState.ts @@ -1,6 +1,7 @@ // Copyright (c) Microsoft. All rights reserved. import { AlertType } from '../../../libs/models/AlertType'; +import { TokenUsage } from '../../../libs/models/TokenUsage'; export interface ActiveUserInfo { id: string; @@ -31,6 +32,7 @@ export interface Setting { export interface AppState { alerts: Alert[]; activeUserInfo?: ActiveUserInfo; + tokenUsage: TokenUsage; features: Record; settings: Setting[]; } @@ -126,6 +128,7 @@ export const initialState: AppState = { type: AlertType.Info, }, ], + tokenUsage: {}, features: Features, settings: Settings, }; diff --git a/webapp/src/redux/features/app/appSlice.ts b/webapp/src/redux/features/app/appSlice.ts index 836824e94..73be3c895 100644 --- a/webapp/src/redux/features/app/appSlice.ts +++ b/webapp/src/redux/features/app/appSlice.ts @@ -1,6 +1,7 @@ // Copyright (c) Microsoft. All rights reserved. 
import { createSlice, PayloadAction } from '@reduxjs/toolkit'; +import { TokenUsage } from '../../../libs/models/TokenUsage'; import { ActiveUserInfo, Alert, AppState, FeatureKeys, initialState } from './AppState'; export const appSlice = createSlice({ @@ -22,7 +23,13 @@ export const appSlice = createSlice({ setActiveUserInfo: (state: AppState, action: PayloadAction) => { state.activeUserInfo = action.payload; }, - // This sets the feature flag + updateTokenUsage: (state: AppState, action: PayloadAction) => { + Object.entries(action.payload).forEach(([key, value]) => { + action.payload[key] = getTotalTokenUsage(state.tokenUsage[key], value); + }); + state.tokenUsage = action.payload; + }, + // This sets the feature flag based on end user input toggleFeatureFlag: (state: AppState, action: PayloadAction) => { const feature = state.features[action.payload]; state.features = { @@ -33,7 +40,7 @@ export const appSlice = createSlice({ }, }; }, - // This controls feature availability based on the state of backend support + // This controls feature availability based on the state of backend toggleFeatureState: ( state: AppState, action: PayloadAction<{ @@ -55,7 +62,25 @@ export const appSlice = createSlice({ }, }); -export const { addAlert, removeAlert, setAlerts, setActiveUserInfo, toggleFeatureFlag, toggleFeatureState } = - appSlice.actions; +export const { + addAlert, + removeAlert, + setAlerts, + setActiveUserInfo, + toggleFeatureFlag, + toggleFeatureState, + updateTokenUsage, +} = appSlice.actions; export default appSlice.reducer; + +const getTotalTokenUsage = (previousSum?: number, current?: number) => { + if (previousSum === undefined) { + return current; + } + if (current === undefined) { + return previousSum; + } + + return previousSum + current; +}; diff --git a/webapp/src/redux/features/message-relay/signalRMiddleware.ts b/webapp/src/redux/features/message-relay/signalRMiddleware.ts index e41e6892e..8ffc237ef 100644 --- 
a/webapp/src/redux/features/message-relay/signalRMiddleware.ts +++ b/webapp/src/redux/features/message-relay/signalRMiddleware.ts @@ -13,7 +13,7 @@ import { Store, StoreMiddlewareAPI, getSelectedChatID } from './../../app/store' // These have to match the callback names used in the backend const enum SignalRCallbackMethods { ReceiveMessage = 'ReceiveMessage', - ReceiveMessageStream = 'ReceiveMessageStream', + ReceiveMessageUpdate = 'ReceiveMessageUpdate', UserJoined = 'UserJoined', ReceiveUserTypingState = 'ReceiveUserTypingState', ReceiveBotResponseStatus = 'ReceiveBotResponseStatus', @@ -165,15 +165,20 @@ export const registerSignalREvents = (store: Store) => { }, ); - hubConnection.on( - SignalRCallbackMethods.ReceiveMessageStream, - (chatId: string, messageId: string, content: string) => { - store.dispatch({ - type: 'conversations/updateMessageProperty', - payload: { chatId, messageIdOrIndex: messageId, property: 'content', value: content, frontLoad: true }, - }); - }, - ); + hubConnection.on(SignalRCallbackMethods.ReceiveMessageUpdate, (message: IChatMessage) => { + const { chatId, id: messageId, content } = message; + // If tokenUsage is defined, that means full message content has already been streamed and updated from server. No need to update content again. + store.dispatch({ + type: 'conversations/updateMessageProperty', + payload: { + chatId, + messageIdOrIndex: messageId, + property: message.tokenUsage ? 'tokenUsage' : 'content', + value: message.tokenUsage ?? 
content, + frontLoad: true, + }, + }); + }); hubConnection.on(SignalRCallbackMethods.UserJoined, (chatId: string, userId: string) => { const user: IChatUser = { diff --git a/webapp/src/styles.tsx b/webapp/src/styles.tsx index 911ecec04..21be9024f 100644 --- a/webapp/src/styles.tsx +++ b/webapp/src/styles.tsx @@ -9,7 +9,7 @@ import { tokens, } from '@fluentui/react-components'; -const semanticKernelBrandRamp: BrandVariants = { +export const semanticKernelBrandRamp: BrandVariants = { 10: '#060103', 20: '#261018', 30: '#431426',