From 6aefee8571737933367e7f5f3b7718d8d8ef71f3 Mon Sep 17 00:00:00 2001 From: Teresa Hoang <125500434+teresaqhoang@users.noreply.github.com> Date: Fri, 28 Jul 2023 16:46:53 -0700 Subject: [PATCH] Token Usage (#39) ### Motivation and Context This PR adds the token usage feature, in which token usage is calculated and shown per prompt and per session. Each token usage calculation will be split into two values: 1. total tokens used in chat completion of the bot response prompt 2. total tokens used in dependencies used to generate prompt ### Description webapi - Token usage per prompt persists as part of ChatMessage object - Add initial bot message and token usage tracking to ChatHistoryController, and track token usage for the bot response in the ChatMessage and ChatSession models. - Update ChatSkill to save token usage to context variables and return with bot response. - Added token usage calculation in ChatSkill by calculating total token usage for dependency functions and chat completion and sending updated response to client. Copy token usage into original chat context. - Calculate memory extraction token usage by taking into account cumulative semantic memory token usage. - Add TokenUtilities class with a GetFunctionTokenUsage method (token counting moves out of Utilities). Webapp - AppState: Added a new TokenUsage field to track total usage across all chats by app session, and appSlice has been updated to accumulate session token usage. - Update SignalRMiddleware to handle token usage when receiving message updates from server. Update message property to tokenUsage if tokenUsage is defined, otherwise update content. - Fix ChatHistoryTextContent to include TypingIndicator when bot response is generating. - Changed PromptDetails -> PromptDialog component to show prompt details and token usage graph. 
- Removed TypingIndicatorRenderer Token usage shown for ChatMessages of type Message and Plan ![image](https://github.com/microsoft/chat-copilot/assets/125500434/9ae5a262-67ed-400c-8e26-b486f0e307c8) Hardcoded bot responses default to 0 ![image](https://github.com/microsoft/chat-copilot/assets/125500434/0240695a-14ea-4a53-90f7-e2c3f64df0fe) Loading state ![image](https://github.com/microsoft/chat-copilot/assets/125500434/cb3b0ab7-d76e-4404-8a09-7fa8078fbbf1) Info ![image](https://github.com/microsoft/chat-copilot/assets/125500434/9747b239-daa8-4553-ab57-9210c5553211) This is what it will look like in settings dialog once changes go in ![image](https://github.com/microsoft/chat-copilot/assets/125500434/aca7d038-96f9-4d3e-95a7-339bede3ebc7) ### Contribution Checklist - [x] The code builds clean without any errors or warnings - [x] The PR follows the [Contribution Guidelines](https://github.com/microsoft/copilot-chat/blob/main/CONTRIBUTING.md) and the [pre-submission formatting script](https://github.com/microsoft/copilot-chat/blob/main/CONTRIBUTING.md#development-scripts) raises no violations ~~[ ] All unit tests pass, and I have added new tests where possible~~ - [x] I didn't break anyone :smile: --------- Co-authored-by: GitHub Actions --- .../Controllers/ChatHistoryController.cs | 14 +- .../Controllers/ChatMemoryController.cs | 2 +- .../Controllers/DocumentImportController.cs | 2 +- webapi/CopilotChat/Models/ChatMessage.cs | 17 +- .../CopilotChat/Models/CreateChatResponse.cs | 36 ++++ .../Skills/ChatSkills/ChatSkill.cs | 97 +++++++++-- .../Skills/ChatSkills/DocumentMemorySkill.cs | 2 +- .../ChatSkills/ExternalInformationSkill.cs | 13 +- .../ChatSkills/SemanticChatMemoryExtractor.cs | 6 +- .../ChatSkills/SemanticChatMemorySkill.cs | 2 +- webapi/CopilotChat/Skills/TokenUtilities.cs | 89 ++++++++++ webapi/CopilotChat/Skills/Utilities.cs | 6 - webapi/Services/IOcrEngine.cs | 1 - webapp/src/Constants.ts | 4 +- webapp/src/components/chat/ChatInput.tsx | 6 +- 
webapp/src/components/chat/ChatRoom.tsx | 5 +- webapp/src/components/chat/ChatStatus.tsx | 37 +++- .../chat/chat-history/ChatHistoryItem.tsx | 4 +- .../chat-history/ChatHistoryTextContent.tsx | 23 ++- .../chat/prompt-details/PromptDetails.tsx | 51 ------ .../chat/prompt-dialog/PromptDialog.tsx | 73 ++++++++ .../TypingIndicatorRenderer.tsx | 51 ------ .../header/settings-dialog/SettingsDialog.tsx | 4 +- webapp/src/components/shared/Alerts.tsx | 11 +- .../components/token-usage/TokenUsageBar.tsx | 36 ++++ .../token-usage/TokenUsageGraph.tsx | 164 ++++++++++++++++++ .../token-usage/TokenUsageLegendItem.tsx | 69 ++++++++ .../token-usage/TokenUsageLegendLabel.tsx | 23 +++ webapp/src/libs/hooks/useChat.ts | 31 ++-- webapp/src/libs/models/ChatMessage.ts | 3 + webapp/src/libs/models/ChatSession.ts | 3 + webapp/src/libs/models/TokenUsage.ts | 25 +++ .../libs/semantic-kernel/model/AskResult.ts | 8 +- webapp/src/libs/services/ChatService.ts | 6 +- webapp/src/redux/features/app/AppState.ts | 3 + webapp/src/redux/features/app/appSlice.ts | 33 +++- .../message-relay/signalRMiddleware.ts | 25 +-- webapp/src/styles.tsx | 2 +- 38 files changed, 776 insertions(+), 211 deletions(-) create mode 100644 webapi/CopilotChat/Models/CreateChatResponse.cs create mode 100644 webapi/CopilotChat/Skills/TokenUtilities.cs delete mode 100644 webapp/src/components/chat/prompt-details/PromptDetails.tsx create mode 100644 webapp/src/components/chat/prompt-dialog/PromptDialog.tsx delete mode 100644 webapp/src/components/chat/typing-indicator/TypingIndicatorRenderer.tsx create mode 100644 webapp/src/components/token-usage/TokenUsageBar.tsx create mode 100644 webapp/src/components/token-usage/TokenUsageGraph.tsx create mode 100644 webapp/src/components/token-usage/TokenUsageLegendItem.tsx create mode 100644 webapp/src/components/token-usage/TokenUsageLegendLabel.tsx create mode 100644 webapp/src/libs/models/TokenUsage.ts diff --git a/webapi/CopilotChat/Controllers/ChatHistoryController.cs 
b/webapi/CopilotChat/Controllers/ChatHistoryController.cs index b01799eed..c9b930fdb 100644 --- a/webapi/CopilotChat/Controllers/ChatHistoryController.cs +++ b/webapi/CopilotChat/Controllers/ChatHistoryController.cs @@ -14,6 +14,7 @@ using SemanticKernel.Service.CopilotChat.Hubs; using SemanticKernel.Service.CopilotChat.Models; using SemanticKernel.Service.CopilotChat.Options; +using SemanticKernel.Service.CopilotChat.Skills; using SemanticKernel.Service.CopilotChat.Storage; namespace SemanticKernel.Service.CopilotChat.Controllers; @@ -80,19 +81,22 @@ public async Task CreateChatSessionAsync([FromBody] CreateChatPar var newChat = new ChatSession(chatParameter.Title, this._promptOptions.SystemDescription); await this._sessionRepository.CreateAsync(newChat); - var initialBotMessage = this._promptOptions.InitialBotMessage; - // The initial bot message doesn't need a prompt. + // Create initial bot message var chatMessage = ChatMessage.CreateBotResponseMessage( newChat.Id, - initialBotMessage, - string.Empty); + this._promptOptions.InitialBotMessage, + string.Empty, // The initial bot message doesn't need a prompt. 
+ TokenUtilities.EmptyTokenUsages()); await this._messageRepository.CreateAsync(chatMessage); // Add the user to the chat session await this._participantRepository.CreateAsync(new ChatParticipant(chatParameter.UserId, newChat.Id)); this._logger.LogDebug("Created chat session with id {0}.", newChat.Id); - return this.CreatedAtAction(nameof(this.GetChatSessionByIdAsync), new { chatId = newChat.Id }, newChat); + return this.CreatedAtAction( + nameof(this.GetChatSessionByIdAsync), + new { chatId = newChat.Id }, + new CreateChatResponse(newChat, chatMessage)); } /// diff --git a/webapi/CopilotChat/Controllers/ChatMemoryController.cs b/webapi/CopilotChat/Controllers/ChatMemoryController.cs index 51d309310..a2867bc5b 100644 --- a/webapi/CopilotChat/Controllers/ChatMemoryController.cs +++ b/webapi/CopilotChat/Controllers/ChatMemoryController.cs @@ -103,4 +103,4 @@ private bool ValidateMemoryName(string memoryName) } # endregion -} \ No newline at end of file +} diff --git a/webapi/CopilotChat/Controllers/DocumentImportController.cs b/webapi/CopilotChat/Controllers/DocumentImportController.cs index 7b665d036..0147eb21d 100644 --- a/webapi/CopilotChat/Controllers/DocumentImportController.cs +++ b/webapi/CopilotChat/Controllers/DocumentImportController.cs @@ -548,7 +548,7 @@ await kernel.Memory.SaveInformationAsync( id: key, description: $"Document: {documentName}"); importResult.AddKey(key); - importResult.Tokens += Utilities.TokenCount(paragraph); + importResult.Tokens += TokenUtilities.TokenCount(paragraph); } this._logger.LogInformation( diff --git a/webapi/CopilotChat/Models/ChatMessage.cs b/webapi/CopilotChat/Models/ChatMessage.cs index 16d3c5d2a..86c218ab6 100644 --- a/webapi/CopilotChat/Models/ChatMessage.cs +++ b/webapi/CopilotChat/Models/ChatMessage.cs @@ -1,6 +1,7 @@ // Copyright (c) Microsoft. All rights reserved. 
using System; +using System.Collections.Generic; using System.Globalization; using System.Text.Json; using System.Text.Json.Serialization; @@ -110,6 +111,12 @@ public enum ChatMessageType [JsonPropertyName("type")] public ChatMessageType Type { get; set; } + /// + /// Counts of total token usage used to generate bot response. + /// + [JsonPropertyName("tokenUsage")] + public Dictionary? TokenUsage { get; set; } + /// /// Create a new chat message. Timestamp is automatically generated. /// @@ -120,6 +127,7 @@ public enum ChatMessageType /// The prompt used to generate the message /// Role of the author /// Type of the message + /// Total token usages used to generate bot response public ChatMessage( string userId, string userName, @@ -127,7 +135,8 @@ public ChatMessage( string content, string prompt = "", AuthorRoles authorRole = AuthorRoles.User, - ChatMessageType type = ChatMessageType.Message) + ChatMessageType type = ChatMessageType.Message, + Dictionary? tokenUsage = null) { this.Timestamp = DateTimeOffset.Now; this.UserId = userId; @@ -138,6 +147,7 @@ public ChatMessage( this.Prompt = prompt; this.AuthorRole = authorRole; this.Type = type; + this.TokenUsage = tokenUsage; } /// @@ -146,9 +156,10 @@ public ChatMessage( /// The chat ID that this message belongs to /// The message /// The prompt used to generate the message - public static ChatMessage CreateBotResponseMessage(string chatId, string content, string prompt) + /// Total token usage of response completion + public static ChatMessage CreateBotResponseMessage(string chatId, string content, string prompt, Dictionary? tokenUsage = null) { - return new ChatMessage("bot", "bot", chatId, content, prompt, AuthorRoles.Bot, IsPlan(content) ? ChatMessageType.Plan : ChatMessageType.Message); + return new ChatMessage("bot", "bot", chatId, content, prompt, AuthorRoles.Bot, IsPlan(content) ? 
ChatMessageType.Plan : ChatMessageType.Message, tokenUsage); } /// diff --git a/webapi/CopilotChat/Models/CreateChatResponse.cs b/webapi/CopilotChat/Models/CreateChatResponse.cs new file mode 100644 index 000000000..61d63f3bf --- /dev/null +++ b/webapi/CopilotChat/Models/CreateChatResponse.cs @@ -0,0 +1,36 @@ +// Copyright (c) Microsoft. All rights reserved. + +using System.Text.Json.Serialization; + +namespace SemanticKernel.Service.CopilotChat.Models; + +/// +/// Response to chatSession/create request. +/// +public class CreateChatResponse +{ + /// + /// ID that is persistent and unique to new chat session. + /// + [JsonPropertyName("id")] + public string Id { get; set; } + + /// + /// Title of the chat. + /// + [JsonPropertyName("title")] + public string Title { get; set; } + + /// + /// Initial bot message. + /// + [JsonPropertyName("initialBotMessage")] + public ChatMessage? InitialBotMessage { get; set; } + + public CreateChatResponse(ChatSession chatSession, ChatMessage initialBotMessage) + { + this.Id = chatSession.Id; + this.Title = chatSession.Title; + this.InitialBotMessage = initialBotMessage; + } +} diff --git a/webapi/CopilotChat/Skills/ChatSkills/ChatSkill.cs b/webapi/CopilotChat/Skills/ChatSkills/ChatSkill.cs index b88d5335f..dedbce976 100644 --- a/webapi/CopilotChat/Skills/ChatSkills/ChatSkill.cs +++ b/webapi/CopilotChat/Skills/ChatSkills/ChatSkill.cs @@ -116,7 +116,7 @@ public async Task ExtractUserIntentAsync(SKContext context) var historyTokenBudget = tokenLimit - this._promptOptions.ResponseTokenLimit - - Utilities.TokenCount(string.Join("\n", new string[] + TokenUtilities.TokenCount(string.Join("\n", new string[] { this._promptOptions.SystemDescription, this._promptOptions.SystemIntent, @@ -139,6 +139,9 @@ public async Task ExtractUserIntentAsync(SKContext context) settings: this.CreateIntentCompletionSettings() ); + // Get token usage from ChatCompletion result and add to context + TokenUtilities.GetFunctionTokenUsage(result, context, 
"SystemIntentExtraction"); + if (result.ErrorOccurred) { context.Log.LogError("{0}: {1}", result.LastErrorDescription, result.LastException); @@ -161,7 +164,7 @@ public async Task ExtractAudienceAsync(SKContext context) var historyTokenBudget = tokenLimit - this._promptOptions.ResponseTokenLimit - - Utilities.TokenCount(string.Join("\n", new string[] + TokenUtilities.TokenCount(string.Join("\n", new string[] { this._promptOptions.SystemAudience, this._promptOptions.SystemAudienceContinuation, @@ -182,6 +185,9 @@ public async Task ExtractAudienceAsync(SKContext context) settings: this.CreateIntentCompletionSettings() ); + // Get token usage from ChatCompletion result and add to context + TokenUtilities.GetFunctionTokenUsage(result, context, "SystemAudienceExtraction"); + if (result.ErrorOccurred) { context.Log.LogError("{0}: {1}", result.LastErrorDescription, result.LastException); @@ -229,7 +235,7 @@ public async Task ExtractChatHistoryAsync( } } - var tokenCount = Utilities.TokenCount(formattedMessage); + var tokenCount = TokenUtilities.TokenCount(formattedMessage); if (remainingToken - tokenCount >= 0) { @@ -262,7 +268,7 @@ public async Task ChatAsync( SKContext context) { // Set the system description in the prompt options - await SetSystemDescriptionAsync(chatId); + await this.SetSystemDescriptionAsync(chatId); // Save this new message to memory such that subsequent chat responses can use it await this.UpdateBotResponseStatusOnClient(chatId, "Saving user message to chat history"); @@ -284,7 +290,7 @@ public async Task ChatAsync( // Save hardcoded response if user cancelled plan if (chatContext.Variables.ContainsKey("userCancelledPlan")) { - await this.SaveNewResponseAsync("I am sorry the plan did not meet your goals.", string.Empty, chatId, userId); + await this.SaveNewResponseAsync("I am sorry the plan did not meet your goals.", string.Empty, chatId, userId, TokenUtilities.EmptyTokenUsages()); return context; } @@ -296,6 +302,7 @@ public async Task ChatAsync( 
return context; } + context.Variables.Set("tokenUsage", JsonSerializer.Serialize(chatMessage.TokenUsage)); return context; } @@ -350,7 +357,11 @@ public async Task ChatAsync( chatContext.Variables.Set("prompt", prompt); // Save a new response to the chat history with the proposed plan content - return await this.SaveNewResponseAsync(JsonSerializer.Serialize(proposedPlan), prompt, chatId, userId); + return await this.SaveNewResponseAsync( + JsonSerializer.Serialize(proposedPlan), prompt, chatId, userId, + // TODO: [Issue #2106] Accommodate plan token usage differently + this.GetTokenUsagesAsync(chatContext) + ); } // Query relevant semantic and document memories @@ -376,7 +387,7 @@ public async Task ChatAsync( // Fill in the chat history if there is any token budget left var chatContextComponents = new List() { chatMemories, documentMemories, planResult }; var chatContextText = string.Join("\n\n", chatContextComponents.Where(c => !string.IsNullOrEmpty(c))); - var chatHistoryTokenLimit = remainingToken - Utilities.TokenCount(chatContextText); + var chatHistoryTokenLimit = remainingToken - TokenUtilities.TokenCount(chatContextText); if (chatHistoryTokenLimit > 0) { await this.UpdateBotResponseStatusOnClient(chatId, "Extracting chat history"); @@ -399,6 +410,7 @@ public async Task ChatAsync( this._promptOptions.SystemChatPrompt, chatContext); chatContext.Variables.Set("prompt", renderedPrompt); + chatContext.Variables.Set(TokenUtilities.GetFunctionKey(chatContext.Log, "SystemMetaPrompt")!, TokenUtilities.TokenCount(renderedPrompt).ToString(CultureInfo.InvariantCulture)); if (chatContext.ErrorOccurred) { @@ -417,9 +429,15 @@ await SemanticChatMemoryExtractor.ExtractSemanticChatMemoryAsync( chatContext, this._promptOptions); - // Save the message + // Calculate total token usage for dependency functions and prompt template and send to client + await this.UpdateBotResponseStatusOnClient(chatId, "Calculating token usage"); + chatMessage.TokenUsage = 
this.GetTokenUsagesAsync(chatContext, chatMessage.Content); + await this.UpdateMessageOnClient(chatMessage); + + // Save the message with final completion token usage await this.UpdateBotResponseStatusOnClient(chatId, "Saving message to chat history"); await this._chatMessageRepository.UpsertAsync(chatMessage); + return chatMessage; } @@ -442,6 +460,13 @@ private async Task GetAudienceAsync(SKContext context) var audience = await this.ExtractAudienceAsync(audienceContext); + // Copy token usage into original chat context + var functionKey = TokenUtilities.GetFunctionKey(context.Log, "SystemAudienceExtraction")!; + if (audienceContext.Variables.TryGetValue(functionKey, out string? tokenUsage)) + { + context.Variables.Set(functionKey, tokenUsage); + } + // Propagate the error if (audienceContext.ErrorOccurred) { @@ -473,6 +498,14 @@ private async Task GetUserIntentAsync(SKContext context) ); userIntent = await this.ExtractUserIntentAsync(intentContext); + + // Copy token usage into original chat context + var functionKey = TokenUtilities.GetFunctionKey(context.Log, "SystemIntentExtraction")!; + if (intentContext.Variables.TryGetValue(functionKey!, out string? tokenUsage)) + { + context.Variables.Set(functionKey!, tokenUsage); + } + // Propagate the error if (intentContext.ErrorOccurred) { @@ -579,8 +612,9 @@ private async Task SaveNewMessageAsync(string message, string userI /// Prompt used to generate the response. /// The chat ID /// The user ID + /// Total token usage of response completion /// The created chat message. - private async Task SaveNewResponseAsync(string response, string prompt, string chatId, string userId) + private async Task SaveNewResponseAsync(string response, string prompt, string chatId, string userId, Dictionary? tokenUsage) { // Make sure the chat exists. 
if (!await this._chatSessionRepository.TryFindByIdAsync(chatId, v => _ = v)) @@ -651,10 +685,10 @@ private int GetChatContextTokenLimit(string audience, string userIntent) var tokenLimit = this._promptOptions.CompletionTokenLimit; var remainingToken = tokenLimit - - Utilities.TokenCount(audience) - - Utilities.TokenCount(userIntent) - + TokenUtilities.TokenCount(audience) - + TokenUtilities.TokenCount(userIntent) - this._promptOptions.ResponseTokenLimit - - Utilities.TokenCount(string.Join("\n", new string[] + TokenUtilities.TokenCount(string.Join("\n", new string[] { this._promptOptions.SystemDescription, this._promptOptions.SystemResponse, @@ -665,6 +699,33 @@ private int GetChatContextTokenLimit(string audience, string userIntent) return remainingToken; } + /// + /// Gets token usage totals for each semantic function if not undefined. + /// + /// Context maintained during response generation. + /// String representing bot response. If null, response is still being generated or was hardcoded. + /// Dictionary containing function to token usage mapping for each total that's defined. + private Dictionary GetTokenUsagesAsync(SKContext chatContext, string? content = null) + { + var tokenUsageDict = new Dictionary(StringComparer.OrdinalIgnoreCase); + + // Total token usage of each semantic function + foreach (string function in TokenUtilities.semanticFunctions.Values) + { + if (chatContext.Variables.TryGetValue($"{function}TokenUsage", out string? tokenUsage)) + { + tokenUsageDict.Add(function, int.Parse(tokenUsage, CultureInfo.InvariantCulture)); + } + } + + if (content != null) + { + tokenUsageDict.Add(TokenUtilities.semanticFunctions["SystemCompletion"]!, TokenUtilities.TokenCount(content)); + } + + return tokenUsageDict; + } + /// /// Stream the response to the client. 
/// @@ -685,7 +746,7 @@ private async Task StreamResponseToClient(string chatId, string use await foreach (string contentPiece in stream) { chatMessage.Content += contentPiece; - await this.UpdateMessageContentOnClient(chatId, chatMessage); + await this.UpdateMessageOnClient(chatMessage); } return chatMessage; @@ -698,10 +759,11 @@ private async Task StreamResponseToClient(string chatId, string use /// The user ID /// Prompt used to generate the message /// Content of the message + /// Total token usage of response completion /// The created chat message - private async Task CreateBotMessageOnClient(string chatId, string userId, string prompt, string content) + private async Task CreateBotMessageOnClient(string chatId, string userId, string prompt, string content, Dictionary? tokenUsage = null) { - var chatMessage = ChatMessage.CreateBotResponseMessage(chatId, content, prompt); + var chatMessage = ChatMessage.CreateBotResponseMessage(chatId, content, prompt, tokenUsage); await this._messageRelayHubContext.Clients.Group(chatId).SendAsync("ReceiveMessage", chatId, userId, chatMessage); return chatMessage; } @@ -709,11 +771,10 @@ private async Task CreateBotMessageOnClient(string chatId, string u /// /// Update the response on the client. 
/// - /// The chat ID /// The message - private async Task UpdateMessageContentOnClient(string chatId, ChatMessage message) + private async Task UpdateMessageOnClient(ChatMessage message) { - await this._messageRelayHubContext.Clients.Group(chatId).SendAsync("ReceiveMessageStream", chatId, message.Id, message.Content); + await this._messageRelayHubContext.Clients.Group(message.ChatId).SendAsync("ReceiveMessageUpdate", message); } /// diff --git a/webapi/CopilotChat/Skills/ChatSkills/DocumentMemorySkill.cs b/webapi/CopilotChat/Skills/ChatSkills/DocumentMemorySkill.cs index ac9f87938..9fce9043f 100644 --- a/webapi/CopilotChat/Skills/ChatSkills/DocumentMemorySkill.cs +++ b/webapi/CopilotChat/Skills/ChatSkills/DocumentMemorySkill.cs @@ -78,7 +78,7 @@ public async Task QueryDocumentsAsync( string documentsText = string.Empty; foreach (var memory in relevantMemories) { - var tokenCount = Utilities.TokenCount(memory.Metadata.Text); + var tokenCount = TokenUtilities.TokenCount(memory.Metadata.Text); if (remainingToken - tokenCount > 0) { documentsText += $"\n\nSnippet from {memory.Metadata.Description}: {memory.Metadata.Text}"; diff --git a/webapi/CopilotChat/Skills/ChatSkills/ExternalInformationSkill.cs b/webapi/CopilotChat/Skills/ChatSkills/ExternalInformationSkill.cs index 38e0a2836..d45861c8b 100644 --- a/webapi/CopilotChat/Skills/ChatSkills/ExternalInformationSkill.cs +++ b/webapi/CopilotChat/Skills/ChatSkills/ExternalInformationSkill.cs @@ -72,6 +72,7 @@ public async Task AcquireExternalInformationAsync( [Description("The intent to whether external information is needed")] string userIntent, SKContext context) { + // TODO: [Issue #2106] Calculate planner and plan token usage FunctionsView functions = this._planner.Kernel.Skills.GetFunctionsView(true, true); if (functions.NativeFunctions.IsEmpty && functions.SemanticFunctions.IsEmpty) { @@ -100,8 +101,8 @@ public async Task AcquireExternalInformationAsync( newPlanContext = await plan.InvokeAsync(newPlanContext); int 
tokenLimit = int.Parse(context["tokenLimit"], new NumberFormatInfo()) - - Utilities.TokenCount(PromptPreamble) - - Utilities.TokenCount(PromptPostamble); + TokenUtilities.TokenCount(PromptPreamble) - + TokenUtilities.TokenCount(PromptPostamble); // The result of the plan may be from an OpenAPI skill. Attempt to extract JSON from the response. bool extractJsonFromOpenApi = @@ -238,7 +239,7 @@ private string OptimizeOpenApiSkillJson(string jsonContent, int tokenLimit, Plan document = JsonDocument.Parse(jsonContent); } - int jsonContentTokenCount = Utilities.TokenCount(jsonContent); + int jsonContentTokenCount = TokenUtilities.TokenCount(jsonContent); // Return the JSON content if it does not exceed the token limit if (jsonContentTokenCount < tokenLimit) @@ -264,7 +265,7 @@ private string OptimizeOpenApiSkillJson(string jsonContent, int tokenLimit, Plan { // Save property name for result interpolation JsonProperty firstProperty = document.RootElement.EnumerateObject().First(); - tokenLimit -= Utilities.TokenCount(firstProperty.Name); + tokenLimit -= TokenUtilities.TokenCount(firstProperty.Name); resultsDescriptor = string.Format(CultureInfo.InvariantCulture, "{0}: ", firstProperty.Name); // Extract object to be truncated @@ -279,7 +280,7 @@ private string OptimizeOpenApiSkillJson(string jsonContent, int tokenLimit, Plan { foreach (JsonProperty property in document.RootElement.EnumerateObject()) { - int propertyTokenCount = Utilities.TokenCount(property.ToString()); + int propertyTokenCount = TokenUtilities.TokenCount(property.ToString()); if (tokenLimit - propertyTokenCount > 0) { @@ -299,7 +300,7 @@ private string OptimizeOpenApiSkillJson(string jsonContent, int tokenLimit, Plan { foreach (JsonElement item in document.RootElement.EnumerateArray()) { - int itemTokenCount = Utilities.TokenCount(item.ToString()); + int itemTokenCount = TokenUtilities.TokenCount(item.ToString()); if (tokenLimit - itemTokenCount > 0) { diff --git 
a/webapi/CopilotChat/Skills/ChatSkills/SemanticChatMemoryExtractor.cs b/webapi/CopilotChat/Skills/ChatSkills/SemanticChatMemoryExtractor.cs index a2a2461c7..14bac7b23 100644 --- a/webapi/CopilotChat/Skills/ChatSkills/SemanticChatMemoryExtractor.cs +++ b/webapi/CopilotChat/Skills/ChatSkills/SemanticChatMemoryExtractor.cs @@ -87,7 +87,7 @@ internal static async Task ExtractCognitiveMemoryAsync( var remainingToken = tokenLimit - options.ResponseTokenLimit - - Utilities.TokenCount(memoryPrompt); ; + TokenUtilities.TokenCount(memoryPrompt); ; var memoryExtractionContext = Utilities.CopyContextWithVariablesClone(context); memoryExtractionContext.Variables.Set("tokenLimit", remainingToken.ToString(new NumberFormatInfo())); @@ -101,6 +101,10 @@ internal static async Task ExtractCognitiveMemoryAsync( settings: CreateMemoryExtractionSettings(options) ); + // Get token usage from ChatCompletion result and add to context + // Since there are multiple memory types, total token usage is calculated by cumulating the token usage of each memory type. 
+ TokenUtilities.GetFunctionTokenUsage(result, context, $"SystemCognitive_{memoryName}"); + SemanticChatMemory memory = SemanticChatMemory.FromJson(result.ToString()); return memory; } diff --git a/webapi/CopilotChat/Skills/ChatSkills/SemanticChatMemorySkill.cs b/webapi/CopilotChat/Skills/ChatSkills/SemanticChatMemorySkill.cs index 8f15d88f5..7998fd6ef 100644 --- a/webapi/CopilotChat/Skills/ChatSkills/SemanticChatMemorySkill.cs +++ b/webapi/CopilotChat/Skills/ChatSkills/SemanticChatMemorySkill.cs @@ -75,7 +75,7 @@ public async Task QueryMemoriesAsync( string memoryText = ""; foreach (var memory in relevantMemories) { - var tokenCount = Utilities.TokenCount(memory.Metadata.Text); + var tokenCount = TokenUtilities.TokenCount(memory.Metadata.Text); if (remainingToken - tokenCount > 0) { memoryText += $"\n[{memory.Metadata.Description}] {memory.Metadata.Text}"; diff --git a/webapi/CopilotChat/Skills/TokenUtilities.cs b/webapi/CopilotChat/Skills/TokenUtilities.cs new file mode 100644 index 000000000..b463cec86 --- /dev/null +++ b/webapi/CopilotChat/Skills/TokenUtilities.cs @@ -0,0 +1,89 @@ +// Copyright (c) Microsoft. All rights reserved. + +using System; +using System.Collections.Generic; +using System.Globalization; +using System.Linq; +using Azure.AI.OpenAI; +using Microsoft.Extensions.Logging; +using Microsoft.SemanticKernel.Connectors.AI.OpenAI.Tokenizers; +using Microsoft.SemanticKernel.Orchestration; + +namespace SemanticKernel.Service.CopilotChat.Skills; + +/// +/// Utility methods for token management. +/// +public static class TokenUtilities +{ + /// + /// Semantic dependencies of ChatSkill. + /// If you add a new semantic dependency, please add it here. + /// + public static readonly Dictionary semanticFunctions = new() + { + // TODO: [Issue #2106] Calculate token usage for planner dependencies. 
+ { "SystemAudienceExtraction", "audienceExtraction" }, + { "SystemIntentExtraction", "userIntentExtraction" }, + { "SystemMetaPrompt", "metaPromptTemplate" }, + { "SystemCompletion", "responseCompletion"}, + { "SystemCognitive_WorkingMemory", "workingMemoryExtraction" }, + { "SystemCognitive_LongTermMemory", "longTermMemoryExtraction" } + }; + + /// + /// Gets dictionary containing empty token usage totals. + /// Use for responses that are hardcoded and/or do not have semantic (token) dependencies. + /// + internal static Dictionary EmptyTokenUsages() + { + return semanticFunctions.Values.ToDictionary(v => v, v => 0, StringComparer.OrdinalIgnoreCase); + } + + /// + /// Gets key used to identify function token usage in context variables. + /// + /// The logger instance to use for logging errors. + /// Name of semantic function. + /// The key corresponding to the semantic function name, or null if the function name is unknown. + internal static string? GetFunctionKey(ILogger logger, string? functionName) + { + if (functionName == null || !semanticFunctions.TryGetValue(functionName, out string? key)) + { + logger.LogError("Unknown token dependency {0}. Please define function as semanticFunctions entry in TokenUtilities.cs", functionName); + return null; + }; + + return $"{key}TokenUsage"; + } + + /// + /// Gets the total token usage from a Chat or Text Completion result context and adds it as a variable to response context. + /// + /// Result context of chat completion + /// Context maintained during response generation. + /// Name of the function that invoked the chat completion. + /// true if token usage is found in result context; otherwise, false. + internal static void GetFunctionTokenUsage(SKContext result, SKContext chatContext, string? 
functionName = null) + { + var functionKey = GetFunctionKey(chatContext.Log, functionName); + if (functionKey == null) + { + return; + } + + if (result.ModelResults == null || result.ModelResults.Count == 0) + { + chatContext.Log.LogError("Unable to determine token usage for {0}", functionKey); + return; + } + + var tokenUsage = result.ModelResults.First().GetResult().Usage.TotalTokens; + chatContext.Variables.Set(functionKey!, tokenUsage.ToString(CultureInfo.InvariantCulture)); + } + + /// + /// Calculate the number of tokens in a string. + /// + internal static int TokenCount(string text) => GPT3Tokenizer.Encode(text).Count; +} diff --git a/webapi/CopilotChat/Skills/Utilities.cs b/webapi/CopilotChat/Skills/Utilities.cs index a11419dd1..25c225d16 100644 --- a/webapi/CopilotChat/Skills/Utilities.cs +++ b/webapi/CopilotChat/Skills/Utilities.cs @@ -1,6 +1,5 @@ //Copyright (c) Microsoft. All rights reserved. -using Microsoft.SemanticKernel.Connectors.AI.OpenAI.Tokenizers; using Microsoft.SemanticKernel.Orchestration; namespace SemanticKernel.Service.CopilotChat.Skills; @@ -24,9 +23,4 @@ internal static SKContext CopyContextWithVariablesClone(SKContext context) context.Skills, context.Log, context.CancellationToken); - - /// - /// Calculate the number of tokens in a string. - /// - internal static int TokenCount(string text) => GPT3Tokenizer.Encode(text).Count; } diff --git a/webapi/Services/IOcrEngine.cs b/webapi/Services/IOcrEngine.cs index 499279bad..bd1527676 100644 --- a/webapi/Services/IOcrEngine.cs +++ b/webapi/Services/IOcrEngine.cs @@ -10,7 +10,6 @@ namespace SemanticKernel.Service.Services; /// public interface IOcrEngine { - /// /// Reads all text from the image file. 
/// diff --git a/webapp/src/Constants.ts b/webapp/src/Constants.ts index 334ac8bad..6f83a901f 100644 --- a/webapp/src/Constants.ts +++ b/webapp/src/Constants.ts @@ -47,6 +47,6 @@ export const Constants = { msGraphScopes: ['Calendars.Read', 'Mail.Read', 'Mail.Send', 'Tasks.ReadWrite', 'User.Read'], // All OpenAI plugin manifest files should be located at this path per OpenAI requirements: "https://platform.openai.com/docs/plugins/getting-started/plugin-manifest MANIFEST_PATH: '/.well-known/ai-plugin.json', - }, - KEYSTROKE_DEBOUNCE_TIME_MS: 250 + }, + KEYSTROKE_DEBOUNCE_TIME_MS: 250, }; diff --git a/webapp/src/components/chat/ChatInput.tsx b/webapp/src/components/chat/ChatInput.tsx index ff606f8a2..8690b9a52 100644 --- a/webapp/src/components/chat/ChatInput.tsx +++ b/webapp/src/components/chat/ChatInput.tsx @@ -8,15 +8,13 @@ import * as speechSdk from 'microsoft-cognitiveservices-speech-sdk'; import React, { useRef } from 'react'; import { Constants } from '../../Constants'; import { AuthHelper } from '../../libs/auth/AuthHelper'; +import { GetResponseOptions, useChat } from '../../libs/hooks/useChat'; import { AlertType } from '../../libs/models/AlertType'; import { ChatMessageType } from '../../libs/models/ChatMessage'; -import { GetResponseOptions, useChat } from '../../libs/hooks/useChat'; import { useAppDispatch, useAppSelector } from '../../redux/app/hooks'; import { RootState } from '../../redux/app/store'; import { addAlert } from '../../redux/features/app/appSlice'; -import { - editConversationInput, updateBotResponseStatus -} from '../../redux/features/conversations/conversationsSlice'; +import { editConversationInput, updateBotResponseStatus } from '../../redux/features/conversations/conversationsSlice'; import { Alerts } from '../shared/Alerts'; import { SpeechService } from './../../libs/services/SpeechService'; import { updateUserIsTyping } from './../../redux/features/conversations/conversationsSlice'; diff --git 
a/webapp/src/components/chat/ChatRoom.tsx b/webapp/src/components/chat/ChatRoom.tsx index 1d23dd47a..86dd8c419 100644 --- a/webapp/src/components/chat/ChatRoom.tsx +++ b/webapp/src/components/chat/ChatRoom.tsx @@ -4,8 +4,8 @@ import { makeStyles, shorthands, tokens } from '@fluentui/react-components'; import debug from 'debug'; import React from 'react'; import { Constants } from '../../Constants'; -import { AuthorRoles, IChatMessage } from '../../libs/models/ChatMessage'; import { GetResponseOptions, useChat } from '../../libs/hooks/useChat'; +import { AuthorRoles, IChatMessage } from '../../libs/models/ChatMessage'; import { useAppDispatch, useAppSelector } from '../../redux/app/hooks'; import { RootState } from '../../redux/app/store'; import { addMessageToConversationFromUser } from '../../redux/features/conversations/conversationsSlice'; @@ -89,6 +89,7 @@ export const ChatRoom: React.FC = () => { log('submitting user chat message'); const chatInput: IChatMessage = { + chatId: selectedId, timestamp: new Date().getTime(), userId: activeUserInfo?.id as string, userName: activeUserInfo?.username as string, @@ -116,4 +117,4 @@ export const ChatRoom: React.FC = () => { ); -}; \ No newline at end of file +}; diff --git a/webapp/src/components/chat/ChatStatus.tsx b/webapp/src/components/chat/ChatStatus.tsx index fb17d3394..9cc7b6737 100644 --- a/webapp/src/components/chat/ChatStatus.tsx +++ b/webapp/src/components/chat/ChatStatus.tsx @@ -1,12 +1,23 @@ // Copyright (c) Microsoft. All rights reserved. 
+import { makeStyles } from '@fluentui/react-components'; +import { Animation } from '@fluentui/react-northstar'; import React from 'react'; import { IChatUser } from '../../libs/models/ChatUser'; import { useAppSelector } from '../../redux/app/hooks'; import { RootState } from '../../redux/app/store'; -import { TypingIndicatorRenderer } from './typing-indicator/TypingIndicatorRenderer'; +import { TypingIndicator } from './typing-indicator/TypingIndicator'; + +const useClasses = makeStyles({ + root: { + display: 'flex', + flexDirection: 'row', + }, +}); export const ChatStatus: React.FC = () => { + const classes = useClasses(); + const { conversations, selectedId } = useAppSelector((state: RootState) => state.conversations); const { users } = conversations[selectedId]; const { activeUserInfo } = useAppSelector((state: RootState) => state.app); @@ -23,10 +34,26 @@ export const ChatStatus: React.FC = () => { checkAreTyping(); }, [activeUserInfo, users]); + let message = conversations[selectedId].botResponseStatus; + const numberOfUsersTyping = typingUserList.length; + if (numberOfUsersTyping === 1) { + message = message ? `${message} and a user is typing` : 'A user is typing'; + } else if (numberOfUsersTyping > 1) { + message = message + ? `${message} and ${numberOfUsersTyping} users are typing` + : `${numberOfUsersTyping} users are typing`; + } + + if (!message) { + return null; + } + return ( - + +
+ + +
+
); }; diff --git a/webapp/src/components/chat/chat-history/ChatHistoryItem.tsx b/webapp/src/components/chat/chat-history/ChatHistoryItem.tsx index 39cbc584c..66d63fdc7 100644 --- a/webapp/src/components/chat/chat-history/ChatHistoryItem.tsx +++ b/webapp/src/components/chat/chat-history/ChatHistoryItem.tsx @@ -11,7 +11,7 @@ import { FeatureKeys } from '../../../redux/features/app/AppState'; import { Breakpoints, customTokens } from '../../../styles'; import { timestampToDateString } from '../../utils/TextUtils'; import { PlanViewer } from '../plan-viewer/PlanViewer'; -import { PromptDetails } from '../prompt-details/PromptDetails'; +import { PromptDialog } from '../prompt-dialog/PromptDialog'; import * as utils from './../../utils/TextUtils'; import { ChatHistoryDocumentContent } from './ChatHistoryDocumentContent'; import { ChatHistoryTextContent } from './ChatHistoryTextContent'; @@ -132,7 +132,7 @@ export const ChatHistoryItem: React.FC = ({ message, getRe
{!isMe && {fullName}} {timestampToDateString(message.timestamp, true)} - {isBot && } + {isBot && }
{content} {showShowRLHFMessage && } diff --git a/webapp/src/components/chat/chat-history/ChatHistoryTextContent.tsx b/webapp/src/components/chat/chat-history/ChatHistoryTextContent.tsx index bdba245b3..9f5028d5f 100644 --- a/webapp/src/components/chat/chat-history/ChatHistoryTextContent.tsx +++ b/webapp/src/components/chat/chat-history/ChatHistoryTextContent.tsx @@ -4,6 +4,7 @@ import { makeStyles } from '@fluentui/react-components'; import React from 'react'; import { IChatMessage } from '../../../libs/models/ChatMessage'; import { convertToAnchorTags } from '../../utils/TextUtils'; +import { TypingIndicator } from '../typing-indicator/TypingIndicator'; import * as utils from './../../utils/TextUtils'; const useClasses = makeStyles({ @@ -19,14 +20,18 @@ interface ChatHistoryTextContentProps { export const ChatHistoryTextContent: React.FC = ({ message }) => { const classes = useClasses(); - let content = message.content.trim() - .replace(/[\u00A0-\u9999<>&]/g, function (i: string) { - return `&#${i.charCodeAt(0)};`; - }); + let content = message.content.trim().replace(/[\u00A0-\u9999<>&]/g, function (i: string) { + return `&#${i.charCodeAt(0)};`; + }); content = utils.formatChatTextContent(content); - content = content - .replace(/\n/g, '
') - .replace(/ {2}/g, '  '); + content = content.replace(/\n/g, '
').replace(/ {2}/g, '  '); - return
; -}; \ No newline at end of file + return ( +
: convertToAnchorTags(content), + }} + /> + ); +}; diff --git a/webapp/src/components/chat/prompt-details/PromptDetails.tsx b/webapp/src/components/chat/prompt-details/PromptDetails.tsx deleted file mode 100644 index 7e9fe7cf9..000000000 --- a/webapp/src/components/chat/prompt-details/PromptDetails.tsx +++ /dev/null @@ -1,51 +0,0 @@ -// Copyright (c) Microsoft. All rights reserved. - -import { Button, Dialog, DialogActions, DialogBody, DialogContent, DialogSurface, DialogTitle, DialogTrigger, Tooltip, makeStyles, shorthands } from '@fluentui/react-components'; -import { Info16Regular } from '@fluentui/react-icons'; -import React from 'react'; -import { IChatMessage } from '../../../libs/models/ChatMessage'; - -const useClasses = makeStyles({ - infoButton: { - ...shorthands.padding(0), - ...shorthands.margin(0), - minWidth: 'auto', - marginLeft: 'auto', // align to right - }, -}); - -interface IPromptDetailsProps { - message: IChatMessage; -} - -export const PromptDetails: React.FC = ({ message }) => { - const classes = useClasses(); - - return ( - - - - - - - - - - ); -}; diff --git a/webapp/src/components/chat/prompt-dialog/PromptDialog.tsx b/webapp/src/components/chat/prompt-dialog/PromptDialog.tsx new file mode 100644 index 000000000..02996f22e --- /dev/null +++ b/webapp/src/components/chat/prompt-dialog/PromptDialog.tsx @@ -0,0 +1,73 @@ +// Copyright (c) Microsoft. All rights reserved. 
+ +import { + Button, + Dialog, + DialogActions, + DialogBody, + DialogContent, + DialogSurface, + DialogTitle, + DialogTrigger, + Label, + Link, + Tooltip, + makeStyles, + shorthands, +} from '@fluentui/react-components'; +import { Info16Regular } from '@fluentui/react-icons'; +import React from 'react'; +import { IChatMessage } from '../../../libs/models/ChatMessage'; +import { useDialogClasses } from '../../../styles'; +import { TokenUsageGraph } from '../../token-usage/TokenUsageGraph'; + +const useClasses = makeStyles({ + infoButton: { + ...shorthands.padding(0), + ...shorthands.margin(0), + minWidth: 'auto', + marginLeft: 'auto', // align to right + }, +}); + +interface IPromptDialogProps { + message: IChatMessage; +} + +export const PromptDialog: React.FC = ({ message }) => { + const classes = useClasses(); + const dialogClasses = useDialogClasses(); + + return ( + + + + + + + + + + ); +}; diff --git a/webapp/src/components/chat/typing-indicator/TypingIndicatorRenderer.tsx b/webapp/src/components/chat/typing-indicator/TypingIndicatorRenderer.tsx deleted file mode 100644 index 36a193808..000000000 --- a/webapp/src/components/chat/typing-indicator/TypingIndicatorRenderer.tsx +++ /dev/null @@ -1,51 +0,0 @@ -// Copyright (c) Microsoft. All rights reserved. - -import { makeStyles } from '@fluentui/react-components'; -import { Animation } from '@fluentui/react-northstar'; -import * as React from 'react'; -import { TypingIndicator } from './TypingIndicator'; - -const useClasses = makeStyles({ - root: { - display: 'flex', - flexDirection: 'row', - }, -}); - -interface TypingIndicatorRendererProps { - botResponseStatus: string | undefined; - numberOfUsersTyping: number; -} - -export const TypingIndicatorRenderer: React.FC = ({ - botResponseStatus, - numberOfUsersTyping, -}) => { - const classes = useClasses(); - - let message = botResponseStatus; - if (numberOfUsersTyping === 1) { - message = message ? 
`${message} and a user is typing` : 'A user is typing'; - } else if (numberOfUsersTyping > 1) { - message = message - ? `${message} and ${numberOfUsersTyping} users are typing` - : `${numberOfUsersTyping} users are typing`; - } - - if (!message) { - return null; - } - - const typingIndicator = ( -
- - -
- ); - - return ( - - {typingIndicator} - - ); -}; diff --git a/webapp/src/components/header/settings-dialog/SettingsDialog.tsx b/webapp/src/components/header/settings-dialog/SettingsDialog.tsx index 7f90b142c..9b82bcc32 100644 --- a/webapp/src/components/header/settings-dialog/SettingsDialog.tsx +++ b/webapp/src/components/header/settings-dialog/SettingsDialog.tsx @@ -25,6 +25,7 @@ import React from 'react'; import { useAppSelector } from '../../../redux/app/hooks'; import { RootState } from '../../../redux/app/store'; import { SharedStyles, useDialogClasses } from '../../../styles'; +import { TokenUsageGraph } from '../../token-usage/TokenUsageGraph'; import { SettingSection } from './SettingSection'; const useClasses = makeStyles({ @@ -55,7 +56,7 @@ interface ISettingsDialogProps { export const SettingsDialog: React.FC = ({ open, closeDialog }) => { const classes = useClasses(); const dialogClasses = useDialogClasses(); - const { settings } = useAppSelector((state: RootState) => state.app); + const { settings, tokenUsage } = useAppSelector((state: RootState) => state.app); return ( = ({ open, closeDial Settings + diff --git a/webapp/src/components/shared/Alerts.tsx b/webapp/src/components/shared/Alerts.tsx index a15a6cb98..eb0386dee 100644 --- a/webapp/src/components/shared/Alerts.tsx +++ b/webapp/src/components/shared/Alerts.tsx @@ -1,9 +1,6 @@ // Copyright (c) Microsoft. All rights reserved. 
-import { - makeStyles, - tokens -} from '@fluentui/react-components'; +import { makeStyles, tokens } from '@fluentui/react-components'; import { Alert } from '@fluentui/react-components/unstable'; import { Dismiss16Regular } from '@fluentui/react-icons'; import React from 'react'; @@ -12,8 +9,6 @@ import { RootState } from '../../redux/app/store'; import { removeAlert } from '../../redux/features/app/appSlice'; const useClasses = makeStyles({ - root: { - }, alert: { fontWeight: tokens.fontWeightRegular, color: tokens.colorNeutralForeground1, @@ -29,7 +24,7 @@ export const Alerts: React.FC = () => { const { alerts } = useAppSelector((state: RootState) => state.app); return ( -
+
{alerts.map(({ type, message }, index) => { return ( { })}
); -}; \ No newline at end of file +}; diff --git a/webapp/src/components/token-usage/TokenUsageBar.tsx b/webapp/src/components/token-usage/TokenUsageBar.tsx new file mode 100644 index 000000000..868b6c023 --- /dev/null +++ b/webapp/src/components/token-usage/TokenUsageBar.tsx @@ -0,0 +1,36 @@ +import { Popover, PopoverSurface, PopoverTrigger, tokens } from '@fluentui/react-components'; +import { TokenUsageViewDetails } from '../../libs/models/TokenUsage'; + +interface ITokenUsageBar { + details: TokenUsageViewDetails; + totalUsage: number; +} + +export const TokenUsageBar: React.FC = ({ details, totalUsage }) => { + const percentage = details.usageCount / totalUsage; + const barWidth = percentage * 500; + + return ( + + +
+ + {`${details.legendLabel} (${details.usageCount})`} + + ); +}; diff --git a/webapp/src/components/token-usage/TokenUsageGraph.tsx b/webapp/src/components/token-usage/TokenUsageGraph.tsx new file mode 100644 index 000000000..d583b1517 --- /dev/null +++ b/webapp/src/components/token-usage/TokenUsageGraph.tsx @@ -0,0 +1,164 @@ +import { + Body1, + Button, + Divider, + makeStyles, + mergeClasses, + Popover, + PopoverSurface, + PopoverTrigger, + shorthands, + Text, + tokens, +} from '@fluentui/react-components'; +import { Brands } from '@fluentui/tokens'; +import { + TokenUsage, + TokenUsageFunctionNameMap, + TokenUsageView, + TokenUsageViewDetails, +} from '../../libs/models/TokenUsage'; +import { useAppSelector } from '../../redux/app/hooks'; +import { RootState } from '../../redux/app/store'; +import { semanticKernelBrandRamp } from '../../styles'; +import { TypingIndicator } from '../chat/typing-indicator/TypingIndicator'; +import { Info16 } from '../shared/BundledIcons'; +import { TokenUsageBar } from './TokenUsageBar'; +import { TokenUsageLegendItem } from './TokenUsageLegendItem'; + +const useClasses = makeStyles({ + horizontal: { + display: 'flex', + ...shorthands.gap(tokens.spacingVerticalSNudge), + alignItems: 'center', + }, + content: { + display: 'flex', + flexDirection: 'column', + ...shorthands.gap(tokens.spacingHorizontalS), + paddingBottom: tokens.spacingHorizontalM, + }, + popover: { + width: '300px', + }, + header: { + marginBlockEnd: tokens.spacingHorizontalM, + }, + legend: { + 'flex-flow': 'wrap', + }, +}); + +interface ITokenUsageGraph { + tokenUsage: TokenUsage; + promptView?: boolean; +} + +const contrastColors = [ + tokens.colorPaletteBlueBackground2, + tokens.colorPaletteBlueForeground2, + tokens.colorPaletteBlueBorderActive, +]; + +export const TokenUsageGraph: React.FC = ({ promptView, tokenUsage }) => { + const classes = useClasses(); + const { conversations, selectedId } = useAppSelector((state: RootState) => state.conversations); + 
const loadingResponse = conversations[selectedId].botResponseStatus; + + const responseGenerationView: TokenUsageView = {}; + const memoryExtractionView: TokenUsageView = {}; + + let memoryExtractionUsage = 0; + let responseGenerationUsage = 0; + let brandColorIndex = 120 as Brands; + const brandStep = 20; + let contrastColorsIndex = 0; + + Object.entries(tokenUsage).forEach(([key, value]) => { + const viewDetails: TokenUsageViewDetails = { + usageCount: value ?? 0, + legendLabel: TokenUsageFunctionNameMap[key], + color: semanticKernelBrandRamp[brandColorIndex], + }; + + if (key.toLocaleUpperCase().includes('MEMORY')) { + memoryExtractionUsage += value ?? 0; + viewDetails.color = contrastColors[contrastColorsIndex++]; + memoryExtractionView[key] = viewDetails; + } else { + responseGenerationUsage += value ?? 0; + brandColorIndex = (brandColorIndex - brandStep < 0 ? 160 : brandColorIndex - brandStep) as Brands; + responseGenerationView[key] = viewDetails; + } + }); + + const totalUsage = memoryExtractionUsage + responseGenerationUsage; + + return ( + <> +

+ Token Usage + + +

+
+ {loadingResponse ? ( + + Final token usage will be available once bot response is generated. + + + ) : ( + <> + {totalUsage > 0 ? ( + <> + {!promptView && Total token usage for current session} +
+ {Object.entries(responseGenerationView).map(([key, details]) => { + return ; + })} + {Object.entries(memoryExtractionView).map(([key, details]) => { + return ; + })} +
+
+ + +
+ + ) : promptView ? ( + No tokens were used. This is a hardcoded response. + ) : ( + No tokens have been used in this session yet. + )} + + )} +
+ + + ); +}; diff --git a/webapp/src/components/token-usage/TokenUsageLegendItem.tsx b/webapp/src/components/token-usage/TokenUsageLegendItem.tsx new file mode 100644 index 000000000..0da042059 --- /dev/null +++ b/webapp/src/components/token-usage/TokenUsageLegendItem.tsx @@ -0,0 +1,69 @@ +import { + Popover, + PopoverSurface, + PopoverTrigger, + Text, + makeStyles, + shorthands, + tokens, +} from '@fluentui/react-components'; +import { TokenUsageView } from '../../libs/models/TokenUsage'; +import { TokenUsageLegendLabel } from './TokenUsageLegendLabel'; + +export const useClasses = makeStyles({ + root: { + display: 'flex', + ...shorthands.gap(tokens.spacingVerticalSNudge), + alignItems: 'center', + }, + colors: { + display: 'flex', + ...shorthands.gap(tokens.spacingVerticalXXS), + }, + legendColor: { + height: tokens.spacingVerticalMNudge, + width: tokens.spacingHorizontalMNudge, + }, +}); + +interface ITokenUsageLegendItem { + name: string; + usageCount: number; + items: TokenUsageView; + color: string; +} + +export const TokenUsageLegendItem: React.FC = ({ name, usageCount, items, color }) => { + const classes = useClasses(); + return ( +
+ + +
+ + + {Object.values(items).map((details) => { + return ; + })} + + + {`${name} (${usageCount})`} +
+ ); +}; diff --git a/webapp/src/components/token-usage/TokenUsageLegendLabel.tsx b/webapp/src/components/token-usage/TokenUsageLegendLabel.tsx new file mode 100644 index 000000000..6618b66f8 --- /dev/null +++ b/webapp/src/components/token-usage/TokenUsageLegendLabel.tsx @@ -0,0 +1,23 @@ +import { Text, tokens } from '@fluentui/react-components'; +import { TokenUsageViewDetails } from '../../libs/models/TokenUsage'; +import { useClasses } from './TokenUsageLegendItem'; + +interface ITokenUsageLegendLabel { + details: TokenUsageViewDetails; +} + +export const TokenUsageLegendLabel: React.FC = ({ details }) => { + const classes = useClasses(); + return ( +
+
+ {`${details.legendLabel} (${details.usageCount})`} +
+ ); +}; diff --git a/webapp/src/libs/hooks/useChat.ts b/webapp/src/libs/hooks/useChat.ts index a61b5cf49..5630c4edd 100644 --- a/webapp/src/libs/hooks/useChat.ts +++ b/webapp/src/libs/hooks/useChat.ts @@ -4,21 +4,23 @@ import { useMsal } from '@azure/msal-react'; import { Constants } from '../../Constants'; import { useAppDispatch, useAppSelector } from '../../redux/app/hooks'; import { RootState } from '../../redux/app/store'; -import { addAlert } from '../../redux/features/app/appSlice'; +import { addAlert, updateTokenUsage } from '../../redux/features/app/appSlice'; import { ChatState } from '../../redux/features/conversations/ChatState'; import { Conversations } from '../../redux/features/conversations/ConversationsState'; import { addConversation, setConversations, setSelectedConversation, + updateBotResponseStatus, } from '../../redux/features/conversations/conversationsSlice'; import { Plugin } from '../../redux/features/plugins/PluginsState'; import { AuthHelper } from '../auth/AuthHelper'; import { AlertType } from '../models/AlertType'; import { Bot } from '../models/Bot'; -import { ChatMessageType } from '../models/ChatMessage'; +import { ChatMessageType, IChatMessage } from '../models/ChatMessage'; import { IChatSession } from '../models/ChatSession'; import { IChatUser } from '../models/ChatUser'; +import { TokenUsage } from '../models/TokenUsage'; import { IAskVariables } from '../semantic-kernel/model/Ask'; import { BotService } from '../services/BotService'; import { ChatService } from '../services/ChatService'; @@ -72,15 +74,13 @@ export const useChat = () => { const chatTitle = `Copilot @ ${new Date().toLocaleString()}`; const accessToken = await AuthHelper.getSKaaSAccessToken(instance, inProgress); try { - await chatService.createChatAsync(userId, chatTitle, accessToken).then(async (result: IChatSession) => { - const chatMessages = await chatService.getChatMessagesAsync(result.id, 0, 1, accessToken); - + await chatService.createChatAsync(userId, 
chatTitle, accessToken).then((result: IChatSession) => { const newChat: ChatState = { id: result.id, title: result.title, systemDescription: result.systemDescription, memoryBalance: result.memoryBalance, - messages: chatMessages, + messages: [result.initialBotMessage as IChatMessage], users: [loggedInUser], botProfilePicture: getBotProfilePicture(Object.keys(conversations).length), input: '', @@ -125,12 +125,21 @@ export const useChat = () => { } try { - await chatService.getBotResponseAsync( - ask, - await AuthHelper.getSKaaSAccessToken(instance, inProgress), - getEnabledPlugins(), - ); + const askResult = await chatService + .getBotResponseAsync( + ask, + await AuthHelper.getSKaaSAccessToken(instance, inProgress), + getEnabledPlugins(), + ) + .catch((e: any) => { + throw e; + }); + + // Update token usage of current session + const responseTokenUsage = askResult.variables.find((v) => v.key === 'tokenUsage')?.value; + if (responseTokenUsage) dispatch(updateTokenUsage(JSON.parse(responseTokenUsage) as TokenUsage)); } catch (e: any) { + dispatch(updateBotResponseStatus({ chatId, status: undefined })); const errorMessage = `Unable to generate bot response. Details: ${getErrorDetails(e)}`; dispatch(addAlert({ message: errorMessage, type: AlertType.Error })); } diff --git a/webapp/src/libs/models/ChatMessage.ts b/webapp/src/libs/models/ChatMessage.ts index 6a975b0e3..f9dc2e80e 100644 --- a/webapp/src/libs/models/ChatMessage.ts +++ b/webapp/src/libs/models/ChatMessage.ts @@ -1,6 +1,7 @@ // Copyright (c) Microsoft. All rights reserved. import { PlanState } from './Plan'; +import { TokenUsage } from './TokenUsage'; /** * Role of the author of a chat message. It's a copy of AuthorRoles in the API C# code. 
@@ -41,6 +42,7 @@ export enum UserFeedback { } export interface IChatMessage { + chatId: string; type: ChatMessageType; timestamp: number; userName: string; @@ -53,4 +55,5 @@ export interface IChatMessage { planState?: PlanState; // TODO: [Issue #42] Persistent RLHF userFeedback?: UserFeedback; + tokenUsage?: TokenUsage; } diff --git a/webapp/src/libs/models/ChatSession.ts b/webapp/src/libs/models/ChatSession.ts index 73f587a1c..932df40ba 100644 --- a/webapp/src/libs/models/ChatSession.ts +++ b/webapp/src/libs/models/ChatSession.ts @@ -1,8 +1,11 @@ // Copyright (c) Microsoft. All rights reserved. +import { IChatMessage } from './ChatMessage'; + export interface IChatSession { id: string; title: string; + initialBotMessage?: IChatMessage; systemDescription: string; memoryBalance: number; } diff --git a/webapp/src/libs/models/TokenUsage.ts b/webapp/src/libs/models/TokenUsage.ts new file mode 100644 index 000000000..01e5c33d1 --- /dev/null +++ b/webapp/src/libs/models/TokenUsage.ts @@ -0,0 +1,25 @@ +/// Information about token usage used to generate bot response. 
+export type TokenUsage = Record; + +export type TokenUsageView = Record; + +export interface TokenUsageViewDetails { + usageCount: number; + legendLabel: string; + color: string; +} + +export interface FunctionDetails { + usageCount: number; + legendLabel: string; + color?: string; +} + +export const TokenUsageFunctionNameMap: Record = { + audienceExtraction: 'Audience Extraction', + userIntentExtraction: 'User Intent Extraction', + metaPromptTemplate: 'Meta Prompt Template', + responseCompletion: 'Response Completion', + workingMemoryExtraction: 'Working Memory Extraction', + longTermMemoryExtraction: 'Long Term Memory Extraction', +}; diff --git a/webapp/src/libs/semantic-kernel/model/AskResult.ts b/webapp/src/libs/semantic-kernel/model/AskResult.ts index 3503020f3..9b2227166 100644 --- a/webapp/src/libs/semantic-kernel/model/AskResult.ts +++ b/webapp/src/libs/semantic-kernel/model/AskResult.ts @@ -1,11 +1,13 @@ // Copyright (c) Microsoft. All rights reserved. +import { IChatMessage } from '../../models/ChatMessage'; + export interface IAskResult { - value: string; - variables: Variable[]; + message: IChatMessage; + variables: ContextVariable[]; } -export interface Variable { +export interface ContextVariable { key: string; value: string; } diff --git a/webapp/src/libs/services/ChatService.ts b/webapp/src/libs/services/ChatService.ts index a9cf21ed4..0b5b91acb 100644 --- a/webapp/src/libs/services/ChatService.ts +++ b/webapp/src/libs/services/ChatService.ts @@ -7,6 +7,7 @@ import { IChatParticipant } from '../models/ChatParticipant'; import { IChatSession } from '../models/ChatSession'; import { IChatUser } from '../models/ChatUser'; import { IAsk, IAskVariables } from '../semantic-kernel/model/Ask'; +import { IAskResult } from '../semantic-kernel/model/AskResult'; import { ICustomPlugin } from '../semantic-kernel/model/CustomPlugin'; import { BaseService } from './BaseService'; @@ -101,7 +102,7 @@ export class ChatService extends BaseService { ask: IAsk, 
accessToken: string, enabledPlugins?: Plugin[], - ): Promise => { + ): Promise => { // If skill requires any additional api properties, append to context if (enabledPlugins && enabledPlugins.length > 0) { const openApiSkillVariables: IAskVariables[] = []; @@ -110,7 +111,6 @@ export class ChatService extends BaseService { const customPlugins: ICustomPlugin[] = []; for (const plugin of enabledPlugins) { - // If user imported a manifest domain, add custom plugin if (plugin.manifestDomain) { customPlugins.push({ @@ -153,7 +153,7 @@ export class ChatService extends BaseService { ask.variables = ask.variables ? ask.variables.concat(openApiSkillVariables) : openApiSkillVariables; } - const result = await this.getResponseAsync( + const result = await this.getResponseAsync( { commandPath: 'chat', method: 'POST', diff --git a/webapp/src/redux/features/app/AppState.ts b/webapp/src/redux/features/app/AppState.ts index 0de60facf..17ea92f47 100644 --- a/webapp/src/redux/features/app/AppState.ts +++ b/webapp/src/redux/features/app/AppState.ts @@ -1,6 +1,7 @@ // Copyright (c) Microsoft. All rights reserved. import { AlertType } from '../../../libs/models/AlertType'; +import { TokenUsage } from '../../../libs/models/TokenUsage'; export interface ActiveUserInfo { id: string; @@ -31,6 +32,7 @@ export interface Setting { export interface AppState { alerts: Alert[]; activeUserInfo?: ActiveUserInfo; + tokenUsage: TokenUsage; features: Record; settings: Setting[]; } @@ -126,6 +128,7 @@ export const initialState: AppState = { type: AlertType.Info, }, ], + tokenUsage: {}, features: Features, settings: Settings, }; diff --git a/webapp/src/redux/features/app/appSlice.ts b/webapp/src/redux/features/app/appSlice.ts index 836824e94..73be3c895 100644 --- a/webapp/src/redux/features/app/appSlice.ts +++ b/webapp/src/redux/features/app/appSlice.ts @@ -1,6 +1,7 @@ // Copyright (c) Microsoft. All rights reserved. 
import { createSlice, PayloadAction } from '@reduxjs/toolkit'; +import { TokenUsage } from '../../../libs/models/TokenUsage'; import { ActiveUserInfo, Alert, AppState, FeatureKeys, initialState } from './AppState'; export const appSlice = createSlice({ @@ -22,7 +23,13 @@ export const appSlice = createSlice({ setActiveUserInfo: (state: AppState, action: PayloadAction) => { state.activeUserInfo = action.payload; }, - // This sets the feature flag + updateTokenUsage: (state: AppState, action: PayloadAction) => { + Object.entries(action.payload).forEach(([key, value]) => { + action.payload[key] = getTotalTokenUsage(state.tokenUsage[key], value); + }); + state.tokenUsage = action.payload; + }, + // This sets the feature flag based on end user input toggleFeatureFlag: (state: AppState, action: PayloadAction) => { const feature = state.features[action.payload]; state.features = { @@ -33,7 +40,7 @@ export const appSlice = createSlice({ }, }; }, - // This controls feature availability based on the state of backend support + // This controls feature availability based on the state of backend toggleFeatureState: ( state: AppState, action: PayloadAction<{ @@ -55,7 +62,25 @@ export const appSlice = createSlice({ }, }); -export const { addAlert, removeAlert, setAlerts, setActiveUserInfo, toggleFeatureFlag, toggleFeatureState } = - appSlice.actions; +export const { + addAlert, + removeAlert, + setAlerts, + setActiveUserInfo, + toggleFeatureFlag, + toggleFeatureState, + updateTokenUsage, +} = appSlice.actions; export default appSlice.reducer; + +const getTotalTokenUsage = (previousSum?: number, current?: number) => { + if (previousSum === undefined) { + return current; + } + if (current === undefined) { + return previousSum; + } + + return previousSum + current; +}; diff --git a/webapp/src/redux/features/message-relay/signalRMiddleware.ts b/webapp/src/redux/features/message-relay/signalRMiddleware.ts index e41e6892e..8ffc237ef 100644 --- 
a/webapp/src/redux/features/message-relay/signalRMiddleware.ts +++ b/webapp/src/redux/features/message-relay/signalRMiddleware.ts @@ -13,7 +13,7 @@ import { Store, StoreMiddlewareAPI, getSelectedChatID } from './../../app/store' // These have to match the callback names used in the backend const enum SignalRCallbackMethods { ReceiveMessage = 'ReceiveMessage', - ReceiveMessageStream = 'ReceiveMessageStream', + ReceiveMessageUpdate = 'ReceiveMessageUpdate', UserJoined = 'UserJoined', ReceiveUserTypingState = 'ReceiveUserTypingState', ReceiveBotResponseStatus = 'ReceiveBotResponseStatus', @@ -165,15 +165,20 @@ export const registerSignalREvents = (store: Store) => { }, ); - hubConnection.on( - SignalRCallbackMethods.ReceiveMessageStream, - (chatId: string, messageId: string, content: string) => { - store.dispatch({ - type: 'conversations/updateMessageProperty', - payload: { chatId, messageIdOrIndex: messageId, property: 'content', value: content, frontLoad: true }, - }); - }, - ); + hubConnection.on(SignalRCallbackMethods.ReceiveMessageUpdate, (message: IChatMessage) => { + const { chatId, id: messageId, content } = message; + // If tokenUsage is defined, that means full message content has already been streamed and updated from server. No need to update content again. + store.dispatch({ + type: 'conversations/updateMessageProperty', + payload: { + chatId, + messageIdOrIndex: messageId, + property: message.tokenUsage ? 'tokenUsage' : 'content', + value: message.tokenUsage ?? 
content, + frontLoad: true, + }, + }); + }); hubConnection.on(SignalRCallbackMethods.UserJoined, (chatId: string, userId: string) => { const user: IChatUser = { diff --git a/webapp/src/styles.tsx b/webapp/src/styles.tsx index 911ecec04..21be9024f 100644 --- a/webapp/src/styles.tsx +++ b/webapp/src/styles.tsx @@ -9,7 +9,7 @@ import { tokens, } from '@fluentui/react-components'; -const semanticKernelBrandRamp: BrandVariants = { +export const semanticKernelBrandRamp: BrandVariants = { 10: '#060103', 20: '#261018', 30: '#431426',