diff --git a/Sources/OpenAI/Public/Models/ChatQuery.swift b/Sources/OpenAI/Public/Models/ChatQuery.swift
index c7a88649..a36e0f6f 100644
--- a/Sources/OpenAI/Public/Models/ChatQuery.swift
+++ b/Sources/OpenAI/Public/Models/ChatQuery.swift
@@ -67,6 +67,10 @@ public struct ChatQuery: Equatable, Codable, Streamable {
     /// If set, partial message deltas will be sent, like in ChatGPT. Tokens will be sent as data-only server-sent events as they become available, with the stream terminated by a data: [DONE] message.
     /// https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format
     public var stream: Bool
+    /// If set in stream mode, the last chunk will contain usage data for the entire request, available via `ChatStreamResult.usage`.
+    /// Official doc:
+    /// https://cookbook.openai.com/examples/how_to_stream_completions#4-how-to-get-token-usage-data-for-streamed-chat-completion-response
+    public let streamOptions: Self.StreamOptions?
 
     public init(
         messages: [Self.ChatCompletionMessageParam],
@@ -86,7 +90,8 @@ public struct ChatQuery: Equatable, Codable, Streamable {
         topLogprobs: Int? = nil,
         topP: Double? = nil,
         user: String? = nil,
-        stream: Bool = false
+        stream: Bool = false,
+        streamOptions: Self.StreamOptions? = nil
     ) {
         self.messages = messages
         self.model = model
@@ -106,6 +111,19 @@ public struct ChatQuery: Equatable, Codable, Streamable {
         self.topP = topP
         self.user = user
         self.stream = stream
+        self.streamOptions = streamOptions
+    }
+
+    public struct StreamOptions: Codable, Equatable {
+        public var includeUsage: Bool
+
+        public init(includeUsage: Bool) {
+            self.includeUsage = includeUsage
+        }
+
+        public enum CodingKeys: String, CodingKey {
+            case includeUsage = "include_usage"
+        }
     }
 
     public enum ChatCompletionMessageParam: Codable, Equatable {
@@ -851,5 +869,6 @@ public struct ChatQuery: Equatable, Codable, Streamable {
         case topP = "top_p"
         case user
         case stream
+        case streamOptions = "stream_options"
     }
 }
diff --git a/Sources/OpenAI/Public/Models/ChatStreamResult.swift b/Sources/OpenAI/Public/Models/ChatStreamResult.swift
index 3457b089..977b1ba7 100644
--- a/Sources/OpenAI/Public/Models/ChatStreamResult.swift
+++ b/Sources/OpenAI/Public/Models/ChatStreamResult.swift
@@ -115,6 +115,18 @@ public struct ChatStreamResult: Codable, Equatable {
             case logprobs
         }
     }
+
+    public struct Usage: Codable, Equatable {
+        public let completionTokens: Int
+        public let promptTokens: Int
+        public let totalTokens: Int
+
+        public enum CodingKeys: String, CodingKey {
+            case completionTokens = "completion_tokens"
+            case promptTokens = "prompt_tokens"
+            case totalTokens = "total_tokens"
+        }
+    }
 
     /// A unique identifier for the chat completion. Each chunk has the same ID.
     public let id: String
@@ -130,6 +142,8 @@ public struct ChatStreamResult: Codable, Equatable {
     public let choices: [Choice]
     /// This fingerprint represents the backend configuration that the model runs with. Can be used in conjunction with the `seed` request parameter to understand when backend changes have been made that might impact determinism.
     public let systemFingerprint: String?
+    /// In stream mode, token usage data for the entire request; present only on the final chunk, and only when `streamOptions` is set in the request.
+    public let usage: Self.Usage?
 
     public enum CodingKeys: String, CodingKey {
         case id
@@ -138,5 +152,6 @@ public struct ChatStreamResult: Codable, Equatable {
         case model
         case choices
         case systemFingerprint = "system_fingerprint"
+        case usage
     }
 }
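
Usage example (a minimal sketch, not part of the diff: `OpenAI(apiToken:)`, `chatsStream(query:)`, the failable message initializer, and the model constant are assumed from the package's existing API, and the token and prompt literals are placeholders):

```swift
import OpenAI

func streamChatWithUsage() async throws {
    let client = OpenAI(apiToken: "YOUR_API_KEY")

    let query = ChatQuery(
        messages: [.init(role: .user, content: "Hello!")!], // failable init; force-unwrapped for brevity
        model: .gpt3_5Turbo,
        stream: true,
        // New in this diff: request a final chunk carrying usage data.
        streamOptions: .init(includeUsage: true)
    )

    for try await chunk in client.chatsStream(query: query) {
        for choice in chunk.choices {
            print(choice.delta.content ?? "", terminator: "")
        }
        // With `includeUsage: true`, every chunk reports `usage == nil` except the
        // final one, which carries an empty `choices` array and the request totals.
        if let usage = chunk.usage {
            print("\nprompt: \(usage.promptTokens), completion: \(usage.completionTokens), total: \(usage.totalTokens)")
        }
    }
}
```

Because both `streamOptions` and `usage` are optionals with explicit snake_case `CodingKeys` mappings, existing requests that never set the option, and responses that omit the `usage` field, continue to encode and decode unchanged.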