|
| 1 | +# Copyright (c) Microsoft. All rights reserved. |
| 2 | + |
| 3 | +import asyncio |
| 4 | +import logging |
| 5 | +from typing import TYPE_CHECKING |
| 6 | + |
| 7 | +from semantic_kernel.agents import ( |
| 8 | + AgentGroupChat, |
| 9 | + ChatCompletionAgent, |
| 10 | +) |
| 11 | +from semantic_kernel.connectors.ai.open_ai import AzureChatCompletion, OpenAIChatCompletion |
| 12 | +from semantic_kernel.contents import AuthorRole, ChatHistory, ChatMessageContent |
| 13 | +from semantic_kernel.contents.history_reducer.chat_history_summarization_reducer import ChatHistorySummarizationReducer |
| 14 | +from semantic_kernel.contents.history_reducer.chat_history_truncation_reducer import ChatHistoryTruncationReducer |
| 15 | +from semantic_kernel.kernel import Kernel |
| 16 | + |
| 17 | +if TYPE_CHECKING: |
| 18 | + from semantic_kernel.contents.history_reducer.chat_history_reducer import ChatHistoryReducer |
| 19 | + |
| 20 | +##################################################################### |
| 21 | +# The following sample demonstrates how to implement a chat history # |
| 22 | +# reducer as part of the Semantic Kernel Agent Framework. It # |
| 23 | +# covers two types of reducers: summarization reduction and a # |
| 24 | +# truncation reduction. For this sample, the ChatCompletionAgent # |
| 25 | +# is used. # |
| 26 | +##################################################################### |
| 27 | + |
| 28 | + |
# Module-level logger for debugging and informational messages.
logger = logging.getLogger(__name__)

# Service selector: True routes chat completion through Azure OpenAI
# (requires the corresponding environment configuration); False uses OpenAI.
use_azure_openai = True
| 35 | + |
| 36 | + |
# Helper to build a Kernel wired to the configured chat completion backend.
def _create_kernel_with_chat_completion(service_id: str) -> Kernel:
    """Create a kernel and register a chat completion service under *service_id*.

    The module-level ``use_azure_openai`` flag selects between the Azure OpenAI
    and the OpenAI connector.
    """
    kernel = Kernel()
    service_cls = AzureChatCompletion if use_azure_openai else OpenAIChatCompletion
    kernel.add_service(service_cls(service_id=service_id))
    return kernel
| 48 | + |
| 49 | + |
class HistoryReducerExample:
    """
    Demonstrates how to create a ChatCompletionAgent with a ChatHistoryReducer
    (either truncation or summarization) and how to invoke that agent
    multiple times while applying the history reduction.
    """

    # Agent-specific settings
    TRANSLATOR_NAME = "NumeroTranslator"  # Name of the agent
    TRANSLATOR_INSTRUCTIONS = "Add one to the latest user number and spell it in Spanish without explanation."

    def create_truncating_agent(
        self, reducer_msg_count: int, reducer_threshold: int
    ) -> tuple[ChatCompletionAgent, "ChatHistoryReducer"]:
        """
        Creates a ChatCompletionAgent with a truncation-based history reducer.

        Parameters:
        - reducer_msg_count: Target number of messages to retain after truncation.
        - reducer_threshold: Threshold number of messages to trigger truncation.

        Returns:
        - A tuple of (agent, reducer): the configured ChatCompletionAgent and the
          ChatHistoryTruncationReducer attached to it.
        """
        truncation_reducer = ChatHistoryTruncationReducer(
            target_count=reducer_msg_count, threshold_count=reducer_threshold
        )

        return ChatCompletionAgent(
            name=self.TRANSLATOR_NAME,
            instructions=self.TRANSLATOR_INSTRUCTIONS,
            kernel=_create_kernel_with_chat_completion("truncate_agent"),
            history_reducer=truncation_reducer,
        ), truncation_reducer

    def create_summarizing_agent(
        self, reducer_msg_count: int, reducer_threshold: int
    ) -> tuple[ChatCompletionAgent, "ChatHistoryReducer"]:
        """
        Creates a ChatCompletionAgent with a summarization-based history reducer.

        Parameters:
        - reducer_msg_count: Target number of messages to retain after summarization.
        - reducer_threshold: Threshold number of messages to trigger summarization.

        Returns:
        - A tuple of (agent, reducer): the configured ChatCompletionAgent and the
          ChatHistorySummarizationReducer attached to it.
        """
        kernel = _create_kernel_with_chat_completion("summarize_agent")

        # Unlike truncation, summarization needs a chat service of its own to
        # produce the summary messages; reuse the agent's kernel service.
        summarization_reducer = ChatHistorySummarizationReducer(
            service=kernel.get_service(service_id="summarize_agent"),
            target_count=reducer_msg_count,
            threshold_count=reducer_threshold,
        )

        return ChatCompletionAgent(
            name=self.TRANSLATOR_NAME,
            instructions=self.TRANSLATOR_INSTRUCTIONS,
            kernel=kernel,
            history_reducer=summarization_reducer,
        ), summarization_reducer

    async def invoke_agent(self, agent: ChatCompletionAgent, chat_history: ChatHistory, message_count: int):
        """
        Demonstrates agent invocation with direct history management and reduction.

        Parameters:
        - agent: The ChatCompletionAgent to invoke.
        - chat_history: The chat history accumulating the conversation
          (typically the agent's reducer instance itself, so that reduction
          and display operate on the same message list).
        - message_count: The number of messages to simulate in the conversation.
        """
        index = 1
        while index <= message_count:
            # Provide user input
            user_message = ChatMessageContent(role=AuthorRole.USER, content=str(index))
            chat_history.messages.append(user_message)
            print(f"# User: '{index}'")

            # Attempt history reduction if a reducer is present
            is_reduced = False
            if agent.history_reducer is not None:
                reduced = await agent.history_reducer.reduce()
                if reduced is not None:
                    # Replace the visible history with the reduced message list.
                    chat_history.messages.clear()
                    chat_history.messages.extend(reduced)
                    is_reduced = True
                    print("@ (History was reduced!)")

            # Invoke the agent and display its response
            async for response in agent.invoke(chat_history):
                chat_history.messages.append(response)
                print(f"# {response.role} - {response.name}: '{response.content}'")

            # The index is incremented by 2 because the agent is told to:
            # "Add one to the latest user number and spell it in Spanish without explanation."
            # The user sends 1, 3, 5, etc., and the agent responds with 2, 4, 6, etc. (in Spanish)
            index += 2
            print(f"@ Message Count: {len(chat_history.messages)}\n")

            # If history was reduced, and the chat history is of type `ChatHistorySummarizationReducer`,
            # print summaries as it will contain the __summary__ metadata key.
            if is_reduced and isinstance(chat_history, ChatHistorySummarizationReducer):
                self._print_summaries_from_front(chat_history.messages)

    async def invoke_chat(self, agent: ChatCompletionAgent, message_count: int):
        """
        Demonstrates agent invocation within a group chat.

        Parameters:
        - agent: The ChatCompletionAgent to invoke.
        - message_count: The number of messages to simulate in the conversation.
        """
        chat = AgentGroupChat()  # Initialize a new group chat
        last_history_count = 0

        index = 1
        while index <= message_count:
            # Add user message to the chat
            user_msg = ChatMessageContent(role=AuthorRole.USER, content=str(index))
            await chat.add_chat_message(user_msg)
            print(f"# User: '{index}'")

            # Invoke the agent and display its response
            async for message in chat.invoke(agent):
                print(f"# {message.role} - {message.name or '*'}: '{message.content}'")

            # The index is incremented by 2 because the agent is told to:
            # "Add one to the latest user number and spell it in Spanish without explanation."
            # The user sends 1, 3, 5, etc., and the agent responds with 2, 4, 6, etc. (in Spanish)
            index += 2

            # Retrieve chat messages in descending order (newest first)
            msgs = []
            async for m in chat.get_chat_messages(agent):
                msgs.append(m)

            print(f"@ Message Count: {len(msgs)}\n")

            # A drop in message count between iterations signals that the
            # group chat's history was reduced; print any summaries produced.
            if len(msgs) < last_history_count:
                self._print_summaries_from_back(msgs)

            last_history_count = len(msgs)

    def _print_summaries_from_front(self, messages: list[ChatMessageContent]):
        """
        Prints the leading run of summary messages (oldest first).

        Parameters:
        - messages: List of chat messages to process.
        """
        for msg in messages:
            # Summary messages carry the "__summary__" metadata key; they are
            # grouped at the front, so stop at the first non-summary message.
            if not (msg.metadata and msg.metadata.get("__summary__")):
                break
            print(f"\tSummary: {msg.content}")

    def _print_summaries_from_back(self, messages: list[ChatMessageContent]):
        """
        Prints the trailing run of summary messages (newest first).

        Parameters:
        - messages: List of chat messages to process.
        """
        for msg in reversed(messages):
            # Messages arrive newest-first from the group chat, so summaries
            # sit at the back; stop at the first non-summary message.
            if not (msg.metadata and msg.metadata.get("__summary__")):
                break
            print(f"\tSummary: {msg.content}")
| 226 | + |
| 227 | + |
# Main entry point for the script
async def main():
    """Run the truncation and summarization reducer demos back to back."""
    example = HistoryReducerExample()

    # --- Truncation-based reduction -----------------------------------------
    #
    # reducer_msg_count:
    #   Target number of messages retained after truncation/summarization.
    #   Smaller values suit tight memory budgets or short-context assistants;
    #   larger values preserve richer conversational context.
    #
    # reducer_threshold:
    #   Buffer on top of reducer_msg_count so reduction does not fire the
    #   moment the target is exceeded, protecting paired messages (a user
    #   query and its response) from being orphaned. Smaller values reduce
    #   more aggressively; larger values are safer for sensitive exchanges
    #   such as function calls or multi-part responses.
    trunc_agent, history_reducer = example.create_truncating_agent(
        reducer_msg_count=10,
        reducer_threshold=10,
    )
    # print("===TruncatedAgentReduction Demo===")
    # await example.invoke_agent(trunc_agent, chat_history=history_reducer, message_count=50)

    # --- Summarization-based reduction --------------------------------------
    # Same target/threshold configuration as the truncation demo above.
    sum_agent, history_reducer = example.create_summarizing_agent(
        reducer_msg_count=10,  # Target number of messages to retain
        reducer_threshold=10,  # Buffer to avoid premature reduction
    )
    print("\n===SummarizedAgentReduction Demo===")
    await example.invoke_agent(sum_agent, chat_history=history_reducer, message_count=50)

    # --- Group chat with truncation ------------------------------------------
    print("\n===TruncatedChatReduction Demo===")
    trunc_agent.history_reducer.messages.clear()  # start from an empty history
    await example.invoke_chat(trunc_agent, message_count=50)

    # --- Group chat with summarization ---------------------------------------
    print("\n===SummarizedChatReduction Demo===")
    sum_agent.history_reducer.messages.clear()  # start from an empty history
    await example.invoke_chat(sum_agent, message_count=50)
| 278 | + |
| 279 | + |
| 280 | +# Interaction between reducer_msg_count and reducer_threshold: |
| 281 | +# The combination of these values determines when reduction occurs and how much history is kept. |
| 282 | +# Example: |
| 283 | +# If reducer_msg_count = 10 and reducer_threshold = 5, history will not be truncated until the total message count |
| 284 | +# exceeds 15. This approach ensures flexibility in retaining conversational context while still adhering to memory |
| 285 | +# constraints. |
| 286 | + |
| 287 | +# Recommendations: |
| 288 | +# - Adjust for performance: Use a lower reducer_msg_count in environments with limited memory or when the assistant |
| 289 | +# needs faster processing times. |
| 290 | +# - Context sensitivity: Increase reducer_msg_count and reducer_threshold in use cases where maintaining continuity |
| 291 | +# across multiple interactions is essential (e.g., multi-turn conversations or complex workflows). |
| 292 | +# - Experiment: Start with the default values (10 and 10) and refine based on your application's behavior and the |
| 293 | +# assistant's response quality. |
| 294 | + |
| 295 | + |
# Run the async entry point only when this file is executed as a script.
if __name__ == "__main__":
    asyncio.run(main())