
Commit 16690ed

Authored Dec 18, 2024
Python: Yield FunctionResultContent in streaming chat completion path. Update tests. (microsoft#9974)
### Motivation and Context

Currently, SK's Python streaming chat completion path only yields two content types: `StreamingChatMessageContent` and `FunctionCallContent`. It does not yield `FunctionResultContent`, which is valuable for some use cases.

### Description

This PR updates the code to yield `FunctionResultContent` when it exists in the streaming chat completion path. When the function call results are merged into a `StreamingChatMessageContent`, we check whether that message has items (of type `FunctionResultContent`) and, if so, yield them. The filter path still works because once the results are yielded, we break out of the function-calling loop.

We also need to include the `ai_model_id`, if it exists, from the current `PromptExecutionSettings`: when a `reduce` operation adds two streaming chat message chunks together, the `StreamingChatMessageContent` that holds the function results breaks if `ai_model_id` is not set (the error is raised in the `__add__` override of `StreamingChatMessageContent`).

Some unit tests covering function calling were also updated: the test JSON function arguments were failing in the `json.loads` call because they used single quotes instead of double quotes. The arguments are now sanitized, just in case, so the tests don't break there.

This PR fixes:

- microsoft#9408
- microsoft#9968

### Contribution Checklist

- [X] The code builds clean without any errors or warnings
- [X] The PR follows the [SK Contribution Guidelines](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md) and the [pre-submission formatting script](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md#development-scripts) raises no violations
- [X] All unit tests pass, and I have added new tests where possible
- [X] I didn't break anyone 😄
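To make the `ai_model_id` point concrete, here is a minimal, hedged sketch (not the SDK's implementation) of the `reduce` pattern the samples use to combine chunks. It assumes `StreamingChatMessageContent` accepts `role`, `choice_index`, `content`, and `ai_model_id` keyword arguments and that its `__add__` override rejects chunks whose `ai_model_id` values do not match; the model id string is a placeholder.

```python
# Sketch: combining streamed chunks with reduce, as the updated samples do.
from functools import reduce

from semantic_kernel.contents.streaming_chat_message_content import StreamingChatMessageContent
from semantic_kernel.contents.utils.author_role import AuthorRole

chunks = [
    StreamingChatMessageContent(
        role=AuthorRole.ASSISTANT, choice_index=0, content="Hello, ", ai_model_id="my-model"  # placeholder id
    ),
    StreamingChatMessageContent(
        role=AuthorRole.ASSISTANT, choice_index=0, content="world!", ai_model_id="my-model"
    ),
]

# If the function-result chunk were missing its ai_model_id (the pre-fix behavior
# described above), this addition would raise inside __add__ instead of merging.
merged = reduce(lambda first, second: first + second, chunks)
print(merged.content)  # "Hello, world!"
```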
1 parent 4650d27 commit 16690ed

21 files changed (+218, -70 lines)
 

‎python/samples/concepts/auto_function_calling/anthropic_api_function_calling.py

+6 -3

@@ -12,6 +12,7 @@
 from semantic_kernel.contents.chat_message_content import ChatMessageContent
 from semantic_kernel.contents.function_call_content import FunctionCallContent
 from semantic_kernel.contents.streaming_chat_message_content import StreamingChatMessageContent
+from semantic_kernel.contents.utils.author_role import AuthorRole
 from semantic_kernel.core_plugins.math_plugin import MathPlugin
 from semantic_kernel.core_plugins.time_plugin import TimePlugin
 from semantic_kernel.functions import KernelArguments
@@ -131,11 +132,13 @@ async def handle_streaming(
     streamed_chunks: list[StreamingChatMessageContent] = []
     result_content = []
     async for message in response:
-        if not execution_settings.function_choice_behavior.auto_invoke_kernel_functions and isinstance(
-            message[0], StreamingChatMessageContent
+        if (
+            not execution_settings.function_choice_behavior.auto_invoke_kernel_functions
+            and isinstance(message[0], StreamingChatMessageContent)
+            and message[0].role == AuthorRole.ASSISTANT
         ):
             streamed_chunks.append(message[0])
-        else:
+        elif isinstance(message[0], StreamingChatMessageContent) and message[0].role == AuthorRole.ASSISTANT:
             result_content.append(message[0])
             print(str(message[0]), end="")

‎python/samples/concepts/auto_function_calling/chat_gpt_api_function_calling.py

+7 -4

@@ -12,6 +12,7 @@
 from semantic_kernel.contents.chat_message_content import ChatMessageContent
 from semantic_kernel.contents.function_call_content import FunctionCallContent
 from semantic_kernel.contents.streaming_chat_message_content import StreamingChatMessageContent
+from semantic_kernel.contents.utils.author_role import AuthorRole
 from semantic_kernel.core_plugins.math_plugin import MathPlugin
 from semantic_kernel.core_plugins.time_plugin import TimePlugin
 from semantic_kernel.functions import KernelArguments
@@ -130,13 +131,15 @@ async def handle_streaming(
 
     print("Mosscap:> ", end="")
     streamed_chunks: list[StreamingChatMessageContent] = []
-    result_content = []
+    result_content: list[StreamingChatMessageContent] = []
     async for message in response:
-        if not execution_settings.function_choice_behavior.auto_invoke_kernel_functions and isinstance(
-            message[0], StreamingChatMessageContent
+        if (
+            not execution_settings.function_choice_behavior.auto_invoke_kernel_functions
+            and isinstance(message[0], StreamingChatMessageContent)
+            and message[0].role == AuthorRole.ASSISTANT
         ):
             streamed_chunks.append(message[0])
-        else:
+        elif isinstance(message[0], StreamingChatMessageContent) and message[0].role == AuthorRole.ASSISTANT:
             result_content.append(message[0])
             print(str(message[0]), end="")

‎python/samples/concepts/auto_function_calling/chat_mistral_ai_api_function_calling.py

+6 -3

@@ -12,6 +12,7 @@
 from semantic_kernel.contents.chat_message_content import ChatMessageContent
 from semantic_kernel.contents.function_call_content import FunctionCallContent
 from semantic_kernel.contents.streaming_chat_message_content import StreamingChatMessageContent
+from semantic_kernel.contents.utils.author_role import AuthorRole
 from semantic_kernel.core_plugins.math_plugin import MathPlugin
 from semantic_kernel.core_plugins.time_plugin import TimePlugin
 from semantic_kernel.functions import KernelArguments
@@ -140,11 +141,13 @@ async def handle_streaming(
     streamed_chunks: list[StreamingChatMessageContent] = []
     result_content = []
     async for message in response:
-        if not execution_settings.function_choice_behavior.auto_invoke_kernel_functions and isinstance(
-            message[0], StreamingChatMessageContent
+        if (
+            not execution_settings.function_choice_behavior.auto_invoke_kernel_functions
+            and isinstance(message[0], StreamingChatMessageContent)
+            and message[0].role == AuthorRole.ASSISTANT
         ):
             streamed_chunks.append(message[0])
-        else:
+        elif isinstance(message[0], StreamingChatMessageContent) and message[0].role == AuthorRole.ASSISTANT:
             result_content.append(message[0])
             print(str(message[0]), end="")

‎python/samples/concepts/auto_function_calling/function_calling_with_required_type.py

+21 -5

@@ -6,12 +6,13 @@
 from typing import TYPE_CHECKING
 
 from semantic_kernel import Kernel
-from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceBehavior
+from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceBehavior, FunctionChoiceType
 from semantic_kernel.connectors.ai.open_ai import OpenAIChatCompletion, OpenAIChatPromptExecutionSettings
 from semantic_kernel.contents import ChatHistory
 from semantic_kernel.contents.chat_message_content import ChatMessageContent
 from semantic_kernel.contents.function_call_content import FunctionCallContent
 from semantic_kernel.contents.streaming_chat_message_content import StreamingChatMessageContent
+from semantic_kernel.contents.utils.author_role import AuthorRole
 from semantic_kernel.core_plugins import MathPlugin, TimePlugin
 from semantic_kernel.functions import KernelArguments
 
@@ -131,20 +132,32 @@ async def handle_streaming(
 
     print("Mosscap:> ", end="")
     streamed_chunks: list[StreamingChatMessageContent] = []
+    result_content = []
    async for message in response:
-        if isinstance(message[0], StreamingChatMessageContent):
+        if (
+            (
+                not execution_settings.function_choice_behavior.auto_invoke_kernel_functions
+                or execution_settings.function_choice_behavior.type_ == FunctionChoiceType.REQUIRED
+            )
+            and isinstance(message[0], StreamingChatMessageContent)
+            and message[0].role == AuthorRole.ASSISTANT
+        ):
             streamed_chunks.append(message[0])
-        else:
+        elif isinstance(message[0], StreamingChatMessageContent) and message[0].role == AuthorRole.ASSISTANT:
+            result_content.append(message[0])
             print(str(message[0]), end="")
 
     if streamed_chunks:
         streaming_chat_message = reduce(lambda first, second: first + second, streamed_chunks)
-        if hasattr(streaming_chat_message, "content"):
+        if hasattr(streaming_chat_message, "content") and streaming_chat_message.content:
             print(streaming_chat_message.content)
         print("Printing returned tool calls...")
         print_tool_calls(streaming_chat_message)
 
     print("\n")
+    if result_content:
+        return "".join([str(content) for content in result_content])
+    return None
@@ -164,7 +177,7 @@ async def chat() -> bool:
     arguments["chat_history"] = history
 
     if stream:
-        await handle_streaming(kernel, chat_function, arguments=arguments)
+        result = await handle_streaming(kernel, chat_function, arguments=arguments)
     else:
         result = await kernel.invoke(chat_function, arguments=arguments)
 
@@ -177,6 +190,9 @@ async def chat() -> bool:
         return True
 
     print(f"Mosscap:> {result}")
+
+    history.add_user_message(user_input)
+    history.add_assistant_message(str(result))
     return True

‎python/samples/concepts/auto_function_calling/functions_defined_in_json_prompt.py

+18 -5

@@ -11,6 +11,7 @@
 from semantic_kernel.contents.chat_message_content import ChatMessageContent
 from semantic_kernel.contents.function_call_content import FunctionCallContent
 from semantic_kernel.contents.streaming_chat_message_content import StreamingChatMessageContent
+from semantic_kernel.contents.utils.author_role import AuthorRole
 from semantic_kernel.core_plugins import MathPlugin, TimePlugin
 from semantic_kernel.filters.auto_function_invocation.auto_function_invocation_context import (
     AutoFunctionInvocationContext,
@@ -144,7 +145,7 @@ async def handle_streaming(
     kernel: Kernel,
     chat_function: "KernelFunction",
     arguments: KernelArguments,
-) -> None:
+) -> str | None:
     response = kernel.invoke_stream(
         chat_function,
         return_function_results=False,
@@ -153,20 +154,29 @@ async def handle_streaming(
 
     print("Mosscap:> ", end="")
     streamed_chunks: list[StreamingChatMessageContent] = []
+    result_content: list[StreamingChatMessageContent] = []
     async for message in response:
-        if not execution_settings.function_choice_behavior.auto_invoke_kernel_functions and isinstance(
-            message[0], StreamingChatMessageContent
+        if (
+            not execution_settings.function_choice_behavior.auto_invoke_kernel_functions
+            and isinstance(message[0], StreamingChatMessageContent)
+            and message[0].role == AuthorRole.ASSISTANT
         ):
             streamed_chunks.append(message[0])
-        else:
+        elif isinstance(message[0], StreamingChatMessageContent) and message[0].role == AuthorRole.ASSISTANT:
+            result_content.append(message[0])
             print(str(message[0]), end="")
 
     if streamed_chunks:
         streaming_chat_message = reduce(lambda first, second: first + second, streamed_chunks)
+        if hasattr(streaming_chat_message, "content"):
+            print(streaming_chat_message.content)
         print("Auto tool calls is disabled, printing returned tool calls...")
         print_tool_calls(streaming_chat_message)
 
     print("\n")
+    if result_content:
+        return "".join([str(content) for content in result_content])
+    return None
@@ -187,7 +197,7 @@ async def chat() -> bool:
 
     stream = False
     if stream:
-        await handle_streaming(kernel, chat_function, arguments=arguments)
+        result = await handle_streaming(kernel, chat_function, arguments=arguments)
     else:
         result = await kernel.invoke(chat_plugin["ChatBot"], arguments=arguments)
 
@@ -200,6 +210,9 @@ async def chat() -> bool:
         return True
 
     print(f"Mosscap:> {result}")
+
+    history.add_user_message(user_input)
+    history.add_assistant_message(str(result))
     return True

‎python/samples/concepts/auto_function_calling/functions_defined_in_yaml_prompt.py

+18 -6

@@ -12,6 +12,7 @@
 from semantic_kernel.contents.chat_message_content import ChatMessageContent
 from semantic_kernel.contents.function_call_content import FunctionCallContent
 from semantic_kernel.contents.streaming_chat_message_content import StreamingChatMessageContent
+from semantic_kernel.contents.utils.author_role import AuthorRole
 from semantic_kernel.core_plugins import MathPlugin, TimePlugin
 from semantic_kernel.filters.auto_function_invocation.auto_function_invocation_context import (
     AutoFunctionInvocationContext,
@@ -141,7 +142,7 @@ async def handle_streaming(
     kernel: Kernel,
     chat_function: "KernelFunction",
     arguments: KernelArguments,
-) -> None:
+) -> str | None:
     response = kernel.invoke_stream(
         chat_function,
         return_function_results=False,
@@ -150,20 +151,29 @@ async def handle_streaming(
 
     print("Mosscap:> ", end="")
     streamed_chunks: list[StreamingChatMessageContent] = []
+    result_content: list[StreamingChatMessageContent] = []
     async for message in response:
-        if not execution_settings.function_choice_behavior.auto_invoke_kernel_functions and isinstance(
-            message[0], StreamingChatMessageContent
+        if (
+            not execution_settings.function_choice_behavior.auto_invoke_kernel_functions
+            and isinstance(message[0], StreamingChatMessageContent)
+            and message[0].role == AuthorRole.ASSISTANT
        ):
             streamed_chunks.append(message[0])
-        else:
+        elif isinstance(message[0], StreamingChatMessageContent) and message[0].role == AuthorRole.ASSISTANT:
+            result_content.append(message[0])
             print(str(message[0]), end="")
 
     if streamed_chunks:
         streaming_chat_message = reduce(lambda first, second: first + second, streamed_chunks)
+        if hasattr(streaming_chat_message, "content"):
+            print(streaming_chat_message.content)
         print("Auto tool calls is disabled, printing returned tool calls...")
         print_tool_calls(streaming_chat_message)
 
     print("\n")
+    if result_content:
+        return "".join([str(content) for content in result_content])
+    return None
@@ -184,8 +194,7 @@ async def chat() -> bool:
 
     stream = False
     if stream:
-        pass
-        # await handle_streaming(kernel, chat_function, arguments=arguments)
+        result = await handle_streaming(kernel, chat_function, arguments=arguments)
     else:
         result = await kernel.invoke(chat_plugin["ChatBot"], arguments=arguments)
 
@@ -198,6 +207,9 @@ async def chat() -> bool:
         return True
 
     print(f"Mosscap:> {result}")
+
+    history.add_user_message(user_input)
+    history.add_assistant_message(str(result))
     return True

‎python/samples/concepts/filtering/function_invocation_filters_stream.py

+2 -1

@@ -71,7 +71,8 @@ async def chat(chat_history: ChatHistory) -> bool:
         function_name="chat", plugin_name="chat", user_input=user_input, chat_history=chat_history
     )
     async for message in responses:
-        streamed_chunks.append(message[0])
+        if isinstance(message[0], StreamingChatMessageContent) and message[0].role == AuthorRole.ASSISTANT:
+            streamed_chunks.append(message[0])
         print(str(message[0]), end="")
     print("")
     chat_history.add_user_message(user_input)

‎python/samples/concepts/plugins/openai_plugin_azure_key_vault.py

+6 -3

@@ -17,6 +17,7 @@
 from semantic_kernel.contents.chat_message_content import ChatMessageContent
 from semantic_kernel.contents.function_call_content import FunctionCallContent
 from semantic_kernel.contents.streaming_chat_message_content import StreamingChatMessageContent
+from semantic_kernel.contents.utils.author_role import AuthorRole
 from semantic_kernel.functions import KernelArguments, KernelFunction, KernelPlugin
 
 # region Helper functions
@@ -209,11 +210,13 @@ async def handle_streaming(
     print("Security Agent:> ", end="")
     streamed_chunks: list[StreamingChatMessageContent] = []
     async for message in response:
-        if not execution_settings.function_choice_behavior.auto_invoke_kernel_functions and isinstance(
-            message[0], StreamingChatMessageContent
+        if (
+            not execution_settings.function_choice_behavior.auto_invoke_kernel_functions
+            and isinstance(message[0], StreamingChatMessageContent)
+            and message[0].role == AuthorRole.ASSISTANT
         ):
             streamed_chunks.append(message[0])
-        else:
+        elif isinstance(message[0], StreamingChatMessageContent) and message[0].role == AuthorRole.ASSISTANT:
             print(str(message[0]), end="")
 
     if streamed_chunks:

‎python/samples/learn_resources/templates.py

+4 -2

@@ -7,6 +7,7 @@
 from semantic_kernel import Kernel
 from semantic_kernel.contents import ChatHistory
 from semantic_kernel.contents.chat_message_content import ChatMessageContent
+from semantic_kernel.contents.streaming_chat_message_content import StreamingChatMessageContent
 from semantic_kernel.contents.utils.author_role import AuthorRole
 from semantic_kernel.prompt_template import InputVariable, PromptTemplateConfig
 
@@ -144,8 +145,9 @@ async def main():
     all_chunks = []
     print("Assistant:> ", end="")
     async for chunk in result:
-        all_chunks.append(chunk[0])
-        print(str(chunk[0]), end="")
+        if isinstance(chunk[0], StreamingChatMessageContent) and chunk[0].role == AuthorRole.ASSISTANT:
+            all_chunks.append(chunk[0])
+            print(str(chunk[0]), end="")
     print()
 
     history.add_user_message(request)

‎python/semantic_kernel/connectors/ai/chat_completion_client_base.py

+27 -2

@@ -12,7 +12,10 @@
 
 from semantic_kernel.connectors.ai.function_call_behavior import FunctionCallBehavior
 from semantic_kernel.connectors.ai.function_call_choice_configuration import FunctionCallChoiceConfiguration
-from semantic_kernel.connectors.ai.function_calling_utils import merge_function_results
+from semantic_kernel.connectors.ai.function_calling_utils import (
+    merge_function_results,
+    merge_streaming_function_results,
+)
 from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceBehavior, FunctionChoiceType
 from semantic_kernel.const import AUTO_FUNCTION_INVOCATION_SPAN_NAME
 from semantic_kernel.contents.annotation_content import AnnotationContent
@@ -303,8 +306,18 @@ async def get_streaming_chat_message_contents(
                 ],
             )
 
+            # Merge and yield the function results, regardless of the termination status
+            # Include the ai_model_id so we can later add two streaming messages together
+            # Some settings may not have an ai_model_id, so we need to check for it
+            ai_model_id = self._get_ai_model_id(settings)
+            function_result_messages = merge_streaming_function_results(
+                messages=chat_history.messages[-len(results) :],
+                ai_model_id=ai_model_id,  # type: ignore
+            )
+            if self._yield_function_result_messages(function_result_messages):
+                yield function_result_messages
+
             if any(result.terminate for result in results if result is not None):
-                yield merge_function_results(chat_history.messages[-len(results) :])  # type: ignore
                 break
 
     async def get_streaming_chat_message_content(
@@ -415,4 +428,16 @@ def _start_auto_function_invocation_activity(
 
         return span
 
+    def _get_ai_model_id(self, settings: "PromptExecutionSettings") -> str:
+        """Retrieve the AI model ID from settings if available.
+
+        Attempt to get ai_model_id from the settings object. If it doesn't exist or
+        is blank, fallback to self.ai_model_id (from AIServiceClientBase).
+        """
+        return getattr(settings, "ai_model_id", self.ai_model_id) or self.ai_model_id
+
+    def _yield_function_result_messages(self, function_result_messages: list) -> bool:
+        """Determine if the function result messages should be yielded."""
+        return len(function_result_messages) > 0 and len(function_result_messages[0].items) > 0
+
     # endregion
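A rough consumer-side sketch of the updated streaming path follows. The `service`, `history`, `settings`, and `kernel` names are placeholders for an already-configured chat completion service, chat history, execution settings, and kernel; the `FunctionResultContent` attribute used for printing is an assumption for illustration, not taken from this diff.

```python
from semantic_kernel.contents.function_result_content import FunctionResultContent
from semantic_kernel.contents.utils.author_role import AuthorRole


async def consume_stream(service, history, settings, kernel) -> None:
    """Print assistant text and, new with this change, any function results in the stream."""
    async for messages in service.get_streaming_chat_message_contents(
        chat_history=history, settings=settings, kernel=kernel
    ):
        message = messages[0]
        if message.role == AuthorRole.ASSISTANT:
            # Regular assistant chunks (text and FunctionCallContent).
            print(str(message), end="")
        else:
            # Merged function-result chunks now yielded by the base class.
            for item in message.items:
                if isinstance(item, FunctionResultContent):
                    print(f"\n[function result] {item.result}")  # `result` attribute assumed
```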
