
Commit 16690ed

Authored Dec 18, 2024
Python: Yield FunctionResultContent in streaming chat completion path. Update tests. (microsoft#9974)
### Motivation and Context

Currently, SK's Python streaming chat completion path only yields two content types: `StreamingChatMessageContent` and `FunctionCallContent`. It does not yield `FunctionResultContent`, which is valuable for some use cases.

### Description

This PR updates the code to yield `FunctionResultContent` when it exists in the streaming chat completion path. When the function call results are merged into a `StreamingChatMessageContent`, we check whether that message has items (of type `FunctionResultContent`) and, if so, yield them. The filter path still works because once the results are yielded, we break out of the function-calling loop.

We also need to include the `ai_model_id`, if it exists, from the current `PromptExecutionSettings`: when a `reduce` operation adds two streaming chat message chunks together, the `StreamingChatMessageContent` that holds the function results breaks if `ai_model_id` is not set (the error is raised in the `__add__` override of `StreamingChatMessageContent`).

Some unit tests covering function calling were also updated: the test JSON function arguments were failing in the `json.loads` call because they used single quotes instead of double quotes. The arguments are now sanitized, just in case, so the tests don't break there.

This PR fixes:

- microsoft#9408
- microsoft#9968

### Contribution Checklist

- [X] The code builds clean without any errors or warnings
- [X] The PR follows the [SK Contribution Guidelines](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md) and the [pre-submission formatting script](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md#development-scripts) raises no violations
- [X] All unit tests pass, and I have added new tests where possible
- [X] I didn't break anyone 😄
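To make the `ai_model_id` point concrete, here is a minimal, hedged sketch (not the SDK's implementation) of the `reduce` pattern the samples use to combine chunks. It assumes `StreamingChatMessageContent` accepts `role`, `choice_index`, `content`, and `ai_model_id` keyword arguments and that its `__add__` override rejects chunks whose `ai_model_id` values do not match; the model id string is a placeholder.

```python
# Sketch: combining streamed chunks with reduce, as the updated samples do.
from functools import reduce

from semantic_kernel.contents.streaming_chat_message_content import StreamingChatMessageContent
from semantic_kernel.contents.utils.author_role import AuthorRole

chunks = [
    StreamingChatMessageContent(
        role=AuthorRole.ASSISTANT, choice_index=0, content="Hello, ", ai_model_id="my-model"  # placeholder id
    ),
    StreamingChatMessageContent(
        role=AuthorRole.ASSISTANT, choice_index=0, content="world!", ai_model_id="my-model"
    ),
]

# If the function-result chunk were missing its ai_model_id (the pre-fix behavior
# described above), this addition would raise inside __add__ instead of merging.
merged = reduce(lambda first, second: first + second, chunks)
print(merged.content)  # "Hello, world!"
```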
1 parent 4650d27 commit 16690ed

21 files changed (+218, -70 lines)
 

‎python/samples/concepts/auto_function_calling/anthropic_api_function_calling.py

+6 -3

@@ -12,6 +12,7 @@
 from semantic_kernel.contents.chat_message_content import ChatMessageContent
 from semantic_kernel.contents.function_call_content import FunctionCallContent
 from semantic_kernel.contents.streaming_chat_message_content import StreamingChatMessageContent
+from semantic_kernel.contents.utils.author_role import AuthorRole
 from semantic_kernel.core_plugins.math_plugin import MathPlugin
 from semantic_kernel.core_plugins.time_plugin import TimePlugin
 from semantic_kernel.functions import KernelArguments
@@ -131,11 +132,13 @@ async def handle_streaming(
     streamed_chunks: list[StreamingChatMessageContent] = []
     result_content = []
     async for message in response:
-        if not execution_settings.function_choice_behavior.auto_invoke_kernel_functions and isinstance(
-            message[0], StreamingChatMessageContent
+        if (
+            not execution_settings.function_choice_behavior.auto_invoke_kernel_functions
+            and isinstance(message[0], StreamingChatMessageContent)
+            and message[0].role == AuthorRole.ASSISTANT
         ):
             streamed_chunks.append(message[0])
-        else:
+        elif isinstance(message[0], StreamingChatMessageContent) and message[0].role == AuthorRole.ASSISTANT:
             result_content.append(message[0])
             print(str(message[0]), end="")

‎python/samples/concepts/auto_function_calling/chat_gpt_api_function_calling.py

+7 -4

@@ -12,6 +12,7 @@
 from semantic_kernel.contents.chat_message_content import ChatMessageContent
 from semantic_kernel.contents.function_call_content import FunctionCallContent
 from semantic_kernel.contents.streaming_chat_message_content import StreamingChatMessageContent
+from semantic_kernel.contents.utils.author_role import AuthorRole
 from semantic_kernel.core_plugins.math_plugin import MathPlugin
 from semantic_kernel.core_plugins.time_plugin import TimePlugin
 from semantic_kernel.functions import KernelArguments
@@ -130,13 +131,15 @@ async def handle_streaming(
 
     print("Mosscap:> ", end="")
     streamed_chunks: list[StreamingChatMessageContent] = []
-    result_content = []
+    result_content: list[StreamingChatMessageContent] = []
     async for message in response:
-        if not execution_settings.function_choice_behavior.auto_invoke_kernel_functions and isinstance(
-            message[0], StreamingChatMessageContent
+        if (
+            not execution_settings.function_choice_behavior.auto_invoke_kernel_functions
+            and isinstance(message[0], StreamingChatMessageContent)
+            and message[0].role == AuthorRole.ASSISTANT
         ):
             streamed_chunks.append(message[0])
-        else:
+        elif isinstance(message[0], StreamingChatMessageContent) and message[0].role == AuthorRole.ASSISTANT:
             result_content.append(message[0])
             print(str(message[0]), end="")

‎python/samples/concepts/auto_function_calling/chat_mistral_ai_api_function_calling.py

+6 -3

@@ -12,6 +12,7 @@
 from semantic_kernel.contents.chat_message_content import ChatMessageContent
 from semantic_kernel.contents.function_call_content import FunctionCallContent
 from semantic_kernel.contents.streaming_chat_message_content import StreamingChatMessageContent
+from semantic_kernel.contents.utils.author_role import AuthorRole
 from semantic_kernel.core_plugins.math_plugin import MathPlugin
 from semantic_kernel.core_plugins.time_plugin import TimePlugin
 from semantic_kernel.functions import KernelArguments
@@ -140,11 +141,13 @@ async def handle_streaming(
     streamed_chunks: list[StreamingChatMessageContent] = []
     result_content = []
     async for message in response:
-        if not execution_settings.function_choice_behavior.auto_invoke_kernel_functions and isinstance(
-            message[0], StreamingChatMessageContent
+        if (
+            not execution_settings.function_choice_behavior.auto_invoke_kernel_functions
+            and isinstance(message[0], StreamingChatMessageContent)
+            and message[0].role == AuthorRole.ASSISTANT
         ):
             streamed_chunks.append(message[0])
-        else:
+        elif isinstance(message[0], StreamingChatMessageContent) and message[0].role == AuthorRole.ASSISTANT:
             result_content.append(message[0])
             print(str(message[0]), end="")

‎python/samples/concepts/auto_function_calling/function_calling_with_required_type.py

+21 -5

@@ -6,12 +6,13 @@
 from typing import TYPE_CHECKING
 
 from semantic_kernel import Kernel
-from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceBehavior
+from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceBehavior, FunctionChoiceType
 from semantic_kernel.connectors.ai.open_ai import OpenAIChatCompletion, OpenAIChatPromptExecutionSettings
 from semantic_kernel.contents import ChatHistory
 from semantic_kernel.contents.chat_message_content import ChatMessageContent
 from semantic_kernel.contents.function_call_content import FunctionCallContent
 from semantic_kernel.contents.streaming_chat_message_content import StreamingChatMessageContent
+from semantic_kernel.contents.utils.author_role import AuthorRole
 from semantic_kernel.core_plugins import MathPlugin, TimePlugin
 from semantic_kernel.functions import KernelArguments
 
@@ -131,20 +132,32 @@ async def handle_streaming(
 
     print("Mosscap:> ", end="")
     streamed_chunks: list[StreamingChatMessageContent] = []
+    result_content = []
    async for message in response:
-        if isinstance(message[0], StreamingChatMessageContent):
+        if (
+            (
+                not execution_settings.function_choice_behavior.auto_invoke_kernel_functions
+                or execution_settings.function_choice_behavior.type_ == FunctionChoiceType.REQUIRED
+            )
+            and isinstance(message[0], StreamingChatMessageContent)
+            and message[0].role == AuthorRole.ASSISTANT
+        ):
             streamed_chunks.append(message[0])
-        else:
+        elif isinstance(message[0], StreamingChatMessageContent) and message[0].role == AuthorRole.ASSISTANT:
+            result_content.append(message[0])
             print(str(message[0]), end="")
 
     if streamed_chunks:
         streaming_chat_message = reduce(lambda first, second: first + second, streamed_chunks)
-        if hasattr(streaming_chat_message, "content"):
+        if hasattr(streaming_chat_message, "content") and streaming_chat_message.content:
             print(streaming_chat_message.content)
         print("Printing returned tool calls...")
         print_tool_calls(streaming_chat_message)
 
     print("\n")
+    if result_content:
+        return "".join([str(content) for content in result_content])
+    return None
@@ -164,7 +177,7 @@ async def chat() -> bool:
     arguments["chat_history"] = history
 
     if stream:
-        await handle_streaming(kernel, chat_function, arguments=arguments)
+        result = await handle_streaming(kernel, chat_function, arguments=arguments)
     else:
         result = await kernel.invoke(chat_function, arguments=arguments)
 
@@ -177,6 +190,9 @@ async def chat() -> bool:
         return True
 
     print(f"Mosscap:> {result}")
+
+    history.add_user_message(user_input)
+    history.add_assistant_message(str(result))
     return True

‎python/samples/concepts/auto_function_calling/functions_defined_in_json_prompt.py

+18 -5

@@ -11,6 +11,7 @@
 from semantic_kernel.contents.chat_message_content import ChatMessageContent
 from semantic_kernel.contents.function_call_content import FunctionCallContent
 from semantic_kernel.contents.streaming_chat_message_content import StreamingChatMessageContent
+from semantic_kernel.contents.utils.author_role import AuthorRole
 from semantic_kernel.core_plugins import MathPlugin, TimePlugin
 from semantic_kernel.filters.auto_function_invocation.auto_function_invocation_context import (
     AutoFunctionInvocationContext,
@@ -144,7 +145,7 @@ async def handle_streaming(
     kernel: Kernel,
     chat_function: "KernelFunction",
     arguments: KernelArguments,
-) -> None:
+) -> str | None:
     response = kernel.invoke_stream(
         chat_function,
         return_function_results=False,
@@ -153,20 +154,29 @@ async def handle_streaming(
 
     print("Mosscap:> ", end="")
     streamed_chunks: list[StreamingChatMessageContent] = []
+    result_content: list[StreamingChatMessageContent] = []
     async for message in response:
-        if not execution_settings.function_choice_behavior.auto_invoke_kernel_functions and isinstance(
-            message[0], StreamingChatMessageContent
+        if (
+            not execution_settings.function_choice_behavior.auto_invoke_kernel_functions
+            and isinstance(message[0], StreamingChatMessageContent)
+            and message[0].role == AuthorRole.ASSISTANT
         ):
             streamed_chunks.append(message[0])
-        else:
+        elif isinstance(message[0], StreamingChatMessageContent) and message[0].role == AuthorRole.ASSISTANT:
+            result_content.append(message[0])
             print(str(message[0]), end="")
 
     if streamed_chunks:
         streaming_chat_message = reduce(lambda first, second: first + second, streamed_chunks)
+        if hasattr(streaming_chat_message, "content"):
+            print(streaming_chat_message.content)
         print("Auto tool calls is disabled, printing returned tool calls...")
         print_tool_calls(streaming_chat_message)
 
     print("\n")
+    if result_content:
+        return "".join([str(content) for content in result_content])
+    return None
@@ -187,7 +197,7 @@ async def chat() -> bool:
 
     stream = False
     if stream:
-        await handle_streaming(kernel, chat_function, arguments=arguments)
+        result = await handle_streaming(kernel, chat_function, arguments=arguments)
     else:
         result = await kernel.invoke(chat_plugin["ChatBot"], arguments=arguments)
 
@@ -200,6 +210,9 @@ async def chat() -> bool:
         return True
 
     print(f"Mosscap:> {result}")
+
+    history.add_user_message(user_input)
+    history.add_assistant_message(str(result))
     return True

‎python/samples/concepts/auto_function_calling/functions_defined_in_yaml_prompt.py

+18 -6

@@ -12,6 +12,7 @@
 from semantic_kernel.contents.chat_message_content import ChatMessageContent
 from semantic_kernel.contents.function_call_content import FunctionCallContent
 from semantic_kernel.contents.streaming_chat_message_content import StreamingChatMessageContent
+from semantic_kernel.contents.utils.author_role import AuthorRole
 from semantic_kernel.core_plugins import MathPlugin, TimePlugin
 from semantic_kernel.filters.auto_function_invocation.auto_function_invocation_context import (
     AutoFunctionInvocationContext,
@@ -141,7 +142,7 @@ async def handle_streaming(
     kernel: Kernel,
     chat_function: "KernelFunction",
     arguments: KernelArguments,
-) -> None:
+) -> str | None:
     response = kernel.invoke_stream(
         chat_function,
         return_function_results=False,
@@ -150,20 +151,29 @@ async def handle_streaming(
 
     print("Mosscap:> ", end="")
     streamed_chunks: list[StreamingChatMessageContent] = []
+    result_content: list[StreamingChatMessageContent] = []
     async for message in response:
-        if not execution_settings.function_choice_behavior.auto_invoke_kernel_functions and isinstance(
-            message[0], StreamingChatMessageContent
+        if (
+            not execution_settings.function_choice_behavior.auto_invoke_kernel_functions
+            and isinstance(message[0], StreamingChatMessageContent)
+            and message[0].role == AuthorRole.ASSISTANT
        ):
             streamed_chunks.append(message[0])
-        else:
+        elif isinstance(message[0], StreamingChatMessageContent) and message[0].role == AuthorRole.ASSISTANT:
+            result_content.append(message[0])
             print(str(message[0]), end="")
 
     if streamed_chunks:
         streaming_chat_message = reduce(lambda first, second: first + second, streamed_chunks)
+        if hasattr(streaming_chat_message, "content"):
+            print(streaming_chat_message.content)
         print("Auto tool calls is disabled, printing returned tool calls...")
         print_tool_calls(streaming_chat_message)
 
     print("\n")
+    if result_content:
+        return "".join([str(content) for content in result_content])
+    return None
@@ -184,8 +194,7 @@ async def chat() -> bool:
 
     stream = False
     if stream:
-        pass
-        # await handle_streaming(kernel, chat_function, arguments=arguments)
+        result = await handle_streaming(kernel, chat_function, arguments=arguments)
     else:
         result = await kernel.invoke(chat_plugin["ChatBot"], arguments=arguments)
 
@@ -198,6 +207,9 @@ async def chat() -> bool:
         return True
 
     print(f"Mosscap:> {result}")
+
+    history.add_user_message(user_input)
+    history.add_assistant_message(str(result))
     return True

‎python/samples/concepts/filtering/function_invocation_filters_stream.py

+2 -1

@@ -71,7 +71,8 @@ async def chat(chat_history: ChatHistory) -> bool:
         function_name="chat", plugin_name="chat", user_input=user_input, chat_history=chat_history
     )
     async for message in responses:
-        streamed_chunks.append(message[0])
+        if isinstance(message[0], StreamingChatMessageContent) and message[0].role == AuthorRole.ASSISTANT:
+            streamed_chunks.append(message[0])
         print(str(message[0]), end="")
     print("")
     chat_history.add_user_message(user_input)

‎python/samples/concepts/plugins/openai_plugin_azure_key_vault.py

+6 -3

@@ -17,6 +17,7 @@
 from semantic_kernel.contents.chat_message_content import ChatMessageContent
 from semantic_kernel.contents.function_call_content import FunctionCallContent
 from semantic_kernel.contents.streaming_chat_message_content import StreamingChatMessageContent
+from semantic_kernel.contents.utils.author_role import AuthorRole
 from semantic_kernel.functions import KernelArguments, KernelFunction, KernelPlugin
 
 # region Helper functions
@@ -209,11 +210,13 @@ async def handle_streaming(
     print("Security Agent:> ", end="")
     streamed_chunks: list[StreamingChatMessageContent] = []
     async for message in response:
-        if not execution_settings.function_choice_behavior.auto_invoke_kernel_functions and isinstance(
-            message[0], StreamingChatMessageContent
+        if (
+            not execution_settings.function_choice_behavior.auto_invoke_kernel_functions
+            and isinstance(message[0], StreamingChatMessageContent)
+            and message[0].role == AuthorRole.ASSISTANT
         ):
             streamed_chunks.append(message[0])
-        else:
+        elif isinstance(message[0], StreamingChatMessageContent) and message[0].role == AuthorRole.ASSISTANT:
             print(str(message[0]), end="")
 
     if streamed_chunks:

‎python/samples/learn_resources/templates.py

+4 -2

@@ -7,6 +7,7 @@
 from semantic_kernel import Kernel
 from semantic_kernel.contents import ChatHistory
 from semantic_kernel.contents.chat_message_content import ChatMessageContent
+from semantic_kernel.contents.streaming_chat_message_content import StreamingChatMessageContent
 from semantic_kernel.contents.utils.author_role import AuthorRole
 from semantic_kernel.prompt_template import InputVariable, PromptTemplateConfig
 
@@ -144,8 +145,9 @@ async def main():
     all_chunks = []
     print("Assistant:> ", end="")
     async for chunk in result:
-        all_chunks.append(chunk[0])
-        print(str(chunk[0]), end="")
+        if isinstance(chunk[0], StreamingChatMessageContent) and chunk[0].role == AuthorRole.ASSISTANT:
+            all_chunks.append(chunk[0])
+            print(str(chunk[0]), end="")
     print()
 
     history.add_user_message(request)

‎python/semantic_kernel/connectors/ai/chat_completion_client_base.py

+27 -2

@@ -12,7 +12,10 @@
 
 from semantic_kernel.connectors.ai.function_call_behavior import FunctionCallBehavior
 from semantic_kernel.connectors.ai.function_call_choice_configuration import FunctionCallChoiceConfiguration
-from semantic_kernel.connectors.ai.function_calling_utils import merge_function_results
+from semantic_kernel.connectors.ai.function_calling_utils import (
+    merge_function_results,
+    merge_streaming_function_results,
+)
 from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceBehavior, FunctionChoiceType
 from semantic_kernel.const import AUTO_FUNCTION_INVOCATION_SPAN_NAME
 from semantic_kernel.contents.annotation_content import AnnotationContent
@@ -303,8 +306,18 @@ async def get_streaming_chat_message_contents(
                 ],
             )
 
+            # Merge and yield the function results, regardless of the termination status
+            # Include the ai_model_id so we can later add two streaming messages together
+            # Some settings may not have an ai_model_id, so we need to check for it
+            ai_model_id = self._get_ai_model_id(settings)
+            function_result_messages = merge_streaming_function_results(
+                messages=chat_history.messages[-len(results) :],
+                ai_model_id=ai_model_id,  # type: ignore
+            )
+            if self._yield_function_result_messages(function_result_messages):
+                yield function_result_messages
+
             if any(result.terminate for result in results if result is not None):
-                yield merge_function_results(chat_history.messages[-len(results) :])  # type: ignore
                 break
 
     async def get_streaming_chat_message_content(
@@ -415,4 +428,16 @@ def _start_auto_function_invocation_activity(
 
         return span
 
+    def _get_ai_model_id(self, settings: "PromptExecutionSettings") -> str:
+        """Retrieve the AI model ID from settings if available.
+
+        Attempt to get ai_model_id from the settings object. If it doesn't exist or
+        is blank, fallback to self.ai_model_id (from AIServiceClientBase).
+        """
+        return getattr(settings, "ai_model_id", self.ai_model_id) or self.ai_model_id
+
+    def _yield_function_result_messages(self, function_result_messages: list) -> bool:
+        """Determine if the function result messages should be yielded."""
+        return len(function_result_messages) > 0 and len(function_result_messages[0].items) > 0
+
     # endregion
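A rough consumer-side sketch of the updated streaming path follows. The `service`, `history`, `settings`, and `kernel` names are placeholders for an already-configured chat completion service, chat history, execution settings, and kernel; the `FunctionResultContent` attribute used for printing is an assumption for illustration, not taken from this diff.

```python
from semantic_kernel.contents.function_result_content import FunctionResultContent
from semantic_kernel.contents.utils.author_role import AuthorRole


async def consume_stream(service, history, settings, kernel) -> None:
    """Print assistant text and, new with this change, any function results in the stream."""
    async for messages in service.get_streaming_chat_message_contents(
        chat_history=history, settings=settings, kernel=kernel
    ):
        message = messages[0]
        if message.role == AuthorRole.ASSISTANT:
            # Regular assistant chunks (text and FunctionCallContent).
            print(str(message), end="")
        else:
            # Merged function-result chunks now yielded by the base class.
            for item in message.items:
                if isinstance(item, FunctionResultContent):
                    print(f"\n[function result] {item.result}")  # `result` attribute assumed
```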
