Commit fb5aa6f (authored Nov 19, 2024)
Python: Azure AI Inference tracing SDK (microsoft#9693)
### Motivation and Context

Addresses: microsoft#9413

### Description

The latest Azure AI Inference SDK has been released with the tracing package. We have decided to upgrade to it so that we no longer need to instrument the Azure AI Inference connector with our own model diagnostics module.

### Contribution Checklist

1. Upgrade to the latest Azure AI Inference SDK with the tracing package.
2. Refactor the AI Inference connector to reduce duplicated code.
3. Some other minor fixes.

- [x] The code builds clean without any errors or warnings
- [x] The PR follows the [SK Contribution Guidelines](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md) and the [pre-submission formatting script](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md#development-scripts) raises no violations
- [x] All unit tests pass, and I have added new tests where possible
- [ ] I didn't break anyone 😄
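
For reference, the upgrade means spans now come from the SDK's own instrumentor rather than Semantic Kernel's `@trace_chat_completion` decorators. A minimal sketch of the instrumentor API, limited to the calls that appear in this PR (standalone, outside Semantic Kernel):

```python
# Minimal sketch, restricted to the AIInferenceInstrumentor calls visible in this PR.
from azure.ai.inference.tracing import AIInferenceInstrumentor
from azure.core.settings import settings

# Route Azure SDK tracing through OpenTelemetry.
settings.tracing_implementation = "opentelemetry"

instrumentor = AIInferenceInstrumentor()
# enable_content_recording=True additionally records prompts and completions in spans.
instrumentor.instrument(enable_content_recording=False)
try:
    ...  # ChatCompletionsClient / EmbeddingsClient calls made here emit spans
finally:
    instrumentor.uninstrument()
```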

Parent: 1ce4769

21 files changed: +1406, -1020 lines

python/.cspell.json (+2)

@@ -39,6 +39,7 @@
     "hnsw",
     "httpx",
     "huggingface",
+    "Instrumentor",
     "kernelfunction",
     "logit",
     "logprobs",
@@ -61,6 +62,7 @@
     "serde",
     "skprompt",
     "templating",
+    "uninstrument",
     "vectordb",
     "vectorizer",
     "vectorstoremodel",

python/pyproject.toml (+2, -1)

@@ -50,7 +50,8 @@ dependencies = [
 ### Optional dependencies
 [project.optional-dependencies]
 azure = [
-    "azure-ai-inference >= 1.0.0b4",
+    "azure-ai-inference >= 1.0.0b6",
+    "azure-core-tracing-opentelemetry >= 1.0.0b11",
     "azure-search-documents >= 11.6.0b4",
     "azure-identity ~= 1.13",
     "azure-cosmos ~= 4.7"

python/samples/demos/telemetry/main.py (+1, -1)

@@ -140,7 +140,7 @@ async def main(scenario: Literal["ai_service", "kernel_function", "auto_function
     with tracer.start_as_current_span("main") as current_span:
         print(f"Trace ID: {format_trace_id(current_span.get_span_context().trace_id)}")
 
-        stream = True
+        stream = False
 
         # Scenarios where telemetry is collected in the SDK, from the most basic to the most complex.
        if scenario == "ai_service" or scenario == "all":

python/samples/demos/telemetry/scenarios.py (+6)

@@ -22,6 +22,12 @@ def set_up_kernel() -> Kernel:
     # All built-in AI services are instrumented with telemetry.
     # Select any AI service to see the telemetry in action.
     kernel.add_service(OpenAIChatCompletion(service_id="open_ai"))
+    # kernel.add_service(
+    #     AzureAIInferenceChatCompletion(
+    #         ai_model_id="serverless-deployment",
+    #         service_id="azure-ai-inference",
+    #     )
+    # )
     # kernel.add_service(GoogleAIChatCompletion(service_id="google_ai"))
 
     if (sample_plugin_path := get_sample_plugin_path()) is None:

python/semantic_kernel/connectors/ai/azure_ai_inference/azure_ai_inference_settings.py (+1, -1)

@@ -34,4 +34,4 @@ class AzureAIInferenceSettings(KernelBaseSettings):
     env_prefix: ClassVar[str] = "AZURE_AI_INFERENCE_"
 
     endpoint: HttpsUrl
-    api_key: SecretStr
+    api_key: SecretStr | None = None
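
With `api_key` now optional, a missing key is no longer a validation error; the refactored base class below falls back to a `DefaultAzureCredential`. A sketch of the two resulting construction paths (the model id and key values are placeholders, and `AZURE_AI_INFERENCE_ENDPOINT` is assumed to be set):

```python
# Sketch: assumes AZURE_AI_INFERENCE_ENDPOINT is set in the environment;
# "my-deployment" and the key value are placeholders.
from semantic_kernel.connectors.ai.azure_ai_inference import AzureAIInferenceChatCompletion

# Key-based auth: an explicit api_key (or AZURE_AI_INFERENCE_API_KEY).
keyed = AzureAIInferenceChatCompletion(ai_model_id="my-deployment", api_key="<api-key>")

# Keyless auth: no key anywhere; previously this raised
# ServiceInitializationError, now the client is built with a DefaultAzureCredential.
keyless = AzureAIInferenceChatCompletion(ai_model_id="my-deployment")
```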

python/semantic_kernel/connectors/ai/azure_ai_inference/services/azure_ai_inference_base.py (+93, -5)

@@ -3,23 +3,111 @@
 import asyncio
 import contextlib
 from abc import ABC
-from typing import ClassVar
+from enum import Enum
+from typing import Any
 
 from azure.ai.inference.aio import ChatCompletionsClient, EmbeddingsClient
+from azure.core.credentials import AzureKeyCredential
+from pydantic import ValidationError
 
+from semantic_kernel.connectors.ai.azure_ai_inference.azure_ai_inference_settings import AzureAIInferenceSettings
+from semantic_kernel.exceptions.service_exceptions import ServiceInitializationError
 from semantic_kernel.kernel_pydantic import KernelBaseModel
+from semantic_kernel.utils.authentication.async_default_azure_credential_wrapper import (
+    AsyncDefaultAzureCredentialWrapper,
+)
 from semantic_kernel.utils.experimental_decorator import experimental_class
+from semantic_kernel.utils.telemetry.user_agent import SEMANTIC_KERNEL_USER_AGENT
+
+
+class AzureAIInferenceClientType(Enum):
+    """Client type for Azure AI Inference."""
+
+    ChatCompletions = "ChatCompletions"
+    Embeddings = "Embeddings"
+
+    @classmethod
+    def get_client_class(cls, client_type: "AzureAIInferenceClientType") -> Any:
+        """Get the client class based on the client type."""
+        class_mapping = {
+            cls.ChatCompletions: ChatCompletionsClient,
+            cls.Embeddings: EmbeddingsClient,
+        }
+
+        return class_mapping[client_type]
 
 
 @experimental_class
 class AzureAIInferenceBase(KernelBaseModel, ABC):
     """Azure AI Inference Chat Completion Service."""
 
-    MODEL_PROVIDER_NAME: ClassVar[str] = "azureai"
-
     client: ChatCompletionsClient | EmbeddingsClient
+    managed_client: bool = False
+
+    def __init__(
+        self,
+        client_type: AzureAIInferenceClientType,
+        api_key: str | None = None,
+        endpoint: str | None = None,
+        env_file_path: str | None = None,
+        env_file_encoding: str | None = None,
+        client: ChatCompletionsClient | EmbeddingsClient | None = None,
+        **kwargs: Any,
+    ) -> None:
+        """Initialize the Azure AI Inference Chat Completion service.
+
+        If no arguments are provided, the service will attempt to load the settings from the environment.
+        The following environment variables are used:
+        - AZURE_AI_INFERENCE_API_KEY
+        - AZURE_AI_INFERENCE_ENDPOINT
+
+        Args:
+            client_type (AzureAIInferenceClientType): The client type to use.
+            api_key (str | None): The API key for the Azure AI Inference service deployment. (Optional)
+            endpoint (str | None): The endpoint of the Azure AI Inference service deployment. (Optional)
+            env_file_path (str | None): The path to the environment file. (Optional)
+            env_file_encoding (str | None): The encoding of the environment file. (Optional)
+            client (ChatCompletionsClient | None): The Azure AI Inference client to use. (Optional)
+            **kwargs: Additional keyword arguments.
+
+        Raises:
+            ServiceInitializationError: If an error occurs during initialization.
+        """
+        managed_client = client is None
+        if not client:
+            try:
+                azure_ai_inference_settings = AzureAIInferenceSettings.create(
+                    api_key=api_key,
+                    endpoint=endpoint,
+                    env_file_path=env_file_path,
+                    env_file_encoding=env_file_encoding,
+                )
+            except ValidationError as e:
+                raise ServiceInitializationError(f"Failed to validate Azure AI Inference settings: {e}") from e
+
+            endpoint = str(azure_ai_inference_settings.endpoint)
+            if azure_ai_inference_settings.api_key is not None:
+                client = AzureAIInferenceClientType.get_client_class(client_type)(
+                    endpoint=endpoint,
+                    credential=AzureKeyCredential(azure_ai_inference_settings.api_key.get_secret_value()),
+                    user_agent=SEMANTIC_KERNEL_USER_AGENT,
+                )
+            else:
+                # Try to create the client with a DefaultAzureCredential
+                client = AzureAIInferenceClientType.get_client_class(client_type)(
+                    endpoint=endpoint,
+                    credential=AsyncDefaultAzureCredentialWrapper(),
+                    user_agent=SEMANTIC_KERNEL_USER_AGENT,
+                )
+
+        super().__init__(
+            client=client,
+            managed_client=managed_client,
+            **kwargs,
+        )
 
     def __del__(self) -> None:
         """Close the client when the object is deleted."""
-        with contextlib.suppress(Exception):
-            asyncio.get_running_loop().create_task(self.client.close())
+        if self.managed_client:
+            with contextlib.suppress(Exception):
+                asyncio.get_running_loop().create_task(self.client.close())
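
The `get_client_class` classmethod is what lets one shared `__init__` serve both the chat and embeddings connectors, and `managed_client` records whether the class created the client (and therefore should close it in `__del__`). A self-contained sketch of the same enum-as-factory pattern, with illustrative names rather than the semantic-kernel types:

```python
# Illustrative sketch of the enum-as-factory pattern used above;
# HttpClient/GrpcClient are stand-ins, not semantic-kernel classes.
from enum import Enum


class HttpClient:
    def __init__(self, endpoint: str) -> None:
        self.endpoint = endpoint


class GrpcClient:
    def __init__(self, endpoint: str) -> None:
        self.endpoint = endpoint


class ClientType(Enum):
    Http = "Http"
    Grpc = "Grpc"

    @classmethod
    def get_client_class(cls, client_type: "ClientType") -> type:
        # Map each member to the concrete class it constructs.
        return {cls.Http: HttpClient, cls.Grpc: GrpcClient}[client_type]


client = ClientType.get_client_class(ClientType.Http)(endpoint="https://example.invalid")
```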

python/semantic_kernel/connectors/ai/azure_ai_inference/services/azure_ai_inference_chat_completion.py (+24, -60)

@@ -20,22 +20,19 @@
     StreamingChatChoiceUpdate,
     StreamingChatCompletionsUpdate,
 )
-from azure.core.credentials import AzureKeyCredential
-from azure.identity import DefaultAzureCredential
-from pydantic import ValidationError
 
-from semantic_kernel.connectors.ai.azure_ai_inference import (
-    AzureAIInferenceChatPromptExecutionSettings,
-    AzureAIInferenceSettings,
+from semantic_kernel.connectors.ai.azure_ai_inference import AzureAIInferenceChatPromptExecutionSettings
+from semantic_kernel.connectors.ai.azure_ai_inference.services.azure_ai_inference_base import (
+    AzureAIInferenceBase,
+    AzureAIInferenceClientType,
 )
-from semantic_kernel.connectors.ai.azure_ai_inference.services.azure_ai_inference_base import AzureAIInferenceBase
+from semantic_kernel.connectors.ai.azure_ai_inference.services.azure_ai_inference_tracing import AzureAIInferenceTracing
 from semantic_kernel.connectors.ai.azure_ai_inference.services.utils import MESSAGE_CONVERTERS
 from semantic_kernel.connectors.ai.chat_completion_client_base import ChatCompletionClientBase
 from semantic_kernel.connectors.ai.completion_usage import CompletionUsage
 from semantic_kernel.connectors.ai.function_call_choice_configuration import FunctionCallChoiceConfiguration
 from semantic_kernel.connectors.ai.function_calling_utils import update_settings_from_function_call_configuration
 from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceType
-from semantic_kernel.connectors.ai.open_ai.const import DEFAULT_AZURE_API_VERSION
 from semantic_kernel.contents.chat_history import ChatHistory
 from semantic_kernel.contents.chat_message_content import ITEM_TYPES, ChatMessageContent
 from semantic_kernel.contents.function_call_content import FunctionCallContent
@@ -45,16 +42,8 @@
 from semantic_kernel.contents.text_content import TextContent
 from semantic_kernel.contents.utils.author_role import AuthorRole
 from semantic_kernel.contents.utils.finish_reason import FinishReason
-from semantic_kernel.exceptions.service_exceptions import (
-    ServiceInitializationError,
-    ServiceInvalidExecutionSettingsError,
-)
+from semantic_kernel.exceptions.service_exceptions import ServiceInvalidExecutionSettingsError
 from semantic_kernel.utils.experimental_decorator import experimental_class
-from semantic_kernel.utils.telemetry.model_diagnostics.decorators import (
-    trace_chat_completion,
-    trace_streaming_chat_completion,
-)
-from semantic_kernel.utils.telemetry.user_agent import SEMANTIC_KERNEL_USER_AGENT
 
 if TYPE_CHECKING:
     from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings
@@ -97,39 +86,14 @@ def __init__(
         Raises:
             ServiceInitializationError: If an error occurs during initialization.
         """
-        if not client:
-            try:
-                azure_ai_inference_settings = AzureAIInferenceSettings.create(
-                    api_key=api_key,
-                    endpoint=endpoint,
-                    env_file_path=env_file_path,
-                    env_file_encoding=env_file_encoding,
-                )
-            except ValidationError as e:
-                raise ServiceInitializationError(f"Failed to validate Azure AI Inference settings: {e}") from e
-
-            endpoint_to_use: str = str(azure_ai_inference_settings.endpoint)
-            if azure_ai_inference_settings.api_key is not None:
-                client = ChatCompletionsClient(
-                    endpoint=endpoint_to_use,
-                    credential=AzureKeyCredential(azure_ai_inference_settings.api_key.get_secret_value()),
-                    user_agent=SEMANTIC_KERNEL_USER_AGENT,
-                )
-            else:
-                # Try to create the client with a DefaultAzureCredential
-                client = (
-                    ChatCompletionsClient(
-                        endpoint=endpoint_to_use,
-                        credential=DefaultAzureCredential(),
-                        credential_scopes=["https://cognitiveservices.azure.com/.default"],
-                        api_version=DEFAULT_AZURE_API_VERSION,
-                        user_agent=SEMANTIC_KERNEL_USER_AGENT,
-                    ),
-                )
-
         super().__init__(
             ai_model_id=ai_model_id,
             service_id=service_id or ai_model_id,
+            client_type=AzureAIInferenceClientType.ChatCompletions,
+            api_key=api_key,
+            endpoint=endpoint,
+            env_file_path=env_file_path,
+            env_file_encoding=env_file_encoding,
             client=client,
         )
@@ -149,7 +113,6 @@ def service_url(self) -> str | None:
         return None
 
     @override
-    @trace_chat_completion(AzureAIInferenceBase.MODEL_PROVIDER_NAME)
     async def _inner_get_chat_message_contents(
         self,
         chat_history: "ChatHistory",
@@ -160,17 +123,17 @@ async def _inner_get_chat_message_contents(
         assert isinstance(settings, AzureAIInferenceChatPromptExecutionSettings)  # nosec
 
         assert isinstance(self.client, ChatCompletionsClient)  # nosec
-        response: ChatCompletions = await self.client.complete(
-            messages=self._prepare_chat_history_for_request(chat_history),
-            model_extras=settings.extra_parameters,
-            **settings.prepare_settings_dict(),
-        )
+        with AzureAIInferenceTracing():
+            response: ChatCompletions = await self.client.complete(
+                messages=self._prepare_chat_history_for_request(chat_history),
+                model_extras=settings.extra_parameters,
+                **settings.prepare_settings_dict(),
+            )
         response_metadata = self._get_metadata_from_response(response)
 
         return [self._create_chat_message_content(response, choice, response_metadata) for choice in response.choices]
 
     @override
-    @trace_streaming_chat_completion(AzureAIInferenceBase.MODEL_PROVIDER_NAME)
     async def _inner_get_streaming_chat_message_contents(
         self,
         chat_history: "ChatHistory",
@@ -181,12 +144,13 @@ async def _inner_get_streaming_chat_message_contents(
         assert isinstance(settings, AzureAIInferenceChatPromptExecutionSettings)  # nosec
 
         assert isinstance(self.client, ChatCompletionsClient)  # nosec
-        response: AsyncStreamingChatCompletions = await self.client.complete(
-            stream=True,
-            messages=self._prepare_chat_history_for_request(chat_history),
-            model_extras=settings.extra_parameters,
-            **settings.prepare_settings_dict(),
-        )
+        with AzureAIInferenceTracing():
+            response: AsyncStreamingChatCompletions = await self.client.complete(
+                stream=True,
+                messages=self._prepare_chat_history_for_request(chat_history),
+                model_extras=settings.extra_parameters,
+                **settings.prepare_settings_dict(),
+            )
 
         async for chunk in response:
             if len(chunk.choices) == 0:
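
Because the spans now come from the Azure SDK instrumentor rather than the removed `@trace_chat_completion` decorators, they surface through whatever OpenTelemetry tracer provider the application configures. A hedged sketch of a console-exporter setup for inspecting them locally (standard OpenTelemetry SDK calls, nothing SK-specific):

```python
# Sketch: print spans (including those from AIInferenceInstrumentor) to stdout.
from opentelemetry import trace
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import ConsoleSpanExporter, SimpleSpanProcessor

provider = TracerProvider()
provider.add_span_processor(SimpleSpanProcessor(ConsoleSpanExporter()))
trace.set_tracer_provider(provider)
```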

python/semantic_kernel/connectors/ai/azure_ai_inference/services/azure_ai_inference_text_embedding.py (+9, -36)

@@ -10,21 +10,17 @@
 
 from azure.ai.inference.aio import EmbeddingsClient
 from azure.ai.inference.models import EmbeddingsResult
-from azure.core.credentials import AzureKeyCredential
-from azure.identity import DefaultAzureCredential
 from numpy import array, ndarray
-from pydantic import ValidationError
 
 from semantic_kernel.connectors.ai.azure_ai_inference.azure_ai_inference_prompt_execution_settings import (
     AzureAIInferenceEmbeddingPromptExecutionSettings,
 )
-from semantic_kernel.connectors.ai.azure_ai_inference.azure_ai_inference_settings import AzureAIInferenceSettings
-from semantic_kernel.connectors.ai.azure_ai_inference.services.azure_ai_inference_base import AzureAIInferenceBase
+from semantic_kernel.connectors.ai.azure_ai_inference.services.azure_ai_inference_base import (
+    AzureAIInferenceBase,
+    AzureAIInferenceClientType,
+)
 from semantic_kernel.connectors.ai.embeddings.embedding_generator_base import EmbeddingGeneratorBase
-from semantic_kernel.connectors.ai.open_ai.const import DEFAULT_AZURE_API_VERSION
-from semantic_kernel.exceptions.service_exceptions import ServiceInitializationError
 from semantic_kernel.utils.experimental_decorator import experimental_class
-from semantic_kernel.utils.telemetry.user_agent import SEMANTIC_KERNEL_USER_AGENT
 
 if TYPE_CHECKING:
     from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings
@@ -63,37 +59,14 @@ def __init__(
         Raises:
             ServiceInitializationError: If an error occurs during initialization.
         """
-        if not client:
-            try:
-                azure_ai_inference_settings = AzureAIInferenceSettings.create(
-                    api_key=api_key,
-                    endpoint=endpoint,
-                    env_file_path=env_file_path,
-                    env_file_encoding=env_file_encoding,
-                )
-            except ValidationError as e:
-                raise ServiceInitializationError(f"Failed to validate Azure AI Inference settings: {e}") from e
-
-            endpoint = str(azure_ai_inference_settings.endpoint)
-            if azure_ai_inference_settings.api_key is not None:
-                client = EmbeddingsClient(
-                    endpoint=endpoint,
-                    credential=AzureKeyCredential(azure_ai_inference_settings.api_key.get_secret_value()),
-                    user_agent=SEMANTIC_KERNEL_USER_AGENT,
-                )
-            else:
-                # Try to create the client with a DefaultAzureCredential
-                client = EmbeddingsClient(
-                    endpoint=endpoint,
-                    credential=DefaultAzureCredential(),
-                    credential_scopes=["https://cognitiveservices.azure.com/.default"],
-                    api_version=DEFAULT_AZURE_API_VERSION,
-                    user_agent=SEMANTIC_KERNEL_USER_AGENT,
-                )
-
         super().__init__(
             ai_model_id=ai_model_id,
             service_id=service_id or ai_model_id,
+            client_type=AzureAIInferenceClientType.Embeddings,
+            api_key=api_key,
+            endpoint=endpoint,
+            env_file_path=env_file_path,
+            env_file_encoding=env_file_encoding,
             client=client,
         )

python/semantic_kernel/connectors/ai/azure_ai_inference/services/azure_ai_inference_tracing.py (new file, +48)

@@ -0,0 +1,48 @@
+# Copyright (c) Microsoft. All rights reserved.
+
+from azure.ai.inference.tracing import AIInferenceInstrumentor
+from azure.core.settings import settings
+
+from semantic_kernel.kernel_pydantic import KernelBaseModel
+from semantic_kernel.utils.telemetry.model_diagnostics.model_diagnostics_settings import ModelDiagnosticSettings
+
+
+class AzureAIInferenceTracing(KernelBaseModel):
+    """Enable tracing for Azure AI Inference.
+
+    This class is intended to be used as a context manager.
+    The instrument() call effect should be scoped to the context manager.
+    """
+
+    diagnostics_settings: ModelDiagnosticSettings
+
+    def __init__(self, diagnostics_settings: ModelDiagnosticSettings | None = None) -> None:
+        """Initialize the Azure AI Inference Tracing.
+
+        Args:
+            diagnostics_settings (ModelDiagnosticSettings, optional): Model diagnostics settings. Defaults to None.
+        """
+        settings.tracing_implementation = "opentelemetry"
+        super().__init__(diagnostics_settings=diagnostics_settings or ModelDiagnosticSettings.create())
+
+    def __enter__(self) -> None:
+        """Enable tracing.
+
+        Both enable_otel_diagnostics and enable_otel_diagnostics_sensitive will enable tracing.
+        enable_otel_diagnostics_sensitive will also enable content recording.
+        """
+        if (
+            self.diagnostics_settings.enable_otel_diagnostics
+            or self.diagnostics_settings.enable_otel_diagnostics_sensitive
+        ):
+            AIInferenceInstrumentor().instrument(
+                enable_content_recording=self.diagnostics_settings.enable_otel_diagnostics_sensitive
+            )
+
+    def __exit__(self, exc_type, exc_val, exc_tb) -> None:
+        """Disable tracing."""
+        if (
+            self.diagnostics_settings.enable_otel_diagnostics
+            or self.diagnostics_settings.enable_otel_diagnostics_sensitive
+        ):
+            AIInferenceInstrumentor().uninstrument()
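
Usage mirrors what the chat completion service does above: instrumentation is switched on only for the duration of the request, then switched off again. A sketch, assuming the default environment-driven `ModelDiagnosticSettings`:

```python
# Sketch: the connector wraps each complete(...) call like this. instrument()
# only fires when SEMANTICKERNEL_EXPERIMENTAL_GENAI_ENABLE_OTEL_DIAGNOSTICS or
# ..._SENSITIVE is true in the environment / .env file.
from semantic_kernel.connectors.ai.azure_ai_inference.services.azure_ai_inference_tracing import (
    AzureAIInferenceTracing,
)

with AzureAIInferenceTracing():
    ...  # await client.complete(...); spans are emitted only inside this block
```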

python/semantic_kernel/connectors/memory/azure_cosmos_db/azure_cosmos_db_no_sql_base.py (+5, -2)

@@ -5,12 +5,15 @@
 from pydantic import ValidationError
 
 from semantic_kernel.connectors.memory.azure_cosmos_db.azure_cosmos_db_no_sql_settings import AzureCosmosDBNoSQLSettings
-from semantic_kernel.connectors.memory.azure_cosmos_db.utils import CosmosClientWrapper, DefaultAzureCredentialWrapper
+from semantic_kernel.connectors.memory.azure_cosmos_db.utils import CosmosClientWrapper
 from semantic_kernel.exceptions.memory_connector_exceptions import (
     MemoryConnectorInitializationError,
     MemoryConnectorResourceNotFound,
 )
 from semantic_kernel.kernel_pydantic import KernelBaseModel
+from semantic_kernel.utils.authentication.async_default_azure_credential_wrapper import (
+    AsyncDefaultAzureCredentialWrapper,
+)
 from semantic_kernel.utils.experimental_decorator import experimental_class
 
 
@@ -72,7 +75,7 @@ def __init__(
             )
         else:
             cosmos_client = CosmosClientWrapper(
-                str(cosmos_db_nosql_settings.url), credential=DefaultAzureCredentialWrapper()
+                str(cosmos_db_nosql_settings.url), credential=AsyncDefaultAzureCredentialWrapper()
             )
 
         super().__init__(

python/semantic_kernel/connectors/memory/azure_cosmos_db/utils.py (-10)

@@ -5,7 +5,6 @@
 from typing import Any
 
 from azure.cosmos.aio import CosmosClient
-from azure.identity.aio import DefaultAzureCredential
 
 from semantic_kernel.connectors.memory.azure_cosmos_db.azure_cosmos_db_no_sql_composite_key import (
     AzureCosmosDBNoSQLCompositeKey,
@@ -179,12 +178,3 @@ def __del__(self) -> None:
         """Close the CosmosClient."""
         with contextlib.suppress(Exception):
             asyncio.get_running_loop().create_task(self.close())
-
-
-class DefaultAzureCredentialWrapper(DefaultAzureCredential):
-    """Wrapper to make sure the DefaultAzureCredential is closed properly."""
-
-    def __del__(self) -> None:
-        """Close the DefaultAzureCredential."""
-        with contextlib.suppress(Exception):
-            asyncio.get_running_loop().create_task(self.close())

python/semantic_kernel/utils/authentication/async_default_azure_credential_wrapper.py (new file, +15)

@@ -0,0 +1,15 @@
+# Copyright (c) Microsoft. All rights reserved.
+
+import asyncio
+import contextlib
+
+from azure.identity.aio import DefaultAzureCredential
+
+
+class AsyncDefaultAzureCredentialWrapper(DefaultAzureCredential):
+    """Wrapper to make sure the async version of the DefaultAzureCredential is closed properly."""
+
+    def __del__(self) -> None:
+        """Close the DefaultAzureCredential."""
+        with contextlib.suppress(Exception):
+            asyncio.get_running_loop().create_task(self.close())
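
The wrapper exists because the async `DefaultAzureCredential` holds network resources that want an awaited `close()`, but `__del__` cannot await; scheduling the coroutine on the running loop and suppressing failures (for example, no loop at interpreter shutdown) is a best-effort compromise. The same pattern in isolation, with a placeholder resource type:

```python
# Illustrative sketch of the best-effort async cleanup pattern above;
# the wrapped resource is assumed to expose `async def close()`.
import asyncio
import contextlib


class OwnsAsyncResource:
    def __init__(self, resource) -> None:
        self.resource = resource

    def __del__(self) -> None:
        # __del__ cannot await; schedule close() if a loop is running and
        # swallow the RuntimeError raised when there is none.
        with contextlib.suppress(Exception):
            asyncio.get_running_loop().create_task(self.resource.close())
```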

python/semantic_kernel/utils/telemetry/model_diagnostics/model_diagnostics_settings.py (+1, -1)

@@ -11,7 +11,7 @@ class ModelDiagnosticSettings(KernelBaseSettings):
     """Settings for model diagnostics.
 
     The settings are first loaded from environment variables with
-    the prefix 'AZURE_AI_INFERENCE_'.
+    the prefix 'SEMANTICKERNEL_EXPERIMENTAL_GENAI_'.
     If the environment variables are not found, the settings can
     be loaded from a .env file with the encoding 'utf-8'.
     If the settings are not found in the .env file, the settings
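
Concretely, the two flags this PR cares about resolve to the following variables (the same names the new test fixture below sets):

```python
# Sketch: enable tracing, and optionally sensitive-content recording,
# before constructing any Azure AI Inference service.
import os

os.environ["SEMANTICKERNEL_EXPERIMENTAL_GENAI_ENABLE_OTEL_DIAGNOSTICS"] = "true"
os.environ["SEMANTICKERNEL_EXPERIMENTAL_GENAI_ENABLE_OTEL_DIAGNOSTICS_SENSITIVE"] = "true"
```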

python/tests/integration/completions/chat_completion_test_base.py (-2)

@@ -39,7 +39,6 @@
 from semantic_kernel.connectors.ai.mistral_ai.services.mistral_ai_chat_completion import MistralAIChatCompletion
 from semantic_kernel.connectors.ai.ollama.ollama_prompt_execution_settings import OllamaChatPromptExecutionSettings
 from semantic_kernel.connectors.ai.ollama.services.ollama_chat_completion import OllamaChatCompletion
-from semantic_kernel.connectors.ai.open_ai.const import DEFAULT_AZURE_API_VERSION
 from semantic_kernel.connectors.ai.open_ai.prompt_execution_settings.azure_chat_prompt_execution_settings import (
     AzureChatPromptExecutionSettings,
 )
@@ -134,7 +133,6 @@ def services(self) -> dict[str, tuple[ServiceType, type[PromptExecutionSettings]
                 endpoint=f"{str(endpoint).strip('/')}/openai/deployments/{deployment_name}",
                 credential=DefaultAzureCredential(),
                 credential_scopes=["https://cognitiveservices.azure.com/.default"],
-                api_version=DEFAULT_AZURE_API_VERSION,
             ),

python/tests/integration/embeddings/test_embedding_service_base.py (-2)

@@ -28,7 +28,6 @@
 from semantic_kernel.connectors.ai.mistral_ai.services.mistral_ai_text_embedding import MistralAITextEmbedding
 from semantic_kernel.connectors.ai.ollama.ollama_prompt_execution_settings import OllamaEmbeddingPromptExecutionSettings
 from semantic_kernel.connectors.ai.ollama.services.ollama_text_embedding import OllamaTextEmbedding
-from semantic_kernel.connectors.ai.open_ai.const import DEFAULT_AZURE_API_VERSION
 from semantic_kernel.connectors.ai.open_ai.prompt_execution_settings.open_ai_prompt_execution_settings import (
     OpenAIEmbeddingPromptExecutionSettings,
 )
@@ -76,7 +75,6 @@ def services(self) -> dict[str, tuple[EmbeddingGeneratorBase, type[PromptExecuti
                 endpoint=f"{str(endpoint).strip('/')}/openai/deployments/{deployment_name}",
                 credential=DefaultAzureCredential(),
                 credential_scopes=["https://cognitiveservices.azure.com/.default"],
-                api_version=DEFAULT_AZURE_API_VERSION,
             ),

python/tests/unit/connectors/ai/azure_ai_inference/conftest.py (+25)

@@ -59,6 +59,31 @@ def azure_ai_inference_unit_test_env(monkeypatch, exclude_list, override_env_param_dict):
     return env_vars
 
 
+@pytest.fixture()
+def model_diagnostics_test_env(monkeypatch, exclude_list, override_env_param_dict):
+    """Fixture to set environment variables for Azure AI Inference Unit Tests."""
+    if exclude_list is None:
+        exclude_list = []
+
+    if override_env_param_dict is None:
+        override_env_param_dict = {}
+
+    env_vars = {
+        "SEMANTICKERNEL_EXPERIMENTAL_GENAI_ENABLE_OTEL_DIAGNOSTICS": "true",
+        "SEMANTICKERNEL_EXPERIMENTAL_GENAI_ENABLE_OTEL_DIAGNOSTICS_SENSITIVE": "true",
+    }
+
+    env_vars.update(override_env_param_dict)
+
+    for key, value in env_vars.items():
+        if key not in exclude_list:
+            monkeypatch.setenv(key, value)
+        else:
+            monkeypatch.delenv(key, raising=False)
+
+    return env_vars
+
+
 @pytest.fixture(scope="function")
 def azure_ai_inference_client(azure_ai_inference_unit_test_env, request) -> ChatCompletionsClient | EmbeddingsClient:
     """Fixture to create Azure AI Inference client for unit tests."""

python/tests/unit/connectors/ai/azure_ai_inference/services/test_azure_ai_inference_chat_completion.py (+1, -8)

@@ -78,20 +78,13 @@ def test_azure_ai_inference_chat_completion_init_with_custom_client(azure_ai_inf
     assert azure_ai_inference.client == client
 
 
-@pytest.mark.parametrize("exclude_list", [["AZURE_AI_INFERENCE_API_KEY"]], indirect=True)
-def test_azure_ai_inference_chat_completion_init_with_empty_api_key(azure_ai_inference_unit_test_env, model_id) -> None:
-    """Test initialization of AzureAIInferenceChatCompletion with empty API key"""
-    with pytest.raises(ServiceInitializationError):
-        AzureAIInferenceChatCompletion(model_id)
-
-
 @pytest.mark.parametrize("exclude_list", [["AZURE_AI_INFERENCE_ENDPOINT"]], indirect=True)
 def test_azure_ai_inference_chat_completion_init_with_empty_endpoint(
     azure_ai_inference_unit_test_env, model_id
 ) -> None:
     """Test initialization of AzureAIInferenceChatCompletion with empty endpoint"""
     with pytest.raises(ServiceInitializationError):
-        AzureAIInferenceChatCompletion(model_id)
+        AzureAIInferenceChatCompletion(model_id, env_file_path="fake_path")
 
 
 def test_prompt_execution_settings_class(azure_ai_inference_unit_test_env, model_id) -> None:

python/tests/unit/connectors/ai/azure_ai_inference/services/test_azure_ai_inference_text_embedding.py (+1, -8)

@@ -67,18 +67,11 @@ def test_azure_ai_inference_chat_completion_init_with_custom_client(azure_ai_inf
     assert azure_ai_inference.client == client
 
 
-@pytest.mark.parametrize("exclude_list", [["AZURE_AI_INFERENCE_API_KEY"]], indirect=True)
-def test_azure_ai_inference_text_embedding_init_with_empty_api_key(azure_ai_inference_unit_test_env, model_id) -> None:
-    """Test initialization of AzureAIInferenceTextEmbedding with empty API key"""
-    with pytest.raises(ServiceInitializationError):
-        AzureAIInferenceTextEmbedding(model_id)
-
-
 @pytest.mark.parametrize("exclude_list", [["AZURE_AI_INFERENCE_ENDPOINT"]], indirect=True)
 def test_azure_ai_inference_text_embedding_init_with_empty_endpoint(azure_ai_inference_unit_test_env, model_id) -> None:
     """Test initialization of AzureAIInferenceTextEmbedding with empty endpoint"""
     with pytest.raises(ServiceInitializationError):
-        AzureAIInferenceTextEmbedding(model_id)
+        AzureAIInferenceTextEmbedding(model_id, env_file_path="fake_path")
 
 
 @pytest.mark.asyncio
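
Both deletions above follow from making `api_key` optional: a missing key now falls back to `DefaultAzureCredential` instead of raising `ServiceInitializationError`, so the empty-api-key tests are obsolete, and the empty-endpoint tests pass `env_file_path="fake_path"` so a stray local `.env` cannot supply the endpoint and mask the expected failure.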

New test file (+245)

@@ -0,0 +1,245 @@
+# Copyright (c) Microsoft. All rights reserved.
+
+from unittest.mock import AsyncMock, patch
+
+import pytest
+from azure.ai.inference.aio import ChatCompletionsClient
+
+from semantic_kernel.connectors.ai.azure_ai_inference.azure_ai_inference_prompt_execution_settings import (
+    AzureAIInferenceChatPromptExecutionSettings,
+)
+from semantic_kernel.connectors.ai.azure_ai_inference.services.azure_ai_inference_chat_completion import (
+    AzureAIInferenceChatCompletion,
+)
+from semantic_kernel.contents.chat_history import ChatHistory
+
+
+@pytest.mark.asyncio
+@pytest.mark.parametrize(
+    "azure_ai_inference_service",
+    [AzureAIInferenceChatCompletion.__name__],
+    indirect=True,
+)
+@patch.object(ChatCompletionsClient, "complete", new_callable=AsyncMock)
+@patch("azure.ai.inference.tracing.AIInferenceInstrumentor.uninstrument")
+@patch("azure.ai.inference.tracing.AIInferenceInstrumentor.instrument")
+async def test_azure_ai_inference_chat_completion_instrumentation(
+    mock_instrument,
+    mock_uninstrument,
+    mock_complete,
+    azure_ai_inference_service,
+    chat_history: ChatHistory,
+    mock_azure_ai_inference_chat_completion_response,
+    model_diagnostics_test_env,
+) -> None:
+    """Test completion of AzureAIInferenceChatCompletion"""
+    settings = AzureAIInferenceChatPromptExecutionSettings()
+
+    mock_complete.return_value = mock_azure_ai_inference_chat_completion_response
+
+    await azure_ai_inference_service.get_chat_message_contents(chat_history=chat_history, settings=settings)
+
+    mock_instrument.assert_called_once_with(enable_content_recording=True)
+    mock_uninstrument.assert_called_once()
+
+
+@pytest.mark.asyncio
+@pytest.mark.parametrize(
+    "azure_ai_inference_service",
+    [
+        AzureAIInferenceChatCompletion.__name__,
+    ],
+    indirect=True,
+)
+@pytest.mark.parametrize(
+    "override_env_param_dict",
+    [
+        {
+            "SEMANTICKERNEL_EXPERIMENTAL_GENAI_ENABLE_OTEL_DIAGNOSTICS": "False",
+            "SEMANTICKERNEL_EXPERIMENTAL_GENAI_ENABLE_OTEL_DIAGNOSTICS_SENSITIVE": "False",
+        },
+    ],
+    indirect=True,
+)
+@patch.object(ChatCompletionsClient, "complete", new_callable=AsyncMock)
+@patch("azure.ai.inference.tracing.AIInferenceInstrumentor.uninstrument")
+@patch("azure.ai.inference.tracing.AIInferenceInstrumentor.instrument")
+async def test_azure_ai_inference_chat_completion_not_instrumentation(
+    mock_instrument,
+    mock_uninstrument,
+    mock_complete,
+    azure_ai_inference_service,
+    chat_history: ChatHistory,
+    mock_azure_ai_inference_chat_completion_response,
+    model_diagnostics_test_env,
+) -> None:
+    """Test completion of AzureAIInferenceChatCompletion"""
+    settings = AzureAIInferenceChatPromptExecutionSettings()
+
+    mock_complete.return_value = mock_azure_ai_inference_chat_completion_response
+
+    await azure_ai_inference_service.get_chat_message_contents(chat_history=chat_history, settings=settings)
+
+    mock_instrument.assert_not_called()
+    mock_uninstrument.assert_not_called()
+
+
+@pytest.mark.asyncio
+@pytest.mark.parametrize(
+    "azure_ai_inference_service",
+    [
+        AzureAIInferenceChatCompletion.__name__,
+    ],
+    indirect=True,
+)
+@pytest.mark.parametrize(
+    "override_env_param_dict",
+    [
+        {
+            "SEMANTICKERNEL_EXPERIMENTAL_GENAI_ENABLE_OTEL_DIAGNOSTICS": "True",
+            "SEMANTICKERNEL_EXPERIMENTAL_GENAI_ENABLE_OTEL_DIAGNOSTICS_SENSITIVE": "False",
+        },
+    ],
+    indirect=True,
+)
+@patch.object(ChatCompletionsClient, "complete", new_callable=AsyncMock)
+@patch("azure.ai.inference.tracing.AIInferenceInstrumentor.uninstrument")
+@patch("azure.ai.inference.tracing.AIInferenceInstrumentor.instrument")
+async def test_azure_ai_inference_chat_completion_instrumentation_without_sensitive(
+    mock_instrument,
+    mock_uninstrument,
+    mock_complete,
+    azure_ai_inference_service,
+    chat_history: ChatHistory,
+    mock_azure_ai_inference_chat_completion_response,
+    model_diagnostics_test_env,
+) -> None:
+    """Test completion of AzureAIInferenceChatCompletion"""
+    settings = AzureAIInferenceChatPromptExecutionSettings()
+
+    mock_complete.return_value = mock_azure_ai_inference_chat_completion_response
+
+    await azure_ai_inference_service.get_chat_message_contents(chat_history=chat_history, settings=settings)
+
+    mock_instrument.assert_called_once_with(enable_content_recording=False)
+    mock_uninstrument.assert_called_once()
+
+
+@pytest.mark.asyncio
+@pytest.mark.parametrize(
+    "azure_ai_inference_service",
+    [AzureAIInferenceChatCompletion.__name__],
+    indirect=True,
+)
+@patch.object(ChatCompletionsClient, "complete", new_callable=AsyncMock)
+@patch("azure.ai.inference.tracing.AIInferenceInstrumentor.uninstrument")
+@patch("azure.ai.inference.tracing.AIInferenceInstrumentor.instrument")
+async def test_azure_ai_inference_streaming_chat_completion_instrumentation(
+    mock_instrument,
+    mock_uninstrument,
+    mock_complete,
+    azure_ai_inference_service,
+    chat_history: ChatHistory,
+    mock_azure_ai_inference_streaming_chat_completion_response,
+    model_diagnostics_test_env,
+) -> None:
+    """Test completion of AzureAIInferenceChatCompletion"""
+    settings = AzureAIInferenceChatPromptExecutionSettings()
+
+    mock_complete.return_value = mock_azure_ai_inference_streaming_chat_completion_response
+
+    async for _ in azure_ai_inference_service.get_streaming_chat_message_contents(
+        chat_history=chat_history, settings=settings
+    ):
+        pass
+
+    mock_instrument.assert_called_once_with(enable_content_recording=True)
+    mock_uninstrument.assert_called_once()
+
+
+@pytest.mark.asyncio
+@pytest.mark.parametrize(
+    "azure_ai_inference_service",
+    [
+        AzureAIInferenceChatCompletion.__name__,
+    ],
+    indirect=True,
+)
+@pytest.mark.parametrize(
+    "override_env_param_dict",
+    [
+        {
+            "SEMANTICKERNEL_EXPERIMENTAL_GENAI_ENABLE_OTEL_DIAGNOSTICS": "False",
+            "SEMANTICKERNEL_EXPERIMENTAL_GENAI_ENABLE_OTEL_DIAGNOSTICS_SENSITIVE": "False",
+        },
+    ],
+    indirect=True,
+)
+@patch.object(ChatCompletionsClient, "complete", new_callable=AsyncMock)
+@patch("azure.ai.inference.tracing.AIInferenceInstrumentor.uninstrument")
+@patch("azure.ai.inference.tracing.AIInferenceInstrumentor.instrument")
+async def test_azure_ai_inference_streaming_chat_completion_not_instrumentation(
+    mock_instrument,
+    mock_uninstrument,
+    mock_complete,
+    azure_ai_inference_service,
+    chat_history: ChatHistory,
+    mock_azure_ai_inference_streaming_chat_completion_response,
+    model_diagnostics_test_env,
+) -> None:
+    """Test completion of AzureAIInferenceChatCompletion"""
+    settings = AzureAIInferenceChatPromptExecutionSettings()
+
+    mock_complete.return_value = mock_azure_ai_inference_streaming_chat_completion_response
+
+    async for _ in azure_ai_inference_service.get_streaming_chat_message_contents(
+        chat_history=chat_history, settings=settings
+    ):
+        pass
+
+    mock_instrument.assert_not_called()
+    mock_uninstrument.assert_not_called()
+
+
+@pytest.mark.asyncio
+@pytest.mark.parametrize(
+    "azure_ai_inference_service",
+    [
+        AzureAIInferenceChatCompletion.__name__,
+    ],
+    indirect=True,
+)
+@pytest.mark.parametrize(
+    "override_env_param_dict",
+    [
+        {
+            "SEMANTICKERNEL_EXPERIMENTAL_GENAI_ENABLE_OTEL_DIAGNOSTICS": "True",
+            "SEMANTICKERNEL_EXPERIMENTAL_GENAI_ENABLE_OTEL_DIAGNOSTICS_SENSITIVE": "False",
+        },
+    ],
+    indirect=True,
+)
+@patch.object(ChatCompletionsClient, "complete", new_callable=AsyncMock)
+@patch("azure.ai.inference.tracing.AIInferenceInstrumentor.uninstrument")
+@patch("azure.ai.inference.tracing.AIInferenceInstrumentor.instrument")
+async def test_azure_ai_inference_streaming_chat_completion_instrumentation_without_sensitive(
+    mock_instrument,
+    mock_uninstrument,
+    mock_complete,
+    azure_ai_inference_service,
+    chat_history: ChatHistory,
+    mock_azure_ai_inference_streaming_chat_completion_response,
+    model_diagnostics_test_env,
+) -> None:
+    """Test completion of AzureAIInferenceChatCompletion"""
+    settings = AzureAIInferenceChatPromptExecutionSettings()
+
+    mock_complete.return_value = mock_azure_ai_inference_streaming_chat_completion_response
+
+    async for _ in azure_ai_inference_service.get_streaming_chat_message_contents(
+        chat_history=chat_history, settings=settings
+    ):
+        pass
+
+    mock_instrument.assert_called_once_with(enable_content_recording=False)
+    mock_uninstrument.assert_called_once()

python/tests/unit/utils/model_diagnostics/test_decorated.py (-14)

@@ -3,9 +3,6 @@
 import pytest
 
 from semantic_kernel.connectors.ai.anthropic.services.anthropic_chat_completion import AnthropicChatCompletion
-from semantic_kernel.connectors.ai.azure_ai_inference.services.azure_ai_inference_chat_completion import (
-    AzureAIInferenceChatCompletion,
-)
 from semantic_kernel.connectors.ai.google.google_ai.services.google_ai_chat_completion import GoogleAIChatCompletion
 from semantic_kernel.connectors.ai.google.google_ai.services.google_ai_text_completion import GoogleAITextCompletion
 from semantic_kernel.connectors.ai.google.vertex_ai.services.vertex_ai_chat_completion import VertexAIChatCompletion
@@ -118,17 +115,6 @@
         "__model_diagnostics_streaming_text_completion__",
         id="GoogleAITextCompletion._inner_get_streaming_text_contents",
     ),
-    # AzureAIInferenceChatCompletion
-    pytest.param(
-        AzureAIInferenceChatCompletion._inner_get_chat_message_contents,
-        "__model_diagnostics_chat_completion__",
-        id="AzureAIInferenceChatCompletion._inner_get_chat_message_contents",
-    ),
-    pytest.param(
-        AzureAIInferenceChatCompletion._inner_get_streaming_chat_message_contents,
-        "__model_diagnostics_streaming_chat_completion__",
-        id="AzureAIInferenceChatCompletion._inner_get_streaming_chat_message_contents",
-    ),
     # AnthropicChatCompletion
     pytest.param(
         AnthropicChatCompletion._inner_get_chat_message_contents,

python/uv.lock (+927, -869)

Generated lock file; contents not rendered.
