Run pipeline from audio stream function (#90748)

* Run pipeline from audio stream function

* Fix tests

---------

Co-authored-by: Michael Hansen <mike@rhasspy.org>
This commit is contained in:
Paulus Schoutsen 2023-04-04 00:06:51 -04:00 committed by GitHub
parent 4f1574b859
commit 6e4c78686e
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
8 changed files with 383 additions and 158 deletions

View file

@@ -1,12 +1,33 @@
"""The Voice Assistant integration."""
from __future__ import annotations
from homeassistant.core import HomeAssistant
from collections.abc import AsyncIterable
from homeassistant.components import stt
from homeassistant.core import Context, HomeAssistant
from homeassistant.helpers.typing import ConfigType
from .const import DOMAIN
from .error import PipelineNotFound
from .pipeline import (
PipelineEvent,
PipelineEventCallback,
PipelineEventType,
PipelineInput,
PipelineRun,
PipelineStage,
async_get_pipeline,
)
from .websocket_api import async_register_websocket_api
__all__ = (
"DOMAIN",
"async_setup",
"async_pipeline_from_audio_stream",
"PipelineEvent",
"PipelineEventType",
)
async def async_setup(hass: HomeAssistant, config: ConfigType) -> bool:
"""Set up Voice Assistant integration."""
@ -14,3 +35,55 @@ async def async_setup(hass: HomeAssistant, config: ConfigType) -> bool:
async_register_websocket_api(hass)
return True
async def async_pipeline_from_audio_stream(
    hass: HomeAssistant,
    event_callback: PipelineEventCallback,
    stt_metadata: stt.SpeechMetadata,
    stt_stream: AsyncIterable[bytes],
    language: str | None = None,
    pipeline_id: str | None = None,
    conversation_id: str | None = None,
    context: Context | None = None,
) -> None:
    """Run the voice-assistant pipeline over an incoming audio stream.

    Events produced by the run are delivered through event_callback.
    Raises PipelineNotFound when no pipeline matches pipeline_id/language.
    """
    # Default to the Home Assistant instance language when none was given.
    if language is None:
        language = hass.config.language

    # Temporary workaround for language codes
    if language == "en":
        language = "en-US"

    # Fill in the STT language only when the caller left it blank.
    if stt_metadata.language == "":
        stt_metadata.language = language

    run_context = context if context is not None else Context()

    pipeline = async_get_pipeline(
        hass,
        pipeline_id=pipeline_id,
        language=language,
    )
    if pipeline is None:
        raise PipelineNotFound(
            "pipeline_not_found", f"Pipeline {pipeline_id} not found"
        )

    # Run the full pipeline: speech-to-text through text-to-speech.
    run = PipelineRun(
        hass,
        context=run_context,
        pipeline=pipeline,
        start_stage=PipelineStage.STT,
        end_stage=PipelineStage.TTS,
        event_callback=event_callback,
    )
    pipeline_input = PipelineInput(
        conversation_id=conversation_id,
        stt_metadata=stt_metadata,
        stt_stream=stt_stream,
        run=run,
    )
    await pipeline_input.validate()
    await pipeline_input.execute()

View file

@@ -0,0 +1,30 @@
"""Voice Assistant errors."""
from homeassistant.exceptions import HomeAssistantError
class PipelineError(HomeAssistantError):
    """Base class for pipeline errors."""

    def __init__(self, code: str, message: str) -> None:
        """Record the machine-readable code and human-readable message."""
        self.code = code
        self.message = message
        details = f"Pipeline error code={code}, message={message}"
        super().__init__(details)
# Raised when no pipeline matches the requested id/language
# (see the None check after async_get_pipeline in __init__.py).
class PipelineNotFound(PipelineError):
    """Unspecified pipeline picked."""
# Imported by pipeline.py for failures in the STT stage.
class SpeechToTextError(PipelineError):
    """Error in speech to text portion of pipeline."""
# Imported by pipeline.py for failures in the intent stage.
class IntentRecognitionError(PipelineError):
    """Error in intent recognition portion of pipeline."""
# Imported by pipeline.py for failures in the TTS stage.
class TextToSpeechError(PipelineError):
    """Error in text to speech portion of pipeline."""

View file

@ -16,6 +16,12 @@ from homeassistant.core import Context, HomeAssistant, callback
from homeassistant.util.dt import utcnow
from .const import DOMAIN
from .error import (
IntentRecognitionError,
PipelineError,
SpeechToTextError,
TextToSpeechError,
)
_LOGGER = logging.getLogger(__name__)
@ -39,29 +45,6 @@ def async_get_pipeline(
)
class PipelineError(Exception):
    """Base class for pipeline errors."""

    def __init__(self, code: str, message: str) -> None:
        """Store the code and message, and format the exception text."""
        self.code = code
        self.message = message
        text = f"Pipeline error code={code}, message={message}"
        super().__init__(text)
# Raised for failures in the STT stage of a pipeline run.
class SpeechToTextError(PipelineError):
    """Error in speech to text portion of pipeline."""
# Raised for failures in the intent-recognition stage of a pipeline run.
class IntentRecognitionError(PipelineError):
    """Error in intent recognition portion of pipeline."""
# Raised for failures in the TTS stage of a pipeline run.
class TextToSpeechError(PipelineError):
    """Error in text to speech portion of pipeline."""
class PipelineEventType(StrEnum):
"""Event types emitted during a pipeline run."""
@ -93,6 +76,9 @@ class PipelineEvent:
}
# Signature of callbacks invoked with each PipelineEvent emitted during a run.
PipelineEventCallback = Callable[[PipelineEvent], None]
@dataclass
class Pipeline:
"""A voice assistant pipeline."""
@ -146,7 +132,7 @@ class PipelineRun:
pipeline: Pipeline
start_stage: PipelineStage
end_stage: PipelineStage
event_callback: Callable[[PipelineEvent], None]
event_callback: PipelineEventCallback
language: str = None # type: ignore[assignment]
runner_data: Any | None = None
stt_provider: stt.Provider | None = None

View file

@ -1268,7 +1268,7 @@ def mock_integration(
def mock_import_platform(platform_name: str) -> NoReturn:
    """Always raise ImportError, simulating a platform that cannot be imported.

    The fully-qualified platform path is used both in the message and as the
    ImportError ``name`` so callers can identify which import "failed".
    """
    # Only one positional message argument: the duplicated plain-name message
    # line was stale diff residue and has been dropped.
    raise ImportError(
        f"Mocked unable to import platform '{integration.pkg_path}.{platform_name}'",
        name=f"{integration.pkg_path}.{platform_name}",
    )

View file

@@ -0,0 +1,139 @@
"""Test fixtures for voice assistant."""
from collections.abc import AsyncIterable
from typing import Any
from unittest.mock import AsyncMock, Mock
import pytest
from homeassistant.components import stt, tts
from homeassistant.core import HomeAssistant
from homeassistant.helpers.typing import ConfigType, DiscoveryInfoType
from homeassistant.setup import async_setup_component
from tests.common import MockModule, mock_integration, mock_platform
from tests.components.tts.conftest import ( # noqa: F401, pylint: disable=unused-import
mock_get_cache_files,
mock_init_cache_dir,
)
_TRANSCRIPT = "test transcript"
class MockSttProvider(stt.Provider):
    """Mock STT provider that records audio and returns a canned transcript."""

    def __init__(self, hass: HomeAssistant, text: str) -> None:
        """Init test provider."""
        self.hass = hass
        self.text = text
        # Audio chunks seen by async_process_audio_stream, for test assertions.
        self.received: list[bytes] = []

    @property
    def supported_languages(self) -> list[str]:
        """Languages this provider accepts."""
        return ["en-US"]

    @property
    def supported_formats(self) -> list[stt.AudioFormats]:
        """Audio container formats this provider accepts."""
        return [stt.AudioFormats.WAV]

    @property
    def supported_codecs(self) -> list[stt.AudioCodecs]:
        """Audio codecs this provider accepts."""
        return [stt.AudioCodecs.PCM]

    @property
    def supported_bit_rates(self) -> list[stt.AudioBitRates]:
        """Bit rates this provider accepts."""
        return [stt.AudioBitRates.BITRATE_16]

    @property
    def supported_sample_rates(self) -> list[stt.AudioSampleRates]:
        """Sample rates this provider accepts."""
        return [stt.AudioSampleRates.SAMPLERATE_16000]

    @property
    def supported_channels(self) -> list[stt.AudioChannels]:
        """Channel counts this provider accepts."""
        return [stt.AudioChannels.CHANNEL_MONO]

    async def async_process_audio_stream(
        self, metadata: stt.SpeechMetadata, stream: AsyncIterable[bytes]
    ) -> stt.SpeechResult:
        """Record chunks until the empty terminator, then return the canned text."""
        async for chunk in stream:
            if not chunk:
                break
            self.received.append(chunk)
        return stt.SpeechResult(self.text, stt.SpeechResultState.SUCCESS)
class MockTTSProvider(tts.Provider):
    """Mock TTS provider returning empty MP3 audio for any input."""

    name = "Test"

    @property
    def default_language(self) -> str:
        """Language used when the caller requests none."""
        return "en"

    @property
    def supported_languages(self) -> list[str]:
        """Languages this provider can synthesize."""
        return ["en-US"]

    @property
    def supported_options(self) -> list[str]:
        """Provider options (voice, age) callers may pass."""
        return ["voice", "age"]

    def get_tts_audio(
        self, message: str, language: str, options: dict[str, Any] | None = None
    ) -> tts.TtsAudioType:
        """Return an empty MP3 payload regardless of the message."""
        return ("mp3", b"")
class MockTTS:
    """A mock TTS platform."""

    PLATFORM_SCHEMA = tts.PLATFORM_SCHEMA

    async def async_get_engine(
        self,
        hass: HomeAssistant,
        config: ConfigType,
        discovery_info: DiscoveryInfoType | None = None,
    ) -> tts.Provider:
        """Return a fresh MockTTSProvider; config and discovery_info are unused."""
        return MockTTSProvider()
@pytest.fixture
async def mock_stt_provider(hass) -> MockSttProvider:
    """Provide a MockSttProvider wired to the canned test transcript."""
    return MockSttProvider(hass, _TRANSCRIPT)
@pytest.fixture(autouse=True)
async def init_components(
    hass: HomeAssistant,
    mock_stt_provider: MockSttProvider,
    mock_get_cache_files,  # noqa: F811
    mock_init_cache_dir,  # noqa: F811
):
    """Initialize relevant components with empty configs."""
    mock_integration(hass, MockModule(domain="test"))
    mock_platform(hass, "test.tts", MockTTS())
    # The STT platform hands back the shared fixture provider instance.
    mock_platform(
        hass,
        "test.stt",
        Mock(async_get_engine=AsyncMock(return_value=mock_stt_provider)),
    )
    # Set up each required component, failing fast if any refuses to load.
    for domain, config in (
        (tts.DOMAIN, {"tts": {"platform": "test"}}),
        (stt.DOMAIN, {"stt": {"platform": "test"}}),
        ("media_source", {}),
        ("voice_assistant", {}),
    ):
        assert await async_setup_component(hass, domain, config)

View file

@ -0,0 +1,85 @@
# serializer version: 1
# name: test_pipeline_from_audio_stream
list([
dict({
'data': dict({
'language': 'en-US',
'pipeline': 'en-US',
}),
'type': <PipelineEventType.RUN_START: 'run-start'>,
}),
dict({
'data': dict({
'engine': 'test',
'metadata': dict({
'bit_rate': <AudioBitRates.BITRATE_16: 16>,
'channel': <AudioChannels.CHANNEL_MONO: 1>,
'codec': <AudioCodecs.PCM: 'pcm'>,
'format': <AudioFormats.WAV: 'wav'>,
'language': 'en-US',
'sample_rate': <AudioSampleRates.SAMPLERATE_16000: 16000>,
}),
}),
'type': <PipelineEventType.STT_START: 'stt-start'>,
}),
dict({
'data': dict({
'stt_output': dict({
'text': 'test transcript',
}),
}),
'type': <PipelineEventType.STT_END: 'stt-end'>,
}),
dict({
'data': dict({
'engine': 'homeassistant',
'intent_input': 'test transcript',
}),
'type': <PipelineEventType.INTENT_START: 'intent-start'>,
}),
dict({
'data': dict({
'intent_output': dict({
'conversation_id': None,
'response': dict({
'card': dict({
}),
'data': dict({
'code': 'no_intent_match',
}),
'language': 'en-US',
'response_type': 'error',
'speech': dict({
'plain': dict({
'extra_data': None,
'speech': "Sorry, I couldn't understand that",
}),
}),
}),
}),
}),
'type': <PipelineEventType.INTENT_END: 'intent-end'>,
}),
dict({
'data': dict({
'engine': 'test',
'tts_input': "Sorry, I couldn't understand that",
}),
'type': <PipelineEventType.TTS_START: 'tts-start'>,
}),
dict({
'data': dict({
'tts_output': dict({
'mime_type': 'audio/mpeg',
'url': '/api/tts_proxy/dae2cdcb27a1d1c3b07ba2c7db91480f9d4bfd8f_en-us_-_test.mp3',
}),
}),
'type': <PipelineEventType.TTS_END: 'tts-end'>,
}),
dict({
'data': dict({
}),
'type': <PipelineEventType.RUN_END: 'run-end'>,
}),
])
# ---

View file

@ -0,0 +1,42 @@
"""Test Voice Assistant init."""
from syrupy.assertion import SnapshotAssertion
from homeassistant.components import stt, voice_assistant
from homeassistant.core import HomeAssistant
async def test_pipeline_from_audio_stream(
    hass: HomeAssistant, mock_stt_provider, snapshot: SnapshotAssertion
) -> None:
    """Test creating a pipeline from an audio stream."""
    captured_events = []

    async def audio_data():
        # Two audio chunks followed by the empty terminator chunk.
        yield b"part1"
        yield b"part2"
        yield b""

    await voice_assistant.async_pipeline_from_audio_stream(
        hass,
        captured_events.append,
        stt.SpeechMetadata(
            language="",
            format=stt.AudioFormats.WAV,
            codec=stt.AudioCodecs.PCM,
            bit_rate=stt.AudioBitRates.BITRATE_16,
            sample_rate=stt.AudioSampleRates.SAMPLERATE_16000,
            channel=stt.AudioChannels.CHANNEL_MONO,
        ),
        audio_data(),
    )

    # Strip timestamps before comparing against the stored snapshot.
    processed = [
        {key: value for key, value in event.as_dict().items() if key != "timestamp"}
        for event in captured_events
    ]
    assert processed == snapshot

    # The mock provider saw both non-empty chunks (terminator excluded).
    assert mock_stt_provider.received == [b"part1", b"part2"]

View file

@ -1,143 +1,13 @@
"""Websocket tests for Voice Assistant integration."""
import asyncio
from collections.abc import AsyncIterable
from typing import Any
from unittest.mock import MagicMock, patch
import pytest
from syrupy.assertion import SnapshotAssertion
from homeassistant.components import stt, tts
from homeassistant.core import HomeAssistant
from homeassistant.helpers.typing import ConfigType, DiscoveryInfoType
from homeassistant.setup import async_setup_component
from tests.common import MockModule, mock_integration, mock_platform
from tests.components.tts.conftest import ( # noqa: F401, pylint: disable=unused-import
mock_get_cache_files,
mock_init_cache_dir,
)
from tests.typing import WebSocketGenerator
_TRANSCRIPT = "test transcript"
class MockSttProvider(stt.Provider):
    """Mock STT provider."""

    def __init__(self, hass: HomeAssistant, text: str) -> None:
        """Init test provider."""
        self.hass = hass
        # Canned transcript returned for every processed audio stream.
        self.text = text

    @property
    def supported_languages(self) -> list[str]:
        """Return a list of supported languages."""
        return ["en-US"]

    @property
    def supported_formats(self) -> list[stt.AudioFormats]:
        """Return a list of supported formats."""
        return [stt.AudioFormats.WAV]

    @property
    def supported_codecs(self) -> list[stt.AudioCodecs]:
        """Return a list of supported codecs."""
        return [stt.AudioCodecs.PCM]

    @property
    def supported_bit_rates(self) -> list[stt.AudioBitRates]:
        """Return a list of supported bitrates."""
        return [stt.AudioBitRates.BITRATE_16]

    @property
    def supported_sample_rates(self) -> list[stt.AudioSampleRates]:
        """Return a list of supported samplerates."""
        return [stt.AudioSampleRates.SAMPLERATE_16000]

    @property
    def supported_channels(self) -> list[stt.AudioChannels]:
        """Return a list of supported channels."""
        return [stt.AudioChannels.CHANNEL_MONO]

    async def async_process_audio_stream(
        self, metadata: stt.SpeechMetadata, stream: AsyncIterable[bytes]
    ) -> stt.SpeechResult:
        """Process an audio stream."""
        # Ignores the audio entirely; immediately reports self.text as a success.
        return stt.SpeechResult(self.text, stt.SpeechResultState.SUCCESS)
class MockSTT:
    """A mock STT platform."""

    async def async_get_engine(
        self,
        hass: HomeAssistant,
        config: ConfigType,
        discovery_info: DiscoveryInfoType | None = None,
    ) -> stt.Provider:
        """Return a MockSttProvider preloaded with the canned transcript."""
        return MockSttProvider(hass, _TRANSCRIPT)
class MockTTSProvider(tts.Provider):
    """Mock TTS provider."""

    name = "Test"

    @property
    def default_language(self) -> str:
        """Return the default language."""
        return "en"

    @property
    def supported_languages(self) -> list[str]:
        """Return list of supported languages."""
        return ["en-US"]

    @property
    def supported_options(self) -> list[str]:
        """Return list of supported options like voice, emotions."""
        return ["voice", "age"]

    def get_tts_audio(
        self, message: str, language: str, options: dict[str, Any] | None = None
    ) -> tts.TtsAudioType:
        """Load TTS data."""
        # Returns an empty MP3 payload regardless of the message.
        return ("mp3", b"")
class MockTTS:
    """A mock TTS platform."""

    PLATFORM_SCHEMA = tts.PLATFORM_SCHEMA

    async def async_get_engine(
        self,
        hass: HomeAssistant,
        config: ConfigType,
        discovery_info: DiscoveryInfoType | None = None,
    ) -> tts.Provider:
        """Return a fresh MockTTSProvider; config and discovery_info are unused."""
        return MockTTSProvider()
@pytest.fixture(autouse=True)
async def init_components(
    hass: HomeAssistant,
    mock_get_cache_files,  # noqa: F811
    mock_init_cache_dir,  # noqa: F811
):
    """Initialize relevant components with empty configs."""
    mock_integration(hass, MockModule(domain="test"))
    mock_platform(hass, "test.tts", MockTTS())
    mock_platform(hass, "test.stt", MockSTT())
    # Set up each required component, failing fast if any refuses to load.
    for domain, config in (
        (tts.DOMAIN, {"tts": {"platform": "test"}}),
        (stt.DOMAIN, {"stt": {"platform": "test"}}),
        ("media_source", {}),
        ("voice_assistant", {}),
    ):
        assert await async_setup_component(hass, domain, config)
async def test_text_only_pipeline(
hass: HomeAssistant,
@ -211,7 +81,7 @@ async def test_audio_pipeline(
assert msg["event"]["data"] == snapshot
# End of audio stream (handler id + empty payload)
await client.send_bytes(b"1")
await client.send_bytes(bytes([1]))
msg = await client.receive_json()
assert msg["event"]["type"] == "stt-end"
@ -438,7 +308,7 @@ async def test_stt_stream_failed(
) -> None:
"""Test events from a pipeline run with a non-existent STT provider."""
with patch(
"tests.components.voice_assistant.test_websocket.MockSttProvider.async_process_audio_stream",
"tests.components.voice_assistant.conftest.MockSttProvider.async_process_audio_stream",
new=MagicMock(side_effect=RuntimeError),
):
client = await hass_ws_client(hass)