Source code for fluxion_ai.utils.audio_utils

""" 
fluxion_ai.utils.audio_utils
~~~~~~~~~~~~~~~~~~~~~~~~~~~~
This module provides utility functions for audio processing tasks such as
speech-to-text (STT) and text-to-speech (TTS) conversion.

Classes:
    - AudioUtils: A utility class for handling audio-related tasks.
    
Exceptions:
    - AudioUtilsError: Base exception for AudioUtils errors.
    - SpeechToTextError: Exception raised for Speech-to-Text errors.
    - TextToSpeechError: Exception raised for Text-to-Speech errors.

Functions:
    - google_text_to_speech: Converts text to speech and saves it as an audio file.
    - play_audio: Plays an audio file.
    - load_audio: Loads audio data from a file or microphone.
"""

import tempfile
import os
from typing import Any


[docs]
class AudioUtilsError(Exception):
    """Root of the exception hierarchy used by the audio utilities.

    The more specific speech-to-text and text-to-speech errors in this
    module derive from this class, so catching it handles both.
    """




[docs]
class SpeechToTextError(AudioUtilsError):
    """Signals that a speech-to-text (transcription) operation failed."""




[docs]
class TextToSpeechError(AudioUtilsError):
    """Signals that a text-to-speech (synthesis or playback) operation failed."""




[docs]
def google_text_to_speech(text: str, filepath: str, lang: str = "en"):
    """
    Synthesize speech from text with gTTS and write it to an audio file.

    The ``gtts`` package is imported lazily inside the function, so it is
    only needed when this function is actually called.

    Args:
        text (str): The text to be spoken.
        filepath (str): Destination path of the generated audio file.
        lang (str): Language code handed to gTTS (default: "en").

    Raises:
        ImportError: If the ``gtts`` package cannot be imported.
    """
    try:
        from gtts import gTTS
    except ImportError:
        message = (
            "The 'gtts' package is required for text-to-speech conversion. "
            "Please install it using 'pip install gtts'."
        )
        raise ImportError(message)

    # Build the synthesizer and persist its output in one step.
    gTTS(text=text, lang=lang).save(filepath)




[docs]
def play_audio(filepath: str):
    """
    Play an audio file through the ``playsound`` package.

    The ``playsound`` package is imported lazily inside the function, so it
    is only needed when this function is actually called.

    Args:
        filepath (str): Path of the audio file to play.

    Raises:
        ImportError: If the ``playsound`` package cannot be imported.
    """
    try:
        from playsound import playsound as _play
    except ImportError:
        message = (
            "The 'playsound' package is required for playing audio. "
            "Please install it using 'pip install playsound'."
        )
        raise ImportError(message)

    _play(filepath)




[docs]
def load_audio(recognizer: Any, audio_path: str = None):
    """
    Obtain audio data from a file or, when no path is given, the microphone.

    The ``speech_recognition`` package is imported lazily inside the
    function, so it is only needed when this function is actually called.

    Args:
        recognizer: Object used to capture the audio. Its ``record`` method
            is called for file input; its ``adjust_for_ambient_noise`` and
            ``listen`` methods are called for microphone input.
        audio_path (str): Path to the audio file. If None (or otherwise
            falsy), audio is taken from the microphone instead.

    Returns:
        The value returned by ``recognizer.record`` or ``recognizer.listen``
        (``speech_recognition.AudioData`` for a standard recognizer).

    Raises:
        ImportError: If the ``speech_recognition`` package cannot be imported.
    """
    try:
        import speech_recognition as sr
    except ImportError:
        message = (
            "The 'SpeechRecognition' package is required for audio processing. "
            "Please install it using 'pip install SpeechRecognition'."
        )
        raise ImportError(message)

    use_microphone = not audio_path
    if use_microphone:
        with sr.Microphone() as mic:
            # Ambient-noise adjustment runs with duration=1 before listening.
            recognizer.adjust_for_ambient_noise(mic, duration=1)
            captured = recognizer.listen(mic)
    else:
        with sr.AudioFile(audio_path) as audio_file:
            captured = recognizer.record(audio_file)
    return captured




[docs]
class AudioUtils:
    """
    A utility class for handling audio-related tasks such as
    speech-to-text (STT) and text-to-speech (TTS).
    """

    def __init__(self, recognizer=None, lang="en"):
        """
        Initialize the AudioUtils.

        Args:
            recognizer: An external speech recognizer instance for dependency
                injection (default: None). Required by ``transcribe_audio``.
            lang (str): Language code used for text-to-speech (default: "en").
                Transcription calls ``recognize_google`` without a language
                argument, so this value does not affect it.
        """
        self.lang = lang
        self.recognizer = recognizer  # External recognizer injected for testing

    def transcribe_audio(self, audio_path: str = None) -> str:
        """
        Transcribes audio from a file or the microphone to text.

        Args:
            audio_path (str): Path to the audio file. If None, audio is
                loaded from the microphone.

        Returns:
            str: Transcribed text, as returned by the recognizer's
            ``recognize_google`` method.

        Raises:
            SpeechToTextError: If no recognizer was provided, or if loading
                or recognizing the audio fails.
        """
        # Validate before loading audio: load_audio dereferences the
        # recognizer, so checking afterwards would never be reached. Keeping
        # the check outside the try block also prevents this error from being
        # caught and re-wrapped by the generic handler below.
        if self.recognizer is None:
            raise SpeechToTextError("No recognizer provided for speech transcription.")

        try:
            audio = load_audio(self.recognizer, audio_path)
            return self.recognizer.recognize_google(audio)
        except Exception as e:
            raise SpeechToTextError(f"Error during transcription: {e}") from e

    def text_to_speech(self, text: str):
        """
        Converts text to speech in the configured language and plays it.

        The audio is written to a temporary ``.mp3`` file, which is removed
        again once playback has finished or an error has occurred.

        Args:
            text (str): Text to convert to speech.

        Raises:
            TextToSpeechError: If synthesis, playback, or temporary-file
                handling fails.
        """
        try:
            # Only the file name is needed. Close the handle right away so
            # the synthesizer and the player can reopen the file by name,
            # which is not possible on Windows while the handle is open.
            with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
                tmp_path = tmp_file.name
            try:
                google_text_to_speech(text, tmp_path, lang=self.lang)
                play_audio(tmp_path)
            finally:
                # delete=False means cleanup is our job, on failure as well.
                os.unlink(tmp_path)
        except Exception as e:
            raise TextToSpeechError(f"Text-to-Speech conversion failed: {e}") from e