"""
fluxion_ai.utils.audio_utils
~~~~~~~~~~~~~~~~~~~~~~~~~~~~
This module provides utility functions for audio processing tasks such as
speech-to-text (STT) and text-to-speech (TTS) conversion.
Classes:
- AudioUtils: A utility class for handling audio-related tasks.
Exceptions:
- AudioUtilsError: Base exception for AudioUtils errors.
- SpeechToTextError: Exception raised for Speech-to-Text errors.
- TextToSpeechError: Exception raised for Text-to-Speech errors.
Functions:
- google_text_to_speech: Converts text to speech and saves it as an audio file.
- play_audio: Plays an audio file.
- load_audio: Loads audio data from a file or microphone.
"""
import os
import tempfile
from typing import Any, Optional
[docs]
class AudioUtilsError(Exception):
    """Common base class for every error raised by the audio utilities."""
[docs]
class SpeechToTextError(AudioUtilsError):
    """Signals that a speech-to-text (transcription) operation failed."""
[docs]
class TextToSpeechError(AudioUtilsError):
    """Signals that a text-to-speech (synthesis or playback) operation failed."""
[docs]
def google_text_to_speech(text: str, filepath: str, lang: str = "en") -> None:
    """
    Converts text to speech and saves it as an audio file.

    The ``gtts`` package is imported lazily so that importing this module
    does not require it to be installed.

    Args:
        text (str): Text to be converted to speech.
        filepath (str): Path where the audio file will be saved.
        lang (str): Language code for the TTS (default: "en").

    Raises:
        ImportError: If the 'gtts' package is not installed.
    """
    try:
        from gtts import gTTS
    except ImportError as exc:
        # Chain explicitly so the original import failure stays attached
        # as the cause of the user-facing installation hint.
        raise ImportError("The 'gtts' package is required for text-to-speech conversion. "
                          "Please install it using 'pip install gtts'.") from exc

    tts = gTTS(text=text, lang=lang)
    tts.save(filepath)
[docs]
def play_audio(filepath: str) -> None:
    """
    Plays an audio file.

    The ``playsound`` package is imported lazily so that importing this
    module does not require it to be installed.

    Args:
        filepath (str): Path to the audio file to be played.

    Raises:
        ImportError: If the 'playsound' package is not installed.
    """
    try:
        from playsound import playsound
    except ImportError as exc:
        # Chain explicitly so the original import failure stays attached
        # as the cause of the user-facing installation hint.
        raise ImportError("The 'playsound' package is required for playing audio. "
                          "Please install it using 'pip install playsound'.") from exc

    playsound(filepath)
[docs]
def load_audio(recognizer: Any, audio_path: Optional[str] = None):
    """
    Loads audio data from a file or microphone.

    The ``speech_recognition`` package is imported lazily so that importing
    this module does not require it to be installed.

    Args:
        recognizer (Any): Recognizer object used to capture the audio. Its
            ``record`` method is called for file input; its
            ``adjust_for_ambient_noise`` and ``listen`` methods are called
            for microphone input.
        audio_path (Optional[str]): Path to the audio file. If None (or an
            empty string), loads audio from the microphone.

    Returns:
        The object returned by ``recognizer.record`` or ``recognizer.listen``
        (presumably a ``speech_recognition.AudioData`` when a real
        ``speech_recognition`` recognizer is supplied).

    Raises:
        ImportError: If the 'SpeechRecognition' package is not installed.
    """
    try:
        import speech_recognition as sr
    except ImportError as exc:
        # Chain explicitly so the original import failure stays attached
        # as the cause of the user-facing installation hint.
        raise ImportError("The 'SpeechRecognition' package is required for audio processing. "
                          "Please install it using 'pip install SpeechRecognition'.") from exc

    if audio_path:
        with sr.AudioFile(audio_path) as source:
            return recognizer.record(source)

    with sr.Microphone() as source:
        # Let the recognizer sample the microphone for one second before
        # it starts listening for speech.
        recognizer.adjust_for_ambient_noise(source, duration=1)
        return recognizer.listen(source)
[docs]
class AudioUtils:
    """
    A utility class for handling audio-related tasks such as
    speech-to-text (STT) and text-to-speech (TTS).
    """

    def __init__(self, recognizer=None, lang="en"):
        """
        Initialize the AudioUtils.

        Args:
            recognizer: An external speech recognizer instance for dependency
                injection (default: None). ``transcribe_audio`` requires one.
            lang (str): Language code used for text-to-speech (default: "en").
                ``transcribe_audio`` does not pass it to the recognizer.
        """
        self.lang = lang
        self.recognizer = recognizer  # External recognizer injected for testing

    def transcribe_audio(self, audio_path: Optional[str] = None) -> str:
        """
        Transcribes audio to text using the injected recognizer.

        Args:
            audio_path (Optional[str]): Path to an audio file. If None, the
                audio is captured from the microphone by ``load_audio``.

        Returns:
            str: Transcribed text, as returned by the recognizer's
            ``recognize_google`` method.

        Raises:
            SpeechToTextError: If no recognizer was provided, or if loading
                or transcribing the audio fails.
        """
        # Fail fast with a clear message *before* touching any audio source;
        # otherwise load_audio would dereference the missing recognizer.
        if self.recognizer is None:
            raise SpeechToTextError("No recognizer provided for speech transcription.")

        try:
            audio = load_audio(self.recognizer, audio_path)
            return self.recognizer.recognize_google(audio)
        except Exception as e:
            # Keep the underlying error attached as the cause.
            raise SpeechToTextError(f"Error during transcription: {e}") from e

    def text_to_speech(self, text: str) -> None:
        """
        Converts text to speech, plays the audio, and removes the temporary file.

        The speech is synthesized with ``google_text_to_speech`` in the
        language configured on this instance and played with ``play_audio``.

        Args:
            text (str): Text to convert to speech.

        Raises:
            TextToSpeechError: If text-to-speech conversion or playback fails.
        """
        tmp_path = None
        try:
            # Only reserve a unique file name here and close our handle right
            # away, so the backends reopen the file by name and it can be
            # deleted afterwards (an open handle blocks deletion on Windows).
            with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
                tmp_path = tmp_file.name
            google_text_to_speech(text, tmp_path, lang=self.lang)
            play_audio(tmp_path)
        except Exception as e:
            # Keep the underlying error attached as the cause.
            raise TextToSpeechError(f"Text-to-Speech conversion failed: {e}") from e
        finally:
            # Always clean up, even when synthesis or playback failed.
            if tmp_path is not None:
                try:
                    os.unlink(tmp_path)
                except OSError:
                    # Best-effort cleanup: a leftover temp file must not mask
                    # the real outcome of the conversion.
                    pass