import logging
import subprocess
from pathlib import Path

logger = logging.getLogger(__name__)

SUPPORTED_FORMATS = {".m4a", ".mp3", ".wav", ".aac"}

# Wall-clock limit for one ffmpeg invocation, in seconds.
_FFMPEG_TIMEOUT_S = 600
# ffmpeg reports the decisive error at the END of stderr, so keep the tail.
_STDERR_TAIL_CHARS = 500


def _remove_quietly(path: Path) -> None:
    """Delete *path* if it exists; log (but do not raise) on other OS errors."""
    try:
        path.unlink()
    except FileNotFoundError:
        pass
    except OSError as exc:
        logger.warning("Could not remove temporary file %s: %s", path, exc)


def preprocess_audio(input_path: str, output_dir: str) -> str:
    """Convert audio to mono 16kHz PCM WAV with normalization and DC offset removal.

    The file is run through ffmpeg with a 10 Hz high-pass filter followed by
    ``loudnorm`` (I=-16, TP=-1.5, LRA=11) and written as 16-bit mono at
    16 kHz. ffmpeg writes to a sibling ``*_processed.partial.wav`` file that
    is renamed to ``<stem>_processed.wav`` only after ffmpeg exits
    successfully, so a failed or timed-out run never leaves a truncated file
    at the final path.

    Args:
        input_path: Path to the source audio file.
        output_dir: Directory for the processed file (created if missing).

    Returns:
        Path to the processed WAV file.

    Raises:
        FileNotFoundError: If input_path does not refer to an existing
            regular file.
        ValueError: If file format is not supported.
        RuntimeError: If ffmpeg processing fails: the executable is missing,
            the run exceeds the time limit, or ffmpeg exits non-zero.
    """
    src = Path(input_path)
    # is_file() rather than exists(): a directory named "x.wav" is not audio.
    if not src.is_file():
        raise FileNotFoundError(f"Audio file not found: {input_path}")
    if src.suffix.lower() not in SUPPORTED_FORMATS:
        raise ValueError(
            f"Unsupported format: {src.suffix}. Supported: {SUPPORTED_FORMATS}"
        )

    out = Path(output_dir) / f"{src.stem}_processed.wav"
    out.parent.mkdir(parents=True, exist_ok=True)
    # Keep the .wav suffix so ffmpeg still infers the container format.
    staging = out.with_name(f"{src.stem}_processed.partial.wav")

    cmd = [
        "ffmpeg",
        "-hide_banner",  # keep stderr focused on real diagnostics
        "-nostdin",  # never read/consume the parent's stdin
        "-y",
        "-i", str(src),
        "-ac", "1",
        "-ar", "16000",
        "-sample_fmt", "s16",
        "-af", "highpass=f=10,loudnorm=I=-16:TP=-1.5:LRA=11",
        str(staging),
    ]

    logger.info("Preprocessing: %s -> %s", src.name, out.name)
    try:
        result = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            # ffmpeg may echo metadata in arbitrary encodings; never let
            # decoding of its output abort the call.
            errors="replace",
            timeout=_FFMPEG_TIMEOUT_S,
        )
    except FileNotFoundError as exc:
        # Distinguish "ffmpeg is not installed" from "input file is missing".
        raise RuntimeError("ffmpeg failed: executable not found") from exc
    except subprocess.TimeoutExpired as exc:
        _remove_quietly(staging)
        raise RuntimeError(
            f"ffmpeg failed: timed out after {_FFMPEG_TIMEOUT_S}s"
        ) from exc

    if result.returncode != 0:
        _remove_quietly(staging)
        raise RuntimeError(
            f"ffmpeg failed: {result.stderr[-_STDERR_TAIL_CHARS:]}"
        )

    # Publish the finished file in one step.
    staging.replace(out)

    logger.info("Preprocessing complete: %s", out.name)
    return str(out)