transcribe-interview/transcriber/audio/chunking.py

83 lines
2.3 KiB
Python

import logging
import subprocess
from dataclasses import dataclass
from pathlib import Path
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
@dataclass
class ChunkInfo:
    """Describe one chunk of audio: where it lives and where it sits in time.

    Attributes:
        path: Location of the audio file holding this chunk.
        start_offset: Position, in seconds, at which this chunk begins
            within the source audio.
        duration: Length of this chunk in seconds.
    """

    path: str
    start_offset: float
    duration: float
def get_audio_duration(wav_path: str) -> float:
    """Get duration of audio file in seconds using ffprobe.

    Args:
        wav_path: Path to the audio file to inspect.

    Returns:
        The duration in seconds, parsed from ffprobe's ``format=duration``
        entry.

    Raises:
        RuntimeError: If ffprobe exits with a non-zero status, or exits
            successfully but prints something that is not a number.
        subprocess.TimeoutExpired: If ffprobe runs longer than 30 seconds.
    """
    cmd = [
        "ffprobe", "-v", "quiet",
        "-show_entries", "format=duration",
        # csv with p=0 prints only the bare value, without the section name.
        "-of", "csv=p=0",
        wav_path,
    ]
    result = subprocess.run(cmd, capture_output=True, text=True, timeout=30)
    if result.returncode != 0:
        raise RuntimeError(f"ffprobe failed: {result.stderr[:300]}")

    raw = result.stdout.strip()
    try:
        return float(raw)
    except ValueError as exc:
        # ffprobe can succeed yet report "N/A" (or nothing) when it cannot
        # determine a duration; surface that as the same error type as any
        # other ffprobe failure instead of a bare float() ValueError.
        raise RuntimeError(
            f"ffprobe returned no usable duration for {wav_path!r}: "
            f"{raw[:300]!r}"
        ) from exc
def chunk_audio(wav_path: str, max_duration_sec: int = 1800) -> list[ChunkInfo]:
    """Split audio into chunks if longer than max_duration_sec.

    Audio that already fits within the limit is not copied: a single
    ChunkInfo pointing at ``wav_path`` itself is returned. Otherwise
    consecutive chunks are cut with ffmpeg (``-c copy``, so no re-encoding)
    into a ``chunks`` directory next to the source file, named
    ``<stem>_chunkNNN.wav``. Existing files with those names are
    overwritten (``-y``).

    Args:
        wav_path: Path to the preprocessed WAV file.
        max_duration_sec: Maximum chunk duration in seconds (default 30 min).
            Must be greater than zero.

    Returns:
        List of ChunkInfo with paths and timing metadata, in playback order.

    Raises:
        ValueError: If max_duration_sec is zero or negative.
        RuntimeError: If ffmpeg exits with a non-zero status for any chunk
            (errors from get_audio_duration propagate unchanged).
    """
    # A zero or negative limit would never advance ``offset`` in the loop
    # below, so ffmpeg would be spawned forever; reject it before any work.
    if max_duration_sec <= 0:
        raise ValueError(
            f"max_duration_sec must be positive, got {max_duration_sec!r}"
        )

    total_duration = get_audio_duration(wav_path)
    logger.info("Audio duration: %.1f sec", total_duration)

    if total_duration <= max_duration_sec:
        return [ChunkInfo(path=wav_path, start_offset=0.0, duration=total_duration)]

    chunks = []
    src = Path(wav_path)
    chunk_dir = src.parent / "chunks"
    chunk_dir.mkdir(exist_ok=True)

    offset = 0.0
    idx = 0
    while offset < total_duration:
        chunk_path = str(chunk_dir / f"{src.stem}_chunk{idx:03d}.wav")
        # The final chunk is whatever is left over, which may be shorter
        # than the limit.
        remaining = total_duration - offset
        duration = min(max_duration_sec, remaining)

        cmd = [
            "ffmpeg", "-y",
            # -ss placed before -i seeks in the input rather than the output.
            "-ss", str(offset),
            "-i", wav_path,
            "-t", str(duration),
            "-c", "copy",
            chunk_path,
        ]
        result = subprocess.run(cmd, capture_output=True, text=True, timeout=300)
        if result.returncode != 0:
            raise RuntimeError(f"Chunk {idx} failed: {result.stderr[:300]}")

        chunks.append(ChunkInfo(
            path=chunk_path,
            start_offset=offset,
            duration=duration,
        ))
        logger.info("Chunk %d: %.1fs - %.1fs", idx, offset, offset + duration)

        offset += duration
        idx += 1

    return chunks