83 lines
2.3 KiB
Python
83 lines
2.3 KiB
Python
import logging
|
|
import subprocess
|
|
from dataclasses import dataclass
|
|
from pathlib import Path
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
@dataclass
class ChunkInfo:
    """Describe one audio chunk: where its file lives and how it is timed.

    Attributes:
        path: Location of the audio file that holds this chunk.
        start_offset: Position, in seconds, at which the chunk begins
            within the audio it was cut from.
        duration: Length of the chunk in seconds.
    """

    path: str
    start_offset: float
    duration: float
|
|
|
|
|
|
def get_audio_duration(wav_path: str) -> float:
    """Get duration of audio file in seconds using ffprobe.

    Args:
        wav_path: Path of the audio file to inspect.

    Returns:
        The ``format=duration`` value printed by ffprobe, as a float number
        of seconds.

    Raises:
        RuntimeError: If ffprobe exits with a non-zero status, or exits
            successfully but prints something that is not a number (for
            example an empty line or ``N/A``).
        subprocess.TimeoutExpired: If ffprobe runs longer than 30 seconds.
    """
    cmd = [
        # "error" rather than "quiet": with "quiet" ffprobe writes nothing to
        # stderr, so the failure message below would always be empty.
        "ffprobe", "-v", "error",
        "-show_entries", "format=duration",
        "-of", "csv=p=0",
        # Pass the input through -i so that a path starting with "-" is not
        # mistaken for a command-line option.
        "-i", wav_path,
    ]
    result = subprocess.run(cmd, capture_output=True, text=True, timeout=30)
    if result.returncode != 0:
        raise RuntimeError(f"ffprobe failed: {result.stderr[:300]}")

    raw_duration = result.stdout.strip()
    try:
        return float(raw_duration)
    except ValueError as exc:
        # ffprobe can succeed without knowing the duration; surface that as
        # the same error type as any other ffprobe failure instead of an
        # opaque "could not convert string to float".
        raise RuntimeError(
            f"ffprobe returned no usable duration for {wav_path!r}: "
            f"{raw_duration[:300]!r}"
        ) from exc
|
|
|
|
|
|
def chunk_audio(wav_path: str, max_duration_sec: int = 1800) -> list[ChunkInfo]:
    """Split audio into chunks if longer than max_duration_sec.

    When the audio is no longer than ``max_duration_sec`` it is returned as a
    single chunk that points at ``wav_path`` itself and nothing is written.
    Otherwise each chunk is cut with ffmpeg (``-c copy``, i.e. without
    re-encoding) into a ``chunks`` directory next to the source file, named
    ``<stem>_chunkNNN.wav``.

    Args:
        wav_path: Path to the preprocessed WAV file.
        max_duration_sec: Maximum chunk duration in seconds (default 30 min).
            Must be greater than zero.

    Returns:
        List of ChunkInfo with paths and timing metadata, in playback order.

    Raises:
        ValueError: If ``max_duration_sec`` is not greater than zero.
        RuntimeError: If reading the duration fails or ffmpeg exits with a
            non-zero status for any chunk.
        subprocess.TimeoutExpired: If an ffmpeg call runs longer than 300
            seconds.
    """
    # A zero or negative chunk length would never advance the offset in the
    # loop below, spawning ffmpeg forever. Written as "not > 0" so that NaN
    # is rejected as well.
    if not max_duration_sec > 0:
        raise ValueError(
            f"max_duration_sec must be greater than zero, got {max_duration_sec!r}"
        )

    total_duration = get_audio_duration(wav_path)
    logger.info("Audio duration: %.1f sec", total_duration)

    if total_duration <= max_duration_sec:
        return [ChunkInfo(path=wav_path, start_offset=0.0, duration=total_duration)]

    src = Path(wav_path)
    chunk_dir = src.parent / "chunks"
    chunk_dir.mkdir(exist_ok=True)

    chunks: list[ChunkInfo] = []
    idx = 0
    offset = 0.0
    while offset < total_duration:
        chunk_path = str(chunk_dir / f"{src.stem}_chunk{idx:03d}.wav")
        # The last chunk is whatever is left over.
        duration = min(max_duration_sec, total_duration - offset)

        cmd = [
            "ffmpeg", "-y",
            "-ss", str(offset),
            "-i", wav_path,
            "-t", str(duration),
            "-c", "copy",
            chunk_path,
        ]
        result = subprocess.run(cmd, capture_output=True, text=True, timeout=300)
        if result.returncode != 0:
            raise RuntimeError(f"Chunk {idx} failed: {result.stderr[:300]}")

        chunks.append(
            ChunkInfo(path=chunk_path, start_offset=offset, duration=duration)
        )
        logger.info("Chunk %d: %.1fs - %.1fs", idx, offset, offset + duration)

        idx += 1
        # Derive the next offset from the chunk index instead of summing
        # durations, so rounding error cannot accumulate across chunks when
        # a fractional chunk length is used.
        offset = float(idx * max_duration_sec)

    return chunks
|