This commit is contained in:
itqop 2026-02-19 05:19:03 +03:00
parent eb08c1fef9
commit 88eae8d3d1
2 changed files with 7 additions and 1 deletion

View File

@@ -1,4 +1,5 @@
faster-whisper
pyannote.audio
python-dotenv
soundfile
tqdm

View File

@@ -1,6 +1,7 @@
import logging
from dataclasses import dataclass
import soundfile as sf
import torch
from pyannote.audio import Pipeline
from pyannote.audio.pipelines.utils.hook import ProgressHook
@@ -47,6 +48,10 @@ class DiarizationEngine:
"""
logger.info("Diarizing: %s", audio_path)
data, sample_rate = sf.read(audio_path, dtype="float32")
waveform = torch.from_numpy(data).unsqueeze(0)
audio_input = {"waveform": waveform, "sample_rate": sample_rate}
kwargs = {}
if min_speakers is not None:
kwargs["min_speakers"] = min_speakers
@@ -54,7 +59,7 @@ class DiarizationEngine:
kwargs["max_speakers"] = max_speakers
with ProgressHook() as hook:
-diarization = self._pipeline(audio_path, hook=hook, **kwargs)
+diarization = self._pipeline(audio_input, hook=hook, **kwargs)
turns = []
for turn, speaker in diarization.exclusive_speaker_diarization: