This commit is contained in:
itqop 2026-02-19 05:19:03 +03:00
parent eb08c1fef9
commit 88eae8d3d1
2 changed files with 7 additions and 1 deletions

View File

@@ -1,4 +1,5 @@
faster-whisper faster-whisper
pyannote.audio pyannote.audio
python-dotenv python-dotenv
soundfile
tqdm tqdm

View File

@@ -1,6 +1,7 @@
import logging import logging
from dataclasses import dataclass from dataclasses import dataclass
import soundfile as sf
import torch import torch
from pyannote.audio import Pipeline from pyannote.audio import Pipeline
from pyannote.audio.pipelines.utils.hook import ProgressHook from pyannote.audio.pipelines.utils.hook import ProgressHook
@@ -47,6 +48,10 @@ class DiarizationEngine:
""" """
logger.info("Diarizing: %s", audio_path) logger.info("Diarizing: %s", audio_path)
data, sample_rate = sf.read(audio_path, dtype="float32")
waveform = torch.from_numpy(data).unsqueeze(0)
audio_input = {"waveform": waveform, "sample_rate": sample_rate}
kwargs = {} kwargs = {}
if min_speakers is not None: if min_speakers is not None:
kwargs["min_speakers"] = min_speakers kwargs["min_speakers"] = min_speakers
@@ -54,7 +59,7 @@ class DiarizationEngine:
kwargs["max_speakers"] = max_speakers kwargs["max_speakers"] = max_speakers
with ProgressHook() as hook: with ProgressHook() as hook:
diarization = self._pipeline(audio_path, hook=hook, **kwargs) diarization = self._pipeline(audio_input, hook=hook, **kwargs)
turns = [] turns = []
for turn, speaker in diarization.exclusive_speaker_diarization: for turn, speaker in diarization.exclusive_speaker_diarization: