Fix bugs: switch diarization to pyannote community-1 and iterate exclusive speaker turns
This commit is contained in:
parent
de07a045ce
commit
eb08c1fef9
|
|
@@ -5,7 +5,7 @@
|
|||
## Возможности
|
||||
|
||||
- Распознавание речи (faster-whisper, модель large-v3)
|
||||
-- Диаризация спикеров (pyannote.audio 3.1)
|
||||
+- Диаризация спикеров (pyannote.audio community-1)
|
||||
- Поддержка форматов: `.m4a`, `.mp3`, `.wav`, `.aac`
|
||||
- Аудио до 3 часов / 2 ГБ
|
||||
- Автоматический чанкинг для длинных записей (>30 мин)
|
||||
|
|
@@ -38,8 +38,7 @@ HF_TOKEN=hf_your_token_here
|
|||
|
||||
> Для получения токена: https://huggingface.co/settings/tokens
|
||||
> Необходимо принять условия использования моделей:
|
||||
-> - https://huggingface.co/pyannote/speaker-diarization-3.1
|
||||
-> - https://huggingface.co/pyannote/segmentation-3.0
|
||||
+> - https://huggingface.co/pyannote/speaker-diarization-community-1
|
||||
|
||||
## Использование
|
||||
|
||||
|
|
|
|||
|
|
@@ -1,5 +1,4 @@
|
|||
faster-whisper
|
||||
pyannote.audio
|
||||
python-dotenv
|
||||
pydub
|
||||
tqdm
|
||||
|
|
|
|||
|
|
@@ -23,7 +23,7 @@ class DiarizationEngine:
|
|||
def __init__(self, hf_token: str, device: str):
|
||||
logger.info("Loading diarization pipeline on %s", device)
|
||||
self._pipeline = Pipeline.from_pretrained(
|
||||
- "pyannote/speaker-diarization-3.1",
|
||||
+ "pyannote/speaker-diarization-community-1",
|
||||
token=hf_token,
|
||||
)
|
||||
self._device = torch.device(device)
|
||||
|
|
@@ -57,7 +57,7 @@ class DiarizationEngine:
|
|||
diarization = self._pipeline(audio_path, hook=hook, **kwargs)
|
||||
|
||||
turns = []
|
||||
- for turn, _, speaker in diarization.itertracks(yield_label=True):
|
||||
+ for turn, speaker in diarization.exclusive_speaker_diarization:
|
||||
turns.append(SpeakerTurn(
|
||||
start=turn.start,
|
||||
end=turn.end,
|
||||
|
|
|
|||
Loading…
Reference in New Issue