speech-api/modules/tts.py

43 lines
1.6 KiB
Python
Raw Normal View History

2023-12-22 00:43:45 +01:00
from vosk_tts import Model, Synth
import os
from moviepy.editor import AudioFileClip
class TTS:
    """Wrapper around a vosk_tts model/synthesizer pair with audio helpers."""

    def __init__(self, model_name="vosk-model-tts-ru-0.4-multi"):
        """Load a vosk_tts model by name and build a synthesizer for it.

        Args:
            model_name: Value forwarded to ``Model(model_name=...)``.
        """
        self.model = Model(model_name=model_name)
        self.synth = Synth(self.model)

    def text_to_speech(self, text, output_path, speaker_id=1):
        """Synthesize ``text`` into the file at ``output_path``.

        Args:
            text: Text handed to the synthesizer.
            output_path: Destination path handed to the synthesizer.
            speaker_id: Speaker identifier handed to the synthesizer.
        """
        self.synth.synth(text, output_path, speaker_id)

    def batch_text_to_speech(self, text_list, output_folder):
        """Synthesize every ``(text, speaker_id)`` pair into ``output_folder``.

        Each pair is written to ``<output_folder>/output_<speaker_id>.wav``.

        Args:
            text_list: Iterable of ``(text, speaker_id)`` pairs.
            output_folder: Folder prefix used to build each output path.
        """
        # NOTE(review): the file name depends only on speaker_id, so two
        # entries with the same speaker overwrite each other. The naming is
        # kept as-is because callers may rely on it -- confirm intent.
        for text, speaker_id in text_list:
            output_path = f"{output_folder}/output_{speaker_id}.wav"
            self.text_to_speech(text, output_path, speaker_id)

    @staticmethod
    def create_text_speaker_tuples(final_result, vad_timing):
        """Build one ``(final_result, speaker_id)`` tuple per timing pair.

        ``vad_timing`` is consumed two elements at a time (a trailing unpaired
        element is ignored). The paired values themselves are not used; only
        the number of pairs matters. Speaker ids alternate 1, 4, 1, 4, ...

        Args:
            final_result: Text placed in every tuple.
            vad_timing: Sliceable sequence of timing values, presumably
                alternating start/end marks -- confirm against the caller.

        Returns:
            List of ``(final_result, speaker_id)`` tuples.
        """
        text_speaker_tuples = []
        speaker_id = 1
        for _start, _end in zip(vad_timing[::2], vad_timing[1::2]):
            text_speaker_tuples.append((final_result, speaker_id))
            # Toggle between the two speaker ids used by this pipeline.
            speaker_id = 4 if speaker_id == 1 else 1
        return text_speaker_tuples

    @staticmethod
    def create_output_folder(folder_name):
        """Create ``folder_name`` (and parents); do nothing if it exists."""
        os.makedirs(folder_name, exist_ok=True)

    def set_model(self, model_name):
        """Switch to another vosk_tts model and rebuild the synthesizer.

        Args:
            model_name: Value forwarded to ``Model(model_name=...)``.
        """
        # Load the model the same way __init__ does; the previous
        # ``self.model.set_string(...)`` call does not match the Model API
        # used by the constructor.
        self.model = Model(model_name=model_name)
        self.synth = Synth(self.model)

    def change_speaker_id(self, text_speaker_tuples, new_speaker_id):
        """Return a new list with every tuple's speaker id replaced.

        Args:
            text_speaker_tuples: Iterable of ``(text, speaker_id)`` pairs.
            new_speaker_id: Speaker id to put in every returned tuple.

        Returns:
            List of ``(text, new_speaker_id)`` tuples; the input is not
            modified.
        """
        return [(text, new_speaker_id) for text, _ in text_speaker_tuples]

    def combine_audio_files(self, audio_paths, output_path):
        """Concatenate audio files in order and write the result.

        Args:
            audio_paths: Iterable of paths to the audio files to join.
            output_path: Path of the combined audio file to write.

        Raises:
            ValueError: If ``audio_paths`` is empty.
        """
        paths = list(audio_paths)
        if not paths:
            raise ValueError("audio_paths must contain at least one file")

        # The module-level import only brings in AudioFileClip, so the
        # concatenation helper is imported here to keep this method usable.
        from moviepy.editor import concatenate_audioclips

        clips = []
        try:
            for path in paths:
                clips.append(AudioFileClip(path))
            combined_clip = concatenate_audioclips(clips)
            combined_clip.write_audiofile(output_path)
        finally:
            # Release the readers even when loading or writing fails.
            for clip in clips:
                clip.close()