# Listing metadata from the original code viewer: 43 lines, 1.6 KiB, Python.
import os

from moviepy.editor import AudioFileClip, concatenate_audioclips
from vosk_tts import Model, Synth

class TTS:
    """Convenience wrapper around a ``vosk_tts`` model and synthesizer.

    The instance keeps two attributes: ``model`` (a ``vosk_tts.Model``) and
    ``synth`` (a ``vosk_tts.Synth`` built from that model).  The remaining
    methods are helpers for batch synthesis, for preparing
    ``(text, speaker_id)`` work lists, and for joining audio files.
    """

    def __init__(self, model_name="vosk-model-tts-ru-0.4-multi"):
        """Load the model called ``model_name`` and bind a synthesizer to it."""
        self.model = Model(model_name=model_name)
        self.synth = Synth(self.model)

    def text_to_speech(self, text, output_path, speaker_id=1):
        """Synthesize ``text`` with the voice ``speaker_id``.

        All three arguments are forwarded unchanged to ``Synth.synth``;
        presumably the synthesizer writes the audio to ``output_path``
        (verify against the vosk_tts documentation).
        """
        self.synth.synth(text, output_path, speaker_id)

    def batch_text_to_speech(self, text_list, output_folder, unique_names=False):
        """Synthesize every ``(text, speaker_id)`` pair in ``text_list``.

        Args:
            text_list: Iterable of ``(text, speaker_id)`` pairs.
            output_folder: Folder prefix for the generated files.  It is not
                created here; see ``create_output_folder``.
            unique_names: When false (the default, legacy behaviour) each file
                is named ``output_<speaker_id>.wav``, so two entries with the
                same speaker id target the same path and the later one
                replaces the earlier one.  When true the entry's position is
                added (``output_<index>_<speaker_id>.wav``) so every entry
                gets its own file.

        Returns:
            The list of output paths, in input order (one per entry, so it may
            contain duplicates when ``unique_names`` is false).
        """
        written_paths = []
        for index, (text, speaker_id) in enumerate(text_list):
            if unique_names:
                output_path = f"{output_folder}/output_{index}_{speaker_id}.wav"
            else:
                output_path = f"{output_folder}/output_{speaker_id}.wav"
            self.text_to_speech(text, output_path, speaker_id)
            written_paths.append(output_path)
        return written_paths

    @staticmethod
    def create_text_speaker_tuples(final_result, vad_timing, speaker_ids=(1, 4)):
        """Build one ``(final_result, speaker_id)`` tuple per timing pair.

        ``vad_timing`` is read as a flat sequence of consecutive
        ``start, end`` values; a trailing unpaired value is ignored.  Only the
        number of pairs matters: the same ``final_result`` is attached to
        every tuple, and speaker ids are taken from ``speaker_ids`` in
        rotation (``1, 4, 1, 4, ...`` by default).

        Args:
            final_result: Value stored as the first element of every tuple.
            vad_timing: Sliceable sequence of alternating start/end values.
            speaker_ids: Non-empty iterable of ids to rotate through.

        Returns:
            A list of ``(final_result, speaker_id)`` tuples.

        Raises:
            ValueError: If ``speaker_ids`` is empty.
        """
        rotation = tuple(speaker_ids)
        if not rotation:
            raise ValueError("speaker_ids must contain at least one id")

        # Pairing via zip keeps the original semantics: an odd trailing
        # element of vad_timing has no partner and is dropped.
        segments = zip(vad_timing[::2], vad_timing[1::2])
        return [
            (final_result, rotation[position % len(rotation)])
            for position, _segment in enumerate(segments)
        ]

    @staticmethod
    def create_output_folder(folder_name):
        """Create ``folder_name`` (and parents); do nothing if it exists."""
        os.makedirs(folder_name, exist_ok=True)

    def set_model(self, model_name):
        """Switch to the model called ``model_name`` and rebuild the synthesizer.

        NOTE(review): this used to call ``self.model.set_string(...)``, which
        ``vosk_tts.Model`` does not appear to provide; the model is now
        constructed exactly as ``__init__`` does it.
        """
        self.model = Model(model_name=model_name)
        self.synth = Synth(self.model)

    def change_speaker_id(self, text_speaker_tuples, new_speaker_id):
        """Return a new list with every tuple's speaker id set to ``new_speaker_id``.

        The input list is not modified; texts keep their order.
        """
        return [(text, new_speaker_id) for text, _ in text_speaker_tuples]

    def combine_audio_files(self, audio_paths, output_path):
        """Concatenate the audio files in ``audio_paths`` into ``output_path``.

        Args:
            audio_paths: Iterable of paths, joined in the given order.
            output_path: Destination passed to ``write_audiofile``.

        Raises:
            ValueError: If ``audio_paths`` is empty.
        """
        paths = list(audio_paths)
        if not paths:
            raise ValueError("audio_paths must contain at least one file")

        clips = []
        try:
            for path in paths:
                clips.append(AudioFileClip(path))
            combined_clip = concatenate_audioclips(clips)
            combined_clip.write_audiofile(output_path)
        finally:
            # Close every clip that was opened, even if a later open or the
            # write failed, so the underlying readers are released.
            for clip in clips:
                clip.close()