qopscribe/telegram_bot/handlers/audio_handler.py

81 lines
2.8 KiB
Python
Raw Normal View History

2025-02-23 14:12:10 +01:00
import asyncio
import contextlib
import logging
import os
import subprocess
import uuid
from functools import partial

from aiogram import types, Dispatcher, F
2025-02-23 18:00:28 +01:00
2025-02-23 14:12:10 +01:00
def _remove_quietly(path: str) -> None:
    """Delete *path*, ignoring the case where it does not exist."""
    with contextlib.suppress(FileNotFoundError):
        os.remove(path)


async def handle_voice_and_video(message: types.Message, redis_service, storage_path: str):
    """Transcribe an incoming voice message or video note and reply with the text.

    The media is downloaded into *storage_path*, converted to WAV with
    ``convert_to_wav``, and published as a task through *redis_service*. The
    handler then waits for the transcription text and replies with it (or with
    a timeout notice when no text comes back).

    Args:
        message: Incoming message. Only the VOICE and VIDEO_NOTE content types
            are processed; anything else is ignored.
        redis_service: Object providing awaitable ``publish_task(task_data)``
            and ``wait_for_text(user_id=..., chat_id=..., message_id=...)``.
        storage_path: Directory for the temporary media files; created if it
            does not exist.
    """
    file_id = None
    if message.content_type == types.ContentType.VOICE:
        file_id = message.voice.file_id
    elif message.content_type == types.ContentType.VIDEO_NOTE:
        file_id = message.video_note.file_id
    if not file_id:
        return

    file = await message.bot.get_file(file_id)
    file_path = file.file_path
    file_uuid = str(uuid.uuid4())

    # The UUID prefix keeps concurrent downloads from overwriting each other.
    original_filename = os.path.basename(file_path)
    os.makedirs(storage_path, exist_ok=True)
    temp_destination = os.path.join(storage_path, f"{file_uuid}_{original_filename}")
    wav_destination = os.path.join(storage_path, f"{file_uuid}.wav")

    try:
        await message.bot.download_file(file_path, temp_destination)

        # convert_to_wav blocks on an external process; run it in the default
        # executor so the event loop keeps serving other updates meanwhile.
        loop = asyncio.get_running_loop()
        await loop.run_in_executor(None, convert_to_wav, temp_destination, wav_destination)

        # The downloaded original is no longer needed once conversion is done.
        _remove_quietly(temp_destination)

        if not os.path.isfile(wav_destination):
            # Conversion produced nothing: do not queue a task for a missing file.
            await message.reply("Sorry, the audio could not be converted for transcription.")
            return

        task_data = {
            "uuid": file_uuid,
            "file_path": wav_destination,
            "user_id": message.from_user.id,
            "chat_id": message.chat.id,
            "message_id": message.message_id,
            "sum": 1,
        }
        await redis_service.publish_task(task_data)

        text = await redis_service.wait_for_text(
            user_id=message.from_user.id,
            chat_id=message.chat.id,
            message_id=message.message_id,
        )
    finally:
        # Always clean up, including when download, conversion or the wait fails.
        _remove_quietly(temp_destination)
        _remove_quietly(wav_destination)

    if text:
        await send_long_message(message, text)
    else:
        await message.reply("Sorry, transcription result was not received within the timeout.")
2025-02-23 18:32:20 +01:00
async def send_long_message(message: "types.Message", text: str, chunk_size: int = 4096):
    """Reply to *message* with *text*, split into consecutive chunks.

    Each chunk holds at most *chunk_size* characters and is sent as a separate
    reply, in order. Nothing is sent when *text* is empty.

    Args:
        message: Message to reply to; only its awaitable ``reply`` is used.
        text: Text to send.
        chunk_size: Maximum number of characters per reply. The default of
            4096 matches the message length limit documented by Telegram.

    Raises:
        ValueError: If *chunk_size* is smaller than 1.
    """
    if chunk_size < 1:
        # A negative step would make range() empty and silently drop the text.
        raise ValueError("chunk_size must be a positive integer")
    for start in range(0, len(text), chunk_size):
        await message.reply(text[start:start + chunk_size])
2025-02-23 14:12:10 +01:00
2025-02-23 18:11:12 +01:00
def convert_to_wav(input_file: str, output_file: str) -> bool:
    """Convert a media file to 16 kHz mono 16-bit PCM WAV using FFmpeg.

    An existing *output_file* is overwritten (``-y``). When FFmpeg exits with
    a non-zero status, its stderr output is logged as an error.

    Args:
        input_file: Path of the file to convert.
        output_file: Path of the WAV file to write.

    Returns:
        True if FFmpeg exited with status 0, False otherwise.

    Raises:
        FileNotFoundError: If the ``ffmpeg`` executable cannot be found.
    """
    command = [
        "ffmpeg", "-y", "-i", input_file,
        "-ar", "16000", "-ac", "1", "-c:a", "pcm_s16le",
        output_file,
    ]
    result = subprocess.run(
        command,
        # Keep ffmpeg from reading the parent process's stdin.
        stdin=subprocess.DEVNULL,
        # Only stderr is needed, for the error log below.
        stdout=subprocess.DEVNULL,
        stderr=subprocess.PIPE,
    )
    if result.returncode != 0:
        logging.getLogger(__name__).error(
            "ffmpeg exited with code %s while converting %s: %s",
            result.returncode,
            input_file,
            result.stderr.decode("utf-8", errors="replace").strip(),
        )
        return False
    return True
2025-02-23 18:11:12 +01:00
2025-02-23 14:12:10 +01:00
def register_audio_handlers(dp: Dispatcher, redis_service, storage_path: str):
    """Register the voice / video-note handler on ``dp.message``.

    ``handle_voice_and_video`` is bound to *redis_service* and *storage_path*
    with ``functools.partial`` and registered with a filter that accepts only
    the VOICE and VIDEO_NOTE content types.
    """
    accepted_types = {types.ContentType.VOICE, types.ContentType.VIDEO_NOTE}
    bound_handler = partial(
        handle_voice_and_video,
        redis_service=redis_service,
        storage_path=storage_path,
    )
    dp.message.register(bound_handler, F.content_type.in_(accepted_types))