diff --git a/telegram_bot/handlers/audio_handler.py b/telegram_bot/handlers/audio_handler.py index 05cc5b2..774bf61 100644 --- a/telegram_bot/handlers/audio_handler.py +++ b/telegram_bot/handlers/audio_handler.py @@ -2,6 +2,7 @@ import os import uuid from functools import partial from aiogram import types, Dispatcher, F +import ffmpeg async def handle_voice_and_video(message: types.Message, redis_service, storage_path: str): file_id = None @@ -17,24 +18,60 @@ async def handle_voice_and_video(message: types.Message, redis_service, storage_ file_path = file.file_path file_uuid = str(uuid.uuid4()) - filename = f"{file_uuid}_{os.path.basename(file_path)}" + original_filename = os.path.basename(file_path) + temp_filename = f"{file_uuid}_{original_filename}" + os.makedirs(storage_path, exist_ok=True) - destination = os.path.join(storage_path, filename) + + temp_destination = os.path.join(storage_path, temp_filename) - await message.bot.download_file(file_path, destination) + await message.bot.download_file(file_path, temp_destination) + + wav_filename = f"{file_uuid}.wav" + wav_destination = os.path.join(storage_path, wav_filename) + + convert_to_wav(temp_destination, wav_destination) + + os.remove(temp_destination) task_data = { "uuid": file_uuid, - "file_local_path": destination, + "file_path": wav_destination, "user_id": message.from_user.id, - "chat_id": message.chat.id + "chat_id": message.chat.id, + "message_id": message.message_id } - redis_service.publish_task(task_data) - await message.reply("Your message has been received and queued for processing.") + + await redis_service.publish_task(task_data) + await message.reply("Your message has been received, converted to WAV, and queued for processing.\nWaiting for transcription...") + + text = await redis_service.wait_for_text( + user_id=message.from_user.id, + chat_id=message.chat.id, + message_id=message.message_id + ) + if text: + await message.reply(f"Transcription result:\n{text}") + else: + await message.reply("Sorry, transcription result was not received within the timeout.") + +def convert_to_wav(input_file: str, output_file: str): + """ + Конвертирует любой аудио/видеофайл в .wav с частотой 16kHz, 1 канал (моно). + """ + try: + ffmpeg.input(input_file).output( + output_file, + format="wav", + acodec="pcm_s16le", + ac=1, + ar="16000" + ).run(overwrite_output=True) + except Exception as e: + print(f"Error converting {input_file} to WAV: {e}") def register_audio_handlers(dp: Dispatcher, redis_service, storage_path: str): + # Оборачиваем callback для передачи дополнительных аргументов handler_callback = partial(handle_voice_and_video, redis_service=redis_service, storage_path=storage_path) - dp.message.register( - handler_callback, - F.content_type.in_({types.ContentType.VOICE, types.ContentType.VIDEO_NOTE}) - ) + # Регистрируем хэндлер с фильтром по content_type + dp.message.register(handler_callback, F.content_type.in_({types.ContentType.VOICE, types.ContentType.VIDEO_NOTE})) diff --git a/telegram_bot/services/redis_service.py b/telegram_bot/services/redis_service.py index 4babc55..f16f21d 100644 --- a/telegram_bot/services/redis_service.py +++ b/telegram_bot/services/redis_service.py @@ -1,10 +1,29 @@ import json -import redis +import redis.asyncio as redis class RedisService: def __init__(self, host: str, port: int): self.client = redis.Redis(host=host, port=port, decode_responses=True) - def publish_task(self, task_data: dict): + async def publish_task(self, task_data: dict): channel = "audio_tasks" - self.client.publish(channel, json.dumps(task_data)) + await self.client.publish(channel, json.dumps(task_data)) + + async def wait_for_text(self, user_id: int, chat_id: int, message_id: int, timeout: int = 30): + pubsub = self.client.pubsub() + await pubsub.subscribe("texts") + try: + async for message in pubsub.listen(): + if message["type"] != "message": + continue + try: + data = json.loads(message["data"]) + except Exception: + continue + if (data.get("user_id") == user_id and + data.get("chat_id") == chat_id and + data.get("message_id") == message_id): + return data.get("text") + finally: + await pubsub.unsubscribe("texts") + return None