# qopscribe/telegram_bot/handlers/audio_handler.py

import asyncio
import logging
import os
import subprocess
import uuid
from functools import partial

from aiogram import types, Dispatcher, F

def _remove_if_exists(path: str) -> None:
    """Delete *path*, doing nothing when the file is already gone."""
    try:
        os.remove(path)
    except FileNotFoundError:
        pass


async def handle_voice_and_video(message: types.Message, redis_service, storage_path: str):
    """Transcribe a voice message or video note and reply with the text.

    The attachment is downloaded into ``storage_path``, converted to WAV via
    ``convert_to_wav``, and a task describing the WAV file is published through
    ``redis_service.publish_task``. The handler then awaits
    ``redis_service.wait_for_text`` and replies with the result.

    Args:
        message: Incoming message; only VOICE and VIDEO_NOTE content types are
            processed, anything else is ignored.
        redis_service: Object providing the ``publish_task`` and
            ``wait_for_text`` coroutines.
        storage_path: Directory for the intermediate files; created if missing.

    Both intermediate files are removed even when a step raises, so failed
    requests do not accumulate files in ``storage_path``.
    """
    file_id = None
    if message.content_type == types.ContentType.VOICE:
        file_id = message.voice.file_id
    elif message.content_type == types.ContentType.VIDEO_NOTE:
        file_id = message.video_note.file_id

    if not file_id:
        return

    remote_file = await message.bot.get_file(file_id)
    file_path = remote_file.file_path

    # A fresh UUID keeps concurrent requests from colliding on disk and is
    # also the task identifier sent to the worker.
    file_uuid = str(uuid.uuid4())
    original_filename = os.path.basename(file_path)

    os.makedirs(storage_path, exist_ok=True)
    temp_destination = os.path.join(storage_path, f"{file_uuid}_{original_filename}")
    wav_destination = os.path.join(storage_path, f"{file_uuid}.wav")

    try:
        try:
            await message.bot.download_file(file_path, temp_destination)
            # convert_to_wav blocks on an FFmpeg subprocess; run it in the
            # default executor so the event loop keeps serving other updates.
            loop = asyncio.get_running_loop()
            await loop.run_in_executor(
                None, convert_to_wav, temp_destination, wav_destination
            )
        finally:
            # The downloaded original is only needed as conversion input.
            _remove_if_exists(temp_destination)

        # Without this check a failed conversion would publish a task for a
        # file that does not exist.
        if not os.path.exists(wav_destination):
            await message.reply("Sorry, the audio could not be converted for transcription.")
            return

        task_data = {
            "uuid": file_uuid,
            "file_path": wav_destination,
            "user_id": message.from_user.id,
            "chat_id": message.chat.id,
            "message_id": message.message_id,
            # NOTE(review): meaning of "sum" is defined by the task consumer,
            # which is not visible here - confirm before changing.
            "sum": 1,
        }

        await redis_service.publish_task(task_data)

        text = await redis_service.wait_for_text(
            user_id=message.from_user.id,
            chat_id=message.chat.id,
            message_id=message.message_id,
        )
    finally:
        _remove_if_exists(wav_destination)

    if text:
        await send_long_message(message, text)
    else:
        # Presumably wait_for_text returns a falsy value when it times out.
        await message.reply("Sorry, transcription result was not received within the timeout.")

async def send_long_message(message: types.Message, text: str):
    """Reply to *message* with *text*, split into pieces of at most 4096 characters.

    Pieces are sent in order, one reply per piece. An empty *text* sends
    nothing.
    """
    limit = 4096
    offset = 0
    total = len(text)
    while offset < total:
        piece = text[offset:offset + limit]
        await message.reply(piece)
        offset += limit

def convert_to_wav(input_file: str, output_file: str) -> bool:
    """Convert an audio file to 16 kHz mono 16-bit PCM WAV using FFmpeg.

    An existing ``output_file`` is overwritten (``-y``). When FFmpeg exits
    with a non-zero status, its stderr output is logged as an error.

    Args:
        input_file: Path of the file to convert.
        output_file: Path where the WAV file is written.

    Returns:
        True if FFmpeg exited with status 0, False otherwise.

    Raises:
        OSError: If the ``ffmpeg`` executable cannot be started, for example
            ``FileNotFoundError`` when it is not on PATH.
    """
    command = [
        "ffmpeg", "-y", "-i", input_file,
        "-ar", "16000", "-ac", "1", "-c:a", "pcm_s16le",
        output_file,
    ]
    # Only stderr is kept: that is where FFmpeg writes its diagnostics.
    result = subprocess.run(command, stdout=subprocess.DEVNULL, stderr=subprocess.PIPE)
    if result.returncode != 0:
        logging.getLogger(__name__).error(
            "FFmpeg failed to convert %s (exit code %d): %s",
            input_file,
            result.returncode,
            result.stderr.decode("utf-8", errors="replace").strip(),
        )
        return False
    return True
def register_audio_handlers(dp: Dispatcher, redis_service, storage_path: str):
    """Register ``handle_voice_and_video`` on *dp* for voice and video-note messages.

    ``redis_service`` and ``storage_path`` are bound to the handler as keyword
    arguments, so each call of the registered callback receives them.
    """
    bound_handler = partial(
        handle_voice_and_video,
        redis_service=redis_service,
        storage_path=storage_path,
    )
    accepted_types = {types.ContentType.VOICE, types.ContentType.VIDEO_NOTE}
    dp.message.register(bound_handler, F.content_type.in_(accepted_types))