qopscribe/speech_service/main.py

import asyncio
import json
from config import load_config
from models import AudioTask
from redis_client import RedisClient
from transcriber import WhisperTranscriber
import httpx
import json

async def request_summary(text: str, ollama_url: str) -> str:
    """
    Делает запрос к Ollama API для суммаризации текста.
    Использует модель gemma2:2b, системное сообщение и температуру 0.6.
    Запрос делается без стриминга.
    """
    payload = {
        "model": "gemma2:2b",
        "prompt": text,
        "system": (
            "Ты — помощник для суммаризации текста. Твоя задача: выделить ключевые моменты "
            "из высказывания человека, переформулируя их кратко и сохраняя оригинальный смысл. "
            "Очень важно: не отвечай на вопросы, не рассуждай, не комментируй, не добавляй ничего от себя, "
            "выполняй только суммаризацию."
        ),
        "stream": False,
        "options": {
            "temperature": 0.6
        }
    }
    async with httpx.AsyncClient() as client:
        try:
            response = await client.post(ollama_url, json=payload, timeout=60.0)
            response.raise_for_status()
        except Exception as e:
            print(f"LLM API request failed: {e}")
            return text
        data = response.json()
        summary = data.get("response")
        if summary:
            return summary.strip()
        return text

async def process_audio_task(redis_client: RedisClient, transcriber: WhisperTranscriber, task_data: dict, ollama_url: str):
    try:
        task = AudioTask(**task_data)
    except Exception as e:
        print(f"Error creating AudioTask from data: {e}")
        return

    print(f"Processing task {task.uuid} ...")
    loop = asyncio.get_running_loop()
    text = await loop.run_in_executor(None, transcriber.transcribe, task.file_path)

    if task_data.get("sum") == 1:
        text = await request_summary(text, ollama_url)

    result = {
        "chat_id": task.chat_id,
        "user_id": task.user_id,
        "message_id": task.message_id,
        "text": text
    }
    await redis_client.publish_result(result)
    print(f"Published result for task {task.uuid}")


async def main():
    config = load_config()
    redis_client = RedisClient(
        host=config["REDIS_HOST"],
        port=config["REDIS_PORT"],
        task_channel=config["AUDIO_TASK_CHANNEL"],
        result_channel=config["TEXT_RESULT_CHANNEL"]
    )
    transcriber = WhisperTranscriber(config["WHISPER_MODEL"], config["DEVICE"])

    pubsub = await redis_client.subscribe_tasks()
    print("Subscribed to audio_tasks channel. Waiting for tasks...")

    while True:
        message = await pubsub.get_message(ignore_subscribe_messages=True, timeout=1.0)
        if message:
            try:
                task_data = json.loads(message["data"])
            except Exception as e:
                print(f"Error parsing task message: {e}")
                continue
            asyncio.create_task(process_audio_task(redis_client, transcriber, task_data, config["OLLAMA_URL"]))
        await asyncio.sleep(0.1)

if __name__ == "__main__":
    asyncio.run(main())