add speech service

2025-02-23 18:22:33 +03:00 · 2025-02-23 18:22:33 +03:00 · 3419f0c19c
parent d6e060c1d1
commit 3419f0c19c
7 changed files with 292 additions and 0 deletions
--- a/speech_service/.gitignore
+++ b/speech_service/.gitignore
@ -0,0 +1,174 @@
 # Byte-compiled / optimized / DLL files
 __pycache__/
 *.py[cod]
 *$py.class
 # C extensions
 *.so
 # Distribution / packaging
 .Python
 build/
 develop-eggs/
 dist/
 downloads/
 eggs/
 .eggs/
 lib/
 lib64/
 parts/
 sdist/
 var/
 wheels/
 share/python-wheels/
 *.egg-info/
 .installed.cfg
 *.egg
 MANIFEST
 # PyInstaller
 #  Usually these files are written by a python script from a template
 #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 *.manifest
 *.spec
 # Installer logs
 pip-log.txt
 pip-delete-this-directory.txt
 # Unit test / coverage reports
 htmlcov/
 .tox/
 .nox/
 .coverage
 .coverage.*
 .cache
 nosetests.xml
 coverage.xml
 *.cover
 *.py,cover
 .hypothesis/
 .pytest_cache/
 cover/
 # Translations
 *.mo
 *.pot
 # Django stuff:
 *.log
 local_settings.py
 db.sqlite3
 db.sqlite3-journal
 # Flask stuff:
 instance/
 .webassets-cache
 # Scrapy stuff:
 .scrapy
 # Sphinx documentation
 docs/_build/
 # PyBuilder
 .pybuilder/
 target/
 # Jupyter Notebook
 .ipynb_checkpoints
 # IPython
 profile_default/
 ipython_config.py
 # pyenv
 #   For a library or package, you might want to ignore these files since the code is
 #   intended to run in multiple environments; otherwise, check them in:
 # .python-version
 # pipenv
 #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 #   However, in case of collaboration, if having platform-specific dependencies or dependencies
 #   having no cross-platform support, pipenv may install dependencies that don't work, or not
 #   install all needed dependencies.
 #Pipfile.lock
 # UV
 #   Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
 #   This is especially recommended for binary packages to ensure reproducibility, and is more
 #   commonly ignored for libraries.
 #uv.lock
 # poetry
 #   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
 #   This is especially recommended for binary packages to ensure reproducibility, and is more
 #   commonly ignored for libraries.
 #   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
 #poetry.lock
 # pdm
 #   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
 #pdm.lock
 #   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
 #   in version control.
 #   https://pdm.fming.dev/latest/usage/project/#working-with-version-control
 .pdm.toml
 .pdm-python
 .pdm-build/
 # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
 __pypackages__/
 # Celery stuff
 celerybeat-schedule
 celerybeat.pid
 # SageMath parsed files
 *.sage.py
 # Environments
 .env
 .venv
 env/
 venv/
 ENV/
 env.bak/
 venv.bak/
 # Spyder project settings
 .spyderproject
 .spyproject
 # Rope project settings
 .ropeproject
 # mkdocs documentation
 /site
 # mypy
 .mypy_cache/
 .dmypy.json
 dmypy.json
 # Pyre type checker
 .pyre/
 # pytype static type analyzer
 .pytype/
 # Cython debug symbols
 cython_debug/
 # PyCharm
 #  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
 #  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
 #  and can be added to the global gitignore or merged into this file.  For a more nuclear
 #  option (not recommended) you can uncomment the following to ignore the entire idea folder.
 #.idea/
 # Ruff stuff:
 .ruff_cache/
 # PyPI configuration file
 .pypirc
--- a/speech_service/config.py
+++ b/speech_service/config.py
@ -0,0 +1,13 @@
 import os
 from dotenv import load_dotenv
 def load_config():
    """Collect the service settings from environment variables.

    ``load_dotenv()`` is called first so that variables defined in a local
    ``.env`` file are available to ``os.getenv``. Every setting falls back to
    a built-in default when its variable is not set.

    Returns:
        dict: settings keyed by variable name. ``REDIS_PORT`` is converted to
        ``int``; every other value is returned as a string.

    Raises:
        ValueError: if ``REDIS_PORT`` is set to something ``int()`` rejects.
    """
    load_dotenv()
    fallbacks = {
        "REDIS_HOST": "localhost",
        "REDIS_PORT": "6379",
        "WHISPER_MODEL": "dvislobokov/whisper-large-v3-turbo-russian",
        "DEVICE": "cuda",
        "AUDIO_TASK_CHANNEL": "audio_tasks",
        "TEXT_RESULT_CHANNEL": "texts",
    }
    settings = {name: os.getenv(name, default) for name, default in fallbacks.items()}
    # The port is the only setting consumed as a number.
    settings["REDIS_PORT"] = int(settings["REDIS_PORT"])
    return settings
--- a/speech_service/main.py
+++ b/speech_service/main.py
@ -0,0 +1,53 @@
 import asyncio
 import json
 from config import load_config
 from models import AudioTask
 from redis_client import RedisClient
 from transcriber import WhisperTranscriber
 async def process_audio_task(redis_client: RedisClient, transcriber: WhisperTranscriber, task_data: dict):
    """Transcribe one audio task and publish the recognised text.

    This coroutine is started as a background task, so nothing awaits its
    result. Every failure is therefore reported here and ends the task
    quietly instead of escaping as an unretrieved task exception.

    Args:
        redis_client: client used to publish the result message.
        transcriber: object whose blocking ``transcribe(file_path)`` returns
            the recognised text.
        task_data: decoded task message; its items are passed as keyword
            arguments to ``AudioTask``.

    Returns:
        None. On success a dict with ``chat_id``, ``user_id``, ``message_id``
        and ``text`` is handed to ``redis_client.publish_result``.
    """
    try:
        task = AudioTask(**task_data)
    except Exception as e:
        print(f"Error creating AudioTask from data: {e}")
        return
    print(f"Processing task {task.uuid} ...")
    loop = asyncio.get_running_loop()
    try:
        # transcribe() is a blocking call; run it in the default executor so
        # the event loop stays free to poll for further messages.
        text = await loop.run_in_executor(None, transcriber.transcribe, task.file_path)
    except Exception as e:
        print(f"Error transcribing task {task.uuid}: {e}")
        return
    result = {
        "chat_id": task.chat_id,
        "user_id": task.user_id,
        "message_id": task.message_id,
        "text": text
    }
    try:
        await redis_client.publish_result(result)
    except Exception as e:
        print(f"Error publishing result for task {task.uuid}: {e}")
        return
    print(f"Published result for task {task.uuid}")
 async def main():
    """Run the speech service: listen for audio tasks and transcribe them.

    Loads the configuration, creates the Redis client and the Whisper
    transcriber, subscribes to the task channel and then polls it forever.
    Each message that parses as JSON is handed to ``process_audio_task`` in
    its own background task; messages that do not parse are logged and
    skipped.
    """
    config = load_config()
    task_channel = config["AUDIO_TASK_CHANNEL"]
    redis_client = RedisClient(
        host=config["REDIS_HOST"],
        port=config["REDIS_PORT"],
        task_channel=task_channel,
        result_channel=config["TEXT_RESULT_CHANNEL"]
    )
    transcriber = WhisperTranscriber(config["WHISPER_MODEL"], config["DEVICE"])
    pubsub = await redis_client.subscribe_tasks()
    # Report the channel actually subscribed to; it is configurable.
    print(f"Subscribed to {task_channel} channel. Waiting for tasks...")
    # The event loop only keeps weak references to tasks, so hold a strong
    # reference to each one until it finishes; otherwise it could be
    # garbage-collected while still running.
    running_tasks = set()
    while True:
        message = await pubsub.get_message(ignore_subscribe_messages=True, timeout=1.0)
        if message:
            try:
                task_data = json.loads(message["data"])
            except Exception as e:
                print(f"Error parsing task message: {e}")
                continue
            job = asyncio.create_task(process_audio_task(redis_client, transcriber, task_data))
            running_tasks.add(job)
            job.add_done_callback(running_tasks.discard)
        # Short pause between polls; it also yields control to running tasks.
        await asyncio.sleep(0.1)
 # Script entry point: start an event loop and run the service until stopped.
 if __name__ == "__main__":
    asyncio.run(main())
--- a/speech_service/models.py
+++ b/speech_service/models.py
@ -0,0 +1,9 @@
 from dataclasses import dataclass
@dataclass
 class AudioTask:
    """One transcription request, built from the fields of a task message."""
    # Identifier of the task; the service uses it in its log output.
    uuid: str
    # Location of the audio to transcribe; passed to the transcriber as-is.
    file_path: str
    # The three ids below are copied unchanged into the published result.
    # NOTE(review): they look like messenger identifiers - confirm with the producer.
    user_id: int
    chat_id: int
    message_id: int
--- a/speech_service/redis_client.py
+++ b/speech_service/redis_client.py
@ -0,0 +1,16 @@
 import json
 import redis.asyncio as redis
 class RedisClient:
    """Small wrapper around an asyncio Redis connection bound to two channels.

    Incoming tasks are read from ``task_channel``; results are published to
    ``result_channel`` as JSON text.
    """

    def __init__(self, host: str, port: int, task_channel: str, result_channel: str):
        """Create the connection object and remember both channel names."""
        self.task_channel, self.result_channel = task_channel, result_channel
        # decode_responses=True asks the client for str payloads, not bytes.
        self.client = redis.Redis(host=host, port=port, decode_responses=True)

    async def subscribe_tasks(self):
        """Subscribe to the task channel and return the pub/sub object."""
        listener = self.client.pubsub()
        await listener.subscribe(self.task_channel)
        return listener

    async def publish_result(self, result: dict):
        """Serialise ``result`` to JSON and publish it on the result channel."""
        payload = json.dumps(result)
        await self.client.publish(self.result_channel, payload)
--- a/speech_service/requirements.txt
+++ b/speech_service/requirements.txt
@ -0,0 +1,11 @@
 # --index-url is a file-wide option and the last occurrence wins, so listing it
 # twice made pip ignore the PyTorch index. Keep PyPI as the primary index and
 # add the PyTorch CUDA 12.1 wheel index as an extra source for the pinned
 # torch packages.
 --index-url https://pypi.org/simple
 --extra-index-url https://download.pytorch.org/whl/cu121
 torch==2.5.1
 torchvision==0.20.1
 torchaudio==2.5.1
 transformers
 redis>=4.2.0
 python-dotenv
--- a/speech_service/transcriber.py
+++ b/speech_service/transcriber.py
@ -0,0 +1,16 @@
 from transformers import pipeline
 class WhisperTranscriber:
    """Speech-to-text wrapper around a ``transformers`` ASR pipeline."""

    def __init__(self, model_name: str, device: str = "cuda"):
        """Load the speech-recognition pipeline for ``model_name``.

        Args:
            model_name: model identifier, also used as the tokenizer name.
            device: ``"cuda"`` or ``"cuda:N"`` selects a GPU; any other value
                selects the CPU.
        """
        print("Loading Whisper model...")
        self.pipe = pipeline(
            task="automatic-speech-recognition",
            model=model_name,
            tokenizer=model_name,
            device=self._resolve_device(device)
        )
        print("Whisper model loaded.")

    @staticmethod
    def _resolve_device(device) -> int:
        """Translate a device name into the integer index the pipeline takes.

        ``"cuda"`` maps to GPU 0 and ``"cuda:N"`` to GPU ``N``; matching
        ignores case and surrounding whitespace. Every other value,
        including non-string input, maps to ``-1`` (CPU).
        """
        if not isinstance(device, str):
            return -1
        name = device.strip().lower()
        if name == "cuda":
            return 0
        family, _, index = name.partition(":")
        # An explicit GPU index such as "cuda:1" must not fall back to CPU.
        if family == "cuda" and index.isdecimal():
            return int(index)
        return -1

    def transcribe(self, audio_file: str) -> str:
        """Return the recognised text for ``audio_file``, stripped of
        surrounding whitespace; empty string when the pipeline yields no text.
        """
        result = self.pipe(audio_file)
        # Guard against both a missing "text" key and an explicit None value.
        return (result.get("text") or "").strip()