add speech service

itqop 2025-02-23 18:22:33 +03:00
parent d6e060c1d1
commit 3419f0c19c
7 changed files with 292 additions and 0 deletions

speech_service/.gitignore vendored Normal file

@@ -0,0 +1,174 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
.pybuilder/
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# UV
# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
#uv.lock
# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock
# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/latest/usage/project/#working-with-version-control
.pdm.toml
.pdm-python
.pdm-build/
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# pytype static type analyzer
.pytype/
# Cython debug symbols
cython_debug/
# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/
# Ruff stuff:
.ruff_cache/
# PyPI configuration file
.pypirc

speech_service/config.py Normal file

@@ -0,0 +1,13 @@
import os

from dotenv import load_dotenv


def load_config():
    load_dotenv()
    return {
        "REDIS_HOST": os.getenv("REDIS_HOST", "localhost"),
        "REDIS_PORT": int(os.getenv("REDIS_PORT", "6379")),
        "WHISPER_MODEL": os.getenv("WHISPER_MODEL", "dvislobokov/whisper-large-v3-turbo-russian"),
        "DEVICE": os.getenv("DEVICE", "cuda"),
        "AUDIO_TASK_CHANNEL": os.getenv("AUDIO_TASK_CHANNEL", "audio_tasks"),
        "TEXT_RESULT_CHANNEL": os.getenv("TEXT_RESULT_CHANNEL", "texts"),
    }
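
For reference, a .env file next to the service might look like the sketch below. Every key and value comes straight from the defaults in load_config, so only the entries that differ per deployment (typically REDIS_HOST and DEVICE) actually need to be set:

REDIS_HOST=localhost
REDIS_PORT=6379
WHISPER_MODEL=dvislobokov/whisper-large-v3-turbo-russian
DEVICE=cuda
AUDIO_TASK_CHANNEL=audio_tasks
TEXT_RESULT_CHANNEL=texts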

speech_service/main.py Normal file

@@ -0,0 +1,53 @@
import asyncio
import json

from config import load_config
from models import AudioTask
from redis_client import RedisClient
from transcriber import WhisperTranscriber


async def process_audio_task(redis_client: RedisClient, transcriber: WhisperTranscriber, task_data: dict):
    try:
        task = AudioTask(**task_data)
    except Exception as e:
        print(f"Error creating AudioTask from data: {e}")
        return

    print(f"Processing task {task.uuid} ...")
    # Whisper inference is blocking, so run it in the default executor
    # to keep the event loop free for incoming messages.
    loop = asyncio.get_running_loop()
    text = await loop.run_in_executor(None, transcriber.transcribe, task.file_path)

    result = {
        "chat_id": task.chat_id,
        "user_id": task.user_id,
        "message_id": task.message_id,
        "text": text
    }
    await redis_client.publish_result(result)
    print(f"Published result for task {task.uuid}")


async def main():
    config = load_config()
    redis_client = RedisClient(
        host=config["REDIS_HOST"],
        port=config["REDIS_PORT"],
        task_channel=config["AUDIO_TASK_CHANNEL"],
        result_channel=config["TEXT_RESULT_CHANNEL"]
    )
    transcriber = WhisperTranscriber(config["WHISPER_MODEL"], config["DEVICE"])

    pubsub = await redis_client.subscribe_tasks()
    print("Subscribed to audio_tasks channel. Waiting for tasks...")

    # Poll the pub/sub channel and fan each incoming task out to its own asyncio task.
    while True:
        message = await pubsub.get_message(ignore_subscribe_messages=True, timeout=1.0)
        if message:
            try:
                task_data = json.loads(message["data"])
            except Exception as e:
                print(f"Error parsing task message: {e}")
                continue
            asyncio.create_task(process_audio_task(redis_client, transcriber, task_data))
        await asyncio.sleep(0.1)


if __name__ == "__main__":
    asyncio.run(main())
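
The loop above expects every message on the task channel to be a JSON object whose keys match the AudioTask fields. A rough producer sketch is shown below; it is hypothetical (the publishing side is not part of this commit), and the host, port, channel name, and file path are placeholder values taken from config.py's defaults:

import asyncio
import json
import uuid

import redis.asyncio as redis


async def publish_audio_task(file_path: str, user_id: int, chat_id: int, message_id: int):
    # Host, port, and channel mirror the defaults in config.py.
    client = redis.Redis(host="localhost", port=6379, decode_responses=True)
    task = {
        "uuid": str(uuid.uuid4()),  # matches AudioTask.uuid
        "file_path": file_path,
        "user_id": user_id,
        "chat_id": chat_id,
        "message_id": message_id,
    }
    await client.publish("audio_tasks", json.dumps(task))
    await client.close()


asyncio.run(publish_audio_task("/tmp/voice.ogg", user_id=1, chat_id=1, message_id=1))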

speech_service/models.py Normal file

@@ -0,0 +1,9 @@
from dataclasses import dataclass


@dataclass
class AudioTask:
    uuid: str
    file_path: str
    user_id: int
    chat_id: int
    message_id: int

speech_service/redis_client.py Normal file

@@ -0,0 +1,16 @@
import json

import redis.asyncio as redis


class RedisClient:
    def __init__(self, host: str, port: int, task_channel: str, result_channel: str):
        self.client = redis.Redis(host=host, port=port, decode_responses=True)
        self.task_channel = task_channel
        self.result_channel = result_channel

    async def subscribe_tasks(self):
        pubsub = self.client.pubsub()
        await pubsub.subscribe(self.task_channel)
        return pubsub

    async def publish_result(self, result: dict):
        await self.client.publish(self.result_channel, json.dumps(result))
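
Whatever sits downstream of TEXT_RESULT_CHANNEL receives the dict built in process_audio_task. A minimal listener sketch, assuming the same defaults as config.py (the listen_for_texts helper and the printed fields are illustrative only, not part of this commit):

import asyncio
import json

import redis.asyncio as redis


async def listen_for_texts():
    # Same defaults as config.py: localhost:6379, "texts" channel.
    client = redis.Redis(host="localhost", port=6379, decode_responses=True)
    pubsub = client.pubsub()
    await pubsub.subscribe("texts")
    async for message in pubsub.listen():
        if message["type"] != "message":
            continue
        result = json.loads(message["data"])
        # Keys follow the result dict assembled in process_audio_task.
        print(result["chat_id"], result["message_id"], result["text"])


asyncio.run(listen_for_texts())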

speech_service/requirements.txt Normal file

@@ -0,0 +1,11 @@
--index-url https://download.pytorch.org/whl/cu121
torch==2.5.1
torchvision==0.20.1
torchaudio==2.5.1
--index-url https://pypi.org/simple
transformers
redis>=4.2.0
python-dotenv

speech_service/transcriber.py Normal file

@@ -0,0 +1,16 @@
from transformers import pipeline


class WhisperTranscriber:
    def __init__(self, model_name: str, device: str = "cuda"):
        print("Loading Whisper model...")
        self.pipe = pipeline(
            task="automatic-speech-recognition",
            model=model_name,
            tokenizer=model_name,
            device=0 if device == "cuda" else -1
        )
        print("Whisper model loaded.")

    def transcribe(self, audio_file: str) -> str:
        result = self.pipe(audio_file)
        return result.get("text", "").strip()
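
To smoke-test the transcriber on its own, something like the following sketch should work. "sample.ogg" is a placeholder path, and passing a file path this way relies on the transformers ASR pipeline's built-in audio decoding (which needs ffmpeg available):

from config import load_config
from transcriber import WhisperTranscriber

config = load_config()
transcriber = WhisperTranscriber(config["WHISPER_MODEL"], config["DEVICE"])
# "sample.ogg" is a placeholder; any audio file ffmpeg can decode should work.
print(transcriber.transcribe("sample.ogg"))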