diff --git a/speech_service/.gitignore b/speech_service/.gitignore new file mode 100644 index 0000000..1800114 --- /dev/null +++ b/speech_service/.gitignore @@ -0,0 +1,174 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# UV +# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +#uv.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/latest/usage/project/#working-with-version-control +.pdm.toml +.pdm-python +.pdm-build/ + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. +#.idea/ + +# Ruff stuff: +.ruff_cache/ + +# PyPI configuration file +.pypirc \ No newline at end of file diff --git a/speech_service/config.py b/speech_service/config.py index e69de29..b95ee30 100644 --- a/speech_service/config.py +++ b/speech_service/config.py @@ -0,0 +1,13 @@ +import os +from dotenv import load_dotenv + +def load_config(): + load_dotenv() + return { + "REDIS_HOST": os.getenv("REDIS_HOST", "localhost"), + "REDIS_PORT": int(os.getenv("REDIS_PORT", "6379")), + "WHISPER_MODEL": os.getenv("WHISPER_MODEL", "dvislobokov/whisper-large-v3-turbo-russian"), + "DEVICE": os.getenv("DEVICE", "cuda"), + "AUDIO_TASK_CHANNEL": os.getenv("AUDIO_TASK_CHANNEL", "audio_tasks"), + "TEXT_RESULT_CHANNEL": os.getenv("TEXT_RESULT_CHANNEL", "texts"), + } diff --git a/speech_service/main.py b/speech_service/main.py index e69de29..405b896 100644 --- a/speech_service/main.py +++ b/speech_service/main.py @@ -0,0 +1,53 @@ +import asyncio +import json +from config import load_config +from models import AudioTask +from redis_client import RedisClient +from transcriber import WhisperTranscriber + +async def process_audio_task(redis_client: RedisClient, transcriber: WhisperTranscriber, task_data: dict): + try: + task = AudioTask(**task_data) + except Exception as e: + print(f"Error creating AudioTask from data: {e}") + return + + print(f"Processing task {task.uuid} ...") + loop = asyncio.get_running_loop() + text = await loop.run_in_executor(None, transcriber.transcribe, task.file_path) + + result = { + "chat_id": task.chat_id, + "user_id": task.user_id, + "message_id": task.message_id, + "text": text + } + await redis_client.publish_result(result) + print(f"Published result for task {task.uuid}") + +async def main(): + config = load_config() + redis_client = RedisClient( + host=config["REDIS_HOST"], + port=config["REDIS_PORT"], + task_channel=config["AUDIO_TASK_CHANNEL"], + result_channel=config["TEXT_RESULT_CHANNEL"] + ) + transcriber = WhisperTranscriber(config["WHISPER_MODEL"], config["DEVICE"]) + + pubsub = await redis_client.subscribe_tasks() + print("Subscribed to audio_tasks channel. Waiting for tasks...") + + while True: + message = await pubsub.get_message(ignore_subscribe_messages=True, timeout=1.0) + if message: + try: + task_data = json.loads(message["data"]) + except Exception as e: + print(f"Error parsing task message: {e}") + continue + asyncio.create_task(process_audio_task(redis_client, transcriber, task_data)) + await asyncio.sleep(0.1) + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/speech_service/models.py b/speech_service/models.py index e69de29..8b7127e 100644 --- a/speech_service/models.py +++ b/speech_service/models.py @@ -0,0 +1,9 @@ +from dataclasses import dataclass + +@dataclass +class AudioTask: + uuid: str + file_path: str + user_id: int + chat_id: int + message_id: int diff --git a/speech_service/redis_client.py b/speech_service/redis_client.py index e69de29..eabf477 100644 --- a/speech_service/redis_client.py +++ b/speech_service/redis_client.py @@ -0,0 +1,16 @@ +import json +import redis.asyncio as redis + +class RedisClient: + def __init__(self, host: str, port: int, task_channel: str, result_channel: str): + self.client = redis.Redis(host=host, port=port, decode_responses=True) + self.task_channel = task_channel + self.result_channel = result_channel + + async def subscribe_tasks(self): + pubsub = self.client.pubsub() + await pubsub.subscribe(self.task_channel) + return pubsub + + async def publish_result(self, result: dict): + await self.client.publish(self.result_channel, json.dumps(result)) diff --git a/speech_service/requirements.txt b/speech_service/requirements.txt index e69de29..c9f0cd2 100644 --- a/speech_service/requirements.txt +++ b/speech_service/requirements.txt @@ -0,0 +1,11 @@ +--index-url https://download.pytorch.org/whl/cu121 + +torch==2.5.1 +torchvision==0.20.1 +torchaudio==2.5.1 + +--index-url https://pypi.org/simple + +transformers +redis>=4.2.0 +python-dotenv diff --git a/speech_service/transcriber.py b/speech_service/transcriber.py new file mode 100644 index 0000000..0e197ba --- /dev/null +++ b/speech_service/transcriber.py @@ -0,0 +1,16 @@ +from transformers import pipeline + +class WhisperTranscriber: + def __init__(self, model_name: str, device: str = "cuda"): + print("Loading Whisper model...") + self.pipe = pipeline( + task="automatic-speech-recognition", + model=model_name, + tokenizer=model_name, + device=0 if device == "cuda" else -1 + ) + print("Whisper model loaded.") + + def transcribe(self, audio_file: str) -> str: + result = self.pipe(audio_file) + return result.get("text", "").strip()