fix

add speech service
Add speech service
2025-02-23 18:23:12 +03:00 · 2025-02-23 18:22:33 +03:00 · 2025-02-23 17:25:10 +03:00 · 2025-02-23 17:24:56 +03:00
11 changed files with 364 additions and 14 deletions
--- a/.gitignore
+++ b/.gitignore
@ -0,0 +1,2 @@
 storage
 test*
--- a/speech_service/.gitignore
+++ b/speech_service/.gitignore
@ -0,0 +1,174 @@
 # Byte-compiled / optimized / DLL files
 __pycache__/
 *.py[cod]
 *$py.class
 # C extensions
 *.so
 # Distribution / packaging
 .Python
 build/
 develop-eggs/
 dist/
 downloads/
 eggs/
 .eggs/
 lib/
 lib64/
 parts/
 sdist/
 var/
 wheels/
 share/python-wheels/
 *.egg-info/
 .installed.cfg
 *.egg
 MANIFEST
 # PyInstaller
 #  Usually these files are written by a python script from a template
 #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 *.manifest
 *.spec
 # Installer logs
 pip-log.txt
 pip-delete-this-directory.txt
 # Unit test / coverage reports
 htmlcov/
 .tox/
 .nox/
 .coverage
 .coverage.*
 .cache
 nosetests.xml
 coverage.xml
 *.cover
 *.py,cover
 .hypothesis/
 .pytest_cache/
 cover/
 # Translations
 *.mo
 *.pot
 # Django stuff:
 *.log
 local_settings.py
 db.sqlite3
 db.sqlite3-journal
 # Flask stuff:
 instance/
 .webassets-cache
 # Scrapy stuff:
 .scrapy
 # Sphinx documentation
 docs/_build/
 # PyBuilder
 .pybuilder/
 target/
 # Jupyter Notebook
 .ipynb_checkpoints
 # IPython
 profile_default/
 ipython_config.py
 # pyenv
 #   For a library or package, you might want to ignore these files since the code is
 #   intended to run in multiple environments; otherwise, check them in:
 # .python-version
 # pipenv
 #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 #   However, in case of collaboration, if having platform-specific dependencies or dependencies
 #   having no cross-platform support, pipenv may install dependencies that don't work, or not
 #   install all needed dependencies.
 #Pipfile.lock
 # UV
 #   Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
 #   This is especially recommended for binary packages to ensure reproducibility, and is more
 #   commonly ignored for libraries.
 #uv.lock
 # poetry
 #   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
 #   This is especially recommended for binary packages to ensure reproducibility, and is more
 #   commonly ignored for libraries.
 #   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
 #poetry.lock
 # pdm
 #   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
 #pdm.lock
 #   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
 #   in version control.
 #   https://pdm.fming.dev/latest/usage/project/#working-with-version-control
 .pdm.toml
 .pdm-python
 .pdm-build/
 # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
 __pypackages__/
 # Celery stuff
 celerybeat-schedule
 celerybeat.pid
 # SageMath parsed files
 *.sage.py
 # Environments
 .env
 .venv
 env/
 venv/
 ENV/
 env.bak/
 venv.bak/
 # Spyder project settings
 .spyderproject
 .spyproject
 # Rope project settings
 .ropeproject
 # mkdocs documentation
 /site
 # mypy
 .mypy_cache/
 .dmypy.json
 dmypy.json
 # Pyre type checker
 .pyre/
 # pytype static type analyzer
 .pytype/
 # Cython debug symbols
 cython_debug/
 # PyCharm
 #  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
 #  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
 #  and can be added to the global gitignore or merged into this file.  For a more nuclear
 #  option (not recommended) you can uncomment the following to ignore the entire idea folder.
 #.idea/
 # Ruff stuff:
 .ruff_cache/
 # PyPI configuration file
 .pypirc
--- a/speech_service/init.py
+++ b/speech_service/init.py
--- a/speech_service/config.py
+++ b/speech_service/config.py
@ -0,0 +1,13 @@
 import os
 from dotenv import load_dotenv
 def load_config():
    """Build the speech service settings from the process environment.

    ``load_dotenv()`` is called first, then every setting is looked up with
    ``os.getenv`` using the fallback listed in the table below.
    ``REDIS_PORT`` is converted to ``int``; all other values stay strings.

    Returns:
        dict: settings keyed by the name of their environment variable.

    Raises:
        ValueError: if ``REDIS_PORT`` holds a value that ``int()`` rejects.
    """
    load_dotenv()
    # Fallbacks are kept as strings because that is what os.getenv returns.
    fallbacks = {
        "REDIS_HOST": "localhost",
        "REDIS_PORT": "6379",
        "WHISPER_MODEL": "dvislobokov/whisper-large-v3-turbo-russian",
        "DEVICE": "cuda",
        "AUDIO_TASK_CHANNEL": "audio_tasks",
        "TEXT_RESULT_CHANNEL": "texts",
    }
    settings = {name: os.getenv(name, fallback) for name, fallback in fallbacks.items()}
    settings["REDIS_PORT"] = int(settings["REDIS_PORT"])
    return settings
--- a/speech_service/main.py
+++ b/speech_service/main.py
@ -0,0 +1,53 @@
 import asyncio
 import json
 from config import load_config
 from models import AudioTask
 from redis_client import RedisClient
 from transcriber import WhisperTranscriber
 async def process_audio_task(redis_client: RedisClient, transcriber: WhisperTranscriber, task_data: dict):
    """Transcribe one audio task and publish the recognised text.

    Args:
        redis_client: client whose ``publish_result`` coroutine sends the result.
        transcriber: object whose blocking ``transcribe(path)`` returns the text.
        task_data: decoded task message; it is expanded into ``AudioTask(**task_data)``.

    Returns:
        None. If ``task_data`` cannot be turned into an ``AudioTask`` the error
        is printed and the task is dropped without publishing anything.
    """
    try:
        task = AudioTask(**task_data)
    except Exception as e:
        print(f"Error creating AudioTask from data: {e}")
        return

    print(f"Processing task {task.uuid} ...")

    # transcribe() is a synchronous call, so it is handed to the loop's default
    # executor instead of being run directly inside the coroutine.
    transcript = await asyncio.get_running_loop().run_in_executor(
        None, transcriber.transcribe, task.file_path
    )

    # The ids are echoed back unchanged alongside the recognised text.
    payload = {field: getattr(task, field) for field in ("chat_id", "user_id", "message_id")}
    payload["text"] = transcript

    await redis_client.publish_result(payload)
    print(f"Published result for task {task.uuid}")
 async def main():
    """Run the speech service: receive audio tasks from Redis and transcribe them.

    Loads the configuration, creates the Redis client and the Whisper
    transcriber, subscribes to the task channel and then polls it forever.
    Each message that decodes as JSON is handed to ``process_audio_task`` in a
    background asyncio task, so polling continues while audio is transcribed.

    This coroutine never returns normally.
    """
    config = load_config()
    task_channel = config["AUDIO_TASK_CHANNEL"]
    redis_client = RedisClient(
        host=config["REDIS_HOST"],
        port=config["REDIS_PORT"],
        task_channel=task_channel,
        result_channel=config["TEXT_RESULT_CHANNEL"],
    )
    transcriber = WhisperTranscriber(config["WHISPER_MODEL"], config["DEVICE"])
    pubsub = await redis_client.subscribe_tasks()
    # Report the channel actually subscribed to; it is configurable.
    print(f"Subscribed to {task_channel} channel. Waiting for tasks...")

    # The event loop keeps only weak references to tasks, so a task that is not
    # referenced elsewhere may be garbage-collected before it finishes.
    # Holding the tasks here keeps them alive until they complete.
    background_tasks = set()

    def _on_task_done(finished):
        """Drop the finished task and report a failure that would otherwise be lost."""
        background_tasks.discard(finished)
        if finished.cancelled():
            return
        error = finished.exception()
        if error is not None:
            print(f"Audio task failed: {error!r}")

    while True:
        message = await pubsub.get_message(ignore_subscribe_messages=True, timeout=1.0)
        if message:
            try:
                task_data = json.loads(message["data"])
            except Exception as e:
                print(f"Error parsing task message: {e}")
                continue
            job = asyncio.create_task(process_audio_task(redis_client, transcriber, task_data))
            background_tasks.add(job)
            job.add_done_callback(_on_task_done)
        await asyncio.sleep(0.1)
 if __name__ == "__main__":
    asyncio.run(main())
--- a/speech_service/models.py
+++ b/speech_service/models.py
@ -0,0 +1,9 @@
 from dataclasses import dataclass
@dataclass
 class AudioTask:
    """One transcription job, as carried by a task message.

    The speech service builds it with ``AudioTask(**task_data)`` from the
    decoded JSON message, so the message keys must match these field names.
    """
    # Identifier of the task; the speech service uses it in its log lines.
    uuid: str
    # Path of the audio file that is handed to the transcriber.
    file_path: str
    # The three ids below are copied unchanged into the published result.
    user_id: int
    chat_id: int
    message_id: int
--- a/speech_service/redis_client.py
+++ b/speech_service/redis_client.py
@ -0,0 +1,16 @@
 import json
 import redis.asyncio as redis
 class RedisClient:
    """Small async Redis helper for the speech service.

    It subscribes to the channel that carries audio tasks and publishes
    JSON-encoded results to the result channel.
    """

    def __init__(self, host: str, port: int, task_channel: str, result_channel: str):
        """Create the Redis connection object and remember both channel names.

        Args:
            host: Redis host name.
            port: Redis port.
            task_channel: channel that incoming tasks are read from.
            result_channel: channel that results are published to.
        """
        self.task_channel = task_channel
        self.result_channel = result_channel
        # decode_responses=True makes redis-py return str instead of bytes.
        self.client = redis.Redis(host=host, port=port, decode_responses=True)

    async def subscribe_tasks(self):
        """Subscribe a new pub/sub object to the task channel and return it."""
        subscription = self.client.pubsub()
        await subscription.subscribe(self.task_channel)
        return subscription

    async def publish_result(self, result: dict):
        """Serialise ``result`` to JSON and publish it on the result channel."""
        payload = json.dumps(result)
        await self.client.publish(self.result_channel, payload)
--- a/speech_service/requirements.txt
+++ b/speech_service/requirements.txt
@ -0,0 +1,11 @@
 --index-url https://download.pytorch.org/whl/cu121
 torch==2.5.1
 torchvision==0.20.1
 torchaudio==2.5.1
 --index-url https://pypi.org/simple
 transformers
 redis>=4.2.0
 python-dotenv
--- a/speech_service/transcriber.py
+++ b/speech_service/transcriber.py
@ -0,0 +1,16 @@
 from transformers import pipeline
 class WhisperTranscriber:
    """Speech-to-text wrapper around a ``transformers`` ASR pipeline."""

    def __init__(self, model_name: str, device: str = "cuda"):
        """Load the model into an automatic-speech-recognition pipeline.

        Args:
            model_name: model identifier, also used as the tokenizer name.
            device: ``"cuda"`` or ``"cuda:<n>"`` to run on a GPU; any other
                value runs on the CPU.

        Raises:
            ValueError: if ``device`` is ``"cuda:<n>"`` and ``<n>`` is not an integer.
        """
        print("Loading Whisper model...")
        self.pipe = pipeline(
            task="automatic-speech-recognition",
            model=model_name,
            tokenizer=model_name,
            device=self._device_index(device),
        )
        print("Whisper model loaded.")

    @staticmethod
    def _device_index(device: str) -> int:
        """Translate a device string into the integer index passed to ``pipeline``.

        ``"cuda"`` maps to 0 and ``"cuda:<n>"`` to ``n``; everything else maps
        to -1 (CPU). The previous exact comparison with ``"cuda"`` silently sent
        values such as ``"cuda:0"`` to the CPU.
        """
        if not isinstance(device, str):
            return -1
        family, _, ordinal = device.strip().lower().partition(":")
        if family != "cuda":
            return -1
        return int(ordinal) if ordinal else 0

    def transcribe(self, audio_file: str) -> str:
        """Run the pipeline on ``audio_file`` and return the stripped text.

        Returns an empty string when the pipeline output has no ``"text"`` key.
        """
        result = self.pipe(audio_file)
        return result.get("text", "").strip()
--- a/telegram_bot/handlers/audio_handler.py
+++ b/telegram_bot/handlers/audio_handler.py
@ -2,6 +2,7 @@ import os
 import uuid
 from functools import partial
 from aiogram import types, Dispatcher, F
 import ffmpeg
 async def handle_voice_and_video(message: types.Message, redis_service, storage_path: str):
    file_id = None
@ -17,24 +18,60 @@ async def handle_voice_and_video(message: types.Message, redis_service, storage_
    file_path = file.file_path
    file_uuid = str(uuid.uuid4())
-    filename = f"{file_uuid}_{os.path.basename(file_path)}"
+    original_filename = os.path.basename(file_path)
    temp_filename = f"{file_uuid}_{original_filename}"
    os.makedirs(storage_path, exist_ok=True)
-    destination = os.path.join(storage_path, filename)
+    
    temp_destination = os.path.join(storage_path, temp_filename)
-    await message.bot.download_file(file_path, destination)
+    await message.bot.download_file(file_path, temp_destination)
    wav_filename = f"{file_uuid}.wav"
    wav_destination = os.path.join(storage_path, wav_filename)
    convert_to_wav(temp_destination, wav_destination)
    os.remove(temp_destination)
    task_data = {
        "uuid": file_uuid,
-        "file_local_path": destination,
+        "file_path": wav_destination,
        "user_id": message.from_user.id,
-        "chat_id": message.chat.id
+        "chat_id": message.chat.id,
        "message_id": message.message_id
    }
-    redis_service.publish_task(task_data)
+    
-    await message.reply("Your message has been received and queued for processing.")
+    await redis_service.publish_task(task_data)
    await message.reply("Your message has been received, converted to WAV, and queued for processing.\nWaiting for transcription...")
    text = await redis_service.wait_for_text(
        user_id=message.from_user.id,
        chat_id=message.chat.id,
        message_id=message.message_id
    )
    if text:
        await message.reply(f"Transcription result:\n{text}")
    else:
        await message.reply("Sorry, transcription result was not received within the timeout.")
 def convert_to_wav(input_file: str, output_file: str):
    """
    Convert any audio/video file to a .wav file at 16 kHz with 1 channel (mono).

    Any exception raised while building or running the ffmpeg command is
    caught and printed; nothing is re-raised and nothing is returned.
    """
    # NOTE(review): a failed conversion is only printed, so the caller cannot
    # tell whether output_file was actually written.
    wav_options = {
        "format": "wav",
        "acodec": "pcm_s16le",
        "ac": 1,
        "ar": "16000",
    }
    try:
        source = ffmpeg.input(input_file)
        target = source.output(output_file, **wav_options)
        target.run(overwrite_output=True)
    except Exception as e:
        print(f"Error converting {input_file} to WAV: {e}")
 def register_audio_handlers(dp: Dispatcher, redis_service, storage_path: str):
    # Оборачиваем callback для передачи дополнительных аргументов
    handler_callback = partial(handle_voice_and_video, redis_service=redis_service, storage_path=storage_path)
-    dp.message.register(
+    # Регистрируем хэндлер с фильтром по content_type
-        handler_callback,
+    dp.message.register(handler_callback, F.content_type.in_({types.ContentType.VOICE, types.ContentType.VIDEO_NOTE}))
        F.content_type.in_({types.ContentType.VOICE, types.ContentType.VIDEO_NOTE})
    )
--- a/telegram_bot/services/redis_service.py
+++ b/telegram_bot/services/redis_service.py
@ -1,10 +1,29 @@
 import json
-import redis
+import redis.asyncio as redis
 class RedisService:
    def __init__(self, host: str, port: int):
        self.client = redis.Redis(host=host, port=port, decode_responses=True)
-    def publish_task(self, task_data: dict):
+    async def publish_task(self, task_data: dict):
        channel = "audio_tasks"
-        self.client.publish(channel, json.dumps(task_data))
+        await self.client.publish(channel, json.dumps(task_data))
    async def wait_for_text(self, user_id: int, chat_id: int, message_id: int, timeout: int = 30):
        """Wait for the transcription that answers one specific message.

        Subscribes to the ``"texts"`` channel and reads published results until
        one carries the same ``user_id``, ``chat_id`` and ``message_id``.

        Args:
            user_id: id that the result must carry in ``"user_id"``.
            chat_id: id that the result must carry in ``"chat_id"``.
            message_id: id that the result must carry in ``"message_id"``.
            timeout: maximum number of seconds to wait; ``None`` waits forever.

        Returns:
            The ``"text"`` value of the matching result, or ``None`` when no
            matching result arrives within ``timeout`` seconds.
        """
        # Local import: this method is the only code here that needs asyncio.
        import asyncio

        pubsub = self.client.pubsub()
        await pubsub.subscribe("texts")

        async def _first_matching_text():
            """Read the channel until a result for the requested message shows up."""
            async for message in pubsub.listen():
                if message["type"] != "message":
                    continue
                try:
                    data = json.loads(message["data"])
                except (KeyError, TypeError, ValueError):
                    continue
                # Valid JSON that is not an object cannot be a result; skip it
                # instead of failing on .get().
                if not isinstance(data, dict):
                    continue
                if (data.get("user_id") == user_id and
                        data.get("chat_id") == chat_id and
                        data.get("message_id") == message_id):
                    return data.get("text")
            return None

        try:
            # listen() has no end of its own, so the timeout has to be enforced
            # here; previously the `timeout` argument was never applied.
            return await asyncio.wait_for(_first_matching_text(), timeout=timeout)
        except asyncio.TimeoutError:
            return None
        finally:
            await pubsub.unsubscribe("texts")
Author	SHA1	Message	Date
itqop	65c78a31d3	fix	2025-02-23 18:23:12 +03:00
itqop	3419f0c19c	add speech service	2025-02-23 18:22:33 +03:00
itqop	d6e060c1d1	Add speech service	2025-02-23 17:25:10 +03:00
itqop	3360c7219b	Edit bot	2025-02-23 17:24:56 +03:00