ruwiki-test/src/runner.py

215 lines
7.2 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import asyncio
import signal
import time
from typing import Any

import structlog

from .models import AppConfig, ProcessingStats, SimplifyCommand
from .services import SimplifyService
from .sources import FileSource
# Module-level structlog logger. AsyncRunner does not use it: the class binds
# its own logger with service="runner" in __init__.
logger = structlog.get_logger()
class AsyncRunner:
    """Run article simplification concurrently on top of an asyncio queue.

    Commands read from a ``FileSource`` are put on an internal queue and
    consumed by ``max_workers`` worker tasks.  Each worker hands a command to
    ``simplify_service.process_command`` and records the outcome in
    ``self.stats``.  SIGINT/SIGTERM request a graceful shutdown: a worker
    finishes the command it is currently processing and then stops.
    """

    # Seconds an idle worker waits for a queue item before it re-checks the
    # shutdown flag.  This bounds how long a shutdown request goes unnoticed.
    _QUEUE_POLL_TIMEOUT: float = 1.0

    # Signals that request a graceful shutdown while a run is in progress.
    _SHUTDOWN_SIGNALS = (signal.SIGINT, signal.SIGTERM)

    def __init__(
        self,
        config: AppConfig,
        simplify_service: SimplifyService,
        max_workers: int = 10,
    ) -> None:
        """Create a runner.

        Args:
            config: Application configuration; stored as ``self.config``.
            simplify_service: Service whose ``process_command`` is awaited
                for every queued command.
            max_workers: Number of worker tasks started per run.
        """
        self.config = config
        self.simplify_service = simplify_service
        self.max_workers = max_workers
        self._task_queue: asyncio.Queue[SimplifyCommand] = asyncio.Queue()
        self._workers: list[asyncio.Task[None]] = []
        self._shutdown_event = asyncio.Event()
        self.stats = ProcessingStats()
        # time.monotonic() reading taken when processing starts; None before.
        self._start_time: float | None = None
        # Handlers that were active before ours, keyed by signal number, so
        # that _cleanup() can put them back.
        self._previous_signal_handlers: dict[int, Any] = {}
        self.logger = structlog.get_logger().bind(service="runner")

    async def run_from_file(
        self,
        input_file: str,
        force_reprocess: bool = False,
        max_articles: int | None = None,
    ) -> ProcessingStats:
        """Load commands from ``input_file`` and process them.

        Args:
            input_file: Path handed to ``FileSource``.
            force_reprocess: Forwarded to ``FileSource.read_urls``.
            max_articles: Upper bound on the number of queued commands;
                ``None`` or ``0`` means no limit.

        Returns:
            The accumulated ``ProcessingStats`` of this runner.

        Raises:
            Exception: Any error raised while loading or processing is
                logged and re-raised after cleanup.
        """
        self.logger.info(
            "Запуск обработки статей из файла",
            input_file=input_file,
            force_reprocess=force_reprocess,
            max_workers=self.max_workers,
            max_articles=max_articles,
        )
        # A previous run leaves the event set; without clearing it the new
        # workers would exit immediately and nothing would be processed.
        self._shutdown_event.clear()
        self._setup_signal_handlers()
        try:
            source = FileSource(input_file)
            await self._load_tasks_from_source(source, force_reprocess, max_articles)
            await self._run_processing()
        except Exception as e:
            self.logger.error("Ошибка при выполнении runner", error=str(e))
            raise
        finally:
            await self._cleanup()
        return self.stats

    async def _load_tasks_from_source(
        self,
        source: FileSource,
        force_reprocess: bool,
        max_articles: int | None,
    ) -> None:
        """Fill the task queue from ``source``, honouring ``max_articles``.

        A falsy ``max_articles`` (``None`` or ``0``) means "no limit"; a
        negative value loads nothing.
        """
        loaded_count = 0
        unlimited = not max_articles
        if unlimited or max_articles > 0:
            async for command in source.read_urls(force_reprocess=force_reprocess):
                await self._task_queue.put(command)
                loaded_count += 1
                # Check right after enqueueing so the source is not asked for
                # an extra item that would only be thrown away.
                if not unlimited and loaded_count >= max_articles:
                    break
        self.logger.info("Задачи загружены в очередь", count=loaded_count)

    async def _run_processing(self) -> None:
        """Start the workers and wait until the work is done or stopped.

        Returns when the queue has been fully processed, or when every worker
        has exited (shutdown requested, or all workers failed), whichever
        happens first.
        """
        self._start_time = time.monotonic()
        self.logger.info("Запуск worker корутин", count=self.max_workers)
        for worker_id in range(self.max_workers):
            worker = asyncio.create_task(self._worker_loop(worker_id=worker_id))
            self._workers.append(worker)

        # Waiting on queue.join() alone would hang forever once the workers
        # stop consuming (shutdown signal, or every worker died), so race it
        # against the completion of all workers.
        queue_drained = asyncio.create_task(self._task_queue.join())
        workers_finished = asyncio.gather(*self._workers, return_exceptions=True)
        try:
            await asyncio.wait(
                {queue_drained, workers_finished},
                return_when=asyncio.FIRST_COMPLETED,
            )
            if not queue_drained.done():
                self.logger.warning(
                    "Обработка остановлена до опустошения очереди",
                    remaining=self._task_queue.qsize(),
                )
            # Tell idle workers to stop polling, then wait for them to exit.
            self._shutdown_event.set()
            await workers_finished
        finally:
            # No-op when join() already completed.
            queue_drained.cancel()

    async def _worker_loop(self, worker_id: int) -> None:
        """Consume commands from the queue until shutdown is requested.

        Every dequeued command is acknowledged with ``task_done()`` regardless
        of the outcome.  An exception raised by the service is logged and
        recorded as a failure result; an exception raised outside that
        handling terminates this worker.
        """
        worker_logger = self.logger.bind(worker_id=worker_id)
        worker_logger.info("Worker запущен")
        processed_count = 0

        while not self._shutdown_event.is_set():
            try:
                try:
                    command = await asyncio.wait_for(
                        self._task_queue.get(),
                        timeout=self._QUEUE_POLL_TIMEOUT,
                    )
                except asyncio.TimeoutError:
                    # Queue is idle: loop around to re-check the shutdown flag.
                    continue

                try:
                    result = await self.simplify_service.process_command(command)
                    self.stats.add_result(result)
                    processed_count += 1
                    if result.success:
                        worker_logger.info(
                            "Статья обработана успешно",
                            url=command.url,
                            title=result.title,
                            tokens_in=result.token_count_raw,
                            tokens_out=result.token_count_simplified,
                        )
                    else:
                        worker_logger.warning(
                            "Ошибка при обработке статьи",
                            url=command.url,
                            error=result.error_message,
                        )
                except Exception as e:
                    worker_logger.error(
                        "Неожиданная ошибка в worker",
                        url=command.url,
                        error=str(e),
                    )
                    from .models import ProcessingResult

                    error_result = ProcessingResult.failure_result(
                        command.url,
                        f"Неожиданная ошибка: {e!s}",
                    )
                    self.stats.add_result(error_result)
                finally:
                    self._task_queue.task_done()
            except Exception as e:
                worker_logger.error("Критическая ошибка в worker loop", error=str(e))
                break

        worker_logger.info("Worker завершён", processed_articles=processed_count)

    def _setup_signal_handlers(self) -> None:
        """Install SIGINT/SIGTERM handlers that request a graceful shutdown.

        The previously active handlers are remembered so that ``_cleanup``
        can restore them.  ``signal.signal`` raises ``ValueError`` when it is
        not called from the main thread; that case is logged and ignored.
        """

        def signal_handler(signum: int, frame: object) -> None:
            signal_name = signal.Signals(signum).name
            self.logger.info(f"Получен сигнал {signal_name}, начинаем graceful shutdown")
            self._shutdown_event.set()

        try:
            for signum in self._SHUTDOWN_SIGNALS:
                previous = signal.signal(signum, signal_handler)
                # setdefault keeps the original handler even if this method
                # runs twice without an intervening cleanup.
                self._previous_signal_handlers.setdefault(signum, previous)
        except ValueError:
            self.logger.warning("Не удалось настроить обработчики сигналов")

    def _restore_signal_handlers(self) -> None:
        """Re-install the handlers that were active before this run."""
        while self._previous_signal_handlers:
            signum, previous = self._previous_signal_handlers.popitem()
            if previous is None:
                # The old handler was not installed from Python, so
                # signal.signal() cannot re-install it.
                continue
            try:
                signal.signal(signum, previous)
            except ValueError:
                self.logger.warning("Не удалось восстановить обработчики сигналов")

    async def _cleanup(self) -> None:
        """Restore signal handlers and cancel any worker still running."""
        self.logger.info("Начинаем очистку ресурсов")
        # Restore first so it happens even if waiting for the workers below
        # is itself interrupted.
        self._restore_signal_handlers()

        for worker in self._workers:
            if not worker.done():
                worker.cancel()

        if self._workers:
            results = await asyncio.gather(*self._workers, return_exceptions=True)
            cancelled_count = sum(1 for r in results if isinstance(r, asyncio.CancelledError))
            if cancelled_count > 0:
                self.logger.info("Workers отменены", count=cancelled_count)
        self._workers.clear()

    def get_progress_info(self) -> dict[str, Any]:
        """Return a snapshot of counters, throughput and queue state.

        ``elapsed_time`` and ``articles_per_minute`` are ``0`` until
        processing has started.
        """
        elapsed_time = 0.0
        if self._start_time is not None:
            elapsed_time = time.monotonic() - self._start_time

        articles_per_minute = 0.0
        if elapsed_time > 0:
            articles_per_minute = (self.stats.successful * 60) / elapsed_time

        return {
            "total_processed": self.stats.total_processed,
            "successful": self.stats.successful,
            "failed": self.stats.failed,
            "success_rate": self.stats.success_rate,
            "elapsed_time": elapsed_time,
            "articles_per_minute": articles_per_minute,
            "queue_size": self._task_queue.qsize(),
            "active_workers": sum(1 for w in self._workers if not w.done()),
        }

    async def health_check(self) -> dict[str, Any]:
        """Return the service's health report extended with runner state.

        The mapping returned by ``simplify_service.health_check()`` is
        updated in place with ``runner_active``, ``queue_size`` and
        ``workers_count`` and then returned.
        """
        checks = await self.simplify_service.health_check()
        checks.update(
            {
                "runner_active": bool(self._workers and not self._shutdown_event.is_set()),
                "queue_size": self._task_queue.qsize(),
                "workers_count": len(self._workers),
            }
        )
        return checks