diff --git a/src/adapters/llm.py b/src/adapters/llm.py index b5f72f8..ded3209 100644 --- a/src/adapters/llm.py +++ b/src/adapters/llm.py @@ -35,8 +35,7 @@ class LLMProviderAdapter(BaseAdapter): self.config = config self.client = AsyncOpenAI( - api_key=config.openai_api_key, - http_client=self._build_http_client() + api_key=config.openai_api_key, http_client=self._build_http_client() ) try: @@ -187,14 +186,9 @@ class LLMProviderAdapter(BaseAdapter): def _build_http_client(self) -> httpx.AsyncClient: if self.config.openai_proxy_url: - return httpx.AsyncClient( - proxy=self.config.openai_proxy_url, - timeout=60.0 - ) + return httpx.AsyncClient(proxy=self.config.openai_proxy_url, timeout=60.0) return httpx.AsyncClient(timeout=60.0) - - async def health_check(self) -> bool: try: test_messages = [{"role": "user", "content": "Ответь 'OK' если всё работает."}] diff --git a/src/cli.py b/src/cli.py index b1531cf..2f0db69 100644 --- a/src/cli.py +++ b/src/cli.py @@ -254,9 +254,7 @@ def list_articles( for article in articles: title = article.title[:47] + "..." if len(article.title) > 50 else article.title - click.echo( - f"{article.id:<5} {article.status.value:<12} {title:<50}" - ) + click.echo(f"{article.id:<5} {article.status.value:<12} {title:<50}") except Exception as e: click.echo(f"Ошибка: {e}", err=True) diff --git a/src/dependency_injection.py b/src/dependency_injection.py index 68cbc04..45382bc 100644 --- a/src/dependency_injection.py +++ b/src/dependency_injection.py @@ -114,7 +114,7 @@ class DependencyContainer: checks["database"] = await db_service.health_check() except Exception: checks["database"] = False - + try: write_queue = self.get_write_queue() checks["write_queue"] = ( diff --git a/src/models/article_dto.py b/src/models/article_dto.py index cb795fd..682b93b 100644 --- a/src/models/article_dto.py +++ b/src/models/article_dto.py @@ -19,4 +19,4 @@ class ArticleDTO: created_at: datetime id: Optional[int] = None simplified_text: Optional[str] = None - updated_at: Optional[datetime] = None \ No newline at end of file + updated_at: Optional[datetime] = None diff --git a/src/models/constants.py b/src/models/constants.py index a744481..9a56c3e 100644 --- a/src/models/constants.py +++ b/src/models/constants.py @@ -3,4 +3,4 @@ MAX_TOKEN_LIMIT_WITH_BUFFER = 16000 ARTICLE_NAME_INDEX = -1 MIN_WIKI_PATH_PARTS = 2 WIKI_PATH_INDEX = 1 -WRITE_QUEUE_BATCH_SIZE = 10 +WRITE_QUEUE_BATCH_SIZE = 10 diff --git a/src/services/database.py b/src/services/database.py index 3b6241e..f08c7ca 100644 --- a/src/services/database.py +++ b/src/services/database.py @@ -20,7 +20,8 @@ class DatabaseService: self.logger.info("Создание схемы базы данных", db_path=self.config.db_path) async with aiosqlite.connect(self.config.db_path) as conn: - await conn.execute(""" + await conn.execute( + """ CREATE TABLE IF NOT EXISTS articles ( id INTEGER PRIMARY KEY AUTOINCREMENT, url TEXT NOT NULL UNIQUE, @@ -35,16 +36,21 @@ class DatabaseService: created_at TEXT NOT NULL, updated_at TEXT ) - """) - - await conn.execute(""" + """ + ) + + await conn.execute( + """ CREATE INDEX IF NOT EXISTS idx_articles_url ON articles(url) - """) - - await conn.execute(""" + """ + ) + + await conn.execute( + """ CREATE INDEX IF NOT EXISTS idx_articles_status ON articles(status) - """) - + """ + ) + await conn.commit() await self._configure_sqlite() @@ -82,5 +88,6 @@ class DatabaseService: except Exception as e: self.logger.error("Database health check failed", error=str(e)) import traceback + self.logger.error("Traceback", traceback=traceback.format_exc()) return False diff --git a/src/services/simplify_service.py b/src/services/simplify_service.py index 922a4d3..cea27e6 100644 --- a/src/services/simplify_service.py +++ b/src/services/simplify_service.py @@ -71,6 +71,7 @@ class SimplifyService: article = await self._create_or_update_article(command, page_info) from src.models.article_dto import ArticleStatus + article.status = ArticleStatus.PENDING await self.repository.update_article(article) @@ -79,11 +80,13 @@ class SimplifyService: raw_text=page_info.content, ) - self.logger.info("Упрощение завершено", - url=command.url, - simplified_length=len(simplified_text), - input_tokens=input_tokens, - output_tokens=output_tokens) + self.logger.info( + "Упрощение завершено", + url=command.url, + simplified_length=len(simplified_text), + input_tokens=input_tokens, + output_tokens=output_tokens, + ) if not simplified_text.strip(): self.logger.error("Получен пустой simplified_text!", url=command.url) @@ -145,6 +148,7 @@ class SimplifyService: if command.force_reprocess: from src.models.article_dto import ArticleStatus + article.title = page_info.title article.raw_text = page_info.content article.status = ArticleStatus.PENDING @@ -242,7 +246,9 @@ class SimplifyService: "Объединённый текст превышает лимит, обрезаем", final_tokens=final_tokens, ) - combined_text = self._truncate_to_token_limit(combined_text, MAX_TOKEN_LIMIT_WITH_BUFFER) + combined_text = self._truncate_to_token_limit( + combined_text, MAX_TOKEN_LIMIT_WITH_BUFFER + ) total_output_tokens = self.llm_adapter.count_tokens(combined_text) return combined_text, total_input_tokens, total_output_tokens diff --git a/src/services/write_queue.py b/src/services/write_queue.py index 1bbf70f..5d25139 100644 --- a/src/services/write_queue.py +++ b/src/services/write_queue.py @@ -66,7 +66,7 @@ class AsyncWriteQueue: async def update_from_result(self, result: ProcessingResult) -> ArticleDTO: self.logger.info("Получен результат для записи", url=result.url, success=result.success) - + future: asyncio.Future[ArticleDTO] = asyncio.Future() operation = WriteOperation( @@ -78,7 +78,7 @@ class AsyncWriteQueue: self.logger.info("Добавляем операцию в очередь", url=result.url) await self._queue.put(operation) self.logger.info("Операция добавлена в очередь, ожидаем результат", url=result.url) - + result_article = await future self.logger.info("Получен результат из очереди", url=result.url) return result_article @@ -124,10 +124,12 @@ class AsyncWriteQueue: async def _process_operation_safely(self, operation: WriteOperation) -> None: try: - self.logger.info("Начинаем обработку операции", - operation_type=operation.operation_type, - url=operation.result.url if operation.result else "N/A") - + self.logger.info( + "Начинаем обработку операции", + operation_type=operation.operation_type, + url=operation.result.url if operation.result else "N/A", + ) + await self._process_single_operation(operation) self._total_operations += 1 @@ -135,7 +137,9 @@ class AsyncWriteQueue: if operation.operation_type == "update_from_result" and operation.result: self.logger.info("Получаем статью из репозитория", url=operation.result.url) article = await self.repository.get_by_url(operation.result.url) - self.logger.info("Статья получена, устанавливаем результат", url=operation.result.url) + self.logger.info( + "Статья получена, устанавливаем результат", url=operation.result.url + ) operation.future.set_result(article) except Exception as e: @@ -159,7 +163,7 @@ class AsyncWriteQueue: async def _update_article_from_result(self, result: ProcessingResult) -> ArticleDTO: self.logger.info("Начинаем обновление статьи из результата", url=result.url) - + article = await self.repository.get_by_url(result.url) if not article: msg = f"Статья с URL {result.url} не найдена" @@ -173,16 +177,18 @@ class AsyncWriteQueue: raise ValueError(msg) from src.models.article_dto import ArticleStatus + article.simplified_text = result.simplified_text article.status = ArticleStatus.SIMPLIFIED else: from src.models.article_dto import ArticleStatus + article.status = ArticleStatus.FAILED self.logger.info("Сохраняем обновлённую статью", url=result.url) updated_article = await self.repository.update_article(article) self.logger.info("Статья успешно обновлена", url=result.url) - + return updated_article @property diff --git a/src/sources.py b/src/sources.py index e2d1f1f..97aee38 100644 --- a/src/sources.py +++ b/src/sources.py @@ -66,14 +66,18 @@ class FileSource: self.logger.info("Начинаем проверку URL", raw_url=url) parsed = urlparse(url) - self.logger.info("Разобранный URL", scheme=parsed.scheme, netloc=parsed.netloc, path=parsed.path) + self.logger.info( + "Разобранный URL", scheme=parsed.scheme, netloc=parsed.netloc, path=parsed.path + ) if parsed.scheme not in ("http", "https"): self.logger.info("Отклонено: неподдерживаемая схема", scheme=parsed.scheme, url=url) return False if "ruwiki" not in parsed.netloc: - self.logger.info("Отклонено: домен не содержит 'ruwiki'", netloc=parsed.netloc, url=url) + self.logger.info( + "Отклонено: домен не содержит 'ruwiki'", netloc=parsed.netloc, url=url + ) return False path_parts = parsed.path.split("/") @@ -87,7 +91,10 @@ class FileSource: if path_parts[WIKI_PATH_INDEX] != "wiki": self.logger.info( - "Отклонено: неверный сегмент пути", expected="wiki", actual=path_parts[WIKI_PATH_INDEX], url=url + "Отклонено: неверный сегмент пути", + expected="wiki", + actual=path_parts[WIKI_PATH_INDEX], + url=url, ) return False @@ -107,7 +114,6 @@ class FileSource: self.logger.info("Ошибка при проверке URL", error=str(e), url=url) return False - async def count_urls(self) -> int: count = 0 async for _ in self.read_urls():