10 files reformatted with black codestyle
parent 03caebfb0b
commit 6a33715356
@@ -35,8 +35,7 @@ class LLMProviderAdapter(BaseAdapter):
         self.config = config
 
         self.client = AsyncOpenAI(
-            api_key=config.openai_api_key,
-            http_client=self._build_http_client()
+            api_key=config.openai_api_key, http_client=self._build_http_client()
         )
 
         try:
@@ -187,14 +186,9 @@ class LLMProviderAdapter(BaseAdapter):
 
     def _build_http_client(self) -> httpx.AsyncClient:
         if self.config.openai_proxy_url:
-            return httpx.AsyncClient(
-                proxy=self.config.openai_proxy_url,
-                timeout=60.0
-            )
+            return httpx.AsyncClient(proxy=self.config.openai_proxy_url, timeout=60.0)
         return httpx.AsyncClient(timeout=60.0)
 
-
-
     async def health_check(self) -> bool:
         try:
             test_messages = [{"role": "user", "content": "Ответь 'OK' если всё работает."}]
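Both hunks above show the same black rule: a call whose arguments do not end in a trailing comma is joined back together as far as the line length allows, either onto a single line or onto one argument line inside the parentheses. A minimal sketch of that behaviour, not part of the commit and assuming the black package is installed (the names are illustrative, not from the repository):

import black

before = (
    "client = AsyncOpenAI(\n"
    "    api_key=key,\n"
    "    http_client=http_client\n"
    ")\n"
)
# No trailing comma after the last argument, and the joined call fits within
# the default 88-character limit, so black collapses it onto one line.
after = black.format_str(before, mode=black.Mode(line_length=88))
print(after)  # client = AsyncOpenAI(api_key=key, http_client=http_client)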
@@ -254,9 +254,7 @@ def list_articles(
         for article in articles:
             title = article.title[:47] + "..." if len(article.title) > 50 else article.title
 
-            click.echo(
-                f"{article.id:<5} {article.status.value:<12} {title:<50}"
-            )
+            click.echo(f"{article.id:<5} {article.status.value:<12} {title:<50}")
 
     except Exception as e:
         click.echo(f"Ошибка: {e}", err=True)
@@ -114,7 +114,7 @@ class DependencyContainer:
             checks["database"] = await db_service.health_check()
         except Exception:
             checks["database"] = False
-
+
         try:
             write_queue = self.get_write_queue()
             checks["write_queue"] = (
@@ -19,4 +19,4 @@ class ArticleDTO:
     created_at: datetime
     id: Optional[int] = None
     simplified_text: Optional[str] = None
-    updated_at: Optional[datetime] = None
+    updated_at: Optional[datetime] = None
@@ -3,4 +3,4 @@ MAX_TOKEN_LIMIT_WITH_BUFFER = 16000
 ARTICLE_NAME_INDEX = -1
 MIN_WIKI_PATH_PARTS = 2
 WIKI_PATH_INDEX = 1
-WRITE_QUEUE_BATCH_SIZE = 10
+WRITE_QUEUE_BATCH_SIZE = 10
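The three hunks above contain no visible code change, so they are most likely whitespace-only edits: black strips trailing whitespace and guarantees that every file ends with exactly one newline. A quick way to confirm the trailing-newline rule, again assuming black is installed (not part of the commit):

import black

# A module whose last line has no terminating newline.
source = "WRITE_QUEUE_BATCH_SIZE = 10"
formatted = black.format_str(source, mode=black.Mode())
assert formatted == "WRITE_QUEUE_BATCH_SIZE = 10\n"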
@@ -20,7 +20,8 @@ class DatabaseService:
         self.logger.info("Создание схемы базы данных", db_path=self.config.db_path)
 
         async with aiosqlite.connect(self.config.db_path) as conn:
-            await conn.execute("""
+            await conn.execute(
+                """
                 CREATE TABLE IF NOT EXISTS articles (
                     id INTEGER PRIMARY KEY AUTOINCREMENT,
                     url TEXT NOT NULL UNIQUE,
@@ -35,16 +36,21 @@ class DatabaseService:
                     created_at TEXT NOT NULL,
                     updated_at TEXT
                 )
-            """)
-            await conn.execute("""
+            """
+            )
+            await conn.execute(
+                """
                 CREATE INDEX IF NOT EXISTS idx_articles_url ON articles(url)
-            """)
-            await conn.execute("""
+            """
+            )
+            await conn.execute(
+                """
                 CREATE INDEX IF NOT EXISTS idx_articles_status ON articles(status)
-            """)
+            """
+            )
 
             await conn.commit()
 
         await self._configure_sqlite()
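The pattern in the two DatabaseService hunks: when a call's only argument is a multi-line string that cannot sit next to the opening parenthesis, black moves the string literal onto its own line and gives the closing parenthesis a line of its own, while the string's contents are left untouched. An illustrative, self-contained sketch of the resulting shape, not taken from the repository (table and path names are made up):

import asyncio

import aiosqlite  # assumption: the same driver the diff uses


async def create_schema(db_path: str = "example.db") -> None:
    async with aiosqlite.connect(db_path) as conn:
        # black keeps the SQL body as-is; only the call's brackets move.
        await conn.execute(
            """
            CREATE TABLE IF NOT EXISTS articles (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                url TEXT NOT NULL UNIQUE
            )
            """
        )
        await conn.commit()


asyncio.run(create_schema())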
@@ -82,5 +88,6 @@ class DatabaseService:
         except Exception as e:
             self.logger.error("Database health check failed", error=str(e))
             import traceback
+
             self.logger.error("Traceback", traceback=traceback.format_exc())
             return False
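This hunk, and several of the SimplifyService and AsyncWriteQueue hunks below, shows the formatter used in this commit separating a function-local import from the code that follows it with one blank line. A small illustrative snippet of the resulting style (the helper and its names are hypothetical, not from the repository):

def describe_exception(exc: BaseException) -> str:
    # Hypothetical helper: a function-local import, followed by the blank
    # line that this commit's formatter inserts after it.
    import traceback

    return "".join(traceback.format_exception(type(exc), exc, exc.__traceback__))


print(describe_exception(ValueError("boom")))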
@@ -71,6 +71,7 @@ class SimplifyService:
         article = await self._create_or_update_article(command, page_info)
 
         from src.models.article_dto import ArticleStatus
+
         article.status = ArticleStatus.PENDING
         await self.repository.update_article(article)
 
@@ -79,11 +80,13 @@ class SimplifyService:
             raw_text=page_info.content,
         )
 
-        self.logger.info("Упрощение завершено",
-                         url=command.url,
-                         simplified_length=len(simplified_text),
-                         input_tokens=input_tokens,
-                         output_tokens=output_tokens)
+        self.logger.info(
+            "Упрощение завершено",
+            url=command.url,
+            simplified_length=len(simplified_text),
+            input_tokens=input_tokens,
+            output_tokens=output_tokens,
+        )
 
         if not simplified_text.strip():
             self.logger.error("Получен пустой simplified_text!", url=command.url)
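The logger call above illustrates the other direction of the line-length rule: when the joined arguments would still overflow the limit, black breaks right after the opening parenthesis, gives each argument its own line, and appends a trailing comma after the last one (the "magic trailing comma", which keeps the call exploded on later runs). A sketch with made-up names, again assuming the black package is available and not part of the commit:

import black

before = (
    'log.info("simplification finished", url=url, simplified_length=length, '
    "input_tokens=n_in, output_tokens=n_out)\n"
)
# The joined call exceeds 88 characters, so black splits it one argument per
# line and adds the trailing comma after output_tokens=n_out.
print(black.format_str(before, mode=black.Mode(line_length=88)))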
@@ -145,6 +148,7 @@ class SimplifyService:
 
         if command.force_reprocess:
             from src.models.article_dto import ArticleStatus
+
             article.title = page_info.title
             article.raw_text = page_info.content
             article.status = ArticleStatus.PENDING
@@ -242,7 +246,9 @@ class SimplifyService:
                 "Объединённый текст превышает лимит, обрезаем",
                 final_tokens=final_tokens,
             )
-            combined_text = self._truncate_to_token_limit(combined_text, MAX_TOKEN_LIMIT_WITH_BUFFER)
+            combined_text = self._truncate_to_token_limit(
+                combined_text, MAX_TOKEN_LIMIT_WITH_BUFFER
+            )
         total_output_tokens = self.llm_adapter.count_tokens(combined_text)
 
         return combined_text, total_input_tokens, total_output_tokens
@@ -66,7 +66,7 @@ class AsyncWriteQueue:
 
     async def update_from_result(self, result: ProcessingResult) -> ArticleDTO:
         self.logger.info("Получен результат для записи", url=result.url, success=result.success)
-
+
         future: asyncio.Future[ArticleDTO] = asyncio.Future()
 
         operation = WriteOperation(
@@ -78,7 +78,7 @@ class AsyncWriteQueue:
         self.logger.info("Добавляем операцию в очередь", url=result.url)
         await self._queue.put(operation)
         self.logger.info("Операция добавлена в очередь, ожидаем результат", url=result.url)
-
+
         result_article = await future
         self.logger.info("Получен результат из очереди", url=result.url)
         return result_article
@@ -124,10 +124,12 @@ class AsyncWriteQueue:
 
     async def _process_operation_safely(self, operation: WriteOperation) -> None:
        try:
-            self.logger.info("Начинаем обработку операции",
-                             operation_type=operation.operation_type,
-                             url=operation.result.url if operation.result else "N/A")
+            self.logger.info(
+                "Начинаем обработку операции",
+                operation_type=operation.operation_type,
+                url=operation.result.url if operation.result else "N/A",
+            )
 
             await self._process_single_operation(operation)
             self._total_operations += 1
 
@@ -135,7 +137,9 @@ class AsyncWriteQueue:
             if operation.operation_type == "update_from_result" and operation.result:
                 self.logger.info("Получаем статью из репозитория", url=operation.result.url)
                 article = await self.repository.get_by_url(operation.result.url)
-                self.logger.info("Статья получена, устанавливаем результат", url=operation.result.url)
+                self.logger.info(
+                    "Статья получена, устанавливаем результат", url=operation.result.url
+                )
                 operation.future.set_result(article)
 
         except Exception as e:
@@ -159,7 +163,7 @@ class AsyncWriteQueue:
 
     async def _update_article_from_result(self, result: ProcessingResult) -> ArticleDTO:
         self.logger.info("Начинаем обновление статьи из результата", url=result.url)
-
+
         article = await self.repository.get_by_url(result.url)
         if not article:
             msg = f"Статья с URL {result.url} не найдена"
@@ -173,16 +177,18 @@ class AsyncWriteQueue:
             raise ValueError(msg)
 
             from src.models.article_dto import ArticleStatus
+
             article.simplified_text = result.simplified_text
             article.status = ArticleStatus.SIMPLIFIED
         else:
             from src.models.article_dto import ArticleStatus
+
             article.status = ArticleStatus.FAILED
 
         self.logger.info("Сохраняем обновлённую статью", url=result.url)
         updated_article = await self.repository.update_article(article)
         self.logger.info("Статья успешно обновлена", url=result.url)
 
         return updated_article
 
     @property
@@ -66,14 +66,18 @@ class FileSource:
         self.logger.info("Начинаем проверку URL", raw_url=url)
 
         parsed = urlparse(url)
-        self.logger.info("Разобранный URL", scheme=parsed.scheme, netloc=parsed.netloc, path=parsed.path)
+        self.logger.info(
+            "Разобранный URL", scheme=parsed.scheme, netloc=parsed.netloc, path=parsed.path
+        )
 
         if parsed.scheme not in ("http", "https"):
             self.logger.info("Отклонено: неподдерживаемая схема", scheme=parsed.scheme, url=url)
             return False
 
         if "ruwiki" not in parsed.netloc:
-            self.logger.info("Отклонено: домен не содержит 'ruwiki'", netloc=parsed.netloc, url=url)
+            self.logger.info(
+                "Отклонено: домен не содержит 'ruwiki'", netloc=parsed.netloc, url=url
+            )
             return False
 
         path_parts = parsed.path.split("/")
@@ -87,7 +91,10 @@ class FileSource:
 
         if path_parts[WIKI_PATH_INDEX] != "wiki":
             self.logger.info(
-                "Отклонено: неверный сегмент пути", expected="wiki", actual=path_parts[WIKI_PATH_INDEX], url=url
+                "Отклонено: неверный сегмент пути",
+                expected="wiki",
+                actual=path_parts[WIKI_PATH_INDEX],
+                url=url,
             )
             return False
 
@@ -107,7 +114,6 @@ class FileSource:
             self.logger.info("Ошибка при проверке URL", error=str(e), url=url)
             return False
 
-
     async def count_urls(self) -> int:
         count = 0
         async for _ in self.read_urls():
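The final hunk, like the earlier _build_http_client one, is purely about vertical whitespace: inside an indented block black keeps at most one consecutive blank line, so the extra blank lines between methods are dropped. A last sketch, not part of the commit and assuming black is installed (the class below is made up):

import black

before = (
    "class FileSource:\n"
    "    def first(self) -> int:\n"
    "        return 1\n"
    "\n"
    "\n"
    "\n"
    "    def second(self) -> int:\n"
    "        return 2\n"
)
# Inside the class body the three blank lines are reduced to a single one.
print(black.format_str(before, mode=black.Mode()))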