10 files reformatted with black codestyle
parent 03caebfb0b
commit 6a33715356
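A reformat like this is normally produced by running black over the package, e.g. "python -m black src/" (or "python -m black --check --diff src/" to preview without rewriting); the src/ path is an assumption based on the import paths visible below. As a rough in-process sketch of the normalization applied in the first hunk, using black's format_str/Mode entry points; the 100-character line length is a guess inferred from which long lines were left untouched, not a setting shown in this commit:

    import black

    # Hand-wrapped call roughly as it looks before the commit (names shortened;
    # an illustrative snippet, not the project source).
    before = (
        "client = AsyncOpenAI(\n"
        "    api_key=config.openai_api_key,\n"
        "    http_client=build_http_client()\n"
        ")\n"
    )

    # Without a trailing comma after the last argument, black merges the call onto
    # as few lines as fit the configured length -- here a single line; in the real
    # hunk the longer names keep both arguments on one continuation line instead.
    print(black.format_str(before, mode=black.Mode(line_length=100)))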
@@ -35,8 +35,7 @@ class LLMProviderAdapter(BaseAdapter):
         self.config = config
 
         self.client = AsyncOpenAI(
-            api_key=config.openai_api_key,
-            http_client=self._build_http_client()
+            api_key=config.openai_api_key, http_client=self._build_http_client()
         )
 
         try:
@@ -187,14 +186,9 @@ class LLMProviderAdapter(BaseAdapter):
 
     def _build_http_client(self) -> httpx.AsyncClient:
         if self.config.openai_proxy_url:
-            return httpx.AsyncClient(
-                proxy=self.config.openai_proxy_url,
-                timeout=60.0
-            )
+            return httpx.AsyncClient(proxy=self.config.openai_proxy_url, timeout=60.0)
         return httpx.AsyncClient(timeout=60.0)
 
-
-
     async def health_check(self) -> bool:
         try:
             test_messages = [{"role": "user", "content": "Ответь 'OK' если всё работает."}]
@@ -254,9 +254,7 @@ def list_articles(
         for article in articles:
             title = article.title[:47] + "..." if len(article.title) > 50 else article.title
 
-            click.echo(
-                f"{article.id:<5} {article.status.value:<12} {title:<50}"
-            )
+            click.echo(f"{article.id:<5} {article.status.value:<12} {title:<50}")
 
     except Exception as e:
         click.echo(f"Ошибка: {e}", err=True)
@@ -114,7 +114,7 @@ class DependencyContainer:
             checks["database"] = await db_service.health_check()
         except Exception:
             checks["database"] = False
-
+
         try:
             write_queue = self.get_write_queue()
             checks["write_queue"] = (
@@ -19,4 +19,4 @@ class ArticleDTO:
     created_at: datetime
     id: Optional[int] = None
     simplified_text: Optional[str] = None
-    updated_at: Optional[datetime] = None
+    updated_at: Optional[datetime] = None
@@ -3,4 +3,4 @@ MAX_TOKEN_LIMIT_WITH_BUFFER = 16000
 ARTICLE_NAME_INDEX = -1
 MIN_WIKI_PATH_PARTS = 2
 WIKI_PATH_INDEX = 1
-WRITE_QUEUE_BATCH_SIZE = 10
+WRITE_QUEUE_BATCH_SIZE = 10
@@ -20,7 +20,8 @@ class DatabaseService:
         self.logger.info("Создание схемы базы данных", db_path=self.config.db_path)
 
         async with aiosqlite.connect(self.config.db_path) as conn:
-            await conn.execute("""
+            await conn.execute(
+                """
                 CREATE TABLE IF NOT EXISTS articles (
                     id INTEGER PRIMARY KEY AUTOINCREMENT,
                     url TEXT NOT NULL UNIQUE,
@@ -35,16 +36,21 @@ class DatabaseService:
                     created_at TEXT NOT NULL,
                     updated_at TEXT
                 )
-            """)
-
-            await conn.execute("""
+            """
+            )
+
+            await conn.execute(
+                """
                 CREATE INDEX IF NOT EXISTS idx_articles_url ON articles(url)
-            """)
-
-            await conn.execute("""
+            """
+            )
+
+            await conn.execute(
+                """
                 CREATE INDEX IF NOT EXISTS idx_articles_status ON articles(status)
-            """)
-
+            """
+            )
+
             await conn.commit()
 
         await self._configure_sqlite()
@@ -82,5 +88,6 @@ class DatabaseService:
         except Exception as e:
             self.logger.error("Database health check failed", error=str(e))
             import traceback
+
             self.logger.error("Traceback", traceback=traceback.format_exc())
             return False
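A note on the DatabaseService hunks above: black reflows the code around a string literal but never edits the literal itself, so the SQL bodies inside the triple-quoted strings are byte-for-byte unchanged; only the surrounding conn.execute(...) calls were re-wrapped. A tiny self-contained check of that property (hypothetical snippet, not project code):

    import black

    # Oddly indented SQL inside a triple-quoted literal: black leaves the string
    # body exactly as written, because it never rewrites string contents.
    snippet = 'QUERY = """\n  SELECT *\n      FROM articles\n"""\n'
    assert black.format_str(snippet, mode=black.Mode()) == snippet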
@@ -71,6 +71,7 @@ class SimplifyService:
         article = await self._create_or_update_article(command, page_info)
 
         from src.models.article_dto import ArticleStatus
+
         article.status = ArticleStatus.PENDING
         await self.repository.update_article(article)
 
@@ -79,11 +80,13 @@ class SimplifyService:
             raw_text=page_info.content,
         )
 
-        self.logger.info("Упрощение завершено",
-                        url=command.url,
-                        simplified_length=len(simplified_text),
-                        input_tokens=input_tokens,
-                        output_tokens=output_tokens)
+        self.logger.info(
+            "Упрощение завершено",
+            url=command.url,
+            simplified_length=len(simplified_text),
+            input_tokens=input_tokens,
+            output_tokens=output_tokens,
+        )
 
         if not simplified_text.strip():
             self.logger.error("Получен пустой simplified_text!", url=command.url)
@@ -145,6 +148,7 @@ class SimplifyService:
 
         if command.force_reprocess:
             from src.models.article_dto import ArticleStatus
+
             article.title = page_info.title
             article.raw_text = page_info.content
             article.status = ArticleStatus.PENDING
@@ -242,7 +246,9 @@ class SimplifyService:
                 "Объединённый текст превышает лимит, обрезаем",
                 final_tokens=final_tokens,
             )
-            combined_text = self._truncate_to_token_limit(combined_text, MAX_TOKEN_LIMIT_WITH_BUFFER)
+            combined_text = self._truncate_to_token_limit(
+                combined_text, MAX_TOKEN_LIMIT_WITH_BUFFER
+            )
         total_output_tokens = self.llm_adapter.count_tokens(combined_text)
 
         return combined_text, total_input_tokens, total_output_tokens
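The trailing commas black added in the SimplifyService hunks above (and in the AsyncWriteQueue hunks below), such as the one after output_tokens=output_tokens, are its "magic trailing comma": once the last argument ends with a comma, black keeps the call exploded with one argument per line on later runs instead of collapsing it. A minimal illustration with made-up names, not project code:

    import black

    # The trailing comma after url=url pins the call open: black keeps one
    # argument per line instead of collapsing the call onto a single line.
    pinned = "log(\n    'done',\n    url=url,\n)\n"
    print(black.format_str(pinned, mode=black.Mode()))
    # Output (string quotes are also normalized to double quotes):
    # log(
    #     "done",
    #     url=url,
    # )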
@@ -66,7 +66,7 @@ class AsyncWriteQueue:
 
     async def update_from_result(self, result: ProcessingResult) -> ArticleDTO:
         self.logger.info("Получен результат для записи", url=result.url, success=result.success)
-
+
         future: asyncio.Future[ArticleDTO] = asyncio.Future()
 
         operation = WriteOperation(
@@ -78,7 +78,7 @@ class AsyncWriteQueue:
         self.logger.info("Добавляем операцию в очередь", url=result.url)
         await self._queue.put(operation)
         self.logger.info("Операция добавлена в очередь, ожидаем результат", url=result.url)
-
+
         result_article = await future
         self.logger.info("Получен результат из очереди", url=result.url)
         return result_article
@@ -124,10 +124,12 @@ class AsyncWriteQueue:
 
     async def _process_operation_safely(self, operation: WriteOperation) -> None:
         try:
-            self.logger.info("Начинаем обработку операции",
-                           operation_type=operation.operation_type,
-                           url=operation.result.url if operation.result else "N/A")
-
+            self.logger.info(
+                "Начинаем обработку операции",
+                operation_type=operation.operation_type,
+                url=operation.result.url if operation.result else "N/A",
+            )
+
             await self._process_single_operation(operation)
             self._total_operations += 1
 
@@ -135,7 +137,9 @@ class AsyncWriteQueue:
             if operation.operation_type == "update_from_result" and operation.result:
                 self.logger.info("Получаем статью из репозитория", url=operation.result.url)
                 article = await self.repository.get_by_url(operation.result.url)
-                self.logger.info("Статья получена, устанавливаем результат", url=operation.result.url)
+                self.logger.info(
+                    "Статья получена, устанавливаем результат", url=operation.result.url
+                )
                 operation.future.set_result(article)
 
         except Exception as e:
@@ -159,7 +163,7 @@ class AsyncWriteQueue:
 
     async def _update_article_from_result(self, result: ProcessingResult) -> ArticleDTO:
         self.logger.info("Начинаем обновление статьи из результата", url=result.url)
-
+
         article = await self.repository.get_by_url(result.url)
         if not article:
             msg = f"Статья с URL {result.url} не найдена"
@@ -173,16 +177,18 @@ class AsyncWriteQueue:
                 raise ValueError(msg)
 
             from src.models.article_dto import ArticleStatus
+
             article.simplified_text = result.simplified_text
             article.status = ArticleStatus.SIMPLIFIED
         else:
             from src.models.article_dto import ArticleStatus
+
             article.status = ArticleStatus.FAILED
 
         self.logger.info("Сохраняем обновлённую статью", url=result.url)
         updated_article = await self.repository.update_article(article)
         self.logger.info("Статья успешно обновлена", url=result.url)
-
+
         return updated_article
 
     @property
@@ -66,14 +66,18 @@ class FileSource:
         self.logger.info("Начинаем проверку URL", raw_url=url)
 
         parsed = urlparse(url)
-        self.logger.info("Разобранный URL", scheme=parsed.scheme, netloc=parsed.netloc, path=parsed.path)
+        self.logger.info(
+            "Разобранный URL", scheme=parsed.scheme, netloc=parsed.netloc, path=parsed.path
+        )
 
         if parsed.scheme not in ("http", "https"):
             self.logger.info("Отклонено: неподдерживаемая схема", scheme=parsed.scheme, url=url)
             return False
 
         if "ruwiki" not in parsed.netloc:
-            self.logger.info("Отклонено: домен не содержит 'ruwiki'", netloc=parsed.netloc, url=url)
+            self.logger.info(
+                "Отклонено: домен не содержит 'ruwiki'", netloc=parsed.netloc, url=url
+            )
             return False
 
         path_parts = parsed.path.split("/")
@@ -87,7 +91,10 @@ class FileSource:
 
         if path_parts[WIKI_PATH_INDEX] != "wiki":
             self.logger.info(
-                "Отклонено: неверный сегмент пути", expected="wiki", actual=path_parts[WIKI_PATH_INDEX], url=url
+                "Отклонено: неверный сегмент пути",
+                expected="wiki",
+                actual=path_parts[WIKI_PATH_INDEX],
+                url=url,
             )
             return False
 
@@ -107,7 +114,6 @@ class FileSource:
             self.logger.info("Ошибка при проверке URL", error=str(e), url=url)
             return False
 
-
     async def count_urls(self) -> int:
         count = 0
         async for _ in self.read_urls():