# ruwiki-test/tests/test_services.py

# 371 lines
# 12 KiB
# Python

"""Тесты для сервисов."""
import tempfile
from pathlib import Path
from unittest.mock import AsyncMock, MagicMock
import pytest
from src.adapters import LLMProviderAdapter, RuWikiAdapter
from src.adapters.ruwiki import WikiPageInfo
from src.models import ProcessingResult, SimplifyCommand
from src.services import (
AsyncWriteQueue,
DatabaseService,
RecursiveCharacterTextSplitter,
SimplifyService,
)
class TestRecursiveCharacterTextSplitter:
    """Tests for ``RecursiveCharacterTextSplitter`` text chunking."""

    def test_split_short_text(self):
        """A text shorter than ``chunk_size`` is returned as one unchanged chunk."""
        text = "Это короткий текст."
        result = RecursiveCharacterTextSplitter(
            chunk_size=100, chunk_overlap=20
        ).split_text(text)

        assert len(result) == 1
        assert result[0] == text

    def test_split_long_text(self):
        """A long text yields several chunks, none longer than 60 characters."""
        splitter = RecursiveCharacterTextSplitter(chunk_size=50, chunk_overlap=10)
        pieces = splitter.split_text("Это очень длинный текст. " * 10)

        assert len(pieces) > 1
        # 60 equals chunk_size (50) plus chunk_overlap (10).
        assert all(len(piece) <= 60 for piece in pieces)

    def test_split_by_paragraphs(self):
        """A text made of three blank-line separated paragraphs gives at least two chunks."""
        paragraphs = "Первый абзац.\n\nВторой абзац.\n\nТретий абзац."
        splitter = RecursiveCharacterTextSplitter(chunk_size=100, chunk_overlap=10)

        assert len(splitter.split_text(paragraphs)) >= 2

    def test_split_empty_text(self):
        """An empty string produces an empty list of chunks."""
        splitter = RecursiveCharacterTextSplitter(chunk_size=100, chunk_overlap=20)

        assert splitter.split_text("") == []

    def test_custom_length_function(self):
        """With a word-counting ``length_function`` the chunk limits are measured in words."""
        splitter = RecursiveCharacterTextSplitter(
            chunk_size=5,
            chunk_overlap=2,
            # Length is the number of whitespace-separated words.
            length_function=lambda text: len(text.split()),
        )
        pieces = splitter.split_text(
            "Один два три четыре пять шесть семь восемь девять десять"
        )

        assert len(pieces) > 1
        # 7 equals chunk_size (5) plus chunk_overlap (2), counted in words.
        assert all(len(piece.split()) <= 7 for piece in pieces)

    def test_create_chunks_with_metadata(self):
        """Every chunk record carries its text, title, index, total count and size."""
        article_title = "Тестовая статья"
        splitter = RecursiveCharacterTextSplitter(chunk_size=50, chunk_overlap=10)
        records = splitter.create_chunks_with_metadata(
            "Это тестовый текст. " * 10, article_title
        )
        total = len(records)

        assert total > 1
        for position, record in enumerate(records):
            assert "text" in record
            assert record["title"] == article_title
            assert record["chunk_index"] == position
            assert record["total_chunks"] == total
            assert "chunk_size" in record
class TestDatabaseService:
    """Tests for ``DatabaseService`` initialisation and connection handling."""

    @pytest.mark.asyncio
    async def test_initialize_database(self, test_config):
        """After initialisation the file at ``db_path`` exists and the health check is True."""
        db_service = DatabaseService(test_config)
        await db_service.initialize_database()
        try:
            assert Path(test_config.db_path).exists()
            assert await db_service.health_check() is True
        finally:
            # Close even when an assertion fails so the service is not leaked
            # into subsequent tests.
            db_service.close()

    @pytest.mark.asyncio
    async def test_get_connection(self, test_config):
        """A connection from ``get_connection`` can execute a trivial ``SELECT 1``."""
        db_service = DatabaseService(test_config)
        await db_service.initialize_database()
        try:
            async with db_service.get_connection() as conn:
                cursor = await conn.execute("SELECT 1")
                result = await cursor.fetchone()
                assert result[0] == 1
        finally:
            # Close even when the query or the assertion fails.
            db_service.close()
class TestAsyncWriteQueue:
    """Tests for ``AsyncWriteQueue`` running against a mocked repository."""

    @pytest.mark.asyncio
    async def test_start_stop(self):
        """``start`` sets a worker task and ``stop`` leaves that task done."""
        write_queue = AsyncWriteQueue(AsyncMock(), max_batch_size=5)

        await write_queue.start()
        assert write_queue._worker_task is not None

        await write_queue.stop(timeout=1.0)
        assert write_queue._worker_task.done()

    @pytest.mark.asyncio
    async def test_update_from_result_success(self, sample_article, simplified_text):
        """A success result is looked up by URL and written through the repository once."""
        repo = AsyncMock()
        repo.update_article.return_value = sample_article
        repo.get_by_url.return_value = sample_article

        write_queue = AsyncWriteQueue(repo, max_batch_size=1)
        await write_queue.start()
        try:
            stored = await write_queue.update_from_result(
                ProcessingResult.success_result(
                    url=sample_article.url,
                    title=sample_article.title,
                    raw_text=sample_article.raw_text,
                    simplified_text=simplified_text,
                    token_count_raw=100,
                    token_count_simplified=50,
                    processing_time_seconds=2.0,
                )
            )

            assert stored.simplified_text == simplified_text
            repo.get_by_url.assert_called_once_with(sample_article.url)
            repo.update_article.assert_called_once()
        finally:
            # The worker is stopped regardless of the test outcome.
            await write_queue.stop(timeout=1.0)

    @pytest.mark.asyncio
    async def test_update_from_result_failure(self, sample_article):
        """A failure result ends up as ``error_message`` on the returned article."""
        repo = AsyncMock()
        repo.update_article.return_value = sample_article
        repo.get_by_url.return_value = sample_article

        write_queue = AsyncWriteQueue(repo, max_batch_size=1)
        await write_queue.start()
        try:
            stored = await write_queue.update_from_result(
                ProcessingResult.failure_result(
                    url=sample_article.url,
                    error_message="Тестовая ошибка",
                )
            )

            assert stored.error_message == "Тестовая ошибка"
            repo.update_article.assert_called_once()
        finally:
            # The worker is stopped regardless of the test outcome.
            await write_queue.stop(timeout=1.0)

    def test_stats(self):
        """A never-started queue exposes the expected stat keys with zero operations."""
        snapshot = AsyncWriteQueue(AsyncMock()).stats

        for key in ("total_operations", "failed_operations", "queue_size"):
            assert key in snapshot
        assert snapshot["total_operations"] == 0
class TestSimplifyService:
    """Tests for ``SimplifyService`` wired to mocked adapters, repository and write queue."""

    @pytest.fixture
    def mock_adapters_and_queue(self, test_config):
        """Return ``(ruwiki, llm, repository, write_queue)`` mocks for building the service.

        ``test_config`` is requested but not used here; it is kept so the fixture
        signature stays as it was.
        """
        mock_ruwiki = AsyncMock(spec=RuWikiAdapter)
        mock_llm = AsyncMock(spec=LLMProviderAdapter)
        mock_repository = AsyncMock()
        mock_write_queue = AsyncMock()
        return mock_ruwiki, mock_llm, mock_repository, mock_write_queue

    @staticmethod
    def _write_prompt_file(content):
        """Write ``content`` to a new temporary ``.txt`` file and return its path.

        The file is created with ``delete=False``, so the caller is responsible
        for removing it.
        """
        # Explicit UTF-8: the prompts contain Cyrillic text, and without an
        # encoding argument the write depends on the platform's locale encoding.
        # NOTE(review): assumes SimplifyService reads the template as UTF-8 - confirm.
        with tempfile.NamedTemporaryFile(
            mode="w", suffix=".txt", delete=False, encoding="utf-8"
        ) as handle:
            handle.write(content)
        return handle.name

    @staticmethod
    def _build_service(config, adapters_and_queue):
        """Construct a ``SimplifyService`` from ``config`` and the fixture's mock tuple."""
        mock_ruwiki, mock_llm, mock_repository, mock_write_queue = adapters_and_queue
        return SimplifyService(
            config=config,
            ruwiki_adapter=mock_ruwiki,
            llm_adapter=mock_llm,
            repository=mock_repository,
            write_queue=mock_write_queue,
        )

    def test_service_initialization(self, test_config, mock_adapters_and_queue):
        """The service stores the config and adapters it was given and owns a text splitter."""
        mock_ruwiki, mock_llm, _, _ = mock_adapters_and_queue
        service = self._build_service(test_config, mock_adapters_and_queue)

        assert service.config == test_config
        assert service.ruwiki_adapter == mock_ruwiki
        assert service.llm_adapter == mock_llm
        assert isinstance(service.text_splitter, RecursiveCharacterTextSplitter)

    @pytest.mark.asyncio
    async def test_get_prompt_template(self, test_config, mock_adapters_and_queue):
        """The template is read from the configured path and is stable across calls."""
        prompt_path = self._write_prompt_file("### role: system\nТы помощник")
        # Everything after file creation runs inside try so the file is removed
        # even if building the service fails.
        try:
            test_config.prompt_template_path = prompt_path
            service = self._build_service(test_config, mock_adapters_and_queue)

            template = await service.get_prompt_template()
            assert "### role: system" in template
            assert "Ты помощник" in template

            # A second call must return the same content.
            template2 = await service.get_prompt_template()
            assert template == template2
        finally:
            Path(prompt_path).unlink(missing_ok=True)

    @pytest.mark.asyncio
    async def test_get_prompt_template_not_found(self, test_config, mock_adapters_and_queue):
        """A non-existent template path makes ``get_prompt_template`` raise FileNotFoundError."""
        test_config.prompt_template_path = "nonexistent.txt"
        service = self._build_service(test_config, mock_adapters_and_queue)

        with pytest.raises(FileNotFoundError):
            await service.get_prompt_template()

    @pytest.mark.asyncio
    async def test_process_command_success(
        self, test_config, mock_adapters_and_queue, sample_wikitext, simplified_text
    ):
        """A new URL is fetched, simplified once, and handed to the write queue."""
        mock_ruwiki, mock_llm, mock_repository, mock_write_queue = mock_adapters_and_queue

        mock_ruwiki.fetch_page_cleaned.return_value = WikiPageInfo(
            title="Тест",
            content=sample_wikitext,
        )
        # presumably (simplified text, raw token count, simplified token count);
        # the assertions below expect 100 and 50 on the result - verify against the adapter.
        mock_llm.simplify_text.return_value = (simplified_text, 100, 50)
        mock_llm.count_tokens.return_value = 100
        # No stored article for this URL.
        mock_repository.get_by_url.return_value = None
        mock_repository.create_article.return_value = MagicMock(id=1)
        mock_repository.update_article.return_value = MagicMock()
        mock_write_queue.update_from_result.return_value = MagicMock()

        prompt_path = self._write_prompt_file("### role: user\n{wiki_source_text}")
        # Cleanup uses the locally held path, so it does not depend on the
        # config attribute still pointing at the temp file.
        try:
            test_config.prompt_template_path = prompt_path
            service = self._build_service(test_config, mock_adapters_and_queue)

            command = SimplifyCommand(url="https://ru.wikipedia.org/wiki/Тест")
            result = await service.process_command(command)

            assert result.success is True
            assert result.title == "Тест"
            assert result.simplified_text == simplified_text
            assert result.token_count_raw == 100
            assert result.token_count_simplified == 50
            mock_ruwiki.fetch_page_cleaned.assert_called_once()
            mock_llm.simplify_text.assert_called_once()
            mock_write_queue.update_from_result.assert_called_once()
        finally:
            Path(prompt_path).unlink(missing_ok=True)

    @pytest.mark.asyncio
    async def test_process_command_skip_existing(
        self, test_config, mock_adapters_and_queue, completed_article
    ):
        """With ``force_reprocess=False`` a stored article is returned without fetch or LLM calls."""
        mock_ruwiki, mock_llm, mock_repository, _ = mock_adapters_and_queue
        mock_repository.get_by_url.return_value = completed_article
        service = self._build_service(test_config, mock_adapters_and_queue)

        command = SimplifyCommand(url=completed_article.url, force_reprocess=False)
        result = await service.process_command(command)

        assert result.success is True
        assert result.title == completed_article.title
        mock_ruwiki.fetch_page_cleaned.assert_not_called()
        mock_llm.simplify_text.assert_not_called()

    @pytest.mark.asyncio
    async def test_health_check(self, test_config, mock_adapters_and_queue):
        """Healthy adapters plus a readable template give True for all three checks."""
        mock_ruwiki, mock_llm, _, _ = mock_adapters_and_queue
        mock_ruwiki.health_check.return_value = True
        mock_llm.health_check.return_value = True

        prompt_path = self._write_prompt_file("test prompt")
        try:
            test_config.prompt_template_path = prompt_path
            service = self._build_service(test_config, mock_adapters_and_queue)

            checks = await service.health_check()
            assert checks["ruwiki"] is True
            assert checks["llm"] is True
            assert checks["prompt_template"] is True
        finally:
            Path(prompt_path).unlink(missing_ok=True)