Compare commits

28 commits: master...feature/ne

| Author | SHA1 | Date |
|---|---|---|
|  | 99ed4130cd |  |
|  | d9cc0c85d1 |  |
|  | 2e7aace21f |  |
|  | 0ea7add4c3 |  |
|  | 74243fd258 |  |
|  | bde0bb0e6f |  |
|  | c907e1d4da |  |
|  | 086d765c92 |  |
|  | cfcb405644 |  |
|  | 0e888ec910 |  |
|  | e0829d66f8 |  |
|  | 6364c97f21 |  |
|  | 309e62c410 |  |
|  | b02b4e84fe |  |
|  | d319a1c5e0 |  |
|  | 87107d8c31 |  |
|  | bd4490f30c |  |
|  | 8bc14e488a |  |
|  | 8facab266d |  |
|  | 7152f4b61e |  |
|  | 7a76dc1d84 |  |
|  | 6d52bcbbe7 |  |
|  | 71c03e71ef |  |
|  | 1789270d17 |  |
|  | fbcdbac6a0 |  |
|  | ad12343784 |  |
|  | 18cbbe00d3 |  |
|  | 33d8f5ab8b |  |

@@ -0,0 +1,30 @@
[run]
source = src/dataloader
omit =
    # Logging - boilerplate code, does not require testing
    src/dataloader/logger/*
    # Entry point - does not require testing
    src/dataloader/__main__.py
    # Base classes without logic
    src/dataloader/base.py
    # Middleware - hard to test, covered by integration tests
    src/dataloader/api/middleware.py
    # Test files
    */tests/*

[report]
exclude_lines =
    pragma: no cover
    def __repr__
    raise AssertionError
    raise NotImplementedError
    if __name__ == .__main__.:
    if TYPE_CHECKING:
    @abstractmethod
    \.\.\.

precision = 2
show_missing = True

[html]
directory = htmlcov

@@ -58,3 +58,6 @@ Thumbs.db

# Documentation
docs/_build/

.claude
nul

@@ -0,0 +1,314 @@
# CLAUDE.md

This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.

## Role and Working Principles

You act as a senior Python / ML / AI / DL developer and system architect.
You work in an enterprise-level project (multi-tier backend) with initial base architecture similar to the REST template from `rest_template.md`.
You design code strictly according to patterns (Singleton, Repository, Interface, DTO, CRUD, Service, Context, Adapter, etc.).
You write clean production-level Python code (FastAPI, SQLAlchemy 2.x, asyncio, Pydantic v2, PostgreSQL, aiohttp, structlog/loguru).

**Working Rules:**
- Do not add comments unless explicitly requested.
- Always add docstrings to functions, classes, and modules.
- Always follow PEP 8 style and architectural layer isolation (api / service / repositories / models / schemas / interfaces / logger / config / context).
- Prefer typing via `from __future__ import annotations`.
- All dependencies are passed through `AppContext` (DI Singleton pattern).
- Implement logging through the logger with context (`logger.info("msg")` without structures).
- When creating projects from scratch, rely on the structure from `rest_template.md`.
- Respond strictly to the point, no fluff, like a senior developer during code review.
- All logic in examples is correct, asynchronous, and production-ready.
- Use only modern library versions.

Your style is minimalistic, precise, clean, and architecturally sound.

## Project Overview

**Dataloader** is an asynchronous FastAPI service for managing and executing long-running ETL tasks via a PostgreSQL-based job queue. The service uses PostgreSQL's `LISTEN/NOTIFY` for efficient worker wakeup, advisory locks for concurrency control, and `SELECT ... FOR UPDATE SKIP LOCKED` for job claiming.

This is a Clean Architecture implementation following the project template `rest_template.md`, built with Python 3.11+, FastAPI, SQLAlchemy 2.0 (async), and asyncpg.

## Development Commands

### Running the Application

```bash
# Install dependencies with Poetry
poetry install

# Run the application
poetry run dataloader
# or
uvicorn dataloader.__main__:main

# The app will start on port 8081 by default (configurable via APP_PORT)
```

### Testing

```bash
# Run all tests
poetry run pytest

# Run specific test file
poetry run pytest tests/integration_tests/v1_api/test_service.py

# Run with verbose output
poetry run pytest -v

# Run integration tests only
poetry run pytest tests/integration_tests/
```

### Database

The database schema is already applied (see `DDL.sql`). The queue uses:
- Table `dl_jobs` - main job queue with statuses: queued, running, succeeded, failed, canceled, lost
- Table `dl_job_events` - audit log of job lifecycle events
- PostgreSQL triggers for `LISTEN/NOTIFY` on job insertion/updates

## Architecture

### High-Level Structure

The codebase follows Clean Architecture with clear separation of concerns:

1. **API Layer** (`src/dataloader/api/`)
   - `v1/router.py` - HTTP endpoints for job management
   - `v1/service.py` - Business logic layer
   - `v1/schemas.py` - Pydantic request/response models
   - `os_router.py` - Infrastructure endpoints (`/health`, `/status`) **DO NOT MODIFY**
   - `metric_router.py` - Metrics endpoints (BETA) **DO NOT MODIFY**
   - `middleware.py` - Request/response logging middleware **DO NOT MODIFY**

2. **Storage Layer** (`src/dataloader/storage/`)
   - `repositories.py` - PostgreSQL queue operations using SQLAlchemy ORM
   - `db.py` - Database engine and session management
   - `notify_listener.py` - PostgreSQL LISTEN/NOTIFY implementation

3. **Worker Layer** (`src/dataloader/workers/`)
   - `manager.py` - Manages lifecycle of async worker tasks
   - `base.py` - Core worker implementation with claim/heartbeat/execute cycle
   - `reaper.py` - Background task to requeue lost jobs (expired leases)
   - `pipelines/registry.py` - Pipeline registration and resolution system
   - `pipelines/` - Individual pipeline implementations

4. **Logger** (`src/dataloader/logger/`)
   - Structured logging with automatic sensitive data masking
   - **DO NOT MODIFY** these files - they're from the template

5. **Core** (`src/dataloader/`)
   - `__main__.py` - Application entry point
   - `config.py` - Pydantic Settings for all configuration
   - `context.py` - AppContext singleton for dependency injection
   - `base.py` - Base classes and types
   - `exceptions.py` - Global exception definitions

### Key Architectural Patterns

#### Job Queue Protocol

Jobs flow through the system via a strict state machine:

1. **Enqueue** (`trigger` API) - Creates the job in `queued` status
   - Idempotent via `idempotency_key`
   - PostgreSQL trigger fires `LISTEN/NOTIFY` to wake workers

2. **Claim** (worker) - Worker acquires a job atomically (see the sketch after this list)
   - Uses `FOR UPDATE SKIP LOCKED` to prevent contention
   - Sets status to `running`, increments the attempt counter
   - Attempts a PostgreSQL advisory lock on `lock_key`
   - If the lock fails → the job goes back to `queued` with a backoff delay

3. **Execute** (worker) - Runs the pipeline with heartbeat
   - Heartbeat updates every `DL_HEARTBEAT_SEC` seconds
   - Extends `lease_expires_at` to prevent the reaper from reclaiming
   - Checks the `cancel_requested` flag between pipeline chunks
   - Pipeline yields between chunks to allow cooperative cancellation

4. **Complete** (worker) - Finalize job status
   - **Success**: `status = succeeded`, release the advisory lock
   - **Failure**:
     - If `attempt < max_attempts` → `status = queued` (retry with a backoff of 30 * attempt seconds)
     - If `attempt >= max_attempts` → `status = failed`
   - **Cancel**: `status = canceled`
   - Always releases the advisory lock

5. **Reaper** (background) - Recovers lost jobs
   - Runs every `DL_REAPER_PERIOD_SEC`
   - Finds jobs where `status = running` AND `lease_expires_at < now()`
   - Resets them to `queued` for retry
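
To make the claim step concrete, here is a minimal sketch of an atomic claim query, assuming the `dl_jobs` columns described in this document (`status`, `priority`, `available_at`, `attempt`, `lease_expires_at`, `lock_key`); the actual `storage/repositories.py` implementation may differ in naming and session handling.

```python
# Sketch only: illustrates SELECT ... FOR UPDATE SKIP LOCKED claiming, not the real repository code.
from sqlalchemy import text
from sqlalchemy.ext.asyncio import AsyncSession

CLAIM_SQL = text("""
    WITH next_job AS (
        SELECT job_id
        FROM dl_jobs
        WHERE queue = :queue
          AND status = 'queued'
          AND available_at <= now()
        ORDER BY priority, available_at
        FOR UPDATE SKIP LOCKED
        LIMIT 1
    )
    UPDATE dl_jobs j
    SET status = 'running',
        attempt = j.attempt + 1,
        lease_expires_at = now() + make_interval(secs => :lease_ttl_sec)
    FROM next_job
    WHERE j.job_id = next_job.job_id
    RETURNING j.job_id, j.task, j.args, j.lock_key
""")


async def claim_one(session: AsyncSession, queue: str, lease_ttl_sec: int):
    """Atomically claim one queued job, or return None if nothing is available."""
    row = (await session.execute(
        CLAIM_SQL, {"queue": queue, "lease_ttl_sec": lease_ttl_sec}
    )).first()
    await session.commit()
    return row
```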

#### Concurrency Control

The system uses multiple layers of concurrency control:

- **`lock_key`**: PostgreSQL advisory lock ensures only one worker processes jobs with the same lock_key (see the sketch below)
- **`partition_key`**: Logical grouping for job ordering (currently informational)
- **`FOR UPDATE SKIP LOCKED`**: Prevents multiple workers from claiming the same job
- **Async workers**: Multiple workers can run concurrently within a single process
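
A minimal sketch of the advisory-lock helpers, assuming session-level locks keyed by `hashtext(lock_key)` (as described in the README); the real worker code may scope or wrap these differently.

```python
# Sketch only: advisory-lock acquire/release around a lock_key.
from sqlalchemy import text
from sqlalchemy.ext.asyncio import AsyncSession


async def try_advisory_lock(session: AsyncSession, lock_key: str) -> bool:
    """Return True if this session acquired the advisory lock for lock_key."""
    result = await session.execute(
        text("SELECT pg_try_advisory_lock(hashtext(:lock_key))"),
        {"lock_key": lock_key},
    )
    return bool(result.scalar())


async def release_advisory_lock(session: AsyncSession, lock_key: str) -> None:
    """Release the lock; must run on the same connection that acquired it."""
    await session.execute(
        text("SELECT pg_advisory_unlock(hashtext(:lock_key))"),
        {"lock_key": lock_key},
    )
```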

#### Worker Configuration

Workers are configured via the `WORKERS_JSON` environment variable:

```json
[
  {"queue": "load.cbr", "concurrency": 2},
  {"queue": "load.sgx", "concurrency": 1}
]
```

This spawns M async tasks (the sum of all concurrency values) within the FastAPI process; a sketch of this fan-out follows.
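
The sketch below (function names hypothetical, not the actual `WorkerManager` API) illustrates how such a config expands into one asyncio task per unit of concurrency.

```python
# Sketch only: expanding WORKERS_JSON into asyncio worker tasks.
import asyncio
import json
import os


def spawn_workers(worker_coro_factory) -> list[asyncio.Task]:
    """Create one asyncio task per unit of concurrency declared in WORKERS_JSON."""
    configs = json.loads(os.environ["WORKERS_JSON"])
    tasks: list[asyncio.Task] = []
    for cfg in configs:
        for slot in range(cfg["concurrency"]):
            tasks.append(asyncio.create_task(worker_coro_factory(cfg["queue"], slot)))
    return tasks
```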

#### Pipeline System

Pipelines are registered via decorator in `workers/pipelines/`:

```python
from dataloader.workers.pipelines.registry import register

@register("my.task")
async def my_pipeline(args: dict):
    # Process chunk 1
    yield  # Allow heartbeat & cancellation check
    # Process chunk 2
    yield
    # Process chunk 3
```

The `yield` statements enable:
- Heartbeat updates during long operations
- Cooperative cancellation via `cancel_requested` checks
- Progress tracking

All pipelines must be imported in the `load_all()` function of `workers/pipelines/__init__.py`.

### Application Lifecycle

1. **Startup** (`lifespan` in `api/__init__.py`)
   - Initialize logging
   - Create database engine and sessionmaker
   - Load all pipelines from registry
   - Build WorkerManager from `WORKERS_JSON`
   - Start all worker tasks and reaper

2. **Runtime**
   - FastAPI serves HTTP requests
   - Workers poll queue via LISTEN/NOTIFY
   - Reaper runs in background

3. **Shutdown** (on SIGTERM)
   - Signal all workers to stop via `asyncio.Event`
   - Cancel worker tasks and wait for completion
   - Cancel reaper task
   - Dispose database engine

## Configuration

All configuration is via environment variables (`.env` file or system environment):

### Application Settings
- `APP_HOST` - Server bind address (default: `0.0.0.0`)
- `APP_PORT` - Server port (default: `8081`)
- `DEBUG` - Debug mode (default: `False`)
- `LOCAL` - Local development flag (default: `False`)

### Database Settings
- `PG_HOST`, `PG_PORT`, `PG_USER`, `PG_PASSWORD`, `PG_DATABASE`, `PG_SCHEMA` - PostgreSQL connection
- `PG_POOL_SIZE`, `PG_MAX_OVERFLOW`, `PG_POOL_RECYCLE` - Connection pool configuration
- `DL_DB_DSN` - Optional override for queue database DSN (if different from main DB)

### Worker Settings
- `WORKERS_JSON` - JSON array of worker configurations (required)
- `DL_HEARTBEAT_SEC` - Heartbeat interval (default: `10`)
- `DL_DEFAULT_LEASE_TTL_SEC` - Default lease duration (default: `60`)
- `DL_REAPER_PERIOD_SEC` - Reaper run interval (default: `10`)
- `DL_CLAIM_BACKOFF_SEC` - Backoff when advisory lock fails (default: `15`)

### Logging Settings
- `LOG_PATH`, `LOG_FILE_NAME` - Application log location
- `METRIC_PATH`, `METRIC_FILE_NAME` - Metrics log location
- `AUDIT_LOG_PATH`, `AUDIT_LOG_FILE_NAME` - Audit events log location
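
For orientation, here is a minimal `.env` sketch combining the variables above; all values are placeholders, not recommendations or real credentials.

```
APP_HOST=0.0.0.0
APP_PORT=8081
DEBUG=False

PG_HOST=localhost
PG_PORT=5432
PG_USER=dataloader
PG_PASSWORD=change-me
PG_DATABASE=dataloader
PG_SCHEMA=public

WORKERS_JSON=[{"queue":"etl.default","concurrency":2}]
DL_HEARTBEAT_SEC=10
DL_DEFAULT_LEASE_TTL_SEC=60
DL_REAPER_PERIOD_SEC=10
DL_CLAIM_BACKOFF_SEC=15

LOG_PATH=./logs
LOG_FILE_NAME=app.log
```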

## API Endpoints

### Business API (v1)

- `POST /api/v1/jobs/trigger` - Create or get an existing job (idempotent)
  - Body: `{queue, task, args?, idempotency_key?, lock_key, partition_key?, priority?, available_at?}`
  - Response: `{job_id, status}`

- `GET /api/v1/jobs/{job_id}/status` - Get job status
  - Response: `{job_id, status, attempt, started_at?, finished_at?, heartbeat_at?, error?, progress}`

- `POST /api/v1/jobs/{job_id}/cancel` - Request job cancellation (cooperative)
  - Response: same as the status endpoint

### Infrastructure API

- `GET /health` - Health check (no database access, <20ms)
- `GET /status` - Service status with version/uptime

## Development Guidelines

### Adding a New Pipeline

1. Create a pipeline file in `src/dataloader/workers/pipelines/`:

```python
from dataloader.workers.pipelines.registry import register

@register("myqueue.mytask")
async def my_task_pipeline(args: dict):
    # Your implementation
    # Use yield between chunks for heartbeat
    yield
```

2. Import it in `src/dataloader/workers/pipelines/__init__.py`:

```python
def load_all() -> None:
    from . import noop
    from . import my_task  # Add this line
```

3. Add the queue to `.env`:

```
WORKERS_JSON=[{"queue":"myqueue","concurrency":1}]
```

### Idempotent Operations

All pipelines should be idempotent since jobs may be retried:
- Use `idempotency_key` for external API calls
- Use `UPSERT` or `INSERT ... ON CONFLICT` for database writes (see the sketch below)
- Design pipelines to be safely re-runnable from any point
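
A minimal sketch of such an idempotent write using SQLAlchemy's PostgreSQL `insert`; the table and conflict columns here are hypothetical.

```python
# Sketch only: INSERT ... ON CONFLICT DO UPDATE so a retried pipeline run is safe.
from sqlalchemy.dialects.postgresql import insert
from sqlalchemy.ext.asyncio import AsyncSession


async def upsert_rate(session: AsyncSession, rates_table, row: dict) -> None:
    """Insert a row or update it in place on conflict."""
    stmt = insert(rates_table).values(**row)
    stmt = stmt.on_conflict_do_update(
        index_elements=["currency", "rate_date"],  # hypothetical unique key
        set_={"value": stmt.excluded.value},
    )
    await session.execute(stmt)
```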

### Security & Data Masking

The logger automatically masks sensitive fields (defined in `logger/utils.py`):
- Keywords: `password`, `token`, `secret`, `key`, `authorization`, etc.
- Never log credentials directly
- Use structured logging: `logger.info("message", extra={...})`

### Error Handling

- Pipelines should raise exceptions for transient errors (this triggers a retry)
- Use `max_attempts` in job creation to control retry limits
- Permanent failures should be logged but not re-raised (mark the job as succeeded but log the error in events)

### Testing

Integration tests should:
- Use test fixtures from `tests/conftest.py`
- Test the full job lifecycle: trigger → claim → execute → complete
- Test failure scenarios: cancellation, retries, lock contention
- Mock external dependencies; use a real database for queue operations

## Important Files to Reference

- `TZ.md` - Full technical specification (Russian)
- `TODO.md` - Implementation progress and next steps
- `rest_template.md` - Project structure template
- `DDL.sql` - Database schema

@@ -0,0 +1,951 @@
About the database for OPU: the received rows must likewise be loaded into the database by streaming through a cursor, but the table must be truncated first (a minimal loading sketch follows the DDL). The DDL is:

CREATE TABLE brief_digital_certificate_opu (
    object_id text DEFAULT '-'::text NOT NULL, -- Client ID (REPORT_ID)
    object_nm text NULL, -- Client name
    desk_nm text DEFAULT '-'::text NOT NULL, -- Desk name
    actdate date DEFAULT CURRENT_DATE NOT NULL, -- Calculation date
    layer_cd text DEFAULT '-'::text NOT NULL, -- Layer code
    layer_nm text NULL, -- Layer name
    opu_cd text NOT NULL, -- OPU (P&L) line code
    opu_nm_sh text NULL, -- Short name of the OPU code
    opu_nm text NULL, -- Name of the OPU code
    opu_lvl int4 DEFAULT '-1'::integer NOT NULL, -- Level
    opu_prnt_cd text DEFAULT '-'::text NOT NULL, -- Parent code
    opu_prnt_nm_sh text NULL, -- Short parent name
    opu_prnt_nm text NULL, -- Parent name
    sum_amountrub_p_usd numeric NULL, -- Amount
    wf_load_id int8 DEFAULT '-1'::integer NOT NULL, -- Load workflow ID
    wf_load_dttm timestamp DEFAULT CURRENT_TIMESTAMP NOT NULL, -- Load timestamp
    wf_row_id int8 DEFAULT '-1'::integer NOT NULL, -- Row number
    object_tp text NULL,
    object_unit text DEFAULT '-'::text NOT NULL,
    measure text NULL,
    product_nm text NULL,
    product_prnt_nm text NULL,
    sum_amountrub_p_usd_s numeric NULL,
    CONSTRAINT brief_digital_certificate_opu_pk PRIMARY KEY (object_id, desk_nm, actdate, layer_cd, opu_cd, opu_lvl, opu_prnt_cd, object_unit)
);
COMMENT ON TABLE brief_digital_certificate_opu IS 'OPU (P&L) indicators';
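
A minimal sketch of that truncate-then-stream load, assuming batches of row dicts arrive from the export stream; in the real service this belongs in a model/repository/pipeline, as noted below.

```python
# Sketch only: TRUNCATE the target table, then bulk-insert incoming batches in one transaction.
from collections.abc import AsyncIterator

from sqlalchemy import text
from sqlalchemy.dialects.postgresql import insert
from sqlalchemy.ext.asyncio import AsyncSession


async def reload_opu(
    session: AsyncSession,
    opu_table,  # SQLAlchemy Table/ORM class mapped to brief_digital_certificate_opu
    batches: AsyncIterator[list[dict]],
) -> None:
    """Truncate the target table, then insert streamed batches inside one transaction."""
    async with session.begin():  # assumes the session has no transaction in progress
        await session.execute(text("TRUNCATE TABLE brief_digital_certificate_opu"))
        async for batch in batches:
            if batch:
                await session.execute(insert(opu_table), batch)
```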

This is a test script for interacting with OPU.
But here a model, repository, interface, etc. need to be created; the script only demonstrates the principle of retrieving the data.

# test_export.py
"""
Script for testing the export:
- start a job
- monitor its status
- download (full and partial)
- decompress
- count rows
- measure sizes
"""

import asyncio
import httpx
import zstandard as zstd
from loguru import logger
from pathlib import Path

# ========= SETTINGS =========
BASE_URL = "https://ci02533826-tib-brief.apps.ift-terra000024-edm.ocp.delta.sbrf.ru"
ENDPOINT_START = "/export/opu/start"
ENDPOINT_STATUS = "/export/{job_id}/status"
ENDPOINT_DOWNLOAD = "/export/{job_id}/download"

POLL_INTERVAL = 2
TIMEOUT = 3600
CHUNK_SIZE = 8192

OUTPUT_DIR = Path("./export_test")
OUTPUT_DIR.mkdir(exist_ok=True)
# ============================


def sizeof_fmt(num: int) -> str:
    """Format a file size in a human-readable way."""
    for unit in ['B', 'KB', 'MB', 'GB']:
        if num < 1024.0:
            return f"{num:.1f} {unit}"
        num /= 1024.0
    return f"{num:.1f} TB"


async def download_full(client: httpx.AsyncClient, url: str, filepath: Path) -> bool:
    """Download the file in full."""
    logger.info(f"⬇️ Full download: {filepath.name}")
    try:
        response = await client.get(url, follow_redirects=True)
        if response.status_code != 200:
            logger.error(f"❌ Full download error: {response.status_code} {response.text}")
            return False
        with open(filepath, "wb") as f:
            f.write(response.content)
        logger.success(f"Full download finished: {sizeof_fmt(filepath.stat().st_size)}")
        return True
    except Exception as e:
        logger.error(f"❌ Error during full download: {e}")
        return False


async def download_range(
    client: httpx.AsyncClient,
    url: str,
    filepath: Path,
    start: int,
    end: int | None = None
) -> bool:
    """Download a byte range."""
    headers = {"Range": f"bytes={start}-{end if end else ''}"}
    logger.info(f"⬇️ Downloading range {start}-{end if end else 'end'} -> {filepath.name}")
    try:
        response = await client.get(url, headers=headers, follow_redirects=True)
        if response.status_code != 206:
            logger.error(f"❌ Expected 206 Partial Content, got: {response.status_code}")
            return False
        with open(filepath, "wb") as f:
            f.write(response.content)
        logger.success(f"Range saved: {sizeof_fmt(filepath.stat().st_size)}")
        return True
    except Exception as e:
        logger.error(f"❌ Error while downloading range: {e}")
        return False


async def main():
    logger.info("🚀 Starting the export data test")
    cert_path = r"C:\Users\23193453\Documents\code\cert\client_cert.pem"
    key_path = r"C:\Users\23193453\Documents\code\cert\client_cert.key"
    ca_path = r"C:\Users\23193453\Documents\code\cert\client_ca.pem"

    async with httpx.AsyncClient(
        cert=(cert_path, key_path),
        verify=ca_path,
        timeout=TIMEOUT
    ) as client:
        try:
            # --- Step 1: start the job ---
            logger.info("📨 Sending the request to start the OPU export...")
            response = await client.post(BASE_URL + ENDPOINT_START)
            if response.status_code != 200:
                logger.error(f"❌ Failed to start the job: {response.status_code} {response.text}")
                return
            job_id = response.json()["job_id"]
            logger.info(f"Job started: job_id={job_id}")

            # --- Step 2: monitor the status ---
            status_url = ENDPOINT_STATUS.format(job_id=job_id)
            logger.info("⏳ Waiting for the job to finish...")
            start_wait = asyncio.get_event_loop().time()

            while True:
                response = await client.get(BASE_URL + status_url)
                if response.status_code != 200:
                    logger.warning(f"⚠️ Error while fetching status: {response.status_code}")
                    await asyncio.sleep(POLL_INTERVAL)
                    continue

                status_data = response.json()
                status = status_data["status"]
                total_rows = status_data["total_rows"]

                elapsed = asyncio.get_event_loop().time() - start_wait
                logger.debug(f"📊 Status: {status}, rows processed: {total_rows}, elapsed: {elapsed:.1f} s")

                if status == "completed":
                    logger.info(f"🎉 Job finished! Rows processed: {total_rows}")
                    break
                elif status == "failed":
                    logger.error(f"💥 Job failed: {status_data['error']}")
                    return
                elif status in ("pending", "running"):
                    await asyncio.sleep(POLL_INTERVAL)
                    continue

            download_url = BASE_URL + ENDPOINT_DOWNLOAD.format(job_id=job_id)

            # --- Test 1: full download ---
            full_path = OUTPUT_DIR / f"full_export_{job_id}.jsonl.zst"
            if not await download_full(client, download_url, full_path):
                return

            # --- Test 2: first 1024 bytes ---
            range_path = OUTPUT_DIR / f"range_head_{job_id}.bin"
            if not await download_range(client, download_url, range_path, start=0, end=1023):
                return

            # --- Test 3: resume from byte 1024 ---
            resume_path = OUTPUT_DIR / f"range_resume_{job_id}.bin"
            if not await download_range(client, download_url, resume_path, start=1024):
                return

            # --- Inspect the full archive ---
            archive_size = full_path.stat().st_size
            logger.success(f"📦 Full archive: {sizeof_fmt(archive_size)}")

            # --- Decompress ---
            unpacked_path = OUTPUT_DIR / f"export_{job_id}.jsonl"
            logger.info(f"📦 Decompressing the archive to: {unpacked_path.name}")
            dctx = zstd.ZstdDecompressor()
            try:
                with open(full_path, "rb") as compressed:
                    with open(unpacked_path, "wb") as dest:
                        dctx.copy_stream(compressed, dest)
                unpacked_size = unpacked_path.stat().st_size
                logger.success(f"Decompressed: {sizeof_fmt(unpacked_size)}")

                # --- Count lines ---
                logger.info("🧮 Counting lines in the decompressed file...")
                with open(unpacked_path, "rb") as f:
                    line_count = sum(1 for _ in f)
                logger.success(f"The file contains {line_count:,} lines")

                # --- Summary ---
                logger.info("📈 RESULTS:")
                logger.info(f"  Archive: {sizeof_fmt(archive_size)}")
                logger.info(f"  Decompressed: {sizeof_fmt(unpacked_size)}")
                logger.info(f"  Compression ratio: {archive_size / unpacked_size:.2f}x")
                logger.info(f"  Lines: {line_count:,}")
            except Exception as e:
                logger.exception(f"❌ Decompression error: {e}")

            logger.info("🏁 All tests finished successfully!")

        except httpx.ConnectError as e:
            logger.critical(f"❌ Could not connect to the server. Make sure the service is running. {e}")
        except Exception as e:
            logger.exception(f"❌ Unexpected error: {e}")


if __name__ == "__main__":
    asyncio.run(main())

And here is the code of the OPU-serving service:

service\src\gmap2\models\base.py -
"""
Base ORM model module.

Contains the base class and a mixin for dynamically resolving table names
based on the application configuration.
"""

from sqlalchemy.orm import declarative_base, declared_attr

from gmap2.config import APP_CONFIG

TABLE_NAME_MAPPING = {
    "opudata": APP_CONFIG.gp.opu_table,
    "lprnewsdata": APP_CONFIG.gp.lpr_news_table,
}


BASE = declarative_base()


class DynamicTableMixin:  # pylint: disable=too-few-public-methods
    """
    Mixin that resolves the table name dynamically via __tablename__.

    The table name is derived from the lowercased class name.
    If the class name is present in TABLE_NAME_MAPPING, the mapped value is used;
    otherwise the class name itself is used as the table name.
    """

    @declared_attr
    def __tablename__(cls) -> str:  # pylint: disable=no-self-argument
        """
        Dynamically return the table name for the model.

        Uses the mapping from the configuration if the class name is known;
        otherwise returns the lowercased class name.

        :return: Table name used by SQLAlchemy for this model.
        """
        class_name = cls.__name__.lower()
        return TABLE_NAME_MAPPING.get(class_name, class_name)

service\src\gmap2\models\opu.py -
"""
ORM model for the OPU table.

Note:
    The database table has no PRIMARY KEY. For SQLAlchemy to work correctly,
    a virtual composite primary key (wf_row_id, wf_load_id) is used.
    This does not affect the export, but is required for iterating via the ORM.
"""

from sqlalchemy import Column, Date, DateTime, Float, Integer, String

from .base import BASE, DynamicTableMixin


class OPUData(DynamicTableMixin, BASE):  # pylint: disable=too-few-public-methods
    """
    Data from the brief_opu table.
    """

    __table_args__ = {"schema": "brief_opu_schema"}

    wf_row_id = Column(Integer, primary_key=True, nullable=False)
    wf_load_id = Column(Integer, primary_key=True, nullable=False)

    object_id = Column(String)
    object_nm = Column(String)
    desk_nm = Column(String)
    object_tp = Column(String)
    object_unit = Column(String)
    actdate = Column(Date)
    layer_cd = Column(String)
    layer_nm = Column(String)
    measure = Column(String)
    opu_cd = Column(String)
    opu_nm_sh = Column(String)
    opu_nm = Column(String)
    opu_lvl = Column(Integer)
    product_nm = Column(String)
    opu_prnt_cd = Column(String)
    opu_prnt_nm_sh = Column(String)
    opu_prnt_nm = Column(String)
    product_prnt_nm = Column(String)
    sum_amountrub_p_usd = Column(Float)
    wf_load_dttm = Column(DateTime)

service\src\gmap2\repositories\base.py -
"""
Base repository with shared streaming functionality.
"""

from collections.abc import AsyncGenerator, Callable
from typing import Generic, List, Optional, TypeVar, Union
from typing_extensions import Self

from loguru import logger
from sqlalchemy import Select, select
from sqlalchemy.ext.asyncio import AsyncSession

_T_Model = TypeVar("_T_Model")  # pylint: disable=invalid-name


class BaseRepository(Generic[_T_Model]):
    """
    Abstract repository with streaming support.
    """

    model: type[_T_Model]
    default_order_by: Union[List[Callable[[], List]], List] = []

    def __init__(self, session: AsyncSession) -> None:
        """
        Initialize the repository.

        Args:
            session: Asynchronous SQLAlchemy session.
        """
        self.session = session

    async def stream_all(
        self,
        chunk_size: int = 10_000,
        statement: Optional[Select[tuple[_T_Model]]] = None,
    ) -> AsyncGenerator[list[_T_Model], None]:
        """
        Stream records from the database in batches.

        Streams data using an asynchronous cursor.
        Supports a custom statement and automatic ordering.

        :param chunk_size: Batch size for a single iteration.
        :param statement: Optional SQL statement.
        :yield: Lists of model instances loaded in chunks.
        """
        logger.info(f"Streaming {self.model.__name__} in batches of {chunk_size}")

        stmt = statement or select(self.model)

        if not statement and self.default_order_by:
            stmt = stmt.order_by(*self.default_order_by)

        try:
            result = await self.session.stream(
                stmt.execution_options(
                    stream_results=True,
                    max_row_count=chunk_size,
                )
            )

            partitions_method = result.partitions
            partitions_result = partitions_method(chunk_size)

            async for partition in partitions_result:
                items = [row[0] for row in partition]
                yield items

            logger.info(f"Finished streaming {self.model.__name__}")
        except Exception as e:
            logger.error(
                f"Error during streaming {self.model.__name__}: {type(e).__name__}: {e}"
            )
            raise

    @classmethod
    def create(cls, session: AsyncSession) -> Self:
        """
        Factory method.

        Args:
            session: Asynchronous session.

        Returns:
            Repository instance.
        """
        return cls(session=session)

service\src\gmap2\repositories\opu_repository.py -
"""
Repository for OPU.
"""

from gmap2.models.opu import OPUData

from .base import BaseRepository


class OPURepository(BaseRepository[OPUData]):
    """
    Repository for OPUData.
    """

    model = OPUData
    default_order_by = [
        OPUData.wf_load_id,
        OPUData.wf_row_id,
    ]

service\src\gmap2\api\v1\routes.py -
"""
API routes for starting, checking, and downloading exports.
"""

from fastapi import APIRouter, BackgroundTasks, HTTPException, Request

from gmap2.services.job.background_worker import (
    run_lpr_news_export_job,
    run_opu_export_job,
)
from gmap2.services.job.job_manager import JobManager
from gmap2.utils.file_utils import create_range_aware_response

from .schemas import ExportJobStatus, StartExportResponse

router = APIRouter(prefix="/export", tags=["export"])


def get_job_manager(request: Request) -> JobManager:
    """
    Extract the JobManager from the application state.
    """
    return request.app.state.job_manager


@router.post("/opu/start", response_model=StartExportResponse)
async def start_opu_export(
    request: Request, background_tasks: BackgroundTasks
) -> StartExportResponse:
    """
    Start the background OPU export job.

    Returns:
        Job identifier.
    """
    job_manager = get_job_manager(request)
    job_id = job_manager.start_job("opu")

    background_tasks.add_task(run_opu_export_job, job_id, job_manager)

    return StartExportResponse(job_id=job_id)


@router.post("/lpr-news/start", response_model=StartExportResponse)
async def start_lpr_news_export(
    request: Request, background_tasks: BackgroundTasks
) -> StartExportResponse:
    """
    Start the background LPR News export job.

    Returns:
        Job identifier.
    """
    job_manager = get_job_manager(request)
    job_id = job_manager.start_job("lpr_news")

    background_tasks.add_task(run_lpr_news_export_job, job_id, job_manager)

    return StartExportResponse(job_id=job_id)


@router.get("/{job_id}/status", response_model=ExportJobStatus)
async def get_export_status(job_id: str, request: Request) -> ExportJobStatus:
    """
    Return the current status of an export job.

    Args:
        job_id: Job identifier.

    Returns:
        Job status.

    Raises:
        HTTPException 404: If the job is not found.
    """
    job_manager = get_job_manager(request)
    status = job_manager.get_job_status(job_id)
    if not status:
        raise HTTPException(status_code=404, detail="Job not found")
    return ExportJobStatus(**status)


@router.get("/{job_id}/download")
async def download_export(job_id: str, request: Request):
    """
    Stream the compressed export file to the client with Range support.
    """
    job_manager = get_job_manager(request)
    status = job_manager.get_job_status(job_id)

    if not status:
        raise HTTPException(status_code=404, detail="Job not found")

    if status["status"] != "completed":
        raise HTTPException(
            status_code=409, detail=f"Job is {status['status']}, not completed"
        )

    file_path = status.get("temp_file_path")
    if not file_path or not file_path.endswith(".jsonl.zst"):
        raise HTTPException(status_code=500, detail="Export file not available")

    filename = f"export_{job_id}.jsonl.zst"

    return await create_range_aware_response(
        file_path=file_path,
        filename=filename,
        request=request,
        media_type="application/octet-stream",
    )

service\src\gmap2\services\export\export_service.py -
"""
Data export service: combines the repository, formatting, and compression.
Format: JSON Lines + Zstandard (.jsonl.zst), a single continuous zstd frame.
"""

from __future__ import annotations

import asyncio
import math
import os
import tempfile
import threading
from collections.abc import AsyncGenerator, Callable
from contextlib import asynccontextmanager
from datetime import date, datetime
from typing import Any, Tuple

import aiofiles
import zstandard as zstd
from loguru import logger
from orjson import OPT_NAIVE_UTC, OPT_UTC_Z  # pylint: disable=no-name-in-module
from orjson import dumps as orjson_dumps  # pylint: disable=no-name-in-module
from sqlalchemy.ext.asyncio import AsyncSession

from gmap2.repositories import LPRNewsRepository, OPURepository

from ..job.job_manager import JobManager
from .formatters import models_to_dicts


class _ZstdAsyncSink:
    """
    Thread-safe sink for zstd.stream_writer.

    Collects compressed data chunks in memory using a lock.
    Intended for an asynchronous context where compression runs
    in a separate thread.
    """

    __slots__ = ("_chunks", "_lock")

    def __init__(self) -> None:
        """
        Initialize the sink with an empty chunk list and a lock.
        """
        self._chunks: list[bytes] = []
        self._lock = threading.Lock()

    def write(self, b: bytes) -> int:
        """
        Write a byte fragment to the buffer.

        The method is thread-safe.
        Copies the data and appends it to the internal chunk list.

        :param b: Bytes to write.
        :return: Number of bytes written.
        """
        with self._lock:
            self._chunks.append(bytes(b))
        return len(b)

    def drain(self) -> list[bytes]:
        """
        Take all accumulated chunks, resetting the internal buffer.

        The method is thread-safe.
        Returns all chunks written since the previous drain.

        :return: List of byte fragments.
        """
        with self._lock:
            chunks = self._chunks
            self._chunks = []
        return chunks


class ExportService:
    """
    Main service for exporting data as JSON Lines + zstd.
    """

    def __init__(self, chunk_size: int = 10_000, zstd_level: int = 3) -> None:
        self.chunk_size = chunk_size
        self.zstd_level = zstd_level

    @asynccontextmanager
    async def _export_to_zstd(  # pylint: disable=too-many-arguments,too-many-locals
        self,
        *,
        session: AsyncSession,
        job_id: str,
        job_manager: JobManager,
        repo_factory: Callable[[AsyncSession], Any],
        label: str,
        temp_dir: str | None = None,
    ) -> AsyncGenerator[Tuple[AsyncGenerator[bytes, None], str], None]:
        repo = repo_factory(session)
        file_path: str | None = None
        try:
            with tempfile.NamedTemporaryFile(
                delete=False, suffix=".jsonl.zst", dir=temp_dir
            ) as tmp:
                file_path = tmp.name

            logger.info(f"[export] {label}: start -> {file_path}")

            cctx = zstd.ZstdCompressor(level=self.zstd_level)
            sink = _ZstdAsyncSink()

            async with aiofiles.open(file_path, "wb") as f:
                writer = cctx.stream_writer(sink)
                try:
                    async for batch in repo.stream_all(chunk_size=self.chunk_size):
                        if not batch:
                            continue
                        dicts = models_to_dicts(batch)
                        payload = ("\n".join(_dumps(d) for d in dicts) + "\n").encode(
                            "utf-8"
                        )

                        await asyncio.to_thread(writer.write, payload)

                        for chunk in sink.drain():
                            await f.write(chunk)

                        job_manager.increment_rows(job_id, len(batch))

                    await asyncio.to_thread(writer.flush, zstd.FLUSH_FRAME)

                    for chunk in sink.drain():
                        await f.write(chunk)
                finally:
                    await asyncio.to_thread(writer.close)
                    for chunk in sink.drain():
                        await f.write(chunk)
                    await f.flush()

            logger.info(f"[export] {label}: done -> {file_path}")
            yield _stream_file(file_path), file_path

        except Exception:
            if file_path and os.path.exists(file_path):
                await asyncio.to_thread(os.remove, file_path)
            logger.exception(f"[export] {label}: failed, temporary file removed")
            raise

    @asynccontextmanager
    async def export_opu_to_zstd(
        self,
        session: AsyncSession,
        job_id: str,
        job_manager: JobManager,
        temp_dir: str | None = None,
    ) -> AsyncGenerator[Tuple[AsyncGenerator[bytes, None], str], None]:
        """
        Export OPU as a single continuous zstd stream.
        """
        async with self._export_to_zstd(
            session=session,
            job_id=job_id,
            job_manager=job_manager,
            repo_factory=OPURepository,
            label="OPU",
            temp_dir=temp_dir,
        ) as ctx:
            yield ctx

    @asynccontextmanager
    async def export_lpr_news_to_zstd(
        self,
        session: AsyncSession,
        job_id: str,
        job_manager: JobManager,
        temp_dir: str | None = None,
    ) -> AsyncGenerator[Tuple[AsyncGenerator[bytes, None], str], None]:
        """
        Export LPR News as a single continuous zstd stream.
        """
        async with self._export_to_zstd(
            session=session,
            job_id=job_id,
            job_manager=job_manager,
            repo_factory=LPRNewsRepository,
            label="LPR News",
            temp_dir=temp_dir,
        ) as ctx:
            yield ctx


def _dumps(data: dict) -> str:
    """
    Serialize a dict to a JSON string (orjson).
    """

    return orjson_dumps(
        data,
        default=_serialize_value,
        option=OPT_UTC_Z | OPT_NAIVE_UTC,
    ).decode("utf-8")


def _serialize_value(value: Any) -> Any:
    """
    Convert values to a JSON-compatible form.
    """
    if isinstance(value, (datetime, date)):
        return value.isoformat()
    if isinstance(value, float) and not math.isfinite(value):
        return None
    return value


async def _stream_file(
    file_path: str, chunk_size: int = 8192
) -> AsyncGenerator[bytes, None]:
    """
    Read a file asynchronously in blocks.
    """
    async with aiofiles.open(file_path, "rb") as f:
        while chunk := await f.read(chunk_size):
            yield chunk

service\src\gmap2\services\export\compressors.py -
"""
Data compression helpers based on zstandard.
"""

from typing import BinaryIO

import zstandard as zstd


def create_zstd_writer(fileobj: BinaryIO, level: int = 3) -> zstd.ZstdCompressionWriter:
    """
    Create a compressing writer on top of a binary file.

    Args:
        fileobj: Target file (e.g. a tempfile).
        level: Compression level (1-10). Default 3 - a balance of speed and size.

    Returns:
        Writer object for compressed data.
    """
    cctx = zstd.ZstdCompressor(level=level)
    return cctx.stream_writer(fileobj)

service\src\gmap2\services\export\formatters.py -
"""
Formatting of ORM objects into dicts.

Optimized by caching the model structure.
"""

from datetime import date, datetime
from decimal import Decimal
from typing import Any, Dict, List
from weakref import WeakKeyDictionary

from sqlalchemy import inspect
from sqlalchemy.orm import RelationshipProperty

_columns_cache: WeakKeyDictionary = WeakKeyDictionary()


def _get_model_columns(model_instance: Any) -> List[str]:
    """
    Return the model's column names (excluding relationships).
    Caches the result.
    """
    model_class = model_instance.__class__
    if model_class not in _columns_cache:
        mapper = inspect(model_class)
        columns = [
            attr.key
            for attr in mapper.attrs
            if not isinstance(attr, RelationshipProperty)
        ]
        _columns_cache[model_class] = columns
    return _columns_cache[model_class]


def _serialize_value(value: Any) -> Any:
    """
    Serialize a value into a JSON-compatible format.
    """
    if isinstance(value, (datetime, date)):
        return value.isoformat()
    if isinstance(value, Decimal):
        return float(value)
    if value is None:
        return None
    return value


def models_to_dicts(instances: List[Any]) -> List[Dict[str, Any]]:
    """
    Bulk conversion of a list of ORM objects into dicts.

    Uses model-structure caching for high performance.

    Args:
        instances: List of ORM objects.

    Returns:
        List of dicts.
    """
    if not instances:
        return []

    columns = _get_model_columns(instances[0])
    return [
        {col: _serialize_value(getattr(obj, col)) for col in columns}
        for obj in instances
    ]


__all__ = [
    "models_to_dicts",
]

service\src\gmap2\services\job\background_worker.py -
"""
Background export tasks.
"""

from typing import Optional

from psycopg import errors as pg_errors
from loguru import logger
from sqlalchemy.exc import DatabaseError, OperationalError
from tenacity import (
    retry,
    retry_if_exception_type,
    stop_after_attempt,
    wait_exponential,
)

from gmap2.context import APP_CTX
from gmap2.services.export.export_service import ExportService

TRANSIENT_ERRORS = (
    OperationalError,
    DatabaseError,
    ConnectionError,
    TimeoutError,
    pg_errors.ConnectionException,
    pg_errors.AdminShutdown,
    pg_errors.CannotConnectNow,
)


def _create_export_job(
    export_method_name: str,
    job_type: str,
):
    """
    Factory for background export tasks.

    Args:
        export_method_name: Name of the export method on ExportService.
        job_type: Job type ("opu", "lpr_news").

    Returns:
        Asynchronous task function.
    """

    @retry(
        stop=stop_after_attempt(10),
        wait=wait_exponential(multiplier=1, max=60),
        retry=retry_if_exception_type(TRANSIENT_ERRORS),
        reraise=True,
    )
    async def run_export_job(
        job_id: str, job_manager, temp_dir: Optional[str] = None
    ) -> None:
        export_service = ExportService()
        try:
            async with APP_CTX.get_db_session() as session:
                job_manager.mark_running(job_id)

                export_method = getattr(export_service, export_method_name)
                async with export_method(
                    session=session,
                    job_id=job_id,
                    job_manager=job_manager,
                    temp_dir=temp_dir,
                ) as (stream, file_path):
                    async for _ in stream:
                        pass

                job_manager.mark_completed(job_id, file_path)

        except Exception as e:
            if not isinstance(e, TRANSIENT_ERRORS):
                job_manager.mark_failed(job_id, f"{type(e).__name__}: {e}")
            logger.exception(f"Export job {job_id} ({job_type}) failed")
            raise

    run_export_job.__name__ = f"run_{job_type}_export_job"
    run_export_job.__doc__ = f"Background {job_type.upper()} export task with retry."

    return run_export_job


run_opu_export_job = _create_export_job("export_opu_to_zstd", "opu")
run_lpr_news_export_job = _create_export_job("export_lpr_news_to_zstd", "lpr_news")
File diff suppressed because it is too large
Load Diff
830
README.md
830
README.md
|
|
@ -1,12 +1,830 @@
|
||||||
# Dataloader
|
# Dataloader
|
||||||
|
|
||||||
|
Асинхронный сервис постановки и исполнения ETL‑задач поверх очереди в PostgreSQL. Предоставляет HTTP API для триггера задач, мониторинга статуса и отмены. Внутри процесса поднимает пул воркеров, которые конкурируют за задачи через SELECT … FOR UPDATE SKIP LOCKED и обрабатывают их с учётом lease/heartbeat и кооперативной отмены. Для пробуждения воркеров используется LISTEN/NOTIFY.
|
||||||
## Архитектура
|
|
||||||
|
|
||||||
Проект построен на базе **FastAPI** с использованием паттерна **Clean Architecture** и следованием принципам **SOLID**. Основные архитектурные компоненты:
|
|
||||||
|
|
||||||
|
|
||||||
|
## Содержание
|
||||||
|
- О проекте
|
||||||
|
- Быстрый старт
|
||||||
|
- Конфигурация окружения
|
||||||
|
- Архитектура и потоки данных
|
||||||
|
- Структура проекта
|
||||||
|
- Взаимодействие с БД
|
||||||
|
- HTTP API
|
||||||
|
- Воркеры, пайплайны и добавление новых ETL‑задач
|
||||||
|
- Существующие пайплайны
|
||||||
|
- Логирование, метрики, аудит
|
||||||
|
- Тестирование
|
||||||
|
- Эксплуатация и масштабирование
|
||||||
|
|
||||||
## Лицензия
|
|
||||||
|
|
||||||
Внутренний корпоративный проект.
|
## О проекте
|
||||||
|
Сервис решает типовую задачу фоновой обработки задач: один общий пул воркеров, одна очередь в БД, несколько типов задач (пайплайнов), идемпотентность, повторные попытки, контроль конкуренции через advisory‑lock по `lock_key`, кооперативная отмена, возврат «потерянных» задач.
|
||||||
|
|
||||||
|
|
||||||
|
## Быстрый старт
|
||||||
|
1. Установить зависимости (poetry):
|
||||||
|
```bash
|
||||||
|
poetry install
|
||||||
|
```
|
||||||
|
2. Подготовить PostgreSQL (см. DDL в `DDL.sql`) и переменные окружения (см. «Конфигурация»).
|
||||||
|
3. Запуск сервиса:
|
||||||
|
```bash
|
||||||
|
poetry run python -m dataloader
|
||||||
|
```
|
||||||
|
4. Проверка доступности:
|
||||||
|
```bash
|
||||||
|
curl http://localhost:8081/health
|
||||||
|
```
|
||||||
|
5. Пример постановки задачи:
|
||||||
|
```bash
|
||||||
|
curl -X POST http://localhost:8081/api/v1/jobs/trigger \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-d '{
|
||||||
|
"queue": "etl.default",
|
||||||
|
"task": "noop",
|
||||||
|
"args": {"sleep1": 1, "sleep2": 1, "sleep3": 1},
|
||||||
|
"lock_key": "customer:42",
|
||||||
|
"priority": 100
|
||||||
|
}'
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
|
## Конфигурация окружения
|
||||||
|
Настройки собираются в `src/dataloader/config.py` через Pydantic Settings.
|
||||||
|
|
||||||
|
- Приложение (`AppSettings`):
|
||||||
|
- `APP_HOST` (def: `0.0.0.0`)
|
||||||
|
- `APP_PORT` (def: `8081`)
|
||||||
|
- `TIMEZONE` (def: `Europe/Moscow`)
|
||||||
|
|
||||||
|
- Логирование (`LogSettings`):
|
||||||
|
- `LOG_PATH`, `LOG_FILE_NAME`, `LOG_ROTATION`
|
||||||
|
- `METRIC_PATH`, `METRIC_FILE_NAME`
|
||||||
|
- `AUDIT_LOG_PATH`, `AUDIT_LOG_FILE_NAME`
|
||||||
|
- `DEBUG` переключает уровень на DEBUG
|
||||||
|
|
||||||
|
- PostgreSQL (`PGSettings`):
|
||||||
|
- `PG_HOST`, `PG_PORT`, `PG_USER`, `PG_PASSWORD`, `PG_DATABASE`
|
||||||
|
- `PG_SCHEMA_QUEUE` - схема таблиц очереди (логическая, маппится через `schema_translate_map`)
|
||||||
|
- Параметры пула: `PG_USE_POOL`, `PG_POOL_SIZE`, `PG_MAX_OVERFLOW`, `PG_POOL_RECYCLE`
|
||||||
|
- Таймауты: `PG_CONNECT_TIMEOUT`, `PG_COMMAND_TIMEOUT`
|
||||||
|
|
||||||
|
- Воркеры (`WorkerSettings`):
|
||||||
|
- `WORKERS_JSON` - список конфигураций воркеров, например: `[{"queue":"etl.default","concurrency":2}]`
|
||||||
|
- `DL_HEARTBEAT_SEC` (def: 10)
|
||||||
|
- `DL_DEFAULT_LEASE_TTL_SEC` (def: 60)
|
||||||
|
- `DL_REAPER_PERIOD_SEC` (def: 10)
|
||||||
|
- `DL_CLAIM_BACKOFF_SEC` (def: 15)
|
||||||
|
|
||||||
|
|
||||||
|
## Архитектура и потоки данных
|
||||||
|
|
||||||
|
- HTTP слой: FastAPI‑приложение (`dataloader.api`) с v1 API и инфраструктурными маршрутами (`/health`, `/info`).
|
||||||
|
- Контекст: `AppContext` инициализирует логирование, `AsyncEngine`, `async_sessionmaker` и предоставляет DI (`get_session`).
|
||||||
|
- Очередь: одна таблица `dl_jobs` и журнал `dl_job_events` в PostgreSQL. Идемпотентность на уровне `idempotency_key`. Пробуждение воркеров через триггеры NOTIFY в БД и listener на стороне приложения.
|
||||||
|
- Воркеры: `WorkerManager` поднимает N асинхронных воркеров (`PGWorker`) на основании `WORKERS_JSON`. Каждый воркер:
|
||||||
|
1) ждёт уведомление (LISTEN/NOTIFY) или таймаут,
|
||||||
|
2) пытается «захватить» одну задачу (SELECT … FOR UPDATE SKIP LOCKED),
|
||||||
|
3) выставляет `running`, получает advisory‑lock по `lock_key`,
|
||||||
|
4) исполняет соответствующий пайплайн с heartbeat,
|
||||||
|
5) завершает задачу: `succeeded`/`failed`/`canceled` или возвращает в очередь на ретрай.
|
||||||
|
- Реапер: фоновая задача, периодически возвращает «потерянные» running‑задачи в `queued`.
|
||||||
|
|
||||||
|
|
||||||
|
## Структура
|
||||||
|
```
|
||||||
|
src/dataloader/
|
||||||
|
├── __main__.py # Запуск uvicorn, lifecycle через FastAPI lifespan
|
||||||
|
├── config.py # Pydantic-настройки (app/log/pg/worker)
|
||||||
|
├── context.py # AppContext: engine, sessionmaker, логгер, DI
|
||||||
|
│
|
||||||
|
├── api/
|
||||||
|
│ ├── __init__.py # FastAPI app, middleware, routers, lifespan
|
||||||
|
│ ├── middleware.py # Логирование запросов + метрики/аудит
|
||||||
|
│ ├── os_router.py # /health, /info (инфраструктурные ручки)
|
||||||
|
│ ├── metric_router.py # Примеры метрик (like/dislike)
|
||||||
|
│ │
|
||||||
|
│ └── v1/
|
||||||
|
│ ├── router.py # /api/v1/jobs: trigger, status, cancel
|
||||||
|
│ ├── service.py # Бизнес-логика поверх репозитория
|
||||||
|
│ ├── schemas.py # Pydantic DTO API
|
||||||
|
│ └── utils.py # Утилиты (генерация UUID и т.д.)
|
||||||
|
│
|
||||||
|
├── storage/
|
||||||
|
│ ├── engine.py # AsyncEngine, sessionmaker, schema_translate_map
|
||||||
|
│ ├── notify_listener.py # asyncpg LISTEN/NOTIFY по каналу dl_jobs
|
||||||
|
│ │
|
||||||
|
│ ├── models/
|
||||||
|
│ │ ├── base.py # Declarative base
|
||||||
|
│ │ └── queue.py # ORM-модели DLJob, DLJobEvent
|
||||||
|
│ │
|
||||||
|
│ ├── repositories/
|
||||||
|
│ │ └── queue.py # QueueRepository (CRUD операции)
|
||||||
|
│ │
|
||||||
|
│ └── schemas/
|
||||||
|
│ └── queue.py # CreateJobRequest, JobStatus
|
||||||
|
│
|
||||||
|
├── workers/
|
||||||
|
│ ├── base.py # PGWorker: главный цикл, heartbeat, вызов пайплайнов
|
||||||
|
│ ├── manager.py # WorkerManager: запуск/остановка + reaper
|
||||||
|
│ ├── reaper.py # Requeue_lost на базе репозитория
|
||||||
|
│ │
|
||||||
|
│ └── pipelines/
|
||||||
|
│ ├── __init__.py # Автозагрузка модулей для регистрации
|
||||||
|
│ ├── registry.py # Реестр обработчиков задач (@register)
|
||||||
|
│ └── noop.py # Пример эталонного пайплайна
|
||||||
|
│
|
||||||
|
└── logger/
|
||||||
|
└── ... # Логирование
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
|
## Взаимодействие с БД

- Подключение: `postgresql+asyncpg` через SQLAlchemy AsyncEngine.
- Схемы: логическое имя `queue` маппится на реальную через `schema_translate_map` (см. `engine.py`), имя реальной схемы задаётся `PG_SCHEMA_QUEUE`.
- DDL: см. `DDL.sql`. Ключевые элементы:
  - `dl_jobs` с индексами на claim и running‑lease,
  - `dl_job_events` как журнал событий,
  - триггер `notify_job_ready()` + `LISTEN dl_jobs` для пробуждения воркеров.
- Конкуренция: claim через `FOR UPDATE SKIP LOCKED`, взаимное исключение по бизнес‑сущности через advisory‑lock `pg_try_advisory_lock(hashtext(lock_key))`.
- Надёжность: at‑least‑once. Пайплайны должны быть идемпотентны в части записи в целевые таблицы.

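Набросок того, как может выглядеть создание движка с трансляцией схемы (по мотивам `engine.py`; параметры пула и таймаутов опущены):

```python
from __future__ import annotations

from sqlalchemy.ext.asyncio import AsyncEngine, AsyncSession, async_sessionmaker, create_async_engine


def build_engine(dsn: str, schema_queue: str) -> AsyncEngine:
    """AsyncEngine, в котором логическая схема 'queue' транслируется в реальную (PG_SCHEMA_QUEUE)."""
    engine = create_async_engine(dsn, pool_pre_ping=True)
    return engine.execution_options(schema_translate_map={"queue": schema_queue})


def build_sessionmaker(engine: AsyncEngine) -> async_sessionmaker[AsyncSession]:
    """Фабрика асинхронных сессий; объекты не протухают после commit."""
    return async_sessionmaker(engine, expire_on_commit=False)
```
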
## HTTP API (v1)
|
||||||
|
|
||||||
|
### POST `/api/v1/jobs/trigger`
|
||||||
|
|
||||||
|
Постановка задачи в очередь (идемпотентная операция).
|
||||||
|
|
||||||
|
**Request:**
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"queue": "load.cbr", // обязательно: имя очереди
|
||||||
|
"task": "load.cbr.rates", // обязательно: имя задачи для registry
|
||||||
|
"args": { // опционально: аргументы задачи
|
||||||
|
"date": "2025-01-10",
|
||||||
|
"currencies": ["USD", "EUR"]
|
||||||
|
},
|
||||||
|
"idempotency_key": "cbr_2025-01-10", // опционально: ключ идемпотентности
|
||||||
|
"lock_key": "cbr_rates", // обязательно: ключ для advisory lock
|
||||||
|
"partition_key": "2025-01-10", // опционально: ключ партиционирования
|
||||||
|
"priority": 100, // опционально: приоритет (меньше = выше)
|
||||||
|
"available_at": "2025-01-10T00:00:00Z", // опционально: отложенный запуск
|
||||||
|
"max_attempts": 3, // опционально: макс попыток (def: 5)
|
||||||
|
"lease_ttl_sec": 300, // опционально: TTL аренды (def: 60)
|
||||||
|
"producer": "api-client", // опционально: кто поставил
|
||||||
|
"consumer_group": "cbr-loaders" // опционально: группа потребителей
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
**Response 200:**
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"job_id": "550e8400-e29b-41d4-a716-446655440000",
|
||||||
|
"status": "queued"
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
**Коды ответов:**
|
||||||
|
- `200 OK` - задача создана или уже существует (идемпотентность)
|
||||||
|
- `400 Bad Request` - невалидные данные
|
||||||
|
- `500 Internal Server Error` - ошибка сервера
|
||||||
|
|
||||||
|
### GET `/api/v1/jobs/{job_id}/status`
|
||||||
|
|
||||||
|
Получение статуса задачи.
|
||||||
|
|
||||||
|
**Response 200:**
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"job_id": "550e8400-e29b-41d4-a716-446655440000",
|
||||||
|
"status": "running", // queued/running/succeeded/failed/canceled
|
||||||
|
"attempt": 1, // текущая попытка
|
||||||
|
"started_at": "2025-01-10T12:00:00Z", // время первого запуска
|
||||||
|
"finished_at": null, // время завершения (если есть)
|
||||||
|
"heartbeat_at": "2025-01-10T12:01:30Z", // последний heartbeat
|
||||||
|
"error": null, // текст ошибки (если есть)
|
||||||
|
"progress": { // прогресс выполнения (custom)
|
||||||
|
"processed": 500,
|
||||||
|
"total": 1000
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
**Коды ответов:**
|
||||||
|
- `200 OK` - статус получен
|
||||||
|
- `404 Not Found` - задача не найдена
|
||||||
|
|
||||||
|
### POST `/api/v1/jobs/{job_id}/cancel`
|
||||||
|
|
||||||
|
Запрос кооперативной отмены задачи.
|
||||||
|
|
||||||
|
**Response 200:**
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"job_id": "550e8400-e29b-41d4-a716-446655440000",
|
||||||
|
"status": "running",
|
||||||
|
"attempt": 1,
|
||||||
|
"started_at": "2025-01-10T12:00:00Z",
|
||||||
|
"heartbeat_at": "2025-01-10T12:01:30Z"
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
**Поведение:**
|
||||||
|
- Устанавливает флаг `cancel_requested = true` в БД
|
||||||
|
- Воркер проверяет флаг между `yield` в пайплайне
|
||||||
|
- При обнаружении флага воркер завершает задачу со статусом `canceled`
|
||||||
|
|
||||||
|
**Коды ответов:**
|
||||||
|
- `200 OK` - запрос отмены принят
|
||||||
|
- `404 Not Found` - задача не найдена
|
||||||
|
|
||||||
|
### Инфраструктурные эндпоинты
|
||||||
|
|
||||||
|
**GET `/health`** - проверка работоспособности (без БД, < 20ms)
|
||||||
|
```json
|
||||||
|
{"status": "healthy"}
|
||||||
|
```
|
||||||
|
|
||||||
|
**GET `/info`** - информация о сервисе
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"service": "dataloader",
|
||||||
|
"version": "1.0.0",
|
||||||
|
"environment": "production"
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
|
## Воркеры, пайплайны и добавление новых ETL‑задач

### Как работает воркер

1. Ожидает сигнал (LISTEN/NOTIFY) или таймаут `DL_CLAIM_BACKOFF_SEC`.
2. Пытается забрать одну задачу своей очереди: `status='queued' AND available_at<=now()` с `FOR UPDATE SKIP LOCKED`.
3. Переводит в `running`, увеличивает `attempt`, выставляет `lease_expires_at`, делает heartbeat каждые `DL_HEARTBEAT_SEC`.
4. Захватывает advisory‑lock по `lock_key` (если не получилось - возвращает в `queued` с бэкоффом).
5. Выполняет пайплайн (`task`) с поддержкой итеративных шагов и кооперативной отмены.
6. По завершении: `succeeded` или `failed`/`canceled`; при ошибках возможны ретраи до `max_attempts`.

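Эти шаги в виде упрощённого наброска главного цикла (реальный `PGWorker` дополнительно журналирует события в `dl_job_events`; методы `repo.*` здесь условные и лишь иллюстрируют протокол):

```python
from __future__ import annotations

import asyncio

from dataloader.workers.pipelines.registry import resolve


async def worker_loop(repo, queue: str, backoff_sec: int, stop: asyncio.Event) -> None:
    """Схема цикла: ожидание -> claim -> advisory lock -> пайплайн с heartbeat -> завершение."""
    while not stop.is_set():
        await repo.wait_notify_or_sleep(timeout=backoff_sec)       # шаг 1: LISTEN/NOTIFY или таймаут
        job = await repo.claim_one(queue)                          # шаги 2-3: claim переводит задачу в running
        if job is None:
            continue
        if not await repo.try_advisory_lock(job.lock_key):         # шаг 4: взаимное исключение по lock_key
            await repo.requeue_with_backoff(job.job_id, backoff_sec)
            continue
        try:
            pipeline = resolve(job.task)
            async for _ in pipeline(job.args):                     # шаг 5: каждый yield - точка heartbeat/отмены
                cancel_requested = await repo.heartbeat(job.job_id, job.lease_ttl_sec)
                if cancel_requested:
                    await repo.finish(job.job_id, status="canceled")
                    break
            else:
                await repo.finish(job.job_id, status="succeeded")  # шаг 6: успешное завершение
        except Exception as exc:                                   # ретрай или фейл решается репозиторием
            await repo.finish_fail_or_retry(job.job_id, str(exc))
        finally:
            await repo.advisory_unlock(job.lock_key)
```
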
### Протокол выполнения задачи (SQL)

**Claim (захват задачи):**
```sql
WITH cte AS (
    SELECT job_id
    FROM dl_jobs
    WHERE status = 'queued'
      AND queue = :queue
      AND available_at <= now()
    ORDER BY priority ASC, created_at ASC
    FOR UPDATE SKIP LOCKED
    LIMIT 1
)
UPDATE dl_jobs j
SET status = 'running',
    started_at = COALESCE(started_at, now()),
    attempt = attempt + 1,
    lease_expires_at = now() + make_interval(secs => j.lease_ttl_sec),
    heartbeat_at = now()
FROM cte
WHERE j.job_id = cte.job_id
RETURNING j.job_id, j.task, j.args, j.lock_key, j.lease_ttl_sec;
```

**Heartbeat (продление аренды):**
```sql
UPDATE dl_jobs
SET heartbeat_at = now(),
    lease_expires_at = now() + make_interval(secs => :ttl)
WHERE job_id = :job_id AND status = 'running'
RETURNING cancel_requested;
```

**Завершение успешное:**
```sql
UPDATE dl_jobs
SET status = 'succeeded',
    finished_at = now(),
    lease_expires_at = NULL
WHERE job_id = :job_id;

SELECT pg_advisory_unlock(hashtext(:lock_key));
```

**Завершение с ошибкой (retry):**
```sql
UPDATE dl_jobs
SET status = CASE WHEN attempt < max_attempts THEN 'queued' ELSE 'failed' END,
    available_at = CASE WHEN attempt < max_attempts
                        THEN now() + make_interval(secs => 30 * attempt)
                        ELSE now() END,
    error = :error_message,
    lease_expires_at = NULL,
    finished_at = CASE WHEN attempt >= max_attempts THEN now() ELSE NULL END
WHERE job_id = :job_id;

SELECT pg_advisory_unlock(hashtext(:lock_key));
```

**Reaper (возврат потерянных задач):**
```sql
UPDATE dl_jobs
SET status = 'queued',
    available_at = now(),
    lease_expires_at = NULL
WHERE status = 'running'
  AND lease_expires_at IS NOT NULL
  AND lease_expires_at < now()
RETURNING job_id;
```

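Фоновый цикл реапера поверх последнего запроса может выглядеть так (набросок; реальный `reaper.py` выполняет `requeue_lost` через репозиторий):

```python
from __future__ import annotations

import asyncio

from sqlalchemy import text
from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker

REQUEUE_LOST_SQL = text(
    """
    UPDATE dl_jobs
    SET status = 'queued', available_at = now(), lease_expires_at = NULL
    WHERE status = 'running'
      AND lease_expires_at IS NOT NULL
      AND lease_expires_at < now()
    RETURNING job_id
    """
)


async def run_reaper(sessionmaker: async_sessionmaker[AsyncSession], period_sec: int, stop: asyncio.Event) -> None:
    """Периодически возвращает running-задачи с истёкшим lease обратно в 'queued'."""
    while not stop.is_set():
        async with sessionmaker() as session:
            result = await session.execute(REQUEUE_LOST_SQL)
            requeued = result.scalars().all()  # список job_id, пригоден для логирования
            await session.commit()
        try:
            await asyncio.wait_for(stop.wait(), timeout=period_sec)
        except asyncio.TimeoutError:
            continue
```
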
### Интерфейс пайплайна

Пайплайн - обычная функция, возвращающая одно из:
- асинхронный генератор шагов (рекомендуется для длинных процессов),
- корутину,
- синхронную функцию.

Каждый «yield» в асинхронном генераторе - безопасная точка, где воркер выполнит heartbeat и проверит `cancel_requested`.

Регистрация нового пайплайна - через декоратор `@register("task_name")` в модуле `src/dataloader/workers/pipelines/<your_task>.py`; набросок самого реестра приведён ниже после примера.

Пример (функции шагов условные):

```python
from __future__ import annotations

from typing import AsyncIterator

from dataloader.workers.pipelines.registry import register


@register("load.customers")
async def load_customers(args: dict) -> AsyncIterator[None]:
    # шаг 1 – вытягиваем данные
    await some_fetch(args)
    yield
    # шаг 2 – пишем в БД идемпотентно (upsert/merge)
    await upsert_customers(args)
    yield
    # шаг 3 – пост‑обработка
    await finalize(args)
    yield
```

Важно:
- Пайплайны должны быть идемпотентны (повторный запуск не должен ломать данные).
- Долгие операции разбивайте на шаги с `yield`, чтобы работал heartbeat и отмена.
- Для бизнес‑взаимного исключения выбирайте корректный `lock_key` (например, `customer:{id}`), чтобы параллельные задачи не конфликтовали.

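Сам реестр в `registry.py` концептуально устроен так (набросок; фактическая реализация может отличаться):

```python
from __future__ import annotations

from typing import Any, Callable

# Имя задачи -> вызываемый объект пайплайна.
_REGISTRY: dict[str, Callable[[dict[str, Any]], Any]] = {}


def register(task: str) -> Callable[[Callable[[dict[str, Any]], Any]], Callable[[dict[str, Any]], Any]]:
    """Декоратор: регистрирует пайплайн под именем задачи."""

    def _wrap(fn: Callable[[dict[str, Any]], Any]) -> Callable[[dict[str, Any]], Any]:
        _REGISTRY[task] = fn
        return fn

    return _wrap


def resolve(task: str) -> Callable[[dict[str, Any]], Any]:
    """Возвращает обработчик по имени задачи; KeyError, если задача не зарегистрирована."""
    return _REGISTRY[task]
```
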
### Добавление ETL‑задачи (шаги)
|
||||||
|
|
||||||
|
**1. Создать пайплайн** в `src/dataloader/workers/pipelines/`:
|
||||||
|
|
||||||
|
```python
|
||||||
|
# src/dataloader/workers/pipelines/load_cbr_rates.py
|
||||||
|
from __future__ import annotations
|
||||||
|
from typing import AsyncIterator
|
||||||
|
from datetime import datetime
|
||||||
|
from dataloader.workers.pipelines.registry import register
|
||||||
|
|
||||||
|
@register("load.cbr.rates")
|
||||||
|
async def load_cbr_rates(args: dict) -> AsyncIterator[None]:
|
||||||
|
"""
|
||||||
|
Загрузка курсов валют ЦБ РФ.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
args: {"date": "YYYY-MM-DD", "currencies": ["USD", "EUR"]}
|
||||||
|
"""
|
||||||
|
date = datetime.fromisoformat(args["date"])
|
||||||
|
currencies = args.get("currencies", ["USD", "EUR"])
|
||||||
|
|
||||||
|
# Извлечение данных
|
||||||
|
data = await fetch_cbr_rates(date, currencies)
|
||||||
|
yield # Heartbeat checkpoint
|
||||||
|
|
||||||
|
# Трансформация
|
||||||
|
transformed = transform_rates(data)
|
||||||
|
yield # Heartbeat checkpoint
|
||||||
|
|
||||||
|
# Идемпотентная загрузка в БД
|
||||||
|
async with get_target_session() as session:
|
||||||
|
await session.execute(
|
||||||
|
insert(CbrRates)
|
||||||
|
.values(transformed)
|
||||||
|
.on_conflict_do_update(
|
||||||
|
index_elements=["date", "currency"],
|
||||||
|
set_={"rate": excluded.c.rate, "updated_at": func.now()}
|
||||||
|
)
|
||||||
|
)
|
||||||
|
await session.commit()
|
||||||
|
yield
|
||||||
|
```
|
||||||
|
|
||||||
|
**2. Настроить воркеры** в `.env`:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
WORKERS_JSON='[{"queue":"load.cbr","concurrency":2}]'
|
||||||
|
```
|
||||||
|
|
||||||
|
**3. Поставить задачу через API**:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl -X POST http://localhost:8081/api/v1/jobs/trigger \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-d '{
|
||||||
|
"queue": "load.cbr",
|
||||||
|
"task": "load.cbr.rates",
|
||||||
|
"args": {"date": "2025-01-10", "currencies": ["USD", "EUR"]},
|
||||||
|
"lock_key": "cbr_rates_2025-01-10",
|
||||||
|
"partition_key": "2025-01-10",
|
||||||
|
"priority": 100,
|
||||||
|
"max_attempts": 3,
|
||||||
|
"lease_ttl_sec": 300
|
||||||
|
}'
|
||||||
|
```
|
||||||
|
|
||||||
|
**4. Мониторить выполнение**:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Получить job_id из ответа и проверить статус
|
||||||
|
curl http://localhost:8081/api/v1/jobs/{job_id}/status
|
||||||
|
```
|
||||||
|
|
||||||
|
**Ключевые моменты**:
|
||||||
|
- Пайплайн должен быть идемпотентным (повторный запуск не должен ломать данные)
|
||||||
|
- Используйте `yield` после каждого значимого чанка работы для heartbeat
|
||||||
|
- `lock_key` должен обеспечивать взаимное исключение (например, `customer:{id}`)
|
||||||
|
- `partition_key` используется для параллелизации независимых задач
|
||||||
|
|
||||||
|
|
||||||
|
## Существующие пайплайны
|
||||||
|
|
||||||
|
### `load.tenera` - Загрузка котировок
|
||||||
|
|
||||||
|
**Описание:**
|
||||||
|
Загружает финансовые котировки (биржевые индексы, валюты, сырьевые товары) из SuperTenera API.
|
||||||
|
|
||||||
|
**Источник данных:**
|
||||||
|
- SuperTenera API - агрегатор котировок из множества источников (CBR, Investing.com, SGX, Bloomberg, TradingView, TradingEconomics)
|
||||||
|
|
||||||
|
**Назначение:**
|
||||||
|
- Таблицы в схеме `quotes`:
|
||||||
|
- `quote_section` - разделы котировок (CBR, SGX, и т.д.)
|
||||||
|
- `quote` - инструменты (тикеры, названия)
|
||||||
|
- `quote_value` - временные ряды значений (OHLCV + дополнительные метрики)
|
||||||
|
|
||||||
|
**Процесс:**
|
||||||
|
1. Запрос данных из SuperTenera API
|
||||||
|
2. Парсинг различных форматов (каждый источник имеет свою структуру)
|
||||||
|
3. Трансформация в единый формат с преобразованием временных зон
|
||||||
|
4. UPSERT в БД (идемпотентная вставка/обновление)
|
||||||
|
|
||||||
|
**Пример запуска:**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Настроить воркер в .env
|
||||||
|
WORKERS_JSON='[{"queue":"load.tenera","concurrency":1}]'
|
||||||
|
|
||||||
|
# Запустить задачу через API
|
||||||
|
curl -X POST http://localhost:8081/api/v1/jobs/trigger \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-d '{
|
||||||
|
"queue": "load.tenera",
|
||||||
|
"task": "load.tenera",
|
||||||
|
"args": {},
|
||||||
|
"lock_key": "tenera_quotes",
|
||||||
|
"priority": 100,
|
||||||
|
"max_attempts": 3,
|
||||||
|
"lease_ttl_sec": 300
|
||||||
|
}'
|
||||||
|
```
|
||||||
|
|
||||||
|
**Особенности:**
|
||||||
|
- Поддержка множества форматов временных меток (ISO, Unix timestamp, кастомные форматы)
|
||||||
|
- Автоматическое преобразование временных зон в Europe/Moscow
|
||||||
|
- Обработка разных структур данных от различных провайдеров
|
||||||
|
- UPSERT по композитному ключу (quote_id + date/timestamp)
|
||||||
|
|
||||||
|
|
||||||
|
### `load.opu` - Загрузка данных OPU
|
||||||
|
|
||||||
|
**Описание:**
|
||||||
|
Загружает данные OPU из Gmap2Brief API. Данные выгружаются в виде zstandard-сжатого архива с JSON Lines.
|
||||||
|
|
||||||
|
**Источник данных:**
|
||||||
|
- Gmap2Brief API - экспорт данных о структуре OPU
|
||||||
|
|
||||||
|
**Назначение:**
|
||||||
|
- Таблица `brief_digital_certificate_opu` в схеме `opu` (23 поля, композитный PK из 8 полей)
|
||||||
|
|
||||||
|
**Процесс:**
|
||||||
|
1. Запуск экспорта через API (`/export/opu/start`)
|
||||||
|
2. Polling статуса экспорта до завершения
|
||||||
|
3. Скачивание zstandard-архива с JSON Lines
|
||||||
|
4. TRUNCATE целевой таблицы (полная перезагрузка)
|
||||||
|
5. Потоковая распаковка архива (64KB чанки)
|
||||||
|
6. Парсинг JSON Lines и батчевая вставка (по 5000 записей)
|
||||||
|
7. Преобразование ISO-дат в date/datetime объекты
|
||||||
|
|
||||||
|
**Пример запуска:**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Настроить воркер в .env
|
||||||
|
WORKERS_JSON='[{"queue":"load.opu","concurrency":1}]'
|
||||||
|
|
||||||
|
# Запустить задачу через API
|
||||||
|
curl -X POST http://localhost:8081/api/v1/jobs/trigger \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-d '{
|
||||||
|
"queue": "load.opu",
|
||||||
|
"task": "load.opu",
|
||||||
|
"args": {},
|
||||||
|
"lock_key": "opu_export",
|
||||||
|
"priority": 100,
|
||||||
|
"max_attempts": 2,
|
||||||
|
"lease_ttl_sec": 600
|
||||||
|
}'
|
||||||
|
```
|
||||||
|
|
||||||
|
**Особенности:**
|
||||||
|
- Потоковая обработка больших архивов без полной загрузки в память
|
||||||
|
- Zstandard декомпрессия с буферизацией неполных строк
|
||||||
|
- TRUNCATE перед загрузкой (стратегия полной замены данных)
|
||||||
|
- Батчевая вставка для оптимизации производительности
|
||||||
|
- Heartbeat после каждого батча для отслеживания прогресса
|
||||||
|
- Композитный первичный ключ из 8 полей обеспечивает уникальность
|
||||||
|
|
||||||
|
|
||||||
|
### `noop` - Тестовый пайплайн
|
||||||
|
|
||||||
|
**Описание:**
|
||||||
|
Демонстрационный пайплайн для тестирования инфраструктуры очереди. Выполняет серию sleep-операций с heartbeat.
|
||||||
|
|
||||||
|
**Пример запуска:**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl -X POST http://localhost:8081/api/v1/jobs/trigger \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-d '{
|
||||||
|
"queue": "etl.default",
|
||||||
|
"task": "noop",
|
||||||
|
"args": {"sleep1": 2, "sleep2": 3, "sleep3": 1},
|
||||||
|
"lock_key": "test_noop",
|
||||||
|
"priority": 100
|
||||||
|
}'
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
|
## Логирование, метрики, аудит

- Логи: структурированные, через `logger/*`. Middleware (`api/middleware.py`) логирует входящие запросы/исходящие ответы, время обработки, пишет метрики и аудит‑события.
- Метрики: простые счётчики (пример: likes/dislikes, requests_total, responses_total, duration_ms).
- Аудит: запись бизнес‑событий начала/окончания обработки запроса.

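Короткий пример использования логгера (вызов `logger.metric(...)` повторяет сигнатуру из middleware; имя счётчика ниже - условное):

```python
from dataloader.context import APP_CTX

logger = APP_CTX.get_logger()

logger.info("Start processing job")  # в логгер передаётся только строка сообщения
logger.metric(
    metric_name="dataloader_jobs_processed_total",  # условное имя метрики
    metric_value=1,
)
```
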
## Тестирование
|
||||||
|
|
||||||
|
### Структура тестов
|
||||||
|
|
||||||
|
```
|
||||||
|
tests/
|
||||||
|
├── conftest.py # Глобальные фикстуры (db_engine, db_session, client)
|
||||||
|
├── integration_tests/ # Интеграционные тесты с реальной БД
|
||||||
|
│ ├── test_queue_repository.py # 12 тестов репозитория
|
||||||
|
│ └── test_api_endpoints.py # 7 тестов API endpoints
|
||||||
|
└── unit/ # Юнит-тесты с моками (92 теста)
|
||||||
|
├── test_config.py # 30 тестов конфигурации
|
||||||
|
├── test_context.py # 13 тестов AppContext
|
||||||
|
├── test_api_service.py # 10 тестов сервисного слоя
|
||||||
|
├── test_notify_listener.py # 13 тестов LISTEN/NOTIFY
|
||||||
|
├── test_workers_base.py # 14 тестов PGWorker
|
||||||
|
├── test_workers_manager.py # 10 тестов WorkerManager
|
||||||
|
└── test_pipeline_registry.py # 5 тестов реестра пайплайнов
|
||||||
|
```
|
||||||
|
|
||||||
|
### Запуск тестов
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Все тесты (111 тестов)
|
||||||
|
poetry run pytest
|
||||||
|
|
||||||
|
# Только юнит-тесты
|
||||||
|
poetry run pytest tests/unit/ -m unit
|
||||||
|
|
||||||
|
# Только интеграционные
|
||||||
|
poetry run pytest tests/integration_tests/ -m integration
|
||||||
|
|
||||||
|
# С покрытием кода
|
||||||
|
poetry run pytest --cov=dataloader --cov-report=html
|
||||||
|
|
||||||
|
# С подробным выводом
|
||||||
|
poetry run pytest -v -s
|
||||||
|
```
|
||||||
|
|
||||||
|
### Покрытие кода
|
||||||
|
|
||||||
|
Текущее покрытие: **95.50%** (778 строк / 743 покрыто)
|
||||||
|
|
||||||
|
```
|
||||||
|
Name Stmts Miss Cover Missing
|
||||||
|
--------------------------------------------------------------------------------
|
||||||
|
src\dataloader\__init__.py 2 0 100.00%
|
||||||
|
src\dataloader\api\__init__.py 31 10 67.74% 28-38
|
||||||
|
src\dataloader\api\metric_router.py 14 4 71.43% 23-27, 39-43
|
||||||
|
src\dataloader\api\os_router.py 11 2 81.82% 31-33
|
||||||
|
src\dataloader\api\schemas.py 13 0 100.00%
|
||||||
|
src\dataloader\api\v1\__init__.py 2 0 100.00%
|
||||||
|
src\dataloader\api\v1\exceptions.py 4 0 100.00%
|
||||||
|
src\dataloader\api\v1\models.py 0 0 100.00%
|
||||||
|
src\dataloader\api\v1\router.py 29 0 100.00%
|
||||||
|
src\dataloader\api\v1\schemas.py 33 1 96.97% 34
|
||||||
|
src\dataloader\api\v1\service.py 32 0 100.00%
|
||||||
|
src\dataloader\api\v1\utils.py 4 0 100.00%
|
||||||
|
src\dataloader\config.py 79 0 100.00%
|
||||||
|
src\dataloader\context.py 39 0 100.00%
|
||||||
|
src\dataloader\exceptions.py 0 0 100.00%
|
||||||
|
src\dataloader\storage\__init__.py 0 0 100.00%
|
||||||
|
src\dataloader\storage\engine.py 9 1 88.89% 52
|
||||||
|
src\dataloader\storage\models\__init__.py 4 0 100.00%
|
||||||
|
src\dataloader\storage\models\base.py 4 0 100.00%
|
||||||
|
src\dataloader\storage\models\queue.py 43 0 100.00%
|
||||||
|
src\dataloader\storage\notify_listener.py 49 0 100.00%
|
||||||
|
src\dataloader\storage\repositories\__init__.py 3 0 100.00%
|
||||||
|
src\dataloader\storage\repositories\queue.py 130 0 100.00%
|
||||||
|
src\dataloader\storage\schemas\__init__.py 3 0 100.00%
|
||||||
|
src\dataloader\storage\schemas\queue.py 29 0 100.00%
|
||||||
|
src\dataloader\workers\__init__.py 0 0 100.00%
|
||||||
|
src\dataloader\workers\base.py 102 0 100.00%
|
||||||
|
src\dataloader\workers\manager.py 64 0 100.00%
|
||||||
|
src\dataloader\workers\pipelines\__init__.py 11 5 54.55% 15-17, 24-25
|
||||||
|
src\dataloader\workers\pipelines\noop.py 12 12 0.00% 2-20
|
||||||
|
src\dataloader\workers\pipelines\registry.py 15 0 100.00%
|
||||||
|
src\dataloader\workers\reaper.py 7 0 100.00%
|
||||||
|
--------------------------------------------------------------------------------
|
||||||
|
TOTAL 778 35 95.50%
|
||||||
|
```
|
||||||
|
|
||||||
|
### Ключевые тест-сценарии
|
||||||
|
|
||||||
|
**Интеграционные тесты:**
|
||||||
|
- Постановка задачи через API → проверка статуса
|
||||||
|
- Идемпотентность через `idempotency_key`
|
||||||
|
- Claim задачи → heartbeat → успешное завершение
|
||||||
|
- Claim задачи → ошибка → retry → финальный fail
|
||||||
|
- Конкуренция воркеров через advisory lock
|
||||||
|
- Возврат потерянных задач (reaper)
|
||||||
|
- Отмена задачи пользователем
|
||||||
|
|
||||||
|
**Юнит-тесты:**
|
||||||
|
- Конфигурация из переменных окружения
|
||||||
|
- Создание и управление воркерами
|
||||||
|
- LISTEN/NOTIFY механизм
|
||||||
|
- Сервисный слой и репозиторий
|
||||||
|
- Протокол heartbeat и отмены
|
||||||
|
|
||||||
|
|
||||||
|
### Масштабирование
|
||||||
|
|
||||||
|
- **Вертикальное**: Увеличение `concurrency` в `WORKERS_JSON` для существующих воркеров
|
||||||
|
- **Горизонтальное**: Увеличение количества реплик (pods). Очередь в БД и advisory-lock обеспечат корректность при конкуренции между репликами
|
||||||
|
- **По очередям**: Разные deployment'ы для разных очередей с разными ресурсами
|
||||||
|
|
||||||
|
### Graceful Shutdown

При получении SIGTERM:
1. Останавливает прием новых HTTP запросов
2. Сигнализирует воркерам о необходимости завершения
3. Ждет завершения текущих задач (timeout 30 сек)
4. Останавливает reaper
5. Закрывает соединения с БД

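Набросок мягкой остановки воркеров (упрощённо; реальный `WorkerManager.stop()` может отличаться в деталях):

```python
from __future__ import annotations

import asyncio


async def stop_workers(tasks: list[asyncio.Task], stop: asyncio.Event, timeout: float = 30.0) -> None:
    """Просит воркеров завершиться, ждёт их с таймаутом, оставшихся отменяет."""
    if not tasks:
        return
    stop.set()
    done, pending = await asyncio.wait(tasks, timeout=timeout)
    for task in pending:
        task.cancel()
    if pending:
        await asyncio.gather(*pending, return_exceptions=True)
```
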
### Мониторинг
|
||||||
|
|
||||||
|
**Health Checks:**
|
||||||
|
- `GET /health` - проверка работоспособности (без БД, < 20ms)
|
||||||
|
- `GET /info` - информация о версии
|
||||||
|
|
||||||
|
**Метрики (если включен metric_router):**
|
||||||
|
- Количество задач по статусам (queued, running, succeeded, failed)
|
||||||
|
- Время выполнения задач (p50, p95, p99)
|
||||||
|
- Количество активных воркеров
|
||||||
|
- Частота ошибок
|
||||||
|
|
||||||
|
**Логи:**
|
||||||
|
Структурированные JSON-логи с уровнями: DEBUG, INFO, WARNING, ERROR, CRITICAL
|
||||||
|
|
||||||
|
**Ключевые события для алертов:**
|
||||||
|
- `worker.claim.backoff` - частые backoff'ы (возможна конкуренция)
|
||||||
|
- `worker.complete.failed` - высокий процент ошибок
|
||||||
|
- `reaper.requeued` - частый возврат потерянных задач (проблемы с lease)
|
||||||
|
- `api.error` - ошибки API
|
||||||
|
|
||||||
|
|
||||||
|
## Troubleshooting
|
||||||
|
|
||||||
|
### Задачи застревают в статусе `queued`
|
||||||
|
|
||||||
|
**Симптомы:** Задачи не начинают выполняться, остаются в `queued`.
|
||||||
|
|
||||||
|
**Возможные причины:**
|
||||||
|
1. Воркеры не запущены или упали
|
||||||
|
2. Нет воркеров для данной очереди в `WORKERS_JSON`
|
||||||
|
3. `available_at` в будущем
|
||||||
|
|
||||||
|
**Решение:**
|
||||||
|
```bash
|
||||||
|
|
||||||
|
# Проверить задачи в БД
|
||||||
|
SELECT job_id, queue, status, available_at, created_at
|
||||||
|
FROM dl_jobs
|
||||||
|
WHERE status = 'queued'
|
||||||
|
ORDER BY created_at DESC
|
||||||
|
LIMIT 10;
|
||||||
|
```
|
||||||
|
|
||||||
|
### Задачи часто возвращаются в `queued` (backoff)
|
||||||
|
|
||||||
|
**Симптомы:** В логах частые события `worker.claim.backoff`.
|
||||||
|
|
||||||
|
**Причины:**
|
||||||
|
- Конкуренция за `lock_key`: несколько задач с одинаковым `lock_key` одновременно
|
||||||
|
- Advisory lock уже занят другим процессом
|
||||||
|
|
||||||
|
**Решение:**
|
||||||
|
- Проверить корректность выбора `lock_key` (должен быть уникальным для бизнес-сущности)
|
||||||
|
- Использовать `partition_key` для распределения нагрузки
|
||||||
|
- Снизить `concurrency` для данной очереди
|
||||||
|
|
||||||
|
### Высокий процент `failed` задач
|
||||||
|
|
||||||
|
**Симптомы:** Много задач завершаются с `status = 'failed'`.
|
||||||
|
|
||||||
|
**Диагностика:**
|
||||||
|
```sql
|
||||||
|
SELECT job_id, task, error, attempt, max_attempts
|
||||||
|
FROM dl_jobs
|
||||||
|
WHERE status = 'failed'
|
||||||
|
ORDER BY finished_at DESC
|
||||||
|
LIMIT 20;
|
||||||
|
```
|
||||||
|
|
||||||
|
**Возможные причины:**
|
||||||
|
- Ошибки в коде пайплайна
|
||||||
|
- Недоступность внешних сервисов
|
||||||
|
- Таймауты (превышение `lease_ttl_sec`)
|
||||||
|
- Неверные аргументы в `args`
|
||||||
|
|
||||||
|
**Решение:**
|
||||||
|
- Проверить логи с `job_id`
|
||||||
|
- Увеличить `max_attempts` для retry
|
||||||
|
- Увеличить `lease_ttl_sec` для долгих операций
|
||||||
|
- Исправить код пайплайна
|
||||||
|
|
||||||
|
|
||||||
|
### Медленное выполнение задач
|
||||||
|
|
||||||
|
**Симптомы:** Задачи выполняются дольше ожидаемого.
|
||||||
|
|
||||||
|
**Диагностика:**
|
||||||
|
```sql
|
||||||
|
SELECT
|
||||||
|
task,
|
||||||
|
AVG(EXTRACT(EPOCH FROM (finished_at - started_at))) as avg_duration_sec,
|
||||||
|
COUNT(*) as total
|
||||||
|
FROM dl_jobs
|
||||||
|
WHERE status IN ('succeeded', 'failed')
|
||||||
|
AND finished_at > NOW() - INTERVAL '1 hour'
|
||||||
|
GROUP BY task
|
||||||
|
ORDER BY avg_duration_sec DESC;
|
||||||
|
```
|
||||||
|
|
||||||
|
**Возможные причины:**
|
||||||
|
- Неоптимальный код пайплайна
|
||||||
|
- Медленные внешние сервисы
|
||||||
|
- Недостаточно воркеров (`concurrency` слишком мал)
|
||||||
|
- Проблемы с БД (медленные запросы, блокировки)
|
||||||
|
|
||||||
|
**Решение:**
|
||||||
|
- Профилировать код пайплайна
|
||||||
|
- Увеличить `concurrency` в `WORKERS_JSON`
|
||||||
|
- Оптимизировать запросы к БД (индексы, batching)
|
||||||
|
- Масштабировать горизонтально (больше реплик)
|
||||||
|
|
||||||
|
### Проблемы с LISTEN/NOTIFY
|
||||||
|
|
||||||
|
**Симптомы:** Воркеры не просыпаются сразу после постановки задачи.
|
||||||
|
|
||||||
|
**Диагностика:**
|
||||||
|
```bash
|
||||||
|
|
||||||
|
# Проверить триггеры в БД
|
||||||
|
SELECT * FROM pg_trigger WHERE tgname LIKE 'dl_jobs%';
|
||||||
|
```
|
||||||
|
|
||||||
|
**Возможные причины:**
|
||||||
|
- Триггеры не созданы или отключены
|
||||||
|
- Проблемы с подключением asyncpg
|
||||||
|
- Воркер не подписан на канал
|
||||||
|
|
||||||
|
**Решение:**
|
||||||
|
- Воркер автоматически fallback'ится на polling при проблемах с LISTEN/NOTIFY
|
||||||
|
- Проверить DDL: триггеры `dl_jobs_notify_ins` и `dl_jobs_notify_upd`
|
||||||
|
- Проверить права пользователя БД на LISTEN/NOTIFY
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,80 @@
|
||||||
|
## План внедрения
|
||||||
|
|
||||||
|
1. **Шаблон + каркас пакета** - сделано
|
||||||
|
|
||||||
|
* Создать структуру из ТЗ (один пакет `dataloader/` по src-layout).
|
||||||
|
* Подтянуть `rest_template.md` артефакты: `os_router.py`, `middleware.py`, `logger/*`.
|
||||||
|
* `pyproject.toml`: `fastapi`, `uvicorn`, `pydantic-settings`, `sqlalchemy>=2, async`, `psycopg[binary,pool]` или `asyncpg`, `httpx`, `pytest`, `pytest-asyncio`, `httpx[cli]`.
|
||||||
|
* **Критерий:** `uvicorn dataloader.__main__:app` поднимается, `/health` отдаёт 200.
|
||||||
|
|
||||||
|
2. **Конфиг и контекст** - сделано
|
||||||
|
|
||||||
|
* `config.py`: `AppSettings` (DSN, тайминги, WORKERS_JSON).
|
||||||
|
* `context.py`: `AppContext`, создание `AsyncEngine` и `async_sessionmaker`, DI.
|
||||||
|
* **Критерий:** `/status` возвращает версию/uptime, движок создаётся на старте без попыток коннекта в `/health`.
|
||||||
|
|
||||||
|
3. **Хранилище очереди** - в работе
|
||||||
|
|
||||||
|
* `storage/db.py`: фабрики engine/sessionmaker.
|
||||||
|
* `storage/repositories.py`: методы
|
||||||
|
|
||||||
|
* `create_or_get(req)`,
|
||||||
|
* `get_status(job_id)`,
|
||||||
|
* `cancel(job_id)`,
|
||||||
|
* `requeue_lost(now)`,
|
||||||
|
* вспомогательные `claim_one(queue)`, `heartbeat(job_id, ttl)`, `finish_ok(job_id)`, `finish_fail_or_retry(job_id, err)`.
|
||||||
|
* Только чистый SQL (как в ТЗ), транзакция на операцию.
|
||||||
|
* **Критерий:** unit/интеграционный тест «поставил-прочитал-отменил» проходит.
|
||||||
|
|
||||||
|
4. **API v1**
|
||||||
|
|
||||||
|
* `api/v1/schemas.py`: `TriggerJobRequest/Response`, `JobStatusResponse`.
|
||||||
|
* `api/v1/service.py`: бизнес-слой над репозиторием.
|
||||||
|
* `api/v1/router.py`: `POST /jobs/trigger`, `GET /jobs/{id}/status`, `POST /jobs/{id}/cancel`.
|
||||||
|
* **Критерий:** ручки соответствуют контрактам, идемпотентность по `idempotency_key` работает.
|
||||||
|
|
||||||
|
5. **Базовый воркер**
|
||||||
|
|
||||||
|
* `workers/base.py`: класс `PGWorker` с циклами `listen_or_sleep → claim → advisory_lock → _pipeline → heartbeat → finish`.
|
||||||
|
* Идём строго по SQL из ТЗ: `FOR UPDATE SKIP LOCKED`, lease/heartbeat, backoff при lock.
|
||||||
|
* **Критерий:** локальный мок-пайплайн выполняется, статус `succeeded`.
|
||||||
|
|
||||||
|
6. **Менеджер воркеров**
|
||||||
|
|
||||||
|
* `workers/manager.py`: парсинг `WORKERS_JSON`, создание `asyncio.Task` на воркеры; мягкая остановка на shutdown.
|
||||||
|
* Подключение в `__main__.py` через FastAPI `on_startup/on_shutdown`.
|
||||||
|
* **Критерий:** при старте создаются нужные таски, при SIGTERM корректно гасим.
|
||||||
|
|
||||||
|
7. **Реестр пайплайнов**
|
||||||
|
|
||||||
|
* `workers/pipelines/registry.py`: `@register(task)`, `resolve(task)`.
|
||||||
|
* Пустой эталонный пайплайн (no-op, имитирует 2–3 чанка).
|
||||||
|
* **Критерий:** задача с `task="noop"` исполняется через реестр.
|
||||||
|
|
||||||
|
8. **Reaper**
|
||||||
|
|
||||||
|
* Фоновая async-задача в приложении: `requeue_lost` раз в `DL_REAPER_PERIOD_SEC`.
|
||||||
|
* **Критерий:** задачи с протухшим `lease_expires_at` возвращаются в `queued`.
|
||||||
|
|
||||||
|
9. **Интеграционные тесты**
|
||||||
|
|
||||||
|
* `tests/integration_tests/v1_api/test_service.py`:
|
||||||
|
|
||||||
|
* trigger → status (queued),
|
||||||
|
* воркер подхватил → status (running),
|
||||||
|
* done → status (succeeded),
|
||||||
|
* cancel во время пайплайна → корректная реакция.
|
||||||
|
* **Критерий:** тесты зелёные в CI.
|
||||||
|
|
||||||
|
10. **Dockerfile и запуск**
|
||||||
|
|
||||||
|
* Slim образ на Python 3.11/3.12, `uvicorn` entrypoint.
|
||||||
|
* ENV-пример `.env`, README с запуском.
|
||||||
|
* **Критерий:** контейнер стартует, воркеры работают, API доступно.
|
||||||
|
|
||||||
|
11. **Наблюдаемость**
|
||||||
|
|
||||||
|
* Логи в формате шаблона (структурные, маскирование).
|
||||||
|
* Простая сводка в `/status` (кол-во активных воркеров, конфиг таймингов).
|
||||||
|
* **Критерий:** видно ключевые переходы статусов и ошибки пайплайнов.
|
||||||
|
|
||||||
34
TZ.md
34
TZ.md
|
|
@ -2,16 +2,16 @@
|
||||||
|
|
||||||
## 1) Назначение и рамки
|
## 1) Назначение и рамки
|
||||||
|
|
||||||
`dataloader` — сервис постановки и исполнения долгих ETL-задач через одну общую очередь в Postgres. Сервис предоставляет HTTP-ручки для триггера задач, мониторинга статуса и отмены; внутри процесса запускает N асинхронных воркеров, которые конкурируют за задачи через `SELECT … FOR UPDATE SKIP LOCKED`, держат lease/heartbeat, делают идемпотентные записи в целевые БД и корректно обрабатывают повторы.
|
`dataloader` - сервис постановки и исполнения долгих ETL-задач через одну общую очередь в Postgres. Сервис предоставляет HTTP-ручки для триггера задач, мониторинга статуса и отмены; внутри процесса запускает N асинхронных воркеров, которые конкурируют за задачи через `SELECT … FOR UPDATE SKIP LOCKED`, держат lease/heartbeat, делают идемпотентные записи в целевые БД и корректно обрабатывают повторы.
|
||||||
|
|
||||||
Архитектура и инфраструктурные части соответствуют шаблону `rest_template.md`: единый пакет, `os_router.py` с `/health` и `/status`, middleware логирования, структура каталогов и конфиг-классы — **как в шаблоне**.
|
Архитектура и инфраструктурные части соответствуют шаблону `rest_template.md`: единый пакет, `os_router.py` с `/health` и `/status`, middleware логирования, структура каталогов и конфиг-классы - **как в шаблоне**.
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
## 2) Архитектура (одно приложение, async)
|
## 2) Архитектура (одно приложение, async)
|
||||||
|
|
||||||
* **FastAPI-приложение**: HTTP API v1, инфраструктурные роуты (`/health`, `/status`) из шаблона, middleware и логирование из шаблона.
|
* **FastAPI-приложение**: HTTP API v1, инфраструктурные роуты (`/health`, `/status`) из шаблона, middleware и логирование из шаблона.
|
||||||
* **WorkerManager**: на `startup` читает конфиг (`WORKERS_JSON`) и поднимает M асинхронных воркер-циклов (по очередям и уровням параллелизма). На `shutdown` — мягкая остановка.
|
* **WorkerManager**: на `startup` читает конфиг (`WORKERS_JSON`) и поднимает M асинхронных воркер-циклов (по очередям и уровням параллелизма). На `shutdown` - мягкая остановка.
|
||||||
* **PG Queue**: одна таблица `dl_jobs` на все очереди и сервисы; журнал `dl_job_events`; триггеры LISTEN/NOTIFY для пробуждения воркеров без активного поллинга.
|
* **PG Queue**: одна таблица `dl_jobs` на все очереди и сервисы; журнал `dl_job_events`; триггеры LISTEN/NOTIFY для пробуждения воркеров без активного поллинга.
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
@ -164,7 +164,7 @@ FOR EACH ROW EXECUTE FUNCTION notify_job_ready();
|
||||||
Выход: `{… как status …}`
|
Выход: `{… как status …}`
|
||||||
Поведение: устанавливает `cancel_requested = true`. Воркер кооперативно завершает задачу между чанками.
|
Поведение: устанавливает `cancel_requested = true`. Воркер кооперативно завершает задачу между чанками.
|
||||||
|
|
||||||
Инфраструктурные эндпоинты `/health`, `/status`, мидлвар и регистрация роутов — **как в шаблоне**.
|
Инфраструктурные эндпоинты `/health`, `/status`, мидлвар и регистрация роутов - **как в шаблоне**.
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
|
|
@ -192,7 +192,7 @@ WHERE j.job_id = cte.job_id
|
||||||
RETURNING j.job_id, j.task, j.args, j.lock_key, j.partition_key, j.lease_ttl_sec;
|
RETURNING j.job_id, j.task, j.args, j.lock_key, j.partition_key, j.lease_ttl_sec;
|
||||||
```
|
```
|
||||||
|
|
||||||
Затем `SELECT pg_try_advisory_lock(hashtext(:lock_key))`. Если `false` — `backoff`:
|
Затем `SELECT pg_try_advisory_lock(hashtext(:lock_key))`. Если `false` - `backoff`:
|
||||||
|
|
||||||
```sql
|
```sql
|
||||||
UPDATE dl_jobs
|
UPDATE dl_jobs
|
||||||
|
|
@ -233,7 +233,7 @@ WHERE job_id=:jid;
|
||||||
|
|
||||||
Всегда выставлять/снимать advisory-lock на `lock_key`.
|
Всегда выставлять/снимать advisory-lock на `lock_key`.
|
||||||
|
|
||||||
4. **Отмена**: воркер проверяет `cancel_requested` между чанками; при `true` завершает пайплайн (обычно как `canceled` либо как `failed` без ретраев — политика проекта).
|
4. **Отмена**: воркер проверяет `cancel_requested` между чанками; при `true` завершает пайплайн (обычно как `canceled` либо как `failed` без ретраев - политика проекта).
|
||||||
|
|
||||||
5. **Reaper** (фон у приложения): раз в `DL_REAPER_PERIOD_SEC` возвращает «потерянные» задачи в очередь.
|
5. **Reaper** (фон у приложения): раз в `DL_REAPER_PERIOD_SEC` возвращает «потерянные» задачи в очередь.
|
||||||
|
|
||||||
|
|
@ -250,37 +250,37 @@ RETURNING job_id;
|
||||||
|
|
||||||
## 7) Оптимизация и SLA
|
## 7) Оптимизация и SLA
|
||||||
|
|
||||||
* Claim — O(log N) благодаря частичному индексу `ix_dl_jobs_claim`.
|
* Claim - O(log N) благодаря частичному индексу `ix_dl_jobs_claim`.
|
||||||
* Reaper — O(log N) по индексу `ix_dl_jobs_running_lease`.
|
* Reaper - O(log N) по индексу `ix_dl_jobs_running_lease`.
|
||||||
* `/health` — без БД; время ответа ≤ 20 мс. `/jobs/*` — не держат долгих транзакций.
|
* `/health` - без БД; время ответа ≤ 20 мс. `/jobs/*` - не держат долгих транзакций.
|
||||||
* Гарантия доставки: **at-least-once**; операции записи в целевые таблицы — идемпотентны (реализуется в конкретных пайплайнах).
|
* Гарантия доставки: **at-least-once**; операции записи в целевые таблицы - идемпотентны (реализуется в конкретных пайплайнах).
|
||||||
* Конкуренция: один `lock_key` одновременно исполняется одним воркером; параллелизм достигается независимыми `partition_key`.
|
* Конкуренция: один `lock_key` одновременно исполняется одним воркером; параллелизм достигается независимыми `partition_key`.
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
## 8) Конфигурация (ENV)
|
## 8) Конфигурация (ENV)
|
||||||
|
|
||||||
* `DL_DB_DSN` — DSN Postgres (async).
|
* `DL_DB_DSN` - DSN Postgres (async).
|
||||||
* `WORKERS_JSON` — JSON-список конфигураций воркеров, напр.: `[{"queue":"load.cbr","concurrency":2},{"queue":"load.sgx","concurrency":1}]`.
|
* `WORKERS_JSON` - JSON-список конфигураций воркеров, напр.: `[{"queue":"load.cbr","concurrency":2},{"queue":"load.sgx","concurrency":1}]`.
|
||||||
* `DL_HEARTBEAT_SEC` (деф. 10), `DL_DEFAULT_LEASE_TTL_SEC` (деф. 60), `DL_REAPER_PERIOD_SEC` (деф. 10), `DL_CLAIM_BACKOFF_SEC` (деф. 15).
|
* `DL_HEARTBEAT_SEC` (деф. 10), `DL_DEFAULT_LEASE_TTL_SEC` (деф. 60), `DL_REAPER_PERIOD_SEC` (деф. 10), `DL_CLAIM_BACKOFF_SEC` (деф. 15).
|
||||||
* Логирование, middleware, `uvicorn_logging_config` — **из шаблона без изменения контрактов**.
|
* Логирование, middleware, `uvicorn_logging_config` - **из шаблона без изменения контрактов**.
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
## 9) Эксплуатация и деплой
|
## 9) Эксплуатация и деплой
|
||||||
|
|
||||||
* Один контейнер, один Pod, **несколько async-воркеров** внутри процесса (через `WorkerManager`).
|
* Один контейнер, один Pod, **несколько async-воркеров** внутри процесса (через `WorkerManager`).
|
||||||
* Масштабирование — количеством реплик Deployment: очередь в БД, `FOR UPDATE SKIP LOCKED` и advisory-lock обеспечат корректность в гонке.
|
* Масштабирование - количеством реплик Deployment: очередь в БД, `FOR UPDATE SKIP LOCKED` и advisory-lock обеспечат корректность в гонке.
|
||||||
* Пробы: `readiness/liveness` на `/health` из `os_router.py`.
|
* Пробы: `readiness/liveness` на `/health` из `os_router.py`.
|
||||||
* Завершение: на SIGTERM — остановить reaper, подать сигнал воркерам для мягкой остановки, дождаться тасков с таймаутом.
|
* Завершение: на SIGTERM - остановить reaper, подать сигнал воркерам для мягкой остановки, дождаться тасков с таймаутом.
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
## 10) Безопасность, аудит, наблюдаемость
|
## 10) Безопасность, аудит, наблюдаемость
|
||||||
|
|
||||||
* Структурные логи через `logger/*` шаблона; маскирование чувствительных полей — как в `logger/utils.py`.
|
* Структурные логи через `logger/*` шаблона; маскирование чувствительных полей - как в `logger/utils.py`.
|
||||||
* Журнал жизненного цикла в `dl_job_events` (queued/picked/heartbeat/requeue/done/failed/canceled).
|
* Журнал жизненного цикла в `dl_job_events` (queued/picked/heartbeat/requeue/done/failed/canceled).
|
||||||
* Метрики (BETA) — через `metric_router.py` из шаблона при необходимости.
|
* Метрики (BETA) - через `metric_router.py` из шаблона при необходимости.
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
|
|
|
||||||
File diff suppressed because it is too large
|
|
@ -15,7 +15,12 @@ pydantic-settings = "^2.11.0"
|
||||||
asyncpg = "^0.30.0"
|
asyncpg = "^0.30.0"
|
||||||
sqlalchemy = "^2.0.0"
|
sqlalchemy = "^2.0.0"
|
||||||
httpx = "^0.28.0"
|
httpx = "^0.28.0"
|
||||||
|
aiohttp = "^3.11.0"
|
||||||
|
aiofiles = "^24.1.0"
|
||||||
|
zstandard = "^0.23.0"
|
||||||
|
orjson = "^3.10.0"
|
||||||
pytz = "^2025.1"
|
pytz = "^2025.1"
|
||||||
|
tzdata = "^2025.1"
|
||||||
loguru = "^0.7.2"
|
loguru = "^0.7.2"
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -29,3 +34,37 @@ dataloader = "dataloader.__main__:main"
|
||||||
[build-system]
|
[build-system]
|
||||||
requires = ["poetry-core"]
|
requires = ["poetry-core"]
|
||||||
build-backend = "poetry.core.masonry.api"
|
build-backend = "poetry.core.masonry.api"
|
||||||
|
|
||||||
|
|
||||||
|
[tool.black]
|
||||||
|
line-length = 88
|
||||||
|
target-version = ["py311"]
|
||||||
|
skip-string-normalization = false
|
||||||
|
preview = true
|
||||||
|
|
||||||
|
[tool.isort]
|
||||||
|
profile = "black"
|
||||||
|
line_length = 88
|
||||||
|
multi_line_output = 3
|
||||||
|
include_trailing_comma = true
|
||||||
|
force_grid_wrap = 0
|
||||||
|
combine_as_imports = true
|
||||||
|
known_first_party = ["dataloader"]
|
||||||
|
src_paths = ["src"]
|
||||||
|
|
||||||
|
[tool.ruff]
|
||||||
|
line-length = 88
|
||||||
|
target-version = "py311"
|
||||||
|
fix = true
|
||||||
|
lint.select = ["E", "F", "W", "I", "N", "B"]
|
||||||
|
lint.ignore = [
|
||||||
|
"E501",
|
||||||
|
]
|
||||||
|
exclude = [
|
||||||
|
".git",
|
||||||
|
"__pycache__",
|
||||||
|
"build",
|
||||||
|
"dist",
|
||||||
|
".venv",
|
||||||
|
"venv",
|
||||||
|
]
|
||||||
|
|
@ -0,0 +1,14 @@
|
||||||
|
[pytest]
|
||||||
|
testpaths = tests
|
||||||
|
python_files = test_*.py
|
||||||
|
python_classes = Test*
|
||||||
|
python_functions = test_*
|
||||||
|
asyncio_mode = auto
|
||||||
|
asyncio_default_fixture_loop_scope = function
|
||||||
|
markers =
|
||||||
|
integration: integration tests requiring database
|
||||||
|
unit: unit tests without external dependencies
|
||||||
|
addopts =
|
||||||
|
-v
|
||||||
|
--tb=short
|
||||||
|
--strict-markers
|
||||||
|
|
@ -61,7 +61,7 @@ aigw-project/
|
||||||
| `__main__.py` | Точка входа. Запускает FastAPI-приложение. |
|
| `__main__.py` | Точка входа. Запускает FastAPI-приложение. |
|
||||||
| `config.py` | Загрузка и обработка переменных окружения. |
|
| `config.py` | Загрузка и обработка переменных окружения. |
|
||||||
| `base.py` | Базовые классы и типы, переиспользуемые в проекте. |
|
| `base.py` | Базовые классы и типы, переиспользуемые в проекте. |
|
||||||
| `context.py` | Реализация паттерна `AppContext` — единая точка доступа к зависимостям. |
|
| `context.py` | Реализация паттерна `AppContext` - единая точка доступа к зависимостям. |
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
|
|
@ -81,7 +81,7 @@ aigw-project/
|
||||||
|
|
||||||
| Файл | Назначение |
|
| Файл | Назначение |
|
||||||
|--------------------|------------|
|
|--------------------|------------|
|
||||||
| `__init__.py` | Конфигуратор FastAPI — регистрация версий и роутов. |
|
| `__init__.py` | Конфигуратор FastAPI - регистрация версий и роутов. |
|
||||||
| `os_router.py` | Инфраструктурные endpoint'ы (`/health`, `/status`). ⚠️ Не редактировать. |
|
| `os_router.py` | Инфраструктурные endpoint'ы (`/health`, `/status`). ⚠️ Не редактировать. |
|
||||||
| `metric_router.py` | Метрики (BETA). ⚠️ Не редактировать. |
|
| `metric_router.py` | Метрики (BETA). ⚠️ Не редактировать. |
|
||||||
| `schemas.py` | Схемы (Pydantic) для `os_router` и `metric_router`. ⚠️ Не редактировать. |
|
| `schemas.py` | Схемы (Pydantic) для `os_router` и `metric_router`. ⚠️ Не редактировать. |
|
||||||
|
|
@ -116,7 +116,7 @@ from tenera_etl.logger import logger
|
||||||
|
|
||||||
logger.info("End processing user registration request")
|
logger.info("End processing user registration request")
|
||||||
```
|
```
|
||||||
⚠️ Не передавайте в logger.info(...) ничего, кроме строки — она будет записана в поле message.
|
⚠️ Не передавайте в logger.info(...) ничего, кроме строки - она будет записана в поле message.
|
||||||
|
|
||||||
Маскирование чувствительных данных
|
Маскирование чувствительных данных
|
||||||
|
|
||||||
|
|
@ -126,4 +126,4 @@ logger.info("End processing user registration request")
|
||||||
|
|
||||||
работает автоматически, но вы можете конфигурировать список слов и правила.
|
работает автоматически, но вы можете конфигурировать список слов и правила.
|
||||||
|
|
||||||
Перед добавлением кастомной маскировки — ознакомьтесь с документацией, чтобы избежать утечки данных.
|
Перед добавлением кастомной маскировки - ознакомьтесь с документацией, чтобы избежать утечки данных.
|
||||||
|
|
@ -5,7 +5,6 @@
|
||||||
|
|
||||||
from . import api
|
from . import api
|
||||||
|
|
||||||
|
|
||||||
__all__ = [
|
__all__ = [
|
||||||
"api",
|
"api",
|
||||||
]
|
]
|
||||||
|
|
|
||||||
|
|
@ -1,15 +1,17 @@
|
||||||
import uvicorn
|
import uvicorn
|
||||||
|
|
||||||
|
|
||||||
from dataloader.api import app_main
|
from dataloader.api import app_main
|
||||||
from dataloader.config import APP_CONFIG
|
from dataloader.config import APP_CONFIG
|
||||||
from dataloader.logger.uvicorn_logging_config import LOGGING_CONFIG, setup_uvicorn_logging
|
from dataloader.logger.uvicorn_logging_config import (
|
||||||
|
LOGGING_CONFIG,
|
||||||
|
setup_uvicorn_logging,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def main() -> None:
|
def main() -> None:
|
||||||
# Инициализируем логирование uvicorn перед запуском
|
|
||||||
setup_uvicorn_logging()
|
setup_uvicorn_logging()
|
||||||
|
|
||||||
uvicorn.run(
|
uvicorn.run(
|
||||||
app_main,
|
app_main,
|
||||||
host=APP_CONFIG.app.app_host,
|
host=APP_CONFIG.app.app_host,
|
||||||
|
|
|
||||||
|
|
@ -1,38 +1,46 @@
|
||||||
from collections.abc import AsyncGenerator
|
from __future__ import annotations
|
||||||
|
|
||||||
import contextlib
|
import contextlib
|
||||||
import typing as tp
|
import typing as tp
|
||||||
|
from collections.abc import AsyncGenerator
|
||||||
|
|
||||||
from fastapi import FastAPI
|
from fastapi import FastAPI
|
||||||
|
|
||||||
|
from dataloader.context import APP_CTX
|
||||||
|
from dataloader.workers.manager import WorkerManager, build_manager_from_env
|
||||||
|
from dataloader.workers.pipelines import load_all as load_pipelines
|
||||||
|
|
||||||
from .metric_router import router as metric_router
|
from .metric_router import router as metric_router
|
||||||
from .middleware import log_requests
|
from .middleware import log_requests
|
||||||
from .os_router import router as service_router
|
from .os_router import router as service_router
|
||||||
from .v1 import router as v1_router
|
from .v1 import router as v1_router
|
||||||
|
|
||||||
|
_manager: WorkerManager | None = None
|
||||||
|
|
||||||
|
|
||||||
@contextlib.asynccontextmanager
|
@contextlib.asynccontextmanager
|
||||||
async def lifespan(app: tp.Any) -> AsyncGenerator[None, None]:
|
async def lifespan(app: tp.Any) -> AsyncGenerator[None, None]:
|
||||||
from dataloader.context import APP_CTX
|
"""
|
||||||
|
Жизненный цикл приложения: инициализация контекста и запуск/остановка воркер-менеджера.
|
||||||
|
"""
|
||||||
|
global _manager
|
||||||
await APP_CTX.on_startup()
|
await APP_CTX.on_startup()
|
||||||
yield
|
load_pipelines()
|
||||||
await APP_CTX.on_shutdown()
|
_manager = build_manager_from_env()
|
||||||
|
await _manager.start()
|
||||||
|
try:
|
||||||
|
yield
|
||||||
|
finally:
|
||||||
|
if _manager is not None:
|
||||||
|
await _manager.stop()
|
||||||
|
_manager = None
|
||||||
|
await APP_CTX.on_shutdown()
|
||||||
|
|
||||||
|
|
||||||
app_main = FastAPI(title="Data Gateway", lifespan=lifespan)
|
app_main = FastAPI(title="Data Gateway", lifespan=lifespan)
|
||||||
|
|
||||||
app_main.middleware("http")(log_requests)
|
app_main.middleware("http")(log_requests)
|
||||||
|
app_main.include_router(service_router, tags=["Openshift dataloader routes"])
|
||||||
|
app_main.include_router(metric_router, tags=["Like/dislike metric dataloader routes"])
|
||||||
|
app_main.include_router(v1_router, prefix="/api/v1", tags=["dataloader"])
|
||||||
|
|
||||||
app_main.include_router(
|
__all__ = ["app_main"]
|
||||||
service_router, tags=["Openshift dataloader routes"]
|
|
||||||
)
|
|
||||||
app_main.include_router(
|
|
||||||
metric_router, tags=["Like/dislike metric dataloader routes"]
|
|
||||||
)
|
|
||||||
app_main.include_router(
|
|
||||||
v1_router, prefix="/api/v1", tags=["dataloader"]
|
|
||||||
)
|
|
||||||
|
|
||||||
__all__ = [
|
|
||||||
"app_main",
|
|
||||||
]
|
|
||||||
|
|
|
||||||
|
|
@ -1,10 +1,11 @@
|
||||||
""" 🚨 НЕ РЕДАКТИРОВАТЬ !!!!!!
|
"""🚨 НЕ РЕДАКТИРОВАТЬ !!!!!!"""
|
||||||
"""
|
|
||||||
|
|
||||||
import uuid
|
import uuid
|
||||||
|
|
||||||
from fastapi import APIRouter, Header, status
|
from fastapi import APIRouter, Header, status
|
||||||
|
|
||||||
from dataloader.context import APP_CTX
|
from dataloader.context import APP_CTX
|
||||||
|
|
||||||
from . import schemas
|
from . import schemas
|
||||||
|
|
||||||
router = APIRouter()
|
router = APIRouter()
|
||||||
|
|
@ -17,8 +18,7 @@ logger = APP_CTX.get_logger()
|
||||||
response_model=schemas.RateResponse,
|
response_model=schemas.RateResponse,
|
||||||
)
|
)
|
||||||
async def like(
|
async def like(
|
||||||
# pylint: disable=C0103,W0613
|
header_request_id: str = Header(uuid.uuid4(), alias="Request-Id")
|
||||||
header_Request_Id: str = Header(uuid.uuid4(), alias="Request-Id")
|
|
||||||
) -> dict[str, str]:
|
) -> dict[str, str]:
|
||||||
logger.metric(
|
logger.metric(
|
||||||
metric_name="dataloader_likes_total",
|
metric_name="dataloader_likes_total",
|
||||||
|
|
@ -33,8 +33,7 @@ async def like(
|
||||||
response_model=schemas.RateResponse,
|
response_model=schemas.RateResponse,
|
||||||
)
|
)
|
||||||
async def dislike(
|
async def dislike(
|
||||||
# pylint: disable=C0103,W0613
|
header_request_id: str = Header(uuid.uuid4(), alias="Request-Id")
|
||||||
header_Request_Id: str = Header(uuid.uuid4(), alias="Request-Id")
|
|
||||||
) -> dict[str, str]:
|
) -> dict[str, str]:
|
||||||
logger.metric(
|
logger.metric(
|
||||||
metric_name="dataloader_dislikes_total",
|
metric_name="dataloader_dislikes_total",
|
||||||
|
|
|
||||||
|
|
@ -38,8 +38,14 @@ async def log_requests(request: Request, call_next) -> any:
|
||||||
logger = APP_CTX.get_logger()
|
logger = APP_CTX.get_logger()
|
||||||
request_path = request.url.path
|
request_path = request.url.path
|
||||||
|
|
||||||
allowed_headers_to_log = ((k, request.headers.get(k)) for k in HEADERS_WHITE_LIST_TO_LOG)
|
allowed_headers_to_log = (
|
||||||
headers_to_log = {header_name: header_value for header_name, header_value in allowed_headers_to_log if header_value}
|
(k, request.headers.get(k)) for k in HEADERS_WHITE_LIST_TO_LOG
|
||||||
|
)
|
||||||
|
headers_to_log = {
|
||||||
|
header_name: header_value
|
||||||
|
for header_name, header_value in allowed_headers_to_log
|
||||||
|
if header_value
|
||||||
|
}
|
||||||
|
|
||||||
APP_CTX.get_context_vars_container().set_context_vars(
|
APP_CTX.get_context_vars_container().set_context_vars(
|
||||||
request_id=headers_to_log.get("Request-Id", ""),
|
request_id=headers_to_log.get("Request-Id", ""),
|
||||||
|
|
@ -50,7 +56,9 @@ async def log_requests(request: Request, call_next) -> any:
|
||||||
|
|
||||||
if request_path in NON_LOGGED_ENDPOINTS:
|
if request_path in NON_LOGGED_ENDPOINTS:
|
||||||
response = await call_next(request)
|
response = await call_next(request)
|
||||||
logger.debug(f"Processed request for {request_path} with code {response.status_code}")
|
logger.debug(
|
||||||
|
f"Processed request for {request_path} with code {response.status_code}"
|
||||||
|
)
|
||||||
elif headers_to_log.get("Request-Id", None):
|
elif headers_to_log.get("Request-Id", None):
|
||||||
raw_request_body = await request.body()
|
raw_request_body = await request.body()
|
||||||
request_body_decoded = _get_decoded_body(raw_request_body, "request", logger)
|
request_body_decoded = _get_decoded_body(raw_request_body, "request", logger)
|
||||||
|
|
@ -80,7 +88,7 @@ async def log_requests(request: Request, call_next) -> any:
|
||||||
event_params=json.dumps(
|
event_params=json.dumps(
|
||||||
request_body_decoded,
|
request_body_decoded,
|
||||||
ensure_ascii=False,
|
ensure_ascii=False,
|
||||||
)
|
),
|
||||||
)
|
)
|
||||||
|
|
||||||
response = await call_next(request)
|
response = await call_next(request)
|
||||||
|
|
@ -88,12 +96,16 @@ async def log_requests(request: Request, call_next) -> any:
|
||||||
response_body = [chunk async for chunk in response.body_iterator]
|
response_body = [chunk async for chunk in response.body_iterator]
|
||||||
response.body_iterator = iterate_in_threadpool(iter(response_body))
|
response.body_iterator = iterate_in_threadpool(iter(response_body))
|
||||||
|
|
||||||
headers_to_log["Response-Time"] = datetime.now(APP_CTX.get_pytz_timezone()).isoformat()
|
headers_to_log["Response-Time"] = datetime.now(
|
||||||
|
APP_CTX.get_pytz_timezone()
|
||||||
|
).isoformat()
|
||||||
for header in headers_to_log:
|
for header in headers_to_log:
|
||||||
response.headers[header] = headers_to_log[header]
|
response.headers[header] = headers_to_log[header]
|
||||||
|
|
||||||
response_body_extracted = response_body[0] if len(response_body) > 0 else b""
|
response_body_extracted = response_body[0] if len(response_body) > 0 else b""
|
||||||
decoded_response_body = _get_decoded_body(response_body_extracted, "response", logger)
|
decoded_response_body = _get_decoded_body(
|
||||||
|
response_body_extracted, "response", logger
|
||||||
|
)
|
||||||
|
|
||||||
logger.info(
|
logger.info(
|
||||||
"Outgoing response to client system",
|
"Outgoing response to client system",
|
||||||
|
|
@ -115,11 +127,13 @@ async def log_requests(request: Request, call_next) -> any:
|
||||||
event_params=json.dumps(
|
event_params=json.dumps(
|
||||||
decoded_response_body,
|
decoded_response_body,
|
||||||
ensure_ascii=False,
|
ensure_ascii=False,
|
||||||
)
|
),
|
||||||
)
|
)
|
||||||
|
|
||||||
processing_time_ms = int(round((time.time() - start_time), 3) * 1000)
|
processing_time_ms = int(round((time.time() - start_time), 3) * 1000)
|
||||||
logger.info(f"Request processing time for {request_path}: {processing_time_ms} ms")
|
logger.info(
|
||||||
|
f"Request processing time for {request_path}: {processing_time_ms} ms"
|
||||||
|
)
|
||||||
logger.metric(
|
logger.metric(
|
||||||
metric_name="dataloader_process_duration_ms",
|
metric_name="dataloader_process_duration_ms",
|
||||||
metric_value=processing_time_ms,
|
metric_value=processing_time_ms,
|
||||||
|
|
@ -138,7 +152,9 @@ async def log_requests(request: Request, call_next) -> any:
|
||||||
else:
|
else:
|
||||||
logger.info(f"Incoming {request.method}-request with no id for {request_path}")
|
logger.info(f"Incoming {request.method}-request with no id for {request_path}")
|
||||||
response = await call_next(request)
|
response = await call_next(request)
|
||||||
logger.info(f"Request with no id for {request_path} processing time: {time.time() - start_time:.3f} s")
|
logger.info(
|
||||||
|
f"Request with no id for {request_path} processing time: {time.time() - start_time:.3f} s"
|
||||||
|
)
|
||||||
|
|
||||||
return response
|
return response
|
||||||
|
|
||||||
|
|
|
||||||
|
|
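The hunks above only reflow the logging middleware, but the interesting part of that code is how it buffers the response body so it can be logged and then re-streamed to the client. A minimal standalone sketch of that buffering trick, with an illustrative app and print-based logging instead of the project's logger:

```python
import time

from fastapi import FastAPI, Request
from starlette.concurrency import iterate_in_threadpool

app = FastAPI()


@app.middleware("http")
async def log_requests(request: Request, call_next):
    start_time = time.time()
    response = await call_next(request)

    # Drain the streaming body into memory, then rebuild the iterator
    # so the client still receives the full payload.
    response_body = [chunk async for chunk in response.body_iterator]
    response.body_iterator = iterate_in_threadpool(iter(response_body))

    body = response_body[0] if response_body else b""
    processing_time_ms = int(round(time.time() - start_time, 3) * 1000)
    print(f"{request.url.path}: {len(body)} bytes in {processing_time_ms} ms")
    return response
```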
@ -1,6 +1,4 @@
-# Infrastructure endpoints (/health, /status)
-""" 🚨 DO NOT EDIT !!!!!!
-"""
+"""🚨 DO NOT EDIT !!!!!!"""

 from importlib.metadata import distribution
@ -1,30 +1,43 @@
-from pydantic import BaseModel, Field
+from pydantic import BaseModel, ConfigDict, Field


 class HealthResponse(BaseModel):
     """Response model for the /health endpoint."""
-    status: str = Field(default="running", description="Service health check", max_length=7)

-    class Config:
-        json_schema_extra = {"example": {"status": "running"}}
+    model_config = ConfigDict(json_schema_extra={"example": {"status": "running"}})
+
+    status: str = Field(
+        default="running", description="Service health check", max_length=7
+    )


 class InfoResponse(BaseModel):
     """Response model for the /info endpoint."""

+    model_config = ConfigDict(
+        json_schema_extra={
+            "example": {
+                "name": "rest-template",
+                "description": (
+                    "Python 'AI gateway' template for developing REST microservices"
+                ),
+                "type": "REST API",
+                "version": "0.1.0",
+            }
+        }
+    )
+
     name: str = Field(description="Service name", max_length=50)
     description: str = Field(description="Service description", max_length=200)
     type: str = Field(default="REST API", description="Service type", max_length=20)
-    version: str = Field(description="Service version", max_length=20, pattern=r"^\d+\.\d+\.\d+")
-
-    class Config:
-        json_schema_extra = {
-            "example": {
-                "name": "rest-template",
-                "description": "Python 'AI gateway' template for developing REST microservices",
-                "type": "REST API",
-                "version": "0.1.0"
-            }
-        }
+    version: str = Field(
+        description="Service version", max_length=20, pattern=r"^\d+\.\d+\.\d+"
+    )


 class RateResponse(BaseModel):
+    """Response model for recording a rating."""
+
+    model_config = ConfigDict(str_strip_whitespace=True)
+
     rating_result: str = Field(description="Rating that was recorded", max_length=50)
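This file replaces the deprecated inner `class Config` with Pydantic v2's `model_config = ConfigDict(...)`. A minimal sketch of the same migration pattern in isolation, with an illustrative model name:

```python
from pydantic import BaseModel, ConfigDict, Field


class ExampleResponse(BaseModel):
    """Hypothetical model showing the Pydantic v2 config style used above."""

    model_config = ConfigDict(json_schema_extra={"example": {"status": "running"}})

    status: str = Field(default="running", max_length=7)


# json_schema_extra is merged into the generated schema:
print(ExampleResponse.model_json_schema()["example"])  # {'status': 'running'}
```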
@ -0,0 +1,12 @@
"""Exceptions for API v1."""

from fastapi import HTTPException, status


class JobNotFoundError(HTTPException):
    """Job not found."""

    def __init__(self, job_id: str):
        super().__init__(
            status_code=status.HTTP_404_NOT_FOUND, detail=f"Job {job_id} not found"
        )
@ -0,0 +1 @@
"""Data models for API v1."""
@ -1,14 +1,69 @@
-"""Aggregator of v1 routes.
-
-Exports the ready-made `router` assembled from the modular routers in the `routes` package.
-Kept as a thin layer for backward compatibility of `from dataloader.api.v1 import router` imports.
-"""
-
-from fastapi import APIRouter
-
-router = APIRouter()
-
-__all__ = ["router"]
+from __future__ import annotations
+
+from http import HTTPStatus
+from typing import Annotated
+from uuid import UUID
+
+from fastapi import APIRouter, Depends
+from sqlalchemy.ext.asyncio import AsyncSession
+
+from dataloader.api.v1.exceptions import JobNotFoundError
+from dataloader.api.v1.schemas import (
+    JobStatusResponse,
+    TriggerJobRequest,
+    TriggerJobResponse,
+)
+from dataloader.api.v1.service import JobsService
+from dataloader.context import get_session
+
+router = APIRouter(prefix="/jobs", tags=["jobs"])
+
+
+def get_service(session: Annotated[AsyncSession, Depends(get_session)]) -> JobsService:
+    """
+    FastAPI dependency to create a JobsService instance with a database session.
+    """
+    return JobsService(session)
+
+
+@router.post("/trigger", response_model=TriggerJobResponse, status_code=HTTPStatus.OK)
+async def trigger_job(
+    payload: TriggerJobRequest,
+    svc: Annotated[JobsService, Depends(get_service)],
+) -> TriggerJobResponse:
+    """
+    Creates a job or returns the existing one by idempotency_key.
+    """
+    return await svc.trigger(payload)
+
+
+@router.get(
+    "/{job_id}/status", response_model=JobStatusResponse, status_code=HTTPStatus.OK
+)
+async def get_status(
+    job_id: UUID,
+    svc: Annotated[JobsService, Depends(get_service)],
+) -> JobStatusResponse:
+    """
+    Returns the job status by its identifier.
+    """
+    st = await svc.status(job_id)
+    if not st:
+        raise JobNotFoundError(job_id=str(job_id))
+    return st
+
+
+@router.post(
+    "/{job_id}/cancel", response_model=JobStatusResponse, status_code=HTTPStatus.OK
+)
+async def cancel_job(
+    job_id: UUID,
+    svc: Annotated[JobsService, Depends(get_service)],
+) -> JobStatusResponse:
+    """
+    Requests cancellation of the job.
+    """
+    st = await svc.cancel(job_id)
+    if not st:
+        raise JobNotFoundError(job_id=str(job_id))
+    return st
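A quick way to exercise the three endpoints above is FastAPI's TestClient. A minimal sketch, assuming the application object is importable as shown (the module path, queue name, task name and lock key below are placeholders, not values from this repository):

```python
from fastapi.testclient import TestClient

from dataloader.__main__ import app  # assumption: the FastAPI app object lives here

client = TestClient(app)

# Enqueue a job; repeating the same idempotency_key returns the same job.
resp = client.post(
    "/jobs/trigger",
    json={
        "queue": "demo",        # placeholder queue name
        "task": "noop",         # placeholder task name
        "lock_key": "demo:1",   # placeholder lock key
        "idempotency_key": "abc-1",
    },
)
job_id = resp.json()["job_id"]

# Poll the status, then request cancellation.
print(client.get(f"/jobs/{job_id}/status").json())
print(client.post(f"/jobs/{job_id}/cancel").json())
```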
@ -0,0 +1,74 @@
from __future__ import annotations

from datetime import datetime, timezone
from typing import Any, Optional
from uuid import UUID

from pydantic import BaseModel, ConfigDict, Field, field_validator


class TriggerJobRequest(BaseModel):
    """
    Request to enqueue a job.
    """

    model_config = ConfigDict(str_strip_whitespace=True)

    queue: str = Field(...)
    task: str = Field(...)
    args: dict[str, Any] = Field(default_factory=dict)
    idempotency_key: Optional[str] = Field(default=None)
    lock_key: str = Field(...)
    partition_key: str = Field(default="")
    priority: int = Field(default=100, ge=0)
    available_at: Optional[datetime] = Field(default=None)
    max_attempts: int = Field(default=5, ge=0)
    lease_ttl_sec: int = Field(default=60, gt=0)
    producer: Optional[str] = Field(default=None)
    consumer_group: Optional[str] = Field(default=None)

    @field_validator("available_at")
    @classmethod
    def _ensure_tz(cls, v: Optional[datetime]) -> Optional[datetime]:
        if v is None:
            return None
        return v if v.tzinfo else v.replace(tzinfo=timezone.utc)


class TriggerJobResponse(BaseModel):
    """
    Response to enqueueing a job.
    """

    model_config = ConfigDict(str_strip_whitespace=True)

    job_id: UUID = Field(...)
    status: str = Field(...)


class JobStatusResponse(BaseModel):
    """
    Current job status.
    """

    model_config = ConfigDict(str_strip_whitespace=True)

    job_id: UUID = Field(...)
    status: str = Field(...)
    attempt: int = Field(...)
    started_at: Optional[datetime] = Field(default=None)
    finished_at: Optional[datetime] = Field(default=None)
    heartbeat_at: Optional[datetime] = Field(default=None)
    error: Optional[str] = Field(default=None)
    progress: dict[str, Any] = Field(default_factory=dict)


class CancelJobResponse(BaseModel):
    """
    Response to a job cancellation request.
    """

    model_config = ConfigDict(str_strip_whitespace=True)

    job_id: UUID = Field(...)
    status: str = Field(...)
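The `available_at` validator normalizes naive datetimes to UTC and leaves aware ones untouched. A short illustration of that behavior (field values other than `available_at` are placeholders):

```python
from datetime import datetime, timezone

from dataloader.api.v1.schemas import TriggerJobRequest

req = TriggerJobRequest(
    queue="demo",        # placeholder values for the required fields
    task="noop",
    lock_key="demo:1",
    available_at=datetime(2024, 1, 1, 12, 0),  # naive datetime
)
# The validator attaches UTC to naive datetimes.
assert req.available_at.tzinfo == timezone.utc
```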
@ -0,0 +1,89 @@
from __future__ import annotations

from datetime import datetime, timezone
from typing import Optional
from uuid import UUID

from sqlalchemy.ext.asyncio import AsyncSession

from dataloader.api.v1.schemas import (
    JobStatusResponse,
    TriggerJobRequest,
    TriggerJobResponse,
)
from dataloader.api.v1.utils import new_job_id
from dataloader.logger.logger import get_logger
from dataloader.storage.repositories import QueueRepository
from dataloader.storage.schemas import CreateJobRequest


class JobsService:
    """
    Business logic for working with the job queue.
    """

    def __init__(self, session: AsyncSession):
        self._s = session
        self._repo = QueueRepository(self._s)
        self._log = get_logger(__name__)

    async def trigger(self, req: TriggerJobRequest) -> TriggerJobResponse:
        """
        Idempotently enqueues a job and returns its identifier and status.
        """
        job_uuid: UUID = new_job_id()
        dt = req.available_at or datetime.now(timezone.utc)
        creq = CreateJobRequest(
            job_id=str(job_uuid),
            queue=req.queue,
            task=req.task,
            args=req.args or {},
            idempotency_key=req.idempotency_key,
            lock_key=req.lock_key,
            partition_key=req.partition_key or "",
            priority=int(req.priority),
            available_at=dt,
            max_attempts=int(req.max_attempts),
            lease_ttl_sec=int(req.lease_ttl_sec),
            producer=req.producer,
            consumer_group=req.consumer_group,
        )
        job_id, status = await self._repo.create_or_get(creq)
        return TriggerJobResponse(job_id=UUID(job_id), status=status)

    async def status(self, job_id: UUID) -> Optional[JobStatusResponse]:
        """
        Returns the job status.
        """
        st = await self._repo.get_status(str(job_id))
        if not st:
            return None
        return JobStatusResponse(
            job_id=UUID(st.job_id),
            status=st.status,
            attempt=st.attempt,
            started_at=st.started_at,
            finished_at=st.finished_at,
            heartbeat_at=st.heartbeat_at,
            error=st.error,
            progress=st.progress or {},
        )

    async def cancel(self, job_id: UUID) -> Optional[JobStatusResponse]:
        """
        Requests cancellation of the job and returns its current state.
        """
        await self._repo.cancel(str(job_id))
        st = await self._repo.get_status(str(job_id))
        if not st:
            return None
        return JobStatusResponse(
            job_id=UUID(st.job_id),
            status=st.status,
            attempt=st.attempt,
            started_at=st.started_at,
            finished_at=st.finished_at,
            heartbeat_at=st.heartbeat_at,
            error=st.error,
            progress=st.progress or {},
        )
@ -0,0 +1,13 @@
from __future__ import annotations

from uuid import UUID, uuid4


def new_job_id() -> UUID:
    """
    Generates a new UUID for a job identifier.

    Returns:
        UUID for the job
    """
    return uuid4()
@ -1,17 +1,37 @@
+import json
 import os
 from logging import DEBUG, INFO
+from typing import Annotated, Any

 from dotenv import load_dotenv
-from pydantic import Field
+from pydantic import BeforeValidator, Field
 from pydantic_settings import BaseSettings

 load_dotenv()


+def strip_slashes(v: str) -> str:
+    """Strip leading and trailing slashes from string."""
+    if isinstance(v, str):
+        return v.strip("/")
+    return v
+
+
+def build_url(protocol: str, domain: str, port: str, path: str = "") -> str:
+    """Build URL from components."""
+    base = f"{protocol}://{domain}"
+    if port and port not in ("80", "443"):
+        base += f":{port}"
+    if path:
+        base += f"/{path.strip('/')}"
+    return base
+
+
 class BaseAppSettings(BaseSettings):
     """
     Base class for settings.
     """

     local: bool = Field(validation_alias="LOCAL", default=False)
     debug: bool = Field(validation_alias="DEBUG", default=False)
@ -24,6 +44,7 @@ class AppSettings(BaseAppSettings):
     """
     Application settings.
     """

     app_host: str = Field(validation_alias="APP_HOST", default="0.0.0.0")
     app_port: int = Field(validation_alias="APP_PORT", default=8081)
     kube_net_name: str = Field(validation_alias="PROJECT_NAME", default="AIGATEWAY")
@ -34,15 +55,28 @@ class LogSettings(BaseAppSettings):
     """
     Logging settings.
     """

     private_log_file_path: str = Field(validation_alias="LOG_PATH", default=os.getcwd())
-    private_log_file_name: str = Field(validation_alias="LOG_FILE_NAME", default="app.log")
+    private_log_file_name: str = Field(
+        validation_alias="LOG_FILE_NAME", default="app.log"
+    )
     log_rotation: str = Field(validation_alias="LOG_ROTATION", default="10 MB")
-    private_metric_file_path: str = Field(validation_alias="METRIC_PATH", default=os.getcwd())
-    private_metric_file_name: str = Field(validation_alias="METRIC_FILE_NAME", default="app-metric.log")
-    private_audit_file_path: str = Field(validation_alias="AUDIT_LOG_PATH", default=os.getcwd())
-    private_audit_file_name: str = Field(validation_alias="AUDIT_LOG_FILE_NAME", default="events.log")
+    private_metric_file_path: str = Field(
+        validation_alias="METRIC_PATH", default=os.getcwd()
+    )
+    private_metric_file_name: str = Field(
+        validation_alias="METRIC_FILE_NAME", default="app-metric.log"
+    )
+    private_audit_file_path: str = Field(
+        validation_alias="AUDIT_LOG_PATH", default=os.getcwd()
+    )
+    private_audit_file_name: str = Field(
+        validation_alias="AUDIT_LOG_FILE_NAME", default="events.log"
+    )
     audit_host_ip: str = Field(validation_alias="HOST_IP", default="127.0.0.1")
-    audit_host_uid: str = Field(validation_alias="HOST_UID", default="63b6dcee-170b-49bf-a65c-3ec967398ccd")
+    audit_host_uid: str = Field(
+        validation_alias="HOST_UID", default="63b6dcee-170b-49bf-a65c-3ec967398ccd"
+    )

     @staticmethod
     def get_file_abs_path(path_name: str, file_name: str) -> str:
@ -50,15 +84,21 @@ class LogSettings(BaseAppSettings):

     @property
     def log_file_abs_path(self) -> str:
-        return self.get_file_abs_path(self.private_log_file_path, self.private_log_file_name)
+        return self.get_file_abs_path(
+            self.private_log_file_path, self.private_log_file_name
+        )

     @property
     def metric_file_abs_path(self) -> str:
-        return self.get_file_abs_path(self.private_metric_file_path, self.private_metric_file_name)
+        return self.get_file_abs_path(
+            self.private_metric_file_path, self.private_metric_file_name
+        )

     @property
     def audit_file_abs_path(self) -> str:
-        return self.get_file_abs_path(self.private_audit_file_path, self.private_audit_file_name)
+        return self.get_file_abs_path(
+            self.private_audit_file_path, self.private_audit_file_name
+        )

     @property
     def log_lvl(self) -> int:
@ -66,12 +106,20 @@ class LogSettings(BaseAppSettings):


 class PGSettings(BaseSettings):
+    """
+    Postgres connection settings.
+    """
+
     host: str = Field(validation_alias="PG_HOST", default="localhost")
     port: int = Field(validation_alias="PG_PORT", default=5432)
     user: str = Field(validation_alias="PG_USER", default="postgres")
     password: str = Field(validation_alias="PG_PASSWORD", default="")
     database: str = Field(validation_alias="PG_DATABASE", default="postgres")
-    schema_: str = Field(validation_alias="PG_SCHEMA", default="public")
+    schema_queue: str = Field(validation_alias="PG_SCHEMA_QUEUE", default="public")
+    schema_quotes: str = Field(validation_alias="PG_SCHEMA_QUOTES", default="public")
+    schema_opu: str = Field(validation_alias="PG_SCHEMA_OPU", default="public")
+    batch_size: int = Field(validation_alias="PG_BATCH_SIZE", default=1000)
+    deduplicate: bool = Field(validation_alias="PG_DEDUPLICATE", default=False)
     use_pool: bool = Field(validation_alias="PG_USE_POOL", default=True)
     pool_size: int = Field(validation_alias="PG_POOL_SIZE", default=5)
     max_overflow: int = Field(validation_alias="PG_MAX_OVERFLOW", default=10)
@ -81,22 +129,118 @@ class PGSettings(BaseSettings):

     @property
     def url(self) -> str:
-        """Automatically generated SQLAlchemy connection URL"""
+        """
+        SQLAlchemy connection string (async).
+        """
         return f"postgresql+asyncpg://{self.user}:{self.password}@{self.host}:{self.port}/{self.database}"


+class WorkerSettings(BaseSettings):
+    """
+    Queue and worker settings.
+    """
+
+    workers_json: str = Field(validation_alias="WORKERS_JSON", default="[]")
+    heartbeat_sec: int = Field(validation_alias="DL_HEARTBEAT_SEC", default=10)
+    default_lease_ttl_sec: int = Field(
+        validation_alias="DL_DEFAULT_LEASE_TTL_SEC", default=60
+    )
+    reaper_period_sec: int = Field(validation_alias="DL_REAPER_PERIOD_SEC", default=10)
+    claim_backoff_sec: int = Field(validation_alias="DL_CLAIM_BACKOFF_SEC", default=15)
+
+    def parsed_workers(self) -> list[dict[str, Any]]:
+        """
+        Returns the list of worker configurations from WORKERS_JSON.
+        """
+        try:
+            data = json.loads(self.workers_json or "[]")
+            return [d for d in data if isinstance(d, dict)]
+        except Exception:
+            return []
+
+
+class CertsSettings(BaseSettings):
+    """
+    SSL certificate settings for local development.
+    """
+
+    ca_bundle_file: str = Field(validation_alias="CA_BUNDLE_FILE", default="")
+    cert_file: str = Field(validation_alias="CERT_FILE", default="")
+    key_file: str = Field(validation_alias="KEY_FILE", default="")
+
+
+class SuperTeneraSettings(BaseAppSettings):
+    """
+    SuperTenera integration settings.
+    """
+
+    host: Annotated[str, BeforeValidator(strip_slashes)] = Field(
+        validation_alias="SUPERTENERA_HOST",
+        default="ci03801737-ift-tenera-giga.delta.sbrf.ru/atlant360bc/",
+    )
+    port: str = Field(validation_alias="SUPERTENERA_PORT", default="443")
+    quotes_endpoint: Annotated[str, BeforeValidator(strip_slashes)] = Field(
+        validation_alias="SUPERTENERA_QUOTES_ENDPOINT",
+        default="/get_gigaparser_quotes/",
+    )
+    timeout: int = Field(validation_alias="SUPERTENERA_TIMEOUT", default=20)
+
+    @property
+    def base_url(self) -> str:
+        """Returns the absolute URL for SuperTenera."""
+        domain, raw_path = (
+            self.host.split("/", 1) if "/" in self.host else (self.host, "")
+        )
+        return build_url(self.protocol, domain, self.port, raw_path)
+
+
+class Gmap2BriefSettings(BaseAppSettings):
+    """
+    Gmap2Brief (OPU API) integration settings.
+    """
+
+    host: Annotated[str, BeforeValidator(strip_slashes)] = Field(
+        validation_alias="GMAP2BRIEF_HOST",
+        default="ci02533826-tib-brief.apps.ift-terra000024-edm.ocp.delta.sbrf.ru",
+    )
+    port: str = Field(validation_alias="GMAP2BRIEF_PORT", default="443")
+    start_endpoint: Annotated[str, BeforeValidator(strip_slashes)] = Field(
+        validation_alias="GMAP2BRIEF_START_ENDPOINT", default="/export/opu/start"
+    )
+    status_endpoint: Annotated[str, BeforeValidator(strip_slashes)] = Field(
+        validation_alias="GMAP2BRIEF_STATUS_ENDPOINT", default="/export/{job_id}/status"
+    )
+    download_endpoint: Annotated[str, BeforeValidator(strip_slashes)] = Field(
+        validation_alias="GMAP2BRIEF_DOWNLOAD_ENDPOINT",
+        default="/export/{job_id}/download",
+    )
+    poll_interval: int = Field(validation_alias="GMAP2BRIEF_POLL_INTERVAL", default=2)
+    timeout: int = Field(validation_alias="GMAP2BRIEF_TIMEOUT", default=3600)
+
+    @property
+    def base_url(self) -> str:
+        """Returns the absolute URL for Gmap2Brief."""
+        return build_url(self.protocol, self.host, self.port)
+
+
 class Secrets:
     """
-    Class aggregating all application settings.
+    Aggregator of application settings.
     """

     app: AppSettings = AppSettings()
     log: LogSettings = LogSettings()
     pg: PGSettings = PGSettings()
+    worker: WorkerSettings = WorkerSettings()
+    certs: CertsSettings = CertsSettings()
+    supertenera: SuperTeneraSettings = SuperTeneraSettings()
+    gmap2brief: Gmap2BriefSettings = Gmap2BriefSettings()


 APP_CONFIG = Secrets()

 __all__ = [
+    "WorkerSettings",
     "Secrets",
     "APP_CONFIG",
 ]
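The new `build_url` helper and `WorkerSettings.parsed_workers` are small pure functions, so they are easy to sanity-check in isolation. A minimal sketch; the environment value and the `concurrency` key are illustrative, not part of this repository:

```python
import os

os.environ["WORKERS_JSON"] = '[{"queue": "demo", "concurrency": 2}, "not-a-dict"]'

from dataloader.config import WorkerSettings, build_url

# Standard ports 80/443 are dropped, others are appended, the path is normalized.
assert build_url("https", "example.org", "443", "/api/") == "https://example.org/api"
assert build_url("http", "example.org", "8080") == "http://example.org:8080"

# Only dict entries from WORKERS_JSON survive parsing; malformed JSON yields [].
print(WorkerSettings().parsed_workers())  # [{'queue': 'demo', 'concurrency': 2}]
```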
@ -1,62 +1,122 @@
-# Implementation of the AppContext pattern: a single access point to dependencies
-from dataloader.base import Singleton
-
-import typing
-
-from dataloader.config import APP_CONFIG, Secrets
-from dataloader.logger import ContextVarsContainer, LoggerConfigurator
-
-import pytz
-
-
-class AppContext(metaclass=Singleton):
-
-    @property
-    def logger(self) -> "typing.Any":
-        return self._logger_manager.async_logger
-
-    def __init__(self, secrets: Secrets) -> None:
-        self.timezone = pytz.timezone(secrets.app.timezone)
-        self.context_vars_container = ContextVarsContainer()
-        self._logger_manager = LoggerConfigurator(
-            log_lvl=secrets.log.log_lvl,
-            log_file_path=secrets.log.log_file_abs_path,
-            metric_file_path=secrets.log.metric_file_abs_path,
-            audit_file_path=secrets.log.audit_file_abs_path,
-            audit_host_ip=secrets.log.audit_host_ip,
-            audit_host_uid=secrets.log.audit_host_uid,
-            context_vars_container=self.context_vars_container,
-            timezone=self.timezone,
-        )
-        self.pg = secrets.pg
-        self.logger.info("App context initialized.")
-
-    def get_logger(self) -> "typing.Any":
-        return self.logger
-
-    def get_context_vars_container(self) -> ContextVarsContainer:
-        return self.context_vars_container
-
-    def get_pytz_timezone(self):
-        return self.timezone
-
-    async def on_startup(self) -> None:
-        self.logger.info("Application is starting up.")
-        self.logger.info("All connections checked. Application is up and ready.")
-
-    async def on_shutdown(self) -> None:
-        self.logger.info("Application is shutting down.")
-        self._logger_manager.remove_logger_handlers()
-
-
-APP_CTX = AppContext(APP_CONFIG)
-
-__all__ = ["APP_CTX"]
+from __future__ import annotations
+
+from logging import Logger
+from typing import AsyncGenerator
+from zoneinfo import ZoneInfo
+
+from sqlalchemy.ext.asyncio import AsyncEngine, AsyncSession, async_sessionmaker
+
+from .config import APP_CONFIG
+from .logger.context_vars import ContextVarsContainer
+
+
+class AppContext:
+    """
+    Application context holding global dependencies (Singleton pattern).
+    """
+
+    def __init__(self) -> None:
+        self._engine: AsyncEngine | None = None
+        self._sessionmaker: async_sessionmaker[AsyncSession] | None = None
+        self._context_vars_container = ContextVarsContainer()
+
+    @property
+    def engine(self) -> AsyncEngine:
+        """
+        Returns the initialized database engine.
+
+        Raises:
+            RuntimeError: If the engine is not initialized
+        """
+        if self._engine is None:
+            raise RuntimeError("Database engine is not initialized.")
+        return self._engine
+
+    @property
+    def sessionmaker(self) -> async_sessionmaker[AsyncSession]:
+        """
+        Returns the database session factory.
+
+        Raises:
+            RuntimeError: If the sessionmaker is not initialized
+        """
+        if self._sessionmaker is None:
+            raise RuntimeError("Sessionmaker is not initialized.")
+        return self._sessionmaker
+
+    async def on_startup(self) -> None:
+        """
+        Initializes the context on application startup.
+        Configures logging and creates the engine and sessionmaker.
+        """
+        from .logger.logger import setup_logging
+        from .storage.engine import create_engine, create_sessionmaker
+
+        setup_logging()
+        self._engine = create_engine(APP_CONFIG.pg.url)
+        self._sessionmaker = create_sessionmaker(self._engine)
+
+    async def on_shutdown(self) -> None:
+        """
+        Cleans up resources on application shutdown.
+        Closes database connections.
+        """
+        if self._engine:
+            await self._engine.dispose()
+
+    def get_logger(self, name: str | None = None) -> Logger:
+        """
+        Returns a configured logger.
+
+        Parameters:
+            name: Logger name (optional)
+
+        Returns:
+            Logger instance
+        """
+        from .logger.logger import get_logger as get_app_logger
+
+        return get_app_logger(name)
+
+    def get_context_vars_container(self) -> ContextVarsContainer:
+        """
+        Returns the container of context variables for logging.
+
+        Returns:
+            ContextVarsContainer
+        """
+        return self._context_vars_container
+
+    @property
+    def logger(self) -> Logger:
+        """
+        Returns the application logger.
+
+        Returns:
+            Logger
+        """
+        return self.get_logger()
+
+    @property
+    def pytz_timezone(self) -> ZoneInfo:
+        """
+        Returns the application timezone.
+
+        Returns:
+            ZoneInfo
+        """
+        return ZoneInfo(APP_CONFIG.app.timezone)
+
+
+APP_CTX = AppContext()
+
+
+async def get_session() -> AsyncGenerator[AsyncSession, None]:
+    """
+    FastAPI dependency for obtaining a database session.
+
+    Yields:
+        AsyncSession for database work
+    """
+    async with APP_CTX.sessionmaker() as session:
+        yield session
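With `get_session` exposed from the context module, any route can receive an `AsyncSession` through FastAPI's dependency injection once `APP_CTX.on_startup()` has run. A minimal sketch, assuming the illustrative route below; `SELECT 1` is just a placeholder query:

```python
from typing import Annotated

from fastapi import APIRouter, Depends
from sqlalchemy import text
from sqlalchemy.ext.asyncio import AsyncSession

from dataloader.context import get_session

router = APIRouter()


@router.get("/db-ping")
async def db_ping(session: Annotated[AsyncSession, Depends(get_session)]) -> dict:
    # Any lightweight query proves the session is usable.
    result = await session.execute(text("SELECT 1"))
    return {"ok": result.scalar_one() == 1}
```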
@ -0,0 +1 @@
"""Application-level exceptions."""
@ -0,0 +1 @@
"""Interfaces for interacting with external systems."""
@ -0,0 +1,10 @@
"""Gmap2Brief interface and schemas."""

from . import schemas
from .interface import Gmap2BriefInterface, get_gmap2brief_interface

__all__ = [
    "schemas",
    "Gmap2BriefInterface",
    "get_gmap2brief_interface",
]
@ -0,0 +1,218 @@
|
||||||
|
"""Интерфейс для взаимодействия с Gmap2Brief API (OPU экспорт)."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import ssl
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import TYPE_CHECKING
|
||||||
|
|
||||||
|
import httpx
|
||||||
|
|
||||||
|
from dataloader.config import APP_CONFIG
|
||||||
|
from dataloader.interfaces.gmap2_brief.schemas import (
|
||||||
|
ExportJobStatus,
|
||||||
|
StartExportResponse,
|
||||||
|
)
|
||||||
|
|
||||||
|
if TYPE_CHECKING:
|
||||||
|
from logging import Logger
|
||||||
|
|
||||||
|
|
||||||
|
class Gmap2BriefConnectionError(Exception):
|
||||||
|
"""Ошибка подключения к Gmap2Brief API."""
|
||||||
|
|
||||||
|
|
||||||
|
class Gmap2BriefInterface:
|
||||||
|
"""Интерфейс для взаимодействия с Gmap2Brief API."""
|
||||||
|
|
||||||
|
def __init__(self, logger: Logger) -> None:
|
||||||
|
"""
|
||||||
|
Инициализация интерфейса Gmap2Brief.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
logger: Logger instance.
|
||||||
|
"""
|
||||||
|
self.logger = logger
|
||||||
|
self.base_url = APP_CONFIG.gmap2brief.base_url
|
||||||
|
self.poll_interval = APP_CONFIG.gmap2brief.poll_interval
|
||||||
|
self.timeout = APP_CONFIG.gmap2brief.timeout
|
||||||
|
|
||||||
|
self._ssl_context = None
|
||||||
|
if APP_CONFIG.app.local and APP_CONFIG.certs.cert_file:
|
||||||
|
self._ssl_context = ssl.create_default_context(
|
||||||
|
cafile=APP_CONFIG.certs.ca_bundle_file
|
||||||
|
)
|
||||||
|
self._ssl_context.load_cert_chain(
|
||||||
|
certfile=APP_CONFIG.certs.cert_file, keyfile=APP_CONFIG.certs.key_file
|
||||||
|
)
|
||||||
|
|
||||||
|
async def start_export(self) -> str:
|
||||||
|
"""
|
||||||
|
Запускает задачу экспорта OPU.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
job_id задачи
|
||||||
|
|
||||||
|
Raises:
|
||||||
|
Gmap2BriefConnectionError: При ошибке запроса
|
||||||
|
"""
|
||||||
|
url = self.base_url + APP_CONFIG.gmap2brief.start_endpoint
|
||||||
|
|
||||||
|
async with httpx.AsyncClient(
|
||||||
|
cert=(
|
||||||
|
(APP_CONFIG.certs.cert_file, APP_CONFIG.certs.key_file)
|
||||||
|
if APP_CONFIG.app.local
|
||||||
|
else None
|
||||||
|
),
|
||||||
|
verify=APP_CONFIG.certs.ca_bundle_file if APP_CONFIG.app.local else True,
|
||||||
|
timeout=self.timeout,
|
||||||
|
) as client:
|
||||||
|
try:
|
||||||
|
self.logger.info(f"Starting OPU export: POST {url}")
|
||||||
|
response = await client.post(url)
|
||||||
|
response.raise_for_status()
|
||||||
|
data = StartExportResponse.model_validate(response.json())
|
||||||
|
self.logger.info(f"OPU export started: job_id={data.job_id}")
|
||||||
|
return data.job_id
|
||||||
|
except httpx.HTTPStatusError as e:
|
||||||
|
raise Gmap2BriefConnectionError(
|
||||||
|
f"Failed to start export: {e.response.status_code} {e.response.text}"
|
||||||
|
) from e
|
||||||
|
except httpx.RequestError as e:
|
||||||
|
raise Gmap2BriefConnectionError(f"Request error: {e}") from e
|
||||||
|
|
||||||
|
async def get_status(self, job_id: str) -> ExportJobStatus:
|
||||||
|
"""
|
||||||
|
Получает статус задачи экспорта.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
job_id: Идентификатор задачи
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Статус задачи
|
||||||
|
|
||||||
|
Raises:
|
||||||
|
Gmap2BriefConnectionError: При ошибке запроса
|
||||||
|
"""
|
||||||
|
url = self.base_url + APP_CONFIG.gmap2brief.status_endpoint.format(
|
||||||
|
job_id=job_id
|
||||||
|
)
|
||||||
|
|
||||||
|
async with httpx.AsyncClient(
|
||||||
|
cert=(
|
||||||
|
(APP_CONFIG.certs.cert_file, APP_CONFIG.certs.key_file)
|
||||||
|
if APP_CONFIG.app.local
|
||||||
|
else None
|
||||||
|
),
|
||||||
|
verify=APP_CONFIG.certs.ca_bundle_file if APP_CONFIG.app.local else True,
|
||||||
|
timeout=self.timeout,
|
||||||
|
) as client:
|
||||||
|
try:
|
||||||
|
response = await client.get(url)
|
||||||
|
response.raise_for_status()
|
||||||
|
data = ExportJobStatus.model_validate(response.json())
|
||||||
|
return data
|
||||||
|
except httpx.HTTPStatusError as e:
|
||||||
|
raise Gmap2BriefConnectionError(
|
||||||
|
f"Failed to get status: {e.response.status_code} {e.response.text}"
|
||||||
|
) from e
|
||||||
|
except httpx.RequestError as e:
|
||||||
|
raise Gmap2BriefConnectionError(f"Request error: {e}") from e
|
||||||
|
|
||||||
|
async def wait_for_completion(
|
||||||
|
self, job_id: str, max_wait: int | None = None
|
||||||
|
) -> ExportJobStatus:
|
||||||
|
"""
|
||||||
|
Ждет завершения задачи экспорта с периодическим polling.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
job_id: Идентификатор задачи
|
||||||
|
max_wait: Максимальное время ожидания в секундах (None = без ограничений)
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Финальный статус задачи
|
||||||
|
|
||||||
|
Raises:
|
||||||
|
Gmap2BriefConnectionError: При ошибке или таймауте
|
||||||
|
"""
|
||||||
|
self.logger.info(f"Waiting for job {job_id} completion...")
|
||||||
|
start_time = asyncio.get_event_loop().time()
|
||||||
|
|
||||||
|
while True:
|
||||||
|
status = await self.get_status(job_id)
|
||||||
|
|
||||||
|
if status.status == "completed":
|
||||||
|
self.logger.info(
|
||||||
|
f"Job {job_id} completed, total_rows={status.total_rows}"
|
||||||
|
)
|
||||||
|
return status
|
||||||
|
elif status.status == "failed":
|
||||||
|
raise Gmap2BriefConnectionError(f"Job {job_id} failed: {status.error}")
|
||||||
|
|
||||||
|
elapsed = asyncio.get_event_loop().time() - start_time
|
||||||
|
if max_wait and elapsed > max_wait:
|
||||||
|
raise Gmap2BriefConnectionError(
|
||||||
|
f"Job {job_id} timeout after {elapsed:.1f}s"
|
||||||
|
)
|
||||||
|
|
||||||
|
self.logger.debug(
|
||||||
|
f"Job {job_id} status={status.status}, rows={status.total_rows}, elapsed={elapsed:.1f}s"
|
||||||
|
)
|
||||||
|
await asyncio.sleep(self.poll_interval)
|
||||||
|
|
||||||
|
async def download_export(self, job_id: str, output_path: Path) -> Path:
|
||||||
|
"""
|
||||||
|
Скачивает результат экспорта.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
job_id: Идентификатор задачи
|
||||||
|
output_path: Путь для сохранения файла
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Путь к скачанному файлу
|
||||||
|
|
||||||
|
Raises:
|
||||||
|
Gmap2BriefConnectionError: При ошибке скачивания
|
||||||
|
"""
|
||||||
|
url = self.base_url + APP_CONFIG.gmap2brief.download_endpoint.format(
|
||||||
|
job_id=job_id
|
||||||
|
)
|
||||||
|
|
||||||
|
async with httpx.AsyncClient(
|
||||||
|
cert=(
|
||||||
|
(APP_CONFIG.certs.cert_file, APP_CONFIG.certs.key_file)
|
||||||
|
if APP_CONFIG.app.local
|
||||||
|
else None
|
||||||
|
),
|
||||||
|
verify=APP_CONFIG.certs.ca_bundle_file if APP_CONFIG.app.local else True,
|
||||||
|
timeout=self.timeout,
|
||||||
|
) as client:
|
||||||
|
try:
|
||||||
|
self.logger.info(f"Downloading export: GET {url}")
|
||||||
|
async with client.stream("GET", url, follow_redirects=True) as response:
|
||||||
|
response.raise_for_status()
|
||||||
|
|
||||||
|
output_path.parent.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
|
with open(output_path, "wb") as f:
|
||||||
|
async for chunk in response.aiter_bytes(chunk_size=8192):
|
||||||
|
f.write(chunk)
|
||||||
|
|
||||||
|
file_size = output_path.stat().st_size
|
||||||
|
self.logger.info(f"Downloaded {file_size:,} bytes to {output_path}")
|
||||||
|
return output_path
|
||||||
|
|
||||||
|
except httpx.HTTPStatusError as e:
|
||||||
|
raise Gmap2BriefConnectionError(
|
||||||
|
f"Failed to download: {e.response.status_code} {e.response.text}"
|
||||||
|
) from e
|
||||||
|
except httpx.RequestError as e:
|
||||||
|
raise Gmap2BriefConnectionError(f"Request error: {e}") from e
|
||||||
|
|
||||||
|
|
||||||
|
def get_gmap2brief_interface() -> Gmap2BriefInterface:
|
||||||
|
"""Возвращает инстанс Gmap2BriefInterface с настройками из AppContext."""
|
||||||
|
from dataloader.context import APP_CTX
|
||||||
|
|
||||||
|
return Gmap2BriefInterface(logger=APP_CTX.logger)
|
||||||
|
|
@ -0,0 +1,27 @@
"""Gmap2Brief API response schemas."""

from __future__ import annotations

from typing import Literal

from pydantic import BaseModel, Field


class StartExportResponse(BaseModel):
    """Response to starting an export."""

    job_id: str = Field(..., description="Export job identifier")


class ExportJobStatus(BaseModel):
    """Export job status."""

    job_id: str = Field(..., description="Job identifier")
    status: Literal["pending", "running", "completed", "failed"] = Field(
        ..., description="Job status"
    )
    total_rows: int = Field(default=0, description="Number of processed rows")
    error: str | None = Field(default=None, description="Error text (if any)")
    temp_file_path: str | None = Field(
        default=None, description="Path to the temporary file (for completed)"
    )
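Taken together with the interface above, the start, poll and download steps compose into a single export flow. A sketch of driving it outside FastAPI, assuming the factory from the interface module; the output file name is illustrative:

```python
import asyncio
from pathlib import Path

from dataloader.interfaces.gmap2_brief import get_gmap2brief_interface


async def export_opu(target: Path) -> Path:
    iface = get_gmap2brief_interface()
    job_id = await iface.start_export()
    # Blocks (with periodic polling) until the job completes or fails.
    status = await iface.wait_for_completion(job_id, max_wait=3600)
    print(f"job {job_id} finished with {status.total_rows} rows")
    return await iface.download_export(job_id, target)


if __name__ == "__main__":
    asyncio.run(export_opu(Path("./opu_export.dat")))  # illustrative file name
```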
@ -0,0 +1,10 @@
"""SuperTenera interface and schemas."""

from . import schemas
from .interface import SuperTeneraInterface, get_async_tenera_interface

__all__ = [
    "schemas",
    "SuperTeneraInterface",
    "get_async_tenera_interface",
]
@ -0,0 +1,171 @@
|
||||||
|
"""Интерфейс для взаимодействия с сервисом SuperTenera."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import ssl
|
||||||
|
import uuid
|
||||||
|
from asyncio import TimeoutError
|
||||||
|
from datetime import datetime
|
||||||
|
from typing import TYPE_CHECKING, Literal, Self
|
||||||
|
|
||||||
|
import aiohttp
|
||||||
|
|
||||||
|
from dataloader.config import APP_CONFIG
|
||||||
|
from dataloader.interfaces.tenera.schemas import MainData
|
||||||
|
|
||||||
|
if TYPE_CHECKING:
|
||||||
|
from logging import Logger
|
||||||
|
|
||||||
|
|
||||||
|
class SuperTeneraConnectionError(Exception):
|
||||||
|
"""Ошибка подключения к SuperTenera API."""
|
||||||
|
|
||||||
|
|
||||||
|
class SuperTeneraInterface:
|
||||||
|
"""Интерфейс для взаимодействия с сервисом SuperTenera по http."""
|
||||||
|
|
||||||
|
def __init__(
|
||||||
|
self,
|
||||||
|
logger: Logger,
|
||||||
|
base_url: str,
|
||||||
|
*,
|
||||||
|
timezone=None,
|
||||||
|
) -> None:
|
||||||
|
"""
|
||||||
|
Инициализация интерфейса SuperTenera.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
logger: Logger instance.
|
||||||
|
base_url: base service url.
|
||||||
|
timezone: timezone for datetime objects.
|
||||||
|
"""
|
||||||
|
self.logger = logger
|
||||||
|
self.base_url = base_url
|
||||||
|
self.timezone = timezone
|
||||||
|
|
||||||
|
self._session: aiohttp.ClientSession | None = None
|
||||||
|
|
||||||
|
self._ssl_context = None
|
||||||
|
if APP_CONFIG.app.local:
|
||||||
|
self._ssl_context = ssl.create_default_context(
|
||||||
|
cafile=APP_CONFIG.certs.ca_bundle_file
|
||||||
|
)
|
||||||
|
self._ssl_context.load_cert_chain(
|
||||||
|
certfile=APP_CONFIG.certs.cert_file, keyfile=APP_CONFIG.certs.key_file
|
||||||
|
)
|
||||||
|
|
||||||
|
def form_base_headers(self) -> dict:
|
||||||
|
"""Формирует базовые заголовки для запроса."""
|
||||||
|
metadata_pairs = {
|
||||||
|
"request-id": str(uuid.uuid4()),
|
||||||
|
"request-time": str(datetime.now(tz=self.timezone).isoformat()),
|
||||||
|
"system-id": APP_CONFIG.app.kube_net_name,
|
||||||
|
}
|
||||||
|
return {
|
||||||
|
metakey: metavalue
|
||||||
|
for metakey, metavalue in metadata_pairs.items()
|
||||||
|
if metavalue
|
||||||
|
}
|
||||||
|
|
||||||
|
async def __aenter__(self) -> Self:
|
||||||
|
"""Async context manager enter."""
|
||||||
|
self._session = aiohttp.ClientSession(
|
||||||
|
base_url=self.base_url,
|
||||||
|
connector=aiohttp.TCPConnector(limit=100),
|
||||||
|
headers=self.form_base_headers(),
|
||||||
|
)
|
||||||
|
return self
|
||||||
|
|
||||||
|
async def __aexit__(self, exc_type, exc_val, exc_tb) -> None:
|
||||||
|
"""Async context manager exit."""
|
||||||
|
if exc_val is not None:
|
||||||
|
self.logger.error(f"{exc_type}: {exc_val}")
|
||||||
|
await self._session.close()
|
||||||
|
|
||||||
|
async def _get_request(
|
||||||
|
self,
|
||||||
|
url: str,
|
||||||
|
encoding: str | None = None,
|
||||||
|
content_type: str | None = "application/json",
|
||||||
|
**kwargs,
|
||||||
|
):
|
||||||
|
"""
|
||||||
|
Выполняет GET запрос.
|
||||||
|
|
||||||
|
Raises:
|
||||||
|
SuperTeneraConnectionError: При ошибке запроса
|
||||||
|
"""
|
||||||
|
kwargs["ssl"] = self._ssl_context
|
||||||
|
|
||||||
|
try:
|
||||||
|
async with self._session.get(url, **kwargs) as response:
|
||||||
|
if APP_CONFIG.app.debug:
|
||||||
|
self.logger.debug(
|
||||||
|
f"Response: {(await response.text(errors='ignore'))[:100]}"
|
||||||
|
)
|
||||||
|
response.raise_for_status()
|
||||||
|
return await response.json(encoding=encoding, content_type=content_type)
|
||||||
|
except aiohttp.ClientResponseError as e:
|
||||||
|
raise SuperTeneraConnectionError(
|
||||||
|
f"HTTP error {e.status}: {e.message} at {url}"
|
||||||
|
) from e
|
||||||
|
except (aiohttp.ClientError, TimeoutError) as e:
|
||||||
|
raise SuperTeneraConnectionError(
|
||||||
|
f"Connection error to SuperTenera API: {e}"
|
||||||
|
) from e
|
||||||
|
|
||||||
|
async def get_quotes_data(self) -> MainData:
|
||||||
|
"""
|
||||||
|
Получить данные котировок от SuperTenera.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
MainData с котировками из всех источников
|
||||||
|
|
||||||
|
Raises:
|
||||||
|
SuperTeneraConnectionError: При ошибке подключения или HTTP ошибке
|
||||||
|
"""
|
||||||
|
data = await self._get_request(APP_CONFIG.supertenera.quotes_endpoint)
|
||||||
|
return MainData.model_validate(data)
|
||||||
|
|
||||||
|
async def ping(self, **kwargs) -> Literal[True]:
|
||||||
|
"""
|
||||||
|
Быстрая проверка доступности SuperTenera API.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
True - если сервис доступен
|
||||||
|
|
||||||
|
Raises:
|
||||||
|
SuperTeneraConnectionError: При любой ошибке подключения или HTTP ошибке
|
||||||
|
"""
|
||||||
|
kwargs["ssl"] = self._ssl_context
|
||||||
|
try:
|
||||||
|
async with self._session.get(
|
||||||
|
APP_CONFIG.supertenera.quotes_endpoint,
|
||||||
|
timeout=APP_CONFIG.supertenera.timeout,
|
||||||
|
**kwargs,
|
||||||
|
) as resp:
|
||||||
|
resp.raise_for_status()
|
||||||
|
return True
|
||||||
|
except aiohttp.ClientResponseError as e:
|
||||||
|
raise SuperTeneraConnectionError(
|
||||||
|
f"HTTP error {e.status} при проверке доступности SuperTenera API"
|
||||||
|
) from e
|
||||||
|
except TimeoutError as e:
|
||||||
|
raise SuperTeneraConnectionError(
|
||||||
|
f"Timeout ({APP_CONFIG.supertenera.timeout}s) при проверке доступности SuperTenera API"
|
||||||
|
) from e
|
||||||
|
except aiohttp.ClientError as e:
|
||||||
|
raise SuperTeneraConnectionError(
|
||||||
|
f"Connection error при проверке доступности SuperTenera API: {e}"
|
||||||
|
) from e
|
||||||
|
|
||||||
|
|
||||||
|
def get_async_tenera_interface() -> SuperTeneraInterface:
|
||||||
|
"""Возвращает инстанс SuperTeneraInterface с настройками из AppContext."""
|
||||||
|
from dataloader.context import APP_CTX
|
||||||
|
|
||||||
|
return SuperTeneraInterface(
|
||||||
|
logger=APP_CTX.logger,
|
||||||
|
base_url=APP_CONFIG.supertenera.base_url,
|
||||||
|
timezone=APP_CTX.pytz_timezone,
|
||||||
|
)
|
||||||
|
|
@ -0,0 +1,350 @@
|
||||||
|
"""Схемы ответов SuperTenera API."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import re
|
||||||
|
from typing import Literal
|
||||||
|
|
||||||
|
from pydantic import BaseModel, ConfigDict, Field, RootModel, field_validator
|
||||||
|
|
||||||
|
|
||||||
|
class TeneraBaseModel(BaseModel):
|
||||||
|
"""Базовая модель для всех схем SuperTenera с настройкой populate_by_name."""
|
||||||
|
|
||||||
|
model_config = ConfigDict(
|
||||||
|
populate_by_name=True,
|
||||||
|
extra="ignore",
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class EmptyTimePoint(TeneraBaseModel):
|
||||||
|
"""
|
||||||
|
Модель заглушка для полностью пустых значений в точке времени.
|
||||||
|
Позволяет корректно валидировать случаи, когда JSON поле {} без содержимого.
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
class CbrTimePoint(TeneraBaseModel):
|
||||||
|
"""
|
||||||
|
Структура данных точки времени для источника Центрального банка России (ЦБР).
|
||||||
|
|
||||||
|
Поля:
|
||||||
|
- value: Строка с числовым значением ("80,32")
|
||||||
|
"""
|
||||||
|
|
||||||
|
value: str
|
||||||
|
|
||||||
|
|
||||||
|
class InvestingNumeric(TeneraBaseModel):
|
||||||
|
"""
|
||||||
|
Структура данных точки времени для источника Investing.com в формате по странам.
|
||||||
|
|
||||||
|
Поля (alias на русском):
|
||||||
|
- profit: Доходность
|
||||||
|
- base_value: Базовое
|
||||||
|
- max_value: Максимальное
|
||||||
|
- min_value: Минимальное
|
||||||
|
- change: Изменение
|
||||||
|
- change_ptc: Процент изменений
|
||||||
|
"""
|
||||||
|
|
||||||
|
profit: str = Field(alias="Доходность")
|
||||||
|
base_value: str = Field(alias="Осн.")
|
||||||
|
max_value: str = Field(alias="Макс.")
|
||||||
|
min_value: str = Field(alias="Мин.")
|
||||||
|
change: str = Field(alias="Изм.")
|
||||||
|
change_ptc: str = Field(alias="Изм. %")
|
||||||
|
|
||||||
|
|
||||||
|
class InvestingCandlestick(TeneraBaseModel):
|
||||||
|
"""
|
||||||
|
Структура данных точки времени для источника Investing.com в формате свечи.
|
||||||
|
|
||||||
|
Поля (alias латинскими заглавными буквами):
|
||||||
|
- open_: "O"
|
||||||
|
- high: "H"
|
||||||
|
- low: "L"
|
||||||
|
- close: "C"
|
||||||
|
- interest: "I" | None
|
||||||
|
- value: "V"
|
||||||
|
"""
|
||||||
|
|
||||||
|
open_: str = Field(alias="O")
|
||||||
|
high: str = Field(alias="H")
|
||||||
|
low: str = Field(alias="L")
|
||||||
|
close: str = Field(alias="C")
|
||||||
|
interest: str | None = Field(alias="I")
|
||||||
|
value: str = Field(alias="V")
|
||||||
|
|
||||||
|
|
||||||
|
class InvestingTimePoint(
|
||||||
|
RootModel[EmptyTimePoint | InvestingNumeric | InvestingCandlestick]
|
||||||
|
):
|
||||||
|
"""
|
||||||
|
Union-модель точки времени для источника Investing.com.
|
||||||
|
|
||||||
|
1) {} -> EmptyTImePoint
|
||||||
|
2) numeric -> InvestingNumeric
|
||||||
|
3) свечной -> InvestingCandlestick
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
class SgxTimePoint(TeneraBaseModel):
|
||||||
|
"""
|
||||||
|
Структура данных точки времени для источника Сингапурской биржи (SGX).
|
||||||
|
|
||||||
|
Поля (alias латинскими заглавными буквами):
|
||||||
|
- open_: "O"
|
||||||
|
- high: "H"
|
||||||
|
- low: "L"
|
||||||
|
- close: "C"
|
||||||
|
- interest: "I"
|
||||||
|
- value: "V"
|
||||||
|
"""
|
||||||
|
|
||||||
|
open_: str | None = Field(alias="O")
|
||||||
|
high: str | None = Field(alias="H")
|
||||||
|
low: str | None = Field(alias="L")
|
||||||
|
close: str | None = Field(alias="C")
|
||||||
|
interest: str | None = Field(alias="I")
|
||||||
|
value: str | None = Field(alias="V")
|
||||||
|
|
||||||
|
|
||||||
|
class TradingEconomicsEmptyString(RootModel[str]):
|
||||||
|
"""
|
||||||
|
Валидирует точно пустую строку ("").
|
||||||
|
|
||||||
|
Используется для точек данных TradingEconomics, содержащих пустые строковые значения.
|
||||||
|
|
||||||
|
Поля:
|
||||||
|
- root: Строка, которая должна быть точно пустой ("")
|
||||||
|
"""
|
||||||
|
|
||||||
|
root: str
|
||||||
|
|
||||||
|
@field_validator("root", mode="before")
|
||||||
|
@classmethod
|
||||||
|
def _must_be_empty(cls, v) -> Literal[""]:
|
||||||
|
if v == "":
|
||||||
|
return v
|
||||||
|
raise ValueError("not an empty string")
|
||||||
|
|
||||||
|
|
||||||
|
class TradingEconomicsStringPercent(RootModel[str]):
|
||||||
|
"""
|
||||||
|
Валидирует строки-проценты вида "3.1%" или "-0,14%".
|
||||||
|
|
||||||
|
Принимает как запятую, так и точку в качестве десятичного разделителя.
|
||||||
|
Шаблон: опциональный минус, цифры, опциональная десятичная часть, знак процента.
|
||||||
|
|
||||||
|
Поля:
|
||||||
|
root: Строка с процентным значением (например: "3.1%", "-0,14%", "15%")
|
||||||
|
"""
|
||||||
|
|
||||||
|
root: str
|
||||||
|
|
||||||
|
@field_validator("root")
|
||||||
|
@classmethod
|
||||||
|
def _check_percent(cls, v) -> str:
|
||||||
|
if isinstance(v, str) and re.match(r"^-?\d+(?:[.,]\d+)?%$", v):
|
||||||
|
return v
|
||||||
|
raise ValueError(f"invalid percent string: {v!r}")
|
||||||
|
|
||||||
|
|
||||||
|
class TradingEconomicsStringTime(RootModel[str]):
|
||||||
|
"""
|
||||||
|
Валидирует строки времени в формате "h:mm AM/PM".
|
||||||
|
|
||||||
|
Примеры: "01:15 AM", "12:30 PM", "9:45 AM"
|
||||||
|
|
||||||
|
Поля:
|
||||||
|
root: Строка времени в 12-часовом формате с AM/PM
|
||||||
|
"""
|
||||||
|
|
||||||
|
root: str
|
||||||
|
|
||||||
|
@field_validator("root")
|
||||||
|
@classmethod
|
||||||
|
def _check_time(cls, v) -> str:
|
||||||
|
if isinstance(v, str) and re.match(r"^(0?[1-9]|1[0-2]):[0-5]\d\s[AP]M$", v):
|
||||||
|
return v
|
||||||
|
raise ValueError(f"invalid time string: {v!r}")
|
||||||
|
|
||||||
|
|
||||||
|
class TradingEconomicsNumeric(TeneraBaseModel):
|
||||||
|
"""
|
    Полный числовой формат данных от TradingEconomics.

    Содержит полную рыночную информацию с ценой, дневным изменением, процентами
    и различными периодическими изменениями (недельными, месячными, с начала года, год к году).

    Поля:
        price: Текущая цена инструмента (алиас: "Price")
        day: Дневное изменение в абсолютных значениях (алиас: "Day")
        percent: Дневное изменение в процентах (алиас: "%")
        weekly: Недельное изменение (алиас: "Weekly")
        monthly: Месячное изменение (алиас: "Monthly")
        ytd: Изменение с начала года (алиас: "YTD")
        yoy: Изменение год к году (алиас: "YoY")
    """

    price: str = Field(alias="Price")
    day: str = Field(alias="Day")
    percent: str = Field(alias="%")
    weekly: str = Field(alias="Weekly")
    monthly: str = Field(alias="Monthly")
    ytd: str = Field(alias="YTD")
    yoy: str = Field(alias="YoY")


class TradingEconomicsLastPrev(TeneraBaseModel):
    """
    Формат Last/Previous/Unit от TradingEconomics.

    Содержит текущее значение, предыдущее значение и единицу измерения.
    Обычно используется для экономических индикаторов и статистики.

    Поля:
        last: Последнее (текущее) значение показателя (алиас: "Last")
        previous: Предыдущее значение показателя (алиас: "Previous")
        unit: Единица измерения показателя (алиас: "Unit")
    """

    last: str = Field(alias="Last")
    previous: str = Field(alias="Previous")
    unit: str = Field(alias="Unit")


class TradingEconomicsTimePoint(
    RootModel[
        EmptyTimePoint
        | TradingEconomicsEmptyString
        | TradingEconomicsStringPercent
        | TradingEconomicsStringTime
        | TradingEconomicsNumeric
        | TradingEconomicsLastPrev
    ]
):
    """
    Объединение всех возможных форматов точек времени TradingEconomics.

    Поддерживает:
        - Пустые объекты ({})
        - Пустые строки ("")
        - Строки-проценты ("3.1%", "-0,14%")
        - Строки времени ("01:15 AM")
        - Полные числовые объекты с полями цена/день/%
        - Объекты Last/Previous/Unit для экономических индикаторов

    Поля:
        root: Один из поддерживаемых типов точек времени TradingEconomics
    """


class BloombergTimePoint(TeneraBaseModel):
    """
    Структура данных точки времени для источника Bloomberg.

    Поля:
        - value: Строка с числовым значением ("80,32")
    """

    value: str


class TradingViewTimePoint(TeneraBaseModel):
    """
    Структура данных точки времени для источника TradingView.

    Поля (alias латинскими заглавными буквами):
        - open_: "O"
        - high: "H"
        - low: "L"
        - close: "C"
        - volume: "Vol"
    """

    open_: str | None = Field(alias="O")
    high: str | None = Field(alias="H")
    low: str | None = Field(alias="L")
    close: str | None = Field(alias="C")
    volume: str | None = Field(alias="Vol")


class TimePointUnion(
    RootModel[
        EmptyTimePoint
        | CbrTimePoint
        | InvestingTimePoint
        | SgxTimePoint
        | TradingEconomicsTimePoint
        | BloombergTimePoint
        | TradingViewTimePoint
    ]
):
    """
    Универсальное объединение для точек времени от всех поддерживаемых источников данных.

    Обрабатывает структуры данных от:
        - ЦБР (Центральный банк России)
        - Investing.com
        - SGX (Сингапурская биржа)
        - TradingEconomics
        - Bloomberg
        - TradingView

    Поля:
        root: Точка времени от любого из поддерживаемых источников данных
    """


InstrumentData = dict[str | int, TimePointUnion]
"""Тип: Отображение timestamp -> TimePointUnion."""

SourceData = dict[str, InstrumentData]
"""Тип: Отображение имени инструмента -> InstrumentData."""


class MainData(TeneraBaseModel):
    """
    Основной контейнер данных для всех источников финансовых данных от SuperTenera.

    Содержит опциональные данные от нескольких поставщиков финансовых данных,
    структурированные по источникам, а затем по инструментам.

    Поля:
        cbr: Данные от Центрального банка России (опционально)
        investing: Данные от Investing.com (опционально)
        sgx: Данные от Сингапурской биржи (опционально)
        tradingeconomics: Данные от TradingEconomics (опционально)
        bloomberg: Данные от Bloomberg (опционально)
        trading_view: Данные от TradingView (опционально, алиас: "trading_view")
    """

    cbr: SourceData | None = None
    investing: SourceData | None = None
    sgx: SourceData | None = None
    tradingeconomics: SourceData | None = None
    bloomberg: SourceData | None = None
    trading_view: SourceData | None = Field(default=None, alias="trading_view")

    @field_validator("investing", mode="before")
    @classmethod
    def _filter_investing(cls, v) -> SourceData | None:
        """
        Фильтрация данных от Investing.com.

        Убираем:
            - все ключи, у которых значение null
            - все ключи, которые выглядят как чистые числа (timestamps)

        :param v: Объект с данными от Investing.com
        :return: Отфильтрованный объект
        """
        if isinstance(v, dict):
            return {
                key: value
                for key, value in v.items()
                if value is not None and not str(key).isdigit()
            }
        return v
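A minimal sketch of how these schemas might be exercised by a caller, assuming the models above are importable from the SuperTenera schemas module (the import path below is an assumption, not part of the diff); the payload shape mirrors the docstrings, and the "before" validator is what drops null values and digit-only keys from the investing block.

# Hypothetical usage sketch; module path and payload values are illustrative only.
from dataloader.api.v1.schemas import MainData  # assumed location of the models above

raw = {
    "tradingeconomics": {
        "Crude Oil": {
            "1714003200": {
                "Price": "83.1", "Day": "0.4", "%": "0.48%",
                "Weekly": "1.2%", "Monthly": "-0.7%", "YTD": "15%", "YoY": "9%",
            },
        },
    },
    "investing": {"Brent": {}, "1714003200": None},
}

data = MainData.model_validate(raw)
# The validator already removed the null value and the digit-only key.
assert "1714003200" not in (data.investing or {})
print(data.model_dump(by_alias=True)["tradingeconomics"]["Crude Oil"])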
@ -1,5 +1,3 @@
-# logger package
-from .context_vars import ContextVarsContainer
-from .logger import LoggerConfigurator
-
-__all__ = ["ContextVarsContainer", "LoggerConfigurator"]
+from .logger import get_logger, setup_logging
+
+__all__ = ["setup_logging", "get_logger"]
@ -1,9 +1,7 @@
-# Управление контекстом запросов для логирования
-
 import uuid
 from contextvars import ContextVar
 from typing import Final


 REQUEST_ID_CTX_VAR: Final[ContextVar[str]] = ContextVar("request_id", default="")
 DEVICE_ID_CTX_VAR: Final[ContextVar[str]] = ContextVar("device_id", default="")
 SESSION_ID_CTX_VAR: Final[ContextVar[str]] = ContextVar("session_id", default="")
@ -66,7 +64,13 @@ class ContextVarsContainer:
     def gw_session_id(self, value: str) -> None:
         GW_SESSION_ID_CTX_VAR.set(value)

-    def set_context_vars(self, request_id: str = "", request_time: str = "", system_id: str = "", gw_session_id: str = "") -> None:
+    def set_context_vars(
+        self,
+        request_id: str = "",
+        request_time: str = "",
+        system_id: str = "",
+        gw_session_id: str = "",
+    ) -> None:
         if request_id:
             self.set_request_id(request_id)
         if request_time:
@ -1,146 +1,53 @@
-# Основной логгер приложения
+import logging
 import sys
-import typing
-from datetime import tzinfo
+from logging import Logger

 from loguru import logger

-from .context_vars import ContextVarsContainer
+from dataloader.config import APP_CONFIG


-# Определяем фильтры для разных типов логов
 def metric_only_filter(record: dict) -> bool:
     return "metric" in record["extra"]


 def audit_only_filter(record: dict) -> bool:
     return "audit" in record["extra"]


 def regular_log_filter(record: dict) -> bool:
     return "metric" not in record["extra"] and "audit" not in record["extra"]


-class LoggerConfigurator:
-    def __init__(
-        self,
-        log_lvl: str,
-        log_file_path: str,
-        metric_file_path: str,
-        audit_file_path: str,
-        audit_host_ip: str,
-        audit_host_uid: str,
-        context_vars_container: ContextVarsContainer,
-        timezone: tzinfo,
-    ) -> None:
-        self.context_vars_container = context_vars_container
-        self.timezone = timezone
-        self.log_lvl = log_lvl
-        self.log_file_path = log_file_path
-        self.metric_file_path = metric_file_path
-        self.audit_file_path = audit_file_path
-        self.audit_host_ip = audit_host_ip
-        self.audit_host_uid = audit_host_uid
-        self._handler_ids = []
-        self.configure_logger()
-
-    @property
-    def async_logger(self) -> "typing.Any":
-        return self._async_logger
-
-    def patch_record_with_context(self, record: dict) -> None:
-        context_data = self.context_vars_container.as_dict()
-        record["extra"].update(context_data)
-        if not record["extra"].get("request_id"):
-            record["extra"]["request_id"] = "system_event"
-
-    def configure_logger(self) -> None:
-        """Настройка логгера `loguru` с необходимыми обработчиками."""
-        logger.remove()
-        logger.patch(self.patch_record_with_context)
-
-        # Функция для безопасного форматирования консольных логов
-        def console_format(record):
-            request_id = record["extra"].get("request_id", "system_event")
-            elapsed = record["elapsed"]
-            level = record["level"].name
-            name = record["name"]
-            function = record["function"]
-            line = record["line"]
-            message = record["message"]
-            time_str = record["time"].strftime("%Y-%m-%d %H:%M:%S.%f")[:-3]
-
-            return (
-                f"<green>{time_str} ({elapsed})</green> | "
-                f"<cyan>{request_id}</cyan> | "
-                f"<level>{level: <8}</level> | "
-                f"<cyan>{name}</cyan>:<cyan>{function}</cyan>:<cyan>{line}</cyan> - "
-                f"<level>{message}</level>\n"
-            )
-
-        # Обработчик для обычных логов (консоль)
-        handler_id = logger.add(
-            sys.stdout,
-            level=self.log_lvl,
-            filter=regular_log_filter,
-            format=console_format,
-            colorize=True,
-        )
-        self._handler_ids.append(handler_id)
-
-        # Обработчик для обычных логов (файл)
-        handler_id = logger.add(
-            self.log_file_path,
-            level=self.log_lvl,
-            filter=regular_log_filter,
-            rotation="10 MB",
-            compression="zip",
-            enqueue=True,
-            serialize=True,
-        )
-        self._handler_ids.append(handler_id)
-
-        # Обработчик для метрик
-        handler_id = logger.add(
-            self.metric_file_path,
-            level="INFO",
-            filter=metric_only_filter,
-            rotation="10 MB",
-            compression="zip",
-            enqueue=True,
-            serialize=True,
-        )
-        self._handler_ids.append(handler_id)
-
-        # Обработчик для аудита
-        handler_id = logger.add(
-            self.audit_file_path,
-            level="INFO",
-            filter=audit_only_filter,
-            rotation="10 MB",
-            compression="zip",
-            enqueue=True,
-            serialize=True,
-        )
-        self._handler_ids.append(handler_id)
-
-        self._async_logger = logger
-
-    def remove_logger_handlers(self) -> None:
-        """Удаление всех обработчиков логгера."""
-        for handler_id in self._handler_ids:
-            self._async_logger.remove(handler_id)
+class InterceptHandler(logging.Handler):
+    def emit(self, record: logging.LogRecord) -> None:
+        try:
+            level = logger.level(record.levelname).name
+        except ValueError:
+            level = record.levelno
+
+        frame, depth = logging.currentframe(), 2
+        while frame.f_code.co_filename == logging.__file__:
+            frame = frame.f_back
+            depth += 1
+
+        logger.opt(depth=depth, exception=record.exc_info).log(
+            level, record.getMessage()
+        )
+
+
+def setup_logging():
+    """Настройка логгера `loguru` с необходимыми обработчиками."""
+    logger.remove()
+    logger.add(
+        sys.stdout,
+        level=APP_CONFIG.log.log_lvl,
+        filter=regular_log_filter,
+        colorize=True,
+    )
+    logging.basicConfig(handlers=[InterceptHandler()], level=0)


+def get_logger(name: str | None = None) -> Logger:
+    return logging.getLogger(name or "dataloader")
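A short sketch of how the reworked logging module is likely meant to be used, assuming the package __init__ re-exports setup_logging and get_logger as the diff above suggests: stdlib loggers are routed into loguru via InterceptHandler, so application code keeps using plain logging.Logger instances.

# Sketch under the assumption that dataloader.logger exposes setup_logging/get_logger.
from dataloader.logger import get_logger, setup_logging

setup_logging()             # installs the loguru stdout sink and the stdlib InterceptHandler
log = get_logger(__name__)  # a regular logging.Logger; records end up in loguru
log.info("service started")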
@ -1 +1 @@
-# Модели логов, метрик, событий аудита
@ -1 +1 @@
-# Функции + маскирование args
@ -1,34 +1,28 @@
-# Конфигурация логирования uvicorn
 import logging
-import sys

 from loguru import logger


 class InterceptHandler(logging.Handler):
     def emit(self, record: logging.LogRecord) -> None:
-        # Get corresponding Loguru level if it exists
         try:
             level = logger.level(record.levelname).name
         except ValueError:
             level = record.levelno

-        # Find caller from where originated the logged message
         frame, depth = logging.currentframe(), 2
         while frame.f_code.co_filename == logging.__file__:
             frame = frame.f_back
             depth += 1

         logger.opt(depth=depth, exception=record.exc_info).log(
             level, record.getMessage()
         )


 def setup_uvicorn_logging() -> None:
-    # Set all uvicorn loggers to use InterceptHandler
     for logger_name in ["uvicorn", "uvicorn.error", "uvicorn.access"]:
         log = logging.getLogger(logger_name)
         log.handlers = [InterceptHandler()]
@ -36,7 +30,6 @@ def setup_uvicorn_logging() -> None:
         log.propagate = False


-# uvicorn logging config
 LOGGING_CONFIG = {
     "version": 1,
     "disable_existing_loggers": False,
@ -0,0 +1 @@
"""Модуль для работы с хранилищем данных."""
@ -0,0 +1,64 @@
from __future__ import annotations

from sqlalchemy.ext.asyncio import (
    AsyncEngine,
    AsyncSession,
    async_sessionmaker,
    create_async_engine,
)

from dataloader.config import APP_CONFIG


def create_engine(dsn: str) -> AsyncEngine:
    """
    Создаёт асинхронный движок SQLAlchemy с поддержкой маппинга логических схем.

    Параметры:
        dsn: Строка подключения к PostgreSQL

    Возвращает:
        Настроенный AsyncEngine с schema_translate_map
    """
    pg = APP_CONFIG.pg

    schema_map = {
        "queue": pg.schema_queue,
        "quotes": pg.schema_quotes,
        "opu": pg.schema_opu,
    }

    return create_async_engine(
        dsn,
        echo=False,
        pool_size=pg.pool_size if pg.use_pool else 0,
        max_overflow=pg.max_overflow if pg.use_pool else 0,
        pool_recycle=pg.pool_recycle,
        pool_pre_ping=True,
        connect_args={
            "timeout": pg.connect_timeout,
            "command_timeout": pg.command_timeout,
        },
        execution_options={
            "schema_translate_map": schema_map,
        },
    )


def create_sessionmaker(engine: AsyncEngine) -> async_sessionmaker[AsyncSession]:
    """
    Создаёт фабрику асинхронных сессий.

    Параметры:
        engine: AsyncEngine для создания сессий

    Возвращает:
        Настроенный async_sessionmaker
    """
    return async_sessionmaker(
        bind=engine,
        class_=AsyncSession,
        expire_on_commit=False,
        autoflush=False,
        autocommit=False,
    )
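A minimal wiring sketch for the two factories above, assuming the module lives at dataloader.storage.engine and that the PG settings object exposes a dsn attribute (both are assumptions, since the diff does not show file names or the config model): statements against the logical schemas "queue", "quotes" and "opu" are rewritten through schema_translate_map.

# Sketch; dataloader.storage.engine and APP_CONFIG.pg.dsn are assumed names.
import asyncio

from sqlalchemy import text

from dataloader.config import APP_CONFIG
from dataloader.storage.engine import create_engine, create_sessionmaker


async def main() -> None:
    engine = create_engine(APP_CONFIG.pg.dsn)
    sessionmaker = create_sessionmaker(engine)

    async with sessionmaker() as session:
        # A trivial statement just to show the session factory in use.
        await session.execute(text("SELECT 1"))

    await engine.dispose()


asyncio.run(main())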
@ -0,0 +1,24 @@
"""
ORM модели для работы с базой данных.
Организованы по доменам для масштабируемости.
"""

from __future__ import annotations

from .base import Base
from .opu import BriefDigitalCertificateOpu
from .queue import DLJob, DLJobEvent, dl_status_enum
from .quote import Quote
from .quote_section import QuoteSection
from .quote_value import QuoteValue

__all__ = [
    "Base",
    "BriefDigitalCertificateOpu",
    "DLJob",
    "DLJobEvent",
    "dl_status_enum",
    "Quote",
    "QuoteSection",
    "QuoteValue",
]
@ -0,0 +1,12 @@
from __future__ import annotations

from sqlalchemy.orm import DeclarativeBase


class Base(DeclarativeBase):
    """
    Базовый класс для всех ORM моделей приложения.
    Используется SQLAlchemy 2.0+ declarative style.
    """

    pass
@ -0,0 +1,67 @@
"""OPU модель."""

from __future__ import annotations

from datetime import date, datetime

from sqlalchemy import TIMESTAMP, BigInteger, Date, Integer, Numeric, String, text
from sqlalchemy.orm import Mapped, mapped_column

from dataloader.storage.models.base import Base


class BriefDigitalCertificateOpu(Base):
    """Представляет brief_digital_certificate_opu - показатели ОПУ."""

    __tablename__ = "brief_digital_certificate_opu"
    __table_args__ = ({"schema": "opu"},)

    object_id: Mapped[str] = mapped_column(
        String, primary_key=True, server_default=text("'-'")
    )
    object_nm: Mapped[str | None] = mapped_column(String)
    desk_nm: Mapped[str] = mapped_column(
        String, primary_key=True, server_default=text("'-'")
    )
    actdate: Mapped[date] = mapped_column(
        Date, primary_key=True, server_default=text("CURRENT_DATE")
    )
    layer_cd: Mapped[str] = mapped_column(
        String, primary_key=True, server_default=text("'-'")
    )
    layer_nm: Mapped[str | None] = mapped_column(String)
    opu_cd: Mapped[str] = mapped_column(String, primary_key=True)
    opu_nm_sh: Mapped[str | None] = mapped_column(String)
    opu_nm: Mapped[str | None] = mapped_column(String)
    opu_lvl: Mapped[int] = mapped_column(
        Integer, primary_key=True, server_default=text("'-1'")
    )
    opu_prnt_cd: Mapped[str] = mapped_column(
        String, primary_key=True, server_default=text("'-'")
    )
    opu_prnt_nm_sh: Mapped[str | None] = mapped_column(String)
    opu_prnt_nm: Mapped[str | None] = mapped_column(String)
    sum_amountrub_p_usd: Mapped[float | None] = mapped_column(Numeric)
    wf_load_id: Mapped[int] = mapped_column(
        BigInteger, nullable=False, server_default=text("'-1'")
    )
    wf_load_dttm: Mapped[datetime] = mapped_column(
        TIMESTAMP(timezone=False),
        nullable=False,
        server_default=text("CURRENT_TIMESTAMP"),
    )
    wf_row_id: Mapped[int] = mapped_column(
        BigInteger, nullable=False, server_default=text("'-1'")
    )
    object_tp: Mapped[str | None] = mapped_column(String)
    object_unit: Mapped[str] = mapped_column(
        String, primary_key=True, server_default=text("'-'")
    )
    measure: Mapped[str | None] = mapped_column(String)
    product_nm: Mapped[str | None] = mapped_column(String)
    product_prnt_nm: Mapped[str | None] = mapped_column(String)
    sum_amountrub_p_usd_s: Mapped[float | None] = mapped_column(Numeric)

    def __repr__(self) -> str:
        """Строковое представление."""
        return f"<BriefDigitalCertificateOpu object_id={self.object_id} actdate={self.actdate}>"
@ -0,0 +1,83 @@
from __future__ import annotations

from datetime import datetime
from typing import Any, Optional

from sqlalchemy import BigInteger, DateTime, Text
from sqlalchemy.dialects.postgresql import ENUM, JSONB, UUID
from sqlalchemy.orm import Mapped, mapped_column

from .base import Base

dl_status_enum = ENUM(
    "queued",
    "running",
    "succeeded",
    "failed",
    "canceled",
    "lost",
    name="dl_status",
    create_type=False,
    native_enum=True,
)


class DLJob(Base):
    """
    Модель таблицы очереди задач dl_jobs.
    Использует логическое имя схемы 'queue' для поддержки schema_translate_map.
    """

    __tablename__ = "dl_jobs"
    __table_args__ = {"schema": "queue"}

    job_id: Mapped[str] = mapped_column(UUID(as_uuid=False), primary_key=True)
    queue: Mapped[str] = mapped_column(Text, nullable=False)
    task: Mapped[str] = mapped_column(Text, nullable=False)
    args: Mapped[dict[str, Any]] = mapped_column(JSONB, default=dict, nullable=False)
    idempotency_key: Mapped[Optional[str]] = mapped_column(Text, unique=True)
    lock_key: Mapped[str] = mapped_column(Text, nullable=False)
    partition_key: Mapped[str] = mapped_column(Text, default="", nullable=False)
    priority: Mapped[int] = mapped_column(nullable=False, default=100)
    available_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True), nullable=False
    )
    status: Mapped[str] = mapped_column(
        dl_status_enum, nullable=False, default="queued"
    )
    attempt: Mapped[int] = mapped_column(nullable=False, default=0)
    max_attempts: Mapped[int] = mapped_column(nullable=False, default=5)
    lease_ttl_sec: Mapped[int] = mapped_column(nullable=False, default=60)
    lease_expires_at: Mapped[Optional[datetime]] = mapped_column(
        DateTime(timezone=True)
    )
    heartbeat_at: Mapped[Optional[datetime]] = mapped_column(DateTime(timezone=True))
    cancel_requested: Mapped[bool] = mapped_column(nullable=False, default=False)
    progress: Mapped[dict[str, Any]] = mapped_column(
        JSONB, default=dict, nullable=False
    )
    error: Mapped[Optional[str]] = mapped_column(Text)
    producer: Mapped[Optional[str]] = mapped_column(Text)
    consumer_group: Mapped[Optional[str]] = mapped_column(Text)
    load_dttm: Mapped[datetime] = mapped_column(DateTime(timezone=True), nullable=False)
    started_at: Mapped[Optional[datetime]] = mapped_column(DateTime(timezone=True))
    finished_at: Mapped[Optional[datetime]] = mapped_column(DateTime(timezone=True))


class DLJobEvent(Base):
    """
    Модель таблицы журнала событий dl_job_events.
    Использует логическое имя схемы 'queue' для поддержки schema_translate_map.
    """

    __tablename__ = "dl_job_events"
    __table_args__ = {"schema": "queue"}

    event_id: Mapped[int] = mapped_column(
        BigInteger, primary_key=True, autoincrement=True
    )
    job_id: Mapped[str] = mapped_column(UUID(as_uuid=False), nullable=False)
    queue: Mapped[str] = mapped_column(Text, nullable=False)
    load_dttm: Mapped[datetime] = mapped_column(DateTime(timezone=True), nullable=False)
    kind: Mapped[str] = mapped_column(Text, nullable=False)
    payload: Mapped[Optional[dict[str, Any]]] = mapped_column(JSONB)
@ -0,0 +1,62 @@
"""Quote модель."""

from __future__ import annotations

from datetime import datetime
from typing import TYPE_CHECKING

from sqlalchemy import (
    JSON,
    TIMESTAMP,
    BigInteger,
    ForeignKey,
    String,
    UniqueConstraint,
    func,
)
from sqlalchemy.orm import Mapped, mapped_column, relationship

from dataloader.storage.models.base import Base

if TYPE_CHECKING:
    from .quote_section import QuoteSection
    from .quote_value import QuoteValue


class Quote(Base):
    """Представляет custom_cib_quotes.quotes."""

    __tablename__ = "quotes"
    __table_args__ = (
        UniqueConstraint("quote_sect_id", "name", name="ak_uq_quote_name_and_quotes"),
        {"schema": "quotes"},
    )

    quote_id: Mapped[int] = mapped_column(BigInteger(), primary_key=True)
    name: Mapped[str] = mapped_column(String, nullable=False)
    params: Mapped[dict | None] = mapped_column(JSON)
    srce: Mapped[str | None] = mapped_column(String)
    ticker: Mapped[str | None] = mapped_column(String)
    quote_sect_id: Mapped[int] = mapped_column(
        ForeignKey(
            "quotes.quotes_sect.quote_sect_id", ondelete="CASCADE", onupdate="CASCADE"
        ),
        nullable=False,
    )
    last_update_dttm: Mapped[datetime | None] = mapped_column(TIMESTAMP(timezone=True))

    load_dttm: Mapped[datetime] = mapped_column(
        TIMESTAMP(timezone=False),
        nullable=False,
        server_default=func.current_timestamp(),
    )

    section: Mapped[QuoteSection] = relationship(back_populates="quotes")
    values: Mapped[list[QuoteValue]] = relationship(
        back_populates="quote",
        cascade="all, delete-orphan",
    )

    def __repr__(self) -> str:
        """Строковое представление."""
        return f"<Quote id={self.quote_id} name='{self.name}'>"
@ -0,0 +1,43 @@
"""Quote-section модель."""

from __future__ import annotations

from datetime import datetime
from typing import TYPE_CHECKING

from sqlalchemy import JSON, TIMESTAMP, Integer, Sequence, String, func
from sqlalchemy.orm import Mapped, mapped_column, relationship

from dataloader.storage.models.base import Base

if TYPE_CHECKING:
    from .quote import Quote


class QuoteSection(Base):
    """Представляет custom_cib_quotes.quotes_sect."""

    __tablename__ = "quotes_sect"
    __table_args__ = {"schema": "quotes"}

    quote_sect_id: Mapped[int] = mapped_column(
        Integer(),
        Sequence("quotes_section_id_seq", schema="quotes"),
        primary_key=True,
    )
    name: Mapped[str] = mapped_column(String, nullable=False)
    params: Mapped[dict | None] = mapped_column(JSON)
    load_dttm: Mapped[datetime] = mapped_column(
        TIMESTAMP(timezone=False),
        nullable=False,
        server_default=func.current_timestamp(),
    )

    quotes: Mapped[list[Quote]] = relationship(
        back_populates="section",
        cascade="all, delete-orphan",
    )

    def __repr__(self) -> str:
        """Строковое представление."""
        return f"<QuoteSection id={self.quote_sect_id} name='{self.name}'>"
@ -0,0 +1,87 @@
"""Quote-value модель."""

from __future__ import annotations

from datetime import datetime
from typing import TYPE_CHECKING

from sqlalchemy import (
    TIMESTAMP,
    BigInteger,
    Boolean,
    DateTime,
    Float,
    ForeignKey,
    String,
    UniqueConstraint,
    func,
)
from sqlalchemy.orm import Mapped, mapped_column, relationship

from dataloader.storage.models.base import Base

if TYPE_CHECKING:
    from .quote import Quote


class QuoteValue(Base):
    """Представляет custom_cib_quotes.quotes_values."""

    __tablename__ = "quotes_values"
    __table_args__ = (
        UniqueConstraint("quote_id", "dt", name="ak_uq_quote_and_date_quotes"),
        {"schema": "quotes"},
    )

    quotes_values_id: Mapped[int] = mapped_column(
        BigInteger(),
        primary_key=True,
        autoincrement=True,
    )
    quote_id: Mapped[int] = mapped_column(
        ForeignKey("quotes.quotes.quote_id", ondelete="RESTRICT", onupdate="RESTRICT"),
        nullable=False,
    )
    dt: Mapped[datetime] = mapped_column(DateTime, nullable=False)

    price_o: Mapped[float | None] = mapped_column(Float)
    price_c: Mapped[float | None] = mapped_column(Float)
    price_h: Mapped[float | None] = mapped_column(Float)
    price_l: Mapped[float | None] = mapped_column(Float)
    volume: Mapped[float | None] = mapped_column(Float)

    load_dttm: Mapped[datetime] = mapped_column(
        TIMESTAMP(timezone=False),
        nullable=False,
        server_default=func.current_timestamp(),
    )

    unit: Mapped[str | None] = mapped_column(String)
    key: Mapped[int | None] = mapped_column(BigInteger())

    value_profit: Mapped[float | None] = mapped_column(Float)
    value_base: Mapped[float | None] = mapped_column(Float)
    value_max: Mapped[float | None] = mapped_column(Float)
    value_min: Mapped[float | None] = mapped_column(Float)
    value_chng: Mapped[float | None] = mapped_column(Float)
    value_chng_prc: Mapped[float | None] = mapped_column(Float)

    price_i: Mapped[float | None] = mapped_column(Float)
    price: Mapped[float | None] = mapped_column(Float)
    value_day: Mapped[float | None] = mapped_column(Float)
    value_prc: Mapped[float | None] = mapped_column(Float)
    value_weekly_prc: Mapped[float | None] = mapped_column(Float)
    value_monthly_prc: Mapped[float | None] = mapped_column(Float)
    value_ytd_prc: Mapped[float | None] = mapped_column(Float)
    value_yoy_prc: Mapped[float | None] = mapped_column(Float)
    value_last: Mapped[float | None] = mapped_column(Float)
    value_previous: Mapped[float | None] = mapped_column(Float)

    is_empty_str_flg: Mapped[bool | None] = mapped_column(Boolean)
    interest: Mapped[float | None] = mapped_column(Float)

    quote: Mapped[Quote] = relationship(back_populates="values")

    def __repr__(self) -> str:
        """Строковое представление."""
        return f"<QuoteValue id={self.quotes_values_id} dt={self.dt}>"
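A small sketch of how the three quote models above hang together through their relationships, assuming a session obtained from the factory shown earlier; the names and values are illustrative, and nothing is flushed here.

# Illustrative object graph; cascades come from the relationship() declarations above.
from datetime import datetime

from dataloader.storage.models import Quote, QuoteSection, QuoteValue

section = QuoteSection(name="commodities")
quote = Quote(name="Brent", srce="investing", ticker="BZ=F", section=section)
quote.values.append(
    QuoteValue(dt=datetime(2024, 1, 9), price_c=77.59, volume=1000.0)
)
# session.add(section) would cascade to the quote and its values
# because both relationships declare cascade="all, delete-orphan".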
@ -0,0 +1,81 @@
from __future__ import annotations

import asyncio
from typing import Callable, Optional

import asyncpg


class PGNotifyListener:
    """
    Прослушиватель PostgreSQL NOTIFY для канала 'dl_jobs'.
    """

    def __init__(
        self,
        dsn: str,
        queue: str,
        callback: Callable[[], None],
        stop_event: asyncio.Event,
    ):
        self._dsn = dsn
        self._queue = queue
        self._callback = callback
        self._stop = stop_event
        self._conn: Optional[asyncpg.Connection] = None
        self._task: Optional[asyncio.Task] = None
        self._on_notify_handler: Optional[Callable] = None

    async def start(self) -> None:
        """
        Запускает прослушивание уведомлений.
        """
        dsn = self._dsn
        if dsn.startswith("postgresql+asyncpg://"):
            dsn = dsn.replace("postgresql+asyncpg://", "postgresql://")

        self._conn = await asyncpg.connect(dsn)

        def on_notify(connection, pid, channel, payload):
            if channel == "dl_jobs" and payload == self._queue:
                try:
                    self._callback()
                except Exception:
                    pass

        self._on_notify_handler = on_notify
        await self._conn.execute("LISTEN dl_jobs")
        await self._conn.add_listener("dl_jobs", self._on_notify_handler)

        self._task = asyncio.create_task(self._monitor_connection())

    async def _monitor_connection(self) -> None:
        """
        Мониторит соединение и останавливает при stop_event.
        """
        try:
            await self._stop.wait()
        finally:
            await self.stop()

    async def stop(self) -> None:
        """
        Останавливает прослушивание и закрывает соединение.
        """
        if self._task and not self._task.done():
            self._task.cancel()
            try:
                await self._task
            except asyncio.CancelledError:
                pass

        if self._conn and self._on_notify_handler:
            try:
                await self._conn.remove_listener("dl_jobs", self._on_notify_handler)
            except Exception:
                pass
            try:
                await self._conn.close()
            except Exception:
                pass
            self._conn = None
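A usage sketch for the listener, assuming a worker wants to wake up as soon as something is enqueued: a producer (for example a trigger or an explicit statement) would run NOTIFY dl_jobs, '<queue name>', and the callback fires only when the payload matches the listener's queue. The DSN and queue name below are illustrative.

import asyncio

from dataloader.storage.notify_listener import PGNotifyListener


async def main() -> None:
    wake = asyncio.Event()
    stop = asyncio.Event()

    listener = PGNotifyListener(
        dsn="postgresql+asyncpg://app:secret@localhost:5432/dataloader",  # illustrative DSN
        queue="load.opu",                                                 # illustrative queue name
        callback=wake.set,
        stop_event=stop,
    )
    await listener.start()

    await wake.wait()        # released by: NOTIFY dl_jobs, 'load.opu'
    print("new job available")

    stop.set()               # _monitor_connection() reacts to the stop event
    await listener.stop()    # calling stop() directly is also safe


asyncio.run(main())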
@ -0,0 +1,16 @@
"""
Репозитории для работы с базой данных.
Организованы по доменам для масштабируемости.
"""

from __future__ import annotations

from .opu import OpuRepository
from .queue import QueueRepository
from .quotes import QuotesRepository

__all__ = [
    "OpuRepository",
    "QueueRepository",
    "QuotesRepository",
]
@ -0,0 +1,193 @@
"""Репозиторий для работы с данными OPU."""

from __future__ import annotations

from collections.abc import Sequence
from typing import Any

from sqlalchemy import DDL, text
from sqlalchemy.dialects.postgresql import insert as pg_insert
from sqlalchemy.ext.asyncio import AsyncSession

from dataloader.config import APP_CONFIG
from dataloader.storage.models import BriefDigitalCertificateOpu


class OpuRepository:
    """Репозиторий для работы с таблицей brief_digital_certificate_opu."""

    def __init__(self, session: AsyncSession):
        """
        Инициализация репозитория.

        Args:
            session: Асинхронная сессия SQLAlchemy
        """
        self.s = session
        self.schema = APP_CONFIG.pg.schema_opu
        self.batch_size = APP_CONFIG.pg.batch_size
        self.deduplicate = APP_CONFIG.pg.deduplicate

    def _deduplicate_records(self, records: Sequence[dict[str, Any]]) -> Sequence[dict[str, Any]]:
        """
        Быстро удаляет дубликаты по уникальному индексу, оставляя последнее вхождение.

        Args:
            records: Список словарей с данными

        Returns:
            Список уникальных записей (или исходный если дедупликация выключена)
        """
        if not self.deduplicate:
            return records  # Возвращаем как есть, без копирования

        if not records:
            return []

        # Ключи уникального индекса для OPU
        unique_keys = (
            "object_id",
            "desk_nm",
            "actdate",
            "layer_cd",
            "opu_cd",
            "opu_lvl",
            "opu_prnt_cd",
            "object_unit",
        )

        seen = {}
        for idx, record in enumerate(records):
            # Формируем ключ из уникальных полей
            key = tuple(record.get(k) for k in unique_keys)
            # Оставляем последнее вхождение (перезаписываем индекс)
            seen[key] = idx

        # Возвращаем записи в порядке их последнего появления
        return [records[idx] for idx in sorted(seen.values())]

    async def truncate(
        self, *, cascade: bool = False, restart_identity: bool = True
    ) -> None:
        """
        Быстро очищает таблицу, уважая имя схемы и безопасное квотирование для PostgreSQL.

        Args:
            cascade: добавляет CASCADE
            restart_identity: добавляет RESTART IDENTITY
        """
        table = BriefDigitalCertificateOpu.__table__

        def quote_ident(name: str) -> str:
            """Экранирует кавычки и оборачивает имя в двойные."""
            return f'"{name.replace("\"", "\"\"")}"'

        schema_quoted = quote_ident(self.schema)
        table_quoted = quote_ident(table.name)
        full_table_name = f"{schema_quoted}.{table_quoted}"

        opts = []
        if restart_identity:
            opts.append("RESTART IDENTITY")
        if cascade:
            opts.append("CASCADE")

        suffix = f" {' '.join(opts)}" if opts else ""

        await self.s.execute(text(f"TRUNCATE TABLE {full_table_name}{suffix}"))
        await self.s.commit()

    async def bulk_insert(
        self, records: Sequence[dict[str, Any]], batch_size: int | None = None
    ) -> int:
        """
        Массовая вставка записей в таблицу батчами.

        Args:
            records: Список словарей с данными для вставки
            batch_size: Размер батча (default: из конфига PG_BATCH_SIZE)

        Returns:
            Количество вставленных записей
        """
        if not records:
            return 0

        if batch_size is None:
            batch_size = self.batch_size

        # Дедупликация всех записей перед разбиением на батчи
        records = self._deduplicate_records(records)

        total_inserted = 0

        for i in range(0, len(records), batch_size):
            batch = records[i : i + batch_size]

            async with self.s.begin_nested():
                stmt = pg_insert(BriefDigitalCertificateOpu).values(batch)
                await self.s.execute(stmt)
                await self.s.flush()

            total_inserted += len(batch)

        return total_inserted

    async def bulk_upsert(
        self, records: Sequence[dict[str, Any]], batch_size: int | None = None
    ) -> int:
        """
        Массовая вставка/обновление записей (UPSERT) батчами.

        Args:
            records: Список словарей с данными
            batch_size: Размер батча (default: из конфига PG_BATCH_SIZE)

        Returns:
            Количество обработанных записей
        """
        if not records:
            return 0

        if batch_size is None:
            batch_size = self.batch_size

        # Дедупликация всех записей перед разбиением на батчи
        records = self._deduplicate_records(records)

        update_columns = {
            c.name
            for c in BriefDigitalCertificateOpu.__table__.columns
            if not c.primary_key
            and c.name not in {"wf_load_id", "wf_load_dttm", "wf_row_id"}
        }

        total_upserted = 0

        for i in range(0, len(records), batch_size):
            batch = records[i : i + batch_size]

            insert_stmt = pg_insert(BriefDigitalCertificateOpu).values(batch)
            update_cols = {col: insert_stmt.excluded[col] for col in update_columns}

            stmt = insert_stmt.on_conflict_do_update(
                index_elements=[
                    "object_id",
                    "desk_nm",
                    "actdate",
                    "layer_cd",
                    "opu_cd",
                    "opu_lvl",
                    "opu_prnt_cd",
                    "object_unit",
                ],
                set_=update_cols,
            )

            async with self.s.begin_nested():
                await self.s.execute(stmt)
                await self.s.flush()

            total_upserted += len(batch)

        return total_upserted
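A short caller-side sketch for the OPU repository, assuming a session factory like the one from create_sessionmaker and that the rows are dicts keyed by the BriefDigitalCertificateOpu column names; the helper name is hypothetical.

from dataloader.storage.repositories import OpuRepository


async def load_opu(sessionmaker, rows: list[dict]) -> int:
    """Hypothetical loader: deduplicates, batches and upserts OPU rows."""
    async with sessionmaker() as session:
        repo = OpuRepository(session)
        processed = await repo.bulk_upsert(rows)  # dedup + batched ON CONFLICT DO UPDATE
        await session.commit()
        return processed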
@ -0,0 +1,386 @@
from __future__ import annotations

from datetime import datetime, timedelta, timezone
from typing import Any, Optional

from sqlalchemy import func, select, update
from sqlalchemy.ext.asyncio import AsyncSession

from dataloader.storage.models import DLJob, DLJobEvent
from dataloader.storage.schemas import CreateJobRequest, JobStatus


class QueueRepository:
    """
    Репозиторий для работы с очередью задач и журналом событий.
    """

    def __init__(self, session: AsyncSession):
        self.s = session

    async def create_or_get(self, req: CreateJobRequest) -> tuple[str, str]:
        """
        Идемпотентно создаёт запись в очереди и возвращает (job_id, status).

        Параметры:
            req: DTO с параметрами задачи

        Возвращает:
            Кортеж (job_id, status)
        """
        async with self.s.begin():
            if req.idempotency_key:
                q = select(DLJob).where(DLJob.idempotency_key == req.idempotency_key)
                r = await self.s.execute(q)
                ex = r.scalar_one_or_none()
                if ex:
                    return ex.job_id, ex.status

            row = DLJob(
                job_id=req.job_id,
                queue=req.queue,
                task=req.task,
                args=req.args or {},
                idempotency_key=req.idempotency_key,
                lock_key=req.lock_key,
                partition_key=req.partition_key or "",
                priority=req.priority,
                available_at=req.available_at,
                status="queued",
                attempt=0,
                max_attempts=req.max_attempts,
                lease_ttl_sec=req.lease_ttl_sec,
                lease_expires_at=None,
                heartbeat_at=None,
                cancel_requested=False,
                progress={},
                error=None,
                producer=req.producer,
                consumer_group=req.consumer_group,
                load_dttm=datetime.now(timezone.utc),
                started_at=None,
                finished_at=None,
            )
            self.s.add(row)
            await self._append_event(
                req.job_id, req.queue, "queued", {"task": req.task}
            )
            return req.job_id, "queued"

    async def get_status(self, job_id: str) -> Optional[JobStatus]:
        """
        Возвращает статус задачи.

        Параметры:
            job_id: Идентификатор задачи

        Возвращает:
            DTO JobStatus или None, если задача не найдена
        """
        q = select(
            DLJob.job_id,
            DLJob.status,
            DLJob.attempt,
            DLJob.started_at,
            DLJob.finished_at,
            DLJob.heartbeat_at,
            DLJob.error,
            DLJob.progress,
        ).where(DLJob.job_id == job_id)
        r = await self.s.execute(q)
        m = r.first()
        if not m:
            return None
        return JobStatus(
            job_id=m.job_id,
            status=m.status,
            attempt=m.attempt,
            started_at=m.started_at,
            finished_at=m.finished_at,
            heartbeat_at=m.heartbeat_at,
            error=m.error,
            progress=m.progress or {},
        )

    async def cancel(self, job_id: str) -> bool:
        """
        Устанавливает флаг отмены для задачи.

        Параметры:
            job_id: Идентификатор задачи

        Возвращает:
            True, если задача найдена и флаг установлен
        """
        async with self.s.begin():
            job = await self._get(job_id)
            if not job:
                return False
            job.cancel_requested = True
            await self._append_event(job_id, job.queue, "cancel_requested", None)
            return True

    async def claim_one(
        self, queue: str, claim_backoff_sec: int
    ) -> Optional[dict[str, Any]]:
        """
        Захватывает одну задачу из очереди с учётом блокировок и выставляет running.

        Параметры:
            queue: Имя очереди
            claim_backoff_sec: Время отката при неудаче с advisory lock

        Возвращает:
            Словарь с данными задачи или None
        """
        async with self.s.begin():
            q = (
                select(DLJob)
                .where(
                    DLJob.status == "queued",
                    DLJob.queue == queue,
                    DLJob.available_at <= func.now(),
                )
                .order_by(DLJob.priority.asc(), DLJob.load_dttm.asc())
                .with_for_update(skip_locked=True)
                .limit(1)
            )
            r = await self.s.execute(q)
            job: Optional[DLJob] = r.scalar_one_or_none()
            if not job:
                return None

            job.status = "running"
            job.started_at = job.started_at or datetime.now(timezone.utc)
            job.attempt = int(job.attempt) + 1
            job.heartbeat_at = datetime.now(timezone.utc)
            job.lease_expires_at = datetime.now(timezone.utc) + timedelta(
                seconds=int(job.lease_ttl_sec)
            )

            ok = await self._try_advisory_lock(job.lock_key)
            if not ok:
                job.status = "queued"
                job.available_at = datetime.now(timezone.utc) + timedelta(
                    seconds=claim_backoff_sec
                )
                return None

            await self._append_event(
                job.job_id, job.queue, "picked", {"attempt": job.attempt}
            )

            return {
                "job_id": job.job_id,
                "queue": job.queue,
                "task": job.task,
                "args": job.args or {},
                "lock_key": job.lock_key,
                "partition_key": job.partition_key or "",
                "lease_ttl_sec": int(job.lease_ttl_sec),
                "attempt": int(job.attempt),
            }

    async def heartbeat(self, job_id: str, ttl_sec: int) -> tuple[bool, bool]:
        """
        Обновляет heartbeat и продлевает lease.

        Параметры:
            job_id: Идентификатор задачи
            ttl_sec: TTL аренды в секундах

        Возвращает:
            Кортеж (success, cancel_requested)
        """
        async with self.s.begin():
            job = await self._get(job_id)
            if not job or job.status != "running":
                return False, False

            cancel_requested = bool(job.cancel_requested)
            now = datetime.now(timezone.utc)
            q = (
                update(DLJob)
                .where(DLJob.job_id == job_id, DLJob.status == "running")
                .values(
                    heartbeat_at=now,
                    lease_expires_at=now + timedelta(seconds=int(ttl_sec)),
                )
            )
            await self.s.execute(q)
            await self._append_event(
                job_id, await self._resolve_queue(job_id), "heartbeat", {"ttl": ttl_sec}
            )
            return True, cancel_requested

    async def finish_ok(self, job_id: str) -> None:
        """
        Помечает задачу как выполненную успешно и снимает advisory-lock.

        Параметры:
            job_id: Идентификатор задачи
        """
        async with self.s.begin():
            job = await self._get(job_id)
            if not job:
                return
            job.status = "succeeded"
            job.finished_at = datetime.now(timezone.utc)
            job.lease_expires_at = None
            await self._append_event(job_id, job.queue, "succeeded", None)
            await self._advisory_unlock(job.lock_key)

    async def finish_fail_or_retry(
        self, job_id: str, err: str, is_canceled: bool = False
    ) -> None:
        """
        Помечает задачу как failed, canceled или возвращает в очередь с задержкой.

        Параметры:
            job_id: Идентификатор задачи
            err: Текст ошибки
            is_canceled: Флаг отмены
        """
        async with self.s.begin():
            job = await self._get(job_id)
            if not job:
                return

            if is_canceled:
                job.status = "canceled"
                job.error = err
                job.finished_at = datetime.now(timezone.utc)
                job.lease_expires_at = None
                await self._append_event(job_id, job.queue, "canceled", {"error": err})
            else:
                can_retry = int(job.attempt) < int(job.max_attempts)
                if can_retry:
                    job.status = "queued"
                    job.available_at = datetime.now(timezone.utc) + timedelta(
                        seconds=30 * int(job.attempt)
                    )
                    job.error = err
                    job.lease_expires_at = None
                    await self._append_event(
                        job_id,
                        job.queue,
                        "requeue",
                        {"attempt": job.attempt, "error": err},
                    )
                else:
                    job.status = "failed"
                    job.error = err
                    job.finished_at = datetime.now(timezone.utc)
                    job.lease_expires_at = None
                    await self._append_event(
                        job_id, job.queue, "failed", {"error": err}
                    )
            await self._advisory_unlock(job.lock_key)

    async def requeue_lost(self, now: Optional[datetime] = None) -> list[str]:
        """
        Возвращает протухшие running-задачи в очередь.

        Параметры:
            now: Текущее время (по умолчанию используется текущий момент UTC)

        Возвращает:
            Список job_id перепоставленных задач
        """
        now = now or datetime.now(timezone.utc)
        async with self.s.begin():
            q = (
                select(DLJob)
                .where(
                    DLJob.status == "running",
                    DLJob.lease_expires_at.is_not(None),
                    DLJob.lease_expires_at < now,
                )
                .with_for_update(skip_locked=True)
            )
            r = await self.s.execute(q)
            rows = list(r.scalars().all())
            ids: list[str] = []
            for job in rows:
                job.status = "queued"
                job.available_at = now
                job.lease_expires_at = None
                ids.append(job.job_id)
                await self._append_event(job.job_id, job.queue, "requeue_lost", None)
            return ids

    async def _get(self, job_id: str) -> Optional[DLJob]:
        """
        Возвращает ORM-объект задачи с блокировкой.

        Параметры:
            job_id: Идентификатор задачи

        Возвращает:
            ORM модель DLJob или None
        """
        r = await self.s.execute(
            select(DLJob)
            .where(DLJob.job_id == job_id)
            .with_for_update(skip_locked=True)
        )
        return r.scalar_one_or_none()

    async def _resolve_queue(self, job_id: str) -> str:
        """
        Возвращает имя очереди для события.

        Параметры:
            job_id: Идентификатор задачи

        Возвращает:
            Имя очереди
        """
        r = await self.s.execute(select(DLJob.queue).where(DLJob.job_id == job_id))
        v = r.scalar_one_or_none()
        return v or ""

    async def _append_event(
        self, job_id: str, queue: str, kind: str, payload: Optional[dict[str, Any]]
    ) -> None:
        """
        Добавляет запись в журнал событий.

        Параметры:
            job_id: Идентификатор задачи
            queue: Имя очереди
            kind: Тип события
            payload: Дополнительные данные события
        """
        ev = DLJobEvent(
            job_id=job_id,
            queue=queue or "",
            load_dttm=datetime.now(timezone.utc),
            kind=kind,
            payload=payload or None,
        )
        self.s.add(ev)

    async def _try_advisory_lock(self, lock_key: str) -> bool:
        """
        Пытается получить advisory-lock в Postgres.

        Параметры:
            lock_key: Ключ блокировки

        Возвращает:
            True, если блокировка получена
        """
        r = await self.s.execute(
            select(func.pg_try_advisory_lock(func.hashtext(lock_key)))
        )
        return bool(r.scalar())

    async def _advisory_unlock(self, lock_key: str) -> None:
        """
        Снимает advisory-lock в Postgres.

        Параметры:
            lock_key: Ключ блокировки
        """
        await self.s.execute(select(func.pg_advisory_unlock(func.hashtext(lock_key))))
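A condensed consumer-side sketch of the intended job lifecycle, assuming a session factory like the one from create_sessionmaker: claim a job (SKIP LOCKED plus advisory lock), send a heartbeat while working, then mark it succeeded or hand it to the retry/cancel path. The function name and backoff value are illustrative; the real worker loop lives in the workers module below.

from dataloader.storage.repositories import QueueRepository


async def run_one(sessionmaker, queue_name: str) -> None:
    """Hypothetical single-shot consumer built on QueueRepository."""
    async with sessionmaker() as session:
        repo = QueueRepository(session)

        job = await repo.claim_one(queue_name, claim_backoff_sec=15)
        if job is None:
            return  # nothing queued, or the advisory lock was busy

        try:
            # ... execute job["task"] with job["args"] here ...
            alive, cancel = await repo.heartbeat(job["job_id"], job["lease_ttl_sec"])
            if cancel:
                await repo.finish_fail_or_retry(job["job_id"], "canceled", is_canceled=True)
                return
            await repo.finish_ok(job["job_id"])
        except Exception as exc:
            # requeues with a delay while attempts remain, otherwise marks the job failed
            await repo.finish_fail_or_retry(job["job_id"], str(exc))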
@ -0,0 +1,136 @@
"""Репозиторий для работы с котировками."""

from __future__ import annotations

from collections.abc import Sequence
from datetime import datetime
from typing import Any

import sqlalchemy as sa
from sqlalchemy.dialects.postgresql import insert as pg_insert
from sqlalchemy.ext.asyncio import AsyncSession

from dataloader.storage.models import Quote, QuoteSection, QuoteValue


class QuotesRepository:
    """Репозиторий для работы с котировками (Quote, QuoteSection, QuoteValue)."""

    def __init__(self, session: AsyncSession):
        """
        Инициализация репозитория.

        Args:
            session: Асинхронная сессия SQLAlchemy
        """
        self.s = session

    async def get_section_by_name(self, name: str) -> QuoteSection | None:
        """
        Получить секцию по имени.

        Args:
            name: Имя секции

        Returns:
            QuoteSection или None
        """
        stmt = sa.select(QuoteSection).where(QuoteSection.name == name)
        result = await self.s.execute(stmt)
        return result.scalar_one_or_none()

    async def upsert_quote(
        self,
        section: QuoteSection,
        *,
        name: str,
        params: dict | None = None,
        srce: str | None = None,
        ticker: str | None = None,
        last_update_dttm: datetime | None = None,
    ) -> Quote:
        """
        Вставить или обновить котировку (UPSERT).

        Args:
            section: Секция котировки
            name: Имя котировки
            params: Параметры
            srce: Источник
            ticker: Тикер
            last_update_dttm: Время последнего обновления

        Returns:
            Quote
        """
        async with self.s.begin_nested():
            stmt = (
                pg_insert(Quote)
                .values(
                    quote_sect_id=section.quote_sect_id,
                    name=name,
                    params=params,
                    srce=srce,
                    ticker=ticker,
                    last_update_dttm=last_update_dttm,
                )
                .on_conflict_do_update(
                    index_elements=["quote_sect_id", "name"],
                    set_={
                        "params": pg_insert(Quote).excluded.params,
                        "srce": pg_insert(Quote).excluded.srce,
                        "ticker": pg_insert(Quote).excluded.ticker,
                        "last_update_dttm": pg_insert(Quote).excluded.last_update_dttm,
                    },
                )
                .returning(Quote)
            )

            result = await self.s.execute(stmt)
            quote = result.scalar_one()
            await self.s.flush()
            return quote

    async def bulk_upsert_quote_values(
        self,
        quote: Quote,
        values: Sequence[dict[str, Any]],
    ) -> None:
        """
        Массовый UPSERT значений котировок.

        Args:
            quote: Котировка
            values: Список словарей со значениями
        """
        if not values:
            return

        quote_id = quote.quote_id

        update_columns = {
            c.name
            for c in QuoteValue.__table__.columns
            if c.name not in {"quotes_values_id", "quote_id", "dt", "load_dttm"}
        }

        payload = [
            {
                "dt": item["dt"],
                "quote_id": quote_id,
                **{col: item.get(col) for col in update_columns},
            }
            for item in values
        ]

        insert_stmt = pg_insert(QuoteValue).values(payload)
        update_cols = {col: insert_stmt.excluded[col] for col in update_columns}

        stmt = insert_stmt.on_conflict_do_update(
            index_elements=["quote_id", "dt"],
            set_=update_cols,
        )

        async with self.s.begin_nested():
            await self.s.execute(stmt)
            await self.s.flush()
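A minimal usage sketch of this repository, assuming an initialised application context and pre-seeded `quotes` section rows; the section name `"cbr"`, the quote name `"USD"`, and the value column `value_base` are illustrative, not prescribed by the repository itself.

```python
# Hypothetical usage sketch: section/quote names and value columns are assumptions.
from datetime import datetime

from dataloader.context import APP_CTX
from dataloader.storage.repositories import QuotesRepository


async def store_example_quote() -> None:
    # Open a session from the application context and commit at the end.
    async with APP_CTX.sessionmaker() as session:
        repo = QuotesRepository(session)

        section = await repo.get_section_by_name("cbr")
        if section is None:
            return  # seed data for the section is expected to exist

        quote = await repo.upsert_quote(
            section=section,
            name="USD",
            last_update_dttm=datetime.now(),
        )
        # Each row must carry "dt"; the remaining keys map onto QuoteValue columns.
        await repo.bulk_upsert_quote_values(
            quote,
            [{"dt": datetime(2025, 1, 15, 12, 0, 0), "value_base": 101.5}],
        )
        await session.commit()
```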
@ -0,0 +1,13 @@
"""
DTO (Data Transfer Objects) для слоя хранилища.
Организованы по доменам для масштабируемости.
"""

from __future__ import annotations

from .queue import CreateJobRequest, JobStatus

__all__ = [
    "CreateJobRequest",
    "JobStatus",
]
@ -0,0 +1,42 @@
from __future__ import annotations

from dataclasses import dataclass
from datetime import datetime
from typing import Any, Optional


@dataclass(frozen=True)
class CreateJobRequest:
    """
    DTO для создания задачи в очереди.
    """

    job_id: str
    queue: str
    task: str
    args: dict[str, Any]
    idempotency_key: Optional[str]
    lock_key: str
    partition_key: str
    priority: int
    available_at: datetime
    max_attempts: int
    lease_ttl_sec: int
    producer: Optional[str]
    consumer_group: Optional[str]


@dataclass(frozen=True)
class JobStatus:
    """
    DTO для статуса задачи.
    """

    job_id: str
    status: str
    attempt: int
    started_at: Optional[datetime]
    finished_at: Optional[datetime]
    heartbeat_at: Optional[datetime]
    error: Optional[str]
    progress: dict[str, Any]
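A short sketch of constructing the request DTO on the producer side; the queue and task names, priority, and TTL values are arbitrary examples, not defaults defined by this module.

```python
# Illustrative only: queue/task names and numeric values are placeholder examples.
from datetime import datetime, timezone
from uuid import uuid4

from dataloader.storage.schemas import CreateJobRequest

req = CreateJobRequest(
    job_id=str(uuid4()),
    queue="load.opu",
    task="load.opu",
    args={},
    idempotency_key=None,
    lock_key="opu",
    partition_key="",
    priority=100,
    available_at=datetime.now(timezone.utc),
    max_attempts=5,
    lease_ttl_sec=60,
    producer=None,
    consumer_group=None,
)
```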
@ -0,0 +1 @@
"""Модуль воркеров для обработки задач."""
@ -0,0 +1,175 @@
from __future__ import annotations

import asyncio
from contextlib import AsyncExitStack
from dataclasses import dataclass
from datetime import datetime, timedelta, timezone
from typing import AsyncIterator, Callable, Optional

from dataloader.config import APP_CONFIG
from dataloader.context import APP_CTX
from dataloader.storage.notify_listener import PGNotifyListener
from dataloader.storage.repositories import QueueRepository
from dataloader.workers.pipelines.registry import resolve as resolve_pipeline


@dataclass(frozen=True)
class WorkerConfig:
    """
    Конфигурация воркера.
    """

    queue: str
    heartbeat_sec: int
    claim_backoff_sec: int


class PGWorker:
    """
    Базовый асинхронный воркер очереди Postgres.
    """

    def __init__(self, cfg: WorkerConfig, stop_event: asyncio.Event) -> None:
        self._cfg = cfg
        self._stop = stop_event
        self._log = APP_CTX.get_logger()
        self._sm = APP_CTX.sessionmaker
        self._notify_wakeup = asyncio.Event()
        self._listener: Optional[PGNotifyListener] = None

    async def run(self) -> None:
        """
        Главный цикл: ожидание -> claim -> исполнение -> завершение.
        """
        self._log.info(f"worker.start queue={self._cfg.queue}")

        self._listener = PGNotifyListener(
            dsn=APP_CONFIG.pg.url,
            queue=self._cfg.queue,
            callback=lambda: self._notify_wakeup.set(),
            stop_event=self._stop,
        )
        try:
            await self._listener.start()
        except Exception as e:
            self._log.warning(
                f"Failed to start LISTEN/NOTIFY, falling back to polling: {e}"
            )
            self._listener = None

        try:
            while not self._stop.is_set():
                claimed = await self._claim_and_execute_once()
                if not claimed:
                    await self._listen_or_sleep(self._cfg.claim_backoff_sec)
        finally:
            if self._listener:
                await self._listener.stop()
            self._log.info(f"worker.stop queue={self._cfg.queue}")

    async def _listen_or_sleep(self, timeout_sec: int) -> None:
        """
        Ожидание появления задач через LISTEN/NOTIFY или с тайм-аутом.
        """
        if self._listener:
            done, pending = await asyncio.wait(
                [
                    asyncio.create_task(self._notify_wakeup.wait()),
                    asyncio.create_task(self._stop.wait()),
                ],
                return_when=asyncio.FIRST_COMPLETED,
                timeout=timeout_sec,
            )

            for task in pending:
                task.cancel()
                try:
                    await task
                except asyncio.CancelledError:
                    pass

            if self._notify_wakeup.is_set():
                self._notify_wakeup.clear()
        else:
            try:
                await asyncio.wait_for(self._stop.wait(), timeout=timeout_sec)
            except asyncio.TimeoutError:
                return

    async def _claim_and_execute_once(self) -> bool:
        """
        Выполняет одну попытку захвата задачи и её обработку.
        """
        async with AsyncExitStack() as stack:
            s = await stack.enter_async_context(self._sm())
            repo = QueueRepository(s)
            row = await repo.claim_one(self._cfg.queue, self._cfg.claim_backoff_sec)
            if not row:
                await s.commit()
                return False

            job_id = row["job_id"]
            ttl = int(row["lease_ttl_sec"])
            task = row["task"]
            args = row["args"]

            try:
                canceled = await self._execute_with_heartbeat(
                    job_id, ttl, self._pipeline(task, args)
                )
                if canceled:
                    await repo.finish_fail_or_retry(
                        job_id, "canceled by user", is_canceled=True
                    )
                else:
                    await repo.finish_ok(job_id)
                return True
            except asyncio.CancelledError:
                await repo.finish_fail_or_retry(
                    job_id, "cancelled by shutdown", is_canceled=True
                )
                raise
            except Exception as e:
                await repo.finish_fail_or_retry(job_id, str(e))
                return True

    async def _execute_with_heartbeat(
        self, job_id: str, ttl: int, it: AsyncIterator[None]
    ) -> bool:
        """
        Исполняет конвейер с поддержкой heartbeat.
        Возвращает True, если задача была отменена (cancel_requested).
        """
        next_hb = datetime.now(timezone.utc) + timedelta(
            seconds=self._cfg.heartbeat_sec
        )
        async for _ in it:
            now = datetime.now(timezone.utc)
            if now >= next_hb:
                async with self._sm() as s_hb:
                    success, cancel_requested = await QueueRepository(s_hb).heartbeat(
                        job_id, ttl
                    )
                if cancel_requested:
                    return True
                next_hb = now + timedelta(seconds=self._cfg.heartbeat_sec)
            if self._stop.is_set():
                raise asyncio.CancelledError()
        return False

    async def _pipeline(self, task: str, args: dict) -> AsyncIterator[None]:
        """
        Вызывает зарегистрированный пайплайн по имени задачи.
        """
        fn: Callable[[dict], object] = resolve_pipeline(task)
        res = fn(args)
        if hasattr(res, "__aiter__"):
            async for _ in res:  # type: ignore[func-returns-value]
                yield
        elif asyncio.iscoroutine(res):
            await res  # type: ignore[arg-type]
            yield
        else:
            yield
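A minimal sketch of driving a single worker directly, assuming the application context (logger, sessionmaker, config) is already initialised; the queue name and timing values are placeholders. In the service itself the `WorkerManager` in the next file owns this loop.

```python
# Sketch: run one worker for a while and then stop it gracefully.
# Queue name, heartbeat and backoff values are illustrative assumptions.
import asyncio

from dataloader.workers.base import PGWorker, WorkerConfig


async def run_single_worker() -> None:
    stop = asyncio.Event()
    cfg = WorkerConfig(queue="load.opu", heartbeat_sec=10, claim_backoff_sec=5)
    worker = asyncio.create_task(PGWorker(cfg, stop).run())

    await asyncio.sleep(60)  # let the worker claim and process jobs for a while
    stop.set()               # request a graceful stop; run() exits its loop
    await worker
```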
@ -0,0 +1,111 @@
from __future__ import annotations

import asyncio
import contextlib
from dataclasses import dataclass

from dataloader.config import APP_CONFIG
from dataloader.context import APP_CTX
from dataloader.workers.base import PGWorker, WorkerConfig
from dataloader.workers.reaper import requeue_lost


@dataclass(frozen=True)
class WorkerSpec:
    """
    Конфигурация набора воркеров для очереди.
    """

    queue: str
    concurrency: int


class WorkerManager:
    """
    Управляет жизненным циклом асинхронных воркеров.
    """

    def __init__(self, specs: list[WorkerSpec]) -> None:
        self._log = APP_CTX.get_logger()
        self._specs = specs
        self._stop = asyncio.Event()
        self._tasks: list[asyncio.Task] = []
        self._reaper_task: asyncio.Task | None = None

    async def start(self) -> None:
        """
        Стартует воркеры и фоновую задачу реапера.
        """
        hb = int(APP_CONFIG.worker.heartbeat_sec)
        backoff = int(APP_CONFIG.worker.claim_backoff_sec)

        for spec in self._specs:
            for i in range(max(1, spec.concurrency)):
                cfg = WorkerConfig(
                    queue=spec.queue, heartbeat_sec=hb, claim_backoff_sec=backoff
                )
                t = asyncio.create_task(
                    PGWorker(cfg, self._stop).run(), name=f"worker:{spec.queue}:{i}"
                )
                self._tasks.append(t)

        self._reaper_task = asyncio.create_task(self._reaper_loop(), name="reaper")

        self._log.info(
            "worker_manager.started",
            extra={
                "specs": [spec.__dict__ for spec in self._specs],
                "total_tasks": len(self._tasks),
            },
        )

    async def stop(self) -> None:
        """
        Останавливает воркеры и реапер.
        """
        self._stop.set()

        for t in self._tasks:
            t.cancel()
        await asyncio.gather(*self._tasks, return_exceptions=True)
        self._tasks.clear()

        if self._reaper_task:
            self._reaper_task.cancel()
            with contextlib.suppress(asyncio.CancelledError, Exception):
                await self._reaper_task
            self._reaper_task = None

        self._log.info("worker_manager.stopped")

    async def _reaper_loop(self) -> None:
        """
        Фоновый цикл возврата потерянных задач в очередь.
        """
        period = int(APP_CONFIG.worker.reaper_period_sec)
        sm = APP_CTX.sessionmaker
        while not self._stop.is_set():
            try:
                async with sm() as s:
                    ids = await requeue_lost(s)
                    if ids:
                        self._log.info("reaper.requeued", extra={"count": len(ids)})
            except Exception:
                self._log.exception("reaper.error")
            try:
                await asyncio.wait_for(self._stop.wait(), timeout=period)
            except asyncio.TimeoutError:
                continue


def build_manager_from_env() -> WorkerManager:
    """
    Собирает WorkerManager из WORKERS_JSON.
    """
    specs: list[WorkerSpec] = []
    for item in APP_CONFIG.worker.parsed_workers():
        q = str(item.get("queue", "")).strip()
        c = int(item.get("concurrency", 1))
        if q:
            specs.append(WorkerSpec(queue=q, concurrency=max(1, c)))
    return WorkerManager(specs)
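One possible way to host the manager next to the API process, sketched under assumptions: the module path `dataloader.workers.manager`, the FastAPI app object, and the lifespan wiring shown here are illustrative and not taken from this diff; only `build_manager_from_env()`, `start()`, and `stop()` come from the file above.

```python
# Sketch: attach the worker manager to an application lifespan (assumed wiring).
from contextlib import asynccontextmanager

from fastapi import FastAPI

from dataloader.workers.manager import build_manager_from_env  # assumed module path


@asynccontextmanager
async def lifespan(app: FastAPI):
    manager = build_manager_from_env()
    await manager.start()      # one task per queue/concurrency slot plus the reaper
    try:
        yield
    finally:
        await manager.stop()   # cancel workers and wait for them to finish


app = FastAPI(lifespan=lifespan)
```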
@ -0,0 +1,24 @@
"""Модуль пайплайнов обработки задач."""

from __future__ import annotations

import importlib
import pkgutil
from typing import Iterable


def load_all() -> None:
    """
    Импортирует все модули в пакете pipelines, чтобы сработали @register().
    """
    pkg_name = __name__
    for mod in iter_modules():
        importlib.import_module(f"{pkg_name}.{mod}")


def iter_modules() -> Iterable[str]:
    """
    Возвращает имена всех подпакетов/модулей в текущем пакете.
    """
    for m in pkgutil.iter_modules(__path__):  # type: ignore[name-defined]
        yield m.name
@ -0,0 +1,161 @@
"""Пайплайн загрузки данных OPU из Gmap2Brief API."""

from __future__ import annotations

import tempfile
from datetime import datetime
from pathlib import Path
from typing import Any, AsyncIterator

import orjson
import zstandard as zstd

from dataloader.context import APP_CTX
from dataloader.interfaces.gmap2_brief.interface import get_gmap2brief_interface
from dataloader.storage.repositories import OpuRepository

from .registry import register


def _parse_jsonl_from_zst(file_path: Path, chunk_size: int = 10000):
    """
    Распаковывает zstandard архив и парсит JSON Lines стримингово батчами.

    Args:
        file_path: Путь к .jsonl.zst файлу
        chunk_size: Размер батча для обработки

    Yields:
        Батчи словарей (список словарей)
    """
    dctx = zstd.ZstdDecompressor()

    with open(file_path, "rb") as compressed:
        with dctx.stream_reader(compressed) as reader:
            batch = []
            buffer = b""

            while True:
                chunk = reader.read(65536)  # Read 64KB chunks
                if not chunk:
                    break

                buffer += chunk
                lines = buffer.split(b"\n")
                buffer = lines[-1]  # Keep incomplete line in buffer

                for line in lines[:-1]:
                    if not line.strip():
                        continue

                    try:
                        record = orjson.loads(line)
                        batch.append(record)

                        if len(batch) >= chunk_size:
                            yield batch
                            batch = []
                    except orjson.JSONDecodeError as e:
                        APP_CTX.logger.warning(f"Failed to parse JSON line: {e}")
                        continue

            if buffer.strip():
                try:
                    record = orjson.loads(buffer)
                    batch.append(record)
                except orjson.JSONDecodeError as e:
                    APP_CTX.logger.warning(f"Failed to parse final JSON line: {e}")

            if batch:
                yield batch


def _convert_record(raw: dict[str, Any]) -> dict[str, Any]:
    """
    Конвертирует JSON запись в формат для БД.

    Преобразует ISO строки дат в date/datetime объекты.

    Args:
        raw: Сырая запись из JSON

    Returns:
        Словарь для вставки в БД
    """
    result = raw.copy()

    if "actdate" in result and isinstance(result["actdate"], str):
        result["actdate"] = datetime.fromisoformat(result["actdate"]).date()

    if "wf_load_dttm" in result and isinstance(result["wf_load_dttm"], str):
        result["wf_load_dttm"] = datetime.fromisoformat(result["wf_load_dttm"])

    return result


@register("load.opu")
async def load_opu(args: dict) -> AsyncIterator[None]:
    """
    Загружает данные OPU из Gmap2Brief API и сохраняет в БД.

    Процесс:
    1. Запускает экспорт через API
    2. Ждет завершения задачи (polling)
    3. Скачивает zstandard архив
    4. Очищает целевую таблицу (TRUNCATE)
    5. Распаковывает и парсит JSON Lines стримингово
    6. Загружает данные батчами в БД

    Args:
        args: Аргументы задачи (не используются)

    Yields:
        None после каждого этапа для heartbeat
    """
    logger = APP_CTX.logger
    logger.info("Starting OPU ETL pipeline")

    interface = get_gmap2brief_interface()
    job_id = await interface.start_export()
    logger.info(f"OPU export job started: {job_id}")
    yield

    status = await interface.wait_for_completion(job_id)
    logger.info(f"OPU export completed: {status.total_rows} rows")
    yield

    with tempfile.TemporaryDirectory() as temp_dir:
        temp_path = Path(temp_dir)
        archive_path = temp_path / f"opu_export_{job_id}.jsonl.zst"

        await interface.download_export(job_id, archive_path)
        logger.info(f"OPU archive downloaded: {archive_path.stat().st_size:,} bytes")
        yield

        async with APP_CTX.sessionmaker() as session:
            repo = OpuRepository(session)
            await repo.truncate()
            await session.commit()
            logger.info("OPU table truncated")
            yield

            total_inserted = 0
            batch_num = 0

            logger.info("Starting streaming insert...")
            for batch in _parse_jsonl_from_zst(archive_path, chunk_size=5000):
                batch_num += 1

                converted = [_convert_record(rec) for rec in batch]

                inserted = await repo.bulk_insert(converted)
                await session.commit()

                total_inserted += inserted
                logger.debug(
                    f"Batch {batch_num}: inserted {inserted} rows (total: {total_inserted})"
                )

                yield

            logger.info(f"OPU ETL completed: {total_inserted} rows inserted")
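A self-contained sketch of the batch contract of `_parse_jsonl_from_zst`, assuming only that the application package imports cleanly; it writes a tiny `.jsonl.zst` archive into a temporary directory and reads it back, so the file name and record shape are arbitrary.

```python
# Sketch: write a small .jsonl.zst and iterate it in bounded batches.
import tempfile
from pathlib import Path

import orjson
import zstandard as zstd

from dataloader.workers.pipelines.load_opu import _parse_jsonl_from_zst


def demo_streaming_parse() -> None:
    with tempfile.TemporaryDirectory() as tmp:
        path = Path(tmp) / "sample.jsonl.zst"
        # Seven one-line JSON records, newline-terminated, then zstd-compressed.
        lines = b"\n".join(orjson.dumps({"object_id": str(i)}) for i in range(7)) + b"\n"
        path.write_bytes(zstd.ZstdCompressor().compress(lines))

        for batch in _parse_jsonl_from_zst(path, chunk_size=3):
            print(len(batch))  # prints 3, 3, 1 — records arrive in bounded batches
```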
@ -0,0 +1,250 @@
"""Пайплайн загрузки данных котировок из SuperTenera."""

from __future__ import annotations

from datetime import datetime
from typing import TYPE_CHECKING, Any, AsyncIterator

from dataloader.context import APP_CTX
from dataloader.interfaces.tenera.interface import get_async_tenera_interface
from dataloader.interfaces.tenera.schemas import (
    BloombergTimePoint,
    CbrTimePoint,
    InvestingCandlestick,
    InvestingNumeric,
    InvestingTimePoint,
    SgxTimePoint,
    TimePointUnion,
    TradingEconomicsEmptyString,
    TradingEconomicsLastPrev,
    TradingEconomicsNumeric,
    TradingEconomicsStringPercent,
    TradingEconomicsStringTime,
    TradingEconomicsTimePoint,
    TradingViewTimePoint,
)
from dataloader.storage.repositories import QuotesRepository

from .registry import register

if TYPE_CHECKING:
    from dataloader.interfaces.tenera.schemas import MainData


def _to_float(value: str | int | float | None) -> float | None:
    """Преобразует строковые числа с запятыми/процентами к float."""
    if value is None:
        return None
    if isinstance(value, int | float):
        return float(value)
    s = str(value).strip().replace(" ", "").replace("%", "").replace(",", ".")
    if s == "":
        return None
    try:
        return float(s)
    except ValueError:
        return None


def _parse_ts_to_datetime(ts: str) -> datetime | None:
    """Преобразует строку с Unix timestamp в datetime без таймзоны, но в таймзоне приложения."""
    if not ts or not ts.strip().isdigit():
        return None

    try:
        timestamp = int(ts.strip())
        dt_aware = datetime.fromtimestamp(timestamp, tz=APP_CTX.pytz_timezone)
        return dt_aware.replace(tzinfo=None)
    except (ValueError, OSError, OverflowError):
        return None


def _build_value_row(
    source: str, dt: datetime, point: Any
) -> dict[str, Any] | None:  # noqa: C901
    """Строит строку для `quotes_values` по источнику и типу точки."""
    if isinstance(point, int):
        return {"dt": dt, "key": point}

    if isinstance(point, TimePointUnion):
        inner = point.root
        if isinstance(inner, InvestingTimePoint):
            deep_inner = inner.root
            if isinstance(deep_inner, InvestingNumeric):
                return {
                    "dt": dt,
                    "value_profit": _to_float(deep_inner.profit),
                    "value_base": _to_float(deep_inner.base_value),
                    "value_max": _to_float(deep_inner.max_value),
                    "value_min": _to_float(deep_inner.min_value),
                    "value_chng": _to_float(deep_inner.change),
                    "value_chng_prc": _to_float(deep_inner.change_ptc),
                }

            if isinstance(deep_inner, InvestingCandlestick):
                return {
                    "dt": dt,
                    "price_o": _to_float(
                        getattr(deep_inner, "open_", None)
                        or getattr(deep_inner, "open", None)
                    ),
                    "price_h": _to_float(deep_inner.high),
                    "price_l": _to_float(deep_inner.low),
                    "price_c": _to_float(deep_inner.close),
                    "volume": _to_float(deep_inner.value),
                }

        if isinstance(inner, TradingViewTimePoint | SgxTimePoint):
            return {
                "dt": dt,
                "price_o": _to_float(
                    getattr(inner, "open_", None) or getattr(inner, "open", None)
                ),
                "price_h": _to_float(inner.high),
                "price_l": _to_float(inner.low),
                "price_c": _to_float(inner.close),
                "volume": _to_float(
                    getattr(inner, "volume", None)
                    or getattr(inner, "interest", None)
                    or getattr(inner, "value", None)
                ),
            }

        if isinstance(inner, BloombergTimePoint):
            return {
                "dt": dt,
                "value_base": _to_float(inner.value),
            }

        if isinstance(inner, CbrTimePoint):
            return {
                "dt": dt,
                "value_base": _to_float(inner.value),
            }

        if isinstance(inner, TradingEconomicsTimePoint):
            deep_inner = inner.root

            if isinstance(deep_inner, TradingEconomicsNumeric):
                return {
                    "dt": dt,
                    "price_i": _to_float(deep_inner.price),
                    "value_day": _to_float(deep_inner.day),
                    "value_prc": _to_float(deep_inner.percent),
                    "value_weekly_prc": _to_float(deep_inner.weekly),
                    "value_monthly_prc": _to_float(deep_inner.monthly),
                    "value_ytd_prc": _to_float(deep_inner.ytd),
                    "value_yoy_prc": _to_float(deep_inner.yoy),
                }

            if isinstance(deep_inner, TradingEconomicsLastPrev):
                return {
                    "dt": dt,
                    "value_last": _to_float(deep_inner.last),
                    "value_previous": _to_float(deep_inner.previous),
                    "unit": (
                        str(deep_inner.unit) if deep_inner.unit is not None else None
                    ),
                }

            if isinstance(deep_inner, TradingEconomicsStringPercent):
                return {
                    "dt": dt,
                    "value_prc": _to_float(deep_inner.root),
                }

            if isinstance(deep_inner, TradingEconomicsStringTime):
                return None

            if isinstance(deep_inner, TradingEconomicsEmptyString):
                return {
                    "dt": dt,
                    "is_empty_str_flg": True,
                }

    return None


async def _process_source(
    repo: QuotesRepository,
    source_name: str,
    source_data: dict[str, Any],
) -> None:
    """
    Обрабатывает данные одного источника.

    Args:
        repo: Репозиторий для работы с котировками
        source_name: Имя источника (cbr, investing, sgx, etc)
        source_data: Данные по инструментам от источника
    """
    logger = APP_CTX.logger

    section = await repo.get_section_by_name(source_name)
    if section is None:
        logger.warning(f"Section '{source_name}' not found. Skipping source.")
        return

    for instrument_name, instrument_data in source_data.items():
        now = datetime.now(tz=APP_CTX.pytz_timezone).replace(tzinfo=None)
        quote = await repo.upsert_quote(
            section=section,
            name=instrument_name,
            last_update_dttm=now,
        )

        rows: list[dict[str, Any]] = []
        for ts, tp in instrument_data.items():
            dt = _parse_ts_to_datetime(str(ts))
            if not dt:
                continue
            row = _build_value_row(source_name, dt, tp)
            if row is None:
                continue
            rows.append(row)

        if rows:
            await repo.bulk_upsert_quote_values(quote, rows)


@register("load.tenera")
async def load_tenera(args: dict) -> AsyncIterator[None]:
    """
    Загружает данные котировок из SuperTenera и сохраняет их в БД.

    Args:
        args: Аргументы задачи (не используются)

    Yields:
        None после каждого обработанного источника для heartbeat
    """
    logger = APP_CTX.logger
    logger.info("Starting SuperTenera ETL pipeline")

    async with get_async_tenera_interface() as tenera:
        data: MainData = await tenera.get_quotes_data()

    logger.info("Fetched data from SuperTenera")
    yield

    async with APP_CTX.sessionmaker() as session:
        repo = QuotesRepository(session)

        for source_name in (
            "cbr",
            "investing",
            "sgx",
            "tradingeconomics",
            "bloomberg",
            "trading_view",
        ):
            source_data = getattr(data, source_name)
            if not source_data:
                continue

            await _process_source(repo, source_name, source_data)
            await session.commit()

            logger.info(f"Processed source: {source_name}")
            yield

    logger.info("SuperTenera ETL pipeline completed")
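The normalization helpers above are the part most prone to surprises, so here is a small sketch of their expected behaviour; the input values are illustrative and the asserts only restate what the code does.

```python
# Expected behaviour of the normalization helpers (values are illustrative).
from dataloader.workers.pipelines.load_tenera import _parse_ts_to_datetime, _to_float

assert _to_float("1 234,5") == 1234.5             # spaces and decimal comma stripped
assert _to_float("12.3%") == 12.3                 # percent sign removed, not rescaled
assert _to_float("") is None                      # empty string maps to None
assert _parse_ts_to_datetime("not-a-ts") is None  # non-numeric timestamps are skipped
```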
@ -0,0 +1,19 @@
from __future__ import annotations

import asyncio
from typing import AsyncIterator

from .registry import register


@register("noop")
async def noop(args: dict) -> AsyncIterator[None]:
    """
    Эталонный пайплайн без побочных эффектов, имитирует 3 шага.
    """
    await asyncio.sleep(float(args.get("sleep1", 2)))
    yield
    await asyncio.sleep(float(args.get("sleep2", 2)))
    yield
    await asyncio.sleep(float(args.get("sleep3", 2)))
    yield
@ -0,0 +1,36 @@
from __future__ import annotations

from typing import Any, Callable, Dict, Iterable

_Registry: Dict[str, Callable[[dict[str, Any]], Any]] = {}


def register(
    task: str,
) -> Callable[[Callable[[dict[str, Any]], Any]], Callable[[dict[str, Any]], Any]]:
    """
    Регистрирует обработчик пайплайна под именем задачи.
    """

    def _wrap(fn: Callable[[dict[str, Any]], Any]) -> Callable[[dict[str, Any]], Any]:
        _Registry[task] = fn
        return fn

    return _wrap


def resolve(task: str) -> Callable[[dict[str, Any]], Any]:
    """
    Возвращает обработчик пайплайна по имени задачи.
    """
    try:
        return _Registry[task]
    except KeyError as err:
        raise KeyError(f"pipeline not found: {task}") from err


def tasks() -> Iterable[str]:
    """
    Возвращает список зарегистрированных задач.
    """
    return _Registry.keys()
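A short usage sketch of the registry: registering a pipeline under a task name and resolving it back. The task name `"load.example"` and the pipeline body are placeholders.

```python
# Sketch: register a pipeline and resolve it by task name.
from typing import AsyncIterator

from dataloader.workers.pipelines.registry import register, resolve, tasks


@register("load.example")  # hypothetical task name, for illustration only
async def load_example(args: dict) -> AsyncIterator[None]:
    yield  # one heartbeat step


fn = resolve("load.example")   # returns load_example
print(sorted(tasks()))         # includes "load.example" plus anything imported via load_all()
```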
@ -0,0 +1,15 @@
from __future__ import annotations

from typing import Sequence

from sqlalchemy.ext.asyncio import AsyncSession

from dataloader.storage.repositories import QueueRepository


async def requeue_lost(session: AsyncSession) -> Sequence[str]:
    """
    Возвращает протухшие running-задачи в очередь и отдаёт их job_id.
    """
    repo = QueueRepository(session)
    return await repo.requeue_lost()
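Besides the manager's periodic reaper loop, the helper can be called once from a maintenance coroutine; a minimal sketch, assuming an initialised application context and that the caller is responsible for the commit.

```python
# Sketch: one-off invocation outside the manager's reaper loop.
from dataloader.context import APP_CTX
from dataloader.workers.reaper import requeue_lost


async def requeue_once() -> None:
    async with APP_CTX.sessionmaker() as session:
        ids = await requeue_lost(session)  # expired running jobs moved back to queued
        await session.commit()
        print(f"requeued {len(ids)} jobs")
```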
@ -0,0 +1,105 @@
from __future__ import annotations

import asyncio
import sys
from typing import AsyncGenerator
from uuid import uuid4

import pytest
import pytest_asyncio
from dotenv import load_dotenv
from httpx import ASGITransport, AsyncClient
from sqlalchemy import text
from sqlalchemy.ext.asyncio import AsyncEngine, AsyncSession, async_sessionmaker

from dataloader.api import app_main
from dataloader.config import APP_CONFIG
from dataloader.context import get_session
from dataloader.storage.engine import create_engine

load_dotenv()

if sys.platform == "win32":
    asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())

pytestmark = pytest.mark.asyncio


@pytest_asyncio.fixture(scope="function")
async def db_engine() -> AsyncGenerator[AsyncEngine, None]:
    """
    Создаёт тестовый движок для теста.
    Использует реальную БД из конфига.
    """
    engine = create_engine(APP_CONFIG.pg.url)

    yield engine

    await engine.dispose()


@pytest_asyncio.fixture(scope="function")
async def db_session(db_engine: AsyncEngine) -> AsyncGenerator[AsyncSession, None]:
    """
    Предоставляет сессию БД для каждого теста.
    НЕ использует транзакцию, чтобы работали advisory locks.
    """
    sessionmaker = async_sessionmaker(
        bind=db_engine, expire_on_commit=False, class_=AsyncSession
    )
    async with sessionmaker() as session:
        yield session
        await session.rollback()


@pytest_asyncio.fixture(scope="function")
async def clean_queue_tables(db_session: AsyncSession) -> None:
    """
    Очищает таблицы очереди перед каждым тестом.
    """
    schema = APP_CONFIG.pg.schema_queue
    await db_session.execute(text(f"TRUNCATE TABLE {schema}.dl_job_events CASCADE"))
    await db_session.execute(text(f"TRUNCATE TABLE {schema}.dl_jobs CASCADE"))
    await db_session.commit()


@pytest_asyncio.fixture
async def client(db_session: AsyncSession) -> AsyncGenerator[AsyncClient, None]:
    """
    HTTP клиент для тестирования API.
    """

    async def override_get_session() -> AsyncGenerator[AsyncSession, None]:
        yield db_session

    app_main.dependency_overrides[get_session] = override_get_session

    transport = ASGITransport(app=app_main)
    async with AsyncClient(transport=transport, base_url="http://test") as c:
        yield c

    app_main.dependency_overrides.clear()


@pytest.fixture
def job_id() -> str:
    """
    Генерирует уникальный job_id для тестов.
    """
    return str(uuid4())


@pytest.fixture
def queue_name() -> str:
    """
    Возвращает имя тестовой очереди.
    """
    return "test.queue"


@pytest.fixture
def task_name() -> str:
    """
    Возвращает имя тестовой задачи.
    """
    return "test.task"
@ -0,0 +1 @@
@ -0,0 +1,171 @@
from __future__ import annotations

import pytest
from httpx import AsyncClient


@pytest.mark.integration
class TestJobsAPI:
    """
    Интеграционные тесты для API endpoints.
    """

    async def test_trigger_job_creates_new_job(
        self,
        client: AsyncClient,
        clean_queue_tables,
        queue_name: str,
        task_name: str,
    ):
        """
        Тест создания новой задачи через API.
        """
        payload = {
            "queue": queue_name,
            "task": task_name,
            "args": {"test_key": "test_value"},
            "lock_key": "lock_api_1",
            "partition_key": "part1",
            "priority": 100,
            "max_attempts": 5,
            "lease_ttl_sec": 60,
        }

        response = await client.post("/api/v1/jobs/trigger", json=payload)

        assert response.status_code == 200
        data = response.json()
        assert "job_id" in data
        assert data["status"] == "queued"

    async def test_trigger_job_with_idempotency_key(
        self,
        client: AsyncClient,
        clean_queue_tables,
        queue_name: str,
        task_name: str,
    ):
        """
        Тест идемпотентности через idempotency_key.
        """
        payload = {
            "queue": queue_name,
            "task": task_name,
            "args": {},
            "idempotency_key": "unique_key_123",
            "lock_key": "lock_idem",
            "priority": 100,
            "max_attempts": 5,
            "lease_ttl_sec": 60,
        }

        response1 = await client.post("/api/v1/jobs/trigger", json=payload)
        response2 = await client.post("/api/v1/jobs/trigger", json=payload)

        assert response1.status_code == 200
        assert response2.status_code == 200

        data1 = response1.json()
        data2 = response2.json()

        assert data1["job_id"] == data2["job_id"]
        assert data1["status"] == data2["status"] == "queued"

    async def test_get_status_returns_job_status(
        self,
        client: AsyncClient,
        clean_queue_tables,
        queue_name: str,
        task_name: str,
    ):
        """
        Тест получения статуса задачи через API.
        """
        payload = {
            "queue": queue_name,
            "task": task_name,
            "args": {},
            "lock_key": "lock_status",
            "priority": 100,
            "max_attempts": 5,
            "lease_ttl_sec": 60,
        }

        create_response = await client.post("/api/v1/jobs/trigger", json=payload)
        job_id = create_response.json()["job_id"]

        status_response = await client.get(f"/api/v1/jobs/{job_id}/status")

        assert status_response.status_code == 200
        data = status_response.json()
        assert data["job_id"] == job_id
        assert data["status"] == "queued"
        assert data["attempt"] == 0

    async def test_get_status_returns_404_for_nonexistent_job(
        self,
        client: AsyncClient,
        clean_queue_tables,
    ):
        """
        Тест получения статуса несуществующей задачи.
        """
        fake_job_id = "00000000-0000-0000-0000-000000000000"

        response = await client.get(f"/api/v1/jobs/{fake_job_id}/status")

        assert response.status_code == 404

    async def test_cancel_job_sets_cancel_flag(
        self,
        client: AsyncClient,
        clean_queue_tables,
        queue_name: str,
        task_name: str,
    ):
        """
        Тест отмены задачи через API.
        """
        payload = {
            "queue": queue_name,
            "task": task_name,
            "args": {},
            "lock_key": "lock_cancel",
            "priority": 100,
            "max_attempts": 5,
            "lease_ttl_sec": 60,
        }

        create_response = await client.post("/api/v1/jobs/trigger", json=payload)
        job_id = create_response.json()["job_id"]

        cancel_response = await client.post(f"/api/v1/jobs/{job_id}/cancel")

        assert cancel_response.status_code == 200
        data = cancel_response.json()
        assert data["job_id"] == job_id

    async def test_cancel_nonexistent_job_returns_404(
        self,
        client: AsyncClient,
        clean_queue_tables,
    ):
        """
        Тест отмены несуществующей задачи.
        """
        fake_job_id = "00000000-0000-0000-0000-000000000000"

        response = await client.post(f"/api/v1/jobs/{fake_job_id}/cancel")

        assert response.status_code == 404

    async def test_health_endpoint_returns_200(
        self,
        client: AsyncClient,
    ):
        """
        Тест health check endpoint.
        """
        response = await client.get("/health")

        assert response.status_code == 200
@ -0,0 +1,33 @@
from __future__ import annotations

from uuid import UUID, uuid4

import pytest

from dataloader.api.v1.exceptions import JobNotFoundError
from dataloader.api.v1.router import cancel_job, get_status
from dataloader.api.v1.schemas import JobStatusResponse


class _FakeSvc:
    async def status(self, job_id: UUID) -> JobStatusResponse | None:
        return None

    async def cancel(self, job_id: UUID) -> JobStatusResponse | None:
        return None


@pytest.mark.unit
@pytest.mark.asyncio
async def test_router_get_status_raises_job_not_found():
    svc = _FakeSvc()
    with pytest.raises(JobNotFoundError):
        await get_status(uuid4(), svc=svc)


@pytest.mark.unit
@pytest.mark.asyncio
async def test_router_cancel_raises_job_not_found():
    svc = _FakeSvc()
    with pytest.raises(JobNotFoundError):
        await cancel_job(uuid4(), svc=svc)
@ -0,0 +1,57 @@
from __future__ import annotations

from datetime import datetime, timezone
from uuid import UUID, uuid4

import pytest

from dataloader.api.v1.router import cancel_job, get_status
from dataloader.api.v1.schemas import JobStatusResponse


class _SvcOK:
    async def status(self, job_id: UUID) -> JobStatusResponse | None:
        return JobStatusResponse(
            job_id=job_id,
            status="queued",
            attempt=0,
            started_at=None,
            finished_at=None,
            heartbeat_at=None,
            error=None,
            progress={},
        )

    async def cancel(self, job_id: UUID) -> JobStatusResponse | None:
        return JobStatusResponse(
            job_id=job_id,
            status="canceled",
            attempt=1,
            started_at=datetime.now(timezone.utc),
            finished_at=datetime.now(timezone.utc),
            heartbeat_at=None,
            error="by test",
            progress={},
        )


@pytest.mark.unit
@pytest.mark.asyncio
async def test_router_get_status_returns_response():
    svc = _SvcOK()
    jid = uuid4()
    res = await get_status(jid, svc=svc)
    assert isinstance(res, JobStatusResponse)
    assert res.job_id == jid
    assert res.status == "queued"


@pytest.mark.unit
@pytest.mark.asyncio
async def test_router_cancel_returns_response():
    svc = _SvcOK()
    jid = uuid4()
    res = await cancel_job(jid, svc=svc)
    assert isinstance(res, JobStatusResponse)
    assert res.job_id == jid
    assert res.status == "canceled"
@ -0,0 +1,123 @@
"""
Интеграционные тесты для пайплайна load_opu.

ВНИМАНИЕ: Эти тесты требуют работающего Gmap2Brief API и настоящего соединения с БД.
По умолчанию они исключены из запуска через pytest.mark.skip.
Для запуска используйте: pytest tests/integration_tests/test_pipeline_load_opu_integration.py --no-skip
"""

from __future__ import annotations

import pytest
from sqlalchemy import text

from dataloader.context import APP_CTX
from dataloader.interfaces.gmap2_brief.interface import get_gmap2brief_interface
from dataloader.storage.repositories.opu import OpuRepository
from dataloader.workers.pipelines.load_opu import load_opu


@pytest.mark.integration
@pytest.mark.skip(reason="Requires working Gmap2Brief API - run manually when service is available")
class TestLoadOpuIntegration:
    """Интеграционные тесты для пайплайна load_opu."""

    @pytest.mark.asyncio
    async def test_full_opu_pipeline_with_real_api(self, db_session):
        """
        Тест полного пайплайна OPU с реальным API.

        Требования:
        - Gmap2Brief API должен быть доступен
        - База данных должна быть настроена
        - Схема OPU должна существовать
        """
        interface = get_gmap2brief_interface()

        try:
            job_id = await interface.start_export()
            assert job_id is not None
            assert isinstance(job_id, str)

            status = await interface.wait_for_completion(job_id, max_wait=300)
            assert status.status == "completed"
            assert status.total_rows > 0

        except Exception as e:
            pytest.skip(f"Gmap2Brief API not available: {e}")

        steps = 0
        async for _ in load_opu({}):
            steps += 1

        assert steps > 0

        async with APP_CTX.sessionmaker() as session:
            repo = OpuRepository(session)

            # Raw SQL is wrapped in text() as required by SQLAlchemy 2.x.
            result = await session.execute(
                text("SELECT COUNT(*) FROM opu.brief_digital_certificate_opu")
            )
            count = result.scalar()

            assert count > 0

    @pytest.mark.asyncio
    async def test_opu_repository_truncate(self, db_session):
        """
        Тест операции TRUNCATE репозитория OPU.

        Требование: схема OPU должна существовать в БД.
        """
        repo = OpuRepository(db_session)

        await repo.truncate()
        await db_session.commit()

        result = await db_session.execute(
            text("SELECT COUNT(*) FROM opu.brief_digital_certificate_opu")
        )
        count = result.scalar()

        assert count == 0

    @pytest.mark.asyncio
    async def test_opu_repository_bulk_insert(self, db_session):
        """
        Тест массовой вставки данных в репозиторий OPU.

        Требование: схема OPU должна существовать в БД.
        """
        repo = OpuRepository(db_session)

        await repo.truncate()
        await db_session.commit()

        test_records = [
            {
                "object_id": f"test_{i}",
                "desk_nm": "TEST_DESK",
                "actdate": "2025-01-15",
                "layer_cd": "LAYER1",
                "opu_cd": "OPU1",
                "opu_lvl": 1,
                "opu_prnt_cd": "PARENT",
                "object_unit": "UNIT1",
                "opu_nm": f"Test OPU {i}",
            }
            for i in range(10)
        ]

        inserted = await repo.bulk_insert(test_records)
        await db_session.commit()

        assert inserted == 10

        result = await db_session.execute(
            text("SELECT COUNT(*) FROM opu.brief_digital_certificate_opu WHERE desk_nm = 'TEST_DESK'")
        )
        count = result.scalar()

        assert count == 10

        await repo.truncate()
        await db_session.commit()
@ -0,0 +1,189 @@
"""
Интеграционные тесты для пайплайна load_tenera.

ВНИМАНИЕ: Эти тесты требуют работающего SuperTenera API и настоящего соединения с БД.
По умолчанию они исключены из запуска через pytest.mark.skip.
Для запуска используйте: pytest tests/integration_tests/test_pipeline_load_tenera_integration.py --no-skip
"""

from __future__ import annotations

import pytest
from sqlalchemy import text

from dataloader.context import APP_CTX
from dataloader.interfaces.tenera.interface import get_async_tenera_interface
from dataloader.storage.repositories.quotes import QuotesRepository
from dataloader.workers.pipelines.load_tenera import load_tenera


@pytest.mark.integration
@pytest.mark.skip(reason="Requires working SuperTenera API - run manually when service is available")
class TestLoadTeneraIntegration:
    """Интеграционные тесты для пайплайна load_tenera."""

    @pytest.mark.asyncio
    async def test_full_tenera_pipeline_with_real_api(self, db_session):
        """
        Тест полного пайплайна TENERA с реальным API.

        Требования:
        - SuperTenera API должен быть доступен
        - База данных должна быть настроена
        - Схема quotes должна существовать
        - Таблицы quote_section, quote, quote_value должны существовать
        """
        try:
            async with get_async_tenera_interface() as tenera:
                data = await tenera.get_quotes_data()
                assert data is not None

        except Exception as e:
            pytest.skip(f"SuperTenera API not available: {e}")

        steps = 0
        async for _ in load_tenera({}):
            steps += 1

        assert steps > 0

        async with APP_CTX.sessionmaker() as session:
            # Raw SQL is wrapped in text() as required by SQLAlchemy 2.x.
            result = await session.execute(text("SELECT COUNT(*) FROM quotes.quote_value"))
            count = result.scalar()

            assert count > 0

    @pytest.mark.asyncio
    async def test_tenera_interface_get_quotes_data(self):
        """
        Тест получения данных котировок из SuperTenera API.

        Требование: SuperTenera API должен быть доступен.
        """
        try:
            async with get_async_tenera_interface() as tenera:
                data = await tenera.get_quotes_data()

                assert data is not None
                assert hasattr(data, "cbr")
                assert hasattr(data, "investing")
                assert hasattr(data, "sgx")
                assert hasattr(data, "tradingeconomics")
                assert hasattr(data, "bloomberg")
                assert hasattr(data, "trading_view")

        except Exception as e:
            pytest.skip(f"SuperTenera API not available: {e}")

    @pytest.mark.asyncio
    async def test_quotes_repository_get_section_by_name(self, db_session):
        """
        Тест получения секции по имени.

        Требование: схема quotes и таблица quote_section должны существовать в БД.
        """
        repo = QuotesRepository(db_session)

        section = await repo.get_section_by_name("cbr")

        if section is not None:
            assert section.section_nm == "cbr"
            assert section.section_id is not None
        else:
            pytest.skip("Section 'cbr' not found in database - seed data required")

    @pytest.mark.asyncio
    async def test_quotes_repository_upsert_quote(self, db_session):
        """
        Тест upsert котировки.

        Требование: схема quotes должна существовать в БД.
        """
        repo = QuotesRepository(db_session)

        section = await repo.get_section_by_name("cbr")
        if section is None:
            pytest.skip("Section 'cbr' not found - cannot test upsert")

        from datetime import datetime

        quote = await repo.upsert_quote(
            section=section,
            name="TEST_USD",
            last_update_dttm=datetime.now(),
        )

        assert quote is not None
        assert quote.quote_nm == "TEST_USD"
        assert quote.section_id == section.section_id

        quote2 = await repo.upsert_quote(
            section=section,
            name="TEST_USD",
            last_update_dttm=datetime.now(),
        )

        assert quote2.quote_id == quote.quote_id

    @pytest.mark.asyncio
    async def test_quotes_repository_bulk_upsert_quote_values(self, db_session):
        """
        Тест массового upsert значений котировок.

        Требование: схема quotes должна существовать в БД.
        """
        repo = QuotesRepository(db_session)

        section = await repo.get_section_by_name("cbr")
        if section is None:
            pytest.skip("Section 'cbr' not found - cannot test bulk upsert")

        from datetime import datetime

        quote = await repo.upsert_quote(
            section=section,
            name="TEST_BULK_USD",
            last_update_dttm=datetime.now(),
        )

        test_rows = [
            {
                "dt": datetime(2025, 1, 15, i, 0, 0),
                "value_base": 75.0 + i,
            }
            for i in range(5)
        ]

        await repo.bulk_upsert_quote_values(quote, test_rows)
        await db_session.commit()

        result = await db_session.execute(
            text("SELECT COUNT(*) FROM quotes.quote_value WHERE quote_id = :quote_id"),
            {"quote_id": quote.quote_id},
        )
        count = result.scalar()

        assert count == 5

    @pytest.mark.asyncio
    async def test_tenera_pipeline_processes_all_sources(self, db_session):
        """
        Тест что пайплайн обрабатывает все источники.

        Требования:
        - SuperTenera API должен быть доступен
        - Все секции должны существовать в БД
        """
        try:
            async with get_async_tenera_interface() as tenera:
                data = await tenera.get_quotes_data()

                sources_with_data = []
                for source_name in ["cbr", "investing", "sgx", "tradingeconomics", "bloomberg", "trading_view"]:
                    source_data = getattr(data, source_name, None)
                    if source_data:
                        sources_with_data.append(source_name)

                assert len(sources_with_data) > 0

        except Exception as e:
            pytest.skip(f"SuperTenera API not available: {e}")
@@ -0,0 +1,689 @@
from __future__ import annotations

from datetime import datetime, timedelta, timezone
from uuid import uuid4

import pytest
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession

from dataloader.storage.models import DLJob
from dataloader.storage.repositories import QueueRepository
from dataloader.storage.schemas import CreateJobRequest


@pytest.mark.integration
class TestQueueRepository:
    """
    Integration tests for QueueRepository.
    """

    async def test_create_or_get_creates_new_job(
        self,
        db_session: AsyncSession,
        clean_queue_tables,
        job_id: str,
        queue_name: str,
        task_name: str,
    ):
        """
        Test creating a new job in the queue.
        """
        repo = QueueRepository(db_session)

        req = CreateJobRequest(
            job_id=job_id,
            queue=queue_name,
            task=task_name,
            args={"param": "value"},
            idempotency_key="test_key_1",
            lock_key="test_lock_1",
            partition_key="part1",
            priority=100,
            available_at=datetime.now(timezone.utc),
            max_attempts=5,
            lease_ttl_sec=60,
            producer="test_producer",
            consumer_group="test_group",
        )

        created_id, status = await repo.create_or_get(req)

        assert created_id == job_id
        assert status == "queued"

    async def test_create_or_get_returns_existing_job(
        self,
        db_session: AsyncSession,
        clean_queue_tables,
        job_id: str,
        queue_name: str,
        task_name: str,
    ):
        """
        Idempotency test: a repeated call returns the existing job.
        """
        repo = QueueRepository(db_session)

        req = CreateJobRequest(
            job_id=job_id,
            queue=queue_name,
            task=task_name,
            args={},
            idempotency_key="idempotent_key_1",
            lock_key="lock1",
            partition_key="",
            priority=100,
            available_at=datetime.now(timezone.utc),
            max_attempts=5,
            lease_ttl_sec=60,
            producer=None,
            consumer_group=None,
        )

        created_id_1, status_1 = await repo.create_or_get(req)

        req_2 = CreateJobRequest(
            job_id="different_job_id",
            queue="different_queue",
            task="different_task",
            args={},
            idempotency_key="idempotent_key_1",
            lock_key="lock2",
            partition_key="",
            priority=200,
            available_at=datetime.now(timezone.utc),
            max_attempts=3,
            lease_ttl_sec=30,
            producer=None,
            consumer_group=None,
        )

        created_id_2, status_2 = await repo.create_or_get(req_2)

        assert created_id_1 == created_id_2 == job_id
        assert status_1 == status_2 == "queued"

    async def test_get_status_returns_job_status(
        self,
        db_session: AsyncSession,
        clean_queue_tables,
        job_id: str,
        queue_name: str,
        task_name: str,
    ):
        """
        Test fetching a job status.
        """
        repo = QueueRepository(db_session)

        req = CreateJobRequest(
            job_id=job_id,
            queue=queue_name,
            task=task_name,
            args={"key": "val"},
            idempotency_key=None,
            lock_key="lock",
            partition_key="",
            priority=100,
            available_at=datetime.now(timezone.utc),
            max_attempts=5,
            lease_ttl_sec=60,
            producer=None,
            consumer_group=None,
        )

        await repo.create_or_get(req)

        status = await repo.get_status(job_id)

        assert status is not None
        assert status.job_id == job_id
        assert status.status == "queued"
        assert status.attempt == 0
        assert status.error is None

    async def test_get_status_returns_none_for_nonexistent_job(
        self,
        db_session: AsyncSession,
        clean_queue_tables,
    ):
        """
        Test fetching the status of a nonexistent job.
        """
        repo = QueueRepository(db_session)

        status = await repo.get_status("00000000-0000-0000-0000-000000000000")

        assert status is None

    async def test_cancel_sets_cancel_requested_flag(
        self,
        db_session: AsyncSession,
        clean_queue_tables,
        job_id: str,
        queue_name: str,
        task_name: str,
    ):
        """
        Test setting the cancel flag.
        """
        repo = QueueRepository(db_session)

        req = CreateJobRequest(
            job_id=job_id,
            queue=queue_name,
            task=task_name,
            args={},
            idempotency_key=None,
            lock_key="lock",
            partition_key="",
            priority=100,
            available_at=datetime.now(timezone.utc),
            max_attempts=5,
            lease_ttl_sec=60,
            producer=None,
            consumer_group=None,
        )

        await repo.create_or_get(req)

        result = await repo.cancel(job_id)
        assert result is True

        status = await repo.get_status(job_id)
        assert status is not None

    async def test_claim_one_returns_job_for_processing(
        self,
        db_session: AsyncSession,
        clean_queue_tables,
        job_id: str,
        queue_name: str,
        task_name: str,
    ):
        """
        Test claiming a job for processing.
        """
        repo = QueueRepository(db_session)

        req = CreateJobRequest(
            job_id=job_id,
            queue=queue_name,
            task=task_name,
            args={"data": "test"},
            idempotency_key=None,
            lock_key="lock_claim",
            partition_key="partition1",
            priority=50,
            available_at=datetime.now(timezone.utc),
            max_attempts=5,
            lease_ttl_sec=120,
            producer=None,
            consumer_group=None,
        )

        await repo.create_or_get(req)

        claimed = await repo.claim_one(queue_name, claim_backoff_sec=15)

        assert claimed is not None
        assert claimed["job_id"] == job_id
        assert claimed["queue"] == queue_name
        assert claimed["task"] == task_name
        assert claimed["args"] == {"data": "test"}
        assert claimed["lock_key"] == "lock_claim"
        assert claimed["attempt"] == 1

        status = await repo.get_status(job_id)
        assert status is not None
        assert status.status == "running"
        assert status.attempt == 1

    async def test_claim_one_returns_none_when_no_jobs(
        self,
        db_session: AsyncSession,
        clean_queue_tables,
        queue_name: str,
    ):
        """
        Test claiming from an empty queue.
        """
        repo = QueueRepository(db_session)

        claimed = await repo.claim_one(queue_name, claim_backoff_sec=15)

        assert claimed is None

    async def test_heartbeat_updates_lease(
        self,
        db_session: AsyncSession,
        clean_queue_tables,
        job_id: str,
        queue_name: str,
        task_name: str,
    ):
        """
        Test updating the heartbeat and extending the lease.
        """
        repo = QueueRepository(db_session)

        req = CreateJobRequest(
            job_id=job_id,
            queue=queue_name,
            task=task_name,
            args={},
            idempotency_key=None,
            lock_key="lock_hb",
            partition_key="",
            priority=100,
            available_at=datetime.now(timezone.utc),
            max_attempts=5,
            lease_ttl_sec=60,
            producer=None,
            consumer_group=None,
        )

        await repo.create_or_get(req)
        await repo.claim_one(queue_name, claim_backoff_sec=15)

        success, cancel_requested = await repo.heartbeat(job_id, ttl_sec=90)

        assert success is True
        assert cancel_requested is False

    async def test_finish_ok_marks_job_succeeded(
        self,
        db_session: AsyncSession,
        clean_queue_tables,
        job_id: str,
        queue_name: str,
        task_name: str,
    ):
        """
        Test successful job completion.
        """
        repo = QueueRepository(db_session)

        req = CreateJobRequest(
            job_id=job_id,
            queue=queue_name,
            task=task_name,
            args={},
            idempotency_key=None,
            lock_key="lock_finish",
            partition_key="",
            priority=100,
            available_at=datetime.now(timezone.utc),
            max_attempts=5,
            lease_ttl_sec=60,
            producer=None,
            consumer_group=None,
        )

        await repo.create_or_get(req)
        await repo.claim_one(queue_name, claim_backoff_sec=15)

        await repo.finish_ok(job_id)

        status = await repo.get_status(job_id)
        assert status is not None
        assert status.status == "succeeded"
        assert status.finished_at is not None

    async def test_finish_fail_or_retry_requeues_on_retry(
        self,
        db_session: AsyncSession,
        clean_queue_tables,
        job_id: str,
        queue_name: str,
        task_name: str,
    ):
        """
        Test re-queuing on an error while retries remain.
        """
        repo = QueueRepository(db_session)

        req = CreateJobRequest(
            job_id=job_id,
            queue=queue_name,
            task=task_name,
            args={},
            idempotency_key=None,
            lock_key="lock_retry",
            partition_key="",
            priority=100,
            available_at=datetime.now(timezone.utc),
            max_attempts=3,
            lease_ttl_sec=60,
            producer=None,
            consumer_group=None,
        )

        await repo.create_or_get(req)
        await repo.claim_one(queue_name, claim_backoff_sec=15)

        await repo.finish_fail_or_retry(job_id, err="Test error")

        status = await repo.get_status(job_id)
        assert status is not None
        assert status.status == "queued"
        assert status.error == "Test error"
        assert status.attempt == 1

    async def test_finish_fail_or_retry_marks_failed_when_max_attempts_reached(
        self,
        db_session: AsyncSession,
        clean_queue_tables,
        job_id: str,
        queue_name: str,
        task_name: str,
    ):
        """
        Test the final failure once max_attempts is reached.
        """
        repo = QueueRepository(db_session)

        req = CreateJobRequest(
            job_id=job_id,
            queue=queue_name,
            task=task_name,
            args={},
            idempotency_key=None,
            lock_key="lock_fail",
            partition_key="",
            priority=100,
            available_at=datetime.now(timezone.utc),
            max_attempts=1,
            lease_ttl_sec=60,
            producer=None,
            consumer_group=None,
        )

        await repo.create_or_get(req)
        await repo.claim_one(queue_name, claim_backoff_sec=15)

        await repo.finish_fail_or_retry(job_id, err="Final error")

        status = await repo.get_status(job_id)
        assert status is not None
        assert status.status == "failed"
        assert status.error == "Final error"
        assert status.finished_at is not None

    async def test_requeue_lost_returns_expired_jobs(
        self,
        db_session: AsyncSession,
        clean_queue_tables,
        job_id: str,
        queue_name: str,
        task_name: str,
    ):
        """
        Test the reaper: expired jobs are returned to the queue.
        """
        repo = QueueRepository(db_session)

        req = CreateJobRequest(
            job_id=job_id,
            queue=queue_name,
            task=task_name,
            args={},
            idempotency_key=None,
            lock_key="lock_lost",
            partition_key="",
            priority=100,
            available_at=datetime.now(timezone.utc),
            max_attempts=5,
            lease_ttl_sec=1,
            producer=None,
            consumer_group=None,
        )

        await repo.create_or_get(req)
        await repo.claim_one(queue_name, claim_backoff_sec=15)

        import asyncio

        await asyncio.sleep(2)

        requeued = await repo.requeue_lost()

        assert job_id in requeued

        status = await repo.get_status(job_id)
        assert status is not None
        assert status.status == "queued"

    async def test_claim_one_fails_on_advisory_lock_and_sets_backoff(
        self,
        db_session: AsyncSession,
        clean_queue_tables,
        job_id: str,
        queue_name: str,
        task_name: str,
    ):
        """
        Advisory-lock refusal branch: the job goes back to queued with a deferred available_at.
        """
        repo = QueueRepository(db_session)

        req = CreateJobRequest(
            job_id=job_id,
            queue=queue_name,
            task=task_name,
            args={"k": "v"},
            idempotency_key=None,
            lock_key="lock-fail-adv",
            partition_key="",
            priority=10,
            available_at=datetime.now(timezone.utc),
            max_attempts=5,
            lease_ttl_sec=30,
            producer=None,
            consumer_group=None,
        )
        await repo.create_or_get(req)

        async def _false_lock(_: str) -> bool:
            return False

        repo._try_advisory_lock = _false_lock  # type: ignore[method-assign]

        before = datetime.now(timezone.utc)
        claimed = await repo.claim_one(queue_name, claim_backoff_sec=15)
        after = datetime.now(timezone.utc)

        assert claimed is None

        st = await repo.get_status(job_id)
        assert st is not None
        assert st.status == "queued"

        row = (
            await db_session.execute(select(DLJob).where(DLJob.job_id == job_id))
        ).scalar_one()
        assert row.available_at >= before + timedelta(seconds=15)
        assert row.available_at <= after + timedelta(seconds=60)

    async def test_heartbeat_when_not_running_returns_false(
        self,
        db_session: AsyncSession,
        clean_queue_tables,
        job_id: str,
        queue_name: str,
        task_name: str,
    ):
        """
        Heartbeat for a job that is not running returns (False, False).
        """
        repo = QueueRepository(db_session)

        req = CreateJobRequest(
            job_id=job_id,
            queue=queue_name,
            task=task_name,
            args={},
            idempotency_key=None,
            lock_key="lock-hb-not-running",
            partition_key="",
            priority=100,
            available_at=datetime.now(timezone.utc),
            max_attempts=5,
            lease_ttl_sec=60,
            producer=None,
            consumer_group=None,
        )
        await repo.create_or_get(req)

        ok, cancel = await repo.heartbeat(job_id, ttl_sec=30)
        assert ok is False
        assert cancel is False

    async def test_finish_fail_or_retry_marks_canceled_branch(
        self,
        db_session: AsyncSession,
        clean_queue_tables,
        job_id: str,
        queue_name: str,
        task_name: str,
    ):
        """
        The is_canceled=True branch marks the job as canceled and finishes it.
        """
        repo = QueueRepository(db_session)

        req = CreateJobRequest(
            job_id=job_id,
            queue=queue_name,
            task=task_name,
            args={},
            idempotency_key=None,
            lock_key="lock-cancel",
            partition_key="",
            priority=100,
            available_at=datetime.now(timezone.utc),
            max_attempts=5,
            lease_ttl_sec=60,
            producer=None,
            consumer_group=None,
        )
        await repo.create_or_get(req)
        await repo.claim_one(queue_name, claim_backoff_sec=5)

        await repo.finish_fail_or_retry(
            job_id, err="Canceled by test", is_canceled=True
        )

        st = await repo.get_status(job_id)
        assert st is not None
        assert st.status == "canceled"
        assert st.error == "Canceled by test"
        assert st.finished_at is not None

    async def test_requeue_lost_no_expired_returns_empty(
        self,
        db_session: AsyncSession,
        clean_queue_tables,
        job_id: str,
        queue_name: str,
        task_name: str,
    ):
        """
        requeue_lost returns an empty list when no jobs have expired.
        """
        repo = QueueRepository(db_session)

        req = CreateJobRequest(
            job_id=job_id,
            queue=queue_name,
            task=task_name,
            args={},
            idempotency_key=None,
            lock_key="lock-none-expired",
            partition_key="",
            priority=100,
            available_at=datetime.now(timezone.utc),
            max_attempts=5,
            lease_ttl_sec=120,
            producer=None,
            consumer_group=None,
        )
        await repo.create_or_get(req)
        await repo.claim_one(queue_name, claim_backoff_sec=5)

        res = await repo.requeue_lost(now=datetime.now(timezone.utc))
        assert res == []

        st = await repo.get_status(job_id)
        assert st is not None
        assert st.status == "running"

    async def test_private_helpers_resolve_queue_and_advisory_unlock_are_executable(
        self,
        db_session: AsyncSession,
        clean_queue_tables,
        job_id: str,
        queue_name: str,
        task_name: str,
    ):
        """
        Direct runs of the private helpers to cover rare branches.
        """
        repo = QueueRepository(db_session)

        rq = CreateJobRequest(
            job_id=job_id,
            queue=queue_name,
            task=task_name,
            args={},
            idempotency_key=None,
            lock_key="lock-direct-unlock",
            partition_key="",
            priority=1,
            available_at=datetime.now(timezone.utc),
            max_attempts=1,
            lease_ttl_sec=5,
            producer=None,
            consumer_group=None,
        )
        await repo.create_or_get(rq)

        missing_uuid = str(uuid4())
        qname = await repo._resolve_queue(missing_uuid)  # type: ignore[attr-defined]
        assert qname == ""

        await repo._advisory_unlock("lock-direct-unlock")  # type: ignore[attr-defined]

    async def test_cancel_returns_false_for_nonexistent_job(
        self,
        db_session: AsyncSession,
        clean_queue_tables,
    ):
        """
        Returns False when canceling a nonexistent job.
        """
        repo = QueueRepository(db_session)
        assert await repo.cancel(str(uuid4())) is False

    async def test_finish_ok_silent_when_job_absent(
        self,
        db_session: AsyncSession,
        clean_queue_tables,
    ):
        """
        Finishes silently when the job is not found.
        """
        repo = QueueRepository(db_session)
        await repo.finish_ok(str(uuid4()))

    async def test_finish_fail_or_retry_noop_when_job_absent(
        self,
        db_session: AsyncSession,
        clean_queue_tables,
    ):
        """
        Exits silently when the job is absent.
        """
        repo = QueueRepository(db_session)
        await repo.finish_fail_or_retry(str(uuid4()), err="no-op")
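These integration tests depend on `db_session`, `clean_queue_tables`, `job_id`, `queue_name`, and `task_name` fixtures that are defined elsewhere (most likely in a `conftest.py` not included in this diff). A minimal sketch of what such fixtures could look like is given below, under the assumption of an existing async `db_session` fixture and the `DLJob` model imported above; the fixture bodies are illustrative, not the project's actual conftest.

```python
import uuid

import pytest
from sqlalchemy import delete
from sqlalchemy.ext.asyncio import AsyncSession

from dataloader.storage.models import DLJob


@pytest.fixture
def job_id() -> str:
    """Fresh UUID string per test."""
    return str(uuid.uuid4())


@pytest.fixture
def queue_name() -> str:
    return "test_queue"


@pytest.fixture
def task_name() -> str:
    return "test.task"


@pytest.fixture
async def clean_queue_tables(db_session: AsyncSession):
    """Hypothetical cleanup: wipe the queue table before each test runs."""
    await db_session.execute(delete(DLJob))
    await db_session.commit()
```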
@@ -0,0 +1 @@

@@ -0,0 +1,366 @@
from __future__ import annotations

from datetime import datetime, timezone
from unittest.mock import AsyncMock, Mock, patch
from uuid import UUID

import pytest

from dataloader.api.v1.schemas import TriggerJobRequest
from dataloader.api.v1.service import JobsService
from dataloader.storage.schemas import JobStatus


@pytest.mark.unit
class TestJobsService:
    """
    Unit tests for JobsService.
    """

    def test_init_creates_service_with_session(self):
        """
        Test creating the service with a session.
        """
        mock_session = AsyncMock()

        with patch("dataloader.api.v1.service.get_logger") as mock_get_logger:
            mock_get_logger.return_value = Mock()

            service = JobsService(mock_session)

            assert service._s == mock_session
            assert service._repo is not None

    @pytest.mark.asyncio
    async def test_trigger_creates_new_job(self):
        """
        Test creating a new job via trigger.
        """
        mock_session = AsyncMock()

        with (
            patch("dataloader.api.v1.service.get_logger") as mock_get_logger,
            patch("dataloader.api.v1.service.QueueRepository") as mock_repo_cls,
            patch("dataloader.api.v1.service.new_job_id") as mock_new_job_id,
        ):
            mock_get_logger.return_value = Mock()
            mock_new_job_id.return_value = UUID("12345678-1234-5678-1234-567812345678")

            mock_repo = Mock()
            mock_repo.create_or_get = AsyncMock(
                return_value=("12345678-1234-5678-1234-567812345678", "queued")
            )
            mock_repo_cls.return_value = mock_repo

            service = JobsService(mock_session)

            req = TriggerJobRequest(
                queue="test_queue",
                task="test.task",
                args={"key": "value"},
                lock_key="lock_1",
                priority=100,
                max_attempts=5,
                lease_ttl_sec=60,
            )

            response = await service.trigger(req)

            assert response.job_id == UUID("12345678-1234-5678-1234-567812345678")
            assert response.status == "queued"
            mock_repo.create_or_get.assert_called_once()

    @pytest.mark.asyncio
    async def test_trigger_with_idempotency_key(self):
        """
        Test creating a job with an idempotency_key.
        """
        mock_session = AsyncMock()

        with (
            patch("dataloader.api.v1.service.get_logger") as mock_get_logger,
            patch("dataloader.api.v1.service.QueueRepository") as mock_repo_cls,
            patch("dataloader.api.v1.service.new_job_id") as mock_new_job_id,
        ):
            mock_get_logger.return_value = Mock()
            mock_new_job_id.return_value = UUID("12345678-1234-5678-1234-567812345678")

            mock_repo = Mock()
            mock_repo.create_or_get = AsyncMock(
                return_value=("12345678-1234-5678-1234-567812345678", "queued")
            )
            mock_repo_cls.return_value = mock_repo

            service = JobsService(mock_session)

            req = TriggerJobRequest(
                queue="test_queue",
                task="test.task",
                args={},
                idempotency_key="unique_key_123",
                lock_key="lock_1",
                priority=100,
                max_attempts=5,
                lease_ttl_sec=60,
            )

            response = await service.trigger(req)

            assert response.status == "queued"

            call_args = mock_repo.create_or_get.call_args[0][0]
            assert call_args.idempotency_key == "unique_key_123"

    @pytest.mark.asyncio
    async def test_trigger_with_available_at(self):
        """
        Test creating a job with a deferred start.
        """
        mock_session = AsyncMock()

        with (
            patch("dataloader.api.v1.service.get_logger") as mock_get_logger,
            patch("dataloader.api.v1.service.QueueRepository") as mock_repo_cls,
            patch("dataloader.api.v1.service.new_job_id") as mock_new_job_id,
        ):
            mock_get_logger.return_value = Mock()
            mock_new_job_id.return_value = UUID("12345678-1234-5678-1234-567812345678")

            mock_repo = Mock()
            mock_repo.create_or_get = AsyncMock(
                return_value=("12345678-1234-5678-1234-567812345678", "queued")
            )
            mock_repo_cls.return_value = mock_repo

            service = JobsService(mock_session)

            future_time = datetime(2025, 12, 31, 23, 59, 59, tzinfo=timezone.utc)

            req = TriggerJobRequest(
                queue="test_queue",
                task="test.task",
                args={},
                lock_key="lock_1",
                available_at=future_time,
                priority=100,
                max_attempts=5,
                lease_ttl_sec=60,
            )

            await service.trigger(req)

            call_args = mock_repo.create_or_get.call_args[0][0]
            assert call_args.available_at == future_time

    @pytest.mark.asyncio
    async def test_trigger_with_optional_fields(self):
        """
        Test creating a job with the optional fields.
        """
        mock_session = AsyncMock()

        with (
            patch("dataloader.api.v1.service.get_logger") as mock_get_logger,
            patch("dataloader.api.v1.service.QueueRepository") as mock_repo_cls,
            patch("dataloader.api.v1.service.new_job_id") as mock_new_job_id,
        ):
            mock_get_logger.return_value = Mock()
            mock_new_job_id.return_value = UUID("12345678-1234-5678-1234-567812345678")

            mock_repo = Mock()
            mock_repo.create_or_get = AsyncMock(
                return_value=("12345678-1234-5678-1234-567812345678", "queued")
            )
            mock_repo_cls.return_value = mock_repo

            service = JobsService(mock_session)

            req = TriggerJobRequest(
                queue="test_queue",
                task="test.task",
                args={},
                lock_key="lock_1",
                partition_key="partition_1",
                producer="test_producer",
                consumer_group="test_group",
                priority=100,
                max_attempts=5,
                lease_ttl_sec=60,
            )

            await service.trigger(req)

            call_args = mock_repo.create_or_get.call_args[0][0]
            assert call_args.partition_key == "partition_1"
            assert call_args.producer == "test_producer"
            assert call_args.consumer_group == "test_group"

    @pytest.mark.asyncio
    async def test_status_returns_job_status(self):
        """
        Test fetching the status of an existing job.
        """
        mock_session = AsyncMock()

        with (
            patch("dataloader.api.v1.service.get_logger") as mock_get_logger,
            patch("dataloader.api.v1.service.QueueRepository") as mock_repo_cls,
        ):
            mock_get_logger.return_value = Mock()

            mock_repo = Mock()
            mock_status = JobStatus(
                job_id="12345678-1234-5678-1234-567812345678",
                status="running",
                attempt=1,
                started_at=datetime(2025, 1, 1, 12, 0, 0, tzinfo=timezone.utc),
                finished_at=None,
                heartbeat_at=datetime(2025, 1, 1, 12, 5, 0, tzinfo=timezone.utc),
                error=None,
                progress={"step": 1},
            )
            mock_repo.get_status = AsyncMock(return_value=mock_status)
            mock_repo_cls.return_value = mock_repo

            service = JobsService(mock_session)

            job_id = UUID("12345678-1234-5678-1234-567812345678")
            response = await service.status(job_id)

            assert response is not None
            assert response.job_id == job_id
            assert response.status == "running"
            assert response.attempt == 1
            assert response.progress == {"step": 1}

    @pytest.mark.asyncio
    async def test_status_returns_none_for_nonexistent_job(self):
        """
        Test fetching the status of a nonexistent job.
        """
        mock_session = AsyncMock()

        with (
            patch("dataloader.api.v1.service.get_logger") as mock_get_logger,
            patch("dataloader.api.v1.service.QueueRepository") as mock_repo_cls,
        ):
            mock_get_logger.return_value = Mock()

            mock_repo = Mock()
            mock_repo.get_status = AsyncMock(return_value=None)
            mock_repo_cls.return_value = mock_repo

            service = JobsService(mock_session)

            job_id = UUID("00000000-0000-0000-0000-000000000000")
            response = await service.status(job_id)

            assert response is None

    @pytest.mark.asyncio
    async def test_cancel_cancels_job_and_returns_status(self):
        """
        Test canceling a job and fetching its status.
        """
        mock_session = AsyncMock()

        with (
            patch("dataloader.api.v1.service.get_logger") as mock_get_logger,
            patch("dataloader.api.v1.service.QueueRepository") as mock_repo_cls,
        ):
            mock_get_logger.return_value = Mock()

            mock_repo = Mock()
            mock_repo.cancel = AsyncMock()
            mock_status = JobStatus(
                job_id="12345678-1234-5678-1234-567812345678",
                status="running",
                attempt=1,
                started_at=datetime(2025, 1, 1, 12, 0, 0, tzinfo=timezone.utc),
                finished_at=None,
                heartbeat_at=datetime(2025, 1, 1, 12, 5, 0, tzinfo=timezone.utc),
                error=None,
                progress={},
            )
            mock_repo.get_status = AsyncMock(return_value=mock_status)
            mock_repo_cls.return_value = mock_repo

            service = JobsService(mock_session)

            job_id = UUID("12345678-1234-5678-1234-567812345678")
            response = await service.cancel(job_id)

            assert response is not None
            assert response.job_id == job_id
            assert response.status == "running"
            mock_repo.cancel.assert_called_once_with(str(job_id))

    @pytest.mark.asyncio
    async def test_cancel_returns_none_for_nonexistent_job(self):
        """
        Test canceling a nonexistent job.
        """
        mock_session = AsyncMock()

        with (
            patch("dataloader.api.v1.service.get_logger") as mock_get_logger,
            patch("dataloader.api.v1.service.QueueRepository") as mock_repo_cls,
        ):
            mock_get_logger.return_value = Mock()

            mock_repo = Mock()
            mock_repo.cancel = AsyncMock()
            mock_repo.get_status = AsyncMock(return_value=None)
            mock_repo_cls.return_value = mock_repo

            service = JobsService(mock_session)

            job_id = UUID("00000000-0000-0000-0000-000000000000")
            response = await service.cancel(job_id)

            assert response is None
            mock_repo.cancel.assert_called_once()

    @pytest.mark.asyncio
    async def test_status_handles_empty_progress(self):
        """
        Test handling of a None progress.
        """
        mock_session = AsyncMock()

        with (
            patch("dataloader.api.v1.service.get_logger") as mock_get_logger,
            patch("dataloader.api.v1.service.QueueRepository") as mock_repo_cls,
        ):
            mock_get_logger.return_value = Mock()

            mock_repo = Mock()
            mock_status = JobStatus(
                job_id="12345678-1234-5678-1234-567812345678",
                status="queued",
                attempt=0,
                started_at=None,
                finished_at=None,
                heartbeat_at=None,
                error=None,
                progress=None,
            )
            mock_repo.get_status = AsyncMock(return_value=mock_status)
            mock_repo_cls.return_value = mock_repo

            service = JobsService(mock_session)

            job_id = UUID("12345678-1234-5678-1234-567812345678")
            response = await service.status(job_id)

            assert response is not None
            assert response.progress == {}
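The core idiom in these unit tests is `AsyncMock` standing in for the async repository methods, combined with the parenthesized multi-target `with (...)` patch block (Python 3.10+). A tiny self-contained, runnable sketch of that pattern outside the project is shown below; `fake_repo` and its `create_or_get` are stand-ins for this example only, not identifiers from the repository.

```python
import asyncio
from unittest.mock import AsyncMock, Mock


async def main() -> None:
    # AsyncMock lets an async repository method be awaited and then inspected,
    # which is the mechanism behind the JobsService tests above.
    fake_repo = Mock()
    fake_repo.create_or_get = AsyncMock(return_value=("job-1", "queued"))

    created_id, status = await fake_repo.create_or_get({"queue": "q", "task": "t"})

    assert (created_id, status) == ("job-1", "queued")
    fake_repo.create_or_get.assert_awaited_once()


asyncio.run(main())
```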
@@ -0,0 +1,414 @@
from __future__ import annotations

import json
from logging import DEBUG, INFO
from unittest.mock import patch

import pytest

from dataloader.config import (
    AppSettings,
    BaseAppSettings,
    LogSettings,
    PGSettings,
    Secrets,
    WorkerSettings,
)


@pytest.mark.unit
class TestBaseAppSettings:
    """
    Unit tests for BaseAppSettings.
    """

    def test_default_values(self):
        """
        Test the default values.
        """
        settings = BaseAppSettings()

        assert settings.local is False
        assert settings.debug is False

    def test_protocol_returns_http_when_not_local(self):
        """
        Test that protocol returns http in non-local mode.
        """
        with patch.dict("os.environ", {"LOCAL": "false"}):
            settings = BaseAppSettings()

            assert settings.protocol == "http"

    def test_protocol_returns_https_when_local(self):
        """
        Test that protocol returns https in local mode.
        """
        with patch.dict("os.environ", {"LOCAL": "true"}):
            settings = BaseAppSettings()

            assert settings.protocol == "https"

    def test_loads_from_env(self):
        """
        Test loading from environment variables.
        """
        with patch.dict("os.environ", {"LOCAL": "true", "DEBUG": "true"}):
            settings = BaseAppSettings()

            assert settings.local is True
            assert settings.debug is True


@pytest.mark.unit
class TestAppSettings:
    """
    Unit tests for AppSettings.
    """

    def test_default_values(self):
        """
        Test the default values.
        """
        settings = AppSettings()

        assert settings.app_host == "0.0.0.0"
        assert settings.app_port == 8081
        assert settings.kube_net_name == "AIGATEWAY"
        assert settings.timezone == "Europe/Moscow"

    def test_loads_from_env(self):
        """
        Test loading from environment variables.
        """
        with patch.dict(
            "os.environ",
            {
                "APP_HOST": "127.0.0.1",
                "APP_PORT": "9000",
                "PROJECT_NAME": "TestProject",
                "TIMEZONE": "UTC",
            },
        ):
            settings = AppSettings()

            assert settings.app_host == "127.0.0.1"
            assert settings.app_port == 9000
            assert settings.kube_net_name == "TestProject"
            assert settings.timezone == "UTC"


@pytest.mark.unit
class TestLogSettings:
    """
    Unit tests for LogSettings.
    """

    def test_default_values(self):
        """
        Test the default values.
        """
        settings = LogSettings()

        assert settings.private_log_file_name == "app.log"
        assert settings.log_rotation == "10 MB"
        assert settings.private_metric_file_name == "app-metric.log"
        assert settings.private_audit_file_name == "events.log"
        assert settings.audit_host_ip == "127.0.0.1"

    def test_get_file_abs_path_joins_path_and_file(self):
        """
        Test joining a path and a file name.
        """
        path = LogSettings.get_file_abs_path("/var/log/", "app.log")

        assert "app.log" in path
        assert path.startswith("var")

    def test_get_file_abs_path_handles_trailing_slashes(self):
        """
        Test handling of extra slashes in paths.
        """
        path = LogSettings.get_file_abs_path("/var/log///", "///app.log")

        assert "app.log" in path
        assert path.startswith("var")
        assert not path.startswith("/")

    def test_log_file_abs_path_property(self):
        """
        Test the log_file_abs_path property.
        """
        with patch.dict(
            "os.environ", {"LOG_PATH": "/var/log", "LOG_FILE_NAME": "test.log"}
        ):
            settings = LogSettings()

            assert "test.log" in settings.log_file_abs_path
            assert settings.log_file_abs_path.startswith("var")

    def test_metric_file_abs_path_property(self):
        """
        Test the metric_file_abs_path property.
        """
        with patch.dict(
            "os.environ",
            {"METRIC_PATH": "/var/metrics", "METRIC_FILE_NAME": "metrics.log"},
        ):
            settings = LogSettings()

            assert "metrics.log" in settings.metric_file_abs_path
            assert settings.metric_file_abs_path.startswith("var")

    def test_audit_file_abs_path_property(self):
        """
        Test the audit_file_abs_path property.
        """
        with patch.dict(
            "os.environ",
            {"AUDIT_LOG_PATH": "/var/audit", "AUDIT_LOG_FILE_NAME": "audit.log"},
        ):
            settings = LogSettings()

            assert "audit.log" in settings.audit_file_abs_path
            assert settings.audit_file_abs_path.startswith("var")

    def test_log_lvl_returns_debug_when_debug_enabled(self):
        """
        Test that log_lvl returns DEBUG in debug mode.
        """
        with patch.dict("os.environ", {"DEBUG": "true"}):
            settings = LogSettings()

            assert settings.log_lvl == DEBUG

    def test_log_lvl_returns_info_when_debug_disabled(self):
        """
        Test that log_lvl returns INFO in normal mode.
        """
        with patch.dict("os.environ", {"DEBUG": "false"}):
            settings = LogSettings()

            assert settings.log_lvl == INFO


@pytest.mark.unit
class TestPGSettings:
    """
    Unit tests for PGSettings.
    """

    def test_default_values(self):
        """
        Test the default values.
        """
        with patch.dict("os.environ", {}, clear=True):
            settings = PGSettings()

            assert settings.host == "localhost"
            assert settings.port == 5432
            assert settings.user == "postgres"
            assert settings.password == ""
            assert settings.database == "postgres"
            assert settings.schema_queue == "public"
            assert settings.use_pool is True
            assert settings.pool_size == 5
            assert settings.max_overflow == 10

    def test_url_property_returns_connection_string(self):
        """
        Test building the connection string.
        """
        with patch.dict(
            "os.environ",
            {
                "PG_HOST": "db.example.com",
                "PG_PORT": "5433",
                "PG_USER": "testuser",
                "PG_PASSWORD": "testpass",
                "PG_DATABASE": "testdb",
            },
        ):
            settings = PGSettings()

            expected = (
                "postgresql+asyncpg://testuser:testpass@db.example.com:5433/testdb"
            )
            assert settings.url == expected

    def test_url_property_with_empty_password(self):
        """
        Test the connection string with an empty password.
        """
        with patch.dict(
            "os.environ",
            {
                "PG_HOST": "localhost",
                "PG_PORT": "5432",
                "PG_USER": "postgres",
                "PG_PASSWORD": "",
                "PG_DATABASE": "testdb",
            },
        ):
            settings = PGSettings()

            expected = "postgresql+asyncpg://postgres:@localhost:5432/testdb"
            assert settings.url == expected

    def test_loads_from_env(self):
        """
        Test loading from environment variables.
        """
        with patch.dict(
            "os.environ",
            {
                "PG_HOST": "testhost",
                "PG_PORT": "5433",
                "PG_USER": "testuser",
                "PG_PASSWORD": "testpass",
                "PG_DATABASE": "testdb",
                "PG_SCHEMA_QUEUE": "queue_schema",
                "PG_POOL_SIZE": "20",
            },
        ):
            settings = PGSettings()

            assert settings.host == "testhost"
            assert settings.port == 5433
            assert settings.user == "testuser"
            assert settings.password == "testpass"
            assert settings.database == "testdb"
            assert settings.schema_queue == "queue_schema"
            assert settings.pool_size == 20


@pytest.mark.unit
class TestWorkerSettings:
    """
    Unit tests for WorkerSettings.
    """

    def test_default_values(self):
        """
        Test the default values.
        """
        with patch.dict("os.environ", {"WORKERS_JSON": "[]"}, clear=True):
            settings = WorkerSettings()

            assert settings.workers_json == "[]"
            assert settings.heartbeat_sec == 10
            assert settings.default_lease_ttl_sec == 60
            assert settings.reaper_period_sec == 10
            assert settings.claim_backoff_sec == 15

    def test_parsed_workers_returns_empty_list_for_default(self):
        """
        Test that parsed_workers returns an empty list by default.
        """
        with patch.dict("os.environ", {"WORKERS_JSON": "[]"}):
            settings = WorkerSettings()

            assert settings.parsed_workers() == []

    def test_parsed_workers_parses_valid_json(self):
        """
        Test parsing valid JSON.
        """
        workers_json = json.dumps(
            [
                {"queue": "queue1", "concurrency": 2},
                {"queue": "queue2", "concurrency": 3},
            ]
        )
        with patch.dict("os.environ", {"WORKERS_JSON": workers_json}):
            settings = WorkerSettings()

            workers = settings.parsed_workers()

            assert len(workers) == 2
            assert workers[0]["queue"] == "queue1"
            assert workers[0]["concurrency"] == 2
            assert workers[1]["queue"] == "queue2"
            assert workers[1]["concurrency"] == 3

    def test_parsed_workers_filters_non_dict_items(self):
        """
        Test filtering non-dict items out of the JSON.
        """
        workers_json = json.dumps(
            [
                {"queue": "queue1", "concurrency": 2},
                "invalid_item",
                123,
                {"queue": "queue2", "concurrency": 3},
            ]
        )
        with patch.dict("os.environ", {"WORKERS_JSON": workers_json}):
            settings = WorkerSettings()

            workers = settings.parsed_workers()

            assert len(workers) == 2
            assert all(isinstance(w, dict) for w in workers)

    def test_parsed_workers_handles_invalid_json(self):
        """
        Test handling of invalid JSON.
        """
        with patch.dict("os.environ", {"WORKERS_JSON": "not valid json"}):
            settings = WorkerSettings()

            workers = settings.parsed_workers()

            assert workers == []

    def test_parsed_workers_handles_empty_string(self):
        """
        Test handling of an empty string.
        """
        with patch.dict("os.environ", {"WORKERS_JSON": ""}):
            settings = WorkerSettings()

            workers = settings.parsed_workers()

            assert workers == []

    def test_parsed_workers_handles_null_json(self):
        """
        Test handling of null in the JSON.
        """
        with patch.dict("os.environ", {"WORKERS_JSON": "null"}):
            settings = WorkerSettings()

            workers = settings.parsed_workers()

            assert workers == []


@pytest.mark.unit
class TestSecrets:
    """
    Unit tests for Secrets.
    """

    def test_initializes_all_settings(self):
        """
        Test that Secrets initializes all settings groups.
        """
        secrets = Secrets()

        assert isinstance(secrets.app, AppSettings)
        assert isinstance(secrets.log, LogSettings)
        assert isinstance(secrets.pg, PGSettings)
        assert isinstance(secrets.worker, WorkerSettings)

    def test_all_settings_have_default_values(self):
        """
        Test that all settings have default values.
        """
        secrets = Secrets()

        assert secrets.app.app_host == "0.0.0.0"
        assert secrets.log.private_log_file_name == "app.log"
        assert secrets.pg.host == "localhost"
        assert secrets.worker.heartbeat_sec == 10
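The WorkerSettings tests above pin down a defensive parsing contract for `WORKERS_JSON`: invalid JSON, an empty string, `null`, and non-dict entries must all degrade to an empty or filtered list rather than raise. The standalone sketch below illustrates that contract; it is not the project's actual `parsed_workers` implementation, only a parser with the same observable behavior.

```python
from __future__ import annotations

import json
from typing import Any


def parse_workers(raw: str) -> list[dict[str, Any]]:
    """Return only dict entries from a JSON array; anything malformed yields []."""
    try:
        data = json.loads(raw or "[]")
    except (json.JSONDecodeError, TypeError):
        return []
    if not isinstance(data, list):
        return []
    return [item for item in data if isinstance(item, dict)]


# Mirrors the cases asserted by the tests above.
assert parse_workers("not valid json") == []
assert parse_workers("") == []
assert parse_workers("null") == []
assert parse_workers('[{"queue": "q1", "concurrency": 2}, "bad", 123]') == [
    {"queue": "q1", "concurrency": 2}
]
```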
@@ -0,0 +1,206 @@
from __future__ import annotations

from unittest.mock import AsyncMock, Mock, patch

import pytest

from dataloader.context import AppContext, get_session


@pytest.mark.unit
class TestAppContext:
    """
    Unit tests for AppContext.
    """

    def test_init_creates_empty_context(self):
        """
        Test creating an empty context.
        """
        ctx = AppContext()

        assert ctx._engine is None
        assert ctx._sessionmaker is None
        assert ctx._context_vars_container is not None

    def test_engine_property_raises_when_not_initialized(self):
        """
        Test that engine raises RuntimeError when not initialized.
        """
        ctx = AppContext()

        with pytest.raises(RuntimeError, match="Database engine is not initialized"):
            _ = ctx.engine

    def test_engine_property_returns_engine_when_initialized(self):
        """
        Test that engine returns the engine after initialization.
        """
        ctx = AppContext()
        mock_engine = Mock()
        ctx._engine = mock_engine

        assert ctx.engine == mock_engine

    def test_sessionmaker_property_raises_when_not_initialized(self):
        """
        Test that sessionmaker raises RuntimeError when not initialized.
        """
        ctx = AppContext()

        with pytest.raises(RuntimeError, match="Sessionmaker is not initialized"):
            _ = ctx.sessionmaker

    def test_sessionmaker_property_returns_sessionmaker_when_initialized(self):
        """
        Test that sessionmaker returns the factory after initialization.
        """
        ctx = AppContext()
        mock_sm = Mock()
        ctx._sessionmaker = mock_sm

        assert ctx.sessionmaker == mock_sm

    @pytest.mark.asyncio
    async def test_on_startup_initializes_engine_and_sessionmaker(self):
        """
        Test that on_startup initializes the engine and sessionmaker.
        """
        ctx = AppContext()

        mock_engine = Mock()
        mock_sm = Mock()

        with (
            patch("dataloader.logger.logger.setup_logging") as mock_setup_logging,
            patch(
                "dataloader.storage.engine.create_engine", return_value=mock_engine
            ) as mock_create_engine,
            patch(
                "dataloader.storage.engine.create_sessionmaker", return_value=mock_sm
            ) as mock_create_sm,
            patch("dataloader.context.APP_CONFIG") as mock_config,
        ):
            mock_config.pg.url = "postgresql://test"

            await ctx.on_startup()

            mock_setup_logging.assert_called_once()
            mock_create_engine.assert_called_once_with("postgresql://test")
            mock_create_sm.assert_called_once_with(mock_engine)
            assert ctx._engine == mock_engine
            assert ctx._sessionmaker == mock_sm

    @pytest.mark.asyncio
    async def test_on_shutdown_disposes_engine(self):
        """
        Test that on_shutdown disposes the database engine.
        """
        ctx = AppContext()

        mock_engine = AsyncMock()
        ctx._engine = mock_engine

        await ctx.on_shutdown()

        mock_engine.dispose.assert_called_once()

    @pytest.mark.asyncio
    async def test_on_shutdown_does_nothing_when_no_engine(self):
        """
        Test that on_shutdown is safe to call without an engine.
        """
        ctx = AppContext()

        await ctx.on_shutdown()

        assert ctx._engine is None

    def test_get_logger_returns_logger(self):
        """
        Test getting a logger.
        """
        ctx = AppContext()

        with patch("dataloader.logger.logger.get_logger") as mock_get_logger:
            mock_logger = Mock()
            mock_get_logger.return_value = mock_logger

            logger = ctx.get_logger("test_module")

            mock_get_logger.assert_called_once_with("test_module")
            assert logger == mock_logger

    def test_get_logger_without_name(self):
        """
        Test getting a logger without passing a name.
        """
        ctx = AppContext()

        with patch("dataloader.logger.logger.get_logger") as mock_get_logger:
            mock_logger = Mock()
            mock_get_logger.return_value = mock_logger

            logger = ctx.get_logger()

            mock_get_logger.assert_called_once_with(None)
            assert logger == mock_logger

    def test_get_context_vars_container_returns_container(self):
        """
        Test getting the context vars container.
        """
        ctx = AppContext()

        container = ctx.get_context_vars_container()

        assert container == ctx._context_vars_container


@pytest.mark.unit
class TestGetSession:
    """
    Unit tests for the get_session dependency.
    """

    @pytest.mark.asyncio
    async def test_get_session_yields_session(self):
        """
        Test that get_session yields a session.
        """
        mock_session = AsyncMock()

        mock_context_manager = AsyncMock()
        mock_context_manager.__aenter__.return_value = mock_session
        mock_context_manager.__aexit__.return_value = None

        mock_sm = Mock(return_value=mock_context_manager)

        with patch("dataloader.context.APP_CTX") as mock_ctx:
            mock_ctx.sessionmaker = mock_sm

            async for session in get_session():
                assert session == mock_session

    @pytest.mark.asyncio
    async def test_get_session_closes_session_after_use(self):
        """
        Test that get_session closes the session after use.
        """
        mock_session = AsyncMock()
        mock_exit = AsyncMock(return_value=None)

        mock_context_manager = AsyncMock()
        mock_context_manager.__aenter__.return_value = mock_session
        mock_context_manager.__aexit__ = mock_exit

        mock_sm = Mock(return_value=mock_context_manager)

        with patch("dataloader.context.APP_CTX") as mock_ctx:
            mock_ctx.sessionmaker = mock_sm

            async for _session in get_session():
                pass

            assert mock_exit.call_count == 1
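The TestGetSession cases above treat `get_session` as an async generator that obtains a session from `APP_CTX.sessionmaker` and always runs the context manager's exit afterwards. A minimal self-contained sketch of that shape is shown below; `FakeSession` and `fake_sessionmaker` are stand-ins invented for this example, while the real dependency lives in `dataloader.context`.

```python
import asyncio
from collections.abc import AsyncIterator
from contextlib import asynccontextmanager


class FakeSession:
    """Stand-in for an AsyncSession, used only by this sketch."""


@asynccontextmanager
async def fake_sessionmaker():
    session = FakeSession()
    try:
        yield session
    finally:
        # A real sessionmaker would close the session / return the connection here.
        print("session closed")


async def get_session_sketch() -> AsyncIterator[FakeSession]:
    # Mirrors the tested contract: yield the session, always exit the context manager.
    async with fake_sessionmaker() as session:
        yield session


async def main() -> None:
    async for session in get_session_sketch():
        assert isinstance(session, FakeSession)


asyncio.run(main())
```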
@@ -0,0 +1,346 @@
"""Unit tests for error handling in the interfaces."""

from __future__ import annotations

from unittest.mock import AsyncMock, MagicMock, patch

import aiohttp
import httpx
import pytest

from dataloader.interfaces.gmap2_brief.interface import (
    Gmap2BriefConnectionError,
    Gmap2BriefInterface,
)
from dataloader.interfaces.tenera.interface import (
    SuperTeneraConnectionError,
    SuperTeneraInterface,
)


@pytest.mark.unit
class TestGmap2BriefErrorHandling:
    """Tests for error handling in Gmap2BriefInterface."""

    @pytest.mark.asyncio
    async def test_start_export_handles_http_status_error(self):
        """Test handling of an HTTP status error in start_export."""
        mock_logger = MagicMock()

        with patch("dataloader.interfaces.gmap2_brief.interface.APP_CONFIG") as mock_config:
            mock_config.gmap2brief.base_url = "http://test.com"
            mock_config.gmap2brief.start_endpoint = "/start"
            mock_config.gmap2brief.timeout = 30
            mock_config.app.local = False

            interface = Gmap2BriefInterface(mock_logger)

            with patch("dataloader.interfaces.gmap2_brief.interface.httpx.AsyncClient") as mock_client_cls:
                mock_response = MagicMock()
                mock_response.status_code = 500
                mock_response.text = "Internal Server Error"
                mock_response.raise_for_status.side_effect = httpx.HTTPStatusError(
                    "Server error", request=MagicMock(), response=mock_response
                )

                mock_client = AsyncMock()
                mock_client.post.return_value = mock_response
                mock_client.__aenter__.return_value = mock_client
                mock_client.__aexit__.return_value = None
                mock_client_cls.return_value = mock_client

                with pytest.raises(Gmap2BriefConnectionError) as exc_info:
                    await interface.start_export()

                assert "Failed to start export" in str(exc_info.value)
                assert "500" in str(exc_info.value)

    @pytest.mark.asyncio
    async def test_start_export_handles_request_error(self):
        """Test handling of a network error in start_export."""
        mock_logger = MagicMock()

        with patch("dataloader.interfaces.gmap2_brief.interface.APP_CONFIG") as mock_config:
            mock_config.gmap2brief.base_url = "http://test.com"
            mock_config.gmap2brief.start_endpoint = "/start"
            mock_config.gmap2brief.timeout = 30
            mock_config.app.local = False

            interface = Gmap2BriefInterface(mock_logger)

            with patch("dataloader.interfaces.gmap2_brief.interface.httpx.AsyncClient") as mock_client_cls:
                mock_client = AsyncMock()
                mock_client.post.side_effect = httpx.ConnectError("Connection refused")
                mock_client.__aenter__.return_value = mock_client
                mock_client.__aexit__.return_value = None
                mock_client_cls.return_value = mock_client

                with pytest.raises(Gmap2BriefConnectionError) as exc_info:
                    await interface.start_export()

                assert "Request error" in str(exc_info.value)

    @pytest.mark.asyncio
    async def test_get_status_handles_http_error(self):
        """Test handling of an HTTP error in get_status."""
        mock_logger = MagicMock()

        with patch("dataloader.interfaces.gmap2_brief.interface.APP_CONFIG") as mock_config:
            mock_config.gmap2brief.base_url = "http://test.com"
            mock_config.gmap2brief.status_endpoint = "/status/{job_id}"
            mock_config.gmap2brief.timeout = 30
            mock_config.app.local = False

            interface = Gmap2BriefInterface(mock_logger)

            with patch("dataloader.interfaces.gmap2_brief.interface.httpx.AsyncClient") as mock_client_cls:
                mock_response = MagicMock()
                mock_response.status_code = 404
                mock_response.text = "Not Found"
                mock_response.raise_for_status.side_effect = httpx.HTTPStatusError(
                    "Not found", request=MagicMock(), response=mock_response
                )

                mock_client = AsyncMock()
                mock_client.get.return_value = mock_response
                mock_client.__aenter__.return_value = mock_client
                mock_client.__aexit__.return_value = None
                mock_client_cls.return_value = mock_client

                with pytest.raises(Gmap2BriefConnectionError) as exc_info:
                    await interface.get_status("job123")

                assert "Failed to get status" in str(exc_info.value)
                assert "404" in str(exc_info.value)

    @pytest.mark.asyncio
    async def test_download_export_handles_error(self):
        """Test error handling in download_export."""
        mock_logger = MagicMock()

        with patch("dataloader.interfaces.gmap2_brief.interface.APP_CONFIG") as mock_config:
            mock_config.gmap2brief.base_url = "http://test.com"
            mock_config.gmap2brief.download_endpoint = "/download/{job_id}"
            mock_config.gmap2brief.timeout = 30
            mock_config.app.local = False

            interface = Gmap2BriefInterface(mock_logger)

            with patch("dataloader.interfaces.gmap2_brief.interface.httpx.AsyncClient") as mock_client_cls:
                # Create mock for stream context manager
                mock_stream_ctx = MagicMock()
                mock_stream_ctx.__aenter__ = AsyncMock(side_effect=httpx.TimeoutException("Timeout"))
                mock_stream_ctx.__aexit__ = AsyncMock(return_value=None)

                mock_client = MagicMock()
                mock_client.stream = MagicMock(return_value=mock_stream_ctx)
                mock_client.__aenter__ = AsyncMock(return_value=mock_client)
                mock_client.__aexit__ = AsyncMock(return_value=None)
                mock_client_cls.return_value = mock_client

                with pytest.raises(Gmap2BriefConnectionError) as exc_info:
                    from pathlib import Path
                    await interface.download_export("job123", Path("/tmp/test.zst"))

                assert "Request error" in str(exc_info.value)


@pytest.mark.unit
class TestSuperTeneraErrorHandling:
    """Tests for error handling in SuperTeneraInterface."""

    @pytest.mark.asyncio
    async def test_get_request_handles_http_status_error(self):
        """Test handling of an HTTP status error in _get_request."""
        mock_logger = MagicMock()

        with patch("dataloader.interfaces.tenera.interface.APP_CONFIG") as mock_config:
            mock_config.app.local = False
            mock_config.app.debug = False

            interface = SuperTeneraInterface(mock_logger, "http://test.com")

            mock_session = MagicMock()
            interface._session = mock_session

            mock_response = MagicMock()
            mock_response.status = 503
            mock_response.raise_for_status = MagicMock(side_effect=aiohttp.ClientResponseError(
                request_info=MagicMock(),
                history=(),
                status=503,
                message="Service Unavailable",
            ))
            mock_response.json = AsyncMock(return_value={})

            # Create context manager mock
            mock_ctx = MagicMock()
            mock_ctx.__aenter__ = AsyncMock(return_value=mock_response)
            mock_ctx.__aexit__ = AsyncMock(return_value=None)
            mock_session.get = MagicMock(return_value=mock_ctx)

            with pytest.raises(SuperTeneraConnectionError) as exc_info:
                await interface._get_request("/test")

            assert "HTTP error 503" in str(exc_info.value)

    @pytest.mark.asyncio
|
||||||
|
async def test_get_request_handles_connection_error(self):
|
||||||
|
"""Тест обработки ошибки подключения в _get_request."""
|
||||||
|
mock_logger = MagicMock()
|
||||||
|
|
||||||
|
with patch("dataloader.interfaces.tenera.interface.APP_CONFIG") as mock_config:
|
||||||
|
mock_config.app.local = False
|
||||||
|
mock_config.app.debug = False
|
||||||
|
|
||||||
|
interface = SuperTeneraInterface(mock_logger, "http://test.com")
|
||||||
|
|
||||||
|
mock_session = MagicMock()
|
||||||
|
interface._session = mock_session
|
||||||
|
|
||||||
|
# Create context manager mock that raises on __aenter__
|
||||||
|
mock_ctx = MagicMock()
|
||||||
|
mock_ctx.__aenter__ = AsyncMock(side_effect=aiohttp.ClientConnectionError("Connection failed"))
|
||||||
|
mock_ctx.__aexit__ = AsyncMock(return_value=None)
|
||||||
|
mock_session.get = MagicMock(return_value=mock_ctx)
|
||||||
|
|
||||||
|
with pytest.raises(SuperTeneraConnectionError) as exc_info:
|
||||||
|
await interface._get_request("/test")
|
||||||
|
|
||||||
|
assert "Connection error" in str(exc_info.value)
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_get_request_handles_timeout(self):
|
||||||
|
"""Тест обработки таймаута в _get_request."""
|
||||||
|
mock_logger = MagicMock()
|
||||||
|
|
||||||
|
with patch("dataloader.interfaces.tenera.interface.APP_CONFIG") as mock_config:
|
||||||
|
mock_config.app.local = False
|
||||||
|
mock_config.app.debug = False
|
||||||
|
|
||||||
|
interface = SuperTeneraInterface(mock_logger, "http://test.com")
|
||||||
|
|
||||||
|
mock_session = MagicMock()
|
||||||
|
interface._session = mock_session
|
||||||
|
|
||||||
|
# Create context manager mock that raises on __aenter__
|
||||||
|
mock_ctx = MagicMock()
|
||||||
|
mock_ctx.__aenter__ = AsyncMock(side_effect=TimeoutError("Request timeout"))
|
||||||
|
mock_ctx.__aexit__ = AsyncMock(return_value=None)
|
||||||
|
mock_session.get = MagicMock(return_value=mock_ctx)
|
||||||
|
|
||||||
|
with pytest.raises(SuperTeneraConnectionError) as exc_info:
|
||||||
|
await interface._get_request("/test")
|
||||||
|
|
||||||
|
assert "Connection error" in str(exc_info.value)
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_get_quotes_data_propagates_error(self):
|
||||||
|
"""Тест что get_quotes_data пробрасывает ошибки из _get_request."""
|
||||||
|
mock_logger = MagicMock()
|
||||||
|
|
||||||
|
with patch("dataloader.interfaces.tenera.interface.APP_CONFIG") as mock_config:
|
||||||
|
mock_config.app.local = False
|
||||||
|
mock_config.app.debug = False
|
||||||
|
mock_config.supertenera.quotes_endpoint = "/quotes"
|
||||||
|
|
||||||
|
interface = SuperTeneraInterface(mock_logger, "http://test.com")
|
||||||
|
|
||||||
|
mock_session = MagicMock()
|
||||||
|
interface._session = mock_session
|
||||||
|
|
||||||
|
# Create context manager mock that raises on __aenter__
|
||||||
|
mock_ctx = MagicMock()
|
||||||
|
mock_ctx.__aenter__ = AsyncMock(side_effect=aiohttp.ServerTimeoutError("Server timeout"))
|
||||||
|
mock_ctx.__aexit__ = AsyncMock(return_value=None)
|
||||||
|
mock_session.get = MagicMock(return_value=mock_ctx)
|
||||||
|
|
||||||
|
with pytest.raises(SuperTeneraConnectionError):
|
||||||
|
await interface.get_quotes_data()
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_ping_handles_client_response_error(self):
|
||||||
|
"""Тест обработки ClientResponseError в ping."""
|
||||||
|
mock_logger = MagicMock()
|
||||||
|
|
||||||
|
with patch("dataloader.interfaces.tenera.interface.APP_CONFIG") as mock_config:
|
||||||
|
mock_config.app.local = False
|
||||||
|
mock_config.supertenera.quotes_endpoint = "/quotes"
|
||||||
|
mock_config.supertenera.timeout = 10
|
||||||
|
|
||||||
|
interface = SuperTeneraInterface(mock_logger, "http://test.com")
|
||||||
|
|
||||||
|
mock_session = MagicMock()
|
||||||
|
interface._session = mock_session
|
||||||
|
|
||||||
|
mock_response = MagicMock()
|
||||||
|
mock_response.raise_for_status = MagicMock(side_effect=aiohttp.ClientResponseError(
|
||||||
|
request_info=MagicMock(),
|
||||||
|
history=(),
|
||||||
|
status=500,
|
||||||
|
message="Internal Server Error",
|
||||||
|
))
|
||||||
|
|
||||||
|
# Create context manager mock
|
||||||
|
mock_ctx = MagicMock()
|
||||||
|
mock_ctx.__aenter__ = AsyncMock(return_value=mock_response)
|
||||||
|
mock_ctx.__aexit__ = AsyncMock(return_value=None)
|
||||||
|
mock_session.get = MagicMock(return_value=mock_ctx)
|
||||||
|
|
||||||
|
with pytest.raises(SuperTeneraConnectionError) as exc_info:
|
||||||
|
await interface.ping()
|
||||||
|
|
||||||
|
assert "HTTP error 500" in str(exc_info.value)
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_ping_handles_timeout_error(self):
|
||||||
|
"""Тест обработки TimeoutError в ping."""
|
||||||
|
mock_logger = MagicMock()
|
||||||
|
|
||||||
|
with patch("dataloader.interfaces.tenera.interface.APP_CONFIG") as mock_config:
|
||||||
|
mock_config.app.local = False
|
||||||
|
mock_config.supertenera.quotes_endpoint = "/quotes"
|
||||||
|
mock_config.supertenera.timeout = 10
|
||||||
|
|
||||||
|
interface = SuperTeneraInterface(mock_logger, "http://test.com")
|
||||||
|
|
||||||
|
mock_session = MagicMock()
|
||||||
|
interface._session = mock_session
|
||||||
|
|
||||||
|
# Create context manager mock that raises on __aenter__
|
||||||
|
mock_ctx = MagicMock()
|
||||||
|
mock_ctx.__aenter__ = AsyncMock(side_effect=TimeoutError())
|
||||||
|
mock_ctx.__aexit__ = AsyncMock(return_value=None)
|
||||||
|
mock_session.get = MagicMock(return_value=mock_ctx)
|
||||||
|
|
||||||
|
with pytest.raises(SuperTeneraConnectionError) as exc_info:
|
||||||
|
await interface.ping()
|
||||||
|
|
||||||
|
assert "Timeout" in str(exc_info.value)
|
||||||
|
assert "10s" in str(exc_info.value)
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_ping_handles_client_error(self):
|
||||||
|
"""Тест обработки общей ClientError в ping."""
|
||||||
|
mock_logger = MagicMock()
|
||||||
|
|
||||||
|
with patch("dataloader.interfaces.tenera.interface.APP_CONFIG") as mock_config:
|
||||||
|
mock_config.app.local = False
|
||||||
|
mock_config.supertenera.quotes_endpoint = "/quotes"
|
||||||
|
mock_config.supertenera.timeout = 10
|
||||||
|
|
||||||
|
interface = SuperTeneraInterface(mock_logger, "http://test.com")
|
||||||
|
|
||||||
|
mock_session = MagicMock()
|
||||||
|
interface._session = mock_session
|
||||||
|
|
||||||
|
# Create context manager mock that raises on __aenter__
|
||||||
|
mock_ctx = MagicMock()
|
||||||
|
mock_ctx.__aenter__ = AsyncMock(side_effect=aiohttp.ClientError("Generic client error"))
|
||||||
|
mock_ctx.__aexit__ = AsyncMock(return_value=None)
|
||||||
|
mock_session.get = MagicMock(return_value=mock_ctx)
|
||||||
|
|
||||||
|
with pytest.raises(SuperTeneraConnectionError) as exc_info:
|
||||||
|
await interface.ping()
|
||||||
|
|
||||||
|
assert "Connection error" in str(exc_info.value)
|
||||||
|
|
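The SuperTenera tests above pin down a single error-mapping contract for `_get_request`: an `aiohttp.ClientResponseError` must surface as `SuperTeneraConnectionError` with an `HTTP error <status>` message, while connection failures and timeouts collapse into a generic connection error. A minimal sketch of that wrapping, assuming the `_session` and `_base_url` attributes the tests patch; the exception stub and attribute names here are assumptions, not the repository's actual module:

```python
import aiohttp


class SuperTeneraConnectionError(Exception):
    """Stand-in for the project's exception type (assumption)."""


class SuperTeneraInterface:
    # Constructor and session management omitted; _session/_base_url are assumed names.
    async def _get_request(self, path: str) -> dict:
        """GET-запрос с преобразованием сетевых ошибок в SuperTeneraConnectionError."""
        try:
            async with self._session.get(f"{self._base_url}{path}") as response:
                response.raise_for_status()
                return await response.json()
        except aiohttp.ClientResponseError as exc:
            raise SuperTeneraConnectionError(f"HTTP error {exc.status}") from exc
        except (TimeoutError, aiohttp.ClientError) as exc:
            raise SuperTeneraConnectionError(f"Connection error: {exc}") from exc
```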
@@ -0,0 +1,399 @@
from __future__ import annotations

import asyncio
from unittest.mock import AsyncMock, Mock, patch

import pytest

from dataloader.storage.notify_listener import PGNotifyListener


@pytest.mark.unit
class TestPGNotifyListener:
    """
    Unit тесты для PGNotifyListener.
    """

    def test_init_creates_listener_with_config(self):
        """
        Тест создания listener'а с конфигурацией.
        """
        callback = Mock()
        stop_event = asyncio.Event()

        listener = PGNotifyListener(
            dsn="postgresql://test",
            queue="test_queue",
            callback=callback,
            stop_event=stop_event,
        )

        assert listener._dsn == "postgresql://test"
        assert listener._queue == "test_queue"
        assert listener._callback == callback
        assert listener._stop == stop_event
        assert listener._conn is None
        assert listener._task is None

    @pytest.mark.asyncio
    async def test_start_establishes_connection_and_listens(self):
        """
        Тест запуска прослушивания NOTIFY.
        """
        callback = Mock()
        stop_event = asyncio.Event()

        listener = PGNotifyListener(
            dsn="postgresql://test",
            queue="test_queue",
            callback=callback,
            stop_event=stop_event,
        )

        mock_conn = AsyncMock()
        mock_conn.execute = AsyncMock()
        mock_conn.add_listener = AsyncMock()

        with patch(
            "dataloader.storage.notify_listener.asyncpg.connect", return_value=mock_conn
        ):
            await listener.start()

            assert listener._conn == mock_conn
            assert listener._task is not None
            mock_conn.execute.assert_called_once_with("LISTEN dl_jobs")
            mock_conn.add_listener.assert_called_once()

            await listener.stop()

    @pytest.mark.asyncio
    async def test_start_converts_asyncpg_dsn_format(self):
        """
        Тест преобразования DSN из формата SQLAlchemy в asyncpg.
        """
        callback = Mock()
        stop_event = asyncio.Event()

        listener = PGNotifyListener(
            dsn="postgresql+asyncpg://test",
            queue="test_queue",
            callback=callback,
            stop_event=stop_event,
        )

        mock_conn = AsyncMock()
        mock_conn.execute = AsyncMock()
        mock_conn.add_listener = AsyncMock()

        with patch(
            "dataloader.storage.notify_listener.asyncpg.connect", return_value=mock_conn
        ) as mock_connect:
            await listener.start()

            mock_connect.assert_called_once_with("postgresql://test")

            await listener.stop()

    @pytest.mark.asyncio
    async def test_on_notify_handler_calls_callback_for_matching_queue(self):
        """
        Тест, что callback вызывается для совпадающей очереди.
        """
        callback = Mock()
        stop_event = asyncio.Event()

        listener = PGNotifyListener(
            dsn="postgresql://test",
            queue="test_queue",
            callback=callback,
            stop_event=stop_event,
        )

        mock_conn = AsyncMock()
        mock_conn.execute = AsyncMock()
        mock_conn.add_listener = AsyncMock()

        with patch(
            "dataloader.storage.notify_listener.asyncpg.connect", return_value=mock_conn
        ):
            await listener.start()

            handler = listener._on_notify_handler
            handler(mock_conn, 12345, "dl_jobs", "test_queue")

            callback.assert_called_once()

            await listener.stop()

    @pytest.mark.asyncio
    async def test_on_notify_handler_ignores_wrong_channel(self):
        """
        Тест, что callback не вызывается для другого канала.
        """
        callback = Mock()
        stop_event = asyncio.Event()

        listener = PGNotifyListener(
            dsn="postgresql://test",
            queue="test_queue",
            callback=callback,
            stop_event=stop_event,
        )

        mock_conn = AsyncMock()
        mock_conn.execute = AsyncMock()
        mock_conn.add_listener = AsyncMock()

        with patch(
            "dataloader.storage.notify_listener.asyncpg.connect", return_value=mock_conn
        ):
            await listener.start()

            handler = listener._on_notify_handler
            handler(mock_conn, 12345, "other_channel", "test_queue")

            callback.assert_not_called()

            await listener.stop()

    @pytest.mark.asyncio
    async def test_on_notify_handler_ignores_wrong_queue(self):
        """
        Тест, что callback не вызывается для другой очереди.
        """
        callback = Mock()
        stop_event = asyncio.Event()

        listener = PGNotifyListener(
            dsn="postgresql://test",
            queue="test_queue",
            callback=callback,
            stop_event=stop_event,
        )

        mock_conn = AsyncMock()
        mock_conn.execute = AsyncMock()
        mock_conn.add_listener = AsyncMock()

        with patch(
            "dataloader.storage.notify_listener.asyncpg.connect", return_value=mock_conn
        ):
            await listener.start()

            handler = listener._on_notify_handler
            handler(mock_conn, 12345, "dl_jobs", "other_queue")

            callback.assert_not_called()

            await listener.stop()

    @pytest.mark.asyncio
    async def test_on_notify_handler_suppresses_callback_exceptions(self):
        """
        Тест, что исключения в callback не ломают listener.
        """
        callback = Mock(side_effect=Exception("Callback error"))
        stop_event = asyncio.Event()

        listener = PGNotifyListener(
            dsn="postgresql://test",
            queue="test_queue",
            callback=callback,
            stop_event=stop_event,
        )

        mock_conn = AsyncMock()
        mock_conn.execute = AsyncMock()
        mock_conn.add_listener = AsyncMock()

        with patch(
            "dataloader.storage.notify_listener.asyncpg.connect", return_value=mock_conn
        ):
            await listener.start()

            handler = listener._on_notify_handler
            handler(mock_conn, 12345, "dl_jobs", "test_queue")

            callback.assert_called_once()

            await listener.stop()

    @pytest.mark.asyncio
    async def test_monitor_connection_waits_for_stop_event(self):
        """
        Тест, что _monitor_connection ждёт stop_event.
        """
        callback = Mock()
        stop_event = asyncio.Event()

        listener = PGNotifyListener(
            dsn="postgresql://test",
            queue="test_queue",
            callback=callback,
            stop_event=stop_event,
        )

        mock_conn = AsyncMock()
        mock_conn.execute = AsyncMock()
        mock_conn.add_listener = AsyncMock()
        mock_conn.remove_listener = AsyncMock()
        mock_conn.close = AsyncMock()

        with patch(
            "dataloader.storage.notify_listener.asyncpg.connect", return_value=mock_conn
        ):
            await listener.start()

            assert listener._task is not None
            assert not listener._task.done()

            stop_event.set()
            await asyncio.sleep(0.1)

            assert listener._task.done()

    @pytest.mark.asyncio
    async def test_stop_cancels_task_and_closes_connection(self):
        """
        Тест остановки listener'а с закрытием соединения.
        """
        callback = Mock()
        stop_event = asyncio.Event()

        listener = PGNotifyListener(
            dsn="postgresql://test",
            queue="test_queue",
            callback=callback,
            stop_event=stop_event,
        )

        mock_conn = AsyncMock()
        mock_conn.execute = AsyncMock()
        mock_conn.add_listener = AsyncMock()
        mock_conn.remove_listener = AsyncMock()
        mock_conn.close = AsyncMock()

        with patch(
            "dataloader.storage.notify_listener.asyncpg.connect", return_value=mock_conn
        ):
            await listener.start()

            await listener.stop()

            mock_conn.remove_listener.assert_called_once()
            mock_conn.close.assert_called_once()
            assert listener._conn is None

    @pytest.mark.asyncio
    async def test_stop_handles_already_stopped_task(self):
        """
        Тест stop() для уже остановленной задачи.
        """
        callback = Mock()
        stop_event = asyncio.Event()

        listener = PGNotifyListener(
            dsn="postgresql://test",
            queue="test_queue",
            callback=callback,
            stop_event=stop_event,
        )

        mock_conn = AsyncMock()
        mock_conn.execute = AsyncMock()
        mock_conn.add_listener = AsyncMock()
        mock_conn.remove_listener = AsyncMock()
        mock_conn.close = AsyncMock()

        with patch(
            "dataloader.storage.notify_listener.asyncpg.connect", return_value=mock_conn
        ):
            await listener.start()

            stop_event.set()
            await asyncio.sleep(0.1)

            await listener.stop()

            assert listener._conn is None

    @pytest.mark.asyncio
    async def test_stop_handles_remove_listener_exception(self):
        """
        Тест, что stop() подавляет исключения при remove_listener.
        """
        callback = Mock()
        stop_event = asyncio.Event()

        listener = PGNotifyListener(
            dsn="postgresql://test",
            queue="test_queue",
            callback=callback,
            stop_event=stop_event,
        )

        mock_conn = AsyncMock()
        mock_conn.execute = AsyncMock()
        mock_conn.add_listener = AsyncMock()
        mock_conn.remove_listener = AsyncMock(side_effect=Exception("Remove error"))
        mock_conn.close = AsyncMock()

        with patch(
            "dataloader.storage.notify_listener.asyncpg.connect", return_value=mock_conn
        ):
            await listener.start()

            await listener.stop()

            mock_conn.close.assert_called_once()
            assert listener._conn is None

    @pytest.mark.asyncio
    async def test_stop_handles_close_exception(self):
        """
        Тест, что stop() подавляет исключения при close.
        """
        callback = Mock()
        stop_event = asyncio.Event()

        listener = PGNotifyListener(
            dsn="postgresql://test",
            queue="test_queue",
            callback=callback,
            stop_event=stop_event,
        )

        mock_conn = AsyncMock()
        mock_conn.execute = AsyncMock()
        mock_conn.add_listener = AsyncMock()
        mock_conn.remove_listener = AsyncMock()
        mock_conn.close = AsyncMock(side_effect=Exception("Close error"))

        with patch(
            "dataloader.storage.notify_listener.asyncpg.connect", return_value=mock_conn
        ):
            await listener.start()

            await listener.stop()

            assert listener._conn is None

    @pytest.mark.asyncio
    async def test_stop_without_connection_does_nothing(self):
        """
        Тест stop() без активного соединения.
        """
        callback = Mock()
        stop_event = asyncio.Event()

        listener = PGNotifyListener(
            dsn="postgresql://test",
            queue="test_queue",
            callback=callback,
            stop_event=stop_event,
        )

        await listener.stop()

        assert listener._conn is None
        assert listener._task is None
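The listener tests above reduce to a small amount of behaviour: subscribe with `LISTEN dl_jobs`, invoke the callback only when the channel is `dl_jobs` and the payload matches the worker's queue, and never let a callback exception kill the listener. A sketch of the notification handler consistent with those tests, assuming the `_queue` and `_callback` attributes they assert on (the repository's implementation may differ):

```python
def _on_notify_handler(self, connection, pid: int, channel: str, payload: str) -> None:
    """Вызывает callback только для канала dl_jobs и своей очереди; ошибки callback подавляются."""
    if channel != "dl_jobs" or payload != self._queue:
        return
    try:
        self._callback()
    except Exception:
        # Сбой пользовательского callback не должен останавливать прослушивание NOTIFY
        pass
```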
@@ -0,0 +1,253 @@
"""Unit тесты для пайплайна load_opu."""

from __future__ import annotations

import tempfile
from datetime import date, datetime
from pathlib import Path
from unittest.mock import AsyncMock, MagicMock, Mock, patch

import orjson
import pytest
import zstandard as zstd

from dataloader.interfaces.gmap2_brief.schemas import ExportJobStatus
from dataloader.workers.pipelines.load_opu import (
    _convert_record,
    _parse_jsonl_from_zst,
    load_opu,
)


@pytest.mark.unit
class TestParseJsonlFromZst:
    """Тесты для функции _parse_jsonl_from_zst."""

    def test_parses_valid_zst_file_with_small_batch(self):
        """Тест парсинга валидного zst файла с небольшим батчем."""
        records = [
            {"id": 1, "name": "test1"},
            {"id": 2, "name": "test2"},
            {"id": 3, "name": "test3"},
        ]

        lines = [orjson.dumps(r) for r in records]
        content = b"\n".join(lines)

        with tempfile.TemporaryDirectory() as tmpdir:
            file_path = Path(tmpdir) / "test.jsonl.zst"

            cctx = zstd.ZstdCompressor()
            compressed = cctx.compress(content)

            with open(file_path, "wb") as f:
                f.write(compressed)

            batches = list(_parse_jsonl_from_zst(file_path, chunk_size=2))

            assert len(batches) == 2
            assert batches[0] == [{"id": 1, "name": "test1"}, {"id": 2, "name": "test2"}]
            assert batches[1] == [{"id": 3, "name": "test3"}]

    def test_parses_empty_file(self):
        """Тест парсинга пустого файла."""
        with tempfile.TemporaryDirectory() as tmpdir:
            file_path = Path(tmpdir) / "empty.jsonl.zst"

            cctx = zstd.ZstdCompressor()
            compressed = cctx.compress(b"")

            with open(file_path, "wb") as f:
                f.write(compressed)

            batches = list(_parse_jsonl_from_zst(file_path, chunk_size=100))

            assert len(batches) == 0

    def test_skips_empty_lines(self):
        """Тест пропуска пустых строк."""
        records = [
            {"id": 1},
            {"id": 2},
        ]

        lines = [orjson.dumps(records[0]), b"", b" ", orjson.dumps(records[1])]
        content = b"\n".join(lines)

        with tempfile.TemporaryDirectory() as tmpdir:
            file_path = Path(tmpdir) / "test.jsonl.zst"

            cctx = zstd.ZstdCompressor()
            compressed = cctx.compress(content)

            with open(file_path, "wb") as f:
                f.write(compressed)

            batches = list(_parse_jsonl_from_zst(file_path, chunk_size=10))

            assert len(batches) == 1
            assert batches[0] == [{"id": 1}, {"id": 2}]

    @patch("dataloader.workers.pipelines.load_opu.APP_CTX")
    def test_handles_invalid_json_gracefully(self, mock_ctx):
        """Тест обработки невалидного JSON."""
        mock_logger = MagicMock()
        mock_ctx.logger = mock_logger

        lines = [
            orjson.dumps({"id": 1}),
            b"{invalid json}",
            orjson.dumps({"id": 2}),
        ]
        content = b"\n".join(lines)

        with tempfile.TemporaryDirectory() as tmpdir:
            file_path = Path(tmpdir) / "test.jsonl.zst"

            cctx = zstd.ZstdCompressor()
            compressed = cctx.compress(content)

            with open(file_path, "wb") as f:
                f.write(compressed)

            batches = list(_parse_jsonl_from_zst(file_path, chunk_size=10))

            assert len(batches) == 1
            assert batches[0] == [{"id": 1}, {"id": 2}]
            mock_logger.warning.assert_called()


@pytest.mark.unit
class TestConvertRecord:
    """Тесты для функции _convert_record."""

    def test_converts_actdate_string_to_date(self):
        """Тест конвертации actdate из строки в date."""
        raw = {
            "id": 1,
            "actdate": "2025-01-15",
            "name": "test",
        }

        result = _convert_record(raw)

        assert result["id"] == 1
        assert result["actdate"] == date(2025, 1, 15)
        assert result["name"] == "test"

    def test_converts_wf_load_dttm_string_to_datetime(self):
        """Тест конвертации wf_load_dttm из строки в datetime."""
        raw = {
            "id": 1,
            "wf_load_dttm": "2025-01-15T12:30:45",
        }

        result = _convert_record(raw)

        assert result["id"] == 1
        assert result["wf_load_dttm"] == datetime(2025, 1, 15, 12, 30, 45)

    def test_keeps_non_date_fields_unchanged(self):
        """Тест сохранения полей без конвертации."""
        raw = {
            "id": 1,
            "name": "test",
            "value": 123.45,
        }

        result = _convert_record(raw)

        assert result == raw

    def test_handles_already_converted_dates(self):
        """Тест обработки уже сконвертированных дат."""
        actdate_obj = date(2025, 1, 15)
        wf_load_dttm_obj = datetime(2025, 1, 15, 12, 30, 45)

        raw = {
            "id": 1,
            "actdate": actdate_obj,
            "wf_load_dttm": wf_load_dttm_obj,
        }

        result = _convert_record(raw)

        assert result["actdate"] == actdate_obj
        assert result["wf_load_dttm"] == wf_load_dttm_obj


@pytest.mark.unit
class TestLoadOpuPipeline:
    """Тесты для пайплайна load_opu."""

    @pytest.mark.asyncio
    async def test_full_pipeline_success(self):
        """Тест успешного выполнения полного пайплайна."""
        mock_interface = AsyncMock()
        mock_interface.start_export = AsyncMock(return_value="job123")
        mock_interface.wait_for_completion = AsyncMock(
            return_value=ExportJobStatus(
                job_id="job123",
                status="completed",
                total_rows=100,
            )
        )

        with tempfile.TemporaryDirectory() as tmpdir:
            archive_path = Path(tmpdir) / "test.jsonl.zst"

            records = [
                {"id": i, "actdate": "2025-01-15", "name": f"test{i}"} for i in range(10)
            ]
            lines = [orjson.dumps(r) for r in records]
            content = b"\n".join(lines)

            cctx = zstd.ZstdCompressor()
            compressed = cctx.compress(content)
            with open(archive_path, "wb") as f:
                f.write(compressed)

            async def mock_download(job_id: str, output_path: Path):
                with open(archive_path, "rb") as src:
                    with open(output_path, "wb") as dst:
                        dst.write(src.read())

            mock_interface.download_export = AsyncMock(side_effect=mock_download)

            mock_session = AsyncMock()
            mock_sessionmaker = MagicMock()
            mock_sessionmaker.return_value.__aenter__ = AsyncMock(
                return_value=mock_session
            )
            mock_sessionmaker.return_value.__aexit__ = AsyncMock()

            mock_repo = AsyncMock()
            mock_repo.truncate = AsyncMock()
            mock_repo.bulk_insert = AsyncMock(return_value=10)

            mock_app_ctx = MagicMock()
            mock_app_ctx.logger = MagicMock()
            mock_app_ctx.sessionmaker = mock_sessionmaker

            with (
                patch(
                    "dataloader.workers.pipelines.load_opu.get_gmap2brief_interface",
                    return_value=mock_interface,
                ),
                patch(
                    "dataloader.workers.pipelines.load_opu.OpuRepository",
                    return_value=mock_repo,
                ),
                patch("dataloader.workers.pipelines.load_opu.APP_CTX", mock_app_ctx),
            ):
                steps = []
                async for _ in load_opu({}):
                    steps.append("step")

                assert len(steps) >= 4

                mock_interface.start_export.assert_called_once()
                mock_interface.wait_for_completion.assert_called_once_with("job123")
                mock_interface.download_export.assert_called_once()
                mock_repo.truncate.assert_called_once()
                mock_repo.bulk_insert.assert_called()
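The `_convert_record` tests constrain the helper to exactly two conversions: an ISO `actdate` string becomes a `datetime.date` and an ISO `wf_load_dttm` string becomes a `datetime.datetime`, while every other field and any already-converted value passes through untouched. A sketch that satisfies those cases; the real pipeline helper may handle additional formats:

```python
from __future__ import annotations

from datetime import date, datetime


def _convert_record(raw: dict) -> dict:
    """Конвертирует строковые даты в date/datetime, остальные поля возвращает без изменений."""
    record = dict(raw)
    if isinstance(record.get("actdate"), str):
        record["actdate"] = date.fromisoformat(record["actdate"])
    if isinstance(record.get("wf_load_dttm"), str):
        record["wf_load_dttm"] = datetime.fromisoformat(record["wf_load_dttm"])
    return record
```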
@@ -0,0 +1,536 @@
"""Unit тесты для пайплайна load_tenera."""

from __future__ import annotations

from datetime import datetime
from unittest.mock import AsyncMock, MagicMock, patch

import pytest
import pytz

from dataloader.interfaces.tenera.schemas import (
    BloombergTimePoint,
    CbrTimePoint,
    InvestingCandlestick,
    InvestingNumeric,
    InvestingTimePoint,
    MainData,
    SgxTimePoint,
    TimePointUnion,
    TradingEconomicsEmptyString,
    TradingEconomicsLastPrev,
    TradingEconomicsNumeric,
    TradingEconomicsStringPercent,
    TradingEconomicsStringTime,
    TradingEconomicsTimePoint,
    TradingViewTimePoint,
)
from dataloader.workers.pipelines.load_tenera import (
    _build_value_row,
    _parse_ts_to_datetime,
    _process_source,
    _to_float,
    load_tenera,
)


@pytest.mark.unit
class TestToFloat:
    """Тесты для функции _to_float."""

    def test_converts_int_to_float(self):
        """Тест конвертации int в float."""
        assert _to_float(42) == 42.0

    def test_converts_float_to_float(self):
        """Тест что float остается float."""
        assert _to_float(3.14) == 3.14

    def test_converts_string_number_to_float(self):
        """Тест конвертации строки с числом."""
        assert _to_float("123.45") == 123.45

    def test_converts_string_with_comma_to_float(self):
        """Тест конвертации строки с запятой."""
        assert _to_float("123,45") == 123.45

    def test_converts_string_with_percent_to_float(self):
        """Тест конвертации строки с процентом."""
        assert _to_float("12.5%") == 12.5

    def test_converts_string_with_spaces_to_float(self):
        """Тест конвертации строки с пробелами."""
        assert _to_float(" 123.45 ") == 123.45

    def test_returns_none_for_none(self):
        """Тест что None возвращает None."""
        assert _to_float(None) is None

    def test_returns_none_for_empty_string(self):
        """Тест что пустая строка возвращает None."""
        assert _to_float("") is None
        assert _to_float(" ") is None

    def test_returns_none_for_invalid_string(self):
        """Тест что невалидная строка возвращает None."""
        assert _to_float("invalid") is None
        assert _to_float("abc123") is None


@pytest.mark.unit
class TestParseTsToDatetime:
    """Тесты для функции _parse_ts_to_datetime."""

    @patch("dataloader.workers.pipelines.load_tenera.APP_CTX")
    def test_parses_valid_timestamp(self, mock_ctx):
        """Тест парсинга валидного timestamp."""
        mock_ctx.pytz_timezone = pytz.timezone("Europe/Moscow")

        result = _parse_ts_to_datetime("1609459200")

        assert result is not None
        assert isinstance(result, datetime)
        assert result.tzinfo is None

    @patch("dataloader.workers.pipelines.load_tenera.APP_CTX")
    def test_parses_timestamp_with_whitespace(self, mock_ctx):
        """Тест парсинга timestamp с пробелами."""
        mock_ctx.pytz_timezone = pytz.timezone("Europe/Moscow")

        result = _parse_ts_to_datetime(" 1609459200 ")

        assert result is not None
        assert isinstance(result, datetime)

    def test_returns_none_for_empty_string(self):
        """Тест что пустая строка возвращает None."""
        assert _parse_ts_to_datetime("") is None
        assert _parse_ts_to_datetime(" ") is None

    def test_returns_none_for_non_digit_string(self):
        """Тест что не-цифровая строка возвращает None."""
        assert _parse_ts_to_datetime("abc123") is None
        assert _parse_ts_to_datetime("2025-01-15") is None

    @patch("dataloader.workers.pipelines.load_tenera.APP_CTX")
    def test_handles_invalid_timestamp(self, mock_ctx):
        """Тест обработки невалидного timestamp."""
        mock_ctx.pytz_timezone = pytz.timezone("Europe/Moscow")

        result = _parse_ts_to_datetime("999999999999999")

        assert result is None


@pytest.mark.unit
class TestBuildValueRow:
    """Тесты для функции _build_value_row."""

    def test_handles_int_point(self):
        """Тест обработки int значения."""
        dt = datetime(2025, 1, 15, 12, 0, 0)

        result = _build_value_row("test", dt, 42)

        assert result == {"dt": dt, "key": 42}

    def test_handles_investing_numeric(self):
        """Тест обработки InvestingNumeric."""
        dt = datetime(2025, 1, 15, 12, 0, 0)

        inner = InvestingNumeric(
            profit="1.5%",
            base_value="100.0",
            max_value="105.0",
            min_value="95.0",
            change="5.0",
            change_ptc="5%",
        )
        point = TimePointUnion(root=InvestingTimePoint(root=inner))

        result = _build_value_row("investing", dt, point)

        assert result is not None
        assert result["dt"] == dt
        assert result["value_profit"] == 1.5
        assert result["value_base"] == 100.0
        assert result["value_max"] == 105.0
        assert result["value_min"] == 95.0
        assert result["value_chng"] == 5.0
        assert result["value_chng_prc"] == 5.0

    def test_handles_investing_candlestick(self):
        """Тест обработки InvestingCandlestick."""
        dt = datetime(2025, 1, 15, 12, 0, 0)

        inner = InvestingCandlestick(
            open_="100.0", high="105.0", low="95.0", close="102.0", interest=None, value="1000"
        )
        point = TimePointUnion(root=InvestingTimePoint(root=inner))

        result = _build_value_row("investing", dt, point)

        assert result is not None
        assert result["dt"] == dt
        assert result["price_o"] == 100.0
        assert result["price_h"] == 105.0
        assert result["price_l"] == 95.0
        assert result["price_c"] == 102.0
        assert result["volume"] == 1000.0

    def test_handles_trading_view_timepoint(self):
        """Тест обработки TradingViewTimePoint."""
        dt = datetime(2025, 1, 15, 12, 0, 0)

        inner = TradingViewTimePoint(
            open_="100", high="105", low="95", close="102", volume="5000"
        )
        point = TimePointUnion(root=inner)

        result = _build_value_row("tradingview", dt, point)

        assert result is not None
        assert result["dt"] == dt
        assert result["price_o"] == 100.0
        assert result["price_h"] == 105.0
        assert result["price_l"] == 95.0
        assert result["price_c"] == 102.0
        assert result["volume"] == 5000.0

    def test_handles_sgx_timepoint(self):
        """Тест обработки SgxTimePoint."""
        dt = datetime(2025, 1, 15, 12, 0, 0)

        inner = SgxTimePoint(
            open_="100", high="105", low="95", close="102", interest="3000", value="2000"
        )
        point = TimePointUnion(root=inner)

        result = _build_value_row("sgx", dt, point)

        assert result is not None
        assert result["dt"] == dt
        assert result["price_o"] == 100.0
        assert result["price_h"] == 105.0
        assert result["price_l"] == 95.0
        assert result["price_c"] == 102.0
        assert result["volume"] == 3000.0

    def test_handles_bloomberg_timepoint(self):
        """Тест обработки BloombergTimePoint."""
        dt = datetime(2025, 1, 15, 12, 0, 0)

        inner = BloombergTimePoint(value="123.45")
        point = TimePointUnion(root=inner)

        result = _build_value_row("bloomberg", dt, point)

        assert result is not None
        assert result["dt"] == dt
        assert result["value_base"] == 123.45

    def test_handles_cbr_timepoint(self):
        """Тест обработки CbrTimePoint."""
        dt = datetime(2025, 1, 15, 12, 0, 0)

        inner = CbrTimePoint(value="80,32")
        point = TimePointUnion(root=inner)

        result = _build_value_row("cbr", dt, point)

        assert result is not None
        assert result["dt"] == dt
        assert result["value_base"] == 80.32

    def test_handles_trading_economics_numeric(self):
        """Тест обработки TradingEconomicsNumeric."""
        dt = datetime(2025, 1, 15, 12, 0, 0)

        deep_inner = TradingEconomicsNumeric(
            price="100",
            day="1.5",
            percent="2.0",
            weekly="3.0",
            monthly="4.0",
            ytd="5.0",
            yoy="6.0",
        )
        inner = TradingEconomicsTimePoint(root=deep_inner)
        point = TimePointUnion(root=inner)

        result = _build_value_row("tradingeconomics", dt, point)

        assert result is not None
        assert result["dt"] == dt
        assert result["price_i"] == 100.0
        assert result["value_day"] == 1.5
        assert result["value_prc"] == 2.0
        assert result["value_weekly_prc"] == 3.0
        assert result["value_monthly_prc"] == 4.0
        assert result["value_ytd_prc"] == 5.0
        assert result["value_yoy_prc"] == 6.0

    def test_handles_trading_economics_last_prev(self):
        """Тест обработки TradingEconomicsLastPrev."""
        dt = datetime(2025, 1, 15, 12, 0, 0)

        deep_inner = TradingEconomicsLastPrev(last="100", previous="95", unit="%")
        inner = TradingEconomicsTimePoint(root=deep_inner)
        point = TimePointUnion(root=inner)

        result = _build_value_row("tradingeconomics", dt, point)

        assert result is not None
        assert result["dt"] == dt
        assert result["value_last"] == 100.0
        assert result["value_previous"] == 95.0
        assert result["unit"] == "%"

    def test_handles_trading_economics_string_percent(self):
        """Тест обработки TradingEconomicsStringPercent."""
        dt = datetime(2025, 1, 15, 12, 0, 0)

        deep_inner = TradingEconomicsStringPercent(root="5.5%")
        inner = TradingEconomicsTimePoint(root=deep_inner)
        point = TimePointUnion(root=inner)

        result = _build_value_row("tradingeconomics", dt, point)

        assert result is not None
        assert result["dt"] == dt
        assert result["value_prc"] == 5.5

    def test_handles_trading_economics_string_time(self):
        """Тест обработки TradingEconomicsStringTime."""
        dt = datetime(2025, 1, 15, 12, 0, 0)

        deep_inner = TradingEconomicsStringTime(root="12:00 PM")
        inner = TradingEconomicsTimePoint(root=deep_inner)
        point = TimePointUnion(root=inner)

        result = _build_value_row("tradingeconomics", dt, point)

        assert result is None

    def test_handles_trading_economics_empty_string(self):
        """Тест обработки TradingEconomicsEmptyString."""
        dt = datetime(2025, 1, 15, 12, 0, 0)

        deep_inner = TradingEconomicsEmptyString(root="")
        inner = TradingEconomicsTimePoint(root=deep_inner)
        point = TimePointUnion(root=inner)

        result = _build_value_row("tradingeconomics", dt, point)

        assert result is not None
        assert result["dt"] == dt
        assert result["is_empty_str_flg"] is True

    def test_returns_none_for_unknown_type(self):
        """Тест что неизвестный тип возвращает None."""
        dt = datetime(2025, 1, 15, 12, 0, 0)

        result = _build_value_row("unknown", dt, "string_value")

        assert result is None


@pytest.mark.unit
class TestProcessSource:
    """Тесты для функции _process_source."""

    @pytest.mark.asyncio
    @patch("dataloader.workers.pipelines.load_tenera.APP_CTX")
    async def test_processes_source_successfully(self, mock_ctx):
        """Тест успешной обработки источника."""
        mock_ctx.pytz_timezone = pytz.timezone("Europe/Moscow")
        mock_logger = MagicMock()
        mock_ctx.logger = mock_logger

        mock_repo = AsyncMock()

        mock_section = MagicMock()
        mock_section.section_id = 1
        mock_repo.get_section_by_name = AsyncMock(return_value=mock_section)

        mock_quote = MagicMock()
        mock_quote.quote_id = 1
        mock_repo.upsert_quote = AsyncMock(return_value=mock_quote)

        mock_repo.bulk_upsert_quote_values = AsyncMock()

        source_data = {
            "instrument1": {
                "1609459200": TimePointUnion(root=CbrTimePoint(value="80.5")),
            },
        }

        await _process_source(mock_repo, "cbr", source_data)

        mock_repo.get_section_by_name.assert_called_once_with("cbr")
        mock_repo.upsert_quote.assert_called_once()
        mock_repo.bulk_upsert_quote_values.assert_called_once()

    @pytest.mark.asyncio
    @patch("dataloader.workers.pipelines.load_tenera.APP_CTX")
    async def test_skips_source_when_section_not_found(self, mock_ctx):
        """Тест пропуска источника когда секция не найдена."""
        mock_ctx.pytz_timezone = pytz.timezone("Europe/Moscow")
        mock_logger = MagicMock()
        mock_ctx.logger = mock_logger

        mock_repo = AsyncMock()
        mock_repo.get_section_by_name = AsyncMock(return_value=None)

        source_data = {"instrument1": {}}

        await _process_source(mock_repo, "unknown", source_data)

        mock_repo.get_section_by_name.assert_called_once_with("unknown")
        mock_repo.upsert_quote.assert_not_called()
        mock_logger.warning.assert_called()

    @pytest.mark.asyncio
    @patch("dataloader.workers.pipelines.load_tenera.APP_CTX")
    async def test_skips_instruments_with_no_valid_rows(self, mock_ctx):
        """Тест пропуска инструментов без валидных строк."""
        mock_ctx.pytz_timezone = pytz.timezone("Europe/Moscow")
        mock_logger = MagicMock()
        mock_ctx.logger = mock_logger

        mock_repo = AsyncMock()

        mock_section = MagicMock()
        mock_repo.get_section_by_name = AsyncMock(return_value=mock_section)

        mock_quote = MagicMock()
        mock_repo.upsert_quote = AsyncMock(return_value=mock_quote)

        mock_repo.bulk_upsert_quote_values = AsyncMock()

        source_data = {
            "instrument1": {
                "invalid_ts": "invalid_data",
            },
        }

        await _process_source(mock_repo, "cbr", source_data)

        mock_repo.upsert_quote.assert_called_once()
        mock_repo.bulk_upsert_quote_values.assert_not_called()


@pytest.mark.unit
class TestLoadTeneraPipeline:
    """Тесты для пайплайна load_tenera."""

    @pytest.mark.asyncio
    @patch("dataloader.workers.pipelines.load_tenera.APP_CTX")
    async def test_full_pipeline_success(self, mock_ctx):
        """Тест успешного выполнения полного пайплайна."""
        mock_ctx.pytz_timezone = pytz.timezone("Europe/Moscow")
        mock_logger = MagicMock()
        mock_ctx.logger = mock_logger

        mock_tenera = AsyncMock()
        mock_tenera.__aenter__ = AsyncMock(return_value=mock_tenera)
        mock_tenera.__aexit__ = AsyncMock()

        mock_data = MagicMock(spec=MainData)
        mock_data.cbr = {"USD": {"1609459200": TimePointUnion(root=CbrTimePoint(value="75.0"))}}
        mock_data.investing = {}
        mock_data.sgx = {}
        mock_data.tradingeconomics = {}
        mock_data.bloomberg = {}
        mock_data.trading_view = {}

        mock_tenera.get_quotes_data = AsyncMock(return_value=mock_data)

        mock_session = AsyncMock()
        mock_sessionmaker = MagicMock()
        mock_sessionmaker.return_value.__aenter__ = AsyncMock(return_value=mock_session)
        mock_sessionmaker.return_value.__aexit__ = AsyncMock()

        mock_ctx.sessionmaker = mock_sessionmaker

        mock_repo = AsyncMock()
        mock_section = MagicMock()
        mock_section.section_id = 1
        mock_repo.get_section_by_name = AsyncMock(return_value=mock_section)

        mock_quote = MagicMock()
        mock_quote.quote_id = 1
        mock_repo.upsert_quote = AsyncMock(return_value=mock_quote)

        mock_repo.bulk_upsert_quote_values = AsyncMock()

        with (
            patch(
                "dataloader.workers.pipelines.load_tenera.get_async_tenera_interface",
                return_value=mock_tenera,
            ),
            patch(
                "dataloader.workers.pipelines.load_tenera.QuotesRepository",
                return_value=mock_repo,
            ),
        ):
            steps = []
            async for _ in load_tenera({}):
                steps.append("step")

            assert len(steps) >= 1

            mock_tenera.get_quotes_data.assert_called_once()
            mock_repo.get_section_by_name.assert_called()
            mock_session.commit.assert_called()

    @pytest.mark.asyncio
    @patch("dataloader.workers.pipelines.load_tenera.APP_CTX")
    async def test_pipeline_processes_multiple_sources(self, mock_ctx):
        """Тест обработки нескольких источников."""
        mock_ctx.pytz_timezone = pytz.timezone("Europe/Moscow")
        mock_logger = MagicMock()
        mock_ctx.logger = mock_logger

        mock_tenera = AsyncMock()
        mock_tenera.__aenter__ = AsyncMock(return_value=mock_tenera)
        mock_tenera.__aexit__ = AsyncMock()

        mock_data = MagicMock(spec=MainData)
        mock_data.cbr = {"USD": {}}
        mock_data.investing = {"SPX": {}}
        mock_data.sgx = {}
        mock_data.tradingeconomics = {}
        mock_data.bloomberg = {}
        mock_data.trading_view = {}

        mock_tenera.get_quotes_data = AsyncMock(return_value=mock_data)

        mock_session = AsyncMock()
        mock_sessionmaker = MagicMock()
        mock_sessionmaker.return_value.__aenter__ = AsyncMock(return_value=mock_session)
        mock_sessionmaker.return_value.__aexit__ = AsyncMock()

        mock_ctx.sessionmaker = mock_sessionmaker

        mock_repo = AsyncMock()
        mock_section = MagicMock()
        mock_repo.get_section_by_name = AsyncMock(return_value=mock_section)
        mock_quote = MagicMock()
        mock_repo.upsert_quote = AsyncMock(return_value=mock_quote)
        mock_repo.bulk_upsert_quote_values = AsyncMock()

        with (
            patch(
                "dataloader.workers.pipelines.load_tenera.get_async_tenera_interface",
                return_value=mock_tenera,
            ),
            patch(
                "dataloader.workers.pipelines.load_tenera.QuotesRepository",
                return_value=mock_repo,
            ),
        ):
            async for _ in load_tenera({}):
                pass

            assert mock_repo.get_section_by_name.call_count >= 2
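The `TestToFloat` cases at the top of this file fully describe the normalisation rules: numbers pass through, strings are trimmed, a comma becomes a decimal point, a trailing percent sign is dropped, and anything empty or unparsable yields `None`. A sketch matching those cases; the repository's `_to_float` may differ in detail:

```python
def _to_float(value) -> float | None:
    """Приводит значение к float; для пустых и невалидных входов возвращает None."""
    if value is None:
        return None
    if isinstance(value, (int, float)):
        return float(value)
    # Нормализация строкового представления: пробелы, запятая, знак процента.
    text = str(value).strip().replace(",", ".").rstrip("%")
    if not text:
        return None
    try:
        return float(text)
    except ValueError:
        return None
```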
@@ -0,0 +1,85 @@
from __future__ import annotations

import pytest

from dataloader.workers.pipelines.registry import _Registry, register, resolve, tasks


@pytest.mark.unit
class TestPipelineRegistry:
    """
    Unit тесты для системы регистрации пайплайнов.
    """

    def setup_method(self):
        """
        Очищаем реестр перед каждым тестом.
        """
        _Registry.clear()

    def test_register_adds_pipeline_to_registry(self):
        """
        Тест регистрации пайплайна.
        """

        @register("test.task")
        def test_pipeline(args: dict):
            return "result"

        assert "test.task" in _Registry
        assert _Registry["test.task"] == test_pipeline

    def test_resolve_returns_registered_pipeline(self):
        """
        Тест получения зарегистрированного пайплайна.
        """

        @register("test.resolve")
        def test_pipeline(args: dict):
            return "resolved"

        resolved = resolve("test.resolve")
        assert resolved == test_pipeline
        assert resolved({}) == "resolved"

    def test_resolve_raises_keyerror_for_unknown_task(self):
        """
        Тест ошибки при запросе незарегистрированного пайплайна.
        """
        with pytest.raises(KeyError) as exc_info:
            resolve("unknown.task")

        assert "pipeline not found: unknown.task" in str(exc_info.value)

    def test_tasks_returns_registered_task_names(self):
        """
        Тест получения списка зарегистрированных задач.
        """

        @register("task1")
        def pipeline1(args: dict):
            pass

        @register("task2")
        def pipeline2(args: dict):
            pass

        task_list = list(tasks())
        assert "task1" in task_list
        assert "task2" in task_list

    def test_register_overwrites_existing_pipeline(self):
        """
        Тест перезаписи существующего пайплайна.
        """

        @register("overwrite.task")
        def first_pipeline(args: dict):
            return "first"

        @register("overwrite.task")
        def second_pipeline(args: dict):
            return "second"

        resolved = resolve("overwrite.task")
        assert resolved({}) == "second"
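These registry tests imply a module-level mapping plus three helpers: `register` as a decorator, `resolve` raising `KeyError` with a `pipeline not found: <name>` message, and `tasks` listing the registered names, with re-registration silently overwriting. A sketch of such a module under those assumptions (the actual `registry` module may differ):

```python
from __future__ import annotations

from typing import Callable, Iterable

_Registry: dict[str, Callable[[dict], object]] = {}


def register(name: str) -> Callable[[Callable[[dict], object]], Callable[[dict], object]]:
    """Декоратор регистрации пайплайна под именем задачи; повторная регистрация перезаписывает."""
    def _wrap(fn: Callable[[dict], object]) -> Callable[[dict], object]:
        _Registry[name] = fn
        return fn
    return _wrap


def resolve(name: str) -> Callable[[dict], object]:
    """Возвращает зарегистрированный пайплайн или бросает KeyError."""
    if name not in _Registry:
        raise KeyError(f"pipeline not found: {name}")
    return _Registry[name]


def tasks() -> Iterable[str]:
    """Имена всех зарегистрированных задач."""
    return _Registry.keys()
```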
@ -0,0 +1,551 @@
from __future__ import annotations

import asyncio
from unittest.mock import AsyncMock, MagicMock, Mock, patch

import pytest

from dataloader.workers.base import PGWorker, WorkerConfig


@pytest.mark.unit
class TestPGWorker:
    """
    Unit tests for PGWorker.
    """

    def test_init_creates_worker_with_config(self):
        """
        Test creating a worker with a configuration.
        """
        cfg = WorkerConfig(queue="test_queue", heartbeat_sec=10, claim_backoff_sec=5)
        stop_event = asyncio.Event()

        with patch("dataloader.workers.base.APP_CTX") as mock_ctx:
            mock_ctx.get_logger.return_value = Mock()
            mock_ctx.sessionmaker = Mock()

            worker = PGWorker(cfg, stop_event)

            assert worker._cfg == cfg
            assert worker._stop == stop_event
            assert worker._listener is None
            assert not worker._notify_wakeup.is_set()

    @pytest.mark.asyncio
    async def test_run_starts_listener_and_processes_jobs(self):
        """
        Test starting the worker with a LISTEN/NOTIFY listener.
        """
        cfg = WorkerConfig(queue="test", heartbeat_sec=10, claim_backoff_sec=1)
        stop_event = asyncio.Event()

        with (
            patch("dataloader.workers.base.APP_CTX") as mock_ctx,
            patch("dataloader.workers.base.APP_CONFIG") as mock_cfg,
            patch("dataloader.workers.base.PGNotifyListener") as mock_listener_cls,
        ):
            mock_ctx.get_logger.return_value = Mock()
            mock_ctx.sessionmaker = Mock()
            mock_cfg.pg.url = "postgresql+asyncpg://test"

            mock_listener = Mock()
            mock_listener.start = AsyncMock()
            mock_listener.stop = AsyncMock()
            mock_listener_cls.return_value = mock_listener

            worker = PGWorker(cfg, stop_event)

            call_count = [0]

            async def mock_claim():
                call_count[0] += 1
                if call_count[0] >= 2:
                    stop_event.set()
                return False

            with patch.object(
                worker, "_claim_and_execute_once", side_effect=mock_claim
            ):
                await worker.run()

            assert mock_listener.start.call_count == 1
            assert mock_listener.stop.call_count == 1

    @pytest.mark.asyncio
    async def test_run_falls_back_to_polling_if_listener_fails(self):
        """
        Test falling back to polling when LISTEN/NOTIFY fails to start.
        """
        cfg = WorkerConfig(queue="test", heartbeat_sec=10, claim_backoff_sec=1)
        stop_event = asyncio.Event()

        with (
            patch("dataloader.workers.base.APP_CTX") as mock_ctx,
            patch("dataloader.workers.base.APP_CONFIG") as mock_cfg,
            patch("dataloader.workers.base.PGNotifyListener") as mock_listener_cls,
        ):
            mock_logger = Mock()
            mock_ctx.get_logger.return_value = mock_logger
            mock_ctx.sessionmaker = Mock()
            mock_cfg.pg.url = "postgresql+asyncpg://test"

            mock_listener = Mock()
            mock_listener.start = AsyncMock(side_effect=Exception("Connection failed"))
            mock_listener_cls.return_value = mock_listener

            worker = PGWorker(cfg, stop_event)

            call_count = [0]

            async def mock_claim():
                call_count[0] += 1
                if call_count[0] >= 2:
                    stop_event.set()
                return False

            with patch.object(
                worker, "_claim_and_execute_once", side_effect=mock_claim
            ):
                await worker.run()

            assert worker._listener is None
            assert mock_logger.warning.call_count == 1

    @pytest.mark.asyncio
    async def test_listen_or_sleep_with_listener_waits_for_notify(self):
        """
        Test waiting via LISTEN/NOTIFY.
        """
        cfg = WorkerConfig(queue="test", heartbeat_sec=10, claim_backoff_sec=5)
        stop_event = asyncio.Event()

        with patch("dataloader.workers.base.APP_CTX") as mock_ctx:
            mock_ctx.get_logger.return_value = Mock()
            mock_ctx.sessionmaker = Mock()

            worker = PGWorker(cfg, stop_event)
            worker._listener = Mock()

            worker._notify_wakeup.set()

            await worker._listen_or_sleep(1)

            assert not worker._notify_wakeup.is_set()

    @pytest.mark.asyncio
    async def test_listen_or_sleep_without_listener_uses_timeout(self):
        """
        Test falling back to a timeout when no listener is available.
        """
        cfg = WorkerConfig(queue="test", heartbeat_sec=10, claim_backoff_sec=1)
        stop_event = asyncio.Event()

        with patch("dataloader.workers.base.APP_CTX") as mock_ctx:
            mock_ctx.get_logger.return_value = Mock()
            mock_ctx.sessionmaker = Mock()

            worker = PGWorker(cfg, stop_event)

            start_time = asyncio.get_event_loop().time()
            await worker._listen_or_sleep(1)
            elapsed = asyncio.get_event_loop().time() - start_time

            assert elapsed >= 1.0

    @pytest.mark.asyncio
    async def test_claim_and_execute_once_returns_false_when_no_job(self):
        """
        Test that _claim_and_execute_once returns False when no job is available.
        """
        cfg = WorkerConfig(queue="test", heartbeat_sec=10, claim_backoff_sec=5)
        stop_event = asyncio.Event()

        with (
            patch("dataloader.workers.base.APP_CTX") as mock_ctx,
            patch("dataloader.workers.base.QueueRepository") as mock_repo_cls,
        ):
            mock_session = AsyncMock()
            mock_session.commit = AsyncMock()
            mock_sm = MagicMock()
            mock_sm.return_value.__aenter__.return_value = mock_session
            mock_sm.return_value.__aexit__.return_value = AsyncMock()
            mock_ctx.get_logger.return_value = Mock()
            mock_ctx.sessionmaker = mock_sm

            mock_repo = Mock()
            mock_repo.claim_one = AsyncMock(return_value=None)
            mock_repo_cls.return_value = mock_repo

            worker = PGWorker(cfg, stop_event)
            result = await worker._claim_and_execute_once()

            assert result is False
            assert mock_session.commit.call_count == 1

    @pytest.mark.asyncio
    async def test_claim_and_execute_once_executes_job_successfully(self):
        """
        Test successful job execution.
        """
        cfg = WorkerConfig(queue="test", heartbeat_sec=10, claim_backoff_sec=5)
        stop_event = asyncio.Event()

        with (
            patch("dataloader.workers.base.APP_CTX") as mock_ctx,
            patch("dataloader.workers.base.QueueRepository") as mock_repo_cls,
        ):
            mock_session = AsyncMock()
            mock_sm = MagicMock()
            mock_sm.return_value.__aenter__.return_value = mock_session
            mock_sm.return_value.__aexit__.return_value = AsyncMock()
            mock_ctx.get_logger.return_value = Mock()
            mock_ctx.sessionmaker = mock_sm

            mock_repo = Mock()
            mock_repo.claim_one = AsyncMock(
                return_value={
                    "job_id": "test-job-id",
                    "lease_ttl_sec": 60,
                    "task": "test.task",
                    "args": {"key": "value"},
                }
            )
            mock_repo.finish_ok = AsyncMock()
            mock_repo_cls.return_value = mock_repo

            worker = PGWorker(cfg, stop_event)

            async def mock_pipeline(task, args):
                yield

            with (
                patch.object(worker, "_pipeline", side_effect=mock_pipeline),
                patch.object(worker, "_execute_with_heartbeat", return_value=False),
            ):
                result = await worker._claim_and_execute_once()

            assert result is True
            assert mock_repo.finish_ok.call_count == 1

    @pytest.mark.asyncio
    async def test_claim_and_execute_once_handles_cancellation(self):
        """
        Test handling a job canceled by the user.
        """
        cfg = WorkerConfig(queue="test", heartbeat_sec=10, claim_backoff_sec=5)
        stop_event = asyncio.Event()

        with (
            patch("dataloader.workers.base.APP_CTX") as mock_ctx,
            patch("dataloader.workers.base.QueueRepository") as mock_repo_cls,
        ):
            mock_session = AsyncMock()
            mock_sm = MagicMock()
            mock_sm.return_value.__aenter__.return_value = mock_session
            mock_sm.return_value.__aexit__.return_value = AsyncMock()
            mock_ctx.get_logger.return_value = Mock()
            mock_ctx.sessionmaker = mock_sm

            mock_repo = Mock()
            mock_repo.claim_one = AsyncMock(
                return_value={
                    "job_id": "test-job-id",
                    "lease_ttl_sec": 60,
                    "task": "test.task",
                    "args": {},
                }
            )
            mock_repo.finish_fail_or_retry = AsyncMock()
            mock_repo_cls.return_value = mock_repo

            worker = PGWorker(cfg, stop_event)

            with patch.object(worker, "_execute_with_heartbeat", return_value=True):
                result = await worker._claim_and_execute_once()

            assert result is True
            mock_repo.finish_fail_or_retry.assert_called_once()
            args = mock_repo.finish_fail_or_retry.call_args
            assert "canceled by user" in args[0]

    @pytest.mark.asyncio
    async def test_claim_and_execute_once_handles_exceptions(self):
        """
        Test exception handling during job execution.
        """
        cfg = WorkerConfig(queue="test", heartbeat_sec=10, claim_backoff_sec=5)
        stop_event = asyncio.Event()

        with (
            patch("dataloader.workers.base.APP_CTX") as mock_ctx,
            patch("dataloader.workers.base.QueueRepository") as mock_repo_cls,
        ):
            mock_session = AsyncMock()
            mock_sm = MagicMock()
            mock_sm.return_value.__aenter__.return_value = mock_session
            mock_sm.return_value.__aexit__.return_value = AsyncMock()
            mock_ctx.get_logger.return_value = Mock()
            mock_ctx.sessionmaker = mock_sm

            mock_repo = Mock()
            mock_repo.claim_one = AsyncMock(
                return_value={
                    "job_id": "test-job-id",
                    "lease_ttl_sec": 60,
                    "task": "test.task",
                    "args": {},
                }
            )
            mock_repo.finish_fail_or_retry = AsyncMock()
            mock_repo_cls.return_value = mock_repo

            worker = PGWorker(cfg, stop_event)

            with patch.object(
                worker, "_execute_with_heartbeat", side_effect=ValueError("Test error")
            ):
                result = await worker._claim_and_execute_once()

            assert result is True
            mock_repo.finish_fail_or_retry.assert_called_once()
            args = mock_repo.finish_fail_or_retry.call_args
            assert "Test error" in args[0]

    @pytest.mark.asyncio
    async def test_execute_with_heartbeat_sends_heartbeats(self):
        """
        Test that heartbeats are sent while a job is running.
        """
        cfg = WorkerConfig(queue="test", heartbeat_sec=1, claim_backoff_sec=5)
        stop_event = asyncio.Event()

        with (
            patch("dataloader.workers.base.APP_CTX") as mock_ctx,
            patch("dataloader.workers.base.QueueRepository") as mock_repo_cls,
        ):
            mock_session = AsyncMock()
            mock_sm = MagicMock()
            mock_sm.return_value.__aenter__.return_value = mock_session
            mock_sm.return_value.__aexit__.return_value = AsyncMock()
            mock_ctx.get_logger.return_value = Mock()
            mock_ctx.sessionmaker = mock_sm

            mock_repo = Mock()
            mock_repo.heartbeat = AsyncMock(return_value=(True, False))
            mock_repo_cls.return_value = mock_repo

            worker = PGWorker(cfg, stop_event)

            async def slow_pipeline():
                await asyncio.sleep(0.5)
                yield
                await asyncio.sleep(0.6)
                yield

            canceled = await worker._execute_with_heartbeat(
                "job-id", 60, slow_pipeline()
            )

            assert canceled is False
            assert mock_repo.heartbeat.call_count >= 1

    @pytest.mark.asyncio
    async def test_execute_with_heartbeat_detects_cancellation(self):
        """
        Test detecting cancellation via heartbeat.
        """
        cfg = WorkerConfig(queue="test", heartbeat_sec=1, claim_backoff_sec=5)
        stop_event = asyncio.Event()

        with (
            patch("dataloader.workers.base.APP_CTX") as mock_ctx,
            patch("dataloader.workers.base.QueueRepository") as mock_repo_cls,
        ):
            mock_session = AsyncMock()
            mock_sm = MagicMock()
            mock_sm.return_value.__aenter__.return_value = mock_session
            mock_sm.return_value.__aexit__.return_value = AsyncMock()
            mock_ctx.get_logger.return_value = Mock()
            mock_ctx.sessionmaker = mock_sm

            mock_repo = Mock()
            mock_repo.heartbeat = AsyncMock(return_value=(True, True))
            mock_repo_cls.return_value = mock_repo

            worker = PGWorker(cfg, stop_event)

            async def slow_pipeline():
                await asyncio.sleep(0.5)
                yield
                await asyncio.sleep(0.6)
                yield

            canceled = await worker._execute_with_heartbeat(
                "job-id", 60, slow_pipeline()
            )

            assert canceled is True

    @pytest.mark.asyncio
    async def test_pipeline_handles_sync_function(self):
        """
        Test running a synchronous pipeline function.
        """
        cfg = WorkerConfig(queue="test", heartbeat_sec=10, claim_backoff_sec=5)
        stop_event = asyncio.Event()

        with (
            patch("dataloader.workers.base.APP_CTX") as mock_ctx,
            patch("dataloader.workers.base.resolve_pipeline") as mock_resolve,
        ):
            mock_ctx.get_logger.return_value = Mock()
            mock_ctx.sessionmaker = Mock()

            def sync_pipeline(args):
                return "result"

            mock_resolve.return_value = sync_pipeline

            worker = PGWorker(cfg, stop_event)

            results = []
            async for _ in worker._pipeline("test.task", {}):
                results.append(_)

            assert len(results) == 1

    @pytest.mark.asyncio
    async def test_pipeline_handles_async_function(self):
        """
        Test running an asynchronous pipeline function.
        """
        cfg = WorkerConfig(queue="test", heartbeat_sec=10, claim_backoff_sec=5)
        stop_event = asyncio.Event()

        with (
            patch("dataloader.workers.base.APP_CTX") as mock_ctx,
            patch("dataloader.workers.base.resolve_pipeline") as mock_resolve,
        ):
            mock_ctx.get_logger.return_value = Mock()
            mock_ctx.sessionmaker = Mock()

            async def async_pipeline(args):
                return "result"

            mock_resolve.return_value = async_pipeline

            worker = PGWorker(cfg, stop_event)

            results = []
            async for _ in worker._pipeline("test.task", {}):
                results.append(_)

            assert len(results) == 1

    @pytest.mark.asyncio
    async def test_pipeline_handles_async_generator(self):
        """
        Test running an asynchronous generator pipeline.
        """
        cfg = WorkerConfig(queue="test", heartbeat_sec=10, claim_backoff_sec=5)
        stop_event = asyncio.Event()

        with (
            patch("dataloader.workers.base.APP_CTX") as mock_ctx,
            patch("dataloader.workers.base.resolve_pipeline") as mock_resolve,
        ):
            mock_ctx.get_logger.return_value = Mock()
            mock_ctx.sessionmaker = Mock()

            async def async_gen_pipeline(args):
                yield
                yield
                yield

            mock_resolve.return_value = async_gen_pipeline

            worker = PGWorker(cfg, stop_event)

            results = []
            async for _ in worker._pipeline("test.task", {}):
                results.append(_)

            assert len(results) == 3

    @pytest.mark.asyncio
    async def test_claim_and_execute_once_handles_shutdown_cancelled_error(self):
        """
        Test that a CancelledError raised during shutdown marks the job via finish_fail_or_retry.
        """
        cfg = WorkerConfig(queue="test", heartbeat_sec=10, claim_backoff_sec=5)
        stop_event = asyncio.Event()

        with (
            patch("dataloader.workers.base.APP_CTX") as mock_ctx,
            patch("dataloader.workers.base.QueueRepository") as mock_repo_cls,
        ):
            mock_session = AsyncMock()
            mock_sm = MagicMock()
            mock_sm.return_value.__aenter__.return_value = mock_session
            mock_sm.return_value.__aexit__.return_value = AsyncMock(return_value=False)
            mock_ctx.get_logger.return_value = Mock()
            mock_ctx.sessionmaker = mock_sm

            mock_repo = Mock()
            mock_repo.claim_one = AsyncMock(
                return_value={
                    "job_id": "test-job-id",
                    "lease_ttl_sec": 60,
                    "task": "test.task",
                    "args": {},
                }
            )
            mock_repo.finish_fail_or_retry = AsyncMock()
            mock_repo_cls.return_value = mock_repo

            worker = PGWorker(cfg, stop_event)

            async def raise_cancel(*_args, **_kwargs):
                raise asyncio.CancelledError()

            with patch.object(worker, "_execute_with_heartbeat", new=raise_cancel):
                await worker._claim_and_execute_once()

            mock_repo.finish_fail_or_retry.assert_called_once()
            args, kwargs = mock_repo.finish_fail_or_retry.call_args
            assert args[0] == "test-job-id"
            assert "cancelled by shutdown" in args[1]
            assert kwargs.get("is_canceled") is True

    @pytest.mark.asyncio
    async def test_execute_with_heartbeat_raises_cancelled_when_stop_set(self):
        """
        Test that _execute_with_heartbeat raises CancelledError when the stop event is already set.
        """
        cfg = WorkerConfig(queue="test", heartbeat_sec=1000, claim_backoff_sec=5)
        stop_event = asyncio.Event()
        stop_event.set()

        with (
            patch("dataloader.workers.base.APP_CTX") as mock_ctx,
            patch("dataloader.workers.base.QueueRepository") as mock_repo_cls,
        ):
            mock_ctx.get_logger.return_value = Mock()
            mock_ctx.sessionmaker = Mock()
            mock_repo_cls.return_value = Mock()

            worker = PGWorker(cfg, stop_event)

            async def one_yield():
                yield

            with pytest.raises(asyncio.CancelledError):
                await worker._execute_with_heartbeat("job-id", 60, one_yield())
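Note: nearly every test above repeats the same `APP_CTX` / sessionmaker patching. A small pytest fixture could factor that boilerplate out; the sketch below is a suggestion only, and the `worker_env` fixture name is hypothetical, not part of this diff.

```python
# Hypothetical fixture bundling the APP_CTX patching repeated throughout TestPGWorker.
from unittest.mock import AsyncMock, MagicMock, Mock, patch

import pytest


@pytest.fixture
def worker_env():
    """Patch dataloader.workers.base.APP_CTX once and yield (ctx, sessionmaker, session) mocks."""
    with patch("dataloader.workers.base.APP_CTX") as mock_ctx:
        mock_session = AsyncMock()
        mock_sm = MagicMock()
        mock_sm.return_value.__aenter__.return_value = mock_session
        mock_sm.return_value.__aexit__.return_value = AsyncMock()
        mock_ctx.get_logger.return_value = Mock()
        mock_ctx.sessionmaker = mock_sm
        yield mock_ctx, mock_sm, mock_session
```

A test would then take `worker_env` as a parameter and unpack the three mocks instead of repeating the `with (...)` setup block.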
@ -0,0 +1,294 @@
from __future__ import annotations

import asyncio
from unittest.mock import AsyncMock, MagicMock, Mock, patch

import pytest

from dataloader.workers.manager import WorkerManager, WorkerSpec, build_manager_from_env


@pytest.mark.unit
class TestWorkerManager:
    """
    Unit tests for WorkerManager.
    """

    def test_init_creates_manager_with_specs(self):
        """
        Test creating the manager with worker specs.
        """
        specs = [WorkerSpec(queue="test_queue", concurrency=2)]

        with patch("dataloader.workers.manager.APP_CTX") as mock_ctx:
            mock_ctx.get_logger.return_value = Mock()
            manager = WorkerManager(specs)

            assert manager._specs == specs
            assert manager._stop.is_set() is False
            assert manager._tasks == []
            assert manager._reaper_task is None

    @pytest.mark.asyncio
    async def test_start_creates_worker_tasks(self):
        """
        Test starting workers and creating their tasks.
        """
        specs = [
            WorkerSpec(queue="queue1", concurrency=2),
            WorkerSpec(queue="queue2", concurrency=1),
        ]

        with (
            patch("dataloader.workers.manager.APP_CTX") as mock_ctx,
            patch("dataloader.workers.manager.APP_CONFIG") as mock_cfg,
            patch("dataloader.workers.manager.PGWorker") as mock_worker_cls,
        ):
            mock_ctx.get_logger.return_value = Mock()
            mock_cfg.worker.heartbeat_sec = 10
            mock_cfg.worker.claim_backoff_sec = 5

            mock_worker_instance = Mock()
            mock_worker_instance.run = AsyncMock()
            mock_worker_cls.return_value = mock_worker_instance

            manager = WorkerManager(specs)
            await manager.start()

            assert len(manager._tasks) == 3
            assert manager._reaper_task is not None
            assert mock_worker_cls.call_count == 3

            await manager.stop()

    @pytest.mark.asyncio
    async def test_start_with_zero_concurrency_creates_one_worker(self):
        """
        Test that concurrency=0 still creates at least one worker.
        """
        specs = [WorkerSpec(queue="test", concurrency=0)]

        with (
            patch("dataloader.workers.manager.APP_CTX") as mock_ctx,
            patch("dataloader.workers.manager.APP_CONFIG") as mock_cfg,
            patch("dataloader.workers.manager.PGWorker") as mock_worker_cls,
        ):
            mock_ctx.get_logger.return_value = Mock()
            mock_cfg.worker.heartbeat_sec = 10
            mock_cfg.worker.claim_backoff_sec = 5

            mock_worker_instance = Mock()
            mock_worker_instance.run = AsyncMock()
            mock_worker_cls.return_value = mock_worker_instance

            manager = WorkerManager(specs)
            await manager.start()

            assert len(manager._tasks) == 1

            await manager.stop()

    @pytest.mark.asyncio
    async def test_stop_cancels_all_tasks(self):
        """
        Test stopping all worker tasks.
        """
        specs = [WorkerSpec(queue="test", concurrency=2)]

        with (
            patch("dataloader.workers.manager.APP_CTX") as mock_ctx,
            patch("dataloader.workers.manager.APP_CONFIG") as mock_cfg,
            patch("dataloader.workers.manager.PGWorker") as mock_worker_cls,
        ):
            mock_ctx.get_logger.return_value = Mock()
            mock_cfg.worker.heartbeat_sec = 10
            mock_cfg.worker.claim_backoff_sec = 5

            mock_worker_instance = Mock()
            mock_worker_instance.run = AsyncMock()
            mock_worker_cls.return_value = mock_worker_instance

            manager = WorkerManager(specs)
            await manager.start()

            await manager.stop()

            assert manager._stop.is_set()
            assert len(manager._tasks) == 0
            assert manager._reaper_task is None

    @pytest.mark.asyncio
    async def test_reaper_loop_calls_requeue_lost(self):
        """
        Test that the reaper loop calls requeue_lost.
        """
        specs = [WorkerSpec(queue="test", concurrency=1)]

        with (
            patch("dataloader.workers.manager.APP_CTX") as mock_ctx,
            patch("dataloader.workers.manager.APP_CONFIG") as mock_cfg,
            patch("dataloader.workers.manager.PGWorker") as mock_worker_cls,
            patch("dataloader.workers.manager.requeue_lost") as mock_requeue,
        ):
            mock_logger = Mock()
            mock_ctx.get_logger.return_value = mock_logger
            mock_cfg.worker.heartbeat_sec = 10
            mock_cfg.worker.claim_backoff_sec = 5
            mock_cfg.worker.reaper_period_sec = 1

            mock_session = AsyncMock()
            mock_sm = MagicMock()
            mock_sm.return_value.__aenter__.return_value = mock_session
            mock_ctx.sessionmaker = mock_sm

            mock_worker_instance = Mock()
            mock_worker_instance.run = AsyncMock()
            mock_worker_cls.return_value = mock_worker_instance

            mock_requeue.return_value = ["job1", "job2"]

            manager = WorkerManager(specs)
            await manager.start()

            await asyncio.sleep(1.5)

            await manager.stop()

            assert mock_requeue.call_count >= 1

    @pytest.mark.asyncio
    async def test_reaper_loop_handles_exceptions(self):
        """
        Test that the reaper handles exceptions and keeps running.
        """
        specs = [WorkerSpec(queue="test", concurrency=1)]

        with (
            patch("dataloader.workers.manager.APP_CTX") as mock_ctx,
            patch("dataloader.workers.manager.APP_CONFIG") as mock_cfg,
            patch("dataloader.workers.manager.PGWorker") as mock_worker_cls,
            patch("dataloader.workers.manager.requeue_lost") as mock_requeue,
        ):
            mock_logger = Mock()
            mock_ctx.get_logger.return_value = mock_logger
            mock_cfg.worker.heartbeat_sec = 10
            mock_cfg.worker.claim_backoff_sec = 5
            mock_cfg.worker.reaper_period_sec = 0.5

            mock_session = AsyncMock()
            mock_sm = MagicMock()
            mock_sm.return_value.__aenter__.return_value = mock_session
            mock_ctx.sessionmaker = mock_sm

            mock_worker_instance = Mock()
            mock_worker_instance.run = AsyncMock()
            mock_worker_cls.return_value = mock_worker_instance

            mock_requeue.side_effect = [Exception("DB error"), []]

            manager = WorkerManager(specs)
            await manager.start()

            await asyncio.sleep(1.2)

            await manager.stop()

            assert mock_logger.exception.call_count >= 1


@pytest.mark.unit
class TestBuildManagerFromEnv:
    """
    Unit tests for build_manager_from_env.
    """

    def test_builds_manager_from_config(self):
        """
        Test building the manager from the configuration.
        """
        with (
            patch("dataloader.workers.manager.APP_CTX") as mock_ctx,
            patch("dataloader.workers.manager.APP_CONFIG") as mock_cfg,
        ):
            mock_ctx.get_logger.return_value = Mock()
            mock_cfg.worker.parsed_workers.return_value = [
                {"queue": "queue1", "concurrency": 2},
                {"queue": "queue2", "concurrency": 3},
            ]

            manager = build_manager_from_env()

            assert len(manager._specs) == 2
            assert manager._specs[0].queue == "queue1"
            assert manager._specs[0].concurrency == 2
            assert manager._specs[1].queue == "queue2"
            assert manager._specs[1].concurrency == 3

    def test_skips_empty_queue_names(self):
        """
        Test that empty queue names are skipped.
        """
        with (
            patch("dataloader.workers.manager.APP_CTX") as mock_ctx,
            patch("dataloader.workers.manager.APP_CONFIG") as mock_cfg,
        ):
            mock_ctx.get_logger.return_value = Mock()
            mock_cfg.worker.parsed_workers.return_value = [
                {"queue": "", "concurrency": 2},
                {"queue": "valid_queue", "concurrency": 1},
                {"queue": " ", "concurrency": 3},
            ]

            manager = build_manager_from_env()

            assert len(manager._specs) == 1
            assert manager._specs[0].queue == "valid_queue"

    def test_handles_missing_fields_with_defaults(self):
        """
        Test that missing fields fall back to default values.
        """
        with (
            patch("dataloader.workers.manager.APP_CTX") as mock_ctx,
            patch("dataloader.workers.manager.APP_CONFIG") as mock_cfg,
        ):
            mock_ctx.get_logger.return_value = Mock()
            mock_cfg.worker.parsed_workers.return_value = [
                {"queue": "test"},
                {"queue": "test2", "concurrency": 0},
            ]

            manager = build_manager_from_env()

            assert len(manager._specs) == 2
            assert manager._specs[0].concurrency == 1
            assert manager._specs[1].concurrency == 1

    def test_ensures_minimum_concurrency_of_one(self):
        """
        Test that concurrency is always at least 1.
        """
        with (
            patch("dataloader.workers.manager.APP_CTX") as mock_ctx,
            patch("dataloader.workers.manager.APP_CONFIG") as mock_cfg,
        ):
            mock_ctx.get_logger.return_value = Mock()
            mock_cfg.worker.parsed_workers.return_value = [
                {"queue": "test1", "concurrency": 0},
                {"queue": "test2", "concurrency": -5},
            ]

            manager = build_manager_from_env()

            assert len(manager._specs) == 2
            assert manager._specs[0].concurrency == 1
            assert manager._specs[1].concurrency == 1
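Note: the `TestBuildManagerFromEnv` cases collectively pin down the spec-normalization rules (skip blank queue names, default missing concurrency, clamp it to at least 1). A minimal sketch of that rule set is shown below; `specs_from_parsed` is a hypothetical helper name for illustration, and the actual `build_manager_from_env` may structure this differently.

```python
# Hypothetical normalization helper matching the behavior asserted in TestBuildManagerFromEnv.
from __future__ import annotations

from dataclasses import dataclass


@dataclass
class WorkerSpec:
    queue: str
    concurrency: int = 1


def specs_from_parsed(parsed: list[dict]) -> list[WorkerSpec]:
    """Skip blank queue names, default missing concurrency, and clamp it to a minimum of 1."""
    specs: list[WorkerSpec] = []
    for item in parsed:
        queue = str(item.get("queue", "")).strip()
        if not queue:
            continue
        concurrency = max(1, int(item.get("concurrency") or 1))
        specs.append(WorkerSpec(queue=queue, concurrency=concurrency))
    return specs
```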
@ -0,0 +1,27 @@
from __future__ import annotations

from unittest.mock import AsyncMock, Mock, patch

import pytest

from dataloader.workers.reaper import requeue_lost


@pytest.mark.unit
@pytest.mark.asyncio
async def test_requeue_lost_calls_repository_and_returns_ids():
    """
    Verify that requeue_lost calls QueueRepository.requeue_lost and returns its result.
    """
    fake_session = Mock()

    with patch("dataloader.workers.reaper.QueueRepository") as repo_cls:
        repo = Mock()
        repo.requeue_lost = AsyncMock(return_value=["id1", "id2"])
        repo_cls.return_value = repo

        res = await requeue_lost(fake_session)

        assert res == ["id1", "id2"]
        repo_cls.assert_called_once_with(fake_session)
        repo.requeue_lost.assert_awaited_once()
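Note: this test only constrains `requeue_lost` to build a `QueueRepository` from the session, delegate, and return the ids. A minimal sketch consistent with that is shown below; the `QueueRepository` import path and the session type annotation are assumptions, not taken from this diff.

```python
# Hypothetical sketch of dataloader/workers/reaper.py consistent with the test above.
from __future__ import annotations

from sqlalchemy.ext.asyncio import AsyncSession

from dataloader.storage.repositories import QueueRepository  # assumed import path


async def requeue_lost(session: AsyncSession) -> list[str]:
    """Return jobs with expired leases to the queue and report their ids."""
    repo = QueueRepository(session)
    return await repo.requeue_lost()
```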