fixes
This commit is contained in:
parent
2e7aace21f
commit
d9cc0c85d1
|
|
@ -118,6 +118,7 @@ class PGSettings(BaseSettings):
|
||||||
schema_queue: str = Field(validation_alias="PG_SCHEMA_QUEUE", default="public")
|
schema_queue: str = Field(validation_alias="PG_SCHEMA_QUEUE", default="public")
|
||||||
schema_quotes: str = Field(validation_alias="PG_SCHEMA_QUOTES", default="public")
|
schema_quotes: str = Field(validation_alias="PG_SCHEMA_QUOTES", default="public")
|
||||||
schema_opu: str = Field(validation_alias="PG_SCHEMA_OPU", default="public")
|
schema_opu: str = Field(validation_alias="PG_SCHEMA_OPU", default="public")
|
||||||
|
batch_size: int = Field(validation_alias="PG_BATCH_SIZE", default=1000)
|
||||||
use_pool: bool = Field(validation_alias="PG_USE_POOL", default=True)
|
use_pool: bool = Field(validation_alias="PG_USE_POOL", default=True)
|
||||||
pool_size: int = Field(validation_alias="PG_POOL_SIZE", default=5)
|
pool_size: int = Field(validation_alias="PG_POOL_SIZE", default=5)
|
||||||
max_overflow: int = Field(validation_alias="PG_MAX_OVERFLOW", default=10)
|
max_overflow: int = Field(validation_alias="PG_MAX_OVERFLOW", default=10)
|
||||||
|
|
|
||||||
|
|
@ -5,10 +5,11 @@ from __future__ import annotations
|
||||||
from collections.abc import Sequence
|
from collections.abc import Sequence
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
||||||
from sqlalchemy import text
|
from sqlalchemy import DDL, text
|
||||||
from sqlalchemy.dialects.postgresql import insert as pg_insert
|
from sqlalchemy.dialects.postgresql import insert as pg_insert
|
||||||
from sqlalchemy.ext.asyncio import AsyncSession
|
from sqlalchemy.ext.asyncio import AsyncSession
|
||||||
|
|
||||||
|
from dataloader.config import APP_CONFIG
|
||||||
from dataloader.storage.models import BriefDigitalCertificateOpu
|
from dataloader.storage.models import BriefDigitalCertificateOpu
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -23,25 +24,49 @@ class OpuRepository:
|
||||||
session: Асинхронная сессия SQLAlchemy
|
session: Асинхронная сессия SQLAlchemy
|
||||||
"""
|
"""
|
||||||
self.s = session
|
self.s = session
|
||||||
|
self.schema = APP_CONFIG.pg.schema_opu
|
||||||
|
self.batch_size = APP_CONFIG.pg.batch_size
|
||||||
|
|
||||||
async def truncate(self) -> None:
|
async def truncate(
|
||||||
|
self, *, cascade: bool = False, restart_identity: bool = True
|
||||||
|
) -> None:
|
||||||
"""
|
"""
|
||||||
Очищает таблицу brief_digital_certificate_opu.
|
Быстро очищает таблицу, уважая имя схемы и безопасное квотирование для PostgreSQL.
|
||||||
|
|
||||||
Использует TRUNCATE для быстрой очистки.
|
Args:
|
||||||
|
cascade: добавляет CASCADE
|
||||||
|
restart_identity: добавляет RESTART IDENTITY
|
||||||
"""
|
"""
|
||||||
table_name = BriefDigitalCertificateOpu.__tablename__
|
table = BriefDigitalCertificateOpu.__table__
|
||||||
schema = BriefDigitalCertificateOpu.__table_args__[0]["schema"]
|
|
||||||
full_table_name = f"{schema}.{table_name}"
|
|
||||||
|
|
||||||
await self.s.execute(text(f"TRUNCATE TABLE {full_table_name}"))
|
def quote_ident(name: str) -> str:
|
||||||
|
"""Экранирует кавычки и оборачивает имя в двойные."""
|
||||||
|
return f'"{name.replace("\"", "\"\"")}"'
|
||||||
|
|
||||||
async def bulk_insert(self, records: Sequence[dict[str, Any]]) -> int:
|
schema_quoted = quote_ident(self.schema)
|
||||||
|
table_quoted = quote_ident(table.name)
|
||||||
|
full_table_name = f"{schema_quoted}.{table_quoted}"
|
||||||
|
|
||||||
|
opts = []
|
||||||
|
if restart_identity:
|
||||||
|
opts.append("RESTART IDENTITY")
|
||||||
|
if cascade:
|
||||||
|
opts.append("CASCADE")
|
||||||
|
|
||||||
|
suffix = f" {' '.join(opts)}" if opts else ""
|
||||||
|
|
||||||
|
await self.s.execute(text(f"TRUNCATE TABLE {full_table_name}{suffix}"))
|
||||||
|
await self.s.commit()
|
||||||
|
|
||||||
|
async def bulk_insert(
|
||||||
|
self, records: Sequence[dict[str, Any]], batch_size: int | None = None
|
||||||
|
) -> int:
|
||||||
"""
|
"""
|
||||||
Массовая вставка записей в таблицу.
|
Массовая вставка записей в таблицу батчами.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
records: Список словарей с данными для вставки
|
records: Список словарей с данными для вставки
|
||||||
|
batch_size: Размер батча (default: из конфига PG_BATCH_SIZE)
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Количество вставленных записей
|
Количество вставленных записей
|
||||||
|
|
@ -49,19 +74,32 @@ class OpuRepository:
|
||||||
if not records:
|
if not records:
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
|
if batch_size is None:
|
||||||
|
batch_size = self.batch_size
|
||||||
|
|
||||||
|
total_inserted = 0
|
||||||
|
|
||||||
|
for i in range(0, len(records), batch_size):
|
||||||
|
batch = records[i : i + batch_size]
|
||||||
|
|
||||||
async with self.s.begin_nested():
|
async with self.s.begin_nested():
|
||||||
stmt = pg_insert(BriefDigitalCertificateOpu).values(records)
|
stmt = pg_insert(BriefDigitalCertificateOpu).values(batch)
|
||||||
await self.s.execute(stmt)
|
await self.s.execute(stmt)
|
||||||
await self.s.flush()
|
await self.s.flush()
|
||||||
|
|
||||||
return len(records)
|
total_inserted += len(batch)
|
||||||
|
|
||||||
async def bulk_upsert(self, records: Sequence[dict[str, Any]]) -> int:
|
return total_inserted
|
||||||
|
|
||||||
|
async def bulk_upsert(
|
||||||
|
self, records: Sequence[dict[str, Any]], batch_size: int | None = None
|
||||||
|
) -> int:
|
||||||
"""
|
"""
|
||||||
Массовая вставка/обновление записей (UPSERT).
|
Массовая вставка/обновление записей (UPSERT) батчами.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
records: Список словарей с данными
|
records: Список словарей с данными
|
||||||
|
batch_size: Размер батча (default: из конфига PG_BATCH_SIZE)
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Количество обработанных записей
|
Количество обработанных записей
|
||||||
|
|
@ -69,6 +107,9 @@ class OpuRepository:
|
||||||
if not records:
|
if not records:
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
|
if batch_size is None:
|
||||||
|
batch_size = self.batch_size
|
||||||
|
|
||||||
update_columns = {
|
update_columns = {
|
||||||
c.name
|
c.name
|
||||||
for c in BriefDigitalCertificateOpu.__table__.columns
|
for c in BriefDigitalCertificateOpu.__table__.columns
|
||||||
|
|
@ -76,7 +117,12 @@ class OpuRepository:
|
||||||
and c.name not in {"wf_load_id", "wf_load_dttm", "wf_row_id"}
|
and c.name not in {"wf_load_id", "wf_load_dttm", "wf_row_id"}
|
||||||
}
|
}
|
||||||
|
|
||||||
insert_stmt = pg_insert(BriefDigitalCertificateOpu).values(records)
|
total_upserted = 0
|
||||||
|
|
||||||
|
for i in range(0, len(records), batch_size):
|
||||||
|
batch = records[i : i + batch_size]
|
||||||
|
|
||||||
|
insert_stmt = pg_insert(BriefDigitalCertificateOpu).values(batch)
|
||||||
update_cols = {col: insert_stmt.excluded[col] for col in update_columns}
|
update_cols = {col: insert_stmt.excluded[col] for col in update_columns}
|
||||||
|
|
||||||
stmt = insert_stmt.on_conflict_do_update(
|
stmt = insert_stmt.on_conflict_do_update(
|
||||||
|
|
@ -97,4 +143,6 @@ class OpuRepository:
|
||||||
await self.s.execute(stmt)
|
await self.s.execute(stmt)
|
||||||
await self.s.flush()
|
await self.s.flush()
|
||||||
|
|
||||||
return len(records)
|
total_upserted += len(batch)
|
||||||
|
|
||||||
|
return total_upserted
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue