Initial commit: Add complete project files to the repository

This commit is contained in:
itqop 2025-01-15 05:14:27 +03:00
parent 74a8baefc9
commit a4e6544db1
8 changed files with 252 additions and 0 deletions

0
__init__.py Normal file
View File

9
config.py Normal file
View File

@ -0,0 +1,9 @@
ALPHABET = '-ABEKMHOPCTYX0123456789'
CHAR_TO_IDX = {char: idx + 1 for idx, char in enumerate(ALPHABET)}
IDX_TO_CHAR = {idx + 1: char for idx, char in enumerate(ALPHABET)}
NUM_CLASSES = len(ALPHABET) + 1
CTC_BLANK = 0
YOLO_WEIGHTS_PATH = "models/yolo_plate.pt"
CRNN_WEIGHTS_PATH = "models/best_accuracy_model_3.pth"

107
license_plate_recognizer.py Normal file
View File

@ -0,0 +1,107 @@
import torch
from PIL import Image
from ultralytics import YOLO
from torchvision import transforms
from model import CRNN
import cv2
from config import CTC_BLANK, IDX_TO_CHAR
# ------------------------------------------------------------
# Основной класс, объединяющий YOLO + CRNN
# ------------------------------------------------------------
class LicensePlateRecognizer:
def __init__(self,
yolo_model_path: str,
crnn_model_path: str,
num_classes: int,
device: str = "cpu"):
"""
yolo_model_path: путь к файлу весов YOLO (напр. "yolo_plate.pt")
crnn_model_path: путь к файлу весов CRNN (напр. "best_accuracy_model_2.pth")
"""
self.yolo_model = YOLO(yolo_model_path)
self.crnn_model = CRNN(num_classes=num_classes).to(device)
self.crnn_model.load_state_dict(torch.load(crnn_model_path, map_location=device))
self.crnn_model.eval()
self.transform = transforms.Compose([
transforms.Resize((32, 128)),
transforms.ToTensor(),
transforms.Normalize((0.5,), (0.5,))
])
self.device = device
def detect_and_recognize_frame(self, frame, padding: int = 5):
"""
Принимает кадр (BGR, np.array) напрямую,
возвращает список словарей:
{
"bbox": (x1, y1, x2, y2),
"text": распознанный_номер
}
"""
results = self.yolo_model.predict(frame)
detections_info = []
if not results:
return detections_info
for result in results:
boxes = result.boxes
if boxes is None or len(boxes) == 0:
continue
for box in boxes:
x1, y1, x2, y2 = map(int, box.xyxy[0].tolist())
x1_padded = max(x1 - padding, 0)
y1_padded = max(y1 - padding, 0)
x2_padded = min(x2 + padding, frame.shape[1])
y2_padded = min(y2 + padding, frame.shape[0])
plate_crop_bgr = frame[y1_padded:y2_padded, x1_padded:x2_padded]
if plate_crop_bgr.size == 0:
continue
# Конвертация в PIL (grayscale)
plate_gray = cv2.cvtColor(plate_crop_bgr, cv2.COLOR_BGR2GRAY)
plate_pil = Image.fromarray(plate_gray)
# Подготовка к CRNN
plate_tensor = self.transform(plate_pil).unsqueeze(0).to(self.device)
with torch.no_grad():
logits = self.crnn_model(plate_tensor)
decoded_texts = decode_predictions(logits)
recognized_text = decoded_texts[0] if len(decoded_texts) > 0 else ""
detections_info.append({
"bbox": (x1, y1, x2, y2),
"text": recognized_text
})
return detections_info
# ------------------------------------------------------------
# Расшифровка результатов CRNN
# ------------------------------------------------------------
def decode_predictions(preds, blank=CTC_BLANK):
# preds: (seq_len, batch, num_classes)
preds = preds.argmax(2) # (seq_len, batch)
preds = preds.permute(1, 0) # (batch, seq_len)
decoded = []
for pred in preds:
pred = pred.tolist()
decoded_seq = []
previous = blank
for p in pred:
if p != previous and p != blank:
decoded_seq.append(IDX_TO_CHAR.get(p, ''))
previous = p
decoded.append(''.join(decoded_seq))
return decoded

69
main.py Normal file
View File

@ -0,0 +1,69 @@
import cv2
import numpy as np
from config import NUM_CLASSES, CRNN_WEIGHTS_PATH, YOLO_WEIGHTS_PATH
from license_plate_recognizer import LicensePlateRecognizer
# ------------------------------------------------------------
# Запуск в режиме реального времени (веб-камера)
# ------------------------------------------------------------
if __name__ == "__main__":
lpr = LicensePlateRecognizer(
yolo_model_path=YOLO_WEIGHTS_PATH,
crnn_model_path=CRNN_WEIGHTS_PATH,
num_classes=NUM_CLASSES,
device="cpu"
)
cap = cv2.VideoCapture(0)
# cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
# cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 480)
while True:
ret, frame = cap.read()
if not ret:
print("Не удалось считать кадр с веб-камеры.")
break
detections = lpr.detect_and_recognize_frame(frame, padding=5)
for det in detections:
x1, y1, x2, y2 = det["bbox"]
text = det["text"]
cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
cv2.putText(
frame, text, (x1, y1 - 10),
cv2.FONT_HERSHEY_SIMPLEX,
0.7, (0, 255, 0), 2
)
height, width, _ = frame.shape
black_bar_width = 300
black_bar = np.zeros((height, black_bar_width, 3), dtype=np.uint8)
y_start = 40
for i, det in enumerate(detections):
txt = det["text"]
cv2.putText(
black_bar,
f"Plate #{i+1}: {txt}",
(10, y_start),
cv2.FONT_HERSHEY_SIMPLEX,
0.7, (255, 255, 255), 2
)
y_start += 40
display_frame = np.hstack((frame, black_bar))
cv2.imshow("License Plate Recognition", display_frame)
key = cv2.waitKey(1) & 0xFF
if key == 27 or key == ord('q'):
break
cap.release()
cv2.destroyAllWindows()

56
model.py Normal file
View File

@ -0,0 +1,56 @@
import torch.nn as nn
# ------------------------------------------------------------
# CRNN-модель
# ------------------------------------------------------------
class CRNN(nn.Module):
def __init__(self, num_classes):
super(CRNN, self).__init__()
# CNN часть
self.cnn = nn.Sequential(
nn.Conv2d(1, 64, kernel_size=3, padding=1),
nn.ReLU(inplace=True),
nn.MaxPool2d(2, 2),
nn.Conv2d(64, 128, kernel_size=3, padding=1),
nn.ReLU(inplace=True),
nn.MaxPool2d(2, 2),
nn.Conv2d(128, 256, kernel_size=3, padding=1),
nn.ReLU(inplace=True),
nn.BatchNorm2d(256),
nn.Conv2d(256, 256, kernel_size=3, padding=1),
nn.ReLU(inplace=True),
nn.MaxPool2d((2,1), (2,1)),
nn.Conv2d(256, 512, kernel_size=3, padding=1),
nn.ReLU(inplace=True),
nn.BatchNorm2d(512),
nn.Conv2d(512, 512, kernel_size=3, padding=1),
nn.ReLU(inplace=True),
nn.MaxPool2d((2,1), (2,1)),
)
# RNN часть
self.linear1 = nn.Linear(512 * 2, 256)
self.relu = nn.ReLU(inplace=True)
self.lstm = nn.LSTM(256, 256, bidirectional=True, batch_first=True)
self.linear2 = nn.Linear(512, num_classes)
def forward(self, x):
# x: (batch, 1, 32, 128) — после Resize
conv = self.cnn(x) # (batch, 512, 2, 32)
conv = conv.permute(0, 3, 1, 2) # (batch, width=32, channels=512, height=2)
conv = conv.view(conv.size(0), conv.size(1), -1) # (batch, 32, 512*2)
out = self.linear1(conv) # (batch, 32, 256)
out = self.relu(out) # (batch, 32, 256)
out, _ = self.lstm(out) # (batch, 32, 512) — bidirectional
out = self.linear2(out) # (batch, 32, num_classes)
out = out.permute(1, 0, 2) # (32, batch, num_classes)
return out

Binary file not shown.

BIN
models/yolo_plate.pt Normal file

Binary file not shown.

11
requirements.txt Normal file
View File

@ -0,0 +1,11 @@
--index-url https://pypi.org/simple
numpy==1.26.3
pillow==10.2.0
opencv-python==4.10.0.84
scikit-learn==1.5.2
scipy==1.13.1
matplotlib==3.9.2
tqdm
ultralytics
torch==2.0.1
torchvision==0.15.2