diff --git a/__init__.py b/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/config.py b/config.py
new file mode 100644
index 0000000..2eb390d
--- /dev/null
+++ b/config.py
@@ -0,0 +1,9 @@
+ALPHABET = '-ABEKMHOPCTYX0123456789'
+CHAR_TO_IDX = {char: idx + 1 for idx, char in enumerate(ALPHABET)}
+IDX_TO_CHAR = {idx + 1: char for idx, char in enumerate(ALPHABET)}
+
+NUM_CLASSES = len(ALPHABET) + 1
+CTC_BLANK = 0
+
+YOLO_WEIGHTS_PATH = "models/yolo_plate.pt"
+CRNN_WEIGHTS_PATH = "models/best_accuracy_model_3.pth"
\ No newline at end of file
diff --git a/license_plate_recognizer.py b/license_plate_recognizer.py
new file mode 100644
index 0000000..2313a70
--- /dev/null
+++ b/license_plate_recognizer.py
@@ -0,0 +1,107 @@
+import torch
+from PIL import Image
+from ultralytics import YOLO
+from torchvision import transforms
+from model import CRNN
+import cv2
+
+from config import CTC_BLANK, IDX_TO_CHAR
+
+# ------------------------------------------------------------
+# Main class combining YOLO + CRNN
+# ------------------------------------------------------------
+class LicensePlateRecognizer:
+    def __init__(self,
+                 yolo_model_path: str,
+                 crnn_model_path: str,
+                 num_classes: int,
+                 device: str = "cpu"):
+        """
+        yolo_model_path: path to the YOLO weights file (e.g. "yolo_plate.pt")
+        crnn_model_path: path to the CRNN weights file (e.g. "best_accuracy_model_3.pth")
+        """
+        self.yolo_model = YOLO(yolo_model_path)
+
+        self.crnn_model = CRNN(num_classes=num_classes).to(device)
+        self.crnn_model.load_state_dict(torch.load(crnn_model_path, map_location=device))
+        self.crnn_model.eval()
+
+        self.transform = transforms.Compose([
+            transforms.Resize((32, 128)),
+            transforms.ToTensor(),
+            transforms.Normalize((0.5,), (0.5,))
+        ])
+
+        self.device = device
+
+    def detect_and_recognize_frame(self, frame, padding: int = 5):
+        """
+        Takes a frame (BGR, np.array) directly,
+        returns a list of dicts:
+        {
+            "bbox": (x1, y1, x2, y2),
+            "text": recognized_plate_text
+        }
+        """
+        results = self.yolo_model.predict(frame)
+        detections_info = []
+
+        if not results:
+            return detections_info
+
+        for result in results:
+            boxes = result.boxes
+            if boxes is None or len(boxes) == 0:
+                continue
+
+            for box in boxes:
+                x1, y1, x2, y2 = map(int, box.xyxy[0].tolist())
+
+                x1_padded = max(x1 - padding, 0)
+                y1_padded = max(y1 - padding, 0)
+                x2_padded = min(x2 + padding, frame.shape[1])
+                y2_padded = min(y2 + padding, frame.shape[0])
+
+                plate_crop_bgr = frame[y1_padded:y2_padded, x1_padded:x2_padded]
+                if plate_crop_bgr.size == 0:
+                    continue
+
+                # Convert to PIL (grayscale)
+                plate_gray = cv2.cvtColor(plate_crop_bgr, cv2.COLOR_BGR2GRAY)
+                plate_pil = Image.fromarray(plate_gray)
+
+                # Prepare the crop for the CRNN
+                plate_tensor = self.transform(plate_pil).unsqueeze(0).to(self.device)
+
+                with torch.no_grad():
+                    logits = self.crnn_model(plate_tensor)
+                    decoded_texts = decode_predictions(logits)
+                    recognized_text = decoded_texts[0] if len(decoded_texts) > 0 else ""
+
+                detections_info.append({
+                    "bbox": (x1, y1, x2, y2),
+                    "text": recognized_text
+                })
+
+        return detections_info
+
+
+# ------------------------------------------------------------
+# Decoding CRNN outputs (greedy CTC decoding)
+# ------------------------------------------------------------
+def decode_predictions(preds, blank=CTC_BLANK):
+    # preds: (seq_len, batch, num_classes)
+    preds = preds.argmax(2)      # (seq_len, batch)
+    preds = preds.permute(1, 0)  # (batch, seq_len)
+
+    decoded = []
+    for pred in preds:
+        pred = pred.tolist()
+        decoded_seq = []
+        previous = blank
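+        # Greedy CTC collapse: emit a character only when it differs from the
+        # previous timestep and is not the blank token. `previous` is updated
+        # on every step (blanks included), so identical characters separated
+        # by a blank are correctly emitted twice.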
+        for p in pred:
+            if p != previous and p != blank:
+                decoded_seq.append(IDX_TO_CHAR.get(p, ''))
+            previous = p
+        decoded.append(''.join(decoded_seq))
+    return decoded
\ No newline at end of file
diff --git a/main.py b/main.py
new file mode 100644
index 0000000..4bd9e0c
--- /dev/null
+++ b/main.py
@@ -0,0 +1,69 @@
+
+import cv2
+import numpy as np
+
+from config import NUM_CLASSES, CRNN_WEIGHTS_PATH, YOLO_WEIGHTS_PATH
+from license_plate_recognizer import LicensePlateRecognizer
+
+# ------------------------------------------------------------
+# Real-time inference from a webcam
+# ------------------------------------------------------------
+if __name__ == "__main__":
+    lpr = LicensePlateRecognizer(
+        yolo_model_path=YOLO_WEIGHTS_PATH,
+        crnn_model_path=CRNN_WEIGHTS_PATH,
+        num_classes=NUM_CLASSES,
+        device="cpu"
+    )
+
+    cap = cv2.VideoCapture(0)
+
+    # cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
+    # cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 480)
+
+    while True:
+        ret, frame = cap.read()
+        if not ret:
+            print("Failed to read a frame from the webcam.")
+            break
+
+        detections = lpr.detect_and_recognize_frame(frame, padding=5)
+
+        for det in detections:
+            x1, y1, x2, y2 = det["bbox"]
+            text = det["text"]
+
+            cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
+
+            cv2.putText(
+                frame, text, (x1, y1 - 10),
+                cv2.FONT_HERSHEY_SIMPLEX,
+                0.7, (0, 255, 0), 2
+            )
+
+        height, width, _ = frame.shape
+        black_bar_width = 300
+        black_bar = np.zeros((height, black_bar_width, 3), dtype=np.uint8)
+
+        y_start = 40
+        for i, det in enumerate(detections):
+            txt = det["text"]
+            cv2.putText(
+                black_bar,
+                f"Plate #{i+1}: {txt}",
+                (10, y_start),
+                cv2.FONT_HERSHEY_SIMPLEX,
+                0.7, (255, 255, 255), 2
+            )
+            y_start += 40
+
+        display_frame = np.hstack((frame, black_bar))
+
+        cv2.imshow("License Plate Recognition", display_frame)
+
+        key = cv2.waitKey(1) & 0xFF
+        if key == 27 or key == ord('q'):
+            break
+
+    cap.release()
+    cv2.destroyAllWindows()
diff --git a/model.py b/model.py
new file mode 100644
index 0000000..71fae6a
--- /dev/null
+++ b/model.py
@@ -0,0 +1,56 @@
+import torch.nn as nn
+
+
+# ------------------------------------------------------------
+# CRNN model
+# ------------------------------------------------------------
+class CRNN(nn.Module):
+    def __init__(self, num_classes):
+        super(CRNN, self).__init__()
+
+        # CNN part
+        self.cnn = nn.Sequential(
+            nn.Conv2d(1, 64, kernel_size=3, padding=1),
+            nn.ReLU(inplace=True),
+            nn.MaxPool2d(2, 2),
+
+            nn.Conv2d(64, 128, kernel_size=3, padding=1),
+            nn.ReLU(inplace=True),
+            nn.MaxPool2d(2, 2),
+
+            nn.Conv2d(128, 256, kernel_size=3, padding=1),
+            nn.ReLU(inplace=True),
+            nn.BatchNorm2d(256),
+
+            nn.Conv2d(256, 256, kernel_size=3, padding=1),
+            nn.ReLU(inplace=True),
+            nn.MaxPool2d((2, 1), (2, 1)),
+
+            nn.Conv2d(256, 512, kernel_size=3, padding=1),
+            nn.ReLU(inplace=True),
+            nn.BatchNorm2d(512),
+
+            nn.Conv2d(512, 512, kernel_size=3, padding=1),
+            nn.ReLU(inplace=True),
+            nn.MaxPool2d((2, 1), (2, 1)),
+        )
+
+        # RNN part
+        self.linear1 = nn.Linear(512 * 2, 256)
+        self.relu = nn.ReLU(inplace=True)
+        self.lstm = nn.LSTM(256, 256, bidirectional=True, batch_first=True)
+        self.linear2 = nn.Linear(512, num_classes)
+
+    def forward(self, x):
+        # x: (batch, 1, 32, 128) after Resize
+        conv = self.cnn(x)                                # (batch, 512, 2, 32)
+        conv = conv.permute(0, 3, 1, 2)                   # (batch, width=32, channels=512, height=2)
+        conv = conv.view(conv.size(0), conv.size(1), -1)  # (batch, 32, 512*2)
+
+        out = self.linear1(conv)   # (batch, 32, 256)
+        out = self.relu(out)       # (batch, 32, 256)
+        out, _ = self.lstm(out)    # (batch, 32, 512), bidirectional
+        out = self.linear2(out)    # (batch, 32, num_classes)
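+        # Both nn.CTCLoss and decode_predictions expect the
+        # (seq_len, batch, num_classes) layout, hence the permute below.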
+
+        out = out.permute(1, 0, 2)  # (32, batch, num_classes)
+        return out
diff --git a/models/best_accuracy_model_3.pth b/models/best_accuracy_model_3.pth
new file mode 100644
index 0000000..6d91355
Binary files /dev/null and b/models/best_accuracy_model_3.pth differ
diff --git a/models/yolo_plate.pt b/models/yolo_plate.pt
new file mode 100644
index 0000000..3fa30ab
Binary files /dev/null and b/models/yolo_plate.pt differ
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..d120fda
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,11 @@
+--index-url https://pypi.org/simple
+numpy==1.26.3
+pillow==10.2.0
+opencv-python==4.10.0.84
+scikit-learn==1.5.2
+scipy==1.13.1
+matplotlib==3.9.2
+tqdm
+ultralytics
+torch==2.0.1
+torchvision==0.15.2
\ No newline at end of file
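
Usage note: a minimal single-image sketch of the same pipeline, assuming the patch above is applied; the input path "car.jpg" is a placeholder, not a file shipped in this repo:

    import cv2
    from config import NUM_CLASSES, CRNN_WEIGHTS_PATH, YOLO_WEIGHTS_PATH
    from license_plate_recognizer import LicensePlateRecognizer

    lpr = LicensePlateRecognizer(
        yolo_model_path=YOLO_WEIGHTS_PATH,
        crnn_model_path=CRNN_WEIGHTS_PATH,
        num_classes=NUM_CLASSES,
        device="cpu",
    )

    frame = cv2.imread("car.jpg")  # placeholder path; must be a BGR image
    assert frame is not None, "image not found"
    for det in lpr.detect_and_recognize_frame(frame, padding=5):
        print(det["bbox"], det["text"])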