Initial commit: Add complete project files to the repository

2025-01-15 05:14:27 +03:00 · 2025-01-15 05:14:27 +03:00 · a4e6544db1
parent 74a8baefc9
commit a4e6544db1
8 changed files with 252 additions and 0 deletions
--- a/init.py
+++ b/init.py
--- a/config.py
+++ b/config.py
@ -0,0 +1,9 @@
 # Every symbol the recogniser can emit, in class-index order.
 # NOTE(review): the letters look like the Latin look-alikes used on Russian
 # plates, and '-' looks like a filler symbol -- confirm against the training data.
 ALPHABET = '-ABEKMHOPCTYX0123456789'

 # Symbol <-> class-index lookup tables. Indices start at 1 because index 0 is
 # kept free for the CTC blank (see CTC_BLANK below).
 CHAR_TO_IDX = {symbol: position for position, symbol in enumerate(ALPHABET, start=1)}
 IDX_TO_CHAR = dict(enumerate(ALPHABET, start=1))

 # One output class per symbol plus one for the CTC blank.
 NUM_CLASSES = 1 + len(ALPHABET)
 CTC_BLANK = 0

 # Locations of the pretrained weights (relative paths).
 YOLO_WEIGHTS_PATH = "models/yolo_plate.pt"
 CRNN_WEIGHTS_PATH = "models/best_accuracy_model_3.pth"
--- a/license_plate_recognizer.py
+++ b/license_plate_recognizer.py
@ -0,0 +1,107 @@
 import torch
 from PIL import Image
 from ultralytics import YOLO
 from torchvision import transforms
 from model import CRNN
 import cv2
 from config import CTC_BLANK, IDX_TO_CHAR
 # ------------------------------------------------------------
 # Основной класс, объединяющий YOLO + CRNN
 # ------------------------------------------------------------
 class LicensePlateRecognizer:
    """Two-stage licence plate reader: YOLO finds plates, a CRNN reads them."""

    def __init__(self,
                 yolo_model_path: str,
                 crnn_model_path: str,
                 num_classes: int,
                 device: str = "cpu"):
        """
        Load both networks and build the CRNN preprocessing pipeline.

        yolo_model_path: path to the YOLO weights file (e.g. "yolo_plate.pt")
        crnn_model_path: path to the CRNN weights file (e.g. "best_accuracy_model_2.pth")
        num_classes: number of output classes the CRNN is built with
        device: torch device string used for the CRNN and its input tensors
        """
        self.device = device
        self.yolo_model = YOLO(yolo_model_path)

        # Build the recogniser on the target device, restore its weights and
        # switch it to inference mode.
        recognizer = CRNN(num_classes=num_classes).to(device)
        state_dict = torch.load(crnn_model_path, map_location=device)
        recognizer.load_state_dict(state_dict)
        recognizer.eval()
        self.crnn_model = recognizer

        # Crops are resized to 32x128 and scaled from [0, 1] to [-1, 1].
        preprocessing_steps = [
            transforms.Resize((32, 128)),
            transforms.ToTensor(),
            transforms.Normalize((0.5,), (0.5,)),
        ]
        self.transform = transforms.Compose(preprocessing_steps)

    def detect_and_recognize_frame(self, frame, padding: int = 5):
        """
        Detect plates in a frame and read the text of each one.

        frame: image as a BGR numpy array, passed to YOLO as-is
        padding: number of pixels added on every side of a detected box
                 before cropping (the crop is clamped to the frame borders)

        Returns a list of dictionaries:
            {
              "bbox": (x1, y1, x2, y2),   # unpadded detector box
              "text": recognised_plate_text
            }
        """
        found = []
        predictions = self.yolo_model.predict(frame)
        if not predictions:
            return found

        for prediction in predictions:
            plate_boxes = prediction.boxes
            if plate_boxes is None or len(plate_boxes) == 0:
                continue

            for plate_box in plate_boxes:
                corners = plate_box.xyxy[0].tolist()
                x1, y1, x2, y2 = (int(value) for value in corners)

                # Grow the box by `padding`, staying inside the frame.
                frame_height, frame_width = frame.shape[0], frame.shape[1]
                left = max(x1 - padding, 0)
                top = max(y1 - padding, 0)
                right = min(x2 + padding, frame_width)
                bottom = min(y2 + padding, frame_height)

                crop_bgr = frame[top:bottom, left:right]
                if crop_bgr.size == 0:
                    # Degenerate box: nothing to recognise.
                    continue

                # The CRNN works on single-channel PIL images.
                crop_gray = cv2.cvtColor(crop_bgr, cv2.COLOR_BGR2GRAY)
                crop_pil = Image.fromarray(crop_gray)

                # Add the batch dimension and move the tensor to the CRNN device.
                crnn_input = self.transform(crop_pil)
                crnn_input = crnn_input.unsqueeze(0).to(self.device)
                with torch.no_grad():
                    logits = self.crnn_model(crnn_input)

                texts = decode_predictions(logits)
                if len(texts) > 0:
                    plate_text = texts[0]
                else:
                    plate_text = ""

                found.append({
                    "bbox": (x1, y1, x2, y2),
                    "text": plate_text
                })

        return found
 # ------------------------------------------------------------
 # Расшифровка результатов CRNN
 # ------------------------------------------------------------
 def decode_predictions(preds, blank=CTC_BLANK):
    """
    Greedy CTC decoding of CRNN output into strings.

    preds: tensor of shape (seq_len, batch, num_classes)
    blank: class index of the CTC blank symbol

    Returns a list with one decoded string per batch element. For every
    timestep the highest-scoring class is taken; a class is emitted only when
    it is not the blank and differs from the class of the previous timestep.
    Indices missing from IDX_TO_CHAR contribute an empty string.
    """
    # (seq_len, batch, num_classes) -> best class per step, batch-major.
    best_per_step = preds.argmax(2).permute(1, 0)  # (batch, seq_len)

    texts = []
    for indices in best_per_step.tolist():
        # Pair every index with its predecessor; the first step is compared
        # against the blank.
        predecessors = [blank] + indices[:-1]
        symbols = [
            IDX_TO_CHAR.get(current, '')
            for before, current in zip(predecessors, indices)
            if current != blank and current != before
        ]
        texts.append(''.join(symbols))
    return texts
--- a/main.py
+++ b/main.py
@ -0,0 +1,69 @@
 import cv2
 import numpy as np
 from config import NUM_CLASSES, CRNN_WEIGHTS_PATH, YOLO_WEIGHTS_PATH
 from license_plate_recognizer import LicensePlateRecognizer
 # ------------------------------------------------------------
 # Запуск в режиме реального времени (веб-камера)
 # ------------------------------------------------------------
 if __name__ == "__main__":
    # Real-time demo: read frames from the default webcam, detect and read
    # licence plates, and show the annotated frame next to a text side panel.
    # Press Esc or 'q' to quit.
    lpr = LicensePlateRecognizer(
        yolo_model_path=YOLO_WEIGHTS_PATH,
        crnn_model_path=CRNN_WEIGHTS_PATH,
        num_classes=NUM_CLASSES,
        device="cpu"
    )
    cap = cv2.VideoCapture(0)
    # Optional capture resolution override:
    # cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
    # cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 480)
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                print("Не удалось считать кадр с веб-камеры.")
                break

            detections = lpr.detect_and_recognize_frame(frame, padding=5)

            # Draw each detection box and its text directly on the frame.
            for det in detections:
                x1, y1, x2, y2 = det["bbox"]
                text = det["text"]
                cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
                # Keep the label baseline inside the image: for plates close
                # to the top edge `y1 - 10` would be negative and the text
                # would be drawn outside the frame.
                label_y = max(y1 - 10, 20)
                cv2.putText(
                    frame, text, (x1, label_y),
                    cv2.FONT_HERSHEY_SIMPLEX,
                    0.7, (0, 255, 0), 2
                )

            # Side panel listing the recognised plates, one row per detection.
            height, width, _ = frame.shape
            black_bar_width = 300
            black_bar = np.zeros((height, black_bar_width, 3), dtype=np.uint8)
            y_start = 40
            for i, det in enumerate(detections):
                txt = det["text"]
                cv2.putText(
                    black_bar,
                    f"Plate #{i+1}: {txt}",
                    (10, y_start),
                    cv2.FONT_HERSHEY_SIMPLEX,
                    0.7, (255, 255, 255), 2
                )
                y_start += 40

            display_frame = np.hstack((frame, black_bar))
            cv2.imshow("License Plate Recognition", display_frame)

            key = cv2.waitKey(1) & 0xFF
            if key == 27 or key == ord('q'):  # 27 is the Esc key code
                break
    finally:
        # Release the camera and close the window even when the loop is left
        # through an exception or Ctrl+C.
        cap.release()
        cv2.destroyAllWindows()
--- a/model.py
+++ b/model.py
@ -0,0 +1,56 @@
 import torch.nn as nn
 # ------------------------------------------------------------
 # CRNN-модель
 # ------------------------------------------------------------
 class CRNN(nn.Module):
    """
    Convolutional-recurrent network producing per-timestep class scores.

    A CNN turns a single-channel image into a sequence of column features,
    which go through a linear projection, a bidirectional LSTM and a final
    linear classifier. The output is time-major, (seq_len, batch, num_classes).
    """

    def __init__(self, num_classes):
        """
        num_classes: size of the final classification layer
        """
        super().__init__()

        # CNN part. The first two pools halve height and width; the last two
        # halve height only, so the width (sequence length) is kept.
        feature_layers = [
            nn.Conv2d(1, 64, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),

            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),

            nn.Conv2d(128, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.BatchNorm2d(256),

            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=(2, 1), stride=(2, 1)),

            nn.Conv2d(256, 512, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.BatchNorm2d(512),

            nn.Conv2d(512, 512, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=(2, 1), stride=(2, 1)),
        ]
        self.cnn = nn.Sequential(*feature_layers)

        # RNN part. linear1 expects 512 channels x 2 rows per image column,
        # i.e. an input image that is 32 pixels high.
        self.linear1 = nn.Linear(512 * 2, 256)
        self.relu = nn.ReLU(inplace=True)
        self.lstm = nn.LSTM(256, 256, bidirectional=True, batch_first=True)
        self.linear2 = nn.Linear(512, num_classes)

    def forward(self, x):
        """
        x: (batch, 1, 32, 128) image tensor, i.e. after the Resize step

        Returns scores of shape (32, batch, num_classes) for that input size.
        """
        features = self.cnn(x)                      # (batch, 512, 2, 32)

        # Turn every image column into one timestep: bring width forward and
        # merge channels with the remaining height.
        columns = features.permute(0, 3, 1, 2)      # (batch, 32, 512, 2)
        columns = columns.flatten(start_dim=2)      # (batch, 32, 512*2)

        hidden = self.relu(self.linear1(columns))   # (batch, 32, 256)
        hidden, _ = self.lstm(hidden)               # (batch, 32, 512), bidirectional
        scores = self.linear2(hidden)               # (batch, 32, num_classes)

        # Time-major layout: (32, batch, num_classes).
        return scores.transpose(0, 1)
--- a/models/best_accuracy_model_3.pth
+++ b/models/best_accuracy_model_3.pth
--- a/models/yolo_plate.pt
+++ b/models/yolo_plate.pt
--- a/requirements.txt
+++ b/requirements.txt
@ -0,0 +1,11 @@
 --index-url https://pypi.org/simple
 numpy==1.26.3
 pillow==10.2.0
 opencv-python==4.10.0.84
 scikit-learn==1.5.2
 scipy==1.13.1
 matplotlib==3.9.2
 tqdm
 ultralytics
 torch==2.0.1
 torchvision==0.15.2