Initial commit: Add complete project files to the repository

2025-01-15 05:14:27 +03:00 · 2025-01-15 05:14:27 +03:00 · a4e6544db1
parent 74a8baefc9
commit a4e6544db1
8 changed files with 252 additions and 0 deletions
--- a/init.py
+++ b/init.py
--- a/config.py
+++ b/config.py
@ -0,0 +1,9 @@
+# Symbols the recognizer can emit. Their class indices start at 1 because
+# index 0 is reserved for the CTC blank (see CTC_BLANK below).
+ALPHABET = '-ABEKMHOPCTYX0123456789'
+IDX_TO_CHAR = dict(enumerate(ALPHABET, start=1))
+CHAR_TO_IDX = {symbol: index for index, symbol in IDX_TO_CHAR.items()}
+
+# One output class per alphabet symbol plus one for the CTC blank.
+CTC_BLANK = 0
+NUM_CLASSES = 1 + len(ALPHABET)
+
+# Weight files, relative to the working directory.
+YOLO_WEIGHTS_PATH = "models/yolo_plate.pt"
+CRNN_WEIGHTS_PATH = "models/best_accuracy_model_3.pth"
--- a/license_plate_recognizer.py
+++ b/license_plate_recognizer.py
@ -0,0 +1,107 @@
+import torch
+from PIL import Image
+from ultralytics import YOLO
+from torchvision import transforms
+from model import CRNN
+import cv2
+
+from config import CTC_BLANK, IDX_TO_CHAR
+
+# ------------------------------------------------------------
+# Main class that combines the YOLO detector with the CRNN reader
+# ------------------------------------------------------------
+class LicensePlateRecognizer:
+    """Find license plates in a frame with YOLO and read each one with a CRNN."""
+
+    def __init__(self,
+                 yolo_model_path: str,
+                 crnn_model_path: str,
+                 num_classes: int,
+                 device: str = "cpu"):
+        """
+        Load both networks and set up the CRNN preprocessing pipeline.
+
+        yolo_model_path: path to the YOLO weights file (e.g. "yolo_plate.pt")
+        crnn_model_path: path to the CRNN state dict (e.g. "best_accuracy_model_2.pth")
+        num_classes: number of output classes the CRNN is built with
+        device: device the CRNN and its input tensors are placed on
+        """
+        self.device = device
+        self.yolo_model = YOLO(yolo_model_path)
+
+        # Build the CRNN, restore its weights and switch to inference mode.
+        crnn = CRNN(num_classes=num_classes).to(device)
+        state_dict = torch.load(crnn_model_path, map_location=device)
+        crnn.load_state_dict(state_dict)
+        crnn.eval()
+        self.crnn_model = crnn
+
+        # Grayscale crop -> 32x128 tensor normalized from [0, 1] to [-1, 1].
+        preprocessing_steps = [
+            transforms.Resize((32, 128)),
+            transforms.ToTensor(),
+            transforms.Normalize((0.5,), (0.5,)),
+        ]
+        self.transform = transforms.Compose(preprocessing_steps)
+
+    def detect_and_recognize_frame(self, frame, padding: int = 5):
+        """
+        Run detection and recognition on a single frame.
+
+        frame: image as a BGR np.array
+        padding: number of pixels added around every detected box before
+                 cropping (clamped to the frame borders)
+
+        Returns a list of dicts, one per non-empty crop:
+            {
+              "bbox": (x1, y1, x2, y2),   # unpadded box from the detector
+              "text": recognized_plate
+            }
+        """
+        detections_info = []
+
+        for result in self.yolo_model.predict(frame) or ():
+            boxes = result.boxes
+            if boxes is None or len(boxes) == 0:
+                continue
+
+            for box in boxes:
+                left, top, right, bottom = (int(v) for v in box.xyxy[0].tolist())
+
+                # Grow the box by `padding` without leaving the frame.
+                row_from = max(top - padding, 0)
+                row_to = min(bottom + padding, frame.shape[0])
+                col_from = max(left - padding, 0)
+                col_to = min(right + padding, frame.shape[1])
+
+                crop = frame[row_from:row_to, col_from:col_to]
+                if crop.size == 0:
+                    continue
+
+                detections_info.append({
+                    "bbox": (left, top, right, bottom),
+                    "text": self._read_plate(crop)
+                })
+
+        return detections_info
+
+    def _read_plate(self, plate_crop_bgr):
+        """Return the text the CRNN reads from one BGR plate crop ("" if none)."""
+        # The transform works on a grayscale PIL image.
+        gray = cv2.cvtColor(plate_crop_bgr, cv2.COLOR_BGR2GRAY)
+        pil_image = Image.fromarray(gray)
+
+        # Add the batch dimension and move the tensor next to the model.
+        batch = self.transform(pil_image).unsqueeze(0).to(self.device)
+
+        with torch.no_grad():
+            logits = self.crnn_model(batch)
+
+        texts = decode_predictions(logits)
+        return texts[0] if texts else ""
+    
+
+# ------------------------------------------------------------
+# Decoding of the CRNN output
+# ------------------------------------------------------------
+def decode_predictions(preds, blank=CTC_BLANK):
+    """
+    Greedy CTC decoding: pick the best class per time step, collapse
+    consecutive repeats and drop blanks.
+
+    preds: scores indexed as (seq_len, batch, num_classes)
+    blank: class index treated as the CTC blank
+
+    Returns a list with one decoded string per batch element. Indices that
+    are missing from IDX_TO_CHAR contribute an empty string.
+    """
+    best_path = preds.argmax(2).permute(1, 0)  # (batch, seq_len)
+
+    texts = []
+    for row in best_path:
+        indices = row.tolist()
+        # Pair every step with the one before it; the first step is
+        # compared against a blank.
+        predecessors = [blank] + indices[:-1]
+        symbols = [
+            IDX_TO_CHAR.get(current, '')
+            for before, current in zip(predecessors, indices)
+            if current != before and current != blank
+        ]
+        texts.append(''.join(symbols))
+    return texts
--- a/main.py
+++ b/main.py
@ -0,0 +1,69 @@
+
+import cv2
+import numpy as np
+
+from config import NUM_CLASSES, CRNN_WEIGHTS_PATH, YOLO_WEIGHTS_PATH
+from license_plate_recognizer import LicensePlateRecognizer
+
+# ------------------------------------------------------------
+# Real-time mode (webcam)
+# ------------------------------------------------------------
+if __name__ == "__main__":
+    lpr = LicensePlateRecognizer(
+        yolo_model_path=YOLO_WEIGHTS_PATH,
+        crnn_model_path=CRNN_WEIGHTS_PATH,
+        num_classes=NUM_CLASSES,
+        device="cpu"
+    )
+
+    cap = cv2.VideoCapture(0)
+
+    # Uncomment to request a specific capture resolution.
+    # cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
+    # cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 480)
+
+    # try/finally guarantees that the camera and the window are released
+    # even if detection or drawing raises, or the user presses Ctrl+C.
+    try:
+        while True:
+            ret, frame = cap.read()
+            if not ret:
+                print("Не удалось считать кадр с веб-камеры.")
+                break
+
+            detections = lpr.detect_and_recognize_frame(frame, padding=5)
+
+            # Draw every detection on the camera image.
+            for det in detections:
+                x1, y1, x2, y2 = det["bbox"]
+                text = det["text"]
+
+                cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
+
+                # Keep the label inside the image when the plate touches the
+                # top edge (y1 - 10 would otherwise be above the frame).
+                label_y = max(y1 - 10, 20)
+                cv2.putText(
+                    frame, text, (x1, label_y),
+                    cv2.FONT_HERSHEY_SIMPLEX,
+                    0.7, (0, 255, 0), 2
+                )
+
+            # Side panel listing the recognized plates, same height as the frame.
+            height = frame.shape[0]
+            black_bar_width = 300
+            black_bar = np.zeros((height, black_bar_width, 3), dtype=np.uint8)
+
+            y_start = 40
+            for i, det in enumerate(detections):
+                txt = det["text"]
+                cv2.putText(
+                    black_bar,
+                    f"Plate #{i+1}: {txt}",
+                    (10, y_start),
+                    cv2.FONT_HERSHEY_SIMPLEX,
+                    0.7, (255, 255, 255), 2
+                )
+                y_start += 40
+
+            display_frame = np.hstack((frame, black_bar))
+
+            cv2.imshow("License Plate Recognition", display_frame)
+
+            # Esc (27) or "q" stops the loop.
+            key = cv2.waitKey(1) & 0xFF
+            if key == 27 or key == ord('q'):
+                break
+    finally:
+        cap.release()
+        cv2.destroyAllWindows()
--- a/model.py
+++ b/model.py
@ -0,0 +1,56 @@
+import torch.nn as nn
+
+
+# ------------------------------------------------------------
+# CRNN model
+# ------------------------------------------------------------
+class CRNN(nn.Module):
+    """Convolutional feature extractor followed by a bidirectional LSTM.
+
+    For an input of shape (batch, 1, 32, 128) the forward pass returns
+    per-step class scores of shape (32, batch, num_classes).
+    """
+
+    def __init__(self, num_classes):
+        super().__init__()
+
+        def conv3x3(in_channels, out_channels):
+            # 3x3 convolution that keeps the spatial size.
+            return nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1)
+
+        # CNN part. The layer order is kept as-is: nn.Sequential names its
+        # parameters by position, so saved weights depend on it.
+        self.cnn = nn.Sequential(
+            conv3x3(1, 64),
+            nn.ReLU(inplace=True),
+            nn.MaxPool2d(2, 2),
+
+            conv3x3(64, 128),
+            nn.ReLU(inplace=True),
+            nn.MaxPool2d(2, 2),
+
+            conv3x3(128, 256),
+            nn.ReLU(inplace=True),
+            nn.BatchNorm2d(256),
+
+            conv3x3(256, 256),
+            nn.ReLU(inplace=True),
+            nn.MaxPool2d((2, 1), (2, 1)),  # halve the height only
+
+            conv3x3(256, 512),
+            nn.ReLU(inplace=True),
+            nn.BatchNorm2d(512),
+
+            conv3x3(512, 512),
+            nn.ReLU(inplace=True),
+            nn.MaxPool2d((2, 1), (2, 1)),  # halve the height only
+        )
+
+        # RNN part
+        self.linear1 = nn.Linear(512 * 2, 256)
+        self.relu = nn.ReLU(inplace=True)
+        self.lstm = nn.LSTM(256, 256, bidirectional=True, batch_first=True)
+        self.linear2 = nn.Linear(512, num_classes)
+
+    def forward(self, x):
+        # x: (batch, 1, 32, 128) after Resize
+        feature_map = self.cnn(x)                        # (batch, 512, 2, 32)
+
+        # Turn every image column into one sequence step.
+        columns = feature_map.permute(0, 3, 1, 2)        # (batch, 32, 512, 2)
+        steps = columns.view(columns.size(0), columns.size(1), -1)  # (batch, 32, 1024)
+
+        hidden = self.relu(self.linear1(steps))          # (batch, 32, 256)
+        hidden, _ = self.lstm(hidden)                    # (batch, 32, 512), bidirectional
+        scores = self.linear2(hidden)                    # (batch, 32, num_classes)
+
+        # Time-major layout: (32, batch, num_classes)
+        return scores.permute(1, 0, 2)
--- a/models/best_accuracy_model_3.pth
+++ b/models/best_accuracy_model_3.pth
--- a/models/yolo_plate.pt
+++ b/models/yolo_plate.pt
--- a/requirements.txt
+++ b/requirements.txt
@ -0,0 +1,11 @@
+--index-url https://pypi.org/simple
+numpy==1.26.3
+pillow==10.2.0
+opencv-python==4.10.0.84
+scikit-learn==1.5.2
+scipy==1.13.1
+matplotlib==3.9.2
+tqdm
+ultralytics
+torch==2.0.1
+torchvision==0.15.2