Ready Model

This commit is contained in:
Denys Seredenko
2024-12-18 12:15:41 +01:00
commit c148b03ab5
17 changed files with 87007 additions and 0 deletions

BIN
.DS_Store vendored Normal file

Binary file not shown.

110
Evaluator_Kaggle.ipynb Normal file
View File

@@ -0,0 +1,110 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"vscode": {
"languageId": "plaintext"
}
},
"outputs": [],
"source": [
"import csv\n",
"import os\n",
"import re\n",
"import time\n",
"import math, re, os\n",
"import tensorflow as tf\n",
"import numpy as np\n",
"from matplotlib import pyplot as plt\n",
"from kaggle_datasets import KaggleDatasets\n",
"from sklearn.metrics import f1_score, precision_score, recall_score, confusion_matrix\n",
"\n",
"def create_model(image_size):\n",
" model = tf.keras.Sequential()\n",
" \n",
" if image_size <= 10:\n",
" model.add(tf.keras.layers.Conv2D(32, (2, 2), activation='relu', input_shape=(image_size, image_size, 3)))\n",
" model.add(tf.keras.layers.MaxPooling2D((2, 2), padding='same')) \n",
" else:\n",
" model.add(tf.keras.layers.Conv2D(32, (3, 3), activation='relu', input_shape=(image_size, image_size, 3)))\n",
" model.add(tf.keras.layers.MaxPooling2D((2, 2)))\n",
"\n",
" if image_size > 10:\n",
" model.add(tf.keras.layers.Conv2D(64, (3, 3), activation='relu'))\n",
" model.add(tf.keras.layers.MaxPooling2D((2, 2)))\n",
" \n",
" if image_size > 20:\n",
" model.add(tf.keras.layers.Conv2D(128, (3, 3), activation='relu'))\n",
" model.add(tf.keras.layers.MaxPooling2D((2, 2)))\n",
"\n",
" model.add(tf.keras.layers.GlobalAveragePooling2D())\n",
" \n",
" model.add(tf.keras.layers.Dense(128, activation='relu'))\n",
" model.add(tf.keras.layers.Dense(7, activation='softmax')) \n",
"\n",
" return model\n",
"\n",
"if __name__ == '__main__':\n",
" dataset_path = os.path.abspath(\"/kaggle/input\")\n",
" evaluation_csv = os.path.abspath(\"/kaggle/working/evaluation_ready.csv\")\n",
"\n",
" with open(evaluation_csv, \"w\") as csv_file:\n",
" csv_writer = csv.writer(csv_file, delimiter=\";\")\n",
" csv_writer.writerow([\"size\",\"errors\", \"duration_100_tests\"])\n",
"\n",
" datasets = os.listdir(dataset_path)\n",
" for dataset_name in datasets:\n",
" folders = os.listdir(os.path.abspath(os.path.join(dataset_path, dataset_name)))\n",
" single_dataset_path = os.path.abspath(os.path.join(dataset_path, dataset_name, folders[0]))\n",
" \n",
" image_size = int(re.search(r'(\\d+)$', dataset_name)[0])\n",
"\n",
" test_amount = 100\n",
" errors = []\n",
"\n",
" start = time.perf_counter()\n",
" for test_number in range(test_amount):\n",
" print(f\"Image: {image_size} starting test {test_number}\\n\\n\")\n",
" \n",
" ds = tf.data.Dataset.load(single_dataset_path)\n",
" \n",
" ds = ds.shuffle(buffer_size=43349)\n",
" \n",
" # Split into training and testing\n",
" train_ds, test_ds = tf.keras.utils.split_dataset(ds, left_size=0.9)\n",
" \n",
" # Batch and prefetch\n",
" train_data = train_ds.batch(5).prefetch(tf.data.AUTOTUNE)\n",
" test_data = test_ds.batch(5).prefetch(tf.data.AUTOTUNE)\n",
" \n",
" model = create_model(image_size)\n",
"\n",
" tf.keras.utils.plot_model(model, to_file='/kaggle/working/model.png', show_shapes=True)\n",
" \n",
" model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),\n",
" loss=tf.keras.losses.SparseCategoricalCrossentropy(),\n",
" metrics=['accuracy'])\n",
" \n",
" \n",
" model.fit(train_data, epochs=7)\n",
" \n",
" test_loss, test_acc = model.evaluate(test_data, verbose=2)\n",
" errors.append(1 - test_acc)\n",
"\n",
" end = time.perf_counter()\n",
" with open(evaluation_csv, \"a\") as csv_file:\n",
" csv_writer = csv.writer(csv_file, delimiter=\";\")\n",
" csv_writer.writerow([image_size, errors, round(end-start, 4)])"
]
}
],
"metadata": {
"language_info": {
"name": "python"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

17
README.md Normal file
View File

@@ -0,0 +1,17 @@
# Starting project
## Step 1. Create Map
First of all, we have to create a map for the TF Dataset. Using DataSetLoader, provide the path to the images folder and call get_classified_csv(). This will create a map (image path with label).
This will be later used for creating TF dataset.
## Step 2. Create Dataset
Then you can uncomment the lines in main.py for generating the dataset. Make sure that you have changed the path. Also change the image size via the x and y parameters; the resized images will be saved to the dataset. We recommend using x=35 and y=35 — that is how you get the best results.
## Step 3. Load datasets to kaggle
Just create a new Dataset, move dataset_YxY into it, and then add it to the notebook as an input.
## Step 4. Evaluate in Kaggle
Copy everything from Evaluator_Kaggle.ipynb into a Notebook. Don't forget to turn on a GPU accelerator; that way you will get results a lot faster.
## Optional. Testing locally
You can also train the model and run tests locally. For that, just run main.py. Of course, don't forget to comment out the lines for creating the CSV and the mapping. Also change the path to the desired dataset.

43351
all_data.csv Normal file

File diff suppressed because it is too large Load Diff

43348
all_data_cropped.csv Normal file

File diff suppressed because it is too large Load Diff

BIN
classes/.DS_Store vendored Normal file

Binary file not shown.

27
classes/Concept.py Normal file
View File

@@ -0,0 +1,27 @@
from enum import Enum, auto
class Concept(Enum):
    """Traffic-sign concepts the classifier can recognize.

    Member names mirror the (German) folder/label names used in the
    dataset CSVs; ``UNKNOWN`` is the fallback for unrecognized labels.
    """

    UNKNOWN = auto()
    VORFAHRT_GEWAEHREN = auto()
    VORFAHRT_STRASSE = auto()
    STOP = auto()
    RECHTS_ABBIEGEN = auto()
    LINKS_ABBIEGEN = auto()
    RECHTS_VOR_LINKS = auto()

    @staticmethod
    def identify_by_str(name: str) -> "Concept":
        """Return the Concept whose member name equals ``name``.

        Falls back to ``Concept.UNKNOWN`` for any unrecognized string,
        matching the original match/case behavior.
        """
        # @staticmethod fixes a latent bug: without it, calling the helper
        # on an *instance* (e.g. Concept.STOP.identify_by_str(s)) would bind
        # the member as the first argument instead of the string.
        # __members__ lookup replaces the hand-written match/case chain.
        return Concept.__members__.get(name, Concept.UNKNOWN)

95
classes/DataLoader.py Normal file
View File

@@ -0,0 +1,95 @@
import os
import csv
import cv2
import tensorflow as tf
import numpy as np
class DataSetLoader:
    """Builds an image/label CSV map and a TensorFlow dataset from an image folder.

    Workflow: ``get_classified_csv()`` walks the image tree and writes a
    ``name;concept`` CSV; ``create_dataset_from_csv()`` then loads each image
    with OpenCV and packs everything into a ``tf.data.Dataset``.
    """

    # Extensions accepted as image files (compared case-insensitively).
    _IMAGE_EXTENSIONS = ('.bmp', '.jpeg', '.jpg', '.png')

    def __init__(self, p: str, x: int, y: int) -> None:
        """Remember the dataset root and target image size.

        Raises:
            Exception: if ``p`` is None or does not exist on disk.
        """
        # Check None BEFORE os.path.exists: the original order called
        # os.path.exists(None), raising TypeError instead of our message.
        if p is None or not os.path.exists(p):
            raise Exception("Path is not correct")
        self.path_to_dataset = p
        self.x_size = x if x is not None else 25
        self.y_size = y if y is not None else 25

    def get_concept(self, path) -> int:
        """Map an image path to its integer class label (1-6, 7 = unknown)."""
        if "fahrtrichtung_links" in path:
            return 1
        elif "fahrtrichtung_rechts" in path:
            return 2
        elif "rechts_vor_links" in path:
            return 3
        elif "stop" in path:
            return 4
        elif "vorfahrt_gewaehren" in path:
            return 5
        elif "vorfahrtsstrasse" in path:
            return 6
        else:
            return 7

    def get_classified_csv(self):
        """Walk the dataset tree and write an ``all_data_cropped.csv`` map.

        Each row is ``(image_path, concept)``; the file lands two directories
        above this module (project root).
        """
        images = []
        for dirpath, dnames, fnames in os.walk(self.path_to_dataset):
            for fname in fnames:
                # endswith on the lowercased name fixes the original substring
                # test, which also matched e.g. 'notes.png.txt', and adds
                # case-insensitivity for extensions like '.PNG'.
                if fname.lower().endswith(self._IMAGE_EXTENSIONS):
                    image_path = os.path.join(dirpath, fname)
                    images.append((image_path, self.get_concept(image_path)))
        csv_path = os.path.abspath(os.path.join(__file__, "..", "..", "all_data_cropped.csv"))
        # newline="" is required by the csv module; without it Windows
        # output contains spurious blank rows.
        with open(csv_path, "w", newline="") as csv_output:
            csv_writer = csv.writer(csv_output, delimiter=";")
            csv_writer.writerow(["name", "concept"])
            for image in images:
                csv_writer.writerow(image)

    def create_dataset_from_csv(self, path: str):
        """Load every image listed in the CSV and return a tf.data.Dataset.

        Images that fail to load are logged and skipped rather than aborting
        the whole build.
        """
        images = []
        concepts = []
        with open(path, "r", newline="") as csv_data:
            csv_reader = csv.reader(csv_data, delimiter=";")
            next(csv_reader)  # skip the header row
            for row in csv_reader:
                images.append(row[0])
                concepts.append(int(row[1]))
        all_images = []
        all_labels = []
        for idx, (img_path, label) in enumerate(zip(images, concepts)):
            try:
                image = self.load_image_with_opencv(img_path)
                all_images.append(image)
                all_labels.append(label)
                tf.print(idx)  # progress indicator
            except Exception as e:
                print(f"Error loading image {img_path}: {e}")
                continue  # Skip this image if error occurs
        all_images = np.array(all_images)
        all_labels = np.array(all_labels)
        dataset = tf.data.Dataset.from_tensor_slices((all_images, all_labels))
        return dataset

    def load_image_with_opencv(self, file_path):
        """Read, RGB-convert, resize and normalize one image to float32 [0,1].

        Raises:
            Exception: if OpenCV cannot decode the file.
        """
        image = cv2.imread(file_path)
        if image is None:
            raise Exception(f"Could not load image: {file_path}")
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # cv2 reads BGR
        image = cv2.resize(image, (self.x_size, self.y_size))
        image = image.astype(np.float32) / 255.0
        return image

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@@ -0,0 +1 @@
8130170973816811008<18><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> * 0<><30><02>>

Binary file not shown.

View File

@@ -0,0 +1 @@
13241530939751446168<18><><EFBFBD><E6B89E> * 0<><30><02>>

57
main.py Normal file
View File

@@ -0,0 +1,57 @@
from classes.DataLoader import DataSetLoader
import tensorflow as tf
import matplotlib.pyplot as plt
def create_model(image_size):
    """Build a small CNN classifier sized to the input resolution.

    Tiny inputs (<= 10 px) get a 2x2 stem kernel and 'same'-padded pooling
    so the feature map survives; larger inputs add one or two extra
    conv/pool stages. The head is GAP -> Dense(128, relu) -> Dense(7, softmax).
    """
    layers = tf.keras.layers
    tiny = image_size <= 10

    # Stem: kernel and pool padding depend on how small the input is.
    stem_kernel = (2, 2) if tiny else (3, 3)
    stem_padding = 'same' if tiny else 'valid'  # 'valid' is Keras' default

    net = tf.keras.Sequential()
    net.add(layers.Conv2D(32, stem_kernel, activation='relu',
                          input_shape=(image_size, image_size, 3)))
    net.add(layers.MaxPooling2D((2, 2), padding=stem_padding))

    # Deeper stages only when the resolution can support more pooling.
    for threshold, filters in ((10, 64), (20, 128)):
        if image_size > threshold:
            net.add(layers.Conv2D(filters, (3, 3), activation='relu'))
            net.add(layers.MaxPooling2D((2, 2)))

    # Classification head: 7 output classes.
    net.add(layers.GlobalAveragePooling2D())
    net.add(layers.Dense(128, activation='relu'))
    net.add(layers.Dense(7, activation='softmax'))
    return net
if __name__ == '__main__':
    # README steps 1-2: uncomment to (re)build the dataset from an image
    # folder via the CSV map instead of loading a saved shard.
    # loader = DataSetLoader(p='/Users/denysseredenko/Desktop/ML-cropped/processed', x=50, y=50)
    # ds = loader.create_dataset_from_csv('../all_data_cropped.csv')
    # ds.save('/Users/denysseredenko/Desktop/privat/StreetSignRecognitionTensor/dataset_cropped_50x50')

    # Load a previously saved dataset shard and shuffle the full sample set.
    dataset = tf.data.Dataset.load('../dataset/13802249122632658895/00000000.shard/00000000.snapshot')
    dataset = dataset.shuffle(buffer_size=43349)
    dataset.save('/Users/denysseredenko/Desktop/privat/StreetSignRecognitionTensor/dataset_cropped_50x50')

    # 90/10 train/test split, small batches, async prefetch.
    train_split, test_split = tf.keras.utils.split_dataset(dataset, left_size=0.9)
    train_data = train_split.batch(5).prefetch(tf.data.AUTOTUNE)
    test_data = test_split.batch(5).prefetch(tf.data.AUTOTUNE)

    # Train a model sized for 50x50 inputs and report held-out accuracy.
    model = create_model(50)
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
                  loss=tf.keras.losses.SparseCategoricalCrossentropy(),
                  metrics=['accuracy'])
    model.fit(train_data, epochs=7)

    test_loss, test_acc = model.evaluate(test_data, verbose=2)
    print('\nTest accuracy:', test_acc)