kNN implemented with analyse

This commit is contained in:
Denys Seredenko
2024-10-20 12:44:08 +02:00
parent 7fd466308b
commit dcb7cee31c
10 changed files with 1849 additions and 1763 deletions

10
src/classes/concept.py Normal file
View File

@@ -0,0 +1,10 @@
from enum import Enum, auto
class Concept(Enum):
    """Classification labels: German traffic-sign concepts.

    Values are assigned by auto(); only identity matters, never the
    numeric value. Member names are the German sign names.
    """
    UNKNOWN = auto()             # fallback / not yet classified
    VORFAHRT_GEWAEHREN = auto()  # "give way" (yield) sign
    VORFAHRT_STRASSE = auto()    # priority-road sign
    STOP = auto()
    RECHTS_ABBIEGEN = auto()     # turn right
    LINKS_ABBIEGEN = auto()      # turn left
    RECHTS_VOR_LINKS = auto()    # right-before-left priority rule

8
src/classes/feature.py Normal file
View File

@@ -0,0 +1,8 @@
from enum import Enum, auto
class Feature(Enum):
    """Kinds of image features extracted per training example.

    Iteration order of this enum fixes the layout of the flattened
    feature vector built by FeatureVector.get_vector().
    """
    OVERALL_COLOR_PERCENTAGE = auto()  # stored as a dict (flattened via .values())
    RASTER_COLOR_PERCENTAGE = auto()   # stored as a 2D list of dicts (grid cells)
    CORNERS = auto()                   # single scalar value
    EDGES = auto()                     # single scalar value
    COUNTOURS = auto()                 # single scalar value; NOTE: "contours" is misspelled, kept for compatibility

View File

@@ -0,0 +1,96 @@
from typing import Any, Self
import numpy as np
import os
import pickle
from classes.concept import Concept
from classes.feature import Feature
class FeatureVector:
def __init__(self, concept: Concept, features: dict = {}, features_list: list = None, loaded: bool = False) -> None:
if loaded == False:
self.loaded: bool = False
self.features: dict = features
self.concept: Concept = concept
else:
self.loaded: bool = True
self.concept: Concept = concept
self.features_list: list = features_list
def _get_values_of_feature(self, feature: Feature) -> list[int]:
try:
feature_data = self.features[feature]
except KeyError:
print("missing key: ", feature)
return [-1]
ret = []
if feature == Feature.OVERALL_COLOR_PERCENTAGE:
feature_data: dict
for val in feature_data.values():
ret.append(val)
return ret
elif feature == Feature.RASTER_COLOR_PERCENTAGE:
feature_data: list[list[dict]]
for feature_data1d in feature_data:
for data in feature_data1d:
for val in data.values():
ret.append(val)
return ret
elif feature == Feature.CORNERS:
ret.append(feature_data)
return ret
elif feature == Feature.EDGES:
ret.append(feature_data)
return ret
elif feature == Feature.COUNTOURS:
ret.append(feature_data)
return ret
def add_feature(self, key: Feature, value: Any) -> None:
self.features.update({key: value})
def get_vector(self) -> list:
if self.loaded:
return self.features_list
ret = []
for feature in Feature:
ret = ret + self._get_values_of_feature(feature)
return ret
def get_concept(self) -> Concept:
return self.concept
def get_num_features(self) -> int:
return len(self.features)
def get_feature_value(self, key: str) -> Any:
return self.features[key]
def get_features(self) -> Any:
return self.features
def save(self, path: os.path) -> None:
with open(path, "wb") as write:
pickle.dump(self, write, pickle.HIGHEST_PROTOCOL)
def load(path: os.path) -> Self:
with open(path, "rb") as read:
self: FeatureVector = pickle.load(read)
return self

83
src/classes/learner.py Normal file
View File

@@ -0,0 +1,83 @@
import csv
import ast
import math
from classes.concept import Concept
from classes.feature_vector import FeatureVector
class Learner:
    """k-nearest-neighbour (k-NN) classifier for traffic-sign feature vectors."""

    def learn(self, path_to_training_set: str):
        """Load training examples from a semicolon-separated CSV file.

        Expected row layout: <id>;<concept string>;<feature-list literal>.
        The first row is treated as a header and skipped; an empty file
        yields an empty training set (the original raised StopIteration).

        Stores the examples on self.training_set and also returns them.
        """
        training_set = []
        with open(path_to_training_set, mode='r', newline='') as csv_file:
            reader = csv.reader(csv_file, delimiter=';')
            next(reader, None)  # skip the header row; tolerate an empty file
            for row in reader:
                training_set.append(FeatureVector(
                    concept=row[1],
                    features_list=ast.literal_eval(row[2]),
                    loaded=True,
                ))
        self.training_set = training_set
        return training_set

    def classify(self, input_feature_vector, k: int = 3):
        """Classify a flat numeric feature vector by majority vote of the
        k nearest training examples (Euclidean distance).

        This method never consults the example's own concept. `k` was
        hard-coded to 3 in the original; it is now a parameter with the
        same default, so existing callers are unaffected and the result
        can be fed to analyse() with a matching k.

        Returns a dict mapping each known Concept to its vote count.
        NOTE(review): Concept.UNKNOWN is absent from the tally; a vote
        for it would raise KeyError — confirm that is intended.
        """
        result: dict = {
            Concept.LINKS_ABBIEGEN: 0,
            Concept.RECHTS_ABBIEGEN: 0,
            Concept.RECHTS_VOR_LINKS: 0,
            Concept.STOP: 0,
            Concept.VORFAHRT_GEWAEHREN: 0,
            Concept.VORFAHRT_STRASSE: 0,
        }
        distances = [
            (example.get_concept(),
             self.euclid_distance(example.get_vector(), input_feature_vector))
            for example in self.training_set
        ]
        distances.sort(key=lambda pair: pair[1])  # nearest first
        for concept_string, _distance in distances[:k]:
            # Concepts come back from the CSV as strings like "Concept.STOP".
            result[self.string_to_enum(Concept, concept_string)] += 1
        return result

    def euclid_distance(self, list_a, list_b):
        """Euclidean distance between two equal-length numeric vectors.

        Raises ValueError on a length mismatch (a subclass of the bare
        Exception raised originally, so existing handlers keep working).
        """
        if len(list_a) != len(list_b):
            raise ValueError("Both lists must equal in size!")
        # math.dist is the C-implemented stdlib equivalent of the original
        # manual sum-of-squares loop (no more shadowing of builtin `sum`).
        return math.dist(list_a, list_b)

    def string_to_enum(self, enum_class, enum_string):
        """Convert a string like 'Concept.STOP' back into the enum member.

        Returns None (after printing the error) when the string is
        malformed or names an unknown member.
        """
        try:
            # Split the string to get the enum member name.
            _, member_name = enum_string.split('.')
            return getattr(enum_class, member_name)
        except (AttributeError, ValueError) as e:
            print(f"Error: {e}")
            return None

    def analyse(self, result, k):
        """Print each concept's vote share as a percentage of k.

        Output is ordered by ascending vote count, so the most probable
        concept is printed last.
        """
        for key, amount in sorted(result.items(), key=lambda item: item[1]):
            probability = (amount / k) * 100
            print(f" Probability of {key} is {probability}%")