diff --git a/.gitignore b/.gitignore
index bc079b2..6ceff55 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,5 +1,6 @@
 
 .vsc
+.vscode
 .venv
 
 data
diff --git a/src/classes/decision_tree.py b/src/classes/decision_tree.py
new file mode 100644
index 0000000..24ced61
--- /dev/null
+++ b/src/classes/decision_tree.py
@@ -0,0 +1,90 @@
+from typing import Self
+import csv
+import ast
+
+from classes.concept import Concept
+from classes.feature_vector import FeatureVector
+
+
+class DecisionTree:
+    """Decision tree in which a node at depth ``d`` branches on feature ``d``.
+
+    A node without children is a leaf and answers with its own ``concept``.
+    """
+
+    def __init__(self, children: dict | None = None, parent: Self | None = None, concept: Concept = Concept.UNKNOWN, depth: int = 0) -> None:
+        """Create a node.
+
+        Args:
+            children: Mapping of feature value to sub-tree (number, DecisionTree);
+                a fresh dict is created when omitted.
+            parent: Node this one hangs under, or ``None`` for the root.
+            concept: Concept reported while this node is a leaf.
+            depth: Index of the feature this node branches on.
+        """
+        # A literal ``{}`` default would be one dict shared by every instance
+        # created without ``children``, so allocate it per instance instead.
+        self.children: dict = {} if children is None else children
+        self.parent: Self | None = parent
+        self.concept: Concept = concept
+        self.depth: int = depth
+
+    def classify(self, feature_vector: FeatureVector) -> tuple[Concept, int]:
+        """Return the concept for ``feature_vector`` and the depth that decided it.
+
+        ``Concept.UNKNOWN`` is returned when the walk stops at a node that has
+        children but no branch for the vector's feature value.
+        """
+        node = self._deepest_node(feature_vector)
+        if node.children:
+            return Concept.UNKNOWN, node.depth
+        return node.concept, node.depth
+
+    def learn(self, training_set: list[FeatureVector]) -> None:
+        """Grow the tree while cycling through ``training_set`` twice.
+
+        A vector that is not classified as its own concept repairs the node
+        that produced the answer: an unlabelled leaf adopts the concept, any
+        other node gets a new branch for the vector's feature value.
+        """
+        # TODO: check the result against real data; ``_finished`` still stops
+        # after a fixed number of steps.
+        i = 0
+        while not self._finished(i, len(training_set) * 2):
+            vector: FeatureVector = training_set[i % len(training_set)]
+            # Work on the node that answers for this vector; patching ``self``
+            # (the root) would leave the deeper node wrong and never terminate.
+            node = self._deepest_node(vector)
+            depth = node.depth
+            concept = Concept.UNKNOWN if node.children else node.concept
+            print(f"concept is {vector.concept}, was classified as {concept} in depth: {depth}")
+
+            if concept == vector.concept:
+                i += 1
+            elif not node.children and concept == Concept.UNKNOWN:
+                node.concept = vector.concept
+                i += 1
+            elif depth >= len(vector.features_list):
+                # No feature left to branch on, so this conflict cannot be resolved.
+                i += 1
+            else:
+                # Make sure the node acts as an inner node and add the missing
+                # branch; ``i`` stays put so the vector is re-checked next round.
+                node.concept = Concept.UNKNOWN
+                node.children[vector.features_list[depth]] = DecisionTree(concept=vector.concept, parent=node, depth=depth + 1, children={})
+
+    def _deepest_node(self, feature_vector: FeatureVector) -> Self:
+        """Follow ``feature_vector`` down the tree for as long as a branch exists."""
+        node = self
+        while node.children:
+            feature_value = feature_vector.features_list[node.depth]  # get feature value of vector based on current depth
+            try:
+                node = node.children[feature_value]  # get sub decision tree for feature
+            except KeyError:
+                break
+        return node
+
+    def _finished(self, i: int, num_training_data: int) -> bool:
+        """Return True once ``i`` has reached ``num_training_data``."""
+        # TODO implement proper finished condition
+        return i >= num_training_data
\ No newline at end of file
diff --git a/src/main.py b/src/main.py
index 3ca09b5..19e0baa 100644
--- a/src/main.py
+++ b/src/main.py
@@ -2,26 +2,49 @@ import os
 import ast
 import csv
 
-from classes.learner import Learner
 from classes.concept import Concept
+from classes.feature_vector import FeatureVector
 
-training = os.path.abspath(os.path.join(__file__, "..", "training.csv"))
-testing = os.path.abspath(os.path.join(__file__, "..", "testing.csv"))
 
-testing_data = []
-with open(testing, 'r') as csv_file:
-    reader = csv.reader(csv_file, delimiter=";")
-    next(reader)
-    for row in reader:
-        testing_data.append((row[1], row[2]))
+def knn():
+    from classes.learner import Learner
 
-learner = Learner(k_paramater=3)
-learner.learn(training)
+    training = os.path.abspath(os.path.join(__file__, "..", "training.csv"))
+    testing = os.path.abspath(os.path.join(__file__, "..", "testing.csv"))
 
-cnt = 0
+    testing_data = []
+    with open(testing, 'r') as csv_file:
+        reader = csv.reader(csv_file, delimiter=";")
+        next(reader)
+        for row in reader:
+            testing_data.append((row[1], row[2]))
 
-for test_feature_vector in testing_data:
-    distances = learner.classify(ast.literal_eval(test_feature_vector[1]))
-    cnt += learner.analyse(distances, Concept.identify_by_str(test_feature_vector[0].split('.')[1]))
+    learner = Learner(k_paramater=3)
+    learner.learn(training)
 
-print(f"Correctnes: {(cnt / len(testing_data)) * 100}%")
\ No newline at end of file
+    cnt = 0
+
+    for test_feature_vector in testing_data:
+        distances = learner.classify(ast.literal_eval(test_feature_vector[1]))
+        cnt += learner.analyse(distances, Concept.identify_by_str(test_feature_vector[0].split('.')[1]))
+
+    print(f"Correctness: {(cnt / len(testing_data)) * 100}%")
+
+
+if __name__ == "__main__":
+
+    from classes.decision_tree import DecisionTree
+
+    training = os.path.abspath(os.path.join(__file__, "..", "training.csv"))
+    testing = os.path.abspath(os.path.join(__file__, "..", "testing.csv"))
+
+    testing_data = []
+    with open(testing, 'r') as csv_file:
+        reader = csv.reader(csv_file, delimiter=";")
+        next(reader)
+        for row in reader:
+            fv = FeatureVector(concept = Concept.identify_by_str(row[1].split(".")[1]), features_list = ast.literal_eval(row[2]), loaded = True)
+            testing_data.append(fv)
+
+    tree = DecisionTree()
+    tree.learn(testing_data)
\ No newline at end of file