From e5099b5acceeb5686741a939a2efc3aab26d9a93 Mon Sep 17 00:00:00 2001 From: Lukas K Date: Sat, 26 Oct 2024 13:35:05 +0200 Subject: [PATCH] added proper implementation for learn in decisiontree --- src/classes/decision_tree.py | 45 +++++++++++++++++++++--------------- src/main.py | 29 ++++++++++++++++++----- 2 files changed, 50 insertions(+), 24 deletions(-) diff --git a/src/classes/decision_tree.py b/src/classes/decision_tree.py index 24ced61..4a45821 100644 --- a/src/classes/decision_tree.py +++ b/src/classes/decision_tree.py @@ -17,40 +17,49 @@ class DecisionTree: # TODO # IMPLEMENTATION IS NOT WORKING - def classify(self, feature_vector: FeatureVector) -> tuple[Concept, int]: + def classify(self, feature_vector: FeatureVector) -> Concept: if len(self.children) == 0: - return self.concept, self.depth + return self.concept feature_value = feature_vector.features_list[self.depth] # get feature value of vector based on current depth try: new_decision_tree: Self = self.children[feature_value] # get sub decition tree for feature except KeyError: - return Concept.UNKNOWN, self.depth + return Concept.UNKNOWN return new_decision_tree.classify(feature_vector) # classify new desicion tree def learn(self, training_set: list[FeatureVector]) -> None: i = 0 - while not self._finished(i, len(training_set)*2): + while not self._finished(i, len(training_set)): vector: FeatureVector = training_set[i % len(training_set)] - concept, depth = self.classify(vector) - print(f"concept is {vector.concept}, was classified as {concept} in depth: {depth}") - if concept == Concept.UNKNOWN: - self.concept = vector.concept - i+=1 - - elif concept == vector.concept: - i+=1 - + concept = self.classify(vector) + #print(f"{i}\t: actual: {vector.concept} \t classified as: {concept}") + if concept == vector.concept: + i += 1 else: - self.concept = Concept.UNKNOWN - self.children.update({vector.features_list[depth]: DecisionTree(concept=vector.concept, parent=self, depth=depth+1, children={})}) - - + self._insert(vector) + #i += 1 + + + def _insert(self, vector: FeatureVector) -> None: + if self.concept == Concept.UNKNOWN: + self.concept = vector.concept + + else: + feature_value = vector.features_list[self.depth] + + try: + sub_decision_tree: Self = self.children[feature_value] # get sub decition tree for feature + sub_decision_tree._insert(vector) + except KeyError: + self.children.update({vector.features_list[self.depth]: DecisionTree(concept=vector.concept, parent=self, depth=self.depth+1, children={})}) + + def _finished(self, i: int, num_training_data: int): # TODO implement proper finished condition - if i < num_training_data: + if i < num_training_data*3: return False return True \ No newline at end of file diff --git a/src/main.py b/src/main.py index 19e0baa..fa30c0c 100644 --- a/src/main.py +++ b/src/main.py @@ -31,20 +31,37 @@ def knn(): print(f"Correctness: {(cnt / len(testing_data)) * 100}%") -if __name__ == "__main__": - +def decision_tree(): from classes.decision_tree import DecisionTree training = os.path.abspath(os.path.join(__file__, "..", "training.csv")) testing = os.path.abspath(os.path.join(__file__, "..", "testing.csv")) - testing_data = [] - with open(testing, 'r') as csv_file: + training_data = [] + with open(training, 'r') as csv_file: reader = csv.reader(csv_file, delimiter=";") next(reader) for row in reader: fv = FeatureVector(concept = Concept.identify_by_str(row[1].split(".")[1]), features_list = ast.literal_eval(row[2]), loaded = True) - testing_data.append(fv) + training_data.append(fv) tree = DecisionTree() - tree.learn(testing_data) \ No newline at end of file + tree.learn(training_data) + + cnt_data = 0 + cnt_correct = 0 + with open(testing, 'r') as csv_file: + reader = csv.reader(csv_file, delimiter=";") + next(reader) + for row in reader: + cnt_data += 1 + fv = FeatureVector(concept = Concept.identify_by_str(row[1].split(".")[1]), features_list = ast.literal_eval(row[2]), loaded = True) + classified_concept = tree.classify(fv) + cnt_correct += 1 if fv.concept == classified_concept else 0 + print(f"{fv.concept} was classified as {classified_concept}") + + print(f"classified {cnt_correct}/{cnt_data} correctly ({round(cnt_correct/cnt_data*100, 3)}%)") + + +if __name__ == "__main__": + decision_tree() \ No newline at end of file