Added a proper implementation of `learn` in `DecisionTree`

This commit is contained in:
Lukas K
2024-10-26 13:35:05 +02:00
parent 3ff4a05d7f
commit e5099b5acc
2 changed files with 50 additions and 24 deletions

View File

@@ -17,40 +17,49 @@ class DecisionTree:
# TODO # TODO
# IMPLEMENTATION IS NOT WORKING # IMPLEMENTATION IS NOT WORKING
def classify(self, feature_vector: FeatureVector) -> tuple[Concept, int]: def classify(self, feature_vector: FeatureVector) -> Concept:
if len(self.children) == 0: if len(self.children) == 0:
return self.concept, self.depth return self.concept
feature_value = feature_vector.features_list[self.depth] # get feature value of vector based on current depth feature_value = feature_vector.features_list[self.depth] # get feature value of vector based on current depth
try: try:
new_decision_tree: Self = self.children[feature_value] # get sub decition tree for feature new_decision_tree: Self = self.children[feature_value] # get sub decition tree for feature
except KeyError: except KeyError:
return Concept.UNKNOWN, self.depth return Concept.UNKNOWN
return new_decision_tree.classify(feature_vector) # classify new desicion tree return new_decision_tree.classify(feature_vector) # classify new desicion tree
def learn(self, training_set: list[FeatureVector]) -> None: def learn(self, training_set: list[FeatureVector]) -> None:
i = 0 i = 0
while not self._finished(i, len(training_set)*2): while not self._finished(i, len(training_set)):
vector: FeatureVector = training_set[i % len(training_set)] vector: FeatureVector = training_set[i % len(training_set)]
concept, depth = self.classify(vector) concept = self.classify(vector)
print(f"concept is {vector.concept}, was classified as {concept} in depth: {depth}") #print(f"{i}\t: actual: {vector.concept} \t classified as: {concept}")
if concept == Concept.UNKNOWN: if concept == vector.concept:
self.concept = vector.concept i += 1
i+=1
elif concept == vector.concept:
i+=1
else: else:
self.concept = Concept.UNKNOWN self._insert(vector)
self.children.update({vector.features_list[depth]: DecisionTree(concept=vector.concept, parent=self, depth=depth+1, children={})}) #i += 1
def _insert(self, vector: FeatureVector) -> None:
    """Store ``vector``'s concept in the tree along the vector's feature path.

    If this node's concept is still ``Concept.UNKNOWN``, the node simply
    adopts ``vector.concept``. Otherwise the feature value at index
    ``self.depth`` selects a child: an existing child receives the vector
    recursively, a missing child is created as a new leaf one level deeper.

    Args:
        vector: The training vector to insert; its ``features_list`` is
            indexed with ``self.depth`` and its ``concept`` is stored.

    NOTE(review): indexing ``features_list`` with ``self.depth`` is not
    bounds-checked here; presumably the tree never grows deeper than the
    number of features -- confirm against the callers.
    """
    if self.concept == Concept.UNKNOWN:
        # An undecided node takes over the concept of the first vector it sees.
        self.concept = vector.concept
        return

    feature_value = vector.features_list[self.depth]
    try:
        # Only the lookup is guarded: a KeyError raised further down the
        # recursion must not be mistaken for "no child for this value",
        # which would overwrite an already existing subtree.
        sub_decision_tree: Self = self.children[feature_value]  # sub decision tree for this feature value
    except KeyError:
        self.children[feature_value] = DecisionTree(
            concept=vector.concept,
            parent=self,
            depth=self.depth + 1,
            children={},
        )
    else:
        sub_decision_tree._insert(vector)
def _finished(self, i: int, num_training_data: int): def _finished(self, i: int, num_training_data: int):
# TODO implement proper finished condition # TODO implement proper finished condition
if i < num_training_data: if i < num_training_data*3:
return False return False
return True return True

View File

@@ -31,20 +31,37 @@ def knn():
print(f"Correctness: {(cnt / len(testing_data)) * 100}%") print(f"Correctness: {(cnt / len(testing_data)) * 100}%")
if __name__ == "__main__": def decision_tree():
from classes.decision_tree import DecisionTree from classes.decision_tree import DecisionTree
training = os.path.abspath(os.path.join(__file__, "..", "training.csv")) training = os.path.abspath(os.path.join(__file__, "..", "training.csv"))
testing = os.path.abspath(os.path.join(__file__, "..", "testing.csv")) testing = os.path.abspath(os.path.join(__file__, "..", "testing.csv"))
testing_data = [] training_data = []
with open(testing, 'r') as csv_file: with open(training, 'r') as csv_file:
reader = csv.reader(csv_file, delimiter=";") reader = csv.reader(csv_file, delimiter=";")
next(reader) next(reader)
for row in reader: for row in reader:
fv = FeatureVector(concept = Concept.identify_by_str(row[1].split(".")[1]), features_list = ast.literal_eval(row[2]), loaded = True) fv = FeatureVector(concept = Concept.identify_by_str(row[1].split(".")[1]), features_list = ast.literal_eval(row[2]), loaded = True)
testing_data.append(fv) training_data.append(fv)
tree = DecisionTree() tree = DecisionTree()
tree.learn(testing_data) tree.learn(training_data)
cnt_data = 0
cnt_correct = 0
with open(testing, 'r') as csv_file:
reader = csv.reader(csv_file, delimiter=";")
next(reader)
for row in reader:
cnt_data += 1
fv = FeatureVector(concept = Concept.identify_by_str(row[1].split(".")[1]), features_list = ast.literal_eval(row[2]), loaded = True)
classified_concept = tree.classify(fv)
cnt_correct += 1 if fv.concept == classified_concept else 0
print(f"{fv.concept} was classified as {classified_concept}")
print(f"classified {cnt_correct}/{cnt_data} correctly ({round(cnt_correct/cnt_data*100, 3)}%)")
if __name__ == "__main__":
decision_tree()