splitting data_set and getting correctness

This commit is contained in:
Denys Seredenko
2024-10-21 14:44:33 +02:00
parent dcb7cee31c
commit 2e2e627bf3
7 changed files with 1836 additions and 15 deletions

45
src/all_data_splitter.py Normal file
View File

@@ -0,0 +1,45 @@
import csv
import os
from random import randint
from classes.learner import Learner
from classes.concept import Concept
def _sibling_path(file_name):
    """Return the absolute path of *file_name* in the directory containing this module."""
    # Joining ".." onto __file__ and normalising via abspath drops the module's
    # own file name, leaving its directory as the base.
    return os.path.abspath(os.path.join(__file__, "..", file_name))


# Absolute paths of the CSV files that live next to this module.
all_data = _sibling_path("all_feature_vectors.csv")
training = _sibling_path("training.csv")
testing = _sibling_path("testing.csv")
def _write_rows(path: str, rows: list) -> None:
    """Write the fixed header line followed by *rows* to a ';'-delimited CSV at *path*."""
    # newline='' is required by the csv module: it emits its own line
    # terminator, and without this flag platforms that translate newlines
    # end up with an empty line after every record.
    with open(path, 'w', newline='') as csv_file:
        writer = csv.writer(csv_file, delimiter=";")
        writer.writerow(['filename', 'concept', 'vector'])
        writer.writerows(rows)


def split_all_data(all_data_path: str, future_training_path: str, future_testing_path: str):
    """Randomly split a ';'-delimited CSV into a training file and a testing file.

    The first line of the input is treated as a header and skipped. Every
    remaining row is assigned independently at random: it goes to the
    training set when ``randint(0, 9) < 8`` (8 of the 10 possible draws) and
    to the testing set otherwise. Both output files are (over)written and
    start with the header ``filename;concept;vector``.

    Args:
        all_data_path: Path of the CSV file to read.
        future_training_path: Path of the training CSV to create.
        future_testing_path: Path of the testing CSV to create.

    Raises:
        OSError: If the input cannot be opened or an output cannot be written.
    """
    training_data = []
    testing_data = []
    # newline='' lets the csv reader handle line endings (including newlines
    # embedded in quoted fields) itself, as the csv documentation requires.
    with open(all_data_path, "r", newline='') as source_file:
        reader = csv.reader(source_file, delimiter=';')
        # Skip the header; the default keeps an empty input file from
        # raising StopIteration.
        next(reader, None)
        for row in reader:
            if randint(0, 9) < 8:
                training_data.append(row)
            else:
                testing_data.append(row)
    _write_rows(future_training_path, training_data)
    _write_rows(future_testing_path, testing_data)
# Top-level statement: the split is performed whenever this module is executed or imported.
split_all_data(
    all_data_path=all_data,
    future_training_path=training,
    future_testing_path=testing,
)
# learner = Learner(k_paramater=3)
# learner.learn(path_to_data_set)
# #TODO: add feature vector
# distances = learner.classify([0, 57, 0, 19, 0, 24, 0, 20, 0, 10, 0, 70, 0, 63, 0, 26, 0, 11, 0, 78, 0, 11, 0, 11, 0, 19, 0, 10, 0, 71, 0, 71, 0, 17, 0, 12, 0, 37, 0, 62, 0, 1, 0, 66, 0, 33, 0, 1, 0, 77, 0, 12, 0, 11, 0, 76, 0, 13, 0, 11, 0, 100, 0, 0, 0, 0, 0, 63, 0, 36, 0, 1, 0, 76, 0, 13, 0, 11, 0, 17, 0, 11, 0, 72, 0, 75, 0, 14, 0, 11, 0, 65, 0, 23, 0, 12, 0, 17, 0, 11, 0, 72, 18, 35, 2])
# print(learner.analyse(distances, Concept.RECHTS_ABBIEGEN))