splitting data_set and getting correctness

2025-12-22 01:45:52 +00:00 · 2024-10-21 14:44:33 +02:00
parent dcb7cee31c
commit 2e2e627bf3
7 changed files with 1836 additions and 15 deletions
--- a/src/all_data_splitter.py
+++ b/src/all_data_splitter.py
@@ -0,0 +1,45 @@
+import csv
+import os
+from random import randint
+
+from classes.learner import Learner
+from classes.concept import Concept
+
+all_data = os.path.join(os.path.dirname(os.path.abspath(__file__)), "all_feature_vectors.csv")  # input: rows to split
+training = os.path.join(os.path.dirname(os.path.abspath(__file__)), "training.csv")  # output: training rows
+testing = os.path.join(os.path.dirname(os.path.abspath(__file__)), "testing.csv")  # output: testing rows
+
+def split_all_data(all_data_path: str, future_training_path: str, future_testing_path: str):
+    """Randomly split a ';'-delimited CSV into a training and a testing CSV.
+
+    The first row of ``all_data_path`` is skipped as a header. Every other row
+    goes to the training set when ``randint(0, 9) < 8`` (8 of 10 outcomes) and
+    to the testing set otherwise. Both output files are overwritten and start
+    with the fixed header ``filename;concept;vector``. The generator is not
+    seeded here, so seed ``random`` beforehand for a reproducible split.
+    """
+    training_data = []
+    testing_data = []
+    # The csv module requires newline="" on its files; otherwise platforms that
+    # translate "\n" on write end up with "\r\r\n" row endings (blank rows).
+    with open(all_data_path, "r", newline="") as source_file:
+        reader = csv.reader(source_file, delimiter=';')
+        next(reader, None)  # drop the header; an empty input yields empty splits
+        for row in reader:
+            (training_data if randint(0, 9) < 8 else testing_data).append(row)
+
+    for path, rows in ((future_training_path, training_data), (future_testing_path, testing_data)):
+        with open(path, 'w', newline="") as csv_file:
+            writer = csv.writer(csv_file, delimiter=";")
+            writer.writerow(['filename','concept', 'vector'])
+            writer.writerows(rows)
+
+split_all_data(all_data_path=all_data, future_training_path=training, future_testing_path=testing)  # no __main__ guard: also runs on import
+
+# learner = Learner(k_paramater=3)
+# learner.learn(path_to_data_set)
+
+# #TODO: add feature vector
+# distances = learner.classify([0, 57, 0, 19, 0, 24, 0, 20, 0, 10, 0, 70, 0, 63, 0, 26, 0, 11, 0, 78, 0, 11, 0, 11, 0, 19, 0, 10, 0, 71, 0, 71, 0, 17, 0, 12, 0, 37, 0, 62, 0, 1, 0, 66, 0, 33, 0, 1, 0, 77, 0, 12, 0, 11, 0, 76, 0, 13, 0, 11, 0, 100, 0, 0, 0, 0, 0, 63, 0, 36, 0, 1, 0, 76, 0, 13, 0, 11, 0, 17, 0, 11, 0, 72, 0, 75, 0, 14, 0, 11, 0, 65, 0, 23, 0, 12, 0, 17, 0, 11, 0, 72, 18, 35, 2])
+
+# print(learner.analyse(distances, Concept.RECHTS_ABBIEGEN))