From 8c3b984e49a18990c2860213b4ccb65b67a0a0a5 Mon Sep 17 00:00:00 2001
From: Brandon Rodriguez <brodriguez8774@gmail.com>
Date: Sat, 18 Nov 2017 02:31:38 -0500
Subject: [PATCH] Implement sorting of all thread data and saving to a single
 file

---
 Data/test/test2 |  10 ++
 Main.c          | 296 ++++++++++++++++++++++++++++++++++--------------
 2 files changed, 218 insertions(+), 88 deletions(-)
 create mode 100644 Data/test/test2

diff --git a/Data/test/test2 b/Data/test/test2
new file mode 100644
index 0000000..72925df
--- /dev/null
+++ b/Data/test/test2
@@ -0,0 +1,10 @@
+Impeartale,fooQu1yee5m,A+,TextFiction.com,2757
+Mhen1958,Ohfi0ieLei,O+,FindSpin.com,971
+Allonand,Shootuy0ie,O+,FlameBalls.com,2864
+Proffelf,Ko6JohzoC,O+,BidPursuit.com,307
+Suntly,paech2ooSei,O+,BaseRingTones.com,783
+Bantiong,baiKee2t,A+,SecuritiesCard.com,1373
+Mysis1931,Atheeyosh9ai,A+,PageRings.com,2911
+Pinge1982,odoo6gahMai,B+,AffordableTest.com,1578
+Colood,vah2Xadoos,O+,PlayDetails.com,2154
+Thereshed97,Iechoonga7,A+,ToysManual.com,1967
diff --git a/Main.c b/Main.c
index 0b4b0df..e2a9671 100644
--- a/Main.c
+++ b/Main.c
@@ -23,7 +23,17 @@
 
 /**
  * Known Issues:
+ *  This appears to have a race condition, somehow.
+ *  Unfortunately, I don't seem to understand threading enough to troubleshoot it.
  *
+ *  But sometimes when running, all memory is cleared according to valgrind and
+ *  everything is fine.
+ *  Other times, valgrind reports errors on program end. There will arbitrarily be
+ *  exactly "1,614 bytes in 4 blocks" that are lost. This appears to generally be
+ *  consistent, regardless of how many files are in the selected folder (IE, how
+ *  many threads are created) or what the contents of said files are.
+ *  The actual error description seems to point to the imported pthread code,
+ *  which means the issue is something to do with my implementation of threading.
  */
 
 
@@ -62,15 +72,19 @@ typedef struct {
 int dir_file_counter;
 char* absolute_path;
 pthread_t* thread_array;
+thread_return_struct* full_dataset;
 
 
 // Method Declaration.
-int change_directory();
-void open_folder();
-void* thread_read_file();        // Reads given file and reorganizes data.
-data_struct* sort_data_array();
-data_struct* merge_sort();
-data_struct* merge_array();
+int change_directory();         // Safely changes directory to indicated folder.
+void open_folder();             // Opens indicated folder.
+void* thread_read_file();       // Reads given file and reorganizes data.
+data_struct* sort_data_array(); // Begins sorting of read-in file.
+data_struct* merge_sort();      // First half of merge sort.
+data_struct* merge_array();     // Second half of merge sort.
+void merge_to_file();           // Merges all thread data into one to write to file.
+thread_return_struct* merge_to_master();    // Merges thread data together.
+void write_to_file();           // Writes a single data line to file.
 
 
 /**
@@ -110,26 +124,32 @@ int main(int argc, char* argv[]) {
             for (index = 0; index < dir_file_counter; index++) {
                 pthread_join(thread_array[index], (void**) &return_struct);
 
-                thread_arrays[index] = return_struct;
+                // thread_arrays[index] = return_struct;
+                thread_arrays[index] = calloc(1, sizeof(thread_return_struct));
+                thread_arrays[index]->array_count = return_struct->array_count;
+                thread_arrays[index]->data_array = return_struct->data_array;
+                free(return_struct);
+            }
+
+            // Take thread data and merge + write to file.
+            merge_to_file(thread_arrays);
 
-                for (struct_number = 0; struct_number <= thread_arrays[index]->array_count; struct_number++) {
-                    printf("User: %-16s   ", thread_arrays[index]->data_array[struct_number].user_name);
-                    printf("Password: %-17s   ", thread_arrays[index]->data_array[struct_number].password);
-                    printf("Blood Type: %-7s   ", thread_arrays[index]->data_array[struct_number].blood_type);
-                    printf("Domain: %-28s   ", thread_arrays[index]->data_array[struct_number].domain_name);
-                    printf("DB Index: %-14d   \n", thread_arrays[index]->data_array[struct_number].db_index);
+            // Merging done. Free memory.
+            for (index = 0; index < dir_file_counter; index++) {
+                for (struct_number = 0; struct_number < thread_arrays[index]->array_count; struct_number++) {
                     free(thread_arrays[index]->data_array[struct_number].user_name);
                     free(thread_arrays[index]->data_array[struct_number].password);
                     free(thread_arrays[index]->data_array[struct_number].blood_type);
                     free(thread_arrays[index]->data_array[struct_number].domain_name);
                 }
                 free(thread_arrays[index]->data_array);
-                free(return_struct);
-                printf("\n\n");
+                free(thread_arrays[index]);
             }
             free(thread_arrays);
             free(absolute_path);
             free(thread_array);
+            free(full_dataset->data_array);
+            free(full_dataset);
         }
     }
 
@@ -217,8 +237,6 @@ void open_folder() {
     } else {
         err_msg("Directory does not have read access. Cannot view files.");
     }
-    printf("Directory File Counter: %d\n", dir_file_counter);
-
 
     // Prepare to set up threads.
     thread_array = calloc(dir_file_counter, sizeof(pthread_t));
@@ -255,7 +273,7 @@ void open_folder() {
                     temp_string = copy_string_with_buffer(absolute_path, BUFFER_SIZE);
                     strcat(temp_string, "/");
                     strcat(temp_string, dir_struct->d_name);
-                    printf("Path: %s\n", temp_string);
+                    // printf("Path: %s\n", temp_string);
 
                     // Actually create threads.
                     pthread_create(&thread_array[index], &attr, thread_read_file, (void*) copy_string(temp_string));
@@ -315,37 +333,31 @@ void* thread_read_file(void* file_location) {
         }
 
         // Actually parse line to array.
-
         // First get username.
         token = strtok_r(line_buffer, ",", &save_pointer);
         data_array[struct_number].user_name = copy_string(token);
-        // printf("User: %s   ", data_array[struct_number].user_name);
 
         // Get password.
         token = strtok_r(NULL, ",", &save_pointer);
         data_array[struct_number].password = copy_string(token);
-        // printf("Password: %s   ", data_array[struct_number].password);
 
         // Get blood type.
         token = strtok_r(NULL, ",", &save_pointer);
         data_array[struct_number].blood_type = copy_string(token);
-        // printf("Blood Type: %s   ", data_array[struct_number].blood_type);
 
         // Get domain name.
         token = strtok_r(NULL, ",", &save_pointer);
         data_array[struct_number].domain_name = copy_string(token);
-        // printf("Domain: %s   ", data_array[struct_number].domain_name);
 
         // Get db index.
         token = strtok_r(NULL, ",", &save_pointer);
         data_array[struct_number].db_index = atoi(token);
-        // printf("DB Index: %d  \n", data_array[struct_number].db_index);
     }
 
     data_array = merge_sort(data_array, 0, struct_number, (struct_number) + 1);
 
     return_struct = calloc(1, sizeof(thread_return_struct));
-    return_struct->array_count = struct_number;
+    return_struct->array_count = (struct_number + 1);
     return_struct->data_array = data_array;
 
     // Close file and exit out.
@@ -359,6 +371,11 @@ void* thread_read_file(void* file_location) {
 }
 
 
+/**
+ * Begins sorting of single read-in file's data.
+ *
+ * Returns struct array of sorted data.
+ */
 data_struct* sort_data_array(data_struct* data_array, int array_size) {
     data_struct* temp_array = calloc(array_size, sizeof(data_struct));
 
@@ -369,6 +386,11 @@ data_struct* sort_data_array(data_struct* data_array, int array_size) {
 }
 
 
+/**
+ * First half of data array merge sort.
+ *
+ * Returns struct array of sorted data.
+ */
 data_struct* merge_sort(data_struct* data_array, int low_int, int high_int, int array_size) {
     int mid_int;
 
@@ -385,105 +407,203 @@ data_struct* merge_sort(data_struct* data_array, int low_int, int high_int, int
 }
 
 
+/**
+ * Second half of data array merge sort.
+ *
+ * Returns sorted section of data array.
+ */
 data_struct* merge_array(data_struct* data_array, int low_int, int mid_int, int high_int, int array_size) {
     int index;
     int left_int = low_int;
     int right_int = mid_int + 1;
     data_struct* temp_array = calloc(array_size, sizeof(data_struct));
 
-    // printf("\n\n\nDuplicating data_array into temp_array... Low: %d   Mid: %d   High: %d   ArraySize: %d\n", low_int, mid_int, high_int, array_size);
-
     // Duplicate array.
     for (index = low_int; index < array_size; index++) {
         temp_array[index] = data_array[index];
-
-        // printf("Temp UserN: %-20s   ", temp_array[index].user_name);
-        // printf("Temp Index: %-20d   ", temp_array[index].db_index);
-        // printf("Strd UserN: %-20s   ", data_array[index].user_name);
-        // printf("Strd Index: %-20d\n", data_array[index].db_index);
     }
-    // printf("\n");
 
     // Loop until all index have been iterated.
     for (index = low_int; index < (high_int + 1); index++) {
 
         // Check if left side is done but right is not.
         if (left_int > mid_int) {
-
-            // printf("Left(%d) not less than mid(%d), \n", low_int, mid_int);
-            // printf("Temp UserN: %-20s   ", temp_array[right_int].user_name);
-            // printf("Temp Index: %-20d   ", temp_array[right_int].db_index);
-            // printf("Strd UserN: %-20s   ", data_array[index].user_name);
-            // printf("Strd Index: %-20d\n", data_array[index].db_index);
-
             data_array[index] = temp_array[right_int];
-
-            // printf("Swapped.\n");
-            // printf("Temp UserN: %-20s   ", temp_array[right_int].user_name);
-            // printf("Temp Index: %-20d   ", temp_array[right_int].db_index);
-            // printf("Strd UserN: %-20s   ", data_array[index].user_name);
-            // printf("Strd Index: %-20d\n", data_array[index].db_index);
-
             right_int++;
         } else {
             // Check if right side is done but left is not.
             if (right_int > high_int) {
-
-                // printf("Right(%d) not less than high(%d), \n", right_int, high_int);
-                // printf("Temp UserN: %-20s   ", temp_array[left_int].user_name);
-                // printf("Temp Index: %-20d   ", temp_array[left_int].db_index);
-                // printf("Strd UserN: %-20s   ", data_array[index].user_name);
-                // printf("Strd Index: %-20d\n", data_array[index].db_index);
-
                 data_array[index] = temp_array[left_int];
-
-                // printf("Swapped.\n");
-                // printf("Temp UserN: %-20s   ", temp_array[left_int].user_name);
-                // printf("Temp Index: %-20d   ", temp_array[left_int].db_index);
-                // printf("Strd UserN: %-20s   ", data_array[index].user_name);
-                // printf("Strd Index: %-20d\n", data_array[index].db_index);
-
                 left_int++;
             } else {
                 // Both sides not done. Compare both values.
                 if (temp_array[left_int].db_index < temp_array[right_int].db_index) {
+                    // Left less than right.
+                    data_array[index] = temp_array[left_int];
+                    left_int++;
+                } else {
+                    // Right less than left.
+                    data_array[index] = temp_array[right_int];
+                    right_int++;
+                }
+            }
+        }
+    }
+    free(temp_array);
+    return data_array;
+}
 
-                    // printf("Left(%d) less than right(%d), \n", left_int, right_int);
-                    // printf("Temp UserN: %-20s   ", temp_array[left_int].user_name);
-                    // printf("Temp Index: %-20d   ", temp_array[left_int].db_index);
-                    // printf("Strd UserN: %-20s   ", data_array[index].user_name);
-                    // printf("Strd Index: %-20d\n", data_array[index].db_index);
 
-                    data_array[index] = temp_array[left_int];
+/**
+ * Merges every thread's sorted records into the global full_dataset, then
+ * recreates "<absolute_path>/sorted.yay" with one line per merged record.
+ */
+void merge_to_file(thread_return_struct** thread_arrays) {
+    int index;
+    char* temp_string;
+    FILE* reset_file;
+    thread_return_struct* temp_dataset;
+
+    // Get file path of file to save.
+    temp_string = copy_string_with_buffer(absolute_path, BUFFER_SIZE);
+    strcat(temp_string, "/sorted.yay");
+
+    // Creates new file if does not exist. If already present, then clears file of all data.
+    reset_file = fopen(temp_string, "w");
+    if (reset_file == NULL) {
+        err_sys("Failed to open file at %s", temp_string);
+    } else {
+        fclose(reset_file);
+    }
+    free(temp_string);
+
+    full_dataset = calloc(1, sizeof(thread_return_struct));
+    full_dataset->data_array = NULL;    // Nothing merged yet; free(NULL) below is a no-op.
+
+    // Fold each file's records in. The merged array replaces the previous one.
+    for (index = 0; index < dir_file_counter; index++) {
+        temp_dataset = merge_to_master(thread_arrays[index]);
+        free(full_dataset->data_array);
+        full_dataset->array_count = temp_dataset->array_count;
+        full_dataset->data_array = temp_dataset->data_array;
+        free(temp_dataset);
+    }

-                    // printf("Swapped.\n");
-                    // printf("Temp UserN: %-20s   ", temp_array[left_int].user_name);
-                    // printf("Temp Index: %-20d   ", temp_array[left_int].db_index);
-                    // printf("Strd UserN: %-20s   ", data_array[index].user_name);
-                    // printf("Strd Index: %-20d\n", data_array[index].db_index);
+    // For each record stored in the full_dataset struct, write to file.
+    for (index = 0; index < full_dataset->array_count; index++) {
+        write_to_file(full_dataset->data_array[index]);
+    }

-                    left_int++;
-                } else {
+}
 
-                    // printf("Right(%d) less than left(%d), \n", right_int, left_int);
-                    // printf("Temp UserN: %-20s   ", temp_array[right_int].user_name);
-                    // printf("Temp Index: %-20d   ", temp_array[right_int].db_index);
-                    // printf("Strd UserN: %-20s   ", data_array[index].user_name);
-                    // printf("Strd Index: %-20d\n", data_array[index].db_index);
 
-                    data_array[index] = temp_array[right_int];
+/**
+ * Merges the given sorted dataset with the global full_dataset by db_index.
+ * Neither input is modified; records are copied shallowly into the result.
+ *
+ * Returns a newly allocated struct with the merged array. Caller frees both.
+ */
+thread_return_struct* merge_to_master(thread_return_struct* merge_dataset) {
+    int index;
+    int master_int = 0;
+    int child_int = 0;
+    int new_dataset_size;
+    thread_return_struct* temp_dataset;

-                    // printf("Swapped.\n");
-                    // printf("Temp UserN: %-20s   ", temp_array[right_int].user_name);
-                    // printf("Temp Index: %-20d   ", temp_array[right_int].db_index);
-                    // printf("Strd UserN: %-20s   ", data_array[index].user_name);
-                    // printf("Strd Index: %-20d\n", data_array[index].db_index);
+    // Calculate size of new dataset.
+    new_dataset_size = (full_dataset->array_count + merge_dataset->array_count);

-                    right_int++;
+    // Create result: a single holder struct plus room for every record.
+    temp_dataset = calloc(1, sizeof(thread_return_struct));
+    temp_dataset->data_array = calloc(new_dataset_size, sizeof(data_struct));
+
+    // Merge both inputs into the result, smallest db_index first.
+    for (index = 0; index < new_dataset_size; index++) {
+
+        // Check if master dataset is done.
+        if (master_int >= full_dataset->array_count) {
+            temp_dataset->data_array[index] = merge_dataset->data_array[child_int];
+            child_int++;
+        } else {
+            // Check if merge dataset is done.
+            if (child_int >= merge_dataset->array_count) {
+                temp_dataset->data_array[index] = full_dataset->data_array[master_int];
+                master_int++;
+            } else {
+                // Both not done. Compare values. Ties take the merged-in record.
+                if (full_dataset->data_array[master_int].db_index < merge_dataset->data_array[child_int].db_index) {
+                    temp_dataset->data_array[index] = full_dataset->data_array[master_int];
+                    master_int++;
+                } else {
+                    temp_dataset->data_array[index] = merge_dataset->data_array[child_int];
+                    child_int++;
                 }
             }
         }
     }
-    free(temp_array);
-    return data_array;
+    temp_dataset->array_count = new_dataset_size;
+    return temp_dataset;
+}
+
+
+/**
+ * Appends one record to "<absolute_path>/sorted.yay" as a single line:
+ * "user_name,password,blood_type,domain_name,db_index" plus a newline.
+ *
+ * The file is opened in append mode and closed again on every call.
+ * Failures are reported through err_sys(); if that call returns, the
+ * record is skipped.
+ */
+void write_to_file(data_struct data_array) {
+    int index;
+    int write_buffer_size;
+    char index_string[32];      // Fits any int printed in decimal, sign included.
+    char* write_buffer;
+    char* temp_string;
+    FILE* write_file;
+
+    // Determine size of write buffer to create. Each "+ 1" is a comma.
+    snprintf(index_string, sizeof(index_string), "%d", data_array.db_index);
+    write_buffer_size = (strlen(data_array.user_name) + 1);
+    write_buffer_size += (strlen(data_array.password) + 1);
+    write_buffer_size += (strlen(data_array.blood_type) + 1);
+    write_buffer_size += (strlen(data_array.domain_name) + 1);
+    write_buffer_size += (strlen(index_string));
+    write_buffer_size += 2;     // Trailing newline plus string terminator.
+
+    // Create and populate write buffer. Sized in chars, not char pointers.
+    write_buffer = calloc(write_buffer_size, sizeof(char));
+    if (write_buffer == NULL) {
+        err_sys("Failed to allocate %d byte write buffer", write_buffer_size);
+        return;
+    }
+    snprintf(write_buffer, write_buffer_size, "%s,%s,%s,%s,%s",
+             data_array.user_name, data_array.password, data_array.blood_type,
+             data_array.domain_name, index_string);
+
+    // Ensure line ends with a newline char. The final byte of the buffer is
+    // still zero from calloc, so the string stays terminated afterwards.
+    index = strlen(write_buffer);
+    if ((index == 0) || (write_buffer[index - 1] != '\n')) {
+        write_buffer[index] = '\n';
+    }
+
+    // Save write buffer to file.
+    temp_string = copy_string_with_buffer(absolute_path, BUFFER_SIZE);
+    strcat(temp_string, "/");
+    strcat(temp_string, "sorted.yay");
+
+    write_file = fopen(temp_string, "a");
+    if (write_file == NULL) {
+        err_sys("Failed to open file at %s", temp_string);
+        free(write_buffer);
+        free(temp_string);
+        return;
+    }
+
+    fputs(write_buffer, write_file);
+    fclose(write_file);
+
+    free(write_buffer);
+    free(temp_string);
+}
-- 
GitLab