diff --git a/Data/test/test2 b/Data/test/test2 new file mode 100644 index 0000000000000000000000000000000000000000..72925df400743701a23502c4277343621bdc5c9d --- /dev/null +++ b/Data/test/test2 @@ -0,0 +1,10 @@ +Impeartale,fooQu1yee5m,A+,TextFiction.com,2757 +Mhen1958,Ohfi0ieLei,O+,FindSpin.com,971 +Allonand,Shootuy0ie,O+,FlameBalls.com,2864 +Proffelf,Ko6JohzoC,O+,BidPursuit.com,307 +Suntly,paech2ooSei,O+,BaseRingTones.com,783 +Bantiong,baiKee2t,A+,SecuritiesCard.com,1373 +Mysis1931,Atheeyosh9ai,A+,PageRings.com,2911 +Pinge1982,odoo6gahMai,B+,AffordableTest.com,1578 +Colood,vah2Xadoos,O+,PlayDetails.com,2154 +Thereshed97,Iechoonga7,A+,ToysManual.com,1967 diff --git a/Main.c b/Main.c index 0b4b0df4ca5fccbe37d0aa9dcb534512b75db509..e2a9671a67492ce12ee745d4c97d6816f73ad1a4 100644 --- a/Main.c +++ b/Main.c @@ -23,7 +23,17 @@ /** * Known Issues: + * This appears to have a race condition, somehow. + * Unfortunately, I don't seem to understand threading enough to troubleshoot it. * + * But sometimes when running, all memory is cleared according to valgrind and + * everything is fine. + * Other times, valgrind reports errors on program end. There will arbitrarily be + * exactly "1,614 bytes in 4 blocks" that are lost. This appears to generally be + * consistent, regardless of how many files are in the selected folder (i.e., how + * many threads are created) or what the contents of said files are. + * The actual error description seems to point to the imported pthread code, + * which means the issue is something to do with my implementation of threading. */ @@ -62,15 +72,19 @@ typedef struct { int dir_file_counter; char* absolute_path; pthread_t* thread_array; +thread_return_struct* full_dataset; // Method Declaration. -int change_directory(); -void open_folder(); -void* thread_read_file(); // Reads given file and reorganizes data. 
-data_struct* sort_data_array(); -data_struct* merge_sort(); -data_struct* merge_array(); +int change_directory(); // Safely changes directory to inicated folder. +void open_folder(); // Opens indicated folder. +void* thread_read_file(); // Reads given file and reorganizes data. +data_struct* sort_data_array(); // Begins sorting of read-in file. +data_struct* merge_sort(); // First half of merge sort. +data_struct* merge_array(); // Second half of merge sort. +void merge_to_file(); // Merges all thread data into one to write to file. +thread_return_struct* merge_to_master(); // Merges thread data together. +void write_to_file(); // Writes a single data line to file. /** @@ -110,26 +124,32 @@ int main(int argc, char* argv[]) { for (index = 0; index < dir_file_counter; index++) { pthread_join(thread_array[index], (void**) &return_struct); - thread_arrays[index] = return_struct; + // thread_arrays[index] = return_struct; + thread_arrays[index] = calloc(1, sizeof(thread_return_struct)); + thread_arrays[index]->array_count = return_struct->array_count; + thread_arrays[index]->data_array = return_struct->data_array; + free(return_struct); + } + + // Take thread data and merge + write to file. + merge_to_file(thread_arrays); - for (struct_number = 0; struct_number <= thread_arrays[index]->array_count; struct_number++) { - printf("User: %-16s ", thread_arrays[index]->data_array[struct_number].user_name); - printf("Password: %-17s ", thread_arrays[index]->data_array[struct_number].password); - printf("Blood Type: %-7s ", thread_arrays[index]->data_array[struct_number].blood_type); - printf("Domain: %-28s ", thread_arrays[index]->data_array[struct_number].domain_name); - printf("DB Index: %-14d \n", thread_arrays[index]->data_array[struct_number].db_index); + // Merging done. Free memory. 
+ for (index = 0; index < dir_file_counter; index++) { + for (struct_number = 0; struct_number < thread_arrays[index]->array_count; struct_number++) { free(thread_arrays[index]->data_array[struct_number].user_name); free(thread_arrays[index]->data_array[struct_number].password); free(thread_arrays[index]->data_array[struct_number].blood_type); free(thread_arrays[index]->data_array[struct_number].domain_name); } free(thread_arrays[index]->data_array); - free(return_struct); - printf("\n\n"); + free(thread_arrays[index]); } free(thread_arrays); free(absolute_path); free(thread_array); + free(full_dataset->data_array); + free(full_dataset); } } @@ -217,8 +237,6 @@ void open_folder() { } else { err_msg("Directory does not have read access. Cannot view files."); } - printf("Directory File Counter: %d\n", dir_file_counter); - // Prepare to set up threads. thread_array = calloc(dir_file_counter, sizeof(pthread_t)); @@ -255,7 +273,7 @@ void open_folder() { temp_string = copy_string_with_buffer(absolute_path, BUFFER_SIZE); strcat(temp_string, "/"); strcat(temp_string, dir_struct->d_name); - printf("Path: %s\n", temp_string); + // printf("Path: %s\n", temp_string); // Actually create threads. pthread_create(&thread_array[index], &attr, thread_read_file, (void*) copy_string(temp_string)); @@ -315,37 +333,31 @@ void* thread_read_file(void* file_location) { } // Actually parse line to array. - // First get username. token = strtok_r(line_buffer, ",", &save_pointer); data_array[struct_number].user_name = copy_string(token); - // printf("User: %s ", data_array[struct_number].user_name); // Get password. token = strtok_r(NULL, ",", &save_pointer); data_array[struct_number].password = copy_string(token); - // printf("Password: %s ", data_array[struct_number].password); // Get blood type. token = strtok_r(NULL, ",", &save_pointer); data_array[struct_number].blood_type = copy_string(token); - // printf("Blood Type: %s ", data_array[struct_number].blood_type); // Get domain name. 
token = strtok_r(NULL, ",", &save_pointer); data_array[struct_number].domain_name = copy_string(token); - // printf("Domain: %s ", data_array[struct_number].domain_name); // Get db index. token = strtok_r(NULL, ",", &save_pointer); data_array[struct_number].db_index = atoi(token); - // printf("DB Index: %d \n", data_array[struct_number].db_index); } data_array = merge_sort(data_array, 0, struct_number, (struct_number) + 1); return_struct = calloc(1, sizeof(thread_return_struct)); - return_struct->array_count = struct_number; + return_struct->array_count = (struct_number + 1); return_struct->data_array = data_array; // Close file and exit out. @@ -359,6 +371,11 @@ void* thread_read_file(void* file_location) { } +/** + * Begins sorting of single read-in file's data. + * + * Returns struct array of sorted data. + */ data_struct* sort_data_array(data_struct* data_array, int array_size) { data_struct* temp_array = calloc(array_size, sizeof(data_struct)); @@ -369,6 +386,11 @@ data_struct* sort_data_array(data_struct* data_array, int array_size) { } +/** + * First half of data array merge sort. + * + * Returns struct array of sorted data. + */ data_struct* merge_sort(data_struct* data_array, int low_int, int high_int, int array_size) { int mid_int; @@ -385,105 +407,203 @@ data_struct* merge_sort(data_struct* data_array, int low_int, int high_int, int } +/** + * Second half of data array merge sort. + * + * Returns sorted section of data array. + */ data_struct* merge_array(data_struct* data_array, int low_int, int mid_int, int high_int, int array_size) { int index; int left_int = low_int; int right_int = mid_int + 1; data_struct* temp_array = calloc(array_size, sizeof(data_struct)); - // printf("\n\n\nDuplicating data_array into temp_array... Low: %d Mid: %d High: %d ArraySize: %d\n", low_int, mid_int, high_int, array_size); - // Duplicate array. 
for (index = low_int; index < array_size; index++) { temp_array[index] = data_array[index]; - - // printf("Temp UserN: %-20s ", temp_array[index].user_name); - // printf("Temp Index: %-20d ", temp_array[index].db_index); - // printf("Strd UserN: %-20s ", data_array[index].user_name); - // printf("Strd Index: %-20d\n", data_array[index].db_index); } - // printf("\n"); // Loop until all index have been iterated. for (index = low_int; index < (high_int + 1); index++) { // Check if left side is done but right is not. if (left_int > mid_int) { - - // printf("Left(%d) not less than mid(%d), \n", low_int, mid_int); - // printf("Temp UserN: %-20s ", temp_array[right_int].user_name); - // printf("Temp Index: %-20d ", temp_array[right_int].db_index); - // printf("Strd UserN: %-20s ", data_array[index].user_name); - // printf("Strd Index: %-20d\n", data_array[index].db_index); - data_array[index] = temp_array[right_int]; - - // printf("Swapped.\n"); - // printf("Temp UserN: %-20s ", temp_array[right_int].user_name); - // printf("Temp Index: %-20d ", temp_array[right_int].db_index); - // printf("Strd UserN: %-20s ", data_array[index].user_name); - // printf("Strd Index: %-20d\n", data_array[index].db_index); - right_int++; } else { // Check if right side is done but left is not. 
if (right_int > high_int) { - - // printf("Right(%d) not less than high(%d), \n", right_int, high_int); - // printf("Temp UserN: %-20s ", temp_array[left_int].user_name); - // printf("Temp Index: %-20d ", temp_array[left_int].db_index); - // printf("Strd UserN: %-20s ", data_array[index].user_name); - // printf("Strd Index: %-20d\n", data_array[index].db_index); - data_array[index] = temp_array[left_int]; - - // printf("Swapped.\n"); - // printf("Temp UserN: %-20s ", temp_array[left_int].user_name); - // printf("Temp Index: %-20d ", temp_array[left_int].db_index); - // printf("Strd UserN: %-20s ", data_array[index].user_name); - // printf("Strd Index: %-20d\n", data_array[index].db_index); - left_int++; } else { // Both sides not done. Compare both values. if (temp_array[left_int].db_index < temp_array[right_int].db_index) { + // Left less than right. + data_array[index] = temp_array[left_int]; + left_int++; + } else { + // Right less than left. + data_array[index] = temp_array[right_int]; + right_int++; + } + } + } + } + free(temp_array); + return data_array; +} - // printf("Left(%d) less than right(%d), \n", left_int, right_int); - // printf("Temp UserN: %-20s ", temp_array[left_int].user_name); - // printf("Temp Index: %-20d ", temp_array[left_int].db_index); - // printf("Strd UserN: %-20s ", data_array[index].user_name); - // printf("Strd Index: %-20d\n", data_array[index].db_index); - data_array[index] = temp_array[left_int]; +/** + * Merges all thread data into one struct. + * Then saves this by writing to file. + */ +void merge_to_file(thread_return_struct** thread_arrays) { + int index; + char* temp_string; + FILE* reset_file; + thread_return_struct* temp_dataset; + + // Get file path of file to save. + temp_string = copy_string_with_buffer(absolute_path, BUFFER_SIZE); + strcat(temp_string, "/"); + strcat(temp_string, "sorted.yay"); + + // Creates new file if does not exist. If already present, then clears file of all data. 
+ reset_file = fopen(temp_string, "w"); + if (reset_file == NULL) { + err_sys("Failed to open file at %s", temp_string); + } + fclose(reset_file); + free(temp_string); + + full_dataset = calloc(1, sizeof(thread_return_struct)); + full_dataset->data_array = calloc(thread_arrays[0]->array_count, sizeof(thread_return_struct)); + + // For each dataset from directory, merge into full_dataset struct. + for (index = 0; index < dir_file_counter; index++) { + temp_dataset = merge_to_master(thread_arrays[index]); + free(full_dataset->data_array); + full_dataset->array_count = temp_dataset->array_count; + full_dataset->data_array = temp_dataset->data_array; + free(temp_dataset); + } - // printf("Swapped.\n"); - // printf("Temp UserN: %-20s ", temp_array[left_int].user_name); - // printf("Temp Index: %-20d ", temp_array[left_int].db_index); - // printf("Strd UserN: %-20s ", data_array[index].user_name); - // printf("Strd Index: %-20d\n", data_array[index].db_index); + // For each record stored in the full_dataset struct, write to file. + for (index = 0; index < full_dataset->array_count; index++) { + write_to_file(full_dataset->data_array[index]); + } - left_int++; - } else { +} - // printf("Right(%d) less than left(%d), \n", right_int, left_int); - // printf("Temp UserN: %-20s ", temp_array[right_int].user_name); - // printf("Temp Index: %-20d ", temp_array[right_int].db_index); - // printf("Strd UserN: %-20s ", data_array[index].user_name); - // printf("Strd Index: %-20d\n", data_array[index].db_index); - data_array[index] = temp_array[right_int]; +/** + * Merge given subset of data into into the larger datset struct. + * Keeps data in sorted order. + * + * Returns new full set of sorted data. 
+ *
+ * merge_dataset: records to fold into the global full_dataset. Both inputs are
+ *                expected to already be sorted by ascending db_index; neither
+ *                is modified.
+ *
+ * The returned struct and its data_array are newly allocated; the caller takes
+ * ownership of both. Records are copied by struct assignment, so their string
+ * fields keep pointing at the memory held by the original thread data.
+ * On equal db_index values the record from merge_dataset is placed first.
+ */
+thread_return_struct* merge_to_master(thread_return_struct* merge_dataset) {
+    int index;
+    int master_int = 0;
+    int child_int = 0;
+    int new_dataset_size;
+    thread_return_struct* temp_dataset;
 
-                    // printf("Swapped.\n");
-                    // printf("Temp UserN: %-20s ", temp_array[right_int].user_name);
-                    // printf("Temp Index: %-20d ", temp_array[right_int].db_index);
-                    // printf("Strd UserN: %-20s ", data_array[index].user_name);
-                    // printf("Strd Index: %-20d\n", data_array[index].db_index);
+    // Calculate size of new dataset.
+    new_dataset_size = (full_dataset->array_count + merge_dataset->array_count);
 
-                    right_int++;
+    // Allocate exactly one result struct. Sizing this allocation by
+    // new_dataset_size over-allocated on every call and, for an empty merge,
+    // asked calloc for zero elements, which may return NULL right before the
+    // dereference on the next line.
+    temp_dataset = calloc(1, sizeof(thread_return_struct));
+    temp_dataset->data_array = calloc(new_dataset_size, sizeof(data_struct));
+
+    // Merge the given struct and full_dataset into the new array.
+    for (index = 0; index < new_dataset_size; index++) {
+
+        // Check if master dataset is done.
+        if (master_int >= full_dataset->array_count) {
+            temp_dataset->data_array[index] = merge_dataset->data_array[child_int];
+            child_int++;
+        } else {
+            // Check if merge dataset is done.
+            if (child_int >= merge_dataset->array_count) {
+                temp_dataset->data_array[index] = full_dataset->data_array[master_int];
+                master_int++;
+            } else {
+                // Both not done. Compare values.
+                if (full_dataset->data_array[master_int].db_index < merge_dataset->data_array[child_int].db_index) {
+                    temp_dataset->data_array[index] = full_dataset->data_array[master_int];
+                    master_int++;
+                } else {
+                    temp_dataset->data_array[index] = merge_dataset->data_array[child_int];
+                    child_int++;
                 }
             }
         }
     }
-    free(temp_array);
-    return data_array;
+    temp_dataset->array_count = new_dataset_size;
+    return temp_dataset;
+}
+
+
+/**
+ * Writes a single data line to file.
+ *
+ * data_array: the single record to write (despite the name, not an array).
+ *
+ * The record is appended to "sorted.yay" under absolute_path as
+ * "user_name,password,blood_type,domain_name,db_index" followed by a newline.
+ * The file is opened and closed again on every call.
+ */
+void write_to_file(data_struct data_array) {
+    char* temp_string;
+    FILE* write_file;
+
+    // Build the path of the output file.
+    temp_string = copy_string_with_buffer(absolute_path, BUFFER_SIZE);
+    strcat(temp_string, "/");
+    strcat(temp_string, "sorted.yay");
+
+    write_file = fopen(temp_string, "a");
+    if (write_file == NULL) {
+        err_sys("Failed to open file at %s", temp_string);
+    } else {
+        // Let stdio format the line directly. This removes the hand-sized scratch
+        // buffers (previously allocated with sizeof(char*) per character) and
+        // prints db_index in full instead of cutting it off at a fixed
+        // snprintf limit.
+        fprintf(write_file, "%s,%s,%s,%s,%d\n",
+                data_array.user_name,
+                data_array.password,
+                data_array.blood_type,
+                data_array.domain_name,
+                data_array.db_index);
+        fclose(write_file);
+    }
+
+    free(temp_string);
 }