diff --git a/documents/references.md b/documents/references.md
index d999b70c4946a4b2d06d78d8e9113f978d4010aa..9053dea2f28c8e4d80c44b3678aad1c1b55d821f 100644
--- a/documents/references.md
+++ b/documents/references.md
@@ -33,3 +33,7 @@ Most parallelization logic is from the book "Introduction to Parallel Programmin
 #### Moving Cursor in Terminal
 <https://stackoverflow.com/a/35190285>
 <https://tldp.org/HOWTO/Bash-Prompt-HOWTO/x361.html>
+
+### Makefiles
+#### Passing Command Line Args
+<https://stackoverflow.com/a/47008498>
diff --git a/makefile b/makefile
index 55bfd53853409298bc31056eb509df079d6be306..ead3aa0de45cc841be810004d3421832716d8c6c 100644
--- a/makefile
+++ b/makefile
@@ -1,3 +1,33 @@
 
-all:
-	gcc -Wall -Wpedantic -std=c99 src/main.c src/structs.c src/simulate_loads.c src/terminal_commands.c -g -o main.out -pthread
+# Tell makefile to use commands defined here, if file with same name exists.
+.PHONY: default all run valgrind
+# Set default if none is specified.
+default: all
+
+
+# Variables.
+CC = mpicc
+EXE = mpiexec
+CFLAGS = -Wall -Wpedantic -std=c99 -g
+TARGET = main.out
+LIBRARIES = -pthread
+DEPENDENCIES = src/main.c src/structs.c src/simulate_loads.c src/terminal_commands.c
+CORES := $(shell nproc)
+ARGS = `arg="$(filter-out $@,$(MAKECMDGOALS))" && echo $${arg:-${1}}`
+
+
+# Compile target if any dependencies update.
+$(TARGET): $(DEPENDENCIES)
+	@$(CC) $(CFLAGS) $(DEPENDENCIES) -o $(TARGET) $(LIBRARIES)
+
+
+# Compile target if dependencies update. Then run.
+all: $(TARGET)
+	@$(EXE) -n $(CORES) --oversubscribe ./$(TARGET) $(ARGS)
+# Debug variant with a fixed process count. Keep at column 0: a tab-indented "#" line is a recipe line and gets echoed.
+# @$(EXE) -n 3 --oversubscribe ./$(TARGET) $(ARGS)
+
+
+# Goals after the target name are program args (see ARGS), not targets. Match them with a no-op rule.
+%:
+	@:
diff --git a/src/main.c b/src/main.c
index d512d6b893ae3a3678d44eb6fc5f10d616899026..0b34381cd77039d977d49541a14cd8940eae64cf 100644
--- a/src/main.c
+++ b/src/main.c
@@ -6,6 +6,7 @@
 // System Import Headers.
 #include <ctype.h>
 #include <limits.h>
+#include <mpi.h>
 #include <pthread.h>
 #include <stdio.h>
 #include <stdlib.h>
@@ -30,6 +31,7 @@ void run_program();         // Main program logic. Handles various Load Balance
 
 // Global Variables.
 int total_processors;
+int process_rank;
 int seconds_per_index;
 int indexes_per_load;
 int total_loads;
@@ -39,38 +41,59 @@ int total_loads;
  * Program's main. Initialized and runs program.
  */
 int main(int argc, char* argv[]) {
-    printf("Initializing program.\n");
-    printf("\n");
+
+    // Initialize MPI communication between processes.
+    MPI_Init(&argc, &argv);
+    MPI_Comm_size(MPI_COMM_WORLD, &total_processors);
+    MPI_Comm_rank(MPI_COMM_WORLD, &process_rank);
+
+    if (process_rank == 0) {
+        printf("Initializing program.\n");
+        printf("\n");
+    }
 
     // Validate program args.
     validate_args(argc, argv);
+
     // If we got this far, then args are valid. Proceed with program.
-    total_processors = get_nprocs();
+    // total_processors = get_nprocs();
 
     // Display args.
-    printf("Provided values:\n");
-    printf("    %i Total Loads\n", total_loads);
-    printf("    %i Indexes Per Load\n", indexes_per_load);
-    printf("    %i Seconds Per Index\n", seconds_per_index);
-    printf("\n");
-    printf("    Total Expected work = TotalLoads * IndexesPerLoad * SecondsPerIndex\n");
-    printf("                        = %i * %i * %i\n", total_loads, indexes_per_load, seconds_per_index);
-    int seconds_active_work = total_loads * indexes_per_load * seconds_per_index;
-    printf("                        = %i Seconds Active Work\n", seconds_active_work);
-    printf("                        = %i Minutes Active Work\n", (seconds_active_work / 60));
-    printf("\n");
-    printf("    %i Total System Processors\n", total_processors);
-    printf("\n");
-    printf("    Expected total execution time with ideal processor usage:\n");
-    printf("    (Work / Processor Count) = %i Minutes\n", (seconds_active_work / 60 / total_processors));
-    printf("\n");
+    if (process_rank == 0) {
+        printf("System & Program Variables:\n");
+        printf("    %i Total Loads\n", total_loads);
+        printf("    %i Indexes Per Load\n", indexes_per_load);
+        printf("    %i Seconds Per Index\n", seconds_per_index);
+        printf("\n");
+        printf("    Total Expected work = TotalLoads * IndexesPerLoad * SecondsPerIndex\n");
+        printf("                        = %i * %i * %i\n", total_loads, indexes_per_load, seconds_per_index);
+        int seconds_active_work = total_loads * indexes_per_load * seconds_per_index;
+        printf("                        = %i Seconds Active Work\n", seconds_active_work);
+        printf("                        = %i Minutes Active Work\n", (seconds_active_work / 60));
+        printf("\n");
+        printf("    %i Total System Processors\n", get_nprocs());
+        printf("    %i Processors in Use\n", total_processors);
+        printf("\n");
+        printf("    Expected total execution time with ideal processor usage (ignoring unused processors):\n");
+        printf("    (Work / Processor Count) = %i Minutes\n", (seconds_active_work / 60 / total_processors));
+        printf("\n");
+    }
+
+    // Wait for all processes to synchronize.
+    MPI_Barrier(MPI_COMM_WORLD);
 
     // Run program.
     // display_max_types();
-    run_program(total_processors);
+    run_program();
+
+    if (process_rank == 0) {
+        printf("\n");
+        printf("Terminating program.\n");
+    }
+
+    // Close MPI communication between processes.
+    MPI_Finalize();
 
-    printf("\n");
-    printf("Terminating program.\n");
     exit(0);
 }
 
@@ -86,41 +109,52 @@ void validate_args(int argc, char* argv[]) {
         // Validate "seconds_per_index" value. Should be between 1 and 10.
         seconds_per_index = strtol(argv[1], NULL, 10);
         if ((seconds_per_index < 1) || (seconds_per_index > 10)) {
-            printf("Arg1 (seconds_per_index) should be int between 1 and 10.\n");
-            printf("\n");
-            printf("Terminating program.\n");
+            if (process_rank == 0) {
+                printf("Arg1 (seconds_per_index) should be int between 1 and 10.\n");
+                printf("\n");
+                printf("Terminating program.\n");
+            }
             exit(1);
         }
 
         // Validate "indexes_per_load" value. Should be between 100 and 10,000.
         indexes_per_load = strtol(argv[2], NULL, 10);
         if ((indexes_per_load < 10) || (indexes_per_load > 10000)) {
-            printf("Arg2 (indexes_per_load) should be int between 10 and 10,000.\n");
-            printf("\n");
-            printf("Terminating program.\n");
+            if (process_rank == 0) {
+                printf("Arg2 (indexes_per_load) should be int between 10 and 10,000.\n");
+                printf("\n");
+                printf("Terminating program.\n");
+            }
             exit(1);
         }
 
         // Validate "total_loads" value. Should be between 100 and 10,000.
         total_loads = strtol(argv[3], NULL, 10);
         if ((total_loads < 10) || (total_loads > 10000)) {
-            printf("Arg3 (total_loads) should be int between 10 and 10,000.\n");
-            printf("\n");
-            printf("Terminating program.\n");
+            if (process_rank == 0) {
+                printf("Arg3 (total_loads) should be int between 10 and 10,000.\n");
+                printf("\n");
+                printf("Terminating program.\n");
+            }
             exit(1);
         }
 
     } else if (argc > 4) {
         // Too many args. Error.
-        printf("Too many args passed. Got %i. Expected 3.\n", (argc - 1));
-        printf("\n");
-        printf("Terminating program.\n");
+        if (process_rank == 0) {
+            printf("Too many args passed. Got %i. Expected 3.\n", (argc - 1));
+            printf("\n");
+            printf("Terminating program.\n");
+        }
         exit(1);
+
     } else {
         // Too few args. Error.
-        printf("Too few args passed. Got %i. Expected 3.\n", (argc - 1));
-        printf("\n");
-        printf("Terminating program.\n");
+        if (process_rank == 0) {
+            printf("Too few args passed. Got %i. Expected 3.\n", (argc - 1));
+            printf("\n");
+            printf("Terminating program.\n");
+        }
         exit(1);
     }
 }
@@ -145,15 +179,32 @@ void display_max_types() {
  * Main program logic.
  * Entry point for running various Load Balancing schemes.
  */
-void run_program(int total_processors) {
-    printf("Running core program logic.");
-    printf("\n");
+void run_program() {
+    if (process_rank == 0) {
+        printf("Running core program logic.\n");
+        printf("\n");
+    }
 
-    run_arr(total_processors, seconds_per_index, indexes_per_load, total_loads);
+    // Wait for all processes to synchronize.
+    MPI_Barrier(MPI_COMM_WORLD);
+
+    thread_struct* thread_args_ptr = initialize_thread_struct(
+        total_processors, seconds_per_index, indexes_per_load, total_loads, process_rank
+    );
+
+    run_arr(thread_args_ptr);
+
+    // run_arr() does not release the struct, so free it here to avoid leaking it.
+    free(thread_args_ptr);
 
     // Test load logic with no parallelization. Debugging only.
     // simulate_load("main");
 
-    printf("\n");
-    printf("Core program logic complete.");
+    // Wait for all processes to synchronize.
+    MPI_Barrier(MPI_COMM_WORLD);
+
+    if (process_rank == 0) {
+        printf("\n");
+        printf("Core program logic complete.\n");
+    }
 }
diff --git a/src/simulate_loads.c b/src/simulate_loads.c
index 5d3cb5d13f98c4659c2a69ec16882c991ef09096..8f95747e1ccdf0dd86872bcefe0d855d131a2dcd 100644
--- a/src/simulate_loads.c
+++ b/src/simulate_loads.c
@@ -15,37 +15,54 @@
 /**
  * Logic to run "Asynchronous Round Robin" load scheme.
  */
-void run_arr(int total_processors, int seconds_per_index, int indexes_per_load, int total_loads) {
-    printf("Starting ARR load scheme.");
-    printf("\n");
+void run_arr(thread_struct* thread_args_ptr) {
+    int process_rank = thread_args_ptr->thread_num;
 
-    // Initialize threading.
-    pthread_t* thread_pool = calloc((total_processors - 1), sizeof(pthread_t));
-    int* thread_results = calloc((total_processors - 1), sizeof(int));
+    if (process_rank == 0) {
+        printf("Starting ARR load scheme.\n");
+        printf("\n");
+    }
 
-    // Launch threads.
-    for (int index = 0; index < (total_processors - 1); index++) {
+    // Wait for all processes to synchronize.
+    // Also sleep for a second to allow time for console output to process.
+    MPI_Barrier(MPI_COMM_WORLD);
+    sleep(1);
 
-        // // Initialize thread args.
-        thread_struct* thread_args = initialize_thread_struct(
-            &thread_pool, &thread_results, total_processors, seconds_per_index, indexes_per_load, total_loads, index
-        );
+    // Handle based on process number.
+    if (process_rank > 0) {
+        // Child process. Handles all the work.
 
-        // Create current thread.
-        pthread_create(&thread_pool[index], NULL, simulate_arr_load, (void*) thread_args);
-    }
+        // Report {rank, loads remaining} to the main process. A fixed-size stack
+        // buffer replaces the unchecked calloc(), so there is no NULL case to handle.
+        int send_array[2] = {process_rank, thread_args_ptr->total_loads};
+        MPI_Send(send_array, 2, MPI_INT, 0, 0, MPI_COMM_WORLD);
+
+    } else {
+        // Main process. Handles communication to terminal.
+
+        // Get message from all child processors.
+        int index = 0;
+        while (index < thread_args_ptr->total_processors - 1) {
+
+            // Each child sends exactly 2 ints (see send_array above), so the receive
+            // buffer holds 2. MPI_ANY_SOURCE accepts the children in any order.
+            int recv_array[2] = {0, 0};
+            MPI_Recv(recv_array, 2, MPI_INT, MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+            printf("Thread %i: %i / %i Loads Remaining\n", recv_array[0], recv_array[1], thread_args_ptr->total_loads);
 
-    // Wait for threads to finish.
-    for (int index = 0; index < (total_processors - 1); index++) {
-        pthread_join(thread_pool[index], NULL);
+            index += 1;
+        }
     }
 
-    // Free variables.
-    free(thread_pool);
-    free(thread_results);
+    // Wait for all processes to synchronize.
+    // Also sleep for a second to allow time for console output to process.
+    MPI_Barrier(MPI_COMM_WORLD);
+    sleep(1);
 
-    printf("\n");
-    printf("ARR load scheme. Finished.");
+    if (process_rank == 0) {
+        printf("\n");
+        printf("ARR load scheme. Finished.\n");
+    }
 }
 
 
@@ -56,7 +73,7 @@ void run_grr(int total_processors) {
     printf("Starting GRR load scheme.");
     printf("\n");
 
-    pthread_t* thread_pool = NULL;
+    // pthread_t* thread_pool = NULL;
 
     printf("\n");
     printf("GRR load scheme. Finished.");
@@ -70,7 +87,7 @@ void run_rp(int total_processors) {
     printf("Starting RP load scheme.");
     printf("\n");
 
-    pthread_t* thread_pool = NULL;
+    // pthread_t* thread_pool = NULL;
 
     printf("\n");
     printf("RP load scheme. Finished.");
@@ -85,7 +102,7 @@ void run_nn(int total_processors) {
     printf("Starting NN load scheme.");
     printf("\n");
 
-    pthread_t* thread_pool = NULL;
+    // pthread_t* thread_pool = NULL;
 
     printf("\n");
     printf("NN load scheme. Finished.");
@@ -132,37 +149,41 @@ void run_nn(int total_processors) {
 /**
  * Function to simulate working on an ARR load.
  */
-void* simulate_arr_load(thread_struct* thread_struct_ptr) {
-    printf("Simulating load processing on thread \"%i\".\n", thread_struct_ptr->thread_num);
+// void* simulate_arr_load(thread_struct* thread_struct_ptr) {
+//     printf("Simulating load processing on thread \"%i\".\n", thread_struct_ptr->thread_num);
 
-    // int load_counter = 0;
-    // int index = 0;
+//     // Send message back to main thread.
+//     // char* message = "Thread %i checking in!\n";
+//     // MPI_Send(message, sizeof(message) + 1, MPI_CHAR, 0, 0,)
 
-    // // Iterate through loads. Each load has indexes to simulate "work".
-    // while (load_counter < total_loads) {
+//     // int load_counter = 0;
+//     // int index = 0;
 
-    //     // Increment load num.
-    //     load_counter += 1;
-    //     printf("Simulating load %i on thread \"%s\".\n", load_counter, thread_name);
+//     // // Iterate through loads. Each load has indexes to simulate "work".
+//     // while (load_counter < total_loads) {
 
-    //     // Simulate running the load.
-    //     index = 0;
-    //     while (index < indexes_per_load) {
+//     //     // Increment load num.
+//     //     load_counter += 1;
+//     //     printf("Simulating load %i on thread \"%s\".\n", load_counter, thread_name);
 
-    //         // Print out index value.
-    //         if ((index % 10) == 0) {
-    //             printf("load %i / %i    index %i / %i\n", load_counter, total_loads, index, indexes_per_load);
-    //         }
+//     //     // Simulate running the load.
+//     //     index = 0;
+//     //     while (index < indexes_per_load) {
 
-    //         // Wait 1 second to simulate work.
-    //         sleep(1);
+//     //         // Print out index value.
+//     //         if ((index % 10) == 0) {
+//     //             printf("load %i / %i    index %i / %i\n", load_counter, total_loads, index, indexes_per_load);
+//     //         }
 
-    //         // Increment index.
-    //         index += 1;
-    //     }
+//     //         // Wait 1 second to simulate work.
+//     //         sleep(1);
 
-    // }
+//     //         // Increment index.
+//     //         index += 1;
+//     //     }
 
-    free(thread_struct_ptr);
-}
+//     // }
+
+//     free(thread_struct_ptr);
+// }
 
diff --git a/src/simulate_loads.h b/src/simulate_loads.h
index ee07d54052b26e261e4f035f6c0954ea6ac15591..b31363fe031239b1f521c06f8fc26f7bd3a83898 100644
--- a/src/simulate_loads.h
+++ b/src/simulate_loads.h
@@ -5,10 +5,11 @@
 
 // System Import Headers.
 #include <ctype.h>
-#include <pthread.h>
+#include <mpi.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
+#include <unistd.h>
 
 
 // Function Prototypes.
diff --git a/src/structs.c b/src/structs.c
index 51f6bf02c8df84175ba7d1af96ec1c5e7c472bc1..78f79aba91110e827daf697b7b39ddce6f534813 100644
--- a/src/structs.c
+++ b/src/structs.c
@@ -15,8 +15,6 @@
  * Initializes a "thread_struct" object.
  */
 thread_struct* initialize_thread_struct(
-        pthread_t** thread_pool_ptr,
-        int** thread_results_ptr,
         int total_processors,
         int seconds_per_index,
         int indexes_per_load,
@@ -28,8 +26,6 @@ thread_struct* initialize_thread_struct(
     thread_struct* new_struct = calloc(1, sizeof(thread_struct));
 
     // Populate fields.
-    new_struct->thread_pool_ptr = thread_pool_ptr;
-    new_struct->thread_results_ptr = thread_results_ptr;
     new_struct->total_processors = total_processors;
     new_struct->seconds_per_index = seconds_per_index;
     new_struct->indexes_per_load = indexes_per_load;
diff --git a/src/structs.h b/src/structs.h
index a199ef68e74e8cbc13da2786e23c11243459f2d8..61f60f166c2c2fc365624dedf5c1aa99aa8839af 100644
--- a/src/structs.h
+++ b/src/structs.h
@@ -17,8 +17,6 @@
 typedef struct {
 
     // "Global" thread values.
-    pthread_t** thread_pool_ptr;
-    int** thread_results_ptr;
     int total_processors;
     int seconds_per_index;
     int indexes_per_load;