From 96e143b150f09c766d45f5fa7fb8092974b8ae10 Mon Sep 17 00:00:00 2001
From: Brandon Rodriguez <brodriguez8774@gmail.com>
Date: Sun, 17 May 2020 11:11:31 -0400
Subject: [PATCH] Create additional aggregation functions

---
 main.py                     |  49 +----------
 resources/stat_functions.py | 158 ++++++++++++++++++++++++++++++------
 2 files changed, 138 insertions(+), 69 deletions(-)

diff --git a/main.py b/main.py
index 69a6c3c..0e743bf 100644
--- a/main.py
+++ b/main.py
@@ -6,14 +6,13 @@ Covers basic statistical measurements in data mining.
 
 # User Imports.
 from resources import logging as init_logging
-from resources.stat_functions import DictAggregations
+from resources.stat_functions import Aggregations
 
 
 # Initialize Logger.
 logger = init_logging.get_logger(__name__)
 
 
-
 def main():
     """
     Program main.
@@ -28,9 +27,6 @@ def calc_part_1():
     Logic for "part 1" of assignment.
     """
     student_array = []
-    id_agg = {}
-    midterm_agg = {}
-    final_agg = {}
 
     # Open file.
     with open('./documents/data.online.scores.txt', 'r') as student_scores_file:
@@ -48,46 +44,9 @@ def calc_part_1():
 
             # logger.info(student_array[len(student_array) - 1])
 
-
-    # Get min.
-    id_agg['min'] = DictAggregations.min(student_array, 'id')
-    midterm_agg['min'] = DictAggregations.min(student_array, 'midterm')
-    final_agg['min'] = DictAggregations.min(student_array, 'final')
-
-    # Get max.
-    id_agg['max'] = DictAggregations.max(student_array, 'id')
-    midterm_agg['max'] = DictAggregations.max(student_array, 'midterm')
-    final_agg['max'] = DictAggregations.max(student_array, 'final')
-
-    # Get sum.
-    id_agg['sum'] = DictAggregations.sum(student_array, 'id')
-    midterm_agg['sum'] = DictAggregations.sum(student_array, 'midterm')
-    final_agg['sum'] = DictAggregations.sum(student_array, 'final')
-
-    # Get mean.
-    id_agg['mean'] = DictAggregations.mean(student_array, 'id')
-    midterm_agg['mean'] = DictAggregations.mean(student_array, 'midterm')
-    final_agg['mean'] = DictAggregations.mean(student_array, 'final')
-
-    # Get median.
-    id_agg['median'] = DictAggregations.median(student_array, 'id')
-    midterm_agg['median'] = DictAggregations.median(student_array, 'midterm')
-    final_agg['median'] = DictAggregations.median(student_array, 'final')
-
-    # Get mode.
-    id_agg['mode'] = DictAggregations.mode(student_array, 'id')
-    midterm_agg['mode'] = DictAggregations.mode(student_array, 'midterm')
-    final_agg['mode'] = DictAggregations.mode(student_array, 'final')
-
-    # Get variance.
-    id_agg['variance'] = DictAggregations.variance(student_array, 'id')
-    midterm_agg['variance'] = DictAggregations.variance(student_array, 'midterm')
-    final_agg['variance'] = DictAggregations.variance(student_array, 'final')
-
-    # Get standard deviation.
-    id_agg['standard_deviation'] = DictAggregations.standard_deviation(student_array, 'id')
-    midterm_agg['standard_deviation'] = DictAggregations.standard_deviation(student_array, 'midterm')
-    final_agg['standard_deviation'] = DictAggregations.standard_deviation(student_array, 'final')
+    id_agg = Aggregations.all(student_array, 'id', display=False)
+    midterm_agg = Aggregations.all(student_array, 'midterm', display=False)
+    final_agg = Aggregations.all(student_array, 'final', display=False)
 
     # Display results.
     logger.info('')
diff --git a/resources/stat_functions.py b/resources/stat_functions.py
index 9820d05..8139734 100644
--- a/resources/stat_functions.py
+++ b/resources/stat_functions.py
@@ -1,5 +1,9 @@
 """
 Various stat functions to run.
+
+These assume storage in the format of an array of dictionaries, where each dictionary represents a record.
+Thus, they work fine for small amounts of data but are probably not very efficient for larger datasets.
+Larger datasets (where efficiency and runtime matter) should probably instead use direct numpy arrays.
 """
 
 # System Imports.
@@ -14,21 +18,53 @@ from resources import logging as init_logging
 logger = init_logging.get_logger(__name__)
 
 
-class DictAggregations():
+class Aggregations():
     """
-    These assume storage in the format of an array of dictionaries, where each dictionary represents a record.
     Forms aggregation functions on a single dict attribute for all present records.
-    Works fine for small data amounts of data but probably not very efficient for larger datasets.
     """
     @staticmethod
-    def min(struct, attribute):
+    def all(struct, attribute, display=True):
+        """
+        Runs every available aggregation on one attribute (key) and collects the results in a dict.
+        :param struct: Sequence of dict records to aggregate on.
+        :param attribute: Key whose values are aggregated.
+        :param display: Bool indicating if helper text should display (also passed to each aggregation).
+        :return: Dict keyed by aggregation name; 'mode' and 'mode_count' hold the first mode and its count.
+        """
+        if display:
+            logger.info('Finding all aggregations of "{0}".'.format(attribute))
+
+        # Run all aggregations and store to dictionary.
+        agg_dict = {}
+        agg_dict['min'] = Aggregations.min(struct, attribute, display=display)
+        agg_dict['max'] = Aggregations.max(struct, attribute, display=display)
+        agg_dict['sum'] = Aggregations.sum(struct, attribute, display=display)
+        agg_dict['mean'] = Aggregations.mean(struct, attribute, display=display)
+        agg_dict['median'] = Aggregations.median(struct, attribute, display=display)
+        mode_struct = Aggregations.mode(struct, attribute, display=display)
+        agg_dict['mode'] = numpy.atleast_1d(mode_struct.mode)[0]  # Array on older SciPy, scalar on newer.
+        agg_dict['mode_count'] = numpy.atleast_1d(mode_struct.count)[0]  # Same array-vs-scalar difference.
+        agg_dict['range'] = Aggregations.range(struct, attribute, display=display)
+        agg_dict['variance'] = Aggregations.variance(struct, attribute, display=display)
+        agg_dict['standard_deviation'] = Aggregations.standard_deviation(struct, attribute, display=display)
+        agg_dict['first_quartile'] = Aggregations.first_quartile(struct, attribute, display=display)
+        agg_dict['second_quartile'] = Aggregations.second_quartile(struct, attribute, display=display)
+        agg_dict['third_quartile'] = Aggregations.third_quartile(struct, attribute, display=display)
+
+        # Return aggregations.
+        return agg_dict
+
+    @staticmethod
+    def min(struct, attribute, display=True):
         """
         Finds the min of the attribute (key) for the provided dict.
         :param struct: Data structure to aggregate on.
         :param attribute: Attribute to aggregate.
+        :param display: Bool indicating if helper text should display.
         :return: Min of dict attribute.
         """
-        logger.info('Finding min of "{0}".'.format(attribute))
+        if display:
+            logger.info('Finding min of "{0}".'.format(attribute))
 
         # Create numpy array of attribute.
         np_array = numpy.array([x[attribute] for x in struct])
@@ -37,14 +73,16 @@ class DictAggregations():
         return numpy.min(np_array)
 
     @staticmethod
-    def max(struct, attribute):
+    def max(struct, attribute, display=True):
         """
         Finds the max of the attribute (key) for the provided dict.
         :param struct: Data structure to aggregate on.
         :param attribute: Attribute to aggregate.
+        :param display: Bool indicating if helper text should display.
         :return: Max of dict attribute.
         """
-        logger.info('Finding max of "{0}".'.format(attribute))
+        if display:
+            logger.info('Finding max of "{0}".'.format(attribute))
 
         # Create numpy array of attribute.
         np_array = numpy.array([x[attribute] for x in struct])
@@ -53,14 +91,16 @@ class DictAggregations():
         return numpy.max(np_array)
 
     @staticmethod
-    def sum(struct, attribute):
+    def sum(struct, attribute, display=True):
         """
         Sums the attribute (key) for the provided dict.
         :param struct: Data structure to aggregate on.
         :param attribute: Attribute to aggregate.
+        :param display: Bool indicating if helper text should display.
         :return: Sum of dict attribute.
         """
-        logger.info('Finding sum of "{0}".'.format(attribute))
+        if display:
+            logger.info('Finding sum of "{0}".'.format(attribute))
 
         # Create numpy array of attribute.
         np_array = numpy.array([x[attribute] for x in struct])
@@ -69,14 +109,16 @@ class DictAggregations():
         return numpy.sum(np_array)
 
     @staticmethod
-    def mean(struct, attribute):
+    def mean(struct, attribute, display=True):
         """
         Finds the mean of the attribute (key) for the provided dict.
         :param struct: Data structure to aggregate on.
         :param attribute: Attribute to aggregate.
-        :return: Sum of dict attribute.
+        :param display: Bool indicating if helper text should display.
+        :return: Mean of dict attribute.
         """
-        logger.info('Finding mean of "{0}".'.format(attribute))
+        if display:
+            logger.info('Finding mean of "{0}".'.format(attribute))
 
         # Create numpy array of attribute.
         np_array = numpy.array([x[attribute] for x in struct])
@@ -85,14 +127,16 @@ class DictAggregations():
         return numpy.mean(np_array)
 
     @staticmethod
-    def median(struct, attribute):
+    def median(struct, attribute, display=True):
         """
         Finds the median of the attribute (key) for the provided dict.
         :param struct: Data structure to aggregate on.
         :param attribute: Attribute to aggregate.
-        :return: Sum of dict attribute.
+        :param display: Bool indicating if helper text should display.
+        :return: Median of dict attribute.
         """
-        logger.info('Finding median of "{0}".'.format(attribute))
+        if display:
+            logger.info('Finding median of "{0}".'.format(attribute))
 
         # Create numpy array of attribute.
         np_array = numpy.array([x[attribute] for x in struct])
@@ -101,14 +145,16 @@ class DictAggregations():
         return numpy.median(np_array)
 
     @staticmethod
-    def mode(struct, attribute):
+    def mode(struct, attribute, display=True):
         """
         Finds the mode of the attribute (key) for the provided dict.
         :param struct: Data structure to aggregate on.
         :param attribute: Attribute to aggregate.
-        :return: Sum of dict attribute.
+        :param display: Bool indicating if helper text should display.
+        :return: Mode of dict attribute.
         """
-        logger.info('Finding mode of "{0}".'.format(attribute))
+        if display:
+            logger.info('Finding mode of "{0}".'.format(attribute))
 
         # Create numpy array of attribute.
         np_array = numpy.array([x[attribute] for x in struct])
@@ -117,14 +163,31 @@ class DictAggregations():
         return stats.mode(np_array)
 
     @staticmethod
-    def variance(struct, attribute):
+    def range(struct, attribute, display=True):
+        """
+        Computes the spread (max minus min) of the attribute (key) across the provided records.
+        :param struct: Data structure to aggregate on.
+        :param attribute: Key whose values are aggregated.
+        :param display: Bool indicating if helper text should display.
+        :return: Difference between the largest and smallest attribute value.
+        """
+        if display:
+            logger.info('Finding range of "{0}".'.format(attribute))
+
+        highest = Aggregations.max(struct, attribute, display=False)
+        return highest - Aggregations.min(struct, attribute, display=False)
+
+    @staticmethod
+    def variance(struct, attribute, display=True):
         """
         Finds the variance of the attribute (key) for the provided dict.
         :param struct: Data structure to aggregate on.
         :param attribute: Attribute to aggregate.
-        :return: Sum of dict attribute.
+        :param display: Bool indicating if helper text should display.
+        :return: Variance of dict attribute.
         """
-        logger.info('Finding variance of "{0}".'.format(attribute))
+        if display:
+            logger.info('Finding variance of "{0}".'.format(attribute))
 
         # Create numpy array of attribute.
         np_array = numpy.array([x[attribute] for x in struct])
@@ -133,17 +196,64 @@ class DictAggregations():
         return numpy.var(np_array)
 
     @staticmethod
-    def standard_deviation(struct, attribute):
+    def standard_deviation(struct, attribute, display=True):
         """
         Finds the standard deviation of the attribute (key) for the provided dict.
         :param struct: Data structure to aggregate on.
         :param attribute: Attribute to aggregate.
-        :return: Sum of dict attribute.
+        :param display: Bool indicating if helper text should display.
+        :return: Standard deviation of dict attribute.
         """
-        logger.info('Finding standard deviation of "{0}".'.format(attribute))
+        if display:
+            logger.info('Finding standard deviation of "{0}".'.format(attribute))
 
         # Create numpy array of attribute.
         np_array = numpy.array([x[attribute] for x in struct])
 
         # Return aggregation.
         return numpy.std(np_array)
+
+    @staticmethod
+    def first_quartile(struct, attribute, display=True):
+        """
+        Finds the first quartile of the attribute (key): the median of the lower half of the sorted records.
+        :param struct: Sequence of dict records to aggregate on.
+        :param attribute: Attribute to aggregate.
+        :param display: Bool indicating if helper text should display.
+        :return: First quartile of dict attribute. With an odd record count the middle record is excluded.
+        """
+        if display:
+            logger.info('Finding first quartile of "{0}".'.format(attribute))
+
+        ordered = sorted(struct, key=lambda record: record[attribute])  # Halves only make sense on sorted data.
+        return Aggregations.median(ordered[:len(ordered) // 2], attribute, display=False)
+
+    @staticmethod
+    def second_quartile(struct, attribute, display=True):
+        """
+        Finds the second quartile of the attribute (key), which is simply its median.
+        :param struct: Data structure to aggregate on.
+        :param attribute: Key whose values are aggregated.
+        :param display: Bool indicating if helper text should display.
+        :return: Second quartile (median) of dict attribute.
+        """
+        if display:
+            logger.info('Finding second quartile (median) of "{0}".'.format(attribute))
+
+        midpoint = Aggregations.median(struct, attribute, display=False)
+        return midpoint
+
+    @staticmethod
+    def third_quartile(struct, attribute, display=True):
+        """
+        Finds the third quartile of the attribute (key): the median of the upper half of the sorted records.
+        :param struct: Sequence of dict records to aggregate on.
+        :param attribute: Attribute to aggregate.
+        :param display: Bool indicating if helper text should display.
+        :return: Third quartile of dict attribute. With an odd record count the middle record is excluded.
+        """
+        if display:
+            logger.info('Finding third quartile of "{0}".'.format(attribute))
+
+        ordered = sorted(struct, key=lambda record: record[attribute])  # Halves only make sense on sorted data.
+        return Aggregations.median(ordered[(len(ordered) + 1) // 2:], attribute, display=False)
-- 
GitLab