diff --git a/main.py b/main.py index 6aa6b99442be5c9c664cc3b024d197ceaa14c040..4bed905266f070622ae64d28e8b8db05d3aa321a 100644 --- a/main.py +++ b/main.py @@ -24,8 +24,12 @@ housing_data = pandas.read_csv('./Documents/other_housing.csv') logger.info('') normalizer = neural_net.Normalizer() normalized_data = normalizer.normalize_data(housing_data) -features = normalized_data.loc[:, normalized_data.columns != 'SalePrice'] -targets = normalized_data['SalePrice'] + +# Randomize data order. +randomized_data = normalized_data.iloc[numpy.random.permutation(len(normalized_data))] + +features = randomized_data.loc[:, randomized_data.columns != 'SalePrice'] +targets = randomized_data['SalePrice'] logger.info('') # logger.info('Normalized Features: \n{0}'.format(features)) @@ -33,4 +37,11 @@ logger.info('') # Start neural net. backprop = neural_net.BackPropNet(features) -backprop.train(features.values, targets.values) + +max_index = 0 +while max_index < len(features): + min_index = max_index + max_index += 20 + training_features = features[min_index:max_index] + training_targets = targets[min_index:max_index] + backprop.train(training_features.values, training_targets.values) diff --git a/neural_net.py b/neural_net.py index f975db463a0714eb2ccd83a6425fa168528f3265..b2d76a17cf633e3f9c45904127a4823b0ea6e470 100644 --- a/neural_net.py +++ b/neural_net.py @@ -41,26 +41,26 @@ class Normalizer(): continuous_columns = [ 'Lot Frontage', 'Lot Area', 'Mas Vnr Area', 'BsmtFin SF 1', 'BsmtFin SF 2', 'Bsmt Unf SF', 'Total Bsmt SF', - # '1st Flr SF', '2nd Flr SF', 'Low Qual Fin SF', 'Gr Liv Area', 'Garage Area', 'Wood Deck SF', - # 'Open Porch SF', 'Enclosed Porch', '3Ssn Porch', 'Screen Porch', 'Pool Area', 'Misc Val', + '1st Flr SF', '2nd Flr SF', 'Low Qual Fin SF', 'Gr Liv Area', 'Garage Area', 'Wood Deck SF', + 'Open Porch SF', 'Enclosed Porch', '3Ssn Porch', 'Screen Porch', 'Pool Area', 'Misc Val', ] discrete_columns = [ - # 'Year Built', 'Year Remod/Add', 'Bsmt Full Bath', 'Bsmt Half Bath', 'Full Bath', 'Half Bath', - # 'Bedroom AbvGr', 'Kitchen AbvGr', 'TotRms AbvGrd', 'Fireplaces', 'Garage Yr Blt', 'Garage Cars', 'Mo Sold', - # 'Yr Sold', + 'Year Built', 'Year Remod/Add', 'Bsmt Full Bath', 'Bsmt Half Bath', 'Full Bath', 'Half Bath', + 'Bedroom AbvGr', 'Kitchen AbvGr', 'TotRms AbvGrd', 'Fireplaces', 'Garage Yr Blt', 'Garage Cars', 'Mo Sold', + 'Yr Sold', ] categorical_columns = [ - # 'MS SubClass', 'MS Zoning', 'Street', 'Alley', 'Land Contour', 'Lot Config', 'Neighborhood', 'Condition 1', - # 'Condition 2', 'Bldg Type', 'House Style', 'Roof Style', 'Roof Matl', 'Exterior 1st', 'Exterior 2nd', - # 'Mas Vnr Type', 'Foundation', 'Heating', 'Central Air', 'Garage Type', 'Misc Feature', 'Sale Type', - # 'Sale Condition', + 'MS SubClass', 'MS Zoning', 'Street', 'Alley', 'Land Contour', 'Lot Config', 'Neighborhood', 'Condition 1', + 'Condition 2', 'Bldg Type', 'House Style', 'Roof Style', 'Roof Matl', 'Exterior 1st', 'Exterior 2nd', + 'Mas Vnr Type', 'Foundation', 'Heating', 'Central Air', 'Garage Type', 'Misc Feature', 'Sale Type', + 'Sale Condition', ] categorical_dict = {} ordinal_columns = [ - # 'Lot Shape', 'Land Slope', 'Overall Qual', 'Overall Cond', 'Exter Qual', 'Exter Cond', 'Bsmt Qual', - # 'Bsmt Cond', 'Bsmt Exposure', 'BsmtFin Type 1', 'BsmtFin Type 2', 'Heating QC', 'Electrical', - # 'Kitchen Qual', 'Functional', 'Fireplace Qu', 'Garage Finish', 'Garage Qual', 'Garage Cond', 'Paved Drive', - # 'Pool QC', 'Fence', + 'Lot Shape', 'Land Slope', 'Overall Qual', 'Overall Cond', 'Exter Qual', 'Exter Cond', 'Bsmt Qual', + 'Bsmt Cond', 'Bsmt Exposure', 'BsmtFin Type 1', 'BsmtFin Type 2', 'Heating QC', 'Electrical', + 'Kitchen Qual', 'Functional', 'Fireplace Qu', 'Garage Finish', 'Garage Qual', 'Garage Cond', 'Paved Drive', + 'Pool QC', 'Fence', ] ordinal_dict = {} ignored_columns = ['Utilities',] @@ -202,7 +202,6 @@ class BackPropNet(): Weight values are randomized values near 0, using a normal distribution. :param data: Data to reference for input layer count. """ - logger.info('Columns: {0}'.format(data.columns)) # Create first hidden layer. hidden_layer_1 = [] for index in range(self.hidden_layer_size): @@ -227,10 +226,10 @@ class BackPropNet(): self.network.append(hidden_layer_2) self.network.append(output_layer) - logger.info('Network:') + # logger.info('Network:') index = 0 for layer in self.network: - logger.info('Layer {0}: {1}'.format(index, layer)) + # logger.info('Layer {0}: {1}'.format(index, layer)) index += 1 def _activation(self, weights, inputs): @@ -312,14 +311,14 @@ class BackPropNet(): :param targets: Desired prediction. :return: Delta of error difference. """ - return ( (targets - prediction) ** 2) # TODO: Is this ever used? + return ( (targets - prediction) ** 2) def train(self, features, targets): """ Trains net based on provided data. :param data: Data to train on. """ - logger.info('Initial Inputs: {0}'.format(features)) + # logger.info('Initial Inputs: {0}'.format(features)) prediction = [] self.layer_inputs = [] for index in range(len(features)): @@ -330,6 +329,7 @@ class BackPropNet(): self._backward_propagate(features, targets, prediction, delta_error) logger.info('Expected targets: {0}'.format(targets)) logger.info('Predicted targets: {0}'.format(prediction)) + logger.info('') def predict(self, data): """