# Additions to a4.r: dataset preview steps for parts 2.1 - 2.3.
# NOTE(review): catn() is defined elsewhere in a4.r; presumably it prints its
# argument followed by a newline -- confirm against its definition.


###
 # Read a headerless, comma-separated data file, label its columns, and print an overview of it.
 #
 # Output order: a blank line, the optional str() listing, then summary(), head() and tail()
 # of the dataset, each under its own heading and followed by a blank line.
 #
 # Reference for importing data from a file (as listed in documents/references.md):
 # <https://www.statmethods.net/input/importingdata.html>
 #
 # @param path             Path of the data file to read.
 # @param attribute_names  Column names to assign, in file column order.
 # @param show_structure   If TRUE, also show the str() listing before the summary.
 ##
show_dataset_overview <- function(path, attribute_names, show_structure = FALSE) {
    # Read in dataset from file.
    dataset <- read.table(path, header = FALSE, sep = ',')

    # Set dataset attribute names.
    colnames(dataset) <- attribute_names

    # Display dataset.
    catn('')
    if (show_structure) {
        catn('str(dataset):')
        # str() writes its listing itself and returns NULL; wrapping it in
        # print() would emit a stray "NULL" line after the listing.
        str(dataset)
        catn('')
    }
    catn('Summary(dataset):')
    print(summary(dataset))
    catn('')
    catn('Head(dataset):')
    print(head(dataset))
    catn('')
    catn('Tail(dataset):')
    print(tail(dataset))
    catn('')

    catn('')
}


###
 # "Balance Scale data with 4 attributes and 625 records. 400 records for training."
 #
 # Prints the part heading, then the structure, summary, head and tail of the dataset.
 ##
part_2_1 <- function() {
    catn('')
    catn('Part 2.1:')

    show_dataset_overview(
        './documents/data/balance-scale.data',
        c('Name', 'L-Weight', 'L-Dist', 'R-Weight', 'R-Dist'),
        show_structure = TRUE
    )
}


###
 # "Nursery data with 8 attributes and 12,960 records. 8,000 records for training."
 #
 # Prints the part heading, then the summary, head and tail of the dataset.
 ##
part_2_2 <- function() {
    catn('')
    catn('Part 2.2:')

    show_dataset_overview(
        './documents/data/nursery.data',
        c('Parents', 'Nursery', 'Form', 'Children', 'Housing', 'Finance', 'Social', 'Health', 'Recommendation')
    )
}


###
 # "LED data with 7 attributes and 10,000 records. 8,000 records for training."
 #
 # Prints the part heading, then the summary, head and tail of the dataset.
 ##
part_2_3 <- function() {
    catn('')
    catn('Part 2.3:')

    show_dataset_overview(
        './documents/data/led.data',
        c('D1', 'D2', 'D3', 'D4', 'D5', 'D6', 'D7', 'Class')
    )
}


# View dataset.
# view_iris_dataset()

# View code parts, provided by assignment description file.
# part_1_1()
# part_1_2()
# part_1_3()
# part_1_4()

# Data build parts.
part_2_1()
part_2_2()
part_2_3()