# Clear console.
cat('\014')


###
 # The "cat" function seems to be an all around better alternative to "print".
 # ("print" has a tendency to add extra, frivolous output that makes it look dirty and difficult to read.)
 #
 # This wrapper for "cat" then automatically adds a newline to the end of the message, effectively accomplishing
 # what you would intuitively expect "print" to do.
 #
 # message:  Value(s) handed to "cat" unchanged.
 # trailing: Text written right after the message. Defaults to a newline.
 #
 # Source of idea (also recorded in documents/references.md):
 # <https://stackoverflow.com/questions/11230957/alternative-to-print-and-cat>
 ##
catn <- function(message, trailing='\n') {
    cat(message)
    cat(trailing)
}


###
 # Entry point for the apriori run over a list of itemsets.
 #
 # itemsets:           List of vectors, one vector of items per itemset.
 # display_candidates: When TRUE, print the candidate itemsets gathered per level.
 # display_frequent:   When TRUE, print the frequent itemsets known per level.
 #
 # Called for its printed output; the return value is not meaningful.
 #
 # NOTE(review): Nothing inside the loop stores new frequent itemsets yet, so level 2 stays NULL and the
 # loop body runs exactly once. Turning candidates into frequent itemsets still has to be implemented.
 ##
apriori <- function(itemsets, display_candidates=TRUE, display_frequent=FALSE) {
    # Initialize variables. Index 1 holds the raw itemsets; index 2 starts out as NULL.
    level <- 1
    candidate_itemsets <- list(itemsets, NULL)
    frequent_itemsets <- list(itemsets, NULL)

    # Loop until no more frequent itemsets are found for current level.
    while (!is.null(frequent_itemsets[[level]])) {
        level <- level + 1

        candidate_itemsets[[level]] <- apriori_gen_candidates(frequent_itemsets[[level - 1]], level)
    }

    # Optionally display candidate itemsets.
    if (display_candidates) {
        catn('')
        catn('Candidate Itemsets')
        print(candidate_itemsets)
    }

    # Optionally display known frequent itemsets.
    if (display_frequent) {
        catn('')
        catn('Frequent Itemsets')
        print(frequent_itemsets)
    }
}


###
 # Generate all possible candidate itemsets.
 #
 # In its current form this counts how often each individual item occurs across the given itemsets.
 #
 # prev_freq_sets: List of item vectors (the frequent itemsets of the previous level).
 # level:          Level the candidates are generated for. Currently unused.
 #
 # Returns a named numeric vector (names are the items, values are their occurrence counts), or NULL when
 # no item was seen. Progress is written to the console along the way.
 ##
apriori_gen_candidates <- function(prev_freq_sets, level) {
    candidates <- NULL

    # Loop through our previous frequent sets.
    for (prev_freq_set in prev_freq_sets) {
        catn('')
        catn('================================================================================')
        catn(paste('Itemset: (', paste(prev_freq_set, collapse=', '), ')'))

        # Loop through each item in frequent sets.
        # seq_along keeps an empty itemset from being walked as indices 1, 0.
        for (item_index in seq_along(prev_freq_set)) {

            catn('')
            catn('====================')
            item <- prev_freq_set[item_index]
            catn(paste('New Item from Set:', item))

            # Counters are looked up by name. Indexing with the bare numeric item would be treated as a
            # position and hit the wrong slot (or pad the vector with NA) for items such as 10 or 20.
            key <- as.character(item)

            # Update item frequency in candidates dictionary.
            if (is.null(candidates)) {
                # Candidates dict does not yet exist. Initialize.
                candidates <- c(1)
                names(candidates) <- key

            } else {
                # Candidates dict exists. Update accordingly.
                if (key %in% names(candidates)) {
                    # Item in list. Increment counter.
                    catn('Not Null')
                    candidates[key] <- candidates[key] + 1
                } else {
                    # Item not yet in list. Add.
                    catn('Null')
                    candidates[key] <- 1
                }
                print(candidates)
            }

            catn('====================')
        }

        catn('')
        catn('================================================================================')
        catn('')
    }

    # Return found candidates.
    candidates
}


###
 # Placeholder for the subset check step. Not implemented yet; calling it returns NULL.
 ##
apriori_check_subsets <- function() {

}


# Create itemset.
itemsets <- list(c(1, 2, 3), c(1, 2, 4), c(1, 2, 5), c(1, 3, 4), c(1, 3, 5), c(2, 3, 4), c(2, 3, 5), c(3, 4, 5))

# Run apriori.
apriori(itemsets)