diff --git a/a3.r b/a3.r
index 2bed78220baa051f36c396beca675ba0276dc554..eb2abaf8f7f9a82fc8d5733b0f802c00f7051840 100644
--- a/a3.r
+++ b/a3.r
@@ -20,19 +20,28 @@ catn <- function(message, trailing='\n') {
 ###
  #
  ##
-apriori <- function(itemsets, display_candidates=TRUE, display_frequent=FALSE) {
+apriori <- function(itemsets, min_support, display_candidates=TRUE, display_frequent=TRUE) {
     # Initialize variables.
     level = 1
-    candidate_itemsets = list(itemsets, NULL)
-    frequent_itemsets = list(itemsets, NULL)
+    candidate_itemsets <- list(itemsets)
+    frequent_itemsets <- list(itemsets)
 
     # Loop until no more frequent itemsets are found for current level.
-    while (! is.null(frequent_itemsets[[level]])) {
-        level = level + 1
+    # Note that we seem to have to check length, because otherwise R errors when checking if a list index exists.
+    # This only works because the R "length" function doesn't count NULL values towards length.
+    while (length(frequent_itemsets) >= level) {
+        level <- level + 1
 
-        candidate_itemsets[[level]] = apriori_gen_candidates(frequent_itemsets[[level - 1]], level)
+        candidate_itemsets[[level]] <- apriori_gen_candidates(frequent_itemsets[[level - 1]], level)
+
+        frequent_itemsets <- c(frequent_itemsets, apriori_determine_freq_sets(candidate_itemsets[[level]], min_support))
+
+        catn('Freq Itemsets:')
+        print(frequent_itemsets)
     }
 
+    catn('================================================================================')
+
     # Optionally display candidate itemsets.
     if (display_candidates) {
         catn('')
@@ -106,6 +115,49 @@ apriori_gen_candidates <- function(prev_freq_sets, level) {
 }
 
 
+###
+ # Select the candidates whose support count meets the minimum support.
+ #
+ # candidate_itemset: support counts, one element per candidate; element
+ #     names (if any) are carried over to the result.
+ # min_support: a candidate is kept when its count is >= this value.
+ #
+ # Prints the candidates, their names, the threshold and the result.
+ # Returns the kept elements, or NULL when no candidate qualifies.
+ ##
+apriori_determine_freq_sets <- function(candidate_itemset, min_support) {
+    catn('')
+    catn('Finding frequent itemsets from candidates.')
+    catn('Candidates:')
+    print(candidate_itemset)
+    catn('')
+
+    cand_names <- names(candidate_itemset)
+    catn('cand_names:')
+    catn(cand_names)
+
+    # Filter by position in one vectorized step: per-name lookup skipped
+    # unnamed candidates and resolved duplicated names to the first match.
+    # which() also drops NA comparisons instead of erroring inside if().
+    freq_set <- candidate_itemset[which(candidate_itemset >= min_support)]
+
+    # Keep returning NULL (not a zero-length vector) when nothing qualifies.
+    if (length(freq_set) == 0) {
+        freq_set <- NULL
+    }
+
+    catn('')
+    catn(paste('min_support:', min_support))
+    catn('Final freq set:')
+    print(freq_set)
+
+    catn('')
+
+    # Return found freq candidates.
+    return(freq_set)
+}
+
+
 ###
  #
  ##
@@ -115,7 +167,8 @@ apriori_check_subsets <- function() {
 
 
 # Create itemset.
-itemsets <- list(c(1, 2, 3), c(1, 2, 4), c(1, 2, 5), c(1, 3, 4), c(1, 3, 5), c(2, 3, 4), c(2, 3, 5), c(3, 4, 5))
+# itemsets <- list(c(1, 2, 3), c(1, 2, 4), c(1, 2, 5), c(1, 3, 4), c(1, 3, 5), c(2, 3, 4), c(2, 3, 5), c(3, 4, 5))
+itemsets <- list(c('A', 'C', 'D'), c('B', 'C', 'E'), c('A', 'B', 'C', 'E'), c('B', 'E'))
 
 # Run apriori.
-apriori(itemsets)
+apriori(itemsets, 2)