diff --git a/a3.r b/a3.r index eb2abaf8f7f9a82fc8d5733b0f802c00f7051840..71f17fec79b4f672d3039d9b285398dda33c1576 100644 --- a/a3.r +++ b/a3.r @@ -21,10 +21,13 @@ catn <- function(message, trailing='\n') { # ## apriori <- function(itemsets, min_support, display_candidates=TRUE, display_frequent=TRUE) { + catn('Running apriori.') + catn('') + # Initialize variables. - level = 1 - candidate_itemsets <- list(itemsets) - frequent_itemsets <- list(itemsets) + level = 0 + candidate_itemsets <- NULL + frequent_itemsets <- NULL # Loop until no more frequent itemsets are found for current level. # Note that we seem to have to check length, because otherwise R errors when checking if a list index exists. @@ -32,12 +35,20 @@ apriori <- function(itemsets, min_support, display_candidates=TRUE, display_freq while (length(frequent_itemsets) >= level) { level <- level + 1 - candidate_itemsets[[level]] <- apriori_gen_candidates(frequent_itemsets[[level - 1]], level) + # Check level. + if (level == 1) { + # First level. Initialize frequency candidacy. + ret_val <- apriori_initialize_candidates(itemsets, level) + candidate_itemsets[[level]] <- ret_val[[1]] - frequent_itemsets <- c(frequent_itemsets, apriori_determine_freq_sets(candidate_itemsets[[level]], min_support)) + # Get true frequent itemset from candidates. + frequent_itemsets <- list(apriori_determine_freq_sets(ret_val[[1]], ret_val[[2]], min_support)) + } else { + # Not first level. + } - catn('Freq Itemsets:') - print(frequent_itemsets) + # catn('Freq Itemsets:') + # print(frequent_itemsets) } catn('================================================================================') @@ -59,25 +70,23 @@ apriori <- function(itemsets, min_support, display_candidates=TRUE, display_freq ### - # Generate all possible candidate itemsets. + # Generate all possible candidate sets from initial itemset. ## -apriori_gen_candidates <- function(prev_freq_sets, level) { +apriori_initialize_candidates <- function(initial_set, level, debug=FALSE) { + if (debug) { + catn('') + catn('Finding candidates.') + } + candidates <- NULL cand_names <- NULL # Loop through our previous frequent sets. - for (prev_freq_set in prev_freq_sets) { - catn('') - catn('================================================================================') - catn(paste('Itemset: (', paste(prev_freq_set, collapse=', '), ')')) + for (set in initial_set) { # Loop through each item in frequent sets. - for (item_index in 1:length(prev_freq_set)) { - - catn('') - catn('====================') - item <- prev_freq_set[item_index] - catn(paste('New Item from Set:', item)) + for (item_index in 1:length(set)) { + item <- set[item_index] # Update item frequency in candidates dictionary. if (is.null(candidates)) { @@ -90,49 +99,50 @@ apriori_gen_candidates <- function(prev_freq_sets, level) { # Candidates dict exists. Update accordingly. if (is.element(item, cand_names)) { # Item in list. Increment counter. - catn('Not Null') candidates[item] <- candidates[item] + 1 } else { # Item not yet in list. Add. - catn('Null') candidates <- c(candidates, 1) cand_names <- c(cand_names, item) names(candidates) <- cand_names } - print(candidates) } - - catn('====================') } + } - catn('') - catn('================================================================================') + if (debug) { + catn('Final candidate set:') + print(candidates) catn('') } # Return found candidates. - return(candidates) + return(list(candidates, cand_names)) } ### # ## -apriori_determine_freq_sets <-function(candidate_itemset, min_support) { - catn('') - catn('Finding frequent itemsets from candidates.') - catn('Candidates:') - print(candidate_itemset) - catn('') +apriori_gen_candidates <- function() { + +} - cand_names <- names(candidate_itemset) - catn('cand_names:') - catn(cand_names) + +### + # + ## +apriori_determine_freq_sets <-function(candidate_itemset, cand_names, min_support, debug=FALSE) { + if (debug) { + catn('') + catn('Finding frequent itemsets from candidates.') + } freq_set <- NULL freq_names <- NULL # Loop through all candidates. for (item in cand_names) { + # catn(paste('item:', item)) if (candidate_itemset[item] >= min_support) { # Candidate is true frequent. Add to frequent set. @@ -150,12 +160,10 @@ apriori_determine_freq_sets <-function(candidate_itemset, min_support) { } } - catn('') - catn(paste('min_support:', min_support)) - catn('Final freq set:') - print(freq_set) - - catn('') + if (debug) { + catn('Final freq set:') + print(freq_set) + } # Return found freq candidates. return(freq_set)