Skip to content
Snippets Groups Projects
Commit 1b300e7f authored by Brandon Rodriguez's avatar Brandon Rodriguez
Browse files

Initial part of apriori

Currently only gets frequency count for first candidate set.
parent a1ea7e09
Branches
No related merge requests found
# Clear the console by emitting a single form-feed character (octal 014).
# One form feed is sufficient; sending it twice was redundant.
cat("\014")
###
# Thin wrapper around "cat" that finishes every message with a trailing string (a newline by default).
#
# "cat" produces cleaner output than "print", which tends to add extra, frivolous decoration.
# Appending the newline automatically gives the behavior one would intuitively expect from "print".
#
# :param message: Value handed to "cat" as the message body.
# :param trailing: Value handed to "cat" right after the message. Defaults to a newline.
#
# See references.md file for source of idea.
##
catn <- function(message, trailing = "\n") {
  # Message first, terminator second; each goes through its own "cat" call.
  cat(message)
  invisible(cat(trailing))
}
###
# Entry point for the (work-in-progress) apriori run.
#
# Level 1 is seeded with the supplied itemsets. While the current level holds a non-NULL set of
# frequent itemsets, the level is advanced and "apriori_gen_candidates" is asked for that level's
# candidates. The frequent itemsets are not updated inside the loop in this version.
#
# :param itemsets: Starting itemsets, stored as level 1 of both result lists.
# :param display_candidates: When TRUE, print the candidate itemsets under a heading.
# :param display_frequent: When TRUE, print the frequent itemsets under a heading.
##
apriori <- function(itemsets, display_candidates = TRUE, display_frequent = FALSE) {
  # Prints a blank line, a heading, and then the value itself.
  show_section <- function(heading, value) {
    catn("")
    catn(heading)
    print(value)
  }

  # Slot 1 holds the starting itemsets; slot 2 starts out as NULL.
  current_level <- 1
  candidates_by_level <- list(itemsets, NULL)
  frequent_by_level <- list(itemsets, NULL)

  # Keep advancing until the current level has no frequent itemsets.
  repeat {
    if (is.null(frequent_by_level[[current_level]])) {
      break
    }
    current_level <- current_level + 1
    candidates_by_level[[current_level]] <- apriori_gen_candidates(
      frequent_by_level[[current_level - 1]],
      current_level
    )
  }

  # Optional report of the candidate itemsets.
  if (display_candidates) {
    show_section("Candidate Itemsets", candidates_by_level)
  }

  # Optional report of the known frequent itemsets.
  if (display_frequent) {
    show_section("Frequent Itemsets", frequent_by_level)
  }
}
###
# Count how often each individual item occurs across the given itemsets.
#
# Every itemset in "prev_freq_sets" is walked item by item, and each item is tallied in a named
# numeric vector: the names are the items converted to strings, the values are the counts.
#
# :param prev_freq_sets: List of itemsets (vectors of items) to scan.
# :param level: Current apriori level. Accepted from the caller but not used yet.
# :param verbose: When TRUE (the default), print a step-by-step trace of the counting.
# :return: Named numeric vector of per-item counts, or NULL when no item was seen.
##
apriori_gen_candidates <- function(prev_freq_sets, level, verbose = TRUE) {
  # Emit a trace line only when tracing is enabled. The argument is lazily evaluated,
  # so no message is even built when "verbose" is FALSE.
  say <- function(text) {
    if (verbose) {
      catn(text)
    }
  }

  candidates <- NULL

  # Loop through our previous frequent sets.
  for (prev_freq_set in prev_freq_sets) {
    say('')
    say('================================================================================')
    say(paste('Itemset: (', paste(prev_freq_set, collapse=', '), ')'))

    # Loop through each item in the set. "seq_along" yields nothing for an empty set,
    # whereas "1:length(x)" would wrongly iterate over c(1, 0).
    for (item_index in seq_along(prev_freq_set)) {
      say('')
      say('====================')
      item <- prev_freq_set[item_index]
      say(paste('New Item from Set:', item))

      # Always address the counter by NAME. Indexing with a numeric item would select by
      # position instead, which corrupts the counts unless items happen to equal positions.
      key <- as.character(item)

      if (is.null(candidates)) {
        # Counter vector does not yet exist. Initialize with this first item.
        candidates <- c(1)
        names(candidates) <- key
      } else {
        if (key %in% names(candidates)) {
          # Item already counted. Increment its counter.
          say('Not Null')
          candidates[key] <- candidates[key] + 1
        } else {
          # Item not yet counted. Assigning to a new name appends a fresh counter.
          say('Null')
          candidates[key] <- 1
        }
        if (verbose) {
          print(candidates)
        }
      }
      say('====================')
    }

    say('')
    say('================================================================================')
    say('')
  }

  # Return found candidates.
  candidates
}
###
# Placeholder for the subset check. Takes no arguments and has no logic yet;
# it simply evaluates to NULL.
##
apriori_check_subsets <- function() {
  NULL
}
# Sample data: eight itemsets of three items each, using the items 1 through 5.
itemsets <- list(
  c(1, 2, 3),
  c(1, 2, 4),
  c(1, 2, 5),
  c(1, 3, 4),
  c(1, 3, 5),
  c(2, 3, 4),
  c(2, 3, 5),
  c(3, 4, 5)
)

# Run apriori on the sample data with its default display options.
apriori(itemsets)
......@@ -16,3 +16,7 @@ All references to external logic. Includes anything from stack overflow links to
### Run R Script in Terminal
Useful because RStudio seems like a pretty bad IDE; surprisingly, running scripts from a terminal alongside Sublime Text works better.
<https://support.rstudio.com/hc/en-us/articles/218012917-How-to-run-R-scripts-from-the-command-line>
### Alternative to the "Print" Function
Useful because "print" doesn't always behave how I expect and tends to give additional, "dirty" output that I don't want.
<https://stackoverflow.com/questions/11230957/alternative-to-print-and-cat>
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment