diff --git a/documents/a3_report.pdf b/documents/a3_report.pdf new file mode 100644 index 0000000000000000000000000000000000000000..ed9cde890b946d7e8edf6c70fdcdd00c89972ab6 Binary files /dev/null and b/documents/a3_report.pdf differ diff --git a/documents/latex/a3_report.tex b/documents/latex/a3_report.tex new file mode 100644 index 0000000000000000000000000000000000000000..9988f13ef7d6c985636532413d97123aa417e9ee --- /dev/null +++ b/documents/latex/a3_report.tex @@ -0,0 +1,62 @@ +\documentclass[a4paper,12pt]{article} + + +% For LaTeX basics, see http://www.docs.is.ed.ac.uk/skills/documents/3722/3722-2014.pdf. +% For help on symbols, see https://oeis.org/wiki/List_of_LaTeX_mathematical_symbols. +% For info on state machines, look up YouTube "Neso Academy" channel. + + +% Import Packages. +% Multi-column package. Allows formatting in columns. +\usepackage{multicol} + + +% Adjust page margins to not be so excessive. +\addtolength{\oddsidemargin}{-.2in} +\addtolength{\evensidemargin}{-.2in} +\addtolength{\textwidth}{.4in} +\addtolength{\topmargin}{-.8in} +\begin{document} + \title{CS6530 Assignment 3 Report} + \author{Brandon Rodriguez} + \date{\today} + \maketitle + + \vspace{-3ex}~ + + + \section* {Question 1} + The following are based on the given ``frequent 3-item sets'':\\ + \{1, 2, 3\}, \{1, 2, 4\}, \{1, 2, 5\}, \{1, 3, 4\}, \{1, 3, 5\}, \{2, 3, 4\}, \{2, 3, 5\}, \{3, 4, 5\} + + \subsection* {1.a) Candidate 4-item Sets:} + \{1, 2, 3, 4\}, \{1, 2, 3, 5\}, \{1, 2, 4, 5\}, \{1, 3, 4, 5\}, \{2, 3, 4, 5\} + + \subsection* {1.b) Frequent 4-item Sets:} + \{1, 2, 3, 4\}, \{1, 2, 3, 5\} + + + \section* {Question 2} + Questions about using a Hash Tree data structure to find frequent itemsets. See assignment for diagram. + + \subsection* {2.a) Leaf nodes visited to find 3-itemset candidates of \{1, 3, 4, 5, 8\}:} + I think it'd be \{ L5, L1, L3, L9, L11\}? I'm not entirely sure. I am a bit fuzzy on how hash trees work. 
+ + \subsection* {2.b) Candidate itemsets generated from part a:} + \{1, 4, 5\}, \{1, 5, 8\}, and \{4, 5, 8\}? + + + \section* {Question 3} + See ``a3.r'' file for code to generate parts b and c. + + \subsection* {3.a) Draw lattice of dataset candidate selection.} + See ``a3\_q3\_a.png'' image file in project root. + + \subsection* {3.b) Use R ``arules'' package to get frequent itemsets with min\_support of 30\%. Sorted by ``support'' value.} + See ``a3\_q3\_b.png'' image file in project root. + + \subsection* {3.c) Use R ``arules'' package to get frequent itemsets with min\_support of 30\% and min\_conf of 50\%. Sorted by ``lift'' value.} + See ``a3\_q3\_c.png'' image file in project root. + + +\end{document} \ No newline at end of file diff --git a/documents/references.md b/documents/references.md index 458cbb110498d323536d34e4c5a18230d5bd391e..acae42dd9c92ca3779f8cd75b94ae41866b19aed 100644 --- a/documents/references.md +++ b/documents/references.md @@ -5,6 +5,10 @@ All references to external logic. Includes anything from stack overflow links to notes about logic from previous works. +## Really Good Slides on Finding Patterns +<http://www.cs.uoi.gr/~tsap/teaching/2012f-cs059/material/datamining-lect3.pdf> + + ## R ### R Packages and Apriori diff --git a/readme.md b/readme.md index 0625bb46b7608a407e9c528cd0b3b2e04536eb39..c233caf7f0da1ec692ba35d7ded860792f0370bb 100644 --- a/readme.md +++ b/readme.md @@ -2,7 +2,7 @@ ## Description -Checks for frequent patterns within datasets. +Practice checking for frequent patterns within datasets. Full pdf report is available at `documents/a3_report.pdf`.