# Attach every package this script depends on, installing any that are
# missing first. `packages` keeps the package names after the loop.
packages <- c("tidyverse")
for (pkg in packages) {
  # requireNamespace() only checks availability (it does not attach), so a
  # missing package leads to an install rather than a require() warning.
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
  # library() stops with an error if the package still cannot be loaded.
  library(pkg, character.only = TRUE)
}
# Take the `diamonds` dataset (provided by ggplot2, which tidyverse attaches)
# and drop every row that contains a missing value. The dataset is passed to
# na.omit() exactly once; no duplicate argument is supplied.
data <- na.omit(diamonds)

# Per-column overview of the cleaned data.
summary(data)
# Mean: the sum of the values divided by the number of values in a data series.
# Arithmetic mean of the carat column.
mean(data[["carat"]])

# Histogram of carat (bins one unit wide) with a dashed blue vertical line
# drawn at the mean of carat.
ggplot(data = data, mapping = aes(x = carat)) +
  geom_histogram(binwidth = 1, fill = "lightblue") +
  geom_vline(
    mapping = aes(xintercept = mean(carat)),
    color = "blue",
    linetype = "dashed",
    size = 1
  ) +
  labs(
    title = "Histogram of Values with Mean Line",
    x = "Carat",
    y = "Count"
  )
# Histogram of carat scaled to density, overlaid with a kernel density curve.
ggplot(data, aes(x = carat)) +
  # after_stat(density) replaces the deprecated `..density..` notation; it
  # puts the bars on the same scale as the density curve.
  geom_histogram(
    aes(y = after_stat(density)),
    colour = "black",
    fill = "white"
  ) +
  geom_density(alpha = 0.2, fill = "#FF6666") +
  # The y axis shows density rather than raw counts, so label it accordingly.
  labs(
    title = "Histogram of Values with Density Curve",
    x = "Carat",
    y = "Density"
  )
# Box plot of carat for each cut, filled by cut. Outliers are drawn as large
# red points (shape 8), stat_summary() overlays the mean as a point
# (shape 23), coord_flip() swaps the two axes, and the legend sits below
# the plot.
ggplot(data = data, mapping = aes(x = carat, y = cut, fill = cut)) +
  geom_boxplot(
    outlier.colour = "red",
    outlier.shape = 8,
    outlier.size = 4
  ) +
  coord_flip() +
  stat_summary(fun = mean, geom = "point", shape = 23, size = 4) +
  theme(legend.position = "bottom") +
  labs(title = "Boxplot of Values", x = "Carat", y = "Cut")
# Median: the middle value of a sorted data series.
# Show the carat values in ascending order, then report their median.
sort(data[["carat"]])
median(data[["carat"]])
# R has no built-in function for the statistical mode, so let's create one:
# Return the most frequent value in `v` (the statistical mode).
#
# v: a vector of values.
# Returns a single element of `v`. When several values are tied for the
# highest count, the one that appears first in `v` is returned, because
# which.max() picks the first maximum.
get.mode <- function(v) {
  distinct_values <- unique(v)
  # match() maps each element to the position of its distinct value, and
  # tabulate() counts how many times each position occurs.
  occurrences <- tabulate(match(v, distinct_values))
  distinct_values[which.max(occurrences)]
}
# Most frequent carat value.
get.mode(data[["carat"]])

# Standard deviation of carat, with missing values excluded.
sd(data[["carat"]], na.rm = TRUE)