# Statistical Functions in R

# Packages this script depends on.
packages <- c("tidyverse")

# Attach every required package, installing it first when it is missing.
# requireNamespace() only checks availability; library() then attaches the
# package and stops with an error if it still cannot be loaded.
# A plain loop is used for the side effects so that `packages` keeps holding
# the package names instead of being overwritten with a list of return values.
for (pkg in packages) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
  library(pkg, character.only = TRUE)
}

# Work on a copy of the diamonds dataset.
data <- diamonds

### Omit all NA Values

# na.omit() drops every row that contains at least one NA. The pipe already
# supplies `data` as the first argument, so it must not be passed again.
data <- data %>% na.omit()

# Per-column summary of the cleaned data.
summary(data)

## Mean

Sum of the values divided by the number of values in a data series.

# Arithmetic mean of the carat column.
mean(data$carat)

### Histogram with Mean Line

# Histogram of carat with a dashed vertical line marking the overall mean.
# The mean is passed as a fixed value rather than through aes(): when mapped
# inside aes() the single value is recycled to every row of the data, so one
# identical line is drawn per observation.
ggplot(data, aes(x = carat)) +
  geom_histogram(binwidth = 1, fill = "lightblue") +
  geom_vline(
    xintercept = mean(data$carat),
    color = "blue",
    linetype = "dashed",
    size = 1
  ) +
  labs(title = "Histogram of Values with Mean Line", x = "Carat", y = "Count")

### Histogram with Density Curve

# Histogram of carat scaled to density, overlaid with a kernel density curve.
# after_stat(density) replaces the deprecated `..density..` notation, and the
# y axis is labelled "Density" because the bars no longer show raw counts.
ggplot(data, aes(x = carat)) +
  geom_histogram(
    aes(y = after_stat(density)),
    colour = "black",
    fill = "white"
  ) +
  geom_density(alpha = 0.2, fill = "#FF6666") +
  labs(
    title = "Histogram of Values with Density Curve",
    x = "Carat",
    y = "Density"
  )

### Boxplot with Mean Value

# Boxplot of carat for each cut, filled by cut, with outliers drawn as large
# red stars. stat_summary(fun = mean) overlays a diamond-shaped point for the
# mean, and coord_flip() swaps the two axes for display.
ggplot(data, aes(x = carat, y = cut, fill = cut)) +
  geom_boxplot(
    outlier.colour = "red",
    outlier.shape = 8,
    outlier.size = 4
  ) +
  stat_summary(fun = mean, geom = "point", shape = 23, size = 4) +
  coord_flip() +
  labs(title = "Boxplot of Values", x = "Carat", y = "Cut") +
  theme(legend.position = "bottom")

## Median

The middle value of a data series once its values are sorted.

# Print the carat values in ascending order.
sort(data$carat)

# Median of the carat column.
median(data$carat)

## Mode

R has no built-in function for the statistical mode (base `mode()` reports an object's storage mode instead), so let's create one:

# Return the most frequent value of a vector (its statistical mode).
#
# v: an atomic vector.
# Returns a length-one vector holding the value that occurs most often in `v`;
# when several values tie, the one that appears first in `v` is returned.
get.mode <- function(v) {
  distinct_values <- unique(v)
  # Position of each element of `v` within the distinct values.
  positions <- match(v, distinct_values)
  # Number of occurrences of each distinct value, in order of first appearance.
  occurrences <- tabulate(positions)
  distinct_values[which.max(occurrences)]
}

# Most frequent carat value.
get.mode(data$carat)

## Standard Deviation

# Sample standard deviation of the carat column, ignoring missing values.
sd(data$carat, na.rm = TRUE)