# Common Errors in R
# --- Common Errors R-Script ---
#
# A walkthrough of frequently seen R errors, each followed by its fix.
# Statements that fail at run time on purpose are wrapped in try() so the
# error message is still printed but the script keeps going. Statements that
# fail at parse time are left commented out, because a single parse error
# would prevent the whole file from being sourced; uncomment one to see it.

# Create Test Dataframe ----
df <- data.frame(col1 = c("1", "2"), col2 = c("1", "2"))

# Error Traceback ----
# Shows the call stack of the most recent uncaught error.
traceback()

# Error in grid.Call(L_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
#   Polygon edge not found
# loadfonts(dev="win")
# quartz()
# https://stackoverflow.com/questions/10581440/error-in-grid-calll-textbounds-as-graphicsannotxlabel-xx-xy-polygon

# Detach Package for Test ----
# detach() itself errors when the package is not attached, so check first.
# NOTE(review): detaching the tidyverse meta-package does not detach the core
# packages it attached (dplyr, ggplot2, ...); in a session where those are
# still attached, the "could not find function" demo below will not fail.
if ("package:tidyverse" %in% search()) {
  detach("package:tidyverse", unload = TRUE)
}

# Could Not Find Function ----
try(df.select <- df %>% select(col1))
# Error: could not find function "%>%"

# Fixed
# library(tidyverse)
# Error in library(tidyverse) : there is no package called ‘tidyverse’

# Fixed: install only when the package is missing, then attach it.
if (!requireNamespace("tidyverse", quietly = TRUE)) {
  install.packages("tidyverse")
}
library(tidyverse)

# Object Not Found ----
try(df.select <- df1 %>% select(col1))
# Error in select(., col1) : object 'df1' not found

# Fixed
df.select <- df %>% select(col1)

# Unknown Column Selected ----
try(df.select <- df %>% select(col12))
# Error: Can't subset columns that don't exist. Column `col12` doesn't exist.

# Fixed
df.select <- df %>% select(col1)

# Bracket Error (parse error: kept commented out) ----
# if(head(df$col1, 1) > 3) {print("Greater Than")} else {print("Less Than")}}
# Error: unexpected '}' in "}"

# Fixed
# col1 holds character values here; convert before comparing, otherwise `>`
# performs a string comparison rather than a numeric one.
if (as.numeric(head(df$col1, 1)) > 3) {
  print("Greater Than")
} else {
  print("Less Than")
}

# Parenthesis Error (parse error: kept commented out) ----
# df.select <- data %>% select(col1))
# Error: unexpected ')' in "df.select <- data %>% select(col1))"

# Fixed
df.select <- df %>% select(col1)

# Unexpected Input (parse error: kept commented out) ----
# Curly "smart" quotes pasted from a word processor are not valid R quotes.
# df <- data.frame(col1 = c(‘1’, "2"), col2 = c("1", “2”))
# Error: unexpected input in "‘"

# Fixed
df <- data.frame(col1 = c("1", "2"), col2 = c("1", "2"))

# Manage structure of data frame columns ----
str(df)
# Error: non-numeric argument to a binary operator
# (raised by arithmetic on a character column, e.g. df$col1 + 1)

# Change your text character column to a number format
df$col1 <- as.numeric(df$col1)
str(df)

# Change back to character
df$col1 <- as.character(df$col1)
str(df)

# Change to binary
# as.logical() only recognises strings such as "TRUE"/"T"/"true"; "1" and "2"
# would become NA, so go through numeric (non-zero becomes TRUE).
df$col1 <- as.logical(as.numeric(df$col1))
str(df)

# Create factor levels
df$col1 <- as.factor(df$col1)
str(df)

# Cannot Open File/Connection ----
# Fails unless this file happens to exist on the local machine.
try(csv <- read.csv("~/Desktop/file.csv"))

# Fixed
csv <- read.csv("https://rstatistics.org/datasets/time-series/ts-outliers.csv")
csv$output <- as.numeric(csv$output)
str(csv)

# Set Column as Date/Time POSIXct Object ----
csv$date <- as.POSIXct(csv$date, format = "%Y-%m-%d", tz = "UTC")

# Date & Time Operators Table
#
# Code  Meaning                        Code  Meaning
# ----  -----------------------------  ----  ---------------------------------
# %a    Abbreviated weekday            %A    Full weekday
# %b    Abbreviated month              %B    Full month
# %c    Locale-specific date and time  %d    Decimal day of the month
# %H    Decimal hours (24 hour)        %I    Decimal hours (12 hour)
# %j    Decimal day of the year        %m    Decimal month
# %M    Decimal minute                 %p    Locale-specific AM/PM
# %S    Decimal second                 %U    Decimal week of the year
#                                            (starting on Sunday)
# %w    Decimal weekday (0=Sunday)     %W    Decimal week of the year
#                                            (starting on Monday)
# %x    Locale-specific date           %X    Locale-specific time
# %y    2-digit year                   %Y    4-digit year
# %z    Offset from GMT                %Z    Time zone (character)
#
# https://www.stat.berkeley.edu/~s133/dates.html

# Examine structure of data
str(csv)

# Chart helper ----
# Builds the line chart used throughout this script: `output` over `date`
# with a loess smoother on top.
#   data        data frame with a POSIXct `date` column and numeric `output`
#   date_breaks spacing of x-axis breaks, passed to scale_x_datetime()
#   date_labels strptime-style label format (see the table above)
# Returns the ggplot object; print it to draw the chart.
make.ts.chart <- function(data, date_breaks = "1 month", date_labels = "%m") {
  ggplot(data, aes(x = date, y = output)) +
    # A constant colour belongs outside aes(); no manual scale or hidden
    # legend is needed then.
    geom_line(color = "#00AFBB", size = 1) +
    theme_minimal() +
    labs(title = "Time Series Chart", x = "Date", y = "Output") +
    scale_x_datetime(date_breaks = date_breaks, date_labels = date_labels) +
    theme(axis.text.x = element_text(angle = 25, vjust = 1.0, hjust = 1.0)) +
    stat_smooth(color = "#FC4E07", fill = "#FC4E07", method = "loess")
}

# Create Time Series Chart ----
outliers.chart <- make.ts.chart(csv, date_breaks = "1 month", date_labels = "%m")
# print() so the chart is also drawn when the file is run via source().
print(outliers.chart)
# This is a mess, so let's clean it up.

# Outliers Errors ----
# Get descriptive statistics to see outliers
base::summary(csv$output)
#  Min. 1st Qu.  Median    Mean 3rd Qu.    Max.
#     1     857     932    1075    1126    7790

# NOTE(review): order() places NA values last, so if `output` contains NA the
# head(..., -1) calls below drop an NA row instead of the extreme value.
# Confirm the data has no NA values.

# Order ascending (lowest to highest)
csv.head <- csv[order(csv$output), ]
# Trim required number of outliers (drops the last row, i.e. the maximum)
csv.head <- head(csv.head, -1)

# Order descending (highest to lowest)
csv.head <- csv.head[order(-csv.head$output), ]
# Drops the last row again, which is now the minimum
csv.trimmed <- head(csv.head, -1)

# Compare descriptive statistics
base::summary(csv.trimmed$output)
#   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.
#  695.0   860.5   932.0   978.0  1122.5  1356.0

# Create Time Series Chart ----
trimmed.chart <- make.ts.chart(
  csv.trimmed,
  date_breaks = "1 month",
  date_labels = "%m"
)
print(trimmed.chart)

# Date Breaks Error ----
# Change date_breaks = "1 month" => date_breaks = "1 year"
# Change date_labels = "%m" => date_labels = "%Y"
# See operators table above for all chart axis possibilities

# Create Time Series Chart (fixed axis) ----
fixed.chart <- make.ts.chart(
  csv.trimmed,
  date_breaks = "1 year",
  date_labels = "%Y"
)
print(fixed.chart)

# Vector Memory Exhausted (Limit Reached) ----
# The simplest fix is to make your file smaller. A couple of ways to do this
# are to remove duplicates and to remove columns. Another possibility is to
# remove any data that you will not be using. If the study covers a shorter
# range of time, exclude data that falls outside of that range. If there are
# entire columns of data that you do not use, then go ahead and eliminate
# them. All of these will reduce the amount of memory needed to hold the data.
# If reducing the size of your data does not do the job, then you may want to
# consider increasing the physical memory of your computer. Another solution
# is to keep the data in a database, such as a SQL database, where the program
# accesses it only as needed.