# First let's make a similar graph to the one above, and see how it simplifies
# complex plots.

# Install ggplot2 only if it is not already available (i.e. you've never
# used/installed ggplot2 before). The package name must be a quoted string.
if (!requireNamespace("ggplot2", quietly = TRUE)) {
  install.packages("ggplot2")
}
library(ggplot2) # load the ggplot2 library into memory for this R session

ggplot(Loblolly, aes(x = age, y = height, color = Seed, group = Seed)) +
  geom_point() +
  geom_line()

# We just made essentially the same plot with a single command instead of 30
# lines!

# The "gg" in "ggplot" stands for "grammar of graphics". GGplot uses a
# different grammatical syntax for plots than the regular plot function.
# Let's see how it works.

# This tutorial is patterned after a similar online tutorial found here:
# http://r-statistics.co/ggplot2-Tutorial-With-R.html
# which you can check out to learn more if you want to.

# First, to use ggplot you need to set up the plot with the ggplot() command.
# In this command, you must at a minimum supply a dataframe where the data are
# located. Typically you will also define some "aesthetic" characteristics
# using the aes() argument.

# We're using the "Loblolly" dataframe so that is our first argument in our
# ggplot command. Then we'll define the aesthetics: what goes on the x axis,
# what's on the y-axis, how we want to split the data into groups (by the Seed
# variable), and how colors will be assigned within the graph, once we start
# plotting things.

# Notice that all we've done to this point is set up the graph, we haven't
# actually plotted:
ggplot(Loblolly, aes(x = age, y = height, color = Seed, group = Seed)) # makes a blank graph

# Now that we have a blank slate to work with, we're going to add things using
# the plus (+) sign and various add-on functions. Obviously the first thing we
# want to add is the actual points! Any shape, line, or point we want to add is
# referred to as a "geometric" or "geom" for short.
# So to add points, we add a geom_point function.
# In the example above, that was all we did:
ggplot(Loblolly, aes(x = age, y = height, color = Seed, group = Seed)) +
  geom_point()

# That simply adds the dots onto our plot. But we can use the geom_point
# function to change the aesthetics of HOW we add dots too:
ggplot(Loblolly, aes(x = age, y = height, color = Seed, group = Seed)) +
  geom_point(aes(shape = Seed))

# Note that within the geom_point function, we've again supplied an aesthetic
# argument. This particular argument doesn't work well because we need so many
# types of shapes. Let's try varying the size of the points instead. See if you
# can figure out how to do that.
# Then scroll down for the answer:




# Here is how you would do that:
ggplot(Loblolly, aes(x = age, y = height, color = Seed, group = Seed)) +
  geom_point(aes(size = Seed))

# Notice the warning message it gives you: It's saying that since "Seed" is a
# categorical variable, it may not make sense to vary the size of the point.
# It gives us this warning, but still makes the plot for us.

# Now let's go back to our original plot:
ggplot(Loblolly, aes(x = age, y = height, color = Seed, group = Seed)) +
  geom_point() +
  geom_line()

# The next "geom" we added was a line to connect each point. But note that R
# didn't connect EVERY point, it only connected the points that came from the
# same seed source. How did it magically know we wanted it to do this? The
# answer is that we already told it to when we set up the plot in the ggplot()
# command. In that command, we told it we want to split the data into groups
# (by seed source) and also that we wanted color to vary by seed source.
# The geom commands "inherit" this aesthetic information from the plot setup,
# so that we don't need to give it these instructions again. This is part of
# the reason ggplot can produce graphs with less code - it makes assumptions
# about the way you want to graph things.
# If you want to override these assumptions, you can tell it not to inherit the
# plot aesthetics, and supply them yourself:
ggplot(Loblolly, aes(x = age, y = height, color = Seed, group = Seed)) +
  geom_point() +
  geom_line(
    data = Loblolly,
    mapping = aes(x = age, y = height, group = Seed),
    inherit.aes = FALSE
  )
# I told the geom_line function not to inherit the ability to produce different
# colors so all lines are black (the default color).

# GGplot is also more efficient because it has a lot of built-in functions to
# add "layers" (geoms) to the plot. For example, we could add regression lines
# instead of connecting dots:
ggplot(Loblolly, aes(x = age, y = height, color = Seed, group = Seed)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE) # se = FALSE: no confidence band

# Or a smooth curve instead of connecting dots:
ggplot(Loblolly, aes(x = age, y = height, color = Seed, group = Seed)) +
  geom_point() +
  geom_smooth()

# There are lots of different types of layers (or geoms) that could be added.
# Here is a "cheat-sheet" that shows the most common ones:
# http://www.rstudio.com/wp-content/uploads/2015/12/ggplot2-cheatsheet-2.0.pdf
# Or the entire list is here:
# http://ggplot2.tidyverse.org/reference/index.html

# You can add as many of these layers as you want to a plot.
# Each layer will "inherit" the plot aesthetics that you set up in the ggplot()
# function, unless you over-ride that by setting the inherit.aes option to
# FALSE.

# One difference between our ggplot and the original plot created with the
# plot() function is that we specified nice axis labels in our original plot.
# To do this in ggplot, we need to add labels using the labs() function:
ggplot(Loblolly, aes(x = age, y = height, color = Seed, group = Seed)) +
  geom_point() +
  geom_smooth() +
  labs(
    x = "Tree age (years)",
    y = "Tree height (ft.)",
    title = "Loblolly Pine Growth",
    color = "Seed source"
  )

# Finally, we can change a few other things using the "theme" command.
# For example, we can change text size:
ggplot(Loblolly, aes(x = age, y = height, color = Seed, group = Seed)) +
  geom_point() +
  geom_smooth() +
  labs(
    x = "Tree age (years)",
    y = "Tree height (ft.)",
    title = "Loblolly Pine Growth",
    color = "Seed source"
  ) +
  theme(
    plot.title = element_text(size = 15, face = "bold"),
    axis.text.x = element_text(size = 5),
    axis.text.y = element_text(size = 5),
    axis.title.x = element_text(size = 10),
    axis.title.y = element_text(size = 10),
    legend.text = element_text(size = 5),
    legend.title = element_text(size = 5)
  )

# There are other built-in themes that you can use to modify the look of a
# graph too:
ggplot(Loblolly, aes(x = age, y = height, color = Seed, group = Seed)) +
  geom_point() +
  geom_smooth() +
  labs(
    x = "Tree age (years)",
    y = "Tree height (ft.)",
    title = "Loblolly Pine Growth",
    color = "Seed source"
  ) +
  theme_bw()

ggplot(Loblolly, aes(x = age, y = height, color = Seed, group = Seed)) +
  geom_point() +
  geom_smooth() +
  labs(
    x = "Tree age (years)",
    y = "Tree height (ft.)",
    title = "Loblolly Pine Growth",
    color = "Seed source"
  ) +
  theme_dark()

ggplot(Loblolly, aes(x = age, y = height, color = Seed, group = Seed)) +
  geom_point() +
  geom_smooth() +
  labs(
    x = "Tree age (years)",
    y = "Tree height (ft.)",
    title = "Loblolly Pine Growth",
    color = "Seed source"
  ) +
  theme_minimal()

# There are a few more of these themes that come with the ggplot2 package
# (see gallery here):
# https://www.r-bloggers.com/ggplot2-themes-examples/
# But you can also get a lot more themes by installing a themes package like
# ggthemes:
# http://www.ggplot2-exts.org/ggthemes.html
# or ggthemr:
# https://www.r-bloggers.com/the-ggthemr-package-theme-and-colour-your-ggplot-figures/

# Because GGplot is very powerful, it can be used to create a LOT of different
# kinds of graphs. Check it out:
# http://r-statistics.co/Top50-Ggplot2-Visualizations-MasterList-R-Code.html

# Based on what you've learned, use ggplot to create a graph showing how Petal
# Width depends on Sepal Width for the 3 different species of irises.
# Export your figure (click the export button at the top of the figure window)
# and save your figure as a jpeg image with an informative filename. Upload
# your jpg to eLearning for 3 participation points.

# Base-graphics example: plot height against age for seed source 329 only,
# then overlay a fitted asymptotic regression curve.
# library() is used instead of require() so a missing package raises an error
# rather than silently returning FALSE.
library(stats)
library(graphics)

plot(
  height ~ age,
  data = Loblolly,
  subset = Seed == 329,
  xlab = "Tree age (yr)",
  las = 1,
  ylab = "Tree height (ft)",
  main = "Loblolly data and fitted curve (Seed 329 only)"
)

# Fit the self-starting asymptotic model SSasymp to the same subset.
fm1 <- nls(
  height ~ SSasymp(age, Asym, R0, lrc),
  data = Loblolly,
  subset = Seed == 329
)

# Evaluate the fitted model on an even grid of 101 ages from 0 to 30 and draw
# the resulting curve on top of the existing plot.
age <- seq(0, 30, length.out = 101)
lines(age, predict(fm1, list(age = age)))