# R in Action (2nd ed): Chapter 19 # Advanced graphs # requires packages ggplot2, gridExtra, car (for datasets) # install.packages(c("ggplot2", "gridExtra", "car")) #----------------------------------------------------- par(ask=TRUE) ## Lattice Package library(lattice) # histogram of heights conditioned on voice pitch histogram(~ height | voice.part, data = singer, main="Distribution of Heights by Voice Pitch", xlab="Height (inches)") # lattice plot examples attach(mtcars) # create factors with value labels gear <- factor(gear, levels=c(3, 4, 5), labels=c("3 gears", "4 gears", "5 gears")) cyl <- factor(cyl, levels=c(4, 6, 8), labels=c("4 cylinders", "6 cylinders", "8 cylinders")) # generate plots densityplot(~ mpg, main="Density Plot", xlab="Miles per Gallon") densityplot(~ mpg | cyl, main="Density Plot by Number of Cylinders", xlab="Miles per Gallon") bwplot(cyl ~ mpg | gear, main="Box Plots by Cylinders and Gears", xlab="Miles per Gallon", ylab="Cylinders") xyplot(mpg ~ wt | cyl * gear, main="Scatter Plots by Cylinders and Gears", xlab="Car Weight", ylab="Miles per Gallon") cloud(mpg ~ wt * qsec | cyl, main="3D Scatter Plots by Cylinders") dotplot(cyl ~ mpg | gear, main="Dot Plots by Number of Gears and Cylinders", xlab="Miles Per Gallon") splom(mtcars[c(1, 3, 4, 5, 6)], main="Scatter Plot Matrix for mtcars Data") detach(mtcars) # manipulating a graph mygraph <- densityplot(~height|voice.part, data=singer) plot(mygraph) update(mygraph, col="red", pch=16, cex=.8, jitter=.05, lwd=2) plot(mygraph) # conditioning on a continuous variable displacement <- equal.count(mtcars$disp, number=3, overlap=0) xyplot(mpg ~ wt | displacement, data=mtcars, main = "Miles per Gallon vs. Weight by Engine Displacement", xlab = "Weight", ylab = "Mile per Gallon", layout=c(3,1), aspect=1.5) # xyplot with custom panel functions displacement <- equal.count(mtcars$disp, number=3, overlap=0) mypanel <- function(x, y) { panel.xyplot(x, y, pch=19) panel.rug(x, y) panel.grid(h=-1, v=-1) panel.lmline(x, y, col="red", lwd=1, lty=2) } xyplot(mpg ~ wt|displacement, data=mtcars, layout=c(3, 1), aspect=1.5, main = "Miles per Gallon vs. Weight by Engine Displacement", xlab = "Weight", ylab = "Mile per Gallon", panel = mypanel) # xyplot with custom panel functions and additional options mtcars$transmission <- factor(mtcars$am, levels=c(0, 1), labels=c("Automatic", "Manual")) panel.smoother <- function(x, y) { panel.grid(h=-1, v=-1) panel.xyplot(x, y) panel.loess(x, y) panel.abline(h=mean(y), lwd=2, lty=2, col="green") } xyplot(mpg ~ disp|transmission, data=mtcars, scales=list(cex=.8, col="red"), panel=panel.smoother, xlab="Displacement", ylab="Miles per Gallon", main="MGP vs Displacement by Transmission Type", sub = "Dotted lines are Group Means", aspect=1) # grouping variables mtcars$transmission <- factor(mtcars$am, levels=c(0, 1), labels=c("Automatic", "Manual")) densityplot(~ mpg, data=mtcars, group=transmission, main="MPG Distribution by Transmission Type", xlab="Miles per Gallon", auto.key=TRUE) # kernel density plot with a group variable and customized legend mtcars$transmission <- factor(mtcars$am, levels=c(0,1), labels=c("Automatic", "Manual")) colors = c("red", "blue") lines = c(1, 2) points = c(16, 17) key.trans <- list(title="Trasmission", space="bottom", columns=2, text=list(levels(mtcars$transmission)), points=list(pch=points, col=colors), lines=list(col=colors, lty=lines), cex.title=1, cex=.9) densityplot(~ mpg, data=mtcars, group=transmission, main="MPG Distribution by Transmission Type", xlab="Miles per Gallon", pch=points, lty=lines, col=colors, lwd=2, jitter=.005, key=key.trans) # xyplot with group and conditioning variables and customized legend colors <- "darkgreen" symbols <- c(1:12) linetype <- c(1:3) key.species <- list(title="Plant", space="right", text=list(levels(CO2$Plant)), points=list(pch=symbols, col=colors)) xyplot(uptake ~ conc | Type*Treatment, data=CO2, group=Plant, type="o", pch=symbols, col=colors, lty=linetype, main="Carbon Dioxide Uptake\nin Grass Plants", ylab=expression(paste("Uptake ", bgroup("(", italic(frac("umol","m"^2)), ")"))), xlab=expression(paste("Concentration ", bgroup("(", italic(frac(mL,L)), ")"))), sub = "Grass Species: Echinochloa crus-galli", key=key.species) # graphical parameters show.settings() mysettings <- trellis.par.get() mysettings$superpose.symbol mysettings$superpose.symbol$pch <- c(1:10) trellis.par.set(mysettings) show.settings() # customizing plot strips histogram(~height | voice.part, data = singer, strip = strip.custom(bg="lightgrey", par.strip.text=list(col="black", cex=.8, font=3)), main="Distribution of Heights by Voice Pitch", xlab="Height (inches)") mysettings <- trellis.par.get() mysettings$strip.background <- c("lightgrey", "lightgreen") trellis.par.set(mysettings) # page arrangement graph1 <- histogram(~ height | voice.part, data=singer, main="Heights of Choral Singers by Voice Part") graph2 <- densityplot(~ height, data=singer, group=voice.part, plot.points=FALSE, auto.key=list(columns=4)) plot(graph1, split=c(1, 1, 1, 2)) plot(graph2, split=c(1, 2, 1, 2), newpage=FALSE) library(lattice) graph1 <- histogram(~ height | voice.part, data=singer, main="Heights of Choral Singers by Voice Part") graph2 <- densityplot( ~ height, data=singer, group=voice.part, plot.points=FALSE, auto.key=list(columns=4)) plot(graph1, position=c(0, .3, 1, 1)) plot(graph2, position=c(0, 0, 1, .3), newpage=FALSE) ## ---------------------------------------------------------------------------------------------------- ## ggplot2 library(ggplot2) data(Salaries, package="car") data(singer, package="lattice") # chaining statements together ggplot(data=mtcars, aes(x=wt, y=mpg)) + geom_point() + labs(title="Automobile Data", x="Weight", y="Miles Per Gallon") ggplot(data=mtcars, aes(x=wt, y=mpg)) + geom_point(pch=17, color="blue", size=2) + geom_smooth(method="lm", color="red", linetype=2) + labs(title="Automobile Data", x="Weight", y="Miles Per Gallon") # grouping and faceting mtcars$am <- factor(mtcars$am, levels=c(0,1), labels=c("Automatic", "Manual")) mtcars$vs <- factor(mtcars$vs, levels=c(0,1), labels=c("V-Engine", "Straight Engine")) mtcars$Cylinders <- factor(mtcars$cyl) ggplot(data=mtcars, aes(x=hp, y=mpg, shape=Cylinders, color=Cylinders))+ geom_point(size=3) + facet_grid(vs~am) + labs(title="Automobile Data by Engine Type", x="Horse Power", y="Miles Per Gallon") # specifying plot types with geoms ggplot(singer, aes(x=height)) + geom_histogram() ggplot(singer, aes(x=voice.part, y=height)) + geom_boxplot() ggplot(Salaries, aes(x=rank, y=salary)) + geom_boxplot(fill="cornflowerblue", color="black", notch=TRUE)+ geom_point(position="jitter", color="blue", alpha=.5)+ geom_rug(side="l", color="black") ggplot(singer, aes(x=voice.part, y=height)) + geom_violin(fill="lightblue") + geom_boxplot(fill="lightgreen", width=.2) # grouping ggplot(data=Salaries, aes(x=salary, fill=rank)) + geom_density(alpha=.3) ggplot(Salaries, aes(x=yrs.since.phd, y=salary, color=rank, shape=sex)) + geom_point() ggplot(Salaries, aes(x=rank, fill=sex)) + geom_bar(position="stack") + labs(title='position="stack"') ggplot(Salaries, aes(x=rank, fill=sex)) + geom_bar(position="dodge") + labs(title='position="dodge"') ggplot(Salaries, aes(x=rank, fill=sex)) + geom_bar(position="fill") + labs(title='position="fill"') # faceting ggplot(data=singer, aes(x=height)) + geom_histogram() + facet_wrap(~voice.part) ggplot(Salaries, aes(x=yrs.since.phd, y=salary, color=rank, shape=rank)) + geom_point() + facet_grid(.~sex) ggplot(data=singer, aes(x=height, fill=voice.part)) + geom_density() + facet_grid(voice.part~.) # adding smoothed lines ggplot(data=Salaries, aes(x=yrs.since.phd, y=salary)) + geom_smooth() + geom_point() ggplot(data=Salaries, aes(x=yrs.since.phd, y=salary, linetype=sex, shape=sex, color=sex)) + geom_smooth(method=lm, formula=y~poly(x,2), se=FALSE, size=1) + geom_point(size=2) # axes ggplot(data=Salaries, aes(x=rank, y=salary, fill=sex)) + geom_boxplot() + scale_x_discrete(breaks=c("AsstProf", "AssocProf", "Prof"), labels=c("Assistant\nProfessor", "Associate\nProfessor", "Full\nProfessor")) + scale_y_continuous(breaks=c(50000, 100000, 150000, 200000), labels=c("$50K", "$100K", "$150K", "$200K")) + labs(title="Faculty Salary by Rank and Sex", x="", y="") # legends ggplot(data=Salaries, aes(x=rank, y=salary, fill=sex)) + geom_boxplot() + scale_x_discrete(breaks=c("AsstProf", "AssocProf", "Prof"), labels=c("Assistant\nProfessor", "Associate\nProfessor", "Full\nProfessor")) + scale_y_continuous(breaks=c(50000, 100000, 150000, 200000), labels=c("$50K", "$100K", "$150K", "$200K")) + labs(title="Faculty Salary by Rank and Gender", x="", y="", fill="Gender") + theme(legend.position=c(.1,.8)) # scales ggplot(data=Salaries, aes(x=yrs.since.phd, y=salary, color=rank)) + scale_color_manual(values=c("orange", "olivedrab", "navy")) + geom_point(size=2) ggplot(data=Salaries, aes(x=yrs.since.phd, y=salary, color=rank)) + scale_color_brewer(palette="Set1") + geom_point(size=2) ggplot(mtcars, aes(x=wt, y=mpg, size=disp)) + geom_point(shape=21, color="black", fill="cornsilk") + labs(x="Weight", y="Miles Per Gallon", title="Bubble Chart", size="Engine\nDisplacement") # themes mytheme <- theme(plot.title=element_text(face="bold.italic", size="14", color="brown"), axis.title=element_text(face="bold.italic", size=10, color="brown"), axis.text=element_text(face="bold", size=9, color="blue"), panel.background=element_rect(fill="white", color="blue"), panel.grid.major.y=element_line(color="grey", linetype=1), panel.grid.minor.y=element_line(color="grey", linetype=2), panel.grid.minor.x=element_blank(), legend.position="top") ggplot(Salaries, aes(x=rank, y=salary, fill=sex)) + geom_boxplot() + labs(title="Salary by Rank and Sex", x="Rank", y="Salary") + mytheme # multiple graphs per page p1 <- ggplot(data=Salaries, aes(x=rank)) + geom_bar() p2 <- ggplot(data=Salaries, aes(x=sex)) + geom_bar() p3 <- ggplot(data=Salaries, aes(x=yrs.since.phd, y=salary)) + geom_point() library(gridExtra) grid.arrange(p1, p2, p3, ncol=3) # saving graphs myplot <- ggplot(data=mtcars, aes(x=mpg)) + geom_histogram() ggsave(file="mygraph.png", plot=myplot, width=5, height=4) ggplot(data=mtcars, aes(x=mpg)) + geom_histogram() ggsave(file="mygraph.pdf")