# R in Action (2nd ed): Chapter 18
# Advanced methods for missing data
# requires packages VIM, mice
# install.packages(c("VIM", "mice"))
#----------------------------------

# Dependencies ----
library(mice)
library(VIM)

# Pause before each new plot; remember the previous setting so it can be
# restored at the end of the script.
old_par <- par(ask = TRUE)

# Load the dataset ----
data(sleep, package = "VIM")

# list the rows that do not have missing values
sleep[complete.cases(sleep), ]

# list the rows that have one or more missing values
sleep[!complete.cases(sleep), ]

# Tabulate missing values patterns ----
md.pattern(sleep)

# Plot missing values patterns ----
aggr(sleep, prop = FALSE, numbers = TRUE)
matrixplot(sleep)
marginplot(
  sleep[c("Gest", "Dream")],
  pch = 20,
  col = c("darkgray", "red", "blue")
)

# Use correlations to explore missing values ----
# x is a shadow (indicator) data frame: 1 where sleep is missing, 0 otherwise.
x <- as.data.frame(is.na(sleep) * 1)
head(sleep, n = 5)
head(x, n = 5)

# Keep only the indicator columns that vary, i.e. variables that have some
# (but not all) values missing. sd() must be applied column by column because
# it does not accept a data frame.
y <- x[vapply(x, sd, numeric(1)) > 0]
cor(y)
cor(sleep, y, use = "pairwise.complete.obs")

# Complete case analysis (listwise deletion) ----
cor(na.omit(sleep))
fit <- lm(Dream ~ Span + Gest, data = na.omit(sleep))
summary(fit)

# Multiple imputation ----
imp <- mice(sleep, seed = 1234)
fit <- with(imp, lm(Dream ~ Span + Gest))
pooled <- pool(fit)
summary(pooled)
imp

# Restore the graphics setting changed at the top of the script.
par(old_par)