misstable <- function(atable){
op <- par(bg = "white")
plot(c(0, 400), c(0, 1000), type = "n", xlab="", ylab="",
main = "Missing Data Table")
pmin=000
pmax=400
stre=pmax-pmin
lnames=length(atable)
cstep = (stre/lnames)
for(titles in 1:lnames){
text((titles-1) * cstep+pmin+cstep/2,1000,colnames(atable)[titles])
}
gmax=900
gmin=0
gstre=gmax-gmin
rvec = as.vector(atable[ [ 1 ] ])
dnames=length(rvec)
step = gstre / dnames
for(rows in 1:dnames){
text(30,gmax - (rows-1)*step-step/2,rvec[rows])
ymax=gmax - (rows-1)*step
ymin=gmax - (rows)*step
for(col in 2:lnames-1){
if(atable[rows,col+1] == F){
tcolor = "red"
}
if(atable[rows,col+1] == T){
tcolor = "white"
}
rect((col) * (stre/lnames)+pmin, ymin, (col+1) * (stre/lnames)+pmin,
ymax,col=tcolor,lty="blank")
}
}
}
require(grDevices)
# Make a quick dataframe with true/false representing data availability
locs=c("Australia","India","New Zealand","Sri Lanka","Uruguay","Somalia")
f1=c(T,F,T,T,F,F)
f2=c(F,F,F,T,F,F)
f3=c(F,T,T,T,F,T)
atable=data.frame(locs,f1,f2,f3)
atable
#Draw the table.
misstable(atable)
# having defined the input dir and input file tried reading the excel sheet (without head 3 rows)
#dat <- readxl::read_excel(file.path(indir, infile), skip =3)
# got lots of warnings()
## 50: In read_xlsx_(path, sheet, col_names = col_names, col_types = col_types, ... :
## [1278, 4]: expecting date: got '[NULL]'
# I always worry about using excel connections so open in excel (in windows)
# and save as to convert to CSV
dat <- read.csv(file.path(indir, gsub(".xlsx", ".csv", infile)), skip =3, stringsAsFactor = F)
str(dat)
# 'data.frame': 1396 obs. of 167 variables:
# but most of the cols and a third of the rows are empty!
# check missings
dat2 <- data.frame(id = 1:nrow(dat), dat)
str(dat2)
# first if they are empty strings
dat2[dat2 == ""] <- NA
# now if NA
dat2[,2:ncol(dat2)] <- !is.na(dat2[,2:ncol(dat2)])
# Truncate the hundreds of empty cols
str(dat2[,1:18])
tail(dat2[,1:18])
svg(file.path(outdir, gsub(".csv", ".svg", outfile)) )
misstable(dat2[,1:18])
dev.off()
browseURL(file.path(outdir, gsub(".csv", ".svg", outfile)) )
# cool, that is an effective way to look at the data