Question

I have a .csv file with four columns (NAME, ID, YEAR, VALUE; see the example below) and want to draw time series plots using plot(YEAR, VALUE, type = 'b'). Because data for some YEARs is missing from the time series, I would like to add new rows containing NA values for the missing years, so that the plotted line is not connected across a gap in YEAR (in my example: fill in NA values for the years 1984 to 1987 in the NAME3 item, which is the BARTLEY item in my original data).

Is there a way to do this?? Any help is kindly appreciated! Thanks!

My .csv file looks like this:

NAME;                ID;      YEAR; VALUE
NAME1;              885;    1988;      -2
NAME1;              885;    1989;       0
NAME2;  2665;   1999;       4
NAME2;  2665;   2000;       8
NAME2;  2665;   2001;      19
NAME2;  2665;   2002;      13
NAME2;  2665;   2003;      13
NAME3;          893 ;   1983;       0
NAME3;          893 ;   1988;       2
NAME3;          893 ;   1989;      -1
NAME4;              877 ;   1972;      -1
NAME5;          894 ;   1973;      -3
Was it helpful?

Solution

Glad to hear that you figured it out. I was still wondering about the number of plots per page. I've added a few lines to the code to allow you to set the number of rows and columns of plots to appear on one page and then to loop over as many pages of plots as necessary. I've also added a bit of ggplot stuff to change the appearance of plot text.

    # Read the semicolon-separated file into xx and turn the numeric years
    # into Date values (1 January of each year).
    xx <- read.csv("plot_test.txt", header = TRUE, sep = ";")
    xx$YEAR <- as.Date(paste0(xx$YEAR, "-01-01"))

    # Build a template with one row per NAME for every year between that
    # NAME's first and last observed year.
    xxmin <- as.Date(tapply(xx$YEAR, xx$NAME, min), origin = "1970-01-01")
    xxmax <- as.Date(tapply(xx$YEAR, xx$NAME, max), origin = "1970-01-01")
    # SIMPLIFY = FALSE always returns a list named by NAME. With the default
    # simplification the result collapses to a matrix whenever every NAME
    # spans the same number of years, and the names/lengths used below break.
    xxdates <- mapply(seq, xxmin, xxmax,
                      MoreArgs = list(by = "12 month"), SIMPLIFY = FALSE)
    xxyears <- data.frame(
      NAME = rep(names(xxdates), lengths(xxdates)),
      YEAR = as.Date(unlist(xxdates, use.names = FALSE), origin = "1970-01-01")
    )

    # Left-join the data onto the template directly on NAME and YEAR (no
    # pasted helper key); template years without data get VALUE = NA, which
    # is what breaks the plotted line across a gap.
    xy <- merge(xxyears, xx, by = c("NAME", "YEAR"), all.x = TRUE)
    xy <- xy[, c("NAME", "ID", "YEAR", "VALUE")]

    # The inserted gap rows have no ID yet; copy it from the first data row of
    # the same NAME (assumes each NAME maps to a single ID, as in the sample).
    no_id <- is.na(xy$ID)
    xy$ID[no_id] <- xx$ID[match(xy$NAME[no_id], xx$NAME)]

   # Plot each NAME in its own facet with its own time axis, spreading the
   # facets over as many pages as needed (rows_pg * cols_pg charts per page).
   library(ggplot2)
   rows_pg <- 2   # number of rows of plots per page
   cols_pg <- 2   # number of columns of plots per page
   chts_pg <- rows_pg * cols_pg
   # NAME is a character column (not a factor) when the data was read with
   # R >= 4.0, where nlevels() would report 0; levels(factor()) yields the
   # sorted distinct names for both character and factor input.
   plot_names <- levels(factor(xy$NAME))
   num_plots <- length(plot_names)
   num_pages <- ceiling(num_plots / chts_pg)
   # Plot title, axis labels and text appearance shared by every page.
   spttl <- ggtitle("Your plot title\nSecond line of your plot title")
   spaxlb <- labs(x = "Year", y = "Data Values")
   spth <- theme(
     plot.title = element_text(size = 16, face = "bold", colour = "blue"),
     axis.title.x = element_text(size = 14, colour = "blue"),
     axis.title.y = element_text(size = 14, colour = "blue"),
     axis.text = element_text(size = 14, colour = "black"),
     strip.text = element_text(size = 14, colour = "brown")
   )
   # Generate one page of plots per iteration; seq_len() makes the loop a
   # no-op when there is nothing to plot.
   for (ipg in seq_len(num_pages)) {
     first_plot <- (ipg - 1) * chts_pg + 1
     # Cap the last page so the index never runs past the available names.
     last_plot <- min(ipg * chts_pg, num_plots)
     page_names <- plot_names[first_plot:last_plot]
     sp <- ggplot(data = xy[xy$NAME %in% page_names, ],
                  aes(x = YEAR, y = VALUE)) +
       geom_line() +
       geom_point() +
       facet_wrap(~ NAME, scales = "free_x", nrow = rows_pg, ncol = cols_pg)
     # ggplot objects are not auto-printed inside a loop; print explicitly.
     print(sp + spttl + spaxlb + spth)
   }

OTHER TIPS

You can read the file exactly as you've shown it by passing sep=";" to read.csv so that the semicolon-separated values are split into columns. You might consider something like the code below to read the data, fix up the dates, add the NAs, and draw the plots. I put your data in a file called "plot_test.txt", so read.csv gets the data from there. Also, from your comment about the BARTLEY item, I am assuming that you want a separate line for each item in your plot.

# Read the semicolon-separated file into xx and turn the numeric years
# into Date values (1 January of each year).
xx <- read.csv("plot_test.txt", header = TRUE, sep = ";")
xx$YEAR <- as.Date(paste0(xx$YEAR, "-01-01"))

# Build df as a template holding every NAME/ID pair for every year between
# the overall first and last year in the data.
date_seq <- seq(min(xx$YEAR), max(xx$YEAR), by = "12 month")
# Take the NAME/ID pairs row-wise. Pairing unique(NAME) with unique(ID)
# column-wise mismatches (or errors on unequal lengths) as soon as two
# names share an ID.
name_ids <- unique(xx[, c("NAME", "ID")])
# by = NULL requests the Cartesian product: every pair crossed with every year.
df <- merge(name_ids, data.frame(YEAR = date_seq, VALUE = NA), by = NULL)

# Left-join the real data onto the template directly on its key columns;
# template rows without a matching observation keep VALUE = NA, which breaks
# the plotted line across the missing years.
key_cols <- c("NAME", "ID", "YEAR")
xy <- merge(df[, key_cols], xx, by = key_cols, all.x = TRUE)
xy <- xy[, c("NAME", "ID", "YEAR", "VALUE")]

# Plot xy using ggplot, one coloured line per NAME.
library(ggplot2)
sp <- ggplot(data = xy, aes(x = YEAR, y = VALUE, colour = NAME)) +
  geom_point() +
  geom_line()
print(sp)
Licensed under: CC-BY-SA with attribution
Not affiliated with StackOverflow
scroll top