Question

I want to merge two data.frames by date. `data` contains the stock data for each trading day of a given stock; `events` contains news about the company. Some news items were published on non-trading days, so there is no stock data for those days. For example, on 04.01.2000 some news about the company was published. I want to merge this article with the return of the NEXT trading day, in this case the return on 06.01.2000. How can I jump to the next trading day when merging?

# Trading days (dd.mm.yyyy strings) and the stock return observed on each one.
date1 <- c(
  "01.01.2000", "02.01.2000", "03.01.2000",
  "06.01.2000", "07.01.2000", "09.01.2000"
)
ret1 <- c(-2.0, 1.1, 3, 1.4, -0.2, 0.6)

# One row per trading day; columns keep the names of the vectors above.
data <- data.frame(date1 = date1, ret1 = ret1)
data

#        date1 ret1
# 1 01.01.2000 -2.0
# 2 02.01.2000  1.1
# 3 03.01.2000  3.0
# 4 06.01.2000  1.4
# 5 07.01.2000 -0.2
# 6 09.01.2000  0.6

# Publication dates (dd.mm.yyyy strings) and the matching news items.
# Some of these dates are not trading days and so do not appear in `data`.
date2 <- c(
  "02.01.2000", "03.01.2000",
  "04.01.2000", "08.01.2000"
)
news2 <- c(
  "blabla1", "blabla2",
  "blabla3", "blabla4"
)

# One row per news item; columns keep the names of the vectors above.
events <- data.frame(date2 = date2, news2 = news2)
events

#        date2   news2
# 1 02.01.2000 blabla1
# 2 03.01.2000 blabla2
# 3 04.01.2000 blabla3
# 4 08.01.2000 blabla4

The output should look like this:

#         date    news  ret
# 1 02.01.2000 blabla1  1.1
# 2 03.01.2000 blabla2  3.0
# 3 06.01.2000 blabla3  1.4
# 4 09.01.2000 blabla4  0.6

Thank you!

Was it helpful?

Solution

This can be done quite easily with a rolling join using the data.table package:

library(data.table)   ## answer originally written against 1.9.2

setDT(data)           ## convert to data.table by reference
setDT(events)         ## ,,

## Parse the "dd.mm.yyyy" strings into Date objects before keying.
## Left as character, the keys sort (and therefore roll) lexicographically,
## i.e. by day-of-month first, which is only chronological while every date
## shares the same month and year ("01.02.2000" sorts before "31.01.2000").
data[, date1 := as.Date(date1, format = "%d.%m.%Y")]
events[, date2 := as.Date(date2, format = "%d.%m.%Y")]

setkey(data, date1)   ## set the column to join on, also sorts by 'date1'
setkey(events, date2) ## ,, also sorts by 'date2'

## Keep a copy of the trading day: after the join the key column 'date1'
## holds the news date taken from 'events', so 'date' is the only place the
## matched trading day survives.
data[, date := date1]

# Now roll join: for each news date look up the row of 'data' with the same
# date, and where there is none, roll the NEXT trading day's row backwards
# onto it (roll = -Inf, "next observation carried backward").
data[events, roll = -Inf]

#         date1 ret1       date   news2
# 1: 2000-01-02  1.1 2000-01-02 blabla1
# 2: 2000-01-03  3.0 2000-01-03 blabla2
# 3: 2000-01-04  1.4 2000-01-06 blabla3
# 4: 2000-01-08  0.6 2000-01-09 blabla4

OTHER TIPS

A longer, less elegant base-R approach, for those reluctant to use data.table. First, convert date1 and date2 to R's Date format:

# Parse the dd.mm.yyyy strings into Date objects so that all comparisons
# below are chronological rather than alphabetical.
data$date1 <- as.Date(data$date1, format = "%d.%m.%Y")
events$date2 <- as.Date(events$date2, format = "%d.%m.%Y")

# Distinct trading days in ascending order (findInterval needs a sorted vector).
trading.days <- sort(unique(data$date1))

# For every news date, find the position of the first trading day on or
# after it. With left.open = TRUE, findInterval() returns i such that
# trading.days[i] < date <= trading.days[i + 1], so i + 1 is the wanted
# position: a news date that is itself a trading day maps to itself, any
# other date maps to the next trading day.
next.idx <- findInterval(
  as.numeric(events$date2),
  as.numeric(trading.days),
  left.open = TRUE
) + 1L

# Copy of events whose date column is replaced by the matched trading day.
# Indexing one past the end yields NA, which marks news published after the
# last known trading day; those rows have no return to merge with.
events.new <- events
events.new$date2 <- trading.days[next.idx]
events.new <- events.new[!is.na(events.new$date2), ]

colnames(events.new)[1] <- "date1"       # rename for the merge with "data"

# Merge on the common column "date1"; several news items that map to the
# same trading day each get their own row.
events.final <- merge(events.new, data, by = "date1")
> events.final                                   #final dataset
       date1   news2 ret1
1 2000-01-02 blabla1  1.1
2 2000-01-03 blabla2  3.0
3 2000-01-06 blabla3  1.4
4 2000-01-09 blabla4  0.6
Licensed under: CC-BY-SA with attribution
Not affiliated with StackOverflow
scroll top