Question

Maybe this question has already been asked and answered on SO, but I wasn't able to find it. I'm computing a cumulative sum with conditions on a very large data frame. Look at the example below:

# Example data: a grouping column (Catg) and a numeric column (Val).
Data <- data.frame(
  Catg = c("A", "A", "A", "A", "A", "B", "B", "B", "C", "C", "C",
           "D", "D", "D", "D", "D", "D", "D", "D", "E", "E", "F"),
  Val = c(67, 42, 12, 32, 28, 1, 11, 9, 38, 61, 75,
          99, 22, 44, 89, 99, 51, 34, 82, 99, 74, 42)
)

# Running total of Val within each category, computed with an explicit loop.
# Res is preallocated to one slot per row and filled by row index, so the
# vector is never re-copied while it grows and every result lands on the row
# it belongs to even when a category's rows are not contiguous.
Res <- rep(NA_real_, nrow(Data))
UniqCatg <- unique(Data$Catg)
for (i in seq_along(UniqCatg)) {  # seq_along() iterates zero times on empty input
  Rows <- which(Data$Catg == UniqCatg[i])
  Res[Rows] <- cumsum(Data[Rows, "Val"])
}
Data$Res <- Res
Data

Is there a smart way to do this without for loops (for example, with the apply family of functions)?

Was it helpful?

Solution 2

Or with plyr::ddply...

# Split Data by Catg, add each group's running total of Val as a new Res
# column, and recombine the pieces. The result is printed here, not assigned
# back to Data.
library(plyr)  # unlike require(), fails immediately if plyr is not installed
ddply(Data, "Catg", transform, Res = cumsum(Val))
#   Catg Val Res
#1     A  67  67
#2     A  42 109
#3     A  12 121
#4     A  32 153
#5     A  28 181
#6     B   1   1
#7     B  11  12
#8     B   9  21
#9     C  38  38
#10    C  61  99
#11    C  75 174
#12    D  99  99
#13    D  22 121
#14    D  44 165
#15    D  89 254
#16    D  99 353
#17    D  51 404
#18    D  34 438
#19    D  82 520
#20    E  99  99
#21    E  74 173
#22    F  42  42

OTHER TIPS

You could use ave:

# Running total of Val within each Catg group; ave() returns a vector of the
# same length as Val, so it can be stored directly as a new column.
Data$Res <- with(Data, ave(Val, Catg, FUN = cumsum))
#   Catg Val Res
#1     A  67  67
#2     A  42 109
#3     A  12 121
#4     A  32 153
#5     A  28 181
#6     B   1   1
#7     B  11  12
#8     B   9  21
#9     C  38  38
#10    C  61  99
#11    C  75 174
#12    D  99  99
#13    D  22 121
#14    D  44 165
#15    D  89 254
#16    D  99 353
#17    D  51 404
#18    D  34 438
#19    D  82 520
#20    E  99  99
#21    E  74 173
#22    F  42  42
Licensed under: CC-BY-SA with attribution
Not affiliated with StackOverflow
scroll top