Question

I am plotting the start and end times of some logged data. Below is the data frame I am working with (or rather, the one I have produced). Essentially, I have two loops that run through my data looking for the start and end times; each loop produces a data frame that is then combined into "dataraw".

Now I'd like to cue my viewer into what the start and end times of each discrete data set are. For the time being, just coloring the points labeled "Start" and "End" with different colors will suffice. Ideally, though, I'd like to make this fancier by drawing a box around each set, with a faded background in alternating colors if possible, using the points I have selected as corners, such that the first "Start" point is one corner and the first "End" point is the next corner. I have no clue whether that is possible, though, so I figured I'd crawl before I ran.

Below is the data frame I am using and the ggplot2 function I am using to just label the points.

Honestly I feel as if this is a very simple problem created by my lack of understanding of how ggplot maps points or prioritizes data.

require(ggplot2)

# Long-format data frame handed to ggplot (85 rows; columns Time, value,
# variable). The first 12 rows are the "Start"/"End" marker rows (6 of each);
# the remaining 73 rows are the "rawPressure1" series. Each marker row repeats
# the Time and value of a "rawPressure1" row further down, so those points
# exist twice in the data.
dataraw <- structure(list(Time = c(1383817893L, 1383817970L, 1383818010L, 
        1383818080L, 1383818170L, 1383818250L, 1383817923L, 1383818003L, 
        1383818043L, 1383818113L, 1383818203L, 1383818286L, 1383817890L, 
        1383817893L, 1383817896L, 1383817899L, 1383817902L, 1383817905L, 
        1383817908L, 1383817911L, 1383817914L, 1383817917L, 1383817920L, 
        1383817923L, 1383817970L, 1383817973L, 1383817976L, 1383817979L, 
        1383817982L, 1383817985L, 1383817988L, 1383817991L, 1383817994L, 
        1383817997L, 1383818000L, 1383818003L, 1383818010L, 1383818013L, 
        1383818016L, 1383818019L, 1383818022L, 1383818025L, 1383818028L, 
        1383818031L, 1383818034L, 1383818037L, 1383818040L, 1383818043L, 
        1383818080L, 1383818083L, 1383818086L, 1383818089L, 1383818092L, 
        1383818095L, 1383818098L, 1383818101L, 1383818104L, 1383818107L, 
        1383818110L, 1383818113L, 1383818170L, 1383818173L, 1383818176L, 
        1383818179L, 1383818182L, 1383818185L, 1383818188L, 1383818191L, 
        1383818194L, 1383818197L, 1383818200L, 1383818203L, 1383818250L, 
        1383818253L, 1383818256L, 1383818259L, 1383818262L, 1383818265L, 
        1383818268L, 1383818271L, 1383818274L, 1383818277L, 1383818280L, 
        1383818283L, 1383818286L), value = c(4307L, 4748L, 5419L, 4663L, 
        4779L, 4532L, 5539L, 4589L, 5541L, 5403L, 5277L, 5183L, 4246L, 
        4307L, 4368L, 4416L, 4930L, 5417L, 5444L, 5461L, 5485L, 5507L, 
        5520L, 5539L, 4748L, 4730L, 4741L, 4706L, 4717L, 4684L, 4673L, 
        4673L, 4660L, 4651L, 4585L, 4589L, 5419L, 5463L, 5487L, 5510L, 
        5513L, 5535L, 5535L, 5538L, 5532L, 5543L, 5534L, 5541L, 4663L, 
        4632L, 4598L, 4586L, 4577L, 4565L, 4546L, 5316L, 5348L, 5340L, 
        5363L, 5403L, 4779L, 4790L, 4790L, 4779L, 4762L, 4771L, 5249L, 
        5294L, 5286L, 5290L, 5267L, 5277L, 4532L, 4361L, 4327L, 4319L, 
        4307L, 4288L, 4285L, 5098L, 5151L, 5158L, 5163L, 5177L, 5183L
        ), variable = c("Start", "Start", "Start", "Start", "Start", 
        "Start", "End", "End", "End", "End", "End", "End", "rawPressure1", 
        "rawPressure1", "rawPressure1", "rawPressure1", "rawPressure1", 
        "rawPressure1", "rawPressure1", "rawPressure1", "rawPressure1", 
        "rawPressure1", "rawPressure1", "rawPressure1", "rawPressure1", 
        "rawPressure1", "rawPressure1", "rawPressure1", "rawPressure1", 
        "rawPressure1", "rawPressure1", "rawPressure1", "rawPressure1", 
        "rawPressure1", "rawPressure1", "rawPressure1", "rawPressure1", 
        "rawPressure1", "rawPressure1", "rawPressure1", "rawPressure1", 
        "rawPressure1", "rawPressure1", "rawPressure1", "rawPressure1", 
        "rawPressure1", "rawPressure1", "rawPressure1", "rawPressure1", 
        "rawPressure1", "rawPressure1", "rawPressure1", "rawPressure1", 
        "rawPressure1", "rawPressure1", "rawPressure1", "rawPressure1", 
        "rawPressure1", "rawPressure1", "rawPressure1", "rawPressure1", 
        "rawPressure1", "rawPressure1", "rawPressure1", "rawPressure1", 
        "rawPressure1", "rawPressure1", "rawPressure1", "rawPressure1", 
        "rawPressure1", "rawPressure1", "rawPressure1", "rawPressure1", 
        "rawPressure1", "rawPressure1", "rawPressure1", "rawPressure1", 
        "rawPressure1", "rawPressure1", "rawPressure1", "rawPressure1", 
        "rawPressure1", "rawPressure1", "rawPressure1", "rawPressure1"
        )), .Names = c("Time", "value", "variable"), row.names = c(NA, 
        85L), class = "data.frame")


    # Scatter plot of value against Time; points are grouped and coloured by
    # the `variable` column of `dataraw`.
    p <- ggplot(
        data = dataraw,
        mapping = aes(x = Time, y = value, group = variable)
    ) +
        geom_point(mapping = aes(colour = variable))

EDIT: Here is the previous step, the loop that finds the "starts" and "ends"; it does duplicate the data. As mentioned below, it's better to add start and end as a factor rather than duplicating the points and adding them as a variable.

# Raw series before Start/End extraction (73 rows; columns Time, variable,
# value, dt). `variable` is a one-level factor ("rawPressure1"). `dt` looks
# like the gap in Time to the following row (3 inside a run, larger across a
# break between runs) and is NA on the last row -- TODO confirm how it was
# computed, since that code is not shown here.
dataraw <-structure(list(Time = c(1383817890L, 1383817893L, 1383817896L, 
1383817899L, 1383817902L, 1383817905L, 1383817908L, 1383817911L, 
1383817914L, 1383817917L, 1383817920L, 1383817923L, 1383817970L, 
1383817973L, 1383817976L, 1383817979L, 1383817982L, 1383817985L, 
1383817988L, 1383817991L, 1383817994L, 1383817997L, 1383818000L, 
1383818003L, 1383818010L, 1383818013L, 1383818016L, 1383818019L, 
1383818022L, 1383818025L, 1383818028L, 1383818031L, 1383818034L, 
1383818037L, 1383818040L, 1383818043L, 1383818080L, 1383818083L, 
1383818086L, 1383818089L, 1383818092L, 1383818095L, 1383818098L, 
1383818101L, 1383818104L, 1383818107L, 1383818110L, 1383818113L, 
1383818170L, 1383818173L, 1383818176L, 1383818179L, 1383818182L, 
1383818185L, 1383818188L, 1383818191L, 1383818194L, 1383818197L, 
1383818200L, 1383818203L, 1383818250L, 1383818253L, 1383818256L, 
1383818259L, 1383818262L, 1383818265L, 1383818268L, 1383818271L, 
1383818274L, 1383818277L, 1383818280L, 1383818283L, 1383818286L
), variable = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L
), .Label = "rawPressure1", class = "factor"), value = c(4246L, 
4307L, 4368L, 4416L, 4930L, 5417L, 5444L, 5461L, 5485L, 5507L, 
5520L, 5539L, 4748L, 4730L, 4741L, 4706L, 4717L, 4684L, 4673L, 
4673L, 4660L, 4651L, 4585L, 4589L, 5419L, 5463L, 5487L, 5510L, 
5513L, 5535L, 5535L, 5538L, 5532L, 5543L, 5534L, 5541L, 4663L, 
4632L, 4598L, 4586L, 4577L, 4565L, 4546L, 5316L, 5348L, 5340L, 
5363L, 5403L, 4779L, 4790L, 4790L, 4779L, 4762L, 4771L, 5249L, 
5294L, 5286L, 5290L, 5267L, 5277L, 4532L, 4361L, 4327L, 4319L, 
4307L, 4288L, 4285L, 5098L, 5151L, 5158L, 5163L, 5177L, 5183L
), dt = c(3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 47L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 7L, 3L, 3L, 3L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 37L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
3L, 3L, 3L, 57L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
47L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, NA)), .Names = c("Time", 
"variable", "value", "dt"), row.names = c(NA, -73L), class = "data.frame")

This is the data frame that is shoved through the loop below.

These loops simply pluck out what I consider to be the start and end times of each discrete data set. Rather than using rbind to append the points, I should be adding a factor to each data point instead.

# Find the first ("Start") and last ("End") sample of every discrete logging
# run in `dataraw`, then prepend those marker rows to the long-format data.
#
# Reads:  dataraw with columns Time, variable, value, dt.
# Writes: Start, End, Events (columns Time, value, variable) and overwrites
#         dataraw with rbind(Events, dataraw[, c("Time", "variable", "value")]).
# Because `dt` is dropped from dataraw at the end, this block cannot be re-run
# on its own output.
#
# Vectorised replacement for two index loops that grew data frames with
# rbind(); it selects the same rows but also copes with very short input and
# with "no event found".

# Regular sampling step: a row whose dt is larger than this closes a run.
sample_step <- 3L

n_rows <- nrow(dataraw)
row_id <- seq_len(n_rows)
dt <- dataraw$dt
# dt of the preceding row; NA for row 1, which has no predecessor.
prev_dt <- c(NA, dt)[row_id]

# A run starts on a regular-step row that directly follows a gap. Row 2 is
# always taken as the first start and the last row is never a start; both
# rules are kept from the original loop (2:(n - 1) with an `i == 2` test).
# which() drops the NA comparisons instead of failing on them.
start_rows <- which(
  row_id > 1L & row_id < n_rows &
    (row_id == 2L | (prev_dt > sample_step & dt == sample_step))
)

# A run ends on a gap row that follows a regular-step row, or on any row
# whose dt is missing (the final row of the series).
end_rows <- which(
  is.na(dt) | (dt > sample_step & prev_dt == sample_step)
)

Start <- data.frame(
  Time = dataraw[["Time"]][start_rows],
  value = dataraw[["value"]][start_rows],
  variable = rep("Start", length(start_rows)),
  stringsAsFactors = FALSE
)

End <- data.frame(
  Time = dataraw[["Time"]][end_rows],
  value = dataraw[["value"]][end_rows],
  variable = rep("End", length(end_rows)),
  stringsAsFactors = FALSE
)

Events <- rbind(Start, End)

# rbind() on data frames matches columns by name, so the differing column
# order of Events and dataraw is harmless; the result follows Events' order.
dataraw <- dataraw[, c("Time", "variable", "value")]
dataraw <- rbind(Events, dataraw)
Était-ce utile?

La solution

I think you may have your data in the wrong form for what you're trying to do.

The start and end times appear to have been created as new points rather than as properties of the existing points. So ggplot plots two points at the same Time and rawPressure1 value, and the coloured Start/End points end up hidden.

EDIT: At the end of your loop, instead of using rbind to combine the data frames, use merge, then get rid of the extra columns. That creates a data frame which contains the points, their values, and a column which is Start, End or NA. This new column can then be used to colour the points.

So, in place of the very last line of your code (the final rbind), add these two lines:

# Left-join the Start/End labels onto the raw series by Time. Rows of
# `dataraw` without a matching event get NA in the Events columns
# (all.x = TRUE). Assumes `dataraw` is still the raw series, i.e. this runs
# instead of the final rbind(Events, dataraw).
data <- merge(x = dataraw, y = Events, by = "Time", all.x = TRUE)
# Both inputs carry `value` and `variable`, so merge() suffixes them with
# ".x" (from dataraw) and ".y" (from Events). Keep the time, the measured
# value and the event label, selected by name rather than by position, and
# give them the names the plot uses (`rawPressure1`, `pos`).
data <- setNames(
  data[, c("Time", "value.x", "variable.y")],
  c("Time", "rawPressure1", "pos")
)

This will create the dataframe with Start/End as an attribute for the appropriate times.

Then plotting as before

# Scatter plot of rawPressure1 against Time, coloured by the Start/End label
# in `pos` (converted to a factor for a discrete colour scale).
p <- ggplot(data = data, mapping = aes(x = Time, y = rawPressure1)) +
  geom_point(mapping = aes(colour = factor(pos)))
p

Figure

Autres conseils

If I understood you correctly (and your main problem is the plot), you should try qplot;

look at this tutorial

Licencié sous: CC-BY-SA avec attribution
Non affilié à StackOverflow
scroll top