Create a new data frame based on another dataframe

Question 1

Below is code that gives you the answer in the exact format that you wanted, except I split your "LOCAT" column into two columns entitled "Starts" and "Stops". This code will work for your entire data frame, no need to replicate it manually for each CODE (CODE1, CODE2, etc).

It assumes that the only non-CODE columns are named "LOCAT", "START", and "END".

# need package "plyr"
library("plyr")

# Example data frame from the question: one row per location with its
# START/END coordinates, plus one column per code (1, 0 or -1).
test2 <- data.frame(
  LOCAT = 1:6,
  START = c(120, 345, 765, 1045, 1347, 1879),
  END = c(150, 390, 803, 1120, 1436, 1935),
  CODE1 = c(1, 1, 0, 1, 0, 0),
  CODE2 = c(1, 0, 0, 0, -1, -1)
)

# Every column that is not one of the three fixed columns is treated as a code.
codeNames <- setdiff(names(test2), c("LOCAT", "START", "END"))

# Stack the code columns into long format: "Code" indexes which code column a
# row came from and "CodeValue" holds that column's value.
test3 <- reshape(
  test2,
  direction = "long",
  varying = codeNames,
  v.names = "CodeValue",
  timevar = "Code"
)

# reshape() appends an "id" column that is not needed downstream; drop it.
test4 <- test3[, names(test3) != "id"]

# sss: starts, stops and sizes of the runs of consecutive non-zero code values.
#
# Args:
#   x: data frame with columns "CodeValue", "START" and "END"; its rows are
#      taken in their current order.
#
# Returns:
#   A data frame with one row per run of identical, non-zero CodeValue:
#     Value  - the code value shared by the run
#     Starts - row index (within x) of the first row of the run
#     Stops  - row index (within x) of the last row of the run
#     Size   - END of the last row minus START of the first row
#   Runs whose value is 0 or NA are dropped. A zero-row x yields a zero-row
#   result with the same four columns.
sss <- function(x) {
  runs <- rle(x[, "CodeValue"]) # lengths and values of consecutive runs
  stops <- cumsum(runs$lengths) # last row index of every run
  # First row index of every run. Derived from the run lengths so that the
  # vector has the same length as `stops` even when there are no runs at all
  # (c(1, head(stops, -1) + 1) would have length 1 for empty input).
  starts <- stops - runs$lengths + 1

  all_runs <- data.frame(
    "Value" = runs$values,
    "Starts" = starts,
    "Stops" = stops
  )
  # Keep only the non-zero runs. which() drops NA comparisons, so a run of
  # missing codes is removed rather than turned into an all-NA row.
  kept_runs <- all_runs[which(all_runs[, "Value"] != 0), ]

  # Size spans from the START of the run's first row to the END of its last.
  size <- x[kept_runs[, "Stops"], "END"] - x[kept_runs[, "Starts"], "START"]

  data.frame(kept_runs, "Size" = size)
}

# Run sss() separately on the rows of each code (this is why the data was
# reshaped to long format) and stack the per-code results with plyr::ddply().
answer0 <- ddply(test4, "Code", sss)

# Work on a copy so the numeric version stays available in answer0.
answer <- answer0

# Translate the numeric codes into labels: -1 -> "NEG", 1 -> "POS".
# Adding 2 maps the values -1/0/1 onto positions 1/2/3 of the lookup vector.
answer[["Value"]] <- c("NEG", NA, "POS")[answer0[["Value"]] + 2]

Hopefully this helps, good luck!

Question 2

Use run-length encoding to determine groups where CODE1 takes the same value.

# Run-length encode CODE1: consecutive equal values collapse into one run.
rle_of_CODE1 <- rle(df1[["CODE1"]])

For convenience, find the runs where the value is non-zero, and the lengths of the corresponding blocks.

# Flag the runs whose value is non-zero ...
CODE1_is_nonzero <- rle_of_CODE1[["values"]] != 0
# ... and keep the length (number of rows) of each of those runs.
n <- rle_of_CODE1[["lengths"]][CODE1_is_nonzero]

Ignore the parts of df1 where CODE1 is zero.

# Keep only the rows whose CODE1 is non-zero (rows with a missing CODE1 are
# dropped as well, exactly as subset() would do).
df1_with_nonzero_CODE1 <- df1[
  !is.na(df1[["CODE1"]]) & df1[["CODE1"]] != 0, ,
  drop = FALSE
]

Define a group based on the contiguous blocks we found with rle.

# Number the non-zero runs 1, 2, ... and repeat each number once per row of
# its run, so every remaining row is tagged with the block it belongs to.
df1_with_nonzero_CODE1[["GROUP"]] <- rep(seq_along(n), n)

Use ddply to get summary stats for each group.

# For every contiguous non-zero block: first and last LOCAT, and the span from
# the smallest START to the largest END.
summarised_by_CODE1 <- ddply(
  df1_with_nonzero_CODE1,
  "GROUP",
  summarise,
  MinOfLOCAT = min(LOCAT),
  MaxOfLOCAT = max(LOCAT),
  SIZE = max(END) - min(START)
)

# Label each block by the value of its run: 1 is "POS", anything else "NEG".
summarised_by_CODE1[["VALUE"]] <- ifelse(
  rle_of_CODE1[["values"]][CODE1_is_nonzero] == 1,
  "POS",
  "NEG"
)

summarised_by_CODE1
# Output, assuming df1 is the example data from the question. GROUP is
# numbered consecutively because it was built with seq_along().
##   GROUP MinOfLOCAT MaxOfLOCAT SIZE VALUE
## 1     1          1          2  270   POS
## 2     2          4          4   75   POS
Now repeat with CODE2.