Question

I can't figure out what's going on with data.table in this situation:

#' Add zero-filled columns `col1`, ..., `col<totCols>` to a data.table.
#'
#' Columns are assigned by reference with `:=`; any of these columns that
#' already exist are overwritten with 0.
#'
#' NOTE: as the transcript below shows, once the table's pre-allocated column
#' slots are exhausted the columns added here may not show up in the caller's
#' table. Capturing the return value (`tbl <- fooFun(tbl, n)`) avoids relying
#' on the by-reference side effect.
#'
#' @param tbl A data.table to extend.
#' @param totCols Highest column index to create; values below 1 add nothing.
#' @return The result of the `:=` assignment, or `tbl` (invisibly) unchanged
#'   when `totCols` is below 1.
fooFun <- function(tbl, totCols) {
    # 1:totCols would count down (1, 0, ...) for totCols < 1 and create bogus
    # "col1"/"col0" columns, so bail out early and use seq_len() otherwise.
    if (totCols < 1) {
        return(invisible(tbl))
    }
    tbl[, paste0("col", seq_len(totCols)) := 0]
}

Start with an empty 1-col data table.

> tbl = data.table(initialCol=double())

Then add 99 columns by reference:

> fooFun(tbl, 99)
> colnames(tbl)
  [1] "initialCol" "col1"       "col2"       "col3"       "col4"       "col5"       "col6"       "col7"       "col8"       "col9"       "col10"      "col11"      "col12"      "col13"     
 [15] "col14"      "col15"      "col16"      "col17"      "col18"      "col19"      "col20"      "col21"      "col22"      "col23"      "col24"      "col25"      "col26"      "col27"     
 [29] "col28"      "col29"      "col30"      "col31"      "col32"      "col33"      "col34"      "col35"      "col36"      "col37"      "col38"      "col39"      "col40"      "col41"     
 [43] "col42"      "col43"      "col44"      "col45"      "col46"      "col47"      "col48"      "col49"      "col50"      "col51"      "col52"      "col53"      "col54"      "col55"     
 [57] "col56"      "col57"      "col58"      "col59"      "col60"      "col61"      "col62"      "col63"      "col64"      "col65"      "col66"      "col67"      "col68"      "col69"     
 [71] "col70"      "col71"      "col72"      "col73"      "col74"      "col75"      "col76"      "col77"      "col78"      "col79"      "col80"      "col81"      "col82"      "col83"     
 [85] "col84"      "col85"      "col86"      "col87"      "col88"      "col89"      "col90"      "col91"      "col92"      "col93"      "col94"      "col95"      "col96"      "col97"     
 [99] "col98"      "col99" 

All looks good. Now add the 100th column:

> fooFun(tbl, 100)
> colnames(tbl)
  [1] "initialCol" "col1"       "col2"       "col3"       "col4"       "col5"       "col6"       "col7"       "col8"       "col9"       "col10"      "col11"      "col12"      "col13"     
 [15] "col14"      "col15"      "col16"      "col17"      "col18"      "col19"      "col20"      "col21"      "col22"      "col23"      "col24"      "col25"      "col26"      "col27"     
 [29] "col28"      "col29"      "col30"      "col31"      "col32"      "col33"      "col34"      "col35"      "col36"      "col37"      "col38"      "col39"      "col40"      "col41"     
 [43] "col42"      "col43"      "col44"      "col45"      "col46"      "col47"      "col48"      "col49"      "col50"      "col51"      "col52"      "col53"      "col54"      "col55"     
 [57] "col56"      "col57"      "col58"      "col59"      "col60"      "col61"      "col62"      "col63"      "col64"      "col65"      "col66"      "col67"      "col68"      "col69"     
 [71] "col70"      "col71"      "col72"      "col73"      "col74"      "col75"      "col76"      "col77"      "col78"      "col79"      "col80"      "col81"      "col82"      "col83"     
 [85] "col84"      "col85"      "col86"      "col87"      "col88"      "col89"      "col90"      "col91"      "col92"      "col93"      "col94"      "col95"      "col96"      "col97"     
 [99] "col98"      "col99"  

What? `col100` is not there... Now add one column outside of the function call:

> tbl[, newCol := 5]
> colnames(tbl)
  [1] "initialCol" "col1"       "col2"       "col3"       "col4"       "col5"       "col6"       "col7"       "col8"       "col9"       "col10"      "col11"      "col12"      "col13"     
 [15] "col14"      "col15"      "col16"      "col17"      "col18"      "col19"      "col20"      "col21"      "col22"      "col23"      "col24"      "col25"      "col26"      "col27"     
 [29] "col28"      "col29"      "col30"      "col31"      "col32"      "col33"      "col34"      "col35"      "col36"      "col37"      "col38"      "col39"      "col40"      "col41"     
 [43] "col42"      "col43"      "col44"      "col45"      "col46"      "col47"      "col48"      "col49"      "col50"      "col51"      "col52"      "col53"      "col54"      "col55"     
 [57] "col56"      "col57"      "col58"      "col59"      "col60"      "col61"      "col62"      "col63"      "col64"      "col65"      "col66"      "col67"      "col68"      "col69"     
 [71] "col70"      "col71"      "col72"      "col73"      "col74"      "col75"      "col76"      "col77"      "col78"      "col79"      "col80"      "col81"      "col82"      "col83"     
 [85] "col84"      "col85"      "col86"      "col87"      "col88"      "col89"      "col90"      "col91"      "col92"      "col93"      "col94"      "col95"      "col96"      "col97"     
 [99] "col98"      "col99"      "newCol"  

All good. Now add that 100th column:

> fooFun(tbl, 100)
> colnames(tbl)
  [1] "initialCol" "col1"       "col2"       "col3"       "col4"       "col5"       "col6"       "col7"       "col8"       "col9"       "col10"      "col11"      "col12"      "col13"     
 [15] "col14"      "col15"      "col16"      "col17"      "col18"      "col19"      "col20"      "col21"      "col22"      "col23"      "col24"      "col25"      "col26"      "col27"     
 [29] "col28"      "col29"      "col30"      "col31"      "col32"      "col33"      "col34"      "col35"      "col36"      "col37"      "col38"      "col39"      "col40"      "col41"     
 [43] "col42"      "col43"      "col44"      "col45"      "col46"      "col47"      "col48"      "col49"      "col50"      "col51"      "col52"      "col53"      "col54"      "col55"     
 [57] "col56"      "col57"      "col58"      "col59"      "col60"      "col61"      "col62"      "col63"      "col64"      "col65"      "col66"      "col67"      "col68"      "col69"     
 [71] "col70"      "col71"      "col72"      "col73"      "col74"      "col75"      "col76"      "col77"      "col78"      "col79"      "col80"      "col81"      "col82"      "col83"     
 [85] "col84"      "col85"      "col86"      "col87"      "col88"      "col89"      "col90"      "col91"      "col92"      "col93"      "col94"      "col95"      "col96"      "col97"     
 [99] "col98"      "col99"      "newCol"     "col100"    

It's there now. Now add 20 more:

> fooFun(tbl, 120)
> colnames(tbl)
  [1] "initialCol" "col1"       "col2"       "col3"       "col4"       "col5"       "col6"       "col7"       "col8"       "col9"       "col10"      "col11"      "col12"      "col13"     
 [15] "col14"      "col15"      "col16"      "col17"      "col18"      "col19"      "col20"      "col21"      "col22"      "col23"      "col24"      "col25"      "col26"      "col27"     
 [29] "col28"      "col29"      "col30"      "col31"      "col32"      "col33"      "col34"      "col35"      "col36"      "col37"      "col38"      "col39"      "col40"      "col41"     
 [43] "col42"      "col43"      "col44"      "col45"      "col46"      "col47"      "col48"      "col49"      "col50"      "col51"      "col52"      "col53"      "col54"      "col55"     
 [57] "col56"      "col57"      "col58"      "col59"      "col60"      "col61"      "col62"      "col63"      "col64"      "col65"      "col66"      "col67"      "col68"      "col69"     
 [71] "col70"      "col71"      "col72"      "col73"      "col74"      "col75"      "col76"      "col77"      "col78"      "col79"      "col80"      "col81"      "col82"      "col83"     
 [85] "col84"      "col85"      "col86"      "col87"      "col88"      "col89"      "col90"      "col91"      "col92"      "col93"      "col94"      "col95"      "col96"      "col97"     
 [99] "col98"      "col99"      "newCol"     "col100"     "col101"     "col102"     "col103"     "col104"     "col105"     "col106"     "col107"     "col108"     "col109"     "col110"    
[113] "col111"     "col112"     "col113"     "col114"     "col115"     "col116"     "col117"     "col118"     "col119"     "col120"    

Looks OK. Now add a bunch more:

> fooFun(tbl, 240)
> colnames(tbl)
  [1] "initialCol" "col1"       "col2"       "col3"       "col4"       "col5"       "col6"       "col7"       "col8"       "col9"       "col10"      "col11"      "col12"      "col13"     
 [15] "col14"      "col15"      "col16"      "col17"      "col18"      "col19"      "col20"      "col21"      "col22"      "col23"      "col24"      "col25"      "col26"      "col27"     
 [29] "col28"      "col29"      "col30"      "col31"      "col32"      "col33"      "col34"      "col35"      "col36"      "col37"      "col38"      "col39"      "col40"      "col41"     
 [43] "col42"      "col43"      "col44"      "col45"      "col46"      "col47"      "col48"      "col49"      "col50"      "col51"      "col52"      "col53"      "col54"      "col55"     
 [57] "col56"      "col57"      "col58"      "col59"      "col60"      "col61"      "col62"      "col63"      "col64"      "col65"      "col66"      "col67"      "col68"      "col69"     
 [71] "col70"      "col71"      "col72"      "col73"      "col74"      "col75"      "col76"      "col77"      "col78"      "col79"      "col80"      "col81"      "col82"      "col83"     
 [85] "col84"      "col85"      "col86"      "col87"      "col88"      "col89"      "col90"      "col91"      "col92"      "col93"      "col94"      "col95"      "col96"      "col97"     
 [99] "col98"      "col99"      "newCol"     "col100"     "col101"     "col102"     "col103"     "col104"     "col105"     "col106"     "col107"     "col108"     "col109"     "col110"    
[113] "col111"     "col112"     "col113"     "col114"     "col115"     "col116"     "col117"     "col118"     "col119"     "col120"     "col121"     "col122"     "col123"     "col124"    
[127] "col125"     "col126"     "col127"     "col128"     "col129"     "col130"     "col131"     "col132"     "col133"     "col134"     "col135"     "col136"     "col137"     "col138"    
[141] "col139"     "col140"     "col141"     "col142"     "col143"     "col144"     "col145"     "col146"     "col147"     "col148"     "col149"     "col150"     "col151"     "col152"    
[155] "col153"     "col154"     "col155"     "col156"     "col157"     "col158"     "col159"     "col160"     "col161"     "col162"     "col163"     "col164"     "col165"     "col166"    
[169] "col167"     "col168"     "col169"     "col170"     "col171"     "col172"     "col173"     "col174"     "col175"     "col176"     "col177"     "col178"     "col179"     "col180"    
[183] "col181"     "col182"     "col183"     "col184"     "col185"     "col186"     "col187"     "col188"     "col189"     "col190"     "col191"     "col192"     "col193"     "col194"    
[197] "col195"     "col196"     "col197"     "col198" 

No good: the table stops at `col198` (200 columns in total); `col199` through `col240` are missing.

What's going on?

Was it helpful?

Solution

@Arun pointed out that this issue has already been addressed on the mailing list: #5204. Following the advice in that thread, I increased the default number of column pointers that are allocated when a data.table is created:

# Raise the number of column pointers pre-allocated when a data.table is
# created to 900, so that adding columns by reference does not run into the
# default pre-allocation limit.
options(datatable.alloccol = 900)

This way, the table no longer hits the default pre-allocation of 100 columns when the number of columns on an already-created table grows beyond 100. This works around the underlying issue (the object is shallow-copied once the pre-allocation limit is reached), so it produces the expected behavior for the tests in this SO question.

Licensed under: CC-BY-SA with attribution
Not affiliated with StackOverflow
scroll top