@G. Grothendieck's solution will probably be easier for many readers of this post. That said, I think you can fix your function by identifying the parent environment of the call to run_sql
, then using envir=
to specify that environment anytime you call a function that depends on environments -- specifically, eval()
and sqldf()
. Like so:
cpaste <- function(x) paste(x, collapse = ", ")
run_sql <- function(zcode = NULL, ..., display = TRUE, eval = TRUE, envir=parent.frame()) {
require(sqldf)
ellipsis <- as.list(match.call(expand.dots = TRUE))
ellipsis[1] <- NULL
ellipsis$inline <- NULL
ellipsis$display <- NULL
ellipsis$eval <- NULL
# print(ellipsis)
# print(lapply(ellipsis, class))
ffn <- function(x) {
if (is.name(x)) { # the argument passed is itself a variable
if (is.data.frame(eval(x, envir=envir))) {
as.character(x) # returns just the name of the data frame
} else if (is.atomic(eval(x, envir=envir))) {
cpaste(eval(x, envir=envir)) # return the atomic vector as comma-sep string
} else "_____FAIL1_____"
} else if (is.call(x)) { # the argument passed is a function call, eg 2:4
if (is.atomic(eval(x, envir=envir))) cpaste(eval(x, envir=envir)) else "_____FAIL2_____"
} else {
if (is.atomic(x)) cpaste(x) else "_____FAIL3_____"
}
}
ellipsis <- lapply(ellipsis, ffn)
zcode <- do.call(sprintf, unname(ellipsis))
if (display == TRUE) cat(paste0(zcode, "\n\n"))
if (eval == TRUE) {
sqldf(zcode, envir=envir)
} else {
zcode
}
}
This works in your test case using x and y:
foo <- function(x, y) {
run_sql("select * from %s where a in (%s)", x, y)
}
foo(dd, b)
And, with some tweaking, in a test case using do.call
and local variables:
foo <- function(y) {
bb <- dd
do.call(run_sql, list("select * from %s where a in (%s)",
as.name("bb"),
substitute(y),
envir=environment()))
}
foo(b)
To understand the problem with your original function, and to understand what environment was visible to eval()
each time it was called, I wrote a function called enveval
to wrap several sys.xxx
functions and the eval()
call. Then, back in the run_sql
function, I replaced all calls to eval
with calls to enveval
.
# enveval: Replace an eval() call with enveval() to see a description of the stack of environments experienced by eval()
enveval <- function(x, envir=parent.frame()) {
cat(paste0("EVALUATING ",as.character(x),":\n"))
stack <- data.frame(frame_num=1:sys.nframe(), call=strtrim(as.character(sys.calls()),15),
is_eval_envir=NA, vars_in_frame=NA, x_exists=NA, eval_x=NA)
for(i in 1:nrow(stack)) {
f <- which(i==stack$frame_num)
stack[f,"is_eval_envir"] <- identical(envir,sys.frame(f))
stack[f,"vars_in_frame"] <- paste(ls(envir=sys.frame(f)),collapse=",")
stack[f,"x_exists"] <- exists(as.character(x), where=sys.frame(f))
if(stack[f,"is_eval_envir"] & stack[f,"x_exists"]) {
# if all the variables to evaluate are single-element atomic, you can also run the following line:
if(is.atomic(eval(x, envir=sys.frame(f)))) {
stack[f,"eval_x"] <- cpaste(eval(x, envir=sys.frame(f)))
} else {
stack[f,"eval_x"] <- "[non-atomic]"
}
}
}
print(stack)
eval(x, envir=envir)
}
# The new run_sql where eval is replaced with enveval:
run_sql <- function(zcode = NULL, ..., display = TRUE, eval = TRUE, envir=parent.frame()) {
require(sqldf)
ellipsis <- as.list(match.call(expand.dots = TRUE))
ellipsis[1] <- NULL
ellipsis$inline <- NULL
ellipsis$display <- NULL
ellipsis$eval <- NULL
# print(ellipsis)
# print(lapply(ellipsis, class))
ffn <- function(x) {
if (is.name(x)) { # the argument passed is itself a variable
if (is.data.frame(enveval(x, envir=envir))) {
as.character(x) # returns just the name of the data frame
} else if (is.atomic(enveval(x, envir=envir))) {
cpaste(enveval(x, envir=envir)) # return the atomic vector as comma-sep string
} else "_____FAIL1_____"
} else if (is.call(x)) { # the argument passed is a function call, eg 2:4
if (is.atomic(enveval(x, envir=envir))) cpaste(enveval(x, envir=envir)) else "_____FAIL2_____"
} else {
if (is.atomic(x)) cpaste(x) else "_____FAIL3_____"
}
}
ellipsis <- lapply(ellipsis, ffn)
zcode <- do.call(sprintf, unname(ellipsis))
if (display == TRUE) cat(paste0(zcode, "\n\n"))
if (eval == TRUE) {
sqldf(zcode, envir=envir)
} else {
zcode
}
}
Playing around with the test cases shows you what enveval
sees (and what eval
would have seen) each time it's called. For example, running the first test function:
foo <- function(x, y) {
run_sql("select * from %s where a in (%s)", x, y)
}
foo(dd, b)
gave the following print-out showing that the frame for the foo(dd, b)
call was the useful environment for every call to eval()
:
EVALUATING x:
frame_num call is_eval_envir vars_in_frame x_exists eval_x
1 1 foo(dd, b) TRUE x,y TRUE [non-atomic]
2 2 run_sql("select FALSE display,ellipsis,envir,eval,ffn,zcode FALSE <NA>
3 3 lapply(ellipsis FALSE FUN,X FALSE <NA>
4 4 FUN(X[[2]], ... FALSE x TRUE <NA>
5 5 is.data.frame(e FALSE x TRUE <NA>
6 6 enveval(x, envi FALSE envir,f,i,stack,x TRUE <NA>
EVALUATING y:
frame_num call is_eval_envir vars_in_frame x_exists eval_x
1 1 foo(dd, b) TRUE x,y TRUE 5, 6, 7, 8
2 2 run_sql("select FALSE display,ellipsis,envir,eval,ffn,zcode FALSE <NA>
3 3 lapply(ellipsis FALSE FUN,X FALSE <NA>
4 4 FUN(X[[3]], ... FALSE x FALSE <NA>
5 5 is.data.frame(e FALSE x FALSE <NA>
6 6 enveval(x, envi FALSE envir,f,i,stack,x FALSE <NA>
EVALUATING y:
frame_num call is_eval_envir vars_in_frame x_exists eval_x
1 1 foo(dd, b) TRUE x,y TRUE 5, 6, 7, 8
2 2 run_sql("select FALSE display,ellipsis,envir,eval,ffn,zcode FALSE <NA>
3 3 lapply(ellipsis FALSE FUN,X FALSE <NA>
4 4 FUN(X[[3]], ... FALSE x FALSE <NA>
5 5 enveval(x, envi FALSE envir,f,i,stack,x FALSE <NA>
EVALUATING y:
frame_num call is_eval_envir vars_in_frame x_exists eval_x
1 1 foo(dd, b) TRUE x,y TRUE 5, 6, 7, 8
2 2 run_sql("select FALSE display,ellipsis,envir,eval,ffn,zcode FALSE <NA>
3 3 lapply(ellipsis FALSE FUN,X FALSE <NA>
4 4 FUN(X[[3]], ... FALSE x FALSE <NA>
5 5 cpaste(enveval( FALSE x FALSE <NA>
6 6 paste(x, collap FALSE collapse,sep FALSE <NA>
7 7 enveval(x, envi FALSE envir,f,i,stack,x FALSE <NA>
select * from x where a in (5, 6, 7, 8)
a
1 5
2 6
3 7
4 8