Question

I would like to find all local minima of the following objective function:

func <- function(b){Mat=matrix(c(+0.5*1/((1/(exp(-b[1]-b[2]*-5)+1))*(1-(1/(exp(-b[1]-b[2]*-5)+1))))*exp(-b[1] - b[2] * -5)/(exp(-b[1] - b[2] * -5) + 1)^2*exp(-b[1] - b[2] * -5)/(exp(-b[1] - b[2] * -5) + 1)^2+0.5*1/((1/(exp(-b[1]-b[2]*5)+1))*(1-(1/(exp(-b[1]-b[2]*5)+1))))*exp(-b[1] - b[2] * 5)/(exp(-b[1] - b[2] * 5) + 1)^2*exp(-b[1] - b[2] * 5)/(exp(-b[1] - b[2] * 5) + 1)^2,+0.5*1/((1/(exp(-b[1]-b[2]*-5)+1))*(1-(1/(exp(-b[1]-b[2]*-5)+1))))*exp(-b[1] - b[2] * -5) * -5/(exp(-b[1] - b[2] * -5) + 1)^2*exp(-b[1] - b[2] * -5)/(exp(-b[1] - b[2] * -5) + 1)^2+0.5*1/((1/(exp(-b[1]-b[2]*5)+1))*(1-(1/(exp(-b[1]-b[2]*5)+1))))*exp(-b[1] - b[2] * 5) * 5/(exp(-b[1] - b[2] * 5) + 1)^2*exp(-b[1] - b[2] * 5)/(exp(-b[1] - b[2] * 5) + 1)^2,+0.5*1/((1/(exp(-b[1]-b[2]*-5)+1))*(1-(1/(exp(-b[1]-b[2]*-5)+1))))*exp(-b[1] - b[2] * -5)/(exp(-b[1] - b[2] * -5) + 1)^2*exp(-b[1] - b[2] * -5) * -5/(exp(-b[1] - b[2] * -5) + 1)^2+0.5*1/((1/(exp(-b[1]-b[2]*5)+1))*(1-(1/(exp(-b[1]-b[2]*5)+1))))*exp(-b[1] - b[2] * 5)/(exp(-b[1] - b[2] * 5) + 1)^2*exp(-b[1] - b[2] * 5) * 5/(exp(-b[1] - b[2] * 5) + 1)^2,+0.5*1/((1/(exp(-b[1]-b[2]*-5)+1))*(1-(1/(exp(-b[1]-b[2]*-5)+1))))*exp(-b[1] - b[2] * -5) * -5/(exp(-b[1] - b[2] * -5) + 1)^2*exp(-b[1] - b[2] * -5) * -5/(exp(-b[1] - b[2] * -5) + 1)^2+0.5*1/((1/(exp(-b[1]-b[2]*5)+1))*(1-(1/(exp(-b[1]-b[2]*5)+1))))*exp(-b[1] - b[2] * 5) * 5/(exp(-b[1] - b[2] * 5) + 1)^2*exp(-b[1] - b[2] * 5) * 5/(exp(-b[1] - b[2] * 5) + 1)^2),2,2);d=(det(Mat));return(d)}

'func' is the determinant of the Fisher information matrix of a logistic regression model. It is a function of the parameters b1 and b2, where b1 belongs to [-.3, .3] and b2 to [6, 8].

Consider these two initial values for b = c(b1, b2):

> in1 <- c(-0.04785405, 6.42711047)
> in2 <- c(0.2246729, 7.5211575)

Running optim from in1 gives:

> optim(in1, fn = func, lower = c(-.3, 6), upper = c(.3, 8), method = "L-BFGS-B")

$par
[1] -0.04785405  6.42711047

$value
[1] 3.07185e-27

$counts
function gradient 
   1        1 

$convergence
[1] 52

$message
[1] "ERROR: ABNORMAL_TERMINATION_IN_LNSRCH"

As can be seen in $message, the optimization terminated abnormally: the minimum could not be computed and optim returned in1 itself as the local optimum.

For in2, an error appears as well:

> optim(in2, fn = func, lower = c(-.3, 6), upper = c(.3, 8), method = "L-BFGS-B")

Error in optim(in2, fn = func, lower = c(-0.3, 6), upper = c(0.3, 8),  : 
L-BFGS-B needs finite values of 'fn'

This error happens because the value of func at in2 is NaN:

> func(in2)
[1] NaN

However, for in1 the value of the objective function can be evaluated; the optimization is nevertheless terminated because optim cannot continue the calculation at other trial values:

> func(in1)
[1] 3.07185e-27

Let me define the function without det, returning just the matrix, to see what happens:

Mat.func <- function(b){Mat=matrix(c(+0.5*1/((1/(exp(-b[1]-b[2]*-5)+1))*(1-(1/(exp(-b[1]-b[2]*-5)+1))))*exp(-b[1] - b[2] * -5)/(exp(-b[1] - b[2] * -5) + 1)^2*exp(-b[1] - b[2] * -5)/(exp(-b[1] - b[2] * -5) + 1)^2+0.5*1/((1/(exp(-b[1]-b[2]*5)+1))*(1-(1/(exp(-b[1]-b[2]*5)+1))))*exp(-b[1] - b[2] * 5)/(exp(-b[1] - b[2] * 5) + 1)^2*exp(-b[1] - b[2] * 5)/(exp(-b[1] - b[2] * 5) + 1)^2,+0.5*1/((1/(exp(-b[1]-b[2]*-5)+1))*(1-(1/(exp(-b[1]-b[2]*-5)+1))))*exp(-b[1] - b[2] * -5) * -5/(exp(-b[1] - b[2] * -5) + 1)^2*exp(-b[1] - b[2] * -5)/(exp(-b[1] - b[2] * -5) + 1)^2+0.5*1/((1/(exp(-b[1]-b[2]*5)+1))*(1-(1/(exp(-b[1]-b[2]*5)+1))))*exp(-b[1] - b[2] * 5) * 5/(exp(-b[1] - b[2] * 5) + 1)^2*exp(-b[1] - b[2] * 5)/(exp(-b[1] - b[2] * 5) + 1)^2,+0.5*1/((1/(exp(-b[1]-b[2]*-5)+1))*(1-(1/(exp(-b[1]-b[2]*-5)+1))))*exp(-b[1] - b[2] * -5)/(exp(-b[1] - b[2] * -5) + 1)^2*exp(-b[1] - b[2] * -5) * -5/(exp(-b[1] - b[2] * -5) + 1)^2+0.5*1/((1/(exp(-b[1]-b[2]*5)+1))*(1-(1/(exp(-b[1]-b[2]*5)+1))))*exp(-b[1] - b[2] * 5)/(exp(-b[1] - b[2] * 5) + 1)^2*exp(-b[1] - b[2] * 5) * 5/(exp(-b[1] - b[2] * 5) + 1)^2,+0.5*1/((1/(exp(-b[1]-b[2]*-5)+1))*(1-(1/(exp(-b[1]-b[2]*-5)+1))))*exp(-b[1] - b[2] * -5) * -5/(exp(-b[1] - b[2] * -5) + 1)^2*exp(-b[1] - b[2] * -5) * -5/(exp(-b[1] - b[2] * -5) + 1)^2+0.5*1/((1/(exp(-b[1]-b[2]*5)+1))*(1-(1/(exp(-b[1]-b[2]*5)+1))))*exp(-b[1] - b[2] * 5) * 5/(exp(-b[1] - b[2] * 5) + 1)^2*exp(-b[1] - b[2] * 5) * 5/(exp(-b[1] - b[2] * 5) + 1)^2),2,2);d=Mat;return(d)}

We get

> Mat.func(in1)
             [,1]         [,2]
[1,] 1.109883e-14 2.784007e-15
[2,] 2.784007e-15 2.774708e-13

> Mat.func(in2)
     [,1] [,2]
[1,]  Inf  Inf
[2,]  Inf  Inf

Hence, in double precision the elements of Mat.func(in2) overflow to Inf. I also rewrote Mat.func using the mpfr function:

Mat.func.mpfr <-function(b, prec){ d=c(+0.5*1/((1/(exp(-mpfr(b[1], precBits = prec)-mpfr(b[2], precBits = prec)*-5)+1))*(1-(1/(exp(-mpfr(b[1], precBits = prec)-mpfr(b[2], precBits = prec)*-5)+1))))*exp(-mpfr(b[1], precBits = prec) - mpfr(b[2], precBits = prec) * -5)/(exp(-mpfr(b[1], precBits = prec) - mpfr(b[2], precBits = prec) * -5) + 1)^2*exp(-mpfr(b[1], precBits = prec) - mpfr(b[2], precBits = prec) * -5)/(exp(-mpfr(b[1], precBits = prec) - mpfr(b[2], precBits = prec) * -5) + 1)^2+0.5*1/((1/(exp(-mpfr(b[1], precBits = prec)-mpfr(b[2], precBits = prec)*5)+1))*(1-(1/(exp(-mpfr(b[1], precBits = prec)-mpfr(b[2], precBits = prec)*5)+1))))*exp(-mpfr(b[1], precBits = prec) - mpfr(b[2], precBits = prec) * 5)/(exp(-mpfr(b[1], precBits = prec) - mpfr(b[2], precBits = prec) * 5) + 1)^2*exp(-mpfr(b[1], precBits = prec) - mpfr(b[2], precBits = prec) * 5)/(exp(-mpfr(b[1], precBits = prec) - mpfr(b[2], precBits = prec) * 5) + 1)^2,
                                   +0.5*1/((1/(exp(-mpfr(b[1], precBits = prec)-mpfr(b[2], precBits = prec)*-5)+1))*(1-(1/(exp(-mpfr(b[1], precBits = prec)-mpfr(b[2], precBits = prec)*-5)+1))))*exp(-mpfr(b[1], precBits = prec) - mpfr(b[2], precBits = prec) * -5) * -5/(exp(-mpfr(b[1], precBits = prec) - mpfr(b[2], precBits = prec) * -5) + 1)^2*exp(-mpfr(b[1], precBits = prec) - mpfr(b[2], precBits = prec) * -5)/(exp(-mpfr(b[1], precBits = prec) - mpfr(b[2], precBits = prec) * -5) + 1)^2+0.5*1/((1/(exp(-mpfr(b[1], precBits = prec)-mpfr(b[2], precBits = prec)*5)+1))*(1-(1/(exp(-mpfr(b[1], precBits = prec)-mpfr(b[2], precBits = prec)*5)+1))))*exp(-mpfr(b[1], precBits = prec) - mpfr(b[2], precBits = prec) * 5) * 5/(exp(-mpfr(b[1], precBits = prec) - mpfr(b[2], precBits = prec) * 5) + 1)^2*exp(-mpfr(b[1], precBits = prec) - mpfr(b[2], precBits = prec) * 5)/(exp(-mpfr(b[1], precBits = prec) - mpfr(b[2], precBits = prec) * 5) + 1)^2,
                                   +0.5*1/((1/(exp(-mpfr(b[1], precBits = prec)-mpfr(b[2], precBits = prec)*-5)+1))*(1-(1/(exp(-mpfr(b[1], precBits = prec)-mpfr(b[2], precBits = prec)*-5)+1))))*exp(-mpfr(b[1], precBits = prec) - mpfr(b[2], precBits = prec) * -5)/(exp(-mpfr(b[1], precBits = prec) - mpfr(b[2], precBits = prec) * -5) + 1)^2*exp(-mpfr(b[1], precBits = prec) - mpfr(b[2], precBits = prec) * -5) * -5/(exp(-mpfr(b[1], precBits = prec) - mpfr(b[2], precBits = prec) * -5) + 1)^2+0.5*1/((1/(exp(-mpfr(b[1], precBits = prec)-mpfr(b[2], precBits = prec)*5)+1))*(1-(1/(exp(-mpfr(b[1], precBits = prec)-mpfr(b[2], precBits = prec)*5)+1))))*exp(-mpfr(b[1], precBits = prec) - mpfr(b[2], precBits = prec) * 5)/(exp(-mpfr(b[1], precBits = prec) - mpfr(b[2], precBits = prec) * 5) + 1)^2*exp(-mpfr(b[1], precBits = prec) - mpfr(b[2], precBits = prec) * 5) * 5/(exp(-mpfr(b[1], precBits = prec) - mpfr(b[2], precBits = prec) * 5) + 1)^2,
                                   +0.5*1/((1/(exp(-mpfr(b[1], precBits = prec)-mpfr(b[2], precBits = prec)*-5)+1))*(1-(1/(exp(-mpfr(b[1], precBits = prec)-mpfr(b[2], precBits = prec)*-5)+1))))*exp(-mpfr(b[1], precBits = prec) - mpfr(b[2], precBits = prec) * -5) * -5/(exp(-mpfr(b[1], precBits = prec) - mpfr(b[2], precBits = prec) * -5) + 1)^2*exp(-mpfr(b[1], precBits = prec) - mpfr(b[2], precBits = prec) * -5) * -5/(exp(-mpfr(b[1], precBits = prec) - mpfr(b[2], precBits = prec) * -5) + 1)^2+0.5*1/((1/(exp(-mpfr(b[1], precBits = prec)-mpfr(b[2], precBits = prec)*5)+1))*(1-(1/(exp(-mpfr(b[1], precBits = prec)-mpfr(b[2], precBits = prec)*5)+1))))*exp(-mpfr(b[1], precBits = prec) - mpfr(b[2], precBits = prec) * 5) * 5/(exp(-mpfr(b[1], precBits = prec) - mpfr(b[2], precBits = prec) * 5) + 1)^2*exp(-mpfr(b[1], precBits = prec) - mpfr(b[2], precBits = prec) * 5) * 5/(exp(-mpfr(b[1], precBits = prec) - mpfr(b[2], precBits = prec) * 5) + 1)^2)
                               Mat = new("mpfrMatrix", d, Dim = c(2L, 2L))
                               return(Mat)}

Thus:

require(Rmpfr)
> Mat.func.mpfr(c(in1), prec = 54)
'mpfrMatrix' of dim(.) =  (2, 2) of precision  54   bits 
     [,1]                   
 [1,] 1.10988301365972506e-14
 [2,] 2.78400749725484580e-15
      [,2]                   
 [1,] 2.78400749725484580e-15
 [2,] 2.77470753414931256e-13

> Mat.func.mpfr(c(in2), prec = 54)
'mpfrMatrix' of dim(.) =  (2, 2) of precision  54   bits 
     [,1] [,2]
[1,]  Inf  Inf
[2,]  Inf  Inf

> Mat.func.mpfr(c(in2), prec = 55)
'mpfrMatrix' of dim(.) =  (2, 2) of precision  55   bits 
     [,1]                    
[1,]  4.16032108702067276e-17
[2,] -8.34300174643550123e-17
     [,2]                    
[1,] -8.34300174643550154e-17
[2,]  1.04008027175516816e-15

So with 55 bits of precision the matrix elements are no longer Inf. Unfortunately, mpfr changes the class of the object, and neither det nor R's optimization functions can be applied to it. To clarify, here are two examples:

> class(mpfr (1/3, 54))
[1] "mpfr"
attr(,"package")
[1] "Rmpfr"

## determinant
example1 <- function(x){
  d <- c(mpfr(x, prec = 54), 3 * mpfr(x, prec = 54), 5 * mpfr(x, prec = 54), 7 * mpfr(x, prec = 54))
  Mat = new("mpfrMatrix", d, Dim = c(2L, 2L))
  return(det(Mat))
}

> example1(2)
Error in UseMethod("determinant") : 
no applicable method for 'determinant' applied to an object of class "c('mpfrMatrix',    'mpfrArray', 'Mnumber', 'mNumber', 'mpfr', 'list', 'vector')"

##optimization 
example2 <- function(x)  ## Rosenbrock Banana function
   100 * (mpfr(x[2], prec = 54) - mpfr(x[1], prec = 54) * mpfr(x[1], prec = 54 ))^2 + (1 - mpfr(x[1], prec = 54))^2

> example2(c(-1.2, 1))
1 'mpfr' number of precision  54   bits 
[1] 24.1999999999999957
> optim(c(-1.2,1), example2)
Error in optim(c(-1.2, 1), example2) : 
(list) object cannot be coerced to type 'double'

Hence, using mpfr alone does not solve the problem.

To find all the local minima, an algorithm that tries many different random initial values should be written (a rough sketch of such a multistart loop is given below). But as can be seen, the function produces NaN for some initial values, and ignoring those values would not be a good idea because it may result in missing some local minima, especially for functions that have many local optima.
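
A rough sketch of what such a multistart loop might look like (this is only an illustration: starting values are drawn uniformly from the box, starts where func is not finite are simply skipped, and the distinct minimizers of the successful runs are collected):

set.seed(1)
n.starts <- 50
starts <- cbind(runif(n.starts, -0.3, 0.3), runif(n.starts, 6, 8))  # random starts in the box
results <- vector("list", n.starts)
for (i in seq_len(n.starts)) {
  b0 <- starts[i, ]
  if (!is.finite(func(b0))) next   # skipping NaN starts may miss some local minima
  results[[i]] <- try(optim(b0, fn = func,
                            lower = c(-0.3, 6), upper = c(0.3, 8),
                            method = "L-BFGS-B"), silent = TRUE)
}
ok   <- !sapply(results, function(r) is.null(r) || inherits(r, "try-error"))
pars <- unique(round(t(sapply(results[ok], `[[`, "par")), 4))  # distinct local minimizers found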

I was wondering whether there is any R package that can carry out the optimization with arbitrary precision, so that NaN values of the objective function are avoided?

Thank you


Solution

I think the answer (which, I think, 'agstudy' gave too) is: make sure that the function you minimize does NOT return NaN (or NA) but rather +Inf (if you minimize, or -Inf if you maximize).

2nd: Instead of log(det(.)) you REALLY should use
{ r <- determinant(., log=TRUE) ; if(r$sign <= 0) -Inf else r$modulus }

which is also more accurate. {Hint: do look at how det is defined in R!}
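
Spelled out as a small helper (the name logdet is only illustrative, not part of base R), that advice reads:

logdet <- function(M) {
  r <- determinant(M, logarithm = TRUE)   # log|det| computed on the log scale
  if (r$sign <= 0) -Inf else as.numeric(r$modulus)
}

so that inside the objective you would call logdet(Mat) instead of log(det(Mat)).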

Now to Rmpfr, on which I will reply separately. Using "mpfr" numbers should work just like standard R, says the author of Rmpfr, but you may need a bit of care. tryCatch() should not be needed, however.

OTHER TIPS

I tried to reformulate your horrible (sorry for the term) objective function. I am pretty sure we can find a simpler form. I hope that others can use this to find a solution to your optimization problem...

func1 <- function(b){
  A <- exp(-b[1]+5*b[2])
  C <- exp(-b[1]-5*b[2])
  A1 <- A + 1
  C1 <- C + 1
  D <- 1/A1
  H <- 1/C1
  K <- D*(1-D)
  J <- H*(1-H)
  M <- (A/A1^2)^2/K
  N <- (C/C1^2)^2/J

  Mat <- matrix(c( 1 * M +  1 * N,
                  -5 * M +  5 * N,
                  -5 * M +  5 * N,
                  25 * M + 25 * N), 2, 2)

  Mat <- 0.5*Mat
  d <- log(det(Mat))
  return(d)
}

EDIT

As I said, you can simplify your function even further. It looks much better:

func1 <- function(b){
  A <- exp(-b[1]+5*b[2])
  C <- exp(-b[1]-5*b[2])
  A1 <- A + 1
  C1 <- C + 1
  M <- A/A1^2
  N <- C/C1^2
  det.Mat <-25*M*N
  log(det.Mat)
}

Here are some tests comparing the two functions.

> func1(c(1,2))
[1] -16.7814
> func1(c(8,2))
[1] -17.03498
> func1(c(10,2))
[1] -18.16742
> func(c(10,2))
[1] -18.16742
> func(c(10,5))
[1] -46.83608

The reformulation minimizes the possibility of underflow/overflow (intermediate results that cannot be represented in double precision). That's why we get -Inf and not NaN (see below): -Inf is still a numeric value, suitable for further computation, as opposed to NaN, which behaves like an NA value.

> func(c(10,100))
[1] NaN
> func1(c(10,100))
[1] -Inf

Now I test your optimization call on the simpler form, and it converges, as you can see:

in1 <- c(-0.04785405, 6.42711047)
in2 <- c(0.2246729, 7.5211575)
ll <- optim(in1, fn = func1, lower = c(-.3, 6), upper = c(.3, 8), method = "L-BFGS-B")
 do.call(rbind,ll)


            function                                           gradient                                          
par         "-0.04785405"                                      "8"                                               
value       "-76.7811241751318"                                "-76.7811241751318"                               
counts      "2"                                                "2"                                               
convergence "0"                                                "0"                                               
message     "CONVERGENCE: NORM OF PROJECTED GRADIENT <= PGTOL" "CONVERGENCE: NORM OF PROJECTED GRADIENT <= PGTOL"

The same thing for in2:

optim(in2, fn = func1, lower = c(-.3, 6), upper = c(.3, 8), method = "L-BFGS-B")
$par
[1] 0.2246729 8.0000000

$value
[1] -76.78112

$counts
function gradient 
       2        2 

$convergence
[1] 0

$message
[1] "CONVERGENCE: NORM OF PROJECTED GRADIENT <= PGTOL"

Answering your problem using the Rmpfr-produced matrix (not quite efficiently, though):

Yes, determinant() is not available for mpfr matrices; however, you can simply use something like

M <- Mat.func.mpfr(in2, prec = 55)
m <- as(M, "matrix")
ldm <- determinant(m) # is already  log() !

and then use the

 { r <- determinant(., log=TRUE) ; if(r$sign <= 0) -Inf else r$modulus }

that I've mentioned above ... something much better than the "wrong by design" use of log(det(.)).
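
For instance, a minimal sketch that puts those two steps together into a plain-numeric objective (the name obj.logdet is mine; it reuses Mat.func.mpfr from the question and follows the sign convention of the snippet above):

obj.logdet <- function(b, prec = 55) {
  M <- Mat.func.mpfr(b, prec = prec)   # high-precision 2x2 Fisher information matrix
  m <- as(M, "matrix")                 # back to an ordinary double matrix
  r <- determinant(m, logarithm = TRUE)
  if (r$sign <= 0) -Inf else as.numeric(r$modulus)
}

Since obj.logdet returns an ordinary numeric value, it can be passed to optim() directly (negate it, or use control = list(fnscale = -1), depending on whether you minimize or maximize the log-determinant).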

For arbitrary precision: gmp and/or Rmpfr. You might be better off with some tryCatch() in your code instead, though (to avoid crashes when a given attempt produces that NaN error).
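
A minimal sketch of such a guard (safe.func is just an illustrative wrapper around the question's func):

safe.func <- function(b) {
  v <- tryCatch(func(b), error = function(e) NaN)  # turn hard errors into NaN
  if (is.na(v)) Inf else v                         # and NaN/NA into +Inf, since we are minimizing
}

Note that optim still needs a finite value at the starting point, and L-BFGS-B needs finite values throughout, so a guard like this mainly helps at intermediate points; a bad start still has to be handled (e.g. skipped) separately.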

Using mpfr can be useful to avoid NaN values in a function (and thus a halt in the optimization algorithm). But the mpfr output has class 'mpfr', and some R functions (such as optim and det) may not work with this class. As usual, as.numeric can be applied to convert an 'mpfr' object to a 'numeric' one.

exp(9000)
[1] Inf

require(Rmpfr)
number <- as.numeric(exp(mpfr(9000, prec = 54)))

class(number)
[1] "numeric"

round(number)
[1] 1.797693e+308

number * 1.797692e-308
[1] 3.231699

number * 1.797693e-307
[1] 32.317

number * (1/number)
[1] 1

number * .2
[1] 3.595386e+307

number * .9
[1] 1.617924e+308

number * 1.1
[1] Inf

number * 2
[1] Inf

number / 2
[1] 8.988466e+307

number + 2
[1] 1.797693e+308

number + 2 * 10 ^ 291
[1] 1.797693e+308

number + 2 * 10 ^ 292
[1] Inf

number - 2
[1] 1.797693e+308

number - 2 * 10 ^ 307
[1] 1.597693e+308

number - 2 * 10 ^ 308
[1] -Inf

Now consider the following matrix function:

mat <- function(x){
  x1 <- x[1]
  x2 <- x[2]
  d <- matrix(c(exp(5 * x1 + 4 * x2), exp(9 * x1), exp(2 * x2 + 4 * x1),
                exp(3 * x1)), 2, 2)
  return(d)
}

The elements of this matrix can easily overflow to Inf:

mat(c(300, 1))
    [,1] [,2]
[1,]  Inf  Inf
[2,]  Inf  Inf

So if the determinant were computed inside the function, instead of a numeric result we would get NaN (from Inf - Inf) and optim would terminate. To solve this problem the determinant is computed with mpfr:

func <- function (x){
  ## 'precision' (number of bits) is assumed to be defined beforehand, e.g. precision <- 120
  x1 <- mpfr(x[1], prec = precision)
  x2 <- mpfr(x[2], prec = precision)
  mat <- new("mpfrMatrix", c(exp(5 * x1 + 4 * x2), exp(9 * x1), exp(2 * x2 + 4 * x1),
                             exp(3 * x1)), Dim = c(2L, 2L))
  d <- mat[1, 1] * mat[2, 2] - mat[2, 1] * mat[1, 2]  # 2x2 determinant computed in mpfr arithmetic
  return(as.numeric(-d))
}

then for x1 = 3 and x2 = 1 we have:

func(c(3,1))
[1] 6.39842e+17

optim(c(3, 1),func)

$par
[1] 0.4500 1.4125

$value
[1] -4549.866

$counts
function gradient 
  13       NA 

$convergence
[1] 0

$message
NULL

and for x1 = 300 and x2 = 1:

func(c(300,1))
[1] 1.797693e+308

optim(c(300, 1),func)
$par
[1] 300   1

$value
[1] 1.797693e+308

$counts
function gradient 
   3       NA 

$convergence
[1] 0

$message
NULL

As can be seen, there is no halt, and optim even claims convergence. However, it seems that no real iterations take place: optim simply returns the initial values as the local minimum (and 1.797693e+308 is definitely not a local minimum of this function!). In such situations, applying mpfr only prevents the optimization process from terminating. If we really expect the algorithm to start from points whose function values are Inf in R's double precision and to iterate from there towards the local minima, then besides defining the function with the 'mpfr' class, the optimization routine itself would also need to be able to work with the 'mpfr' class.

Licensed under: CC-BY-SA with attribution
Not affiliated with StackOverflow