A solution that uses `parse` and `eval`:
# Convert a vector of formatted amounts to plain numbers.
#
# Recognised decorations: "$" and "," are stripped; a "K" suffix multiplies by
# 1e+3, "M" by 1e+6 and "%" by 1e-2 (e.g. "$16.79 M" -> 16790000,
# "39.27%" -> 0.3927).
#
# Args:
#   X: a vector (character, factor, numeric, ...), typically one column of a
#      data frame.
#
# Returns:
#   An unnamed numeric vector of the same length as X when every non-missing
#   entry is a formatted number (missing entries become NA); otherwise X is
#   returned unchanged.
ToNumber <- function(X)
{
  # Already numeric: skip the text round trip, which would lose precision.
  if (is.numeric(X)) return(as.numeric(X))

  # Rewrite each entry as a tiny arithmetic expression: "16.79 *1e+6".
  A <- gsub("\\$|,", "", as.character(X))
  A <- gsub("M", "*1e+6", A, fixed = TRUE)
  A <- gsub("K", "*1e+3", A, fixed = TRUE)
  A <- gsub("%", "*1e-2", A, fixed = TRUE)

  # SECURITY: the strings come from data and are handed to eval(parse()) below.
  # Only evaluate text that is a single numeric literal optionally followed by
  # the "*1e+N" factors inserted above. Without this whitelist, entries such
  # as "2020-01-01" or "555-1234" would be silently computed as arithmetic,
  # names such as "pi" would be looked up, and arbitrary code could be run.
  number  <- "[-+]?([0-9]+\\.?[0-9]*|\\.[0-9]+)([eE][-+]?[0-9]+)?"
  factors <- "([[:space:]]*\\*1e[-+][0-9]+)*"
  safe    <- paste0("^[[:space:]]*", number, factors, "[[:space:]]*$")

  is_missing <- is.na(A)
  is_safe    <- grepl(safe, A)   # FALSE for NA entries

  # Nothing to convert, or at least one entry is not a formatted number:
  # hand the input back untouched.
  if (length(A) == 0L || all(is_missing) || !all(is_safe | is_missing)) {
    return(X)
  }

  B <- rep(NA_real_, length(A))
  # baseenv() is enough for literals and `*`, and keeps caller variables out
  # of reach; vapply() guarantees exactly one double per entry.
  B[is_safe] <- vapply(
    A[is_safe],
    function(a) eval(parse(text = a), envir = baseenv()),
    numeric(1),
    USE.NAMES = FALSE
  )
  B
}
#----------------------------------------------------------------------
# Example:
# Sample table: quoted fields keep the space between an amount and its K/M suffix.
X <- read.table(
  text = 'Category LaunchedProjects TotalDollars SuccessfulDollars UnsuccessfulDollars LiveDollars LiveProjects SuccessRate
Food 3,069 "$16.79 M" "$13.18 M" "$2.78 M" "$822.64 K" 189 39.27%
Theater 4,155 "$13.45 M" "$12.01 M" "$1.22 M" "$217.86 K" 111 64.09%
Comics 2,242 "$12.88 M" "$11.07 M" "$941.31 K" "$862.18 K" 134 46.11%
Fashion 2,799 "$9.62 M" "$7.59 M" "$1.44 M" "$585.98 K" 204 27.24%
Photography 2,794 "$6.76 M" "$5.48 M" "$1.06 M" "$220.75 K" 83 36.81%
Dance 1,185 "$3.43 M" "$3.13 M" "$225.82 K" "$71,322" 40 70.22%',
  header = TRUE
)

# Convert column by column; columns that are not numeric-looking come back as-is.
numX <- as.data.frame(lapply(X, ToNumber))

# Widen the console so all columns print on a single row.
options(width = 1000)
print(numX, row.names = FALSE)
# Category LaunchedProjects TotalDollars SuccessfulDollars UnsuccessfulDollars LiveDollars LiveProjects SuccessRate
# Food 3069 16790000 13180000 2780000 822640 189 0.3927
# Theater 4155 13450000 12010000 1220000 217860 111 0.6409
# Comics 2242 12880000 11070000 941310 862180 134 0.4611
# Fashion 2799 9620000 7590000 1440000 585980 204 0.2724
# Photography 2794 6760000 5480000 1060000 220750 83 0.3681
# Dance 1185 3430000 3130000 225820 71322 40 0.7022