For whatever reason, it seems that `qdap:::strip` always strips "/" out of character vectors. This happens in the source code towards the end of the function:
x <- clean(gsub("/", " ", gsub("-", " ", x)))
This line runs before the inner function that actually does the stripping, which is defined in the body of `strip`.
....
So just replace the function with your own version:
# Strip punctuation from text, optionally keeping selected characters.
#
# Unlike the qdap line quoted above, this version does not turn "/" and "-"
# into spaces first, so they can be preserved by listing them in `char.keep`.
#
# Arguments:
#   x                 Character vector (or list of character vectors) to clean.
#   char.keep         Punctuation characters to preserve, or NULL for none.
#                     Each entry is backslash-escaped and spliced into a regex.
#   digit.remove      If TRUE, digits are removed.
#   apostrophe.remove If TRUE, apostrophes are removed; otherwise they are kept.
#   lower.case        If TRUE, the text is converted to lower case.
#
# Returns the unlisted cleaned strings, with leading and trailing whitespace
# removed from each one.
strip.new <- function (x, char.keep = "~~", digit.remove = TRUE, apostrophe.remove = TRUE,
    lower.case = TRUE)
{
    # Base-R stand-in for qdap's Trim(), so the function also works when qdap
    # is not attached.
    trim <- function(s) gsub("^\\s+|\\s+$", "", s)

    # Things that survive stripping: end of string, apostrophes, the escaped
    # `char.keep` entries (if any), and every non-punctuation character.
    keep <- c("$", "'")
    if (!is.null(char.keep)) {
        keep <- c(keep, paste0("\\", char.keep))
    }
    pattern <- paste0(".*?(", paste(c(keep, "[^[:punct:]]"), collapse = "|"),
        ").*?")

    strp <- function(s) {
        out <- gsub(pattern, "\\1", as.character(s))
        if (lower.case) {
            out <- tolower(out)
        }
        if (apostrophe.remove) {
            out <- gsub("'", "", out)
        }
        # Plain `if` rather than ifelse(): with a scalar test, ifelse() would
        # return only the first element of a multi-element input.
        if (digit.remove) {
            out <- gsub("[[:digit:]]", "", out)
        }
        # Trim last: removing digits can leave whitespace at either end.
        trim(out)
    }

    unlist(lapply(x, strp))
}
# Example call: keep "/" and digits while stripping all other punctuation.
strip.new(htxt, char.keep = "/", digit.remove = FALSE, apostrophe.remove = TRUE, lower.case = TRUE)
#[1] "rtf1ansiansicpg1252cocoartf1038cocoasubrtf360/"
#[2] "fonttblf0fswissfcharset0 helvetica"
#[3] "margl1440margr1440vieww9000viewh8400viewkind0"
The package author is pretty active on this site, so he can probably clear up why `strip` does this by default.