We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 9c0f5ba, commit 469b904 (Copy full SHA for 469b904)
1 file changed
server/preprocessing/other-scripts/features.R
@@ -2,11 +2,10 @@ library(stringr)
2
vflog <- getLogger('vis.features')
3
4
# Split text into tokens on every character that is neither alphanumeric
# nor a hyphen, so hyphenated terms such as "deep-learning" stay intact.
#
# x: an object that as.character() can turn into a character vector.
#
# Returns the non-empty tokens of all elements of x as one flat vector,
# in order of appearance.
TypeCountTokenizer <- function(x) {
  tokens <- unlist(
    strsplit(as.character(x), "[^[:alnum:]-]"),
    use.names = FALSE
  )
  # strsplit() emits "" between adjacent separators (e.g. "; ") and for a
  # leading separator; those are not real tokens, so drop them.
  tokens[nzchar(tokens)]
}
9
+
10
create_corpus <- function(metadata, text, stops) {
11
docs <- data.frame(doc_id = text$id, text = text$content)
12
corpus <- VCorpus(DataframeSource(docs))
0 commit comments