Skip to content

Commit dc8cc6b

Browse files
authored
Merge pull request #629 from OpenKnowledgeMaps/base-query-sanitization
Base query sanitization
2 parents 1e2bfa5 + 0973e1f commit dc8cc6b

2 files changed

Lines changed: 3 additions & 0 deletions

File tree

server/preprocessing/other-scripts/base.R

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,8 @@ get_papers <- function(query, params, limit=100,
140140
subject_cleaned = gsub(" ?\\d[:?-?]?(\\d+.)+", "", subject_cleaned) # replace residuals like 5:621.313.323 or '5-76.95'
141141
subject_cleaned = gsub("\\w+:\\w+-(\\w+\\/)+", "", subject_cleaned) # replace residuals like Info:eu-repo/classification/
142142
subject_cleaned = gsub("^; $", "", subject_cleaned) # blank out entries that consist only of a leftover "; " separator
143+
subject_cleaned = gsub(",", ", ", subject_cleaned) # clean up keyword separation
144+
subject_cleaned = gsub("\\s+", " ", subject_cleaned) # clean up keyword separation
143145

144146
metadata$subject = subject_cleaned
145147

server/preprocessing/other-scripts/utils.R

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ library(stringdist)
22
library(logging)
33

44
# Strip backslashes from a query and normalise curly double quotes.
#
# Args:
#   query: character vector holding the query text.
#
# Returns:
#   A list with two elements:
#     raw_query       - `query` with every backslash removed.
#     sanitized_query - `raw_query` with curly double quotes replaced by
#                       straight double quotes (").
sanitize_query <- function(query) {
  # fixed = TRUE makes "\\" a literal single backslash; as a regular
  # expression it would be invalid. TRUE is spelled out because the
  # shorthand T is an ordinary variable that can be reassigned.
  query <- gsub("\\", "", query, fixed = TRUE)
  sanitized_query <- gsub('[“”]', '"', query)
  list(raw_query = query, sanitized_query = sanitized_query)
}

0 commit comments

Comments
 (0)