Skip to content

Commit 7302171

Browse files
committed
final bugfix
1 parent 880ec4c commit 7302171

1 file changed

Lines changed: 24 additions & 0 deletions

File tree

  • server/preprocessing/other-scripts

server/preprocessing/other-scripts/base.R

Lines changed: 24 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -96,11 +96,17 @@ get_papers <- function(query, params,
9696
cc <- params$custom_clustering
9797
if (!is.null(cc)) {
9898
if (cc %in% names(fieldmapper)) {
99+
# this is the generic case for existing metadata
99100
custom_clustering_query <- paste(fieldmapper[[cc]], ":", "*", sep="")
100101
base_query <- paste(base_query, custom_clustering_query)
101102
} else {
103+
# this is the special case for custom clustering on annotations
102104
custom_clustering_query <- paste("dcsubject:", cc, "*", sep="")
103105
base_query <- paste(base_query, custom_clustering_query)
106+
custom_clustering_query <- paste('textus:', '"', cc, ':"', sep="")
107+
base_query <- paste(base_query, custom_clustering_query)
108+
custom_clustering_query <- paste(cc, ':*', sep="")
109+
base_query <- paste(base_query, custom_clustering_query)
104110
}
105111
}
106112

@@ -129,6 +135,12 @@ get_papers <- function(query, params,
129135

130136
req_limit <- 9
131137
r <- 0
138+
# check if custom clustering annotation param is in metadata
139+
if (!is.null(cc)) {
140+
if (!(cc %in% names(fieldmapper))) {
141+
has_custom_clustering_annotation <- unlist(lapply(metadata$subject_orig, function(x) grepl(paste0(cc, ":"), x, fixed=TRUE)))
142+
metadata <- metadata[has_custom_clustering_annotation,]
143+
}}
132144
while (nrow(metadata) - sum(metadata$is_duplicate) < limit && attr(res_raw, "numFound") > offset+120 && r < req_limit) {
133145
offset <- offset+120
134146
res_raw <- get_raw_data(limit,
@@ -147,8 +159,20 @@ get_papers <- function(query, params,
147159
metadata <- sanitize_abstract(metadata)
148160
metadata <- mark_duplicates(metadata)
149161
metadata$has_dataset <- unlist(lapply(metadata$resulttype, function(x) "Dataset" %in% x))
162+
# check if custom clustering annotation param is in metadata
163+
if (!is.null(cc)) {
164+
if (!(cc %in% names(fieldmapper))) {
165+
has_custom_clustering_annotation <- unlist(lapply(metadata$subject_orig, function(x) grepl(paste0(cc, ":"), x, fixed=TRUE)))
166+
metadata <- metadata[has_custom_clustering_annotation,]
167+
}}
150168
r <- r+1
151169
}
170+
# check if custom clustering annotation param is in metadata
171+
if (!is.null(cc)) {
172+
if (!(cc %in% names(fieldmapper))) {
173+
has_custom_clustering_annotation <- unlist(lapply(metadata$subject_orig, function(x) grepl(paste0(cc, ":"), x, fixed=TRUE)))
174+
metadata <- metadata[has_custom_clustering_annotation,]
175+
}}
152176
blog$info(paste("vis_id:", .GlobalEnv$VIS_ID, "Deduplication retrieval requests:", r))
153177

154178
metadata <- unique(metadata, by = "id")

0 commit comments

Comments
 (0)