@@ -96,11 +96,17 @@ get_papers <- function(query, params,
9696 cc <- params $ custom_clustering
9797 if (! is.null(cc )) {
9898 if (cc %in% names(fieldmapper )) {
99+ # this is the generic case for existing metadata
99100 custom_clustering_query <- paste(fieldmapper [[cc ]], " :" , " *" , sep = " " )
100101 base_query <- paste(base_query , custom_clustering_query )
101102 } else {
103+ # this is the special case: custom clustering on annotations
102104 custom_clustering_query <- paste(" dcsubject:" , cc , " *" , sep = " " )
103105 base_query <- paste(base_query , custom_clustering_query )
106+ custom_clustering_query <- paste(' textus:' , ' "' , cc , ' :"' , sep = " " )
107+ base_query <- paste(base_query , custom_clustering_query )
108+ custom_clustering_query <- paste(cc , ' :*' , sep = " " )
109+ base_query <- paste(base_query , custom_clustering_query )
104110 }
105111 }
106112
@@ -129,6 +135,12 @@ get_papers <- function(query, params,
129135
130136 req_limit <- 9
131137 r <- 0
138+ # keep only records whose subject_orig contains the custom clustering annotation
139+ if (! is.null(cc )) {
140+ if (! (cc %in% names(fieldmapper ))) {
141+ has_custom_clustering_annotation <- unlist(lapply(metadata $ subject_orig , function (x ) grepl(paste0(cc , " :" ), x , fixed = TRUE )))
142+ metadata <- metadata [has_custom_clustering_annotation ,]
143+ }}
132144 while (nrow(metadata ) - sum(metadata $ is_duplicate ) < limit && attr(res_raw , " numFound" ) > offset + 120 && r < req_limit ) {
133145 offset <- offset + 120
134146 res_raw <- get_raw_data(limit ,
@@ -147,8 +159,20 @@ get_papers <- function(query, params,
147159 metadata <- sanitize_abstract(metadata )
148160 metadata <- mark_duplicates(metadata )
149161 metadata $ has_dataset <- unlist(lapply(metadata $ resulttype , function (x ) " Dataset" %in% x ))
162+ # keep only records whose subject_orig contains the custom clustering annotation
163+ if (! is.null(cc )) {
164+ if (! (cc %in% names(fieldmapper ))) {
165+ has_custom_clustering_annotation <- unlist(lapply(metadata $ subject_orig , function (x ) grepl(paste0(cc , " :" ), x , fixed = TRUE )))
166+ metadata <- metadata [has_custom_clustering_annotation ,]
167+ }}
150168 r <- r + 1
151169 }
170+ # keep only records whose subject_orig contains the custom clustering annotation
171+ if (! is.null(cc )) {
172+ if (! (cc %in% names(fieldmapper ))) {
173+ has_custom_clustering_annotation <- unlist(lapply(metadata $ subject_orig , function (x ) grepl(paste0(cc , " :" ), x , fixed = TRUE )))
174+ metadata <- metadata [has_custom_clustering_annotation ,]
175+ }}
152176 blog $ info(paste(" vis_id:" , .GlobalEnv $ VIS_ID , " Deduplication retrieval requests:" , r ))
153177
154178 metadata <- unique(metadata , by = " id" )
0 commit comments