Skip to content

Commit 7998f57

Browse files
committed
refactor: not enough results heuristics
1 parent 9a34e42 commit 7998f57

1 file changed

Lines changed: 10 additions & 19 deletions

File tree

  • server/preprocessing/other-scripts

server/preprocessing/other-scripts/utils.R

Lines changed: 10 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -120,26 +120,17 @@ detect_error <- function(failed, service, params) {
120120
reason <- c(reason, "API error: OpenAIRE not reachable")
121121
}
122122
}
123+
124+
# If not one of the known data source API errors:
125+
# "not enough results" or "timeframe too short" if it was specified
123126
if (length(reason) == 0) {
124-
result <- regmatches(failed$query, regexec(phrasepattern, failed$query))
125-
# if not one of the known data source API errors:
126-
# apply query error detection heuristics
127-
if (grepl('', failed$query, fixed = TRUE) ||
128-
grepl('', failed$query, fixed = TRUE)) {
129-
reason <- c(reason, 'query formatting')
130-
}
131-
if (!identical(result[[1]], character(0)) &&
132-
length(unlist(strsplit(result[[1]][2], " "))) > 4) {
133-
reason <- c(reason, 'too specific')
134-
} else if (length(unlist(strsplit(failed$query, " "))) < 4) {
135-
reason <- c(reason, 'typo', 'too specific')
136-
} else {
137-
reason <- c(reason, 'query length', 'too specific')
138-
}
139-
if (!is.null(failed$params$to) &&
140-
!is.null(failed$params$from) &&
141-
difftime(failed$params$to, failed$params$from) <= 60) {
142-
reason <- c(reason, 'timeframe too short')
127+
has_timeframe <- !is.null(failed$params$to) && !is.null(failed$params$from)
128+
is_short_timeframe <- has_timeframe && difftime(failed$params$to, failed$params$from) <= 60
129+
130+
if (is_short_timeframe) {
131+
reason <- c(reason, 'timeframe too short')
132+
} else {
133+
reason <- c(reason, 'not enough results')
143134
}
144135
}
145136
}

0 commit comments

Comments
 (0)