Skip to content

Commit 5247a33

Browse files
committed
fix postprocessing
1 parent e31b0ca commit 5247a33

3 files changed

Lines changed: 7 additions & 7 deletions

File tree

.gitignore

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -34,4 +34,5 @@ coverage/
3434
/lc_browseview_cache.json
3535
/lc_cache.json
3636
/linkedcat.sqlite
37-
.env
37+
.env
38+
.Rproj.user

server/preprocessing/other-scripts/postprocess.R

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -9,12 +9,13 @@ create_overview_output <- function(named_clusters, layout, metadata) {
99
cluster_labels = named_clusters$cluster_labels
1010

1111
# Prepare the output
12-
result = data.frame(cbind(x, y, labels, cluster_labels))
12+
result = data.frame(cbind(x, y, labels, cluster_labels, metadata$id))
13+
names(result)[5] <- "id"
1314
unique_groups = data.frame(unique(result$cluster_labels))
1415
colnames(unique_groups) <- "cluster_labels"
1516
unique_groups$groups <- seq_along(unique_groups$cluster_labels)
1617
result = merge(result, unique_groups, by='cluster_labels')
17-
output = merge(metadata, result, by.x="id", by.y="labels", all=TRUE)
18+
output = merge(metadata, result, by.x="id", by.y="id", all=TRUE)
1819

1920
names(output)[names(output)=="groups"] <- "area_uri"
2021
output["area"] = paste(output$cluster_labels, sep="")

server/preprocessing/other-scripts/summarize.R

Lines changed: 2 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -57,8 +57,7 @@ create_cluster_labels <- function(clusters, metadata,
5757
tfidf_top_names <- get_top_names(tfidf_top, top_n, stops)
5858
clusters$cluster_labels = ""
5959
for (k in seq(1, clusters$num_clusters)) {
60-
group = c(names(clusters$groups[clusters$groups == k]))
61-
matches = which(clusters$labels%in%group)
60+
matches = which(unname(clusters$groups == k) == TRUE)
6261
summary = tfidf_top_names[[k]]
6362
if (summary == "") {
6463
candidates = mapply(paste, metadata$title[matches], metadata$paper_abstract[matches])
@@ -106,8 +105,7 @@ get_cluster_corpus <- function(clusters, metadata, service, stops, taxonomy_sepa
106105
add_title_ngrams = T) {
107106
subjectlist = list()
108107
for (k in seq(1, clusters$num_clusters)) {
109-
group = c(names(clusters$groups[clusters$groups == k]))
110-
matches = which(metadata$id %in% group)
108+
matches = which(unname(clusters$groups == k) == TRUE)
111109
titles = metadata$title[matches]
112110
subjects = metadata$subject[matches]
113111
titles = lapply(titles, function(x) {gsub("[^[:alnum:]-]", " ", x)})

0 commit comments

Comments
 (0)