Skip to content

Commit b9a4066

Browse files
authored
Merge pull request #733 from OpenKnowledgeMaps/dev
2023-08-03
2 parents af14fca + 5cb2318 commit b9a4066

10 files changed

Lines changed: 1094 additions & 1218 deletions

File tree

package-lock.json

Lines changed: 1020 additions & 1177 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@
5454
"@babel/plugin-transform-runtime": "^7.17.0",
5555
"@babel/preset-env": "^7.15.6",
5656
"@babel/preset-react": "^7.14.5",
57-
"babel-core": "^6.11.4",
57+
"babel-core": "^7.0.0-bridge.0",
5858
"babel-jest": "^27.3.1",
5959
"babel-loader": "^8.2.2",
6060
"babel-preset-es2015": "^6.9.0",

server/preprocessing/other-scripts/preprocess.R

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -143,9 +143,13 @@ replace_keywords_if_empty <- function(metadata, stops) {
143143
vplog$info(paste("vis_id:", .GlobalEnv$VIS_ID, "Documents without subjects after replacing from title:", length(missing_subjects)))
144144
if (length(missing_subjects) > 0) {
145145
foreach (i = missing_subjects) %dopar% {
146-
candidates = mapply(paste, metadata$title[i], metadata$paper_abstract[i])
146+
if (nrow(metadata) == 1) {
147+
candidates = mapply(paste, metadata$title, metadata$paper_abstract)
148+
} else {
149+
candidates = mapply(paste, metadata$title[i,], metadata$paper_abstract[i,])
150+
}
147151
for (i in seq(1, total_length, batch_size)) {
148-
candidates = mclapply(candidates, function(x)paste(removeWords(x, stops[i:min(i+batch_size -1, total_length)]), collapse=""))
152+
candidates = mclapply(candidates, function(x)paste(removeWords(x, stops[i:min(i+batch_size -1, total_length)]), collapse=""))
149153
}
150154
candidates = lapply(candidates, function(x) {gsub("[^[:alpha:]]", " ", x)})
151155
candidates = lapply(candidates, function(x) {gsub(" +", " ", x)})
@@ -155,7 +159,11 @@ replace_keywords_if_empty <- function(metadata, stops) {
155159
replacement_keywords <- filter_out_nested_ngrams(names(nn_count), 3)
156160
replacement_keywords = lapply(replacement_keywords, FUN = function(x) {paste(unlist(x), collapse="; ")})
157161
replacement_keywords = gsub("_", " ", replacement_keywords)
158-
metadata$subject[i] <- paste(replacement_keywords, collapse="; ")
162+
if (nrow(metadata) == 1) {
163+
metadata$subject <- paste(replacement_keywords, collapse="; ")
164+
} else {
165+
metadata$subject[i] <- paste(replacement_keywords, collapse="; ")
166+
}
159167
}
160168
}
161169
return(metadata)

server/services/displayPDF.php

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
<html>
22
<head>
3-
<meta http-equiv="refresh" content="0; url=pdf.js-hypothes.is/viewer/web/viewer.html?file=<?php echo $_GET["file"] ?>" />
3+
<meta http-equiv="refresh" content="0; url=pdf.js-hypothes.is/viewer/web/viewer.html?file=<?php echo htmlspecialchars($_GET["file"]) ?>" />
44
</head>
55
<body>
66
</body>

server/services/snapshot/headstart_snapshot.php

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
<body style="margin:0px; padding:0px">
99

1010
<div id="visualization"></div>
11-
<script type="text/javascript" src="data-config_<?php echo $_GET['service'] ?>.js"></script>
11+
<script type="text/javascript" src="data-config_<?php echo htmlspecialchars($_GET['service']) ?>.js"></script>
1212
<script src="../../../../js/search_options.js"></script>
1313
<script>
1414
data_config.files = [{
@@ -18,7 +18,7 @@
1818
data_config.server_url = window.location.href.replace(/[^/]*$/, '') + "../../";
1919
data_config.show_context = true;
2020
data_config.create_title_from_context= true;
21-
data_config.options = options_<?php echo $_GET['service']; ?>.dropdowns;
21+
data_config.options = options_<?php echo htmlspecialchars($_GET['service']); ?>.dropdowns;
2222
if (<?php echo json_encode($_GET['service']) ?> === "linkedcat" ||
2323
<?php echo json_encode($_GET['service']) ?> === "linkedcat_authorview" ||
2424
<?php echo json_encode($_GET['service']) ?> === "linkedcat_browseview") {

server/workers/base/renv.lock

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -199,9 +199,7 @@
199199
"renv": {
200200
"Package": "renv",
201201
"Version": "0.14.0",
202-
"Source": "Repository",
203-
"Repository": "CRAN",
204-
"Hash": "30e5eba91b67f7f4d75d31de14bbfbdc"
202+
"Source": "Repository"
205203
},
206204
"rlang": {
207205
"Package": "rlang",

server/workers/base/src/base.py

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -88,12 +88,13 @@ def execute_search(self, params):
8888
res = raw_metadata
8989
else:
9090
metadata = pd.DataFrame(raw_metadata)
91+
metadata = self.sanitize_metadata(metadata)
9192
metadata = filter_duplicates(metadata)
9293
metadata = pd.concat([metadata, parse_annotations_for_all(metadata, "subject_orig")], axis=1)
9394
metadata = metadata.head(params.get('list_size'))
9495
metadata.reset_index(inplace=True, drop=True)
9596
metadata = self.enrich_metadata(metadata)
96-
text = pd.concat([metadata.id, metadata[["title", "paper_abstract", "subject_orig", "published_in", "authors"]]
97+
text = pd.concat([metadata.id, metadata[["title", "paper_abstract", "subject_orig", "published_in", "sanitized_authors"]]
9798
.apply(lambda x: " ".join(x), axis=1)], axis=1)
9899
text.columns = ["id", "content"]
99100
input_data = {}
@@ -108,6 +109,10 @@ def execute_search(self, params):
108109
self.logger.error(error)
109110
raise
110111

112+
def sanitize_metadata(self, metadata):
113+
metadata["sanitized_authors"] = metadata["authors"].map(lambda x: sanitize_authors(x))
114+
return metadata
115+
111116
def enrich_metadata(self, metadata):
112117
metadata["repo"] = metadata["content_provider"].map(lambda x: self.content_providers.get(x, ""))
113118
enrichment = improved_df_parsing(metadata)
@@ -331,7 +336,7 @@ def parse_annotations_for_all(metadata, field_name):
331336
parsed_annotations = pd.DataFrame(metadata[field_name].map(lambda x: parse_annotations(x)))
332337
parsed_annotations.columns = ["annotations"]
333338
expanded_annotations = expand_dict_columns(parsed_annotations)
334-
return parsed_annotations
339+
return expanded_annotations
335340

336341
# convert DataFrame with dict columns to DataFrame with columns for each dict key
337342
def expand_dict_columns(df):
@@ -347,4 +352,10 @@ def expand_dict_columns(df):
347352

348353
def clean_up_annotations(df, field):
349354
df[field] = df[field].map(lambda x: pattern_annotations.sub("", x).strip())
350-
return df
355+
return df
356+
357+
def sanitize_authors(authors, n=15):
358+
authors = authors.split("; ")
359+
if len(authors) > n:
360+
authors = authors[:n-1] + authors[-1:]
361+
return "; ".join(authors)

server/workers/dataprocessing/src/streamgraph.py

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -199,11 +199,7 @@ def aggregate_ids(series):
199199
return "NA"
200200

201201

202-
def load_stopwords():
203-
stopwords = []
204-
if os.path.isfile("../../resources/additional_stopwords.txt"):
205-
with open ("../../resources/additional_stopwords.txt") as infile:
206-
stopwords = infile.read().splitlines()
207-
return stopwords
208-
209-
stopwords = load_stopwords()
202+
stopwords = ["archeo", "archi", "art", "anthro-bio", "class", "info", "museo", "demo",
203+
"eco", "edu", "envir", "genre", "geo", "hist", "hisphilso", "droit",
204+
"lang", "litt", "manag", "stat", "musiq", "phil", "scipo", "psy",
205+
"relig", "anthro-se", "socio"]

vis/js/dataprocessing/managers/DataManager.js

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -143,11 +143,21 @@ class DataManager {
143143
__parseAuthors(paper) {
144144
paper.authors_objects = extractAuthors(paper.authors);
145145
paper.authors_list = getAuthorsList(
146-
paper.authors,
147-
this.config.convert_author_names
146+
paper.authors,
147+
this.config.convert_author_names
148148
);
149149

150-
paper.authors_string = paper.authors_list.join(", ");
150+
// old variable with all authors_string
151+
// paper.authors_string = paper.authors_list.join(", ");
152+
153+
if (paper.authors_list.length > 15) {
154+
const firstAuthors = paper.authors_list.slice(0, 14).join(", ");
155+
const lastAuthor = paper.authors_list[paper.authors_list.length - 1];
156+
// take the first 14 authors, then add "..." and the last author, for the visual part of the map
157+
paper.authors_string = `${firstAuthors}, ... ${lastAuthor}`;
158+
// when there are more than 15 authors, keep an array of 16 authors (the first 15 plus the last one) for further processing in the visual part, with "..." between the authors
159+
paper.authors_list = paper.authors_list.slice(0, 15).concat(lastAuthor);
160+
}
151161
}
152162

153163
// migrated from legacy code

vis/js/templates/listentry/Details.jsx

Lines changed: 27 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -6,31 +6,34 @@ import { useLocalizationContext } from "../../components/LocalizationProvider";
66

77
const MAX_AUTHORS_LENGTH = 100;
88

9-
const Details = ({ authors, source, isSelected }) => {
9+
const Details = ({authors, source, isSelected}) => {
1010
const loc = useLocalizationContext();
1111

1212
const authorsString = getAuthorsString(
13-
authors,
14-
isSelected ? Number.POSITIVE_INFINITY : MAX_AUTHORS_LENGTH
13+
authors,
14+
isSelected ? Number.POSITIVE_INFINITY : MAX_AUTHORS_LENGTH
1515
);
1616

17+
// console.log("Details.jsx: authorsString: ", authorsString);
18+
console.log("Details.jsx: loc.default_authors: ", loc.default_authors);
19+
1720
return (
18-
// html template starts here
19-
<div className="list_details">
20-
<div className="list_authors">
21-
<Highlight queryHighlight>
22-
{authorsString ? authorsString : loc.default_authors}
23-
</Highlight>
24-
</div>
25-
{!!source && (
26-
<div className={"list_source" + (isSelected ? "" : " short")}>
21+
// html template starts here
22+
<div className="list_details">
23+
<div className="list_authors">
24+
<Highlight queryHighlight>
25+
{authorsString ? authorsString : loc.default_authors}
26+
</Highlight>
27+
</div>
28+
{!!source && (
29+
<div className={"list_source" + (isSelected ? "" : " short")}>
2730
<span className="list_published_in">
2831
<Highlight queryHighlight>{source}</Highlight>
2932
</span>
30-
</div>
31-
)}
32-
</div>
33-
// html template ends here
33+
</div>
34+
)}
35+
</div>
36+
// html template ends here
3437
);
3538
};
3639

@@ -45,15 +48,22 @@ const getAuthorsString = (authorsList, maxLength) => {
4548
return "";
4649
}
4750

51+
4852
const authorsListCopy = [...authorsList];
4953

5054
const ellipsis = "...";
5155
const join = ", ";
5256
let finalString = authorsListCopy.shift();
57+
if (authorsList.length > 15) {
58+
const first19Authors = authorsList.slice(0, 14).join(", ");
59+
const lastAuthor = authorsList[authorsList.length - 1];
60+
finalString = `${first19Authors}, ... ${lastAuthor}`;
61+
return finalString;
62+
}
5363
while (authorsListCopy.length > 0) {
5464
const nextAuthor = authorsListCopy.shift();
5565
let nextPossibleLength =
56-
finalString.length + join.length + nextAuthor.length;
66+
finalString.length + join.length + nextAuthor.length;
5767

5868
if (authorsListCopy.length !== 0) {
5969
nextPossibleLength += ellipsis.length;

0 commit comments

Comments
 (0)