Skip to content

Commit 5b48179

Browse files
authored
Merge pull request #869 from OpenKnowledgeMaps/feat/remove-mesh-keywords-in-streamgraph
feat: remove mesh keywords in streamgraph
2 parents 95dd55a + 77c5c25 commit 5b48179

2 files changed

Lines changed: 83 additions & 1 deletion

File tree

server/preprocessing/other-scripts/base.R

Lines changed: 19 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -248,7 +248,13 @@ etl <- function(res, repo, non_public) {
248248
subject_cleaned = gsub("(wikidata)?\\.org/entity/[qQ]([\\d]+)?", "", subject_cleaned) # remove wikidata classification
249249
subject_cleaned = gsub("</keyword><keyword>", "", subject_cleaned) # remove </keyword><keyword>
250250
subject_cleaned = gsub("\\[No keyword\\]", "", subject_cleaned)
251-
subject_cleaned = gsub("\\[[^]]*\\]", "", subject_cleaned) # remove any text inside square brackets
251+
252+
if (!is.null(params$vis_type) && params$vis_type == "timeline") {
253+
subject_cleaned = remove_keywords_with_text_in_square_brackets(subject_cleaned)
254+
} else {
255+
subject_cleaned = remove_text_in_square_brackets_from_keywords(subject_cleaned)
256+
}
257+
252258
subject_cleaned = gsub("\\[[^\\[]+\\][^\\;]+(;|$)?", "", subject_cleaned) # remove classification
253259
subject_cleaned = gsub("[0-9]{2,} [A-Z]+[^;]*(;|$)?", "", subject_cleaned) #remove classification
254260
subject_cleaned = gsub(" -- ", "; ", subject_cleaned) #replace inconsistent keyword separation
@@ -357,6 +363,18 @@ decode_dctypenorm <- function(dctypestring) {
357363
return(typecodes)
358364
}
359365

366+
remove_keywords_with_text_in_square_brackets <- function(x) {
  # Drops every semicolon-delimited keyword that contains a non-empty
  # "[...]" annotation, e.g. "Climate; Humans [MeSH]; Water" becomes
  # "Climate; Water".
  #
  # x: character vector of keyword strings; gsub() is applied element-wise.
  # Returns a character vector of the same length as x.
  #
  # The pattern consumes the keyword text on both sides of the bracketed part
  # plus at most one trailing ";". A keyword with empty brackets ("[]") is
  # left untouched because at least one character is required inside the
  # brackets.
  bracketed_keyword <- "[^;]*\\[[^]]+\\][^;]*;?"
  gsub(pattern = bracketed_keyword, replacement = "", x = x)
}
371+
372+
remove_text_in_square_brackets_from_keywords <- function(x) {
  # Strips each "[...]" segment (including empty "[]") out of the keyword
  # string while keeping the rest of the keyword.
  #
  # x: character vector of keyword strings; gsub() is applied element-wise.
  # Returns a character vector of the same length as x.
  #
  # Only the bracketed segment itself is deleted; surrounding whitespace is
  # not trimmed here, so "Climate [MeSH]" becomes "Climate " (trailing space).
  bracketed_text <- "\\[[^]]*\\]"
  gsub(pattern = bracketed_text, replacement = "", x = x)
}
377+
360378
dctypenorm_decoder <- list(
361379
"4"="Audio",
362380
"11"="Book",

server/workers/common/common/contentproviders.json

Lines changed: 64 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,68 @@
11
[
2+
{
3+
"name": "VMRF Digital Repository",
4+
"internal_name": "ftid14684"
5+
},
6+
{
7+
"name": "STA Dergi",
8+
"internal_name": "ftid14686"
9+
},
10+
{
11+
"name": "CityUHK Scholars",
12+
"internal_name": "ftid14687"
13+
},
14+
{
15+
"name": "Evolutio Press",
16+
"internal_name": "ftid14688"
17+
},
18+
{
19+
"name": "Perry Research",
20+
"internal_name": "ftid14689"
21+
},
22+
{
23+
"name": "University of Mohamed Boudiaf - M'Sila",
24+
"internal_name": "ftid14690"
25+
},
26+
{
27+
"name": "Mekelle University Institutional Repository",
28+
"internal_name": "ftid14695"
29+
},
30+
{
31+
"name": "Atlas social de France",
32+
"internal_name": "ftid14685"
33+
},
34+
{
35+
"name": "Sustainable Trends and Business Research (STBR)",
36+
"internal_name": "ftid14672"
37+
},
38+
{
39+
"name": "Scripta Intelektual",
40+
"internal_name": "ftid14674"
41+
},
42+
{
43+
"name": "Proceedings Centre-Mersenne",
44+
"internal_name": "ftid14678"
45+
},
46+
{
47+
"name": "United Journal of Chemistry",
48+
"internal_name": "crid14680"
49+
},
50+
{
51+
"name": "University of Biskra Journals",
52+
"internal_name": "ftid14681"
53+
},
54+
{
55+
"name": "Computational and Applied Science Journal (CAS Journal)",
56+
"internal_name": "ftid14692"
57+
},
58+
{
59+
"name": "International Journal of Research Development and Technology (IJRDT)",
60+
"internal_name": "ftid14694"
61+
},
62+
{
63+
"name": "Innovative Science and Technology Publishers",
64+
"internal_name": "ftid14696"
65+
},
266
{
367
"name": "Law and innovations",
468
"internal_name": "ftid14669"

0 commit comments

Comments
 (0)