From 9a8f4bd9621c557eba30c8e0568040cc978cd082 Mon Sep 17 00:00:00 2001 From: andrei Date: Tue, 10 Feb 2026 17:36:35 +0100 Subject: [PATCH 1/4] bugfix: snapshots configuration --- local_dev/config_local_headstart.ini | 2 ++ server/preprocessing/conf/config.ini | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/local_dev/config_local_headstart.ini b/local_dev/config_local_headstart.ini index d1d7c8b11..7c5b58101 100644 --- a/local_dev/config_local_headstart.ini +++ b/local_dev/config_local_headstart.ini @@ -35,6 +35,8 @@ snapshot_php = "headstart/server/services/snapshot/headstart_snapshot.php" snapshot_local_protocol = "http://" # Size of thumbnail width snapshot_width = "1200px" +# chrome executable path optional, only needed if puppeteer cannot find chrome automatically +chrome_executable_path = "/path/to/chrome" [connection] # PostgreSQL database diff --git a/server/preprocessing/conf/config.ini b/server/preprocessing/conf/config.ini index 56de127b3..a4d503026 100644 --- a/server/preprocessing/conf/config.ini +++ b/server/preprocessing/conf/config.ini @@ -32,7 +32,7 @@ storage_path = "/path/to/storage/" snapshot_php = "server/services/snapshot/headstart_snapshot.php" # snapshot_local_protocol fallback for non-server environments snapshot_local_protocol = "http://" -# chrome executable path (optional, only needed if puppeteer cannot find chrome automatically) +# chrome executable path optional, only needed if puppeteer cannot find chrome automatically chrome_executable_path = "/path/to/chrome" From 10f5efc7cd842123d1f1e1a269bd197fc4b62a75 Mon Sep 17 00:00:00 2001 From: andrei Date: Mon, 9 Mar 2026 14:48:51 +0100 Subject: [PATCH 2/4] feat: remove keywords with text in square brackets in streamgraphs --- server/preprocessing/other-scripts/base.R | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/server/preprocessing/other-scripts/base.R b/server/preprocessing/other-scripts/base.R index 01619adba..be063cc2e 100644 --- a/server/preprocessing/other-scripts/base.R +++ b/server/preprocessing/other-scripts/base.R @@ -248,7 +248,13 @@ etl <- function(res, repo, non_public) { subject_cleaned = gsub("(wikidata)?\\.org/entity/[qQ]([\\d]+)?", "", subject_cleaned) # remove wikidata classification subject_cleaned = gsub("", "", subject_cleaned) # remove subject_cleaned = gsub("\\[No keyword\\]", "", subject_cleaned) - subject_cleaned = gsub("\\[[^]]*\\]", "", subject_cleaned) # remove any text inside square brackets + + if (!is.null(params$vis_type) && params$vis_type == "timeline") { + subject_cleaned = remove_keywords_with_text_in_square_brackets(subject_cleaned) + } else { + subject_cleaned = remove_text_in_square_brackets_from_keywords(subject_cleaned) + } + subject_cleaned = gsub("\\[[^\\[]+\\][^\\;]+(;|$)?", "", subject_cleaned) # remove classification subject_cleaned = gsub("[0-9]{2,} [A-Z]+[^;]*(;|$)?", "", subject_cleaned) #remove classification subject_cleaned = gsub(" -- ", "; ", subject_cleaned) #replace inconsistent keyword separation @@ -357,6 +363,18 @@ decode_dctypenorm <- function(dctypestring) { return(typecodes) } +remove_keywords_with_text_in_square_brackets <- function(x) { + # This function removes whole keywords that contain text in square brackets. + # Example: 'Climate [MeSH]' | 'Some keywords [Chemical]'. + gsub("[^;]*\\[[^]]+\\][^;]*;?", "", x) +} + +remove_text_in_square_brackets_from_keywords <- function(x) { + # This function removes text in square brackets. + # Example: 'Climate [MeSH]' -> 'Climate'| 'Some keywords [Chemical]' -> 'Some keywords'. + gsub("\\[[^]]*\\]", "", x) +} + dctypenorm_decoder <- list( "4"="Audio", "11"="Book", From 77c5c25b722686fe4a51e609f2ed10bcf1db5ccc Mon Sep 17 00:00:00 2001 From: chreman Date: Wed, 8 Apr 2026 17:49:00 +0200 Subject: [PATCH 3/4] update of contentprovider.json cache --- .../common/common/contentproviders.json | 64 +++++++++++++++++++ 1 file changed, 64 insertions(+) diff --git a/server/workers/common/common/contentproviders.json b/server/workers/common/common/contentproviders.json index 5cda1f325..f0bcdcd59 100644 --- a/server/workers/common/common/contentproviders.json +++ b/server/workers/common/common/contentproviders.json @@ -1,4 +1,68 @@ [ + { + "name": "VMRF Digital Repository", + "internal_name": "ftid14684" + }, + { + "name": "STA Dergi", + "internal_name": "ftid14686" + }, + { + "name": "CityUHK Scholars", + "internal_name": "ftid14687" + }, + { + "name": "Evolutio Press", + "internal_name": "ftid14688" + }, + { + "name": "Perry Research", + "internal_name": "ftid14689" + }, + { + "name": "University of Mohamed Boudiaf - M'Sila", + "internal_name": "ftid14690" + }, + { + "name": "Mekelle University Institutional Repository", + "internal_name": "ftid14695" + }, + { + "name": "Atlas social de France", + "internal_name": "ftid14685" + }, + { + "name": "Sustainable Trends and Business Research (STBR)", + "internal_name": "ftid14672" + }, + { + "name": "Scripta Intelektual", + "internal_name": "ftid14674" + }, + { + "name": "Proceedings Centre-Mersenne", + "internal_name": "ftid14678" + }, + { + "name": "United Journal of Chemistry", + "internal_name": "crid14680" + }, + { + "name": "University of Biskra Journals", + "internal_name": "ftid14681" + }, + { + "name": "Computational and Applied Science Journal (CAS Journal)", + "internal_name": "ftid14692" + }, + { + "name": "International Journal of Research Development and Technology (IJRDT)", + "internal_name": "ftid14694" + }, + { + "name": "Innovative Science and Technology Publishers", + "internal_name": "ftid14696" + }, { "name": "Law and innovations", "internal_name": "ftid14669" From 10634cda3b659eb436a3c05c838fa90fe073e374 Mon Sep 17 00:00:00 2001 From: chreman Date: Mon, 13 Apr 2026 14:50:46 +0200 Subject: [PATCH 4/4] hotfix for log event which causes burst requests --- vis/js/utils/actionLogger.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vis/js/utils/actionLogger.ts b/vis/js/utils/actionLogger.ts index 1c3fd9065..a8462cc9f 100644 --- a/vis/js/utils/actionLogger.ts +++ b/vis/js/utils/actionLogger.ts @@ -12,8 +12,8 @@ const logAction = (action: any, state: any) => { switch (action.type) { case "INITIALIZE": return trackMatomoEvent("Headstart", "Load"); - case "RESIZE": - return trackMatomoEvent("Headstart", "Resize window"); + // case "RESIZE": + // return trackMatomoEvent("Headstart", "Resize window"); case "SEARCH": // TODO trackSiteSearch ? // https://developer.matomo.org/guides/tracking-javascript-guide