OpenKnowledgeMaps
diff --git a/‎.gitignore‎
Lines changed: 2 additions & 0 deletions b/‎.gitignore‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎docker-compose.yml‎
Lines changed: 1 addition & 0 deletions b/‎docker-compose.yml‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎local_dev/config_local_headstart.ini‎
Lines changed: 1 addition & 1 deletion b/‎local_dev/config_local_headstart.ini‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎local_dev/searchflow-container/Dockerfile‎
Lines changed: 3 additions & 1 deletion b/‎local_dev/searchflow-container/Dockerfile‎
Lines changed: 3 additions & 1 deletion
diff --git a/‎local_dev/searchflow-container/local_dev.ini‎
Lines changed: 1 addition & 0 deletions b/‎local_dev/searchflow-container/local_dev.ini‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎server/paper_preview/8c5a5c946a9b12159c684a1386ef3d2806ff39431611286d06b442481382df50.PDF‎
1.12 MB b/‎server/paper_preview/8c5a5c946a9b12159c684a1386ef3d2806ff39431611286d06b442481382df50.PDF‎
1.12 MB
diff --git a/‎server/services/.phpunit.result.cache‎
Lines changed: 1 addition & 0 deletions b/‎server/services/.phpunit.result.cache‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎server/services/getPDF.php‎
Lines changed: 179 additions & 58 deletions b/‎server/services/getPDF.php‎
Lines changed: 179 additions & 58 deletions
@@ -10,6 +10,7 @@ dist/
 /config.js
 .cache
 coverage/
+.vscode/
 
 # local deployment files
 /deploy.sh
@@ -31,6 +32,7 @@ server/workers/tests/*.txt
 server/workers/tests/testutils/
 local_dev/renv/*
 local_dev/dev.env
+local_dev/paper_preview
 
 # php files
 /server/classes/headstart/vendor
 
@@ -256,6 +256,7 @@ services:
       - ../Headstart:/var/www/html/headstart
       - ./local_dev/config_local_headstart.ini:/var/www/html/headstart/server/preprocessing/conf/config_local.ini
       - ./local_dev/entrypoint.php:/var/www/html/entrypoint.php
+      - ./local_dev/paper_preview:/var/www/html/headstart/server/paper_preview
     ports:
       - 127.0.0.1:8085:80
     networks:
 
@@ -5,7 +5,7 @@
 # Full path to the preprocessing directory
 preprocessing_dir = "/var/www/html/dev/server/preprocessing/"
 # Full path to the images directory for the client. Needs to be in the public_html/www directory. Make sure that your webserver has write access to this directory.
-images_path = "/var/www/html/dev/server/paper_preview/"
+images_path = "/var/www/html/headstart/server/paper_preview/"
 # Host of the client visualization
 host = "dev-searchflow-1/"
 # Relative path to the client visualization. Needs to be in the public_html/www directory.
 
@@ -31,4 +31,6 @@ ENV PATH      $NVM_DIR/versions/node/v$NODE_VERSION/bin:$PATH
 ENV NPM_PATH  $NVM_DIR/versions/node/v$NODE_VERSION/bin/npm
 RUN $NPM_PATH install -g puppeteer@^2.0.0 --unsafe-perm=true --allow-root
 
-RUN sed -i 's#AllowOverride [Nn]one#AllowOverride All#' /etc/apache2/apache2.conf
+RUN sed -i 's#AllowOverride [Nn]one#AllowOverride All#' /etc/apache2/apache2.conf
+COPY local_dev.ini /usr/local/etc/php/conf.d/local_dev.ini
+RUN chown root:root /usr/local/etc/php/conf.d/local_dev.ini
@@ -0,0 +1 @@
+log_errors = On
@@ -0,0 +1 @@
+{"version":1,"defects":{"GetPDFTest::testGetRedirectURL":8,"GetPDFTest::testGetRedirectURLReturnsParsedURL":7,"GetPDFTest::testReturnsSuccessWhenPdfDownloaded":7,"GetPDFTest::testFullScriptReturnsExpectedErrorJson":8,"GetPDFTest::testFullScriptReturnsExpectedSuccessJson":8,"GetPDFTest::testStartsWithReturnsFalse":8,"GetPDFTest::testParsePDFLinkFromMetaCitationTag":5,"GetPDFTest::testParsePDFLinkFromRawPDFLink":5,"GetPDFTest::testGetContentFromURLReturnsExpectedContent":7,"GetPDFTest::testGetRedirectDOAJParsesUrlCorrectly":7,"GetPDFTest::testParsePDFLinkWithBarePDFUrl":7,"GetPDFTest::testParsePDFLinkWithDirectPDFHeader":7,"GetPDFTest::testGetRedirectURLCallsParsePDFLinkCorrectly":8,"GetPDFTest::testGetPDFAndDownloadWritesValidPDF":8,"GetPDFTest::testGetPDFAndDownloadFailsOnNonPDF":7,"GetPDFTest::testGetContentFromURLMockedReturnsExpectedArray":7,"GetPDFTest::testGetRedirectURLWithMockedGetContentFromURL":7},"times":{"GetPDFTest::testStartsWithReturnsTrue":3.34,"GetPDFTest::testStartsWithReturnsFalse":3.118,"GetPDFTest::testStartsWithEmptyNeedle":3.192,"GetPDFTest::testStartsWithEmptyHaystack":4.502,"GetPDFTest::testParsePDFLinkFromMetaTag":0,"GetPDFTest::testGetRedirectURL":0.002,"GetPDFTest::testParsePDFLinkFromMetaCitationTag":0.006,"GetPDFTest::testParsePDFLinkFromRawPDFLink":0.004,"GetPDFTest::testGetRedirectURLReturnsParsedURL":1.35,"GetPDFTest::testGetRedirectURLWithMockedContent":0.001,"GetPDFTest::testReturnsSuccessWhenPdfDownloaded":0.016,"GetPDFTest::testFullScriptReturnsExpectedSuccessJson":2.323,"GetPDFTest::testFullScriptReturnsExpectedErrorJson":0,"GetPDFTest::testGetContentFromURLReturnsExpectedContent":5.187,"GetPDFTest::testGetRedirectDOAJParsesUrlCorrectly":3.814,"GetPDFTest::testParsePDFLinkWithDirectPDFHeader":4.308,"GetPDFTest::testParsePDFLinkWithBarePDFUrl":3.226,"GetPDFTest::testParsePDFLinkWithFullPDFUrl":3.243,"GetPDFTest::testParsePDFLinkNoMatchReturnsFalse":3.658,"GetPDFTest::testFullScriptWorksSuccessfully":7.587,"GetPDFTest::testGetCont
entFromURLMockedReturnsExpectedArray":0.001,"GetPDFTest::testGetRedirectURLWithMockedGetContentFromURL":0.001,"GetPDFTest::testGetPDFAndDownloadWritesValidPDF":0.001,"GetPDFTest::testGetPDFAndDownloadFailsOnNonPDF":0.01}}
@@ -4,71 +4,195 @@
 
 require_once dirname(__FILE__) . '/../classes/headstart/library/CommUtils.php';
 require_once dirname(__FILE__) . '/../classes/headstart/library/Toolkit.php';
-
 require 'helper.php';
 
 use headstart\library;
 
 $INI_DIR = dirname(__FILE__) . "/../preprocessing/conf/";
-
 $ini_array = library\Toolkit::loadIni($INI_DIR);
 
 $url = library\CommUtils::getParameter($_GET, "url");
 $filename = library\CommUtils::getParameter($_GET, "filename");
 $service = library\CommUtils::getParameter($_GET, "service");
-$pdf_urls = library\CommUtils::getParameter($_GET, "pdf_urls");
+$paper_id = library\CommUtils::getParameter($_GET, "paper_id");
+$vis_id = library\CommUtils::getParameter($_GET, "vis_id");
+$vis_type = library\CommUtils::getParameter($_GET, "vis_type");
+
 $images_path = $ini_array["general"]["images_path"];
 
-if ($service == "base" || $service == "openaire") {
-  $pdf_link = getPDFLinkforBASE($pdf_urls);
-  if($pdf_link != false) {
-      getPDFAndDownload($pdf_link, $images_path, $filename);
-  } else {
-      library\CommUtils::echoOrCallback(json_encode(array("status" => "error")), $_GET);
-      exit();
-  }
+if (isServiceWithPDFList($service)) {
+    handleMultiPdfService($vis_id, $paper_id, $images_path, $filename, $vis_type);
 } else {
-    getPDFAndDownload($url, $images_path, $filename);
+    handleSingleUrlService($vis_id, $paper_id, $url, $images_path, $filename, $vis_type);
 }
 
 library\CommUtils::echoOrCallback(json_encode(array("status" => "success", "file" => $filename)), $_GET);
 
-function getPDFLinkforBASE($url) {
-  $link_list = preg_split("/;/", $url);
-  
-  $matches_pdf = array_filter($link_list, function($item) { return substr($item, -strlen(".pdf")) === ".pdf"; }); 
-  if(count($matches_pdf) != 0) {
-      return array_values($matches_pdf)[0];
-  }
-  
-  $matches_doi = array_filter($link_list, function($item) { return strpos($item, "dx.doi.org"); });
-  if(count($matches_doi) != 0) {
-      return getRedirectURL(array_values($matches_doi)[0]);
-  }
-  
-  $matches_doaj = array_filter($link_list, function($item) { return strpos($item, "doaj.org"); });
-  if(count($matches_doaj) != 0) {
-      $url = getRedirectDOAJ(array_values($matches_doaj)[0]);
-      if($url != false) {
-        return getRedirectURL($url);
-      } else {
-          //Remove all DOAJ entries and all entries that are not URLs
-          $link_list = array_filter($link_list, function($item) { return !strpos($item, "doaj.org"); });
-          $link_list = array_filter($link_list, function($item) { return filter_var($item, FILTER_VALIDATE_URL); });
-      }
-  }
-    
-  return getRedirectURL(array_values($link_list)[0]);
+/**
+ * Reports whether the given service is one of those handled by
+ * handleMultiPdfService(), i.e. "base" or "openaire".
+ *
+ * @param string $service Service identifier taken from the request.
+ * @return bool True for "base" or "openaire", false for every other value.
+ */
+function isServiceWithPDFList(string $service): bool {
+    return $service === "base" || $service === "openaire";
+}
+
+/**
+ * Downloads a PDF for a paper whose revision entry may list several links.
+ *
+ * The paper's URLs are looked up with getValidURLs(), joined with ";" and
+ * handed to getPDFLinkForBASE() to pick or resolve a PDF link. When no link
+ * can be resolved, returnError() emits the error response and ends the request.
+ *
+ * @param string $vis_id      Visualization whose latest revision is consulted.
+ * @param string $paper_id    Id of the paper inside that revision.
+ * @param string $images_path Directory prefix the PDF is written under.
+ * @param string $filename    File name appended to $images_path.
+ * @param string $vis_type    Visualization type, forwarded to getValidURLs().
+ */
+function handleMultiPdfService(
+    string $vis_id,
+    string $paper_id,
+    string $images_path,
+    string $filename,
+    string $vis_type
+): void {
+    $candidate_list = implode(";", getValidURLs($vis_id, $paper_id, $vis_type));
+    $resolved_link = getPDFLinkForBASE($candidate_list);
+
+    if ($resolved_link) {
+        getPDFAndDownload($resolved_link, $images_path, $filename);
+        return;
+    }
+
+    returnError("No valid PDF link could be resolved from filtered URLs.");
+}
+
+/**
+ * Downloads the PDF behind a single requested URL, but only when that URL is
+ * one of the links stored for the paper in the latest revision.
+ *
+ * The requested URL and the stored URLs are compared in urldecode()d form.
+ * When there is no match, returnError() emits the error response and ends
+ * the request.
+ *
+ * @param string $vis_id      Visualization whose latest revision is consulted.
+ * @param string $paper_id    Id of the paper inside that revision.
+ * @param string $url         URL supplied with the request.
+ * @param string $images_path Directory prefix the PDF is written under.
+ * @param string $filename    File name appended to $images_path.
+ * @param string $vis_type    Visualization type, forwarded to getValidURLs().
+ */
+function handleSingleUrlService(
+    string $vis_id,
+    string $paper_id,
+    string $url,
+    string $images_path,
+    string $filename,
+    string $vis_type
+): void {
+    $valid_pdf_urls = getValidURLs($vis_id, $paper_id, $vis_type);
+
+    // Compare decoded forms so differently percent-encoded spellings of the
+    // same link still match. array_map() keeps the keys of its single input
+    // array, so a hit can be mapped back to the stored URL.
+    $decoded_input_url = urldecode($url);
+    $normalized_valid_urls = array_map('urldecode', $valid_pdf_urls);
+
+    $matched_key = array_search($decoded_input_url, $normalized_valid_urls, true);
+    if ($matched_key === false) {
+        returnError("Provided URL not found in valid paper links");
+    }
+
+    // Fetch the stored URL rather than the decoded request value: decoding
+    // turns sequences such as %20 into raw characters that are not valid in
+    // a URL, and the stored value is the one taken from the revision data.
+    getPDFAndDownload($valid_pdf_urls[$matched_key], $images_path, $filename);
+}
+
+/**
+ * Returns the URLs stored for one paper in the latest revision of a
+ * visualization.
+ *
+ * Ends the request through returnError() when the revision cannot be loaded
+ * or when it yields no URL for the paper.
+ *
+ * @param string $vis_id   Visualization id passed to fetchLatestRevision().
+ * @param string $paper_id Paper id passed to extractValidPdfUrls().
+ * @param string $vis_type Visualization type passed to extractValidPdfUrls().
+ * @return array Non-empty array of URL strings.
+ */
+function getValidURLs(string $vis_id, string $paper_id, string $vis_type) {
+    $latest_revision = fetchLatestRevision($vis_id);
+    if ($latest_revision === null || $latest_revision === []) {
+        returnError("There are no revision data for such visualization id");
+    }
+
+    $paper_urls = extractValidPdfUrls($latest_revision, $paper_id, $vis_type);
+    if ($paper_urls === []) {
+        returnError("There are no valid PDF URLs from revision");
+    }
+
+    return $paper_urls;
+}
+
+/**
+ * Loads the latest revision of a visualization by requesting the sibling
+ * getLatestRevision.php script over HTTP and decoding its JSON response.
+ *
+ * @param string $vis_id Visualization id; URL-encoded into the query string.
+ * @return array|null Decoded response, or null when the request fails or the
+ *                    response is not a JSON array/object (both are logged).
+ */
+function fetchLatestRevision(string $vis_id): ?array {
+    // REQUEST_URI still carries the query string. Strip it before dirname(),
+    // otherwise a "/" inside a parameter (e.g. an unencoded url=http://...)
+    // would be taken for a path separator and shift the directory.
+    $request_path = explode("?", $_SERVER['REQUEST_URI'], 2)[0];
+    // rtrim avoids "//getLatestRevision.php" when the script sits at the web root.
+    $base_dir = rtrim(dirname($request_path), "/\\");
+
+    $latest_url = "http://" . $_SERVER['SERVER_NAME'] . $base_dir . "/getLatestRevision.php?vis_id=" . urlencode($vis_id) . "&context=true";
+
+    // The warning is suppressed on purpose: failure is reported via error_log below.
+    $revision_json = @file_get_contents($latest_url);
+    if ($revision_json === false) {
+        error_log("Failed to fetch metadata from getLatestRevision.php");
+        return null;
+    }
+
+    $revision_data = json_decode($revision_json, true);
+    if (!is_array($revision_data)) {
+        error_log("Invalid JSON returned from getLatestRevision.php");
+        return null;
+    }
+
+    return $revision_data;
+}
+
+/**
+ * Collects the syntactically valid URLs stored for one paper in a revision.
+ *
+ * $revision_data["data"] is expected to be a JSON string. The document list
+ * is itself a JSON string stored under its "documents" key, or under its
+ * "data" key when $vis_type is "timeline" (case-insensitive). For the entry
+ * whose "id" equals $paper_id, the fields link, oa_link, identifier,
+ * relation and fulltext are read; each may be an array or a ";"-separated
+ * string.
+ *
+ * @param array  $revision_data Decoded response of getLatestRevision.php.
+ * @param string $paper_id      Id of the paper to look up.
+ * @param string $vis_type      Visualization type.
+ * @return array De-duplicated, sequentially indexed list of URLs; empty when
+ *               the document list is missing or malformed (this is logged)
+ *               or when the paper has no valid URL.
+ */
+function extractValidPdfUrls(array $revision_data, string $paper_id, string $vis_type): array {
+    // Only decode real strings: json_decode(null) is deprecated and a missing
+    // key would otherwise raise an "undefined array key" warning.
+    $encoded_data = $revision_data["data"] ?? null;
+    $inner_data = is_string($encoded_data) ? json_decode($encoded_data, true) : null;
+
+    $documents_key = (strtolower($vis_type) === 'timeline') ? "data" : "documents";
+    $documents_raw = is_array($inner_data) ? ($inner_data[$documents_key] ?? null) : null;
+    $documents = is_string($documents_raw) ? json_decode($documents_raw, true) : null;
+
+    if (!is_array($documents)) {
+        error_log("Invalid or missing documents array: " . json_encode($documents_raw));
+        return [];
+    }
+
+    $url_fields = ['link', 'oa_link', 'identifier', 'relation', 'fulltext'];
+    $valid_urls = [];
+
+    foreach ($documents as $entry) {
+        if (!is_array($entry) || ($entry["id"] ?? null) !== $paper_id) {
+            continue;
+        }
+
+        foreach ($url_fields as $field) {
+            if (!isset($entry[$field])) {
+                continue;
+            }
+
+            $candidates = is_array($entry[$field]) ? $entry[$field] : explode(";", (string) $entry[$field]);
+
+            foreach ($candidates as $candidate) {
+                // Nested arrays or numbers cannot be URLs; trim() would throw on an array.
+                if (!is_string($candidate)) {
+                    continue;
+                }
+
+                $candidate = trim($candidate);
+                if (filter_var($candidate, FILTER_VALIDATE_URL)) {
+                    $valid_urls[] = $candidate;
+                }
+            }
+        }
+    }
+
+    // array_unique() leaves gaps in the keys; re-index so callers get a plain list.
+    return array_values(array_unique($valid_urls));
+}
+
+/**
+ * Picks or resolves a PDF link from a ";"-separated list of candidate links.
+ *
+ * Order of preference:
+ *  1. the first link ending in ".pdf" is returned as is;
+ *  2. the first link containing "dx.doi.org" is resolved via getRedirectURL();
+ *  3. the first link containing "doaj.org" is resolved via getRedirectDOAJ()
+ *     and then getRedirectURL();
+ *  4. otherwise each remaining link is tried with getRedirectURL() until one
+ *     resolves.
+ *
+ * @param string $url Candidate links joined with ";".
+ * @return string|false The PDF link, or false when none could be resolved.
+ */
+function getPDFLinkForBASE($url) {
+    $link_list = preg_split("/;/", (string) $url);
+    $link_list = array_map('trim', $link_list);
+    $link_list = array_filter($link_list);
+
+    $matches_pdf = array_filter($link_list, function($item) { return substr($item, -strlen(".pdf")) === ".pdf"; });
+    if(count($matches_pdf) != 0) {
+        return array_values($matches_pdf)[0];
+    }
+
+    // strpos() returns 0 for a match at the very start, so test against false
+    // explicitly instead of relying on truthiness.
+    $matches_doi = array_filter($link_list, function($item) { return strpos($item, "dx.doi.org") !== false; });
+    if(count($matches_doi) != 0) {
+        return getRedirectURL(array_values($matches_doi)[0]);
+    }
+
+    $matches_doaj = array_filter($link_list, function($item) { return strpos($item, "doaj.org") !== false; });
+    if(count($matches_doaj) != 0) {
+        $url = getRedirectDOAJ(array_values($matches_doaj)[0]);
+        if($url != false) {
+            return getRedirectURL($url);
+        } else {
+            //Remove all DOAJ entries and all entries that are not URLs
+            $link_list = array_filter($link_list, function($item) { return strpos($item, "doaj.org") === false; });
+            $link_list = array_filter($link_list, function($item) { return filter_var($item, FILTER_VALIDATE_URL); });
+        }
+    }
+
+    foreach ($link_list as $fallback_url) {
+        $resolved = getRedirectURL($fallback_url);
+        if ($resolved) {
+            return $resolved;
+        }
+    }
+
+    // Every remaining link has been tried, or there were none. Fetching the
+    // first one again would only repeat a failed request, and would read an
+    // undefined index on an empty list.
+    return false;
 }
 
 //Example:
 //https://doaj.org/api/v1/search/articles/id%3A90764de0bd144959b1d2727c91285eb3
 function getRedirectDOAJ($doaj_url) {
     $id = substr(strrchr($doaj_url, '/' ), 1);
     $url = "https://doaj.org/api/v1/search/articles/id%3A" . $id;
-    
+
     $response = file_get_contents($url);
-    
+
     $array = json_decode($response, true);
     $fulltext_link = null;
     if($array["total"] > 0) {
@@ -77,7 +201,7 @@ function getRedirectDOAJ($doaj_url) {
             if($link["type"] === "fulltext") {
                 $fulltext_link = $link["url"];
             }
-        }   
+        }
     }
     return ($fulltext_link === null)?(false):($fulltext_link);
 }
@@ -93,25 +217,24 @@ function getContentFromURL($link) {
     $response = curl_exec($ch);
     $redir = curl_getinfo($ch, CURLINFO_EFFECTIVE_URL);
     curl_close($ch);
-       
+
     return array($response, $redir);
 }
 
 function getRedirectURL($link) {
     $response = getContentFromURL($link);
-    
     return parsePDFLink($response[0], $response[1]);
 }
 
 function parsePDFLink($source, $url) {
     if(substr($source, 0, 4) === "%PDF") {
         return $url;
     }
-    
+
     $has_match = preg_match_all('/meta[\s]+content=["\']+([^"\']+)["\']+[\s]+name[\s]*=[\s]*["\']+citation_pdf_url["\']+/i', $source, $matches);
     if(!$has_match) {
         $has_match = preg_match_all('/meta[\s]+name[\s]*=[\s]*["\']+citation_pdf_url["\']+[\s]+content=["\']+([^"\']+)["\']+/i', $source, $matches);
-    
+
         if(!$has_match) {
             $has_match = preg_match_all('/["\']?([^"\'\s>]+(?:\.pdf))["\']?/i', $source, $matches);
         }
@@ -125,39 +248,37 @@ function parsePDFLink($source, $url) {
             return $best_match;
         }
     } else {
-       return false; 
+        return false;
     }
 }
 
 function startsWith($haystack, $needle) {
-     $length = strlen($needle);
-     return (substr($haystack, 0, $length) === $needle);
+    $length = strlen($needle);
+    return (substr($haystack, 0, $length) === $needle);
 }
 
 function getPDFAndDownload($url, $images_path, $filename) {
-    
     $output_path = $images_path . $filename;
-
     $pdf = getContentFromURL($url)[0];
 
     if ($pdf !== false) {
         file_put_contents($output_path, $pdf);
     } else {
-        library\CommUtils::echoOrCallback(json_encode(array("status" => "error")), $_GET);
-        exit();
+        returnError("Unable to get PDF from the URL");
     }
 
-    $finfo = finfo_open(FILEINFO_MIME_TYPE); 
-
+    $finfo = finfo_open(FILEINFO_MIME_TYPE);
     $mime_type = finfo_file($finfo, $output_path);
-
     finfo_close($finfo);
 
     if (strtolower($mime_type) != "application/pdf") {
         unlink($output_path);
-        library\CommUtils::echoOrCallback(json_encode(array("status" => "error")), $_GET);
-        exit();
+        returnError("MIME type is not application/pdf");
     }
 }
 
-
+/**
+ * Logs the reason for a failure, sends the generic error response and ends
+ * the request.
+ *
+ * The reason is written to the error log only; the response body is always
+ * {"status":"error"}.
+ *
+ * @param string $reason Explanation written to the error log.
+ */
+function returnError(string $reason): void {
+    error_log("Error: {$reason}");
+
+    $payload = json_encode(["status" => "error"]);
+    library\CommUtils::echoOrCallback($payload, $_GET);
+
+    exit;
+}
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1 @@`
	`1`	+{"version":1,"defects":{"GetPDFTest::testGetRedirectURL":8,"GetPDFTest::testGetRedirectURLReturnsParsedURL":7,"GetPDFTest::testReturnsSuccessWhenPdfDownloaded":7,"GetPDFTest::testFullScriptReturnsExpectedErrorJson":8,"GetPDFTest::testFullScriptReturnsExpectedSuccessJson":8,"GetPDFTest::testStartsWithReturnsFalse":8,"GetPDFTest::testParsePDFLinkFromMetaCitationTag":5,"GetPDFTest::testParsePDFLinkFromRawPDFLink":5,"GetPDFTest::testGetContentFromURLReturnsExpectedContent":7,"GetPDFTest::testGetRedirectDOAJParsesUrlCorrectly":7,"GetPDFTest::testParsePDFLinkWithBarePDFUrl":7,"GetPDFTest::testParsePDFLinkWithDirectPDFHeader":7,"GetPDFTest::testGetRedirectURLCallsParsePDFLinkCorrectly":8,"GetPDFTest::testGetPDFAndDownloadWritesValidPDF":8,"GetPDFTest::testGetPDFAndDownloadFailsOnNonPDF":7,"GetPDFTest::testGetContentFromURLMockedReturnsExpectedArray":7,"GetPDFTest::testGetRedirectURLWithMockedGetContentFromURL":7},"times":{"GetPDFTest::testStartsWithReturnsTrue":3.34,"GetPDFTest::testStartsWithReturnsFalse":3.118,"GetPDFTest::testStartsWithEmptyNeedle":3.192,"GetPDFTest::testStartsWithEmptyHaystack":4.502,"GetPDFTest::testParsePDFLinkFromMetaTag":0,"GetPDFTest::testGetRedirectURL":0.002,"GetPDFTest::testParsePDFLinkFromMetaCitationTag":0.006,"GetPDFTest::testParsePDFLinkFromRawPDFLink":0.004,"GetPDFTest::testGetRedirectURLReturnsParsedURL":1.35,"GetPDFTest::testGetRedirectURLWithMockedContent":0.001,"GetPDFTest::testReturnsSuccessWhenPdfDownloaded":0.016,"GetPDFTest::testFullScriptReturnsExpectedSuccessJson":2.323,"GetPDFTest::testFullScriptReturnsExpectedErrorJson":0,"GetPDFTest::testGetContentFromURLReturnsExpectedContent":5.187,"GetPDFTest::testGetRedirectDOAJParsesUrlCorrectly":3.814,"GetPDFTest::testParsePDFLinkWithDirectPDFHeader":4.308,"GetPDFTest::testParsePDFLinkWithBarePDFUrl":3.226,"GetPDFTest::testParsePDFLinkWithFullPDFUrl":3.243,"GetPDFTest::testParsePDFLinkNoMatchReturnsFalse":3.658,"GetPDFTest::testFullScriptWorksSuccessfully":7.587,"GetPDFTest::testGe
tContentFromURLMockedReturnsExpectedArray":0.001,"GetPDFTest::testGetRedirectURLWithMockedGetContentFromURL":0.001,"GetPDFTest::testGetPDFAndDownloadWritesValidPDF":0.001,"GetPDFTest::testGetPDFAndDownloadFailsOnNonPDF":0.01}}