Skip to content

Commit e625f2d

Browse files
authored
Merge pull request #758 from OpenKnowledgeMaps/exclude-date-filters-param
Exclude date filters param
2 parents d9c0b6e + 69df93a commit e625f2d

11 files changed

Lines changed: 136 additions & 34 deletions

File tree

examples/project_website/base.html

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -93,8 +93,8 @@
9393
//title: "fake news",
9494
//title: "dotcom",
9595
//title: "cognitive dissonance",
96-
// title: "custom_title",
97-
title: "custom_clustering",
96+
// title: "exclude_date_filters",
97+
title: "philosophy_no_dates",
9898
// file: "./data/digital-education.json",
9999
// file: "./data/digital-education-lang.json",
100100
// file: "./data/digital-education-lang[].json",
@@ -104,7 +104,9 @@
104104
//file: "./data/fake-news-sg.json",
105105
//file: "./data/dotcom-sg.json",
106106
//file: "./data/cognitive-dissonance.json"
107-
file: "./data/custom_clustering.json",
107+
// file: "./data/custom_title.json",
108+
// file: "./data/exclude_date_filters.json",
109+
file: "./data/philosophy_no_dates.json",
108110
// other attributes:
109111
is_streamgraph: false, // set true for streamgraph data
110112
show_area: true, // set false for streamgraph data

examples/project_website/data/exclude_date_filters.json

Lines changed: 10 additions & 0 deletions
Large diffs are not rendered by default.

examples/project_website/data/philosophy_no_dates.json

Lines changed: 10 additions & 0 deletions
Large diffs are not rendered by default.

server/preprocessing/other-scripts/base.R

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -52,21 +52,31 @@ get_papers <- function(query, params,
5252

5353
blog$info(paste("vis_id:", .GlobalEnv$VIS_ID, "exact query:", exact_query))
5454

55-
year_from = params$from
56-
year_to = params$to
5755
limit = params$limit
5856

5957
# prepare query fields
60-
date_string = paste0("dcdate:[", params$from, " TO ", params$to , "]")
6158
document_types = paste("dctypenorm:", "(", paste(params$document_types, collapse=" OR "), ")", sep="")
6259

6360
sortby_string = ifelse(params$sorting == "most-recent", "dcyear desc", "")
6461
return_fields <- "dcdocid,dctitle,dcdescription,dcsource,dcdate,dcsubject,dccreator,dclink,dcoa,dcidentifier,dcrelation,dctype,dctypenorm,dcprovider,dclang,dclanguage,dccoverage"
6562

6663
if (!is.null(exact_query) && exact_query != '') {
67-
base_query <- paste(paste0("(",exact_query,")"), date_string, document_types, collapse=" ")
64+
base_query <- paste(paste0("(",exact_query,")"), document_types, collapse=" ")
6865
} else {
69-
base_query <- paste(date_string, document_types, collapse=" ")
66+
base_query <- paste(document_types, collapse=" ")
67+
}
68+
69+
if (!is.null(params$vis_type) && params$vis_type == "timeline") {
70+
if (!is.null(params$exclude_date_filters)) {
71+
params$exclude_date_filters <- NULL
72+
}
73+
}
74+
75+
if (!is.null(params$exclude_date_filters)
76+
&& (params$exclude_date_filters == TRUE || params$exclude_date_filters == "true")) {
77+
} else {
78+
date_string = paste0("dcdate:[", params$from, " TO ", params$to , "]")
79+
base_query <- paste(date_string, base_query)
7080
}
7181

7282
# apply language filter if parameter is set

server/preprocessing/other-scripts/test/params_base.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,5 +7,5 @@
77
"min_descsize": 300,
88
"limit": 120,
99
"list_size": 100,
10-
"repo": "ftunivbern"
10+
"exclude_date_filters": "true"
1111
}

server/services/searchBASE.php

Lines changed: 30 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,9 @@
1010
$dirty_query = library\CommUtils::getParameter($_POST, "q");
1111
$precomputed_id = (isset($_POST["unique_id"]))?($_POST["unique_id"]):(null);
1212

13-
$params_array = array("from", "to", "document_types", "sorting", "min_descsize");
14-
$optional_get_params = ["repo", "coll", "vis_type", "q_advanced", "lang_id", "custom_title", "custom_clustering"];
13+
$params_array = array("document_types", "sorting", "min_descsize");
14+
$optional_get_params = ["repo", "coll", "vis_type", "q_advanced", "lang_id", "custom_title", "exclude_date_filters", "from", "to", "custom_clustering"];
15+
1516

1617
function filterEmptyString($value)
1718
{
@@ -36,6 +37,33 @@ function filterEmptyString($value)
3637
$post_params["lang_id"] = ["all-lang"];
3738
}
3839
}
40+
// ignore date filters if vis_type is set to timeline
41+
if (isset($post_params["vis_type"]) && $post_params["vis_type"] == "timeline") {
42+
unset($params_array[array_search("exclude_date_filters", $params_array)]);
43+
unset($post_params["exclude_date_filters"]);
44+
}
45+
// check if exclude_date_filters is set and true
46+
if (isset($post_params["exclude_date_filters"]) && $post_params["exclude_date_filters"] === true) {
47+
// Add "today" and exclude "from" and "to" from the $params_array
48+
$params_array = array_merge($params_array, ["today"]);
49+
unset($params_array["from"], $params_array["to"]);
50+
}
51+
52+
// re-establish historic order for backwards ID compatibility
53+
$historic_params_order = array("from", "to", "document_types", "sorting", "min_descsize", "repo");
54+
$reordered_params = array();
55+
foreach($historic_params_order as $param) {
56+
if (isset($post_params[$param])) {
57+
$reordered_params[] = $param;
58+
}
59+
}
60+
foreach($params_array as $param) {
61+
if (!in_array($param, $reordered_params)) {
62+
$reordered_params[] = $param;
63+
}
64+
}
65+
$params_array = $reordered_params;
66+
3967

4068

4169
$result = search("base", $dirty_query

server/workers/api/src/apis/request_validators.py

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,17 @@
11
from datetime import datetime
2-
from marshmallow import Schema, fields, pre_load, validates, ValidationError
2+
from marshmallow import Schema, fields, pre_load, validates, ValidationError, EXCLUDE
33

44

55
class SearchParamSchema(Schema):
6+
class Meta:
7+
unknown = EXCLUDE
8+
69
q = fields.Str()
710
q_advanced = fields.Str()
811
sorting = fields.Str(required=True)
9-
from_ = fields.Date(required=True, data_key="from",
12+
from_ = fields.Date(data_key="from",
1013
format="%Y-%m-%d")
11-
to = fields.Date(required=True,
12-
format="%Y-%m-%d")
14+
to = fields.Date(format="%Y-%m-%d")
1315
vis_type = fields.Str(require=True)
1416
limit = fields.Int()
1517
year_range = fields.Str()
@@ -33,15 +35,18 @@ class SearchParamSchema(Schema):
3335
coll = fields.Str()
3436
list_size = fields.Int()
3537
custom_title = fields.Str()
38+
exclude_date_filters = fields.Boolean()
3639
custom_clustering = fields.Str()
3740

3841

3942
@pre_load
4043
def fix_years(self, in_data, **kwargs):
41-
if len(in_data.get('from')) == 4:
42-
in_data["from"] = in_data["from"]+"-01-01"
43-
if len(in_data.get('to')) == 4:
44-
in_data["to"] = in_data["to"]+"-12-31"
44+
if "from" in in_data:
45+
if len(in_data.get('from')) == 4:
46+
in_data["from"] = in_data["from"]+"-01-01"
47+
if "to" in in_data:
48+
if len(in_data.get('to')) == 4:
49+
in_data["to"] = in_data["to"]+"-12-31"
4550
return in_data
4651

4752
@pre_load

server/workers/api/src/apis/utils.py

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -91,9 +91,12 @@ def get_or_create_contentprovider_lookup():
9191
cp_dict = df.name.to_dict()
9292
return cp_dict
9393
except Exception as e:
94-
df = pd.read_json("contentproviders.json")
95-
df.set_index("internal_name", inplace=True)
96-
cp_dict = df.name.to_dict()
97-
return cp_dict
94+
try:
95+
df = pd.read_json("contentproviders.json")
96+
df.set_index("internal_name", inplace=True)
97+
cp_dict = df.name.to_dict()
98+
return cp_dict
99+
except Exception as e:
100+
return {}
98101

99102
contentprovider_lookup = get_or_create_contentprovider_lookup()

server/workers/base/src/base.py

Lines changed: 36 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,9 @@
1111
import numpy as np
1212
from parsers import improved_df_parsing
1313

14+
from datetime import datetime
15+
import sys
16+
1417
formatter = logging.Formatter(fmt='%(asctime)s %(levelname)-8s %(message)s',
1518
datefmt='%Y-%m-%d %H:%M:%S')
1619

@@ -54,7 +57,7 @@ def base_rate_limit_reached(self):
5457
BASE demands one request per second (1 QPS), per
5558
https://www.base-search.net/about/download/base_interface.pdf
5659
"""
57-
60+
5861
t = self.redis_store.time()[0]
5962
self.redis_store.setnx(self.rate_key, 0)
6063
try:
@@ -122,7 +125,10 @@ def execute_search(self, params):
122125
raise
123126

124127
def sanitize_metadata(self, metadata):
128+
125129
metadata["sanitized_authors"] = metadata["authors"].map(lambda x: sanitize_authors(x))
130+
metadata["year"] = metadata["year"].map(lambda x: sanitize_year(x))
131+
126132
return metadata
127133

128134
def enrich_metadata(self, metadata):
@@ -166,7 +172,7 @@ def run(self):
166172
res = self.execute_search(params)
167173
res["id"] = k
168174
if res.get("status") == "error" or params.get('raw') is True:
169-
self.redis_store.set(k+"_output", json.dumps(res))
175+
self.redis_store.set(k + "_output", json.dumps(res))
170176
else:
171177
self.redis_store.rpush("input_data", json.dumps(res).encode('utf8'))
172178
q_len = self.redis_store.llen("input_data")
@@ -195,7 +201,7 @@ def find_version_in_doi(doi):
195201
return int(m[0])
196202
else:
197203
return None
198-
204+
199205
def extract_doi_suffix(doi):
200206
return doi.split("/")[4:]
201207

@@ -248,7 +254,8 @@ def add_false_negatives(df):
248254
df.loc[df[(~df.is_duplicate) & (df.doi_duplicate)].index, "is_duplicate"] = True
249255
return df
250256

251-
def find_duplicate_indexes(df):
257+
258+
def find_duplicate_indexes(df):
252259
dupind = df.id.map(lambda x: df[df.duplicates.str.contains(x)].index)
253260
tmp = pd.DataFrame(dupind).astype(str).drop_duplicates().index
254261
return dupind[tmp]
@@ -269,7 +276,8 @@ def mark_latest_doi(df, dupind):
269276
df.loc[latest.index, "is_latest"] = True
270277
df.loc[latest.index, "keep"] = True
271278
return df
272-
279+
280+
273281
def remove_textual_duplicates_from_different_sources(df, dupind):
274282
for _, idx in dupind.iteritems():
275283
if len(idx) > 1:
@@ -348,7 +356,8 @@ def parse_annotations(field):
348356
return {}
349357
else:
350358
return {}
351-
359+
360+
352361
def parse_annotations_for_all(metadata, field_name):
353362
parsed_annotations = pd.DataFrame(metadata[field_name].map(lambda x: parse_annotations(x)))
354363
parsed_annotations.columns = ["annotations"]
@@ -363,4 +372,24 @@ def sanitize_authors(authors, n=15):
363372
authors = authors.split("; ")
364373
if len(authors) > n:
365374
authors = authors[:n-1] + authors[-1:]
366-
return "; ".join(authors)
375+
return "; ".join(authors)
376+
377+
378+
def sanitize_year(year_str):
    """Return ``year_str`` unchanged if it is a recognised date/year, else ``''``.

    A value is accepted when it parses with one of the known layouts
    (``%Y-%m-%d``, ``%Y-%m``, ``%Y-%m-%dT%H:%M:%SZ``, ``%Y %b %d``) or when
    ``str.isdigit()`` is true for it (e.g. ``"2019"``). An accepted value is
    returned verbatim; it is never reformatted.

    Args:
        year_str: Raw year/date value of a single record.

    Returns:
        The original string when it is recognised, otherwise an empty
        string. Non-string input (``None``, ``NaN``, numbers) yields ``''``.
    """
    # Non-string values (e.g. None or NaN for a missing year) would make
    # strptime() raise TypeError, which the ValueError handler below does not
    # catch, so they are rejected up front instead of crashing the caller.
    if not isinstance(year_str, str):
        return ''

    date_formats = ("%Y-%m-%d", "%Y-%m", "%Y-%m-%dT%H:%M:%SZ", "%Y %b %d")
    for fmt in date_formats:
        try:
            # Only used as a validity check; the parsed value is discarded
            # because the original string is what gets returned.
            datetime.strptime(year_str, fmt)
        except ValueError:
            continue
        return year_str

    # Handle bare years such as "2019".
    if year_str.isdigit():
        return year_str

    return ''

vis/js/components/ContextLine.js

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -35,8 +35,7 @@ class ContextLine extends React.Component {
3535
if (hidden) {
3636
return null;
3737
}
38-
39-
return (
38+
return (
4039
<ContextLineTemplate>
4140
{params.showAuthor && (
4241
<Author
@@ -60,10 +59,12 @@ class ContextLine extends React.Component {
6059
popoverContainer={this.props.popoverContainer}
6160
/>
6261
)}
63-
{defined(params.timespan) &&
62+
{(defined(params.timespan) && (!params.excludeDateFilters || params.excludeDateFilters === "false")) &&
63+
// {defined(params.timespan) &&
6464
<Timespan>
6565
<ContextTimeFrame popoverContainer={popoverContainer} timespan={params.timespan}/>
66-
</Timespan>}
66+
</Timespan>
67+
}
6768
<DocumentTypes
6869
documentTypes={params.documentTypes}
6970
popoverContainer={popoverContainer}

0 commit comments

Comments
 (0)