Skip to content

Commit 2f97c39

Browse files
committed
refactor: code logic depending on source of metadata
1 parent 679ed67 commit 2f97c39

2 files changed

Lines changed: 41 additions & 15 deletions

File tree

server/workers/common/common/utils.py

Lines changed: 39 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -9,6 +9,8 @@
99
import pandas as pd
1010
from datetime import timedelta
1111
from dateutil.parser import parse
12+
from typing import Dict, List, Union
13+
from typing_extensions import Literal
1214

1315

1416
redis_config = {
@@ -30,7 +32,7 @@ def get_key(store, key, timeout=180):
3032
result = {
3133
"k": key,
3234
"status": "error",
33-
"error": "timeout"
35+
"error": "timeout"
3436
}
3537
while tries <= max_tries:
3638
res = store.get(key+"_output")
@@ -122,18 +124,24 @@ def get_nested_value(data, keys, default=None):
122124
return data
123125

124126

125-
def push_metadata_to_queue(redis_store, params, metadata):
127+
def push_metadata_to_queue(
128+
redis_store: redis.Redis,
129+
params: Dict[str, Union[str, List[str]]],
130+
metadata: pd.DataFrame,
131+
source: Literal["crossref", "altmetric"]
132+
) -> str:
126133
"""
127134
Sending metadata for processing into Redis queue and returning the request_id.
128135
129136
:param redis_store: Object of the Redis store.
130137
:param params: Request params.
131138
:param metadata: DataFrame with default metadata.
139+
:param source: define from which service additional metadata will be received.
132140
:return: request_id for the receiving of the request result.
133141
"""
134142

135143
request_id = str(uuid.uuid4())
136-
params["metrics_sources"] = ["crossref"]
144+
params["metrics_sources"] = [source]
137145
task_data = json.dumps({
138146
"id": request_id,
139147
"params": params,
@@ -144,7 +152,7 @@ def push_metadata_to_queue(redis_store, params, metadata):
144152
return request_id
145153

146154

147-
def fetch_enriched_metadata(redis_store, request_id, timeout = 600):
155+
def fetch_enriched_metadata(redis_store: redis.Redis, request_id: str, timeout: int = 600) -> pd.DataFrame:
148156
"""
149157
Getting enriched metadata from Redis.
150158
@@ -157,15 +165,19 @@ def fetch_enriched_metadata(redis_store, request_id, timeout = 600):
157165
return pd.DataFrame(result["input_data"])
158166

159167

160-
def ensure_required_columns(metadata: pd.DataFrame) -> pd.DataFrame:
168+
def ensure_required_columns(metadata: pd.DataFrame, source: Literal["crossref", "altmetric"]) -> pd.DataFrame:
161169
"""
162170
Checks that all necessary columns are available or adding them with NaN value.
163171
164172
:param metadata: DataFrame with metadata.
173+
:param source: define from which service additional metadata was received.
165174
:return: Updated DataFrame.
166175
"""
167-
REQUIRED_METADATA_COLUMNS = [
168-
"citation_count",
176+
REQUIRED_METADATA_COLUMNS_FOR_CROSSREF: List[str] = [
177+
"citation_count"
178+
]
179+
180+
REQUIRED_METADATA_COLUMNS_FOR_ALTMETRIC: List[str] = [
169181
"cited_by_wikipedia_count",
170182
"cited_by_msm_count",
171183
"cited_by_policies_count",
@@ -180,31 +192,44 @@ def ensure_required_columns(metadata: pd.DataFrame) -> pd.DataFrame:
180192
"cited_by_videos_count"
181193
]
182194

183-
for column in REQUIRED_METADATA_COLUMNS:
195+
columns = None
196+
if source == 'crossref':
197+
columns = REQUIRED_METADATA_COLUMNS_FOR_CROSSREF
198+
elif source == 'altmetric':
199+
columns = REQUIRED_METADATA_COLUMNS_FOR_ALTMETRIC
200+
else:
201+
columns = [*REQUIRED_METADATA_COLUMNS_FOR_CROSSREF, *REQUIRED_METADATA_COLUMNS_FOR_ALTMETRIC]
202+
203+
for column in columns:
184204
if column not in metadata.columns:
185205
metadata[column] = np.NaN
186206

187207
return metadata
188208

189209

190-
def enrich_metadata(redis_store, params, metadata: pd.DataFrame) -> pd.DataFrame:
210+
def enrich_metadata(
211+
redis: redis.Redis,
212+
params: Dict[str, Union[str, List[str]]],
213+
metadata: pd.DataFrame,
214+
source: Literal["crossref", "altmetric"]
215+
) -> pd.DataFrame:
191216
"""
192217
Enriching metadata - adding information about citations from Redis.
193218
194-
:param redis_store: store object of Redis.
219+
:param redis: store object of Redis.
195220
:param params: params of the request.
196221
:param metadata: DataFrame with default metadata.
197-
222+
:param source: define from which service additional metadata will be received.
198223
:return: Enriched DataFrame with metadata.
199224
"""
200225

201226
# Creates a request to metrics for metadata enrichment
202227
# and returns request_id for receiving the result later
203-
request_id = push_metadata_to_queue(redis_store, params, metadata)
228+
request_id = push_metadata_to_queue(redis, params, metadata, source)
204229

205230
# Getting the result after metadata enrichment at metrics
206-
enriched_metadata = fetch_enriched_metadata(redis_store, request_id)
231+
enriched_metadata = fetch_enriched_metadata(redis, request_id)
207232

208233
# Checks that all necessary columns are available or adding them with NaN value
209-
enriched_metadata = ensure_required_columns(enriched_metadata)
234+
enriched_metadata = ensure_required_columns(enriched_metadata, source)
210235
return enriched_metadata

server/workers/pubmed/src/pubmed.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -41,8 +41,9 @@ def execute_search(self, params):
4141
return raw_metadata
4242

4343
metadata = pd.DataFrame(raw_metadata)
44+
source_for_metadata_enrichment = "crossref"
4445

45-
metadata = enrich_metadata(self.redis_store, params, metadata)
46+
metadata = enrich_metadata(self.redis_store, params, metadata, source_for_metadata_enrichment)
4647
for index, row in metadata.iterrows():
4748
self.logger.debug(f"Title: {row['title']}, DOI: {row['doi']}, Citations: {row.get('citation_count', 'N/A')}")
4849

0 commit comments

Comments
 (0)