Skip to content

Commit 60586f4

Browse files
committed
fix: required metadata columns for orcid integration
1 parent 23aebef commit 60586f4

3 files changed

Lines changed: 40 additions & 32 deletions

File tree

server/workers/common/common/utils.py

Lines changed: 36 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -165,41 +165,46 @@ def fetch_enriched_metadata(redis_store: redis.Redis, request_id: str, timeout:
165165
return pd.DataFrame(result["input_data"])
166166

167167

168-
def ensure_required_columns(metadata: pd.DataFrame, source: Literal["crossref", "altmetric"]) -> pd.DataFrame:
168+
def get_metadata_columns_for_integration(integration: Literal["pubmed", "orcid"]) -> List[str]:
    """
    Return the metadata columns that must be present for an integration.

    :param integration: integration service name ("pubmed" or "orcid").
    :return: a new list with the required metadata column names; the list is
        empty when the integration is not one of the known services.
    """
    # Lookup table instead of an if/elif chain: one entry per integration.
    # Tuples keep the table itself immutable.
    required_columns = {
        "pubmed": (
            "citation_count",
        ),
        "orcid": (
            "citation_count",
            "cited_by_wikipedia_count",
            "cited_by_msm_count",
            "cited_by_policies_count",
            "cited_by_patents_count",
            "cited_by_accounts_count",
            "cited_by_fbwalls_count",
            "cited_by_feeds_count",
            "cited_by_gplus_count",
            "cited_by_rdts_count",
            "cited_by_qna_count",
            "cited_by_tweeters_count",
            "cited_by_videos_count",
        ),
    }

    # Hand back a fresh list on every call so a caller that mutates the
    # result cannot affect later calls.
    return list(required_columns.get(integration, ()))
196+
197+
198+
def ensure_required_columns(metadata: pd.DataFrame, integration: Literal["pubmed", "orcid"]) -> pd.DataFrame:
169199
"""
170200
Checks that all necessary columns are available or adding them with NaN value.
171201
172202
:param metadata: DataFrame with metadata.
173-
:param source: define from which service additional metadata was received.
203+
:param integration: integration service.
174204
:return: Updated DataFrame.
175205
"""
176-
REQUIRED_METADATA_COLUMNS_FOR_CROSSREF: List[str] = [
177-
"citation_count"
178-
]
179-
180-
REQUIRED_METADATA_COLUMNS_FOR_ALTMETRIC: List[str] = [
181-
"cited_by_wikipedia_count",
182-
"cited_by_msm_count",
183-
"cited_by_policies_count",
184-
"cited_by_patents_count",
185-
"cited_by_accounts_count",
186-
"cited_by_fbwalls_count",
187-
"cited_by_feeds_count",
188-
"cited_by_gplus_count",
189-
"cited_by_rdts_count",
190-
"cited_by_qna_count",
191-
"cited_by_tweeters_count",
192-
"cited_by_videos_count"
193-
]
194-
195-
columns = None
196-
if source == 'crossref':
197-
columns = REQUIRED_METADATA_COLUMNS_FOR_CROSSREF
198-
elif source == 'altmetric':
199-
columns = REQUIRED_METADATA_COLUMNS_FOR_ALTMETRIC
200-
else:
201-
columns = [*REQUIRED_METADATA_COLUMNS_FOR_CROSSREF, *REQUIRED_METADATA_COLUMNS_FOR_ALTMETRIC]
202206

207+
columns = get_metadata_columns_for_integration(integration)
203208
for column in columns:
204209
if column not in metadata.columns:
205210
metadata[column] = np.NaN
@@ -211,7 +216,8 @@ def enrich_metadata(
211216
redis: redis.Redis,
212217
params: Dict[str, Union[str, List[str]]],
213218
metadata: pd.DataFrame,
214-
source: Literal["crossref", "altmetric"]
219+
source: Literal["crossref", "altmetric"],
220+
integration: Literal["pubmed", "orcid"]
215221
) -> pd.DataFrame:
216222
"""
217223
Enriching metadata - adding information about citations from Redis.
@@ -231,5 +237,5 @@ def enrich_metadata(
231237
enriched_metadata = fetch_enriched_metadata(redis, request_id)
232238

233239
# Checks that all necessary columns are available or adding them with NaN value
234-
enriched_metadata = ensure_required_columns(enriched_metadata, source)
240+
enriched_metadata = ensure_required_columns(enriched_metadata, integration)
235241
return enriched_metadata

server/workers/orcid/src/orcid_service.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -389,7 +389,8 @@ def _retrieve_author_info_and_metadata(self, orcid: Orcid) -> Tuple[AuthorInfo,
389389
def _process_metadata(self, metadata: pd.DataFrame, author_info: AuthorInfo, params: Dict[str, str]) -> pd.DataFrame:
390390
metadata["authors"] = metadata["authors"].replace("", author_info.author_name)
391391
source_for_metadata_enrichment = "crossref"
392-
metadata = enrich_metadata(self.redis_store, params, metadata, source_for_metadata_enrichment)
392+
integration = 'orcid'
393+
metadata = enrich_metadata(self.redis_store, params, metadata, source_for_metadata_enrichment, integration)
393394
self.logger.debug(f'metadata shape after base enrichment: {metadata.shape}')
394395
author_info = self.enrich_author_info(author_info, metadata, params)
395396
self.logger.debug(f'metadata shape after enrichment: {metadata.shape}')

server/workers/pubmed/src/pubmed.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,8 +42,9 @@ def execute_search(self, params):
4242

4343
metadata = pd.DataFrame(raw_metadata)
4444
source_for_metadata_enrichment = "crossref"
45+
integration = 'pubmed'
4546

46-
metadata = enrich_metadata(self.redis_store, params, metadata, source_for_metadata_enrichment)
47+
metadata = enrich_metadata(self.redis_store, params, metadata, source_for_metadata_enrichment, integration)
4748

4849
text = pd.DataFrame(raw_text)
4950

0 commit comments

Comments
 (0)