Skip to content

Commit a1cba10

Browse files
committed
refactor: use the shared enrich_metadata from common/utils.py in orcid_service.py
1 parent 2f97c39 commit a1cba10

1 file changed

Lines changed: 3 additions & 51 deletions

File tree

server/workers/orcid/src/orcid_service.py

Lines changed: 3 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
from pyorcid import Orcid, errors as pyorcid_errors
99
from pyorcid.orcid_authentication import OrcidAuthentication
1010
from typing import Tuple
11-
from common.utils import get_key, get_nested_value
11+
from common.utils import get_key, get_nested_value, enrich_metadata
1212
from repositories.author_info import AuthorInfoRepository
1313
from repositories.works import WorksRepository
1414
from redis import StrictRedis
@@ -79,56 +79,7 @@ def execute_search(self, params: Dict[str, str]) -> Union[SuccessResult, ErrorRe
7979
except (pyorcid_errors.Unauthorized, Exception) as e:
8080
return self._handle_error(params, "unexpected data processing error", e)
8181

82-
def enrich_metadata(self, params: Dict[str, str], metadata: pd.DataFrame) -> pd.DataFrame:
83-
"""
84-
This function enriches the metadata DataFrame with additional information
85-
from external sources, in this case crossref and altmetric.
86-
The function will store the enriched metadata in the Redis queue for further
87-
processing, from where it will be picked up by the metrics worker.
88-
Returned data will be the original metadata enriched with additional
89-
metadata columns from the external sources.
90-
91-
Parameters:
92-
- params (dict): The parameters for the search endpoint.
93-
- metadata (pd.DataFrame): The metadata DataFrame to enrich.
9482

95-
Returns:
96-
- pd.DataFrame: The enriched metadata DataFrame.
97-
"""
98-
99-
self.logger.debug(f"Enriching metadata for ORCID {params.get('orcid')}")
100-
101-
request_id = str(uuid.uuid4())
102-
task_data = {
103-
"id": request_id,
104-
"params": params,
105-
"metadata": metadata.to_json(orient="records"),
106-
}
107-
self.redis_store.rpush("metrics", json.dumps(task_data))
108-
result = get_key(self.redis_store, request_id, 600)
109-
110-
metadata = pd.DataFrame(result["input_data"])
111-
112-
for c in [
113-
"citation_count",
114-
"cited_by_wikipedia_count",
115-
"cited_by_msm_count",
116-
"cited_by_policies_count",
117-
"cited_by_patents_count",
118-
"cited_by_accounts_count",
119-
"cited_by_fbwalls_count",
120-
"cited_by_feeds_count",
121-
"cited_by_gplus_count",
122-
"cited_by_rdts_count",
123-
"cited_by_qna_count",
124-
"cited_by_tweeters_count",
125-
"cited_by_videos_count"
126-
]:
127-
if c not in metadata.columns:
128-
metadata[c] = np.NaN
129-
130-
return metadata
131-
13283
def log_dataframe(self, df: pd.DataFrame, params: Dict[str, str], name: str, ):
13384
orcid = params.get('orcid')
13485

@@ -437,7 +388,8 @@ def _retrieve_author_info_and_metadata(self, orcid: Orcid) -> Tuple[AuthorInfo,
437388

438389
def _process_metadata(self, metadata: pd.DataFrame, author_info: AuthorInfo, params: Dict[str, str]) -> pd.DataFrame:
439390
metadata["authors"] = metadata["authors"].replace("", author_info.author_name)
440-
metadata = self.enrich_metadata(params, metadata)
391+
source_for_metadata_enrichment = "crossref"
392+
metadata = enrich_metadata(self.redis_store, params, metadata, source_for_metadata_enrichment)
441393
self.logger.debug(f'metadata shape after base enrichment: {metadata.shape}')
442394
author_info = self.enrich_author_info(author_info, metadata, params)
443395
self.logger.debug(f'metadata shape after enrichment: {metadata.shape}')

0 commit comments

Comments
 (0)