@@ -128,30 +128,49 @@ def push_metadata_to_queue(
128128 redis_store : redis .Redis ,
129129 params : Dict [str , Union [str , List [str ]]],
130130 metadata : pd .DataFrame ,
131- source : Literal [ "crossref" , "altmetric" ]
131+ source_list : List [ str ]
132132) -> str :
133133 """
134134 Sending metadata for processing into Redis queue and returning the request_id.
135135
136136 :param redis_store: Object of the Redis store.
137137 :param params: Request params.
138138 :param metadata: DataFrame with default metadata.
139- :param source : define from which service additional metadata will be received.
139+ :param source_list: services from which additional metadata will be received (available values: "crossref", "altmetric").
140140 :return: request_id for the receiving of the request result.
141141 """
142+ # Checks that valid values are specified in the source array
143+ check_metadata_enrichment_source (source_list )
142144
145+ # Creates a new unique request identifier that will then be used to retrieve the result
143146 request_id = str (uuid .uuid4 ())
144- params ["metrics_sources" ] = [source ]
147+
148+ # Specifies from which sources to obtain information
149+ params ["metrics_sources" ] = source_list
150+
151+ # Payload object creation
145152 task_data = json .dumps ({
146153 "id" : request_id ,
147154 "params" : params ,
148155 "metadata" : metadata .to_json (orient = "records" ),
149156 })
150157
158+ # Pushing request to Redis and returning request id
151159 redis_store .rpush ("metrics" , task_data )
152160 return request_id
153161
154162
163+ def check_metadata_enrichment_source (source_list : List [str ]) -> None :
164+ """
165+ Checks that valid values are specified in the source array.
166+
167+ :param source_list: List of sources from which metadata will be enriched.
168+ :return: None.
169+ """
170+ if not all (source in ("crossref" , "altmetric" ) for source in source_list ):
171+ raise ValueError ("Source list must contain only 'crossref' or 'altmetric'" )
172+
173+
155174def fetch_enriched_metadata (redis_store : redis .Redis , request_id : str , timeout : int = 600 ) -> pd .DataFrame :
156175 """
157176 Getting enriched metadata from Redis.
@@ -161,23 +180,29 @@ def fetch_enriched_metadata(redis_store: redis.Redis, request_id: str, timeout:
161180 :param timeout: Results waiting time (default - 600 seconds).
162181 :return: Enriched DataFrame with metadata.
163182 """
183+ # Getting result of metadata enrichment from Redis
164184 result = get_key (redis_store , request_id , timeout )
165185 return pd .DataFrame (result ["input_data" ])
166186
167187
168- def get_metadata_columns_for_integration ( integration : Literal [ "pubmed" , "orcid" ]) :
188+ def get_metadata_columns_for_source ( source_list : List [ str ]) -> List [ str ] :
169189 """
170- Returning required metadata columns for different integrations .
190+ Returning required metadata columns for different sources .
171191
172- :param integration: integration service .
192+ :param source_list: List of sources from which metadata is received.
173193 :return: array with required metadata columns.
174194 """
195+ # Checks that valid values are specified in the source array
196+ check_metadata_enrichment_source (source_list )
197+
198+ # Define required metadata columns for different sources and return them
199+ result = []
175200
176- if integration == 'pubmed' :
177- return ["citation_count" ]
178- elif integration == 'orcid' :
179- return [
180- "citation_count" ,
201+ if "crossref" in source_list :
202+ result . extend ( ["citation_count" ])
203+
204+ if "altmetric" in source_list :
205+ result . extend ([
181206 "cited_by_wikipedia_count" ,
182207 "cited_by_msm_count" ,
183208 "cited_by_policies_count" ,
@@ -190,21 +215,24 @@ def get_metadata_columns_for_integration(integration: Literal["pubmed", "orcid"]
190215 "cited_by_qna_count" ,
191216 "cited_by_tweeters_count" ,
192217 "cited_by_videos_count"
193- ]
218+ ])
194219
195- return []
220+ return result
196221
197222
198- def ensure_required_columns (metadata : pd .DataFrame , integration : Literal [ "pubmed" , "orcid" ]) -> pd .DataFrame :
223+ def ensure_required_columns (metadata : pd .DataFrame , source_list : List [ str ]) -> pd .DataFrame :
199224 """
200225 Checks that all necessary columns are available or adding them with NaN value.
201226
202227 :param metadata: DataFrame with metadata.
203- :param integration: integration service .
228+ :param source_list: List of sources from which metadata is received.
204229 :return: Updated DataFrame.
205230 """
231+ # Checks that valid values are specified in the source array
232+ check_metadata_enrichment_source (source_list )
206233
207- columns = get_metadata_columns_for_integration (integration )
234+ # Gets metadata columns that must be received from source(-s)
235+ columns = get_metadata_columns_for_source (source_list )
208236 for column in columns :
209237 if column not in metadata .columns :
210238 metadata [column ] = np .NaN
@@ -216,26 +244,27 @@ def enrich_metadata(
216244 redis : redis .Redis ,
217245 params : Dict [str , Union [str , List [str ]]],
218246 metadata : pd .DataFrame ,
219- source : Literal ["crossref" , "altmetric" ],
220- integration : Literal ["pubmed" , "orcid" ]
247+ source_list : List [str ],
221248) -> pd .DataFrame :
222249 """
223250 Enriching metadata - adding information about citations from Redis.
224251
225252 :param redis: store object of Redis.
226253 :param params: params of the request.
227254 :param metadata: DataFrame with default metadata.
228- :param source: define from which service additional metadata will be received.
255+ :param source_list: services from which additional metadata will be received (available values: "crossref", "altmetric").
229256 :return: Enriched DataFrame with metadata.
230257 """
258+ # Checks that valid values are specified in the source array
259+ check_metadata_enrichment_source (source_list )
231260
232261 # Creates a request to metrics for metadata enrichment
233262 # and returns request_id for receiving the result later
234- request_id = push_metadata_to_queue (redis , params , metadata , source )
263+ request_id = push_metadata_to_queue (redis , params , metadata , source_list )
235264
236265 # Getting the result after metadata enrichment at metrics
237266 enriched_metadata = fetch_enriched_metadata (redis , request_id )
238267
239268 # Checks that all necessary columns are available or adding them with NaN value
240- enriched_metadata = ensure_required_columns (enriched_metadata , integration )
269+ enriched_metadata = ensure_required_columns (enriched_metadata , source_list )
241270 return enriched_metadata
0 commit comments