@@ -139,23 +139,30 @@ def push_metadata_to_queue(
139139 :param source_list: define from which service additional metadata will be received (available values: "crossref", "altmetric").
140140 :return: request_id for the receiving of the request result.
141141 """
142+ # Checks that valid values are specified in the source array
142143 check_metadata_enrichment_source (source_list )
143144
145+ # Creates a new unique request identifier that will then be used to retrieve the result
144146 request_id = str (uuid .uuid4 ())
147+
148+ # Specifies from which sources to obtain information
145149 params ["metrics_sources" ] = source_list
150+
151+ # Payload object creation
146152 task_data = json .dumps ({
147153 "id" : request_id ,
148154 "params" : params ,
149155 "metadata" : metadata .to_json (orient = "records" ),
150156 })
151157
158+ # Pushing request to Redis and returning request id
152159 redis_store .rpush ("metrics" , task_data )
153160 return request_id
154161
155162
156163def check_metadata_enrichment_source (source_list : List [str ]) -> None :
157164 """
158- Checks that source for metadata enrichment contains correct values .
165+ Checks that valid values are specified in the source array .
159166
160167 :param source_list: List of sources from where metadata will be enriched.
161168 :return: None.
@@ -173,23 +180,29 @@ def fetch_enriched_metadata(redis_store: redis.Redis, request_id: str, timeout:
173180 :param timeout: Results waiting time (default - 600 seconds).
174181 :return: Enriched DataFrame with metadata.
175182 """
183+ # Getting result of metadata enrichment from Redis
176184 result = get_key (redis_store , request_id , timeout )
177185 return pd .DataFrame (result ["input_data" ])
178186
179187
180- def get_metadata_columns_for_integration ( integration : Literal [ "pubmed" , "orcid" ]) :
188+ def get_metadata_columns_for_source ( source_list : List [ str ]) -> List [ str ] :
181189 """
182- Returning required metadata columns for different integrations .
190+ Returning required metadata columns for different sources .
183191
184- :param integration: integration service .
192+ :param source_list: List of sources from where metadata received .
185193 :return: array with required metadata columns.
186194 """
195+ # Checks that valid values are specified in the source array
196+ check_metadata_enrichment_source (source_list )
197+
198+ # Define required metadata columns for different sources and return them
199+ result = []
187200
188- if integration == 'pubmed' :
189- return ["citation_count" ]
190- elif integration == 'orcid' :
191- return [
192- "citation_count" ,
201+ if "crossref" in source_list :
202+ result . extend ( ["citation_count" ])
203+
204+ if "altmetric" in source_list :
205+ result . extend ([
193206 "cited_by_wikipedia_count" ,
194207 "cited_by_msm_count" ,
195208 "cited_by_policies_count" ,
@@ -202,21 +215,24 @@ def get_metadata_columns_for_integration(integration: Literal["pubmed", "orcid"]
202215 "cited_by_qna_count" ,
203216 "cited_by_tweeters_count" ,
204217 "cited_by_videos_count"
205- ]
218+ ])
206219
207- return []
220+ return result
208221
209222
210- def ensure_required_columns (metadata : pd .DataFrame , integration : Literal [ "pubmed" , "orcid" ]) -> pd .DataFrame :
223+ def ensure_required_columns (metadata : pd .DataFrame , source_list : List [ str ]) -> pd .DataFrame :
211224 """
212225 Checks that all necessary columns are available, adding any missing ones with NaN values.
213226
214227 :param metadata: DataFrame with metadata.
215- :param integration: integration service .
228+ :param source_list: List of sources from where metadata received .
216229 :return: Updated DataFrame.
217230 """
231+ # Checks that valid values are specified in the source array
232+ check_metadata_enrichment_source (source_list )
218233
219- columns = get_metadata_columns_for_integration (integration )
234+ # Gets metadata columns that must be received from source(-s)
235+ columns = get_metadata_columns_for_source (source_list )
220236 for column in columns :
221237 if column not in metadata .columns :
222238 metadata [column ] = np .NaN
@@ -229,7 +245,6 @@ def enrich_metadata(
229245 params : Dict [str , Union [str , List [str ]]],
230246 metadata : pd .DataFrame ,
231247 source_list : List [str ],
232- integration : Literal ["pubmed" , "orcid" ]
233248) -> pd .DataFrame :
234249 """
235250 Enriching metadata - adding information about citations from Redis.
@@ -240,7 +255,7 @@ def enrich_metadata(
240255 :param source_list: define from which service additional metadata will be received (available values: "crossref", "altmetric").
241256 :return: Enriched DataFrame with metadata.
242257 """
243- # Checks that source list contains valid values
258+ # Checks that valid values are specified in the source array
244259 check_metadata_enrichment_source (source_list )
245260
246261 # Creates a request to metrics for metadata enrichment
@@ -251,5 +266,5 @@ def enrich_metadata(
251266 enriched_metadata = fetch_enriched_metadata (redis , request_id )
252267
253268 # Checks that all necessary columns are available or adding them with NaN value
254- enriched_metadata = ensure_required_columns (enriched_metadata , integration )
269+ enriched_metadata = ensure_required_columns (enriched_metadata , source_list )
255270 return enriched_metadata
0 commit comments