99import pandas as pd
1010from datetime import timedelta
1111from dateutil .parser import parse
12+ from typing import Dict , List , Union
13+ from typing_extensions import Literal
1214
1315
1416redis_config = {
@@ -30,7 +32,7 @@ def get_key(store, key, timeout=180):
3032 result = {
3133 "k" : key ,
3234 "status" : "error" ,
33- "error" : "timeout"
35+ "error" : "timeout"
3436 }
3537 while tries <= max_tries :
3638 res = store .get (key + "_output" )
@@ -122,18 +124,24 @@ def get_nested_value(data, keys, default=None):
122124 return data
123125
124126
125- def push_metadata_to_queue (redis_store , params , metadata ):
127+ def push_metadata_to_queue (
128+ redis_store : redis .Redis ,
129+ params : Dict [str , Union [str , List [str ]]],
130+ metadata : pd .DataFrame ,
131+ source : Literal ["crossref" , "altmetric" ]
132+ ) -> str :
126133 """
127134 Sending metadata for processing into Redis queue and returning the request_id.
128135
129136 :param redis_store: Object of the Redis store.
130137 :param params: Request params.
131138 :param metadata: DataFrame with default metadata.
139+ :param source: define from which service additional metadata will be received.
132140 :return: request_id for the receiving of the request result.
133141 """
134142
135143 request_id = str (uuid .uuid4 ())
136- params ["metrics_sources" ] = ["crossref" ]
144+ params ["metrics_sources" ] = [source ]
137145 task_data = json .dumps ({
138146 "id" : request_id ,
139147 "params" : params ,
@@ -144,7 +152,7 @@ def push_metadata_to_queue(redis_store, params, metadata):
144152 return request_id
145153
146154
147- def fetch_enriched_metadata (redis_store , request_id , timeout = 600 ):
155+ def fetch_enriched_metadata (redis_store : redis . Redis , request_id : str , timeout : int = 600 ) -> pd . DataFrame :
148156 """
149157 Getting enriched metadata from Redis.
150158
@@ -157,15 +165,19 @@ def fetch_enriched_metadata(redis_store, request_id, timeout = 600):
157165 return pd .DataFrame (result ["input_data" ])
158166
159167
160- def ensure_required_columns (metadata : pd .DataFrame ) -> pd .DataFrame :
168+ def ensure_required_columns (metadata : pd .DataFrame , source : Literal [ "crossref" , "altmetric" ] ) -> pd .DataFrame :
161169 """
162170 Checks that all necessary columns are available or adding them with NaN value.
163171
164172 :param metadata: DataFrame with metadata.
173+ :param source: define from which service additional metadata was received.
165174 :return: Updated DataFrame.
166175 """
167- REQUIRED_METADATA_COLUMNS = [
168- "citation_count" ,
176+ REQUIRED_METADATA_COLUMNS_FOR_CROSSREF : List [str ] = [
177+ "citation_count"
178+ ]
179+
180+ REQUIRED_METADATA_COLUMNS_FOR_ALTMETRIC : List [str ] = [
169181 "cited_by_wikipedia_count" ,
170182 "cited_by_msm_count" ,
171183 "cited_by_policies_count" ,
@@ -180,31 +192,44 @@ def ensure_required_columns(metadata: pd.DataFrame) -> pd.DataFrame:
180192 "cited_by_videos_count"
181193 ]
182194
183- for column in REQUIRED_METADATA_COLUMNS :
195+ columns = None
196+ if source == 'crossref' :
197+ columns = REQUIRED_METADATA_COLUMNS_FOR_CROSSREF
198+ elif source == 'altmetric' :
199+ columns = REQUIRED_METADATA_COLUMNS_FOR_ALTMETRIC
200+ else :
201+ columns = [* REQUIRED_METADATA_COLUMNS_FOR_CROSSREF , * REQUIRED_METADATA_COLUMNS_FOR_ALTMETRIC ]
202+
203+ for column in columns :
184204 if column not in metadata .columns :
185205 metadata [column ] = np .NaN
186206
187207 return metadata
188208
189209
def enrich_metadata(
    redis: redis.Redis,
    params: Dict[str, Union[str, List[str]]],
    metadata: pd.DataFrame,
    source: Literal["crossref", "altmetric"],
) -> pd.DataFrame:
    """
    Enrich metadata with additional metrics obtained through the Redis queue.

    The work is delegated to three helpers: the metadata is pushed to the
    queue, the processed result is fetched back by its request id, and the
    result is then checked for the columns expected for ``source``.

    :param redis: store object of Redis.
    :param params: params of the request.
    :param metadata: DataFrame with default metadata.
    :param source: define from which service additional metadata will be received.
    :return: Enriched DataFrame with metadata.
    """
    # NOTE: inside this function the ``redis`` parameter shadows the ``redis``
    # module used in the annotation above; only the store object is needed here.

    # Create a request to metrics for metadata enrichment and keep the
    # request_id so that the result can be received later.
    request_id = push_metadata_to_queue(redis, params, metadata, source)

    # Get the result after metadata enrichment at metrics.
    enriched_metadata = fetch_enriched_metadata(redis, request_id)

    # Check that all columns required for this source are available,
    # adding any missing ones with NaN values.
    return ensure_required_columns(enriched_metadata, source)
0 commit comments