11import re
2- import ast
32import sys
43import hashlib
54import pandas as pd
65
76from pathlib import Path
7+ from .helpers .coordinates import get_row_coordinates_with_collision_offset
8+ from .constants .constants import COLUMNS
89
910PATH_TO_FOLDER_IN_CONTAINER = "common/common/aquanavi/"
1011CSV_PATH_WITH_REAL_DATA = f"{ PATH_TO_FOLDER_IN_CONTAINER } mesocosm_data_cleaned.csv"
1112CSV_PATH_WITH_TEST_DATA = f"{ PATH_TO_FOLDER_IN_CONTAINER } mesocosm_test_data.csv"
1213DEFAULT_DOCUMENT_TYPE = "physical object"
1314DEFAULT_RESULT_TYPE = ['Other/Unknown material' ]
14- COLUMNS = {
15- "url" : "url" ,
16- "name" : "Name" ,
17- "country" : "Country" ,
18- "continent" : "Continent" ,
19- "equipment" : "Equipment" ,
20- "research_topics" : "Research Topics" ,
21- "specialist_areas" : "Specialist areas" ,
22- "grand_challenges" : "Primary interests" ,
23- "controlled_parameters" : "Controlled Parameters" ,
24- "description" : "Description of Facility" ,
25- "location" : "Facility location(s) split" ,
26- "years_of_experiments" : "Years of Mesocosm Experiments" ,
27- "photos_of_experiments" : "Photos of experiments/installations images" ,
28- }
2915
3016def process_column (row , column_name , join_value_parts_with ):
3117 """
@@ -86,31 +72,6 @@ def get_and_process_value(row, column_name, is_remove_trailing_dot, join_value_p
8672
8773 return value
8874
89- def get_latitude_longitude (row ):
90- """
91- The function returns a list with latitude and longitude.
92-
93- Args:
94- row (str): String DataFrame.
95-
96- Returns:
97- list: A list with latitude and longitude (or None).
98- """
99- coordinates_string = str (row [COLUMNS ['location' ]]).strip ()
100-
101- latitude , longitude = None , None
102-
103- if "," in coordinates_string :
104- try :
105- coords = ast .literal_eval (coordinates_string )
106- if isinstance (coords , (list , tuple )) and len (coords ) == 2 :
107- latitude = float (str (coords [0 ]).strip ())
108- longitude = float (str (coords [1 ]).strip ())
109- except Exception :
110- latitude , longitude = None , None
111-
112- return [latitude , longitude ]
113-
11475def get_years_of_experiments (row ):
11576 """
11677 The function returns a time range in the list format.
@@ -163,19 +124,20 @@ def get_years_of_experiments(row):
163124
164125 return None , None
165126
166- def get_coverage (row ):
127+ def get_coverage_with_coordinates (row , latitude , longitude ):
167128 """
168129 Creates the coverage field for a data entry. The coverage field contains
169130 a string value in the format shown on the line below:
170131 "country=France; continent=Europe; east=-0.618181; north=44.776596 ; start=2010-07; end=2012-06"
171132
172133 Args:
173134 row (pandas.Series): A single row of the DataFrame.
135+ latitude (float|None): Latitude.
136+ longitude (float|None): Longitude.
174137
175138 Returns:
176139 str: String in the coverage field format.
177140 """
178- latitude , longitude = get_latitude_longitude (row )
179141 start , end = get_years_of_experiments (row )
180142
181143 coverage_parts = []
@@ -342,11 +304,13 @@ def map_sample_data():
342304 df = load_and_prepare_dataframe ()
343305
344306 result = []
307+ seen_coordinates = {}
345308 for _ , row in df .iterrows ():
346309 id = get_id (row )
347310 title = str (row [COLUMNS ['name' ]]).strip () if row [COLUMNS ['name' ]] else ""
348311 url = str (row [COLUMNS ['url' ]]).strip () if row [COLUMNS ['url' ]] else ""
349312 image = row [COLUMNS ['photos_of_experiments' ]] if row [COLUMNS ['photos_of_experiments' ]] else ""
313+ latitude , longitude = get_row_coordinates_with_collision_offset (row , seen_coordinates )
350314
351315 result .append ({
352316 "id" : id ,
@@ -363,7 +327,7 @@ def map_sample_data():
363327 "relation" : image ,
364328 "paper_abstract" : get_abstract (row ),
365329 "subject_orig" : get_keywords (row ),
366- "coverage" : get_coverage (row )
330+ "coverage" : get_coverage_with_coordinates (row , latitude = latitude , longitude = longitude )
367331 })
368332
369333 return { "documents" : result }
0 commit comments