Skip to content

Commit 4f8134a

Browse files
committed
feat: offset for duplicate coordinates
1 parent 29a2708 commit 4f8134a

5 files changed

Lines changed: 119 additions & 45 deletions

File tree

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
# Maps the short field names used by the mapping code to the column headers
# that are looked up on each DataFrame row (e.g. row[COLUMNS['location']]).
COLUMNS = dict(
    url="url",
    name="Name",
    country="Country",
    continent="Continent",
    equipment="Equipment",
    research_topics="Research Topics",
    specialist_areas="Specialist areas",
    grand_challenges="Primary interests",
    controlled_parameters="Controlled Parameters",
    description="Description of Facility",
    location="Facility location(s) split",
    years_of_experiments="Years of Mesocosm Experiments",
    photos_of_experiments="Photos of experiments/installations images",
)

server/workers/common/common/aquanavi/helpers/__init__.py

Whitespace-only changes.
Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
import ast
2+
import math
3+
4+
from ..constants.constants import COLUMNS
5+
6+
# Upper bound, in meters, on the spiral radius used to move a duplicate
# marker, so markers stay near the original location.
MAX_OFFSET_METERS = 30.0
# Controls how quickly the offset grows for the 2nd, 3rd, ... duplicates:
# the radius is GROWTH_FACTOR * sqrt(duplicate_index) meters before the cap
# above is applied.
GROWTH_FACTOR = 6.0
# Golden angle in degrees, 180 * (3 - sqrt(5)); every further duplicate is
# rotated by this amount, which gives an even spiral distribution.
GOLDEN_ANGLE_DEGREES = 137.50776405003785
# Approx conversion: meters per 1 degree of latitude.
METERS_PER_DEGREE_LAT = 111_111.0
# Lower bound applied to cos(latitude) to avoid division by ~0 in the
# longitude conversion near the poles.
COS_EPSILON = 1e-6
16+
17+
def get_row_coordinates_with_collision_offset(row, seen_coordinates):
    """
    Resolves the coordinates of a row and nudges them when the exact same
    (lat, lon) pair was already produced for an earlier row.

    Args:
        row (pandas.Series): DataFrame row.
        seen_coordinates (dict[tuple[float, float], int]): Occurrence counter by exact
            (lat, lon). It is updated in place whenever the row has both coordinates.

    Returns:
        tuple[float|None, float|None]: (latitude, longitude); unchanged for the first
        occurrence or for missing coordinates, offset for later duplicates.
    """
    latitude, longitude = get_latitude_longitude(row)

    # Rows without a complete position are never counted as duplicates.
    if latitude is None or longitude is None:
        return offset_duplicate_coordinates(latitude, longitude, 0)

    coordinate_key = (latitude, longitude)
    occurrences_so_far = seen_coordinates.get(coordinate_key, 0)
    seen_coordinates[coordinate_key] = occurrences_so_far + 1

    return offset_duplicate_coordinates(latitude, longitude, occurrences_so_far)
37+
38+
def get_latitude_longitude(row):
    """
    Parses the latitude and longitude stored in the location column of a row.

    The cell is read as a string and evaluated with ast.literal_eval; it must
    produce a two-element list or tuple (for example "44.77, -0.61"). Anything
    else, including values that are not finite numbers, yields [None, None].

    Args:
        row (pandas.Series): DataFrame row; must contain the COLUMNS['location'] field.

    Returns:
        list: [latitude, longitude] as floats, or [None, None] when the value is
        missing, malformed or not finite.
    """
    coordinates_string = str(row[COLUMNS['location']]).strip()

    # A coordinate pair always contains a separator; this also filters out
    # empty cells and the "nan" string produced for missing values.
    if "," not in coordinates_string:
        return [None, None]

    try:
        coords = ast.literal_eval(coordinates_string)
    except (ValueError, TypeError, SyntaxError, MemoryError, RecursionError):
        # These are the errors literal_eval is documented to raise for
        # malformed input; anything else is a real bug and should surface.
        return [None, None]

    if not isinstance(coords, (list, tuple)) or len(coords) != 2:
        return [None, None]

    try:
        latitude = float(str(coords[0]).strip())
        longitude = float(str(coords[1]).strip())
    except ValueError:
        return [None, None]

    # Literals such as 1e999 or 'nan' convert to inf/nan. They are not usable
    # coordinates and infinite values make the trigonometric offset
    # calculation raise, so treat them like any other malformed value.
    if not (math.isfinite(latitude) and math.isfinite(longitude)):
        return [None, None]

    return [latitude, longitude]
62+
63+
def offset_duplicate_coordinates(latitude, longitude, duplicate_index):
    """
    Applies a tiny deterministic offset (meters) to avoid marker collision for identical coordinates.

    Duplicates are placed on a spiral: the n-th duplicate is rotated by
    n * GOLDEN_ANGLE_DEGREES and moved GROWTH_FACTOR * sqrt(n) meters away,
    capped at MAX_OFFSET_METERS. The metric offset is converted to degrees with
    a flat approximation (METERS_PER_DEGREE_LAT, scaled by cos(latitude) for
    longitude).

    Args:
        latitude (float|None): Latitude.
        longitude (float|None): Longitude.
        duplicate_index (int): 0 for the first occurrence (no offset), 1..N for duplicates.

    Returns:
        tuple[float|None, float|None]: (latitude, longitude) with offset applied.
        The input is returned unchanged when a coordinate is None or not
        finite, or when duplicate_index is not positive.
    """
    if latitude is None or longitude is None:
        return latitude, longitude
    if duplicate_index <= 0:
        return latitude, longitude
    # math.cos raises ValueError for infinite input and nan would only
    # propagate; there is nothing sensible to offset, so pass it through.
    if not (math.isfinite(latitude) and math.isfinite(longitude)):
        return latitude, longitude

    angle = duplicate_index * math.radians(GOLDEN_ANGLE_DEGREES)
    radius_m = min(GROWTH_FACTOR * math.sqrt(duplicate_index), MAX_OFFSET_METERS)
    east_m = radius_m * math.cos(angle)
    north_m = radius_m * math.sin(angle)

    # One degree of longitude shrinks with cos(latitude); the epsilon keeps
    # the divisor away from zero at the poles.
    cos_latitude = max(math.cos(math.radians(latitude)), COS_EPSILON)
    meters_per_degree_lon = METERS_PER_DEGREE_LAT * cos_latitude

    shifted_latitude = latitude + north_m / METERS_PER_DEGREE_LAT
    shifted_longitude = longitude + east_m / meters_per_degree_lon

    # A valid input must stay valid: near the poles or the antimeridian the
    # offset could otherwise leave the [-90, 90] / [-180, 180] ranges.
    # Out-of-range inputs are left as they are (only shifted), as before.
    if abs(latitude) <= 90.0:
        shifted_latitude = max(-90.0, min(90.0, shifted_latitude))
    if abs(longitude) <= 180.0 and abs(shifted_longitude) > 180.0:
        shifted_longitude = (shifted_longitude + 180.0) % 360.0 - 180.0

    return shifted_latitude, shifted_longitude

server/workers/common/common/aquanavi/mapping.py

Lines changed: 8 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -1,31 +1,17 @@
11
import re
2-
import ast
32
import sys
43
import hashlib
54
import pandas as pd
65

76
from pathlib import Path
7+
from .helpers.coordinates import get_row_coordinates_with_collision_offset
8+
from .constants.constants import COLUMNS
89

910
PATH_TO_FOLDER_IN_CONTAINER = "common/common/aquanavi/"
1011
CSV_PATH_WITH_REAL_DATA = f"{PATH_TO_FOLDER_IN_CONTAINER}mesocosm_data_cleaned.csv"
1112
CSV_PATH_WITH_TEST_DATA = f"{PATH_TO_FOLDER_IN_CONTAINER}mesocosm_test_data.csv"
1213
DEFAULT_DOCUMENT_TYPE = "physical object"
1314
DEFAULT_RESULT_TYPE = ['Other/Unknown material']
14-
COLUMNS = {
15-
"url": "url",
16-
"name": "Name",
17-
"country": "Country",
18-
"continent": "Continent",
19-
"equipment": "Equipment",
20-
"research_topics": "Research Topics",
21-
"specialist_areas": "Specialist areas",
22-
"grand_challenges": "Primary interests",
23-
"controlled_parameters": "Controlled Parameters",
24-
"description": "Description of Facility",
25-
"location": "Facility location(s) split",
26-
"years_of_experiments": "Years of Mesocosm Experiments",
27-
"photos_of_experiments": "Photos of experiments/installations images",
28-
}
2915

3016
def process_column(row, column_name, join_value_parts_with):
3117
"""
@@ -86,31 +72,6 @@ def get_and_process_value(row, column_name, is_remove_trailing_dot, join_value_p
8672

8773
return value
8874

89-
def get_latitude_longitude(row):
90-
"""
91-
The function returns a list with latitude and longitude.
92-
93-
Args:
94-
row (str): String DataFrame.
95-
96-
Returns:
97-
list: A list with latitude and longitude (or None).
98-
"""
99-
coordinates_string = str(row[COLUMNS['location']]).strip()
100-
101-
latitude, longitude = None, None
102-
103-
if "," in coordinates_string:
104-
try:
105-
coords = ast.literal_eval(coordinates_string)
106-
if isinstance(coords, (list, tuple)) and len(coords) == 2:
107-
latitude = float(str(coords[0]).strip())
108-
longitude = float(str(coords[1]).strip())
109-
except Exception:
110-
latitude, longitude = None, None
111-
112-
return [latitude, longitude]
113-
11475
def get_years_of_experiments(row):
11576
"""
11677
The function returns a time range in the list format.
@@ -163,19 +124,20 @@ def get_years_of_experiments(row):
163124

164125
return None, None
165126

166-
def get_coverage(row):
127+
def get_coverage_with_coordinates(row, latitude, longitude):
167128
"""
168129
Creates a coverage field information for each data entry. The coverage field contains
169130
string value in format as presented in the line below:
170131
"country=France; continent=Europe; east=-0.618181; north=44.776596 ; start=2010-07; end=2012-06"
171132
172133
Args:
173134
row (pandas.Series): String DataFrame.
135+
latitude (float|None): Latitude.
136+
longitude (float|None): Longitude.
174137
175138
Returns:
176139
str: String in the coverage field format.
177140
"""
178-
latitude, longitude = get_latitude_longitude(row)
179141
start, end = get_years_of_experiments(row)
180142

181143
coverage_parts = []
@@ -342,11 +304,13 @@ def map_sample_data():
342304
df = load_and_prepare_dataframe()
343305

344306
result = []
307+
seen_coordinates = {}
345308
for _, row in df.iterrows():
346309
id = get_id(row)
347310
title = str(row[COLUMNS['name']]).strip() if row[COLUMNS['name']] else ""
348311
url = str(row[COLUMNS['url']]).strip() if row[COLUMNS['url']] else ""
349312
image = row[COLUMNS['photos_of_experiments']] if row[COLUMNS['photos_of_experiments']] else ""
313+
latitude, longitude = get_row_coordinates_with_collision_offset(row, seen_coordinates)
350314

351315
result.append({
352316
"id": id,
@@ -363,7 +327,7 @@ def map_sample_data():
363327
"relation": image,
364328
"paper_abstract": get_abstract(row),
365329
"subject_orig": get_keywords(row),
366-
"coverage": get_coverage(row)
330+
"coverage": get_coverage_with_coordinates(row, latitude=latitude, longitude=longitude)
367331
})
368332

369333
return { "documents": result }

vis/js/templates/Geomap/Pins/index.tsx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ import { useDispatch } from "react-redux";
44
import { useActiveDataItem } from "@/hooks/useActiveDataItem";
55
import { useData } from "@/hooks/useData";
66
import { selectPaper } from "@/js/actions";
7-
import { AllPossiblePapersType } from "@/js/types";
7+
import { AllPossiblePapersType, AquanaviPaper } from "@/js/types";
88

99
import { Pin } from "./Pin";
1010

0 commit comments

Comments
 (0)