Skip to content

Commit 8be276a

Browse files
committed
Rename HnswDistanceType to VectorDistanceType #47
1 parent dc6fe8a commit 8be276a

5 files changed

Lines changed: 49 additions & 44 deletions

File tree

example/vectorsearch-cities/model.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,12 +11,13 @@ class City:
1111
location = Property(np.ndarray, type=PropertyType.floatVector, id=3, uid=1003, index=HnswIndex(
1212
id=3, uid=10001,
1313
dimensions=2,
14-
distance_type=HnswDistanceType.EUCLIDEAN
14+
distance_type=VectorDistanceType.EUCLIDEAN
1515
))
1616

17+
1718
def get_objectbox_model():
1819
m = Model()
1920
m.entity(City, last_property_id=IdUid(3, 1003))
2021
m.last_entity_id = IdUid(1, 1)
21-
m.last_index_id = IdUid(3,10001)
22+
m.last_index_id = IdUid(3, 10001)
2223
return m

objectbox/c.py

Lines changed: 8 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@ def shlib_name(library: str) -> str:
7979
OBXPutPaddingMode = ctypes.c_int
8080
OBXOrderFlags = ctypes.c_int
8181
OBXHnswFlags = ctypes.c_int
82-
OBXHnswDistanceType = ctypes.c_int
82+
OBXVectorDistanceType = ctypes.c_int
8383
OBXValidateOnOpenPagesFlags = ctypes.c_int
8484
OBXValidateOnOpenKvFlags = ctypes.c_int
8585
OBXBackupRestoreFlags = ctypes.c_int
@@ -393,9 +393,8 @@ def c_array_pointer(py_list: Union[List[Any], np.ndarray], c_type):
393393
obx_model_property_index_hnsw_flags = \
394394
c_fn_rc('obx_model_property_index_hnsw_flags', [OBX_model_p, OBXHnswFlags])
395395

396-
# obx_err obx_model_property_index_hnsw_distance_type(OBX_model* model, OBXHnswDistanceType value)
397-
obx_model_property_index_hnsw_distance_type = \
398-
c_fn_rc('obx_model_property_index_hnsw_distance_type', [OBX_model_p, OBXHnswDistanceType])
396+
# obx_err obx_model_property_index_hnsw_distance_type(OBX_model* model, OBXVectorDistanceType value)
397+
obx_model_property_index_hnsw_distance_type = c_fn_rc('obx_model_property_index_hnsw_distance_type', [OBX_model_p, OBXVectorDistanceType])
399398

400399
# obx_err obx_model_property_index_hnsw_reparation_backlink_probability(OBX_model* model, float value)
401400
obx_model_property_index_hnsw_reparation_backlink_probability = \
@@ -980,11 +979,11 @@ def c_array_pointer(py_list: Union[List[Any], np.ndarray], c_type):
980979
OBXHnswFlags_VECTOR_CACHE_SIMD_PADDING_OFF = 4
981980
OBXHnswFlags_REPARATION_LIMIT_CANDIDATES = 8
982981

983-
OBXHnswDistanceType_UNKNOWN = 0
984-
OBXHnswDistanceType_EUCLIDEAN = 1
985-
OBXHnswDistanceType_COSINE = 2
986-
OBXHnswDistanceType_DOT_PRODUCT = 3
987-
OBXHnswDistanceType_DOT_PRODUCT_NON_NORMALIZED = 10
982+
OBXVectorDistanceType_UNKNOWN = 0
983+
OBXVectorDistanceType_EUCLIDEAN = 1
984+
OBXVectorDistanceType_COSINE = 2
985+
OBXVectorDistanceType_DOT_PRODUCT = 3
986+
OBXVectorDistanceType_DOT_PRODUCT_NON_NORMALIZED = 10
988987

989988
OBXPutPaddingMode_PaddingAutomatic = 1
990989
OBXPutPaddingMode_PaddingAllowedByBuffer = 2

objectbox/model/properties.py

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -94,26 +94,26 @@ class HnswFlags(IntEnum):
9494
REPARATION_LIMIT_CANDIDATES = 8
9595

9696

97-
class HnswDistanceType(IntEnum):
98-
UNKNOWN = OBXHnswDistanceType_UNKNOWN
99-
EUCLIDEAN = OBXHnswDistanceType_EUCLIDEAN
100-
COSINE = OBXHnswDistanceType_COSINE
101-
DOT_PRODUCT = OBXHnswDistanceType_DOT_PRODUCT
102-
DOT_PRODUCT_NON_NORMALIZED = OBXHnswDistanceType_DOT_PRODUCT_NON_NORMALIZED
103-
104-
HnswDistanceType.UNKNOWN.__doc__ = "Not a real type, just best practice (e.g. forward compatibility)"
105-
HnswDistanceType.EUCLIDEAN.__doc__ = "The default; typically 'euclidean squared' internally."
106-
HnswDistanceType.COSINE.__doc__ = """
97+
class VectorDistanceType(IntEnum):
98+
UNKNOWN = OBXVectorDistanceType_UNKNOWN
99+
EUCLIDEAN = OBXVectorDistanceType_EUCLIDEAN
100+
COSINE = OBXVectorDistanceType_COSINE
101+
DOT_PRODUCT = OBXVectorDistanceType_DOT_PRODUCT
102+
DOT_PRODUCT_NON_NORMALIZED = OBXVectorDistanceType_DOT_PRODUCT_NON_NORMALIZED
103+
104+
VectorDistanceType.UNKNOWN.__doc__ = "Not a real type, just best practice (e.g. forward compatibility)"
105+
VectorDistanceType.EUCLIDEAN.__doc__ = "The default; typically 'euclidean squared' internally."
106+
VectorDistanceType.COSINE.__doc__ = """
107107
Cosine similarity compares two vectors irrespective of their magnitude (compares the angle of two vectors).
108108
Often used for document or semantic similarity.
109109
Value range: 0.0 - 2.0 (0.0: same direction, 1.0: orthogonal, 2.0: opposite direction)
110110
"""
111-
HnswDistanceType.DOT_PRODUCT.__doc__ = """
111+
VectorDistanceType.DOT_PRODUCT.__doc__ = """
112112
For normalized vectors (vector length == 1.0), the dot product is equivalent to the cosine similarity.
113113
Because of this, the dot product is often preferred as it performs better.
114114
Value range (normalized vectors): 0.0 - 2.0 (0.0: same direction, 1.0: orthogonal, 2.0: opposite direction)
115115
"""
116-
HnswDistanceType.DOT_PRODUCT_NON_NORMALIZED.__doc__ = """
116+
VectorDistanceType.DOT_PRODUCT_NON_NORMALIZED.__doc__ = """
117117
A custom dot product similarity measure that does not require the vectors to be normalized.
118118
Note: this is no replacement for cosine similarity (like DotProduct for normalized vectors is).
119119
The non-linear conversion provides a high precision over the entire float range (for the raw dot product).
@@ -130,7 +130,7 @@ class HnswIndex:
130130
neighbors_per_node: Optional[int] = None
131131
indexing_search_count: Optional[int] = None
132132
flags: HnswFlags = HnswFlags.NONE
133-
distance_type: HnswDistanceType = HnswDistanceType.EUCLIDEAN
133+
distance_type: VectorDistanceType = VectorDistanceType.EUCLIDEAN
134134
reparation_backlink_probability: Optional[float] = None
135135
vector_cache_hint_size_kb: Optional[float] = None
136136

tests/model.py

Lines changed: 14 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -58,23 +58,22 @@ class VectorEntity:
5858
id = Id(id=1, uid=4001)
5959
name = Property(str, type=PropertyType.string, id=2, uid=4002)
6060
vector_euclidean = Property(np.ndarray, type=PropertyType.floatVector, id=3, uid=4003,
61-
index=HnswIndex(
62-
id=3, uid=40001,
63-
dimensions=2, distance_type=HnswDistanceType.EUCLIDEAN)
64-
)
61+
index=HnswIndex(
62+
id=3, uid=40001,
63+
dimensions=2, distance_type=VectorDistanceType.EUCLIDEAN)
64+
)
6565
vector_cosine = Property(np.ndarray, type=PropertyType.floatVector, id=4, uid=4004,
66-
index=HnswIndex(
67-
id=4, uid=40002,
68-
dimensions=2, distance_type=HnswDistanceType.COSINE)
69-
)
66+
index=HnswIndex(
67+
id=4, uid=40002,
68+
dimensions=2, distance_type=VectorDistanceType.COSINE)
69+
)
7070
vector_dot_product = Property(np.ndarray, type=PropertyType.floatVector, id=5, uid=4005,
71-
index=HnswIndex(
72-
id=5, uid=40003,
73-
dimensions=2, distance_type=HnswDistanceType.DOT_PRODUCT)
74-
)
75-
#vector_dot_product_non_normalized = Property(np.ndarray, type=PropertyType.floatVector, id=6, uid=4006,
71+
index=HnswIndex(
72+
id=5, uid=40003,
73+
dimensions=2, distance_type=VectorDistanceType.DOT_PRODUCT)
74+
)
75+
# vector_dot_product_non_normalized = Property(np.ndarray, type=PropertyType.floatVector, id=6, uid=4006,
7676
# index=HnswIndex(
7777
# id=6, uid=40004,
78-
# dimensions=2, distance_type=HnswDistanceType.DOT_PRODUCT_NON_NORMALIZED)
78+
# dimensions=2, distance_type=VectorDistanceType.DOT_PRODUCT_NON_NORMALIZED)
7979
# )
80-

tests/test_hnsw.py

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,12 @@ def _find_expected_nn(points: np.ndarray, query: np.ndarray, n: int):
1515
return np.argsort(d)[:n]
1616

1717

18-
def _test_random_points(num_points: int, num_query_points: int, seed: Optional[int] = None, distance_type: HnswDistanceType = HnswDistanceType.EUCLIDEAN, min_score: float = 0.5):
18+
def _test_random_points(
19+
num_points: int,
20+
num_query_points: int,
21+
seed: Optional[int] = None,
22+
distance_type: VectorDistanceType = VectorDistanceType.EUCLIDEAN,
23+
min_score: float = 0.5):
1924
""" Generates random points in a 2d plane; checks the queried NN against the expected. """
2025

2126
vector_field_name = "vector_"+distance_type.name.lower()
@@ -76,7 +81,7 @@ def _test_random_points(num_points: int, num_query_points: int, seed: Optional[i
7681
def test_random_points():
7782

7883
min_score = 0.5
79-
distance_type = HnswDistanceType.EUCLIDEAN
84+
distance_type = VectorDistanceType.EUCLIDEAN
8085
_test_random_points(num_points=100, num_query_points=10, seed=10, distance_type=distance_type, min_score=min_score)
8186
_test_random_points(num_points=100, num_query_points=10, seed=11, distance_type=distance_type, min_score=min_score)
8287
_test_random_points(num_points=100, num_query_points=10, seed=12, distance_type=distance_type, min_score=min_score)
@@ -86,8 +91,9 @@ def test_random_points():
8691

8792
# TODO: Cosine and Dot Product may result in 0 score
8893

89-
def _test_combined_nn_search(distance_type: HnswDistanceType = HnswDistanceType.EUCLIDEAN):
90-
94+
95+
def _test_combined_nn_search(distance_type: VectorDistanceType = VectorDistanceType.EUCLIDEAN):
96+
9197
db = create_test_objectbox()
9298

9399
box = objectbox.Box(db, VectorEntity)
@@ -175,6 +181,6 @@ def _test_combined_nn_search(distance_type: HnswDistanceType = HnswDistanceType.
175181

176182
def test_combined_nn_search():
177183
""" Tests NN search combined with regular query conditions, offset and limit. """
178-
distance_type = HnswDistanceType.EUCLIDEAN
184+
distance_type = VectorDistanceType.EUCLIDEAN
179185
_test_combined_nn_search(distance_type)
180186
# TODO: Cosine and DotProduct diverge; see below

0 commit comments

Comments
 (0)