Skip to content

Commit 0cf7d9c

Browse files
authored
Fix #1866 (#1898)
1 parent de7376d commit 0cf7d9c

1 file changed

Lines changed: 28 additions & 28 deletions

File tree

bertopic/_bertopic.py

Lines changed: 28 additions & 28 deletions
Original file line number | Diff line number | Diff line change
@@ -3241,37 +3241,37 @@ def merge_models(cls, models, min_similarity: float = .7, embedding_model=None):
3241 3241

3242 3242
# Merge Topic Representations
3243 3243
new_topics_dict = {}
3244-
new_topic_val = max_topic + 1
3245-
for index, new_topic in enumerate(new_topics):
3246-
new_topic_val = max_topic + index + 1
3247-
new_topics_dict[new_topic] = new_topic_val
3248-
merged_topics["topic_representations"][str(new_topic_val)] = selected_topics["topic_representations"][str(new_topic)]
3249-
merged_topics["topic_labels"][str(new_topic_val)] = selected_topics["topic_labels"][str(new_topic)]
3250-
3251-
# Add new aspects
3252-
if selected_topics["topic_aspects"]:
3253-
aspects_1 = set(merged_topics["topic_aspects"].keys())
3254-
aspects_2 = set(selected_topics["topic_aspects"].keys())
3255-
aspects_diff = aspects_2.difference(aspects_1)
3256-
if aspects_diff:
3257-
for aspect in aspects_diff:
3258-
merged_topics["topic_aspects"][aspect] = {}
3259-
3260-
# If the original model does not have topic aspects but the to be added model does
3261-
if not merged_topics.get("topic_aspects"):
3262-
merged_topics["topic_aspects"] = selected_topics["topic_aspects"]
3263-
3264-
# If they both contain topic aspects, add to the existing set of aspects
3265-
else:
3266-
for aspect, values in selected_topics["topic_aspects"].items():
3267-
merged_topics["topic_aspects"][aspect][str(new_topic_val)] = values[str(new_topic)]
3244+
for new_topic in new_topics:
3245+
if new_topic != -1:
3246+
max_topic += 1
3247+
new_topics_dict[new_topic] = max_topic
3248+
merged_topics["topic_representations"][str(max_topic)] = selected_topics["topic_representations"][str(new_topic)]
3249+
merged_topics["topic_labels"][str(max_topic)] = selected_topics["topic_labels"][str(new_topic)]
3250+
3251+
# Add new aspects
3252+
if selected_topics["topic_aspects"]:
3253+
aspects_1 = set(merged_topics["topic_aspects"].keys())
3254+
aspects_2 = set(selected_topics["topic_aspects"].keys())
3255+
aspects_diff = aspects_2.difference(aspects_1)
3256+
if aspects_diff:
3257+
for aspect in aspects_diff:
3258+
merged_topics["topic_aspects"][aspect] = {}
3259+
3260+
# If the original model does not have topic aspects but the to be added model does
3261+
if not merged_topics.get("topic_aspects"):
3262+
merged_topics["topic_aspects"] = selected_topics["topic_aspects"]
3263+
3264+
# If they both contain topic aspects, add to the existing set of aspects
3265+
else:
3266+
for aspect, values in selected_topics["topic_aspects"].items():
3267+
merged_topics["topic_aspects"][aspect][str(max_topic)] = values[str(new_topic)]
3268 3268

3269-
# Add new embeddings
3270-
new_tensors = tensors[new_topic + selected_topics["_outliers"]]
3271-
merged_tensors = np.vstack([merged_tensors, new_tensors])
3269+
# Add new embeddings
3270+
new_tensors = tensors[new_topic + selected_topics["_outliers"]]
3271+
merged_tensors = np.vstack([merged_tensors, new_tensors])
3272 3272

3273 3273
# Topic Mapper
3274-
merged_topics["topic_mapper"] = TopicMapper(list(range(-1, new_topic_val+1, 1))).mappings_
3274+
merged_topics["topic_mapper"] = TopicMapper(list(range(-1, max_topic+1, 1))).mappings_
3275 3275

3276 3276
# Find similar topics and re-assign those from the new models
3277 3277
sims_idx = np.argmax(sim_matrix, axis=1)

0 commit comments

Comments
 (0)