@@ -281,28 +281,28 @@ Let's write a helper to get the strongest words for each topic. This will show t
281281``` python
282282import pandas as pd
283283
284- def show_topics (topic , n ):
285- # Get the feature names (terms) from the vectorizer
284+ def show_topics (vectorizer , svdmodel , topic_number , n ):
285+ # Get the feature names (terms) from the TF-IDF vectorizer
286286 terms = vectorizer.get_feature_names_out()
287287
288288 # Get the weights of the terms for the specified topic from the SVD model
289- weights = svdmodel.components_[topic ]
289+ weights = svdmodel.components_[topic_number ]
290290
291291 # Create a DataFrame with terms and their corresponding weights
292292 df = pd.DataFrame({" Term" : terms, " Weight" : weights})
293293
294- # Sort the DataFrame by weights in descending order to get top n terms
295- tops = df.sort_values(by = [" Weight" ], ascending = False )[0 :n]
294+ # Sort the DataFrame by weights in descending order to get top n terms (largest positive weights)
295+ highs = df.sort_values(by = [" Weight" ], ascending = False )[0 :n]
296296
297- # Sort the DataFrame by weights in ascending order to get bottom n terms
298- bottoms = df.sort_values(by = [" Weight" ], ascending = False )[- n:]
297+ # Sort by weights in descending order again and keep the last n rows: the n lowest weights, still listed from highest to lowest (note: n = 0 would select every row, because [-0:] is the same slice as [0:])
298+ lows = df.sort_values(by = [" Weight" ], ascending = False )[- n:]
299299
300300 # Concatenate top and bottom terms into a single DataFrame and return
301- return pd.concat([tops, bottoms ])
301+ return pd.concat([highs, lows ])
302302
303303# Get the top 5 and bottom 5 terms for each specified topic
304- topic_words_x = show_topics(1 , 5 ) # Topic 1
305- topic_words_y = show_topics(2 , 5 ) # Topic 2
304+ topic_words_x = show_topics(vectorizer, svdmodel, 1 , 5 ) # Topic 1: row index 1 of svdmodel.components_
305+ topic_words_y = show_topics(vectorizer, svdmodel, 2 , 5 ) # Topic 2: row index 2 of svdmodel.components_
306306
307307```
308308
0 commit comments