Spaces:
Sleeping
Sleeping
Update tweet_analyzer.py
Browse files- tweet_analyzer.py +3 -4
tweet_analyzer.py
CHANGED
@@ -109,15 +109,14 @@ class TweetDatasetProcessor:
|
|
109 |
|
110 |
def analyze_topics(self, n_topics=5):
|
111 |
"""Extract and identify different topics the author has tweeted about"""
|
112 |
-
all_tweets = [tweet['content']
|
113 |
-
|
114 |
tfidf_matrix = vectorizer.fit_transform(all_tweets)
|
115 |
nmf_model = NMF(n_components=n_topics, random_state=1)
|
116 |
nmf_model.fit(tfidf_matrix)
|
117 |
topics = []
|
118 |
for topic_idx, topic in enumerate(nmf_model.components_):
|
119 |
-
topic_words = [vectorizer.get_feature_names_out()[i]
|
120 |
-
for i in topic.argsort()[:-n_topics - 1:-1]]
|
121 |
topics.append(" ".join(topic_words))
|
122 |
return topics
|
123 |
|
|
|
109 |
|
110 |
def analyze_topics(self, n_topics=5):
    """Extract and identify the different topics the author has tweeted about.

    Builds a TF-IDF matrix (English stop words removed) from the ``'content'``
    entry of every tweet in ``self.tweets``, factorizes it with NMF into
    ``n_topics`` components, and describes each component by its
    highest-weighted vocabulary terms.

    Args:
        n_topics: Number of NMF components (topics) to extract. The same
            value is also used as the number of top terms listed per topic.

    Returns:
        A list with one string per topic, each holding that topic's top
        terms (highest weight first) joined by single spaces. An empty list
        is returned when there are no tweets to analyze.
    """
    all_tweets = [tweet['content'] for tweet in self.tweets]
    if not all_tweets:
        # Nothing to fit: an empty corpus would make the vectorizer fail
        # while building its vocabulary, so report "no topics" instead.
        return []

    vectorizer = TfidfVectorizer(stop_words='english')
    tfidf_matrix = vectorizer.fit_transform(all_tweets)
    # Fixed random_state so repeated runs use the same seed.
    nmf_model = NMF(n_components=n_topics, random_state=1)
    nmf_model.fit(tfidf_matrix)

    # Fetch the vocabulary once; it does not change between topics or words.
    feature_names = vectorizer.get_feature_names_out()

    topics = []
    for topic in nmf_model.components_:
        # argsort is ascending, so this reversed slice yields the indices of
        # the n_topics largest weights, largest first.
        top_indices = topic.argsort()[:-n_topics - 1:-1]
        topics.append(" ".join(feature_names[i] for i in top_indices))
    return topics
|
122 |
|