Spaces:
Sleeping
Sleeping
Update tweet_analyzer.py
Browse files- tweet_analyzer.py +17 -0
tweet_analyzer.py
CHANGED
@@ -107,6 +107,19 @@ class TweetDatasetProcessor:
|
|
107 |
self.personality_profile = response.choices[0].message.content
|
108 |
return self.personality_profile
|
109 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
110 |
def generate_tweet(self, context=""):
|
111 |
"""Generate a new tweet based on personality profile and optional context"""
|
112 |
additional_contexts = [
|
@@ -116,6 +129,10 @@ class TweetDatasetProcessor:
|
|
116 |
"Reflect on a past experience.",
|
117 |
"Provide advice to followers."
|
118 |
]
|
|
|
|
|
|
|
|
|
119 |
|
120 |
# Randomly choose an additional context to diversify tweets
|
121 |
import random
|
|
|
107 |
self.personality_profile = response.choices[0].message.content
|
108 |
return self.personality_profile
|
109 |
|
110 |
+
def analyze_topics(self, n_topics=5, n_top_words=None):
    """Extract and identify the different topics the author has tweeted about.

    Builds a TF-IDF matrix (English stop words removed) from the
    ``'content'`` field of every entry in ``self.tweets``, factorizes it
    with NMF into ``n_topics`` components, and describes each component by
    its highest-weighted terms.

    Args:
        n_topics: Number of NMF components (topics) to extract.
        n_top_words: Number of top terms used to describe each topic.
            Defaults to ``n_topics``, which is what the original code
            used implicitly for both purposes.

    Returns:
        A list with one string per topic; each string is the topic's top
        terms joined by single spaces, highest weight first. Returns an
        empty list when ``self.tweets`` is empty.
    """
    all_tweets = [tweet['content'] for tweet in self.tweets]
    # Nothing to vectorize: return early so callers that extend a list
    # with the result keep working instead of failing inside the fit.
    if not all_tweets:
        return []
    if n_top_words is None:
        n_top_words = n_topics

    vectorizer = TfidfVectorizer(stop_words='english')
    tfidf_matrix = vectorizer.fit_transform(all_tweets)
    # Fixed random_state keeps the extracted topics reproducible.
    nmf_model = NMF(n_components=n_topics, random_state=1)
    nmf_model.fit(tfidf_matrix)

    # Look the vocabulary up once rather than once per word per topic.
    feature_names = vectorizer.get_feature_names_out()
    # argsort() is ascending, so the reversed tail slice yields the
    # n_top_words largest weights in descending order.
    return [
        " ".join(
            feature_names[i]
            for i in topic.argsort()[:-n_top_words - 1:-1]
        )
        for topic in nmf_model.components_
    ]
|
122 |
+
|
123 |
def generate_tweet(self, context=""):
|
124 |
"""Generate a new tweet based on personality profile and optional context"""
|
125 |
additional_contexts = [
|
|
|
129 |
"Reflect on a past experience.",
|
130 |
"Provide advice to followers."
|
131 |
]
|
132 |
+
|
133 |
+
# Include historical topics in the context
|
134 |
+
historical_topics = self.analyze_topics()
|
135 |
+
additional_contexts.extend(historical_topics)
|
136 |
|
137 |
# Randomly choose an additional context to diversify tweets
|
138 |
import random
|