Spaces:
Sleeping
Sleeping
Update tweet_analyzer.py
Browse files- tweet_analyzer.py +28 -76
tweet_analyzer.py
CHANGED
@@ -5,6 +5,8 @@ from dotenv import load_dotenv
|
|
5 |
import groq
|
6 |
import json
|
7 |
from datetime import datetime
|
|
|
|
|
8 |
|
9 |
class TweetDatasetProcessor:
|
10 |
def __init__(self):
|
@@ -39,8 +41,7 @@ class TweetDatasetProcessor:
|
|
39 |
|
40 |
def _extract_timestamp(self, text):
|
41 |
"""Extract timestamp if present in tweet"""
|
42 |
-
# Implement timestamp extraction logic
|
43 |
-
return None
|
44 |
|
45 |
def _extract_mentions(self, text):
|
46 |
"""Extract mentioned users from tweet"""
|
@@ -54,71 +55,35 @@ class TweetDatasetProcessor:
|
|
54 |
"""Comprehensive personality analysis"""
|
55 |
all_tweets = [tweet['content'] for tweet in self.tweets]
|
56 |
analysis_prompt = f"""Perform a deep psychological analysis of the author based on these tweets. Analyze:
|
57 |
-
|
58 |
-
1. Core Beliefs and Values:
|
59 |
-
- What fundamental beliefs shape their worldview?
|
60 |
-
- What causes or issues do they care about?
|
61 |
-
|
62 |
-
2. Cognitive Patterns:
|
63 |
-
- How do they process information?
|
64 |
-
- What decision-making patterns are visible?
|
65 |
-
|
66 |
-
3. Emotional Tendencies:
|
67 |
-
- What triggers emotional responses?
|
68 |
-
- How do they express emotions?
|
69 |
-
|
70 |
-
4. Social Interaction Style:
|
71 |
-
- How do they engage with others?
|
72 |
-
- What relationship patterns emerge?
|
73 |
-
|
74 |
-
5. Knowledge Areas:
|
75 |
-
- What topics do they discuss with expertise?
|
76 |
-
- What experiences do they draw from?
|
77 |
-
|
78 |
-
6. Communication Style:
|
79 |
-
- Vocabulary preferences
|
80 |
-
- Rhetorical patterns
|
81 |
-
- Humor style
|
82 |
-
|
83 |
-
7. Behavioral Patterns:
|
84 |
-
- Daily routines mentioned
|
85 |
-
- Regular activities
|
86 |
-
- Habits and preferences
|
87 |
-
|
88 |
Tweets for analysis:
|
89 |
{json.dumps(all_tweets[:30], indent=2)}
|
90 |
"""
|
91 |
|
92 |
response = self.groq_client.chat.completions.create(
|
93 |
messages=[
|
94 |
-
{
|
95 |
-
|
96 |
-
"content": "You are an expert psychologist specializing in personality analysis through written communication."
|
97 |
-
},
|
98 |
-
{
|
99 |
-
"role": "user",
|
100 |
-
"content": analysis_prompt
|
101 |
-
}
|
102 |
],
|
103 |
model="mixtral-8x7b-32768",
|
104 |
temperature=0.1,
|
105 |
)
|
106 |
-
|
107 |
self.personality_profile = response.choices[0].message.content
|
108 |
return self.personality_profile
|
109 |
|
110 |
-
|
111 |
-
|
112 |
-
|
113 |
-
|
114 |
-
|
115 |
-
|
116 |
-
|
117 |
-
|
118 |
-
|
119 |
-
|
120 |
-
|
121 |
-
|
|
|
122 |
|
123 |
def generate_tweet(self, context=""):
|
124 |
"""Generate a new tweet based on personality profile and optional context"""
|
@@ -130,8 +95,8 @@ class TweetDatasetProcessor:
|
|
130 |
"Provide advice to followers."
|
131 |
]
|
132 |
|
133 |
-
# Include historical topics in the context
|
134 |
-
historical_topics = self.analyze_topics()
|
135 |
additional_contexts.extend(historical_topics)
|
136 |
|
137 |
# Randomly choose an additional context to diversify tweets
|
@@ -143,30 +108,17 @@ class TweetDatasetProcessor:
|
|
143 |
Current context or topic (if any):
|
144 |
{context}
|
145 |
Additionally, consider this specific context:
|
146 |
-
|
147 |
-
|
148 |
-
1. Their core beliefs and values
|
149 |
-
2. Their typical emotional expression
|
150 |
-
3. Their communication style and vocabulary
|
151 |
-
4. Their knowledge areas and experiences
|
152 |
-
5. Current context (if provided)
|
153 |
-
The tweet should feel indistinguishable from their authentic tweets.
|
154 |
-
"""
|
155 |
|
156 |
response = self.groq_client.chat.completions.create(
|
157 |
messages=[
|
158 |
-
{
|
159 |
-
|
160 |
-
"content": "You are an expert in replicating individual writing and thinking patterns."
|
161 |
-
},
|
162 |
-
{
|
163 |
-
"role": "user",
|
164 |
-
"content": generation_prompt
|
165 |
-
}
|
166 |
],
|
167 |
model="mixtral-8x7b-32768",
|
168 |
-
temperature=0.8,
|
169 |
-
max_tokens=150,
|
170 |
)
|
171 |
-
|
172 |
return response.choices[0].message.content
|
|
|
|
5 |
import groq
|
6 |
import json
|
7 |
from datetime import datetime
|
8 |
+
from sklearn.decomposition import NMF
|
9 |
+
from sklearn.feature_extraction.text import TfidfVectorizer
|
10 |
|
11 |
class TweetDatasetProcessor:
|
12 |
def __init__(self):
|
|
|
41 |
|
42 |
def _extract_timestamp(self, text):
|
43 |
"""Extract timestamp if present in tweet"""
|
44 |
+
return None # Implement timestamp extraction logic if needed
|
|
|
45 |
|
46 |
def _extract_mentions(self, text):
|
47 |
"""Extract mentioned users from tweet"""
|
|
|
55 |
"""Comprehensive personality analysis"""
|
56 |
all_tweets = [tweet['content'] for tweet in self.tweets]
|
57 |
analysis_prompt = f"""Perform a deep psychological analysis of the author based on these tweets. Analyze:
|
58 |
+
Core beliefs, emotional tendencies, cognitive patterns, etc.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
59 |
Tweets for analysis:
|
60 |
{json.dumps(all_tweets[:30], indent=2)}
|
61 |
"""
|
62 |
|
63 |
response = self.groq_client.chat.completions.create(
|
64 |
messages=[
|
65 |
+
{"role": "system", "content": "You are an expert psychologist."},
|
66 |
+
{"role": "user", "content": analysis_prompt},
|
|
|
|
|
|
|
|
|
|
|
|
|
67 |
],
|
68 |
model="mixtral-8x7b-32768",
|
69 |
temperature=0.1,
|
70 |
)
|
|
|
71 |
self.personality_profile = response.choices[0].message.content
|
72 |
return self.personality_profile
|
73 |
|
74 |
+
def analyze_topics(self, n_topics=5, n_top_words=None):
    """Extract and identify different topics the author has tweeted about.

    Builds a TF-IDF matrix from the ``'content'`` of every item in
    ``self.tweets`` (English stop words removed) and factorizes it with
    NMF. Each resulting component is summarized by its highest-weighted
    terms.

    Args:
        n_topics: Number of NMF components (topics) to extract.
        n_top_words: Number of highest-weighted terms used to describe
            each topic. Defaults to ``n_topics``, which is the value the
            method previously used implicitly for this purpose.

    Returns:
        A list with one string per topic: that topic's top terms joined
        by single spaces, highest weight first. An empty list is returned
        when there are no tweets or when no term survives vectorization.
    """
    all_tweets = [tweet['content'] for tweet in self.tweets]
    if not all_tweets:
        # Nothing to vectorize; fit_transform would raise on an empty corpus.
        return []

    if n_top_words is None:
        n_top_words = n_topics

    vectorizer = TfidfVectorizer(stop_words='english')
    try:
        tfidf_matrix = vectorizer.fit_transform(all_tweets)
    except ValueError:
        # Raised for an empty vocabulary, e.g. when every tweet consists
        # solely of stop words. No topics can be derived in that case.
        return []

    nmf_model = NMF(n_components=n_topics, random_state=1)
    nmf_model.fit(tfidf_matrix)

    # Loop-invariant: build the index -> term lookup once, not per word.
    feature_names = vectorizer.get_feature_names_out()

    topics = []
    for weights in nmf_model.components_:
        # argsort is ascending; reverse it and keep the heaviest terms.
        top_indices = weights.argsort()[::-1][:n_top_words]
        topics.append(" ".join(feature_names[i] for i in top_indices))
    return topics
|
87 |
|
88 |
def generate_tweet(self, context=""):
|
89 |
"""Generate a new tweet based on personality profile and optional context"""
|
|
|
95 |
"Provide advice to followers."
|
96 |
]
|
97 |
|
98 |
+
# Include historical topics in the context
|
99 |
+
historical_topics = self.analyze_topics()
|
100 |
additional_contexts.extend(historical_topics)
|
101 |
|
102 |
# Randomly choose an additional context to diversify tweets
|
|
|
108 |
Current context or topic (if any):
|
109 |
{context}
|
110 |
Additionally, consider this specific context:
|
111 |
+
{random_context}
|
112 |
+
Generate a tweet that this person would write right now."""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
113 |
|
114 |
response = self.groq_client.chat.completions.create(
|
115 |
messages=[
|
116 |
+
{"role": "system", "content": "You are an expert in replicating writing patterns."},
|
117 |
+
{"role": "user", "content": generation_prompt},
|
|
|
|
|
|
|
|
|
|
|
|
|
118 |
],
|
119 |
model="mixtral-8x7b-32768",
|
120 |
+
temperature=0.8,
|
121 |
+
max_tokens=150,
|
122 |
)
|
|
|
123 |
return response.choices[0].message.content
|
124 |
+
|