Manasa1 committed on
Commit
391392f
·
verified ·
1 Parent(s): 0fd270a

Update tweet_analyzer.py

Browse files
Files changed (1) hide show
  1. tweet_analyzer.py +28 -76
tweet_analyzer.py CHANGED
@@ -5,6 +5,8 @@ from dotenv import load_dotenv
5
  import groq
6
  import json
7
  from datetime import datetime
 
 
8
 
9
  class TweetDatasetProcessor:
10
  def __init__(self):
@@ -39,8 +41,7 @@ class TweetDatasetProcessor:
39
 
40
  def _extract_timestamp(self, text):
41
  """Extract timestamp if present in tweet"""
42
- # Implement timestamp extraction logic
43
- return None
44
 
45
  def _extract_mentions(self, text):
46
  """Extract mentioned users from tweet"""
@@ -54,71 +55,35 @@ class TweetDatasetProcessor:
54
  """Comprehensive personality analysis"""
55
  all_tweets = [tweet['content'] for tweet in self.tweets]
56
  analysis_prompt = f"""Perform a deep psychological analysis of the author based on these tweets. Analyze:
57
-
58
- 1. Core Beliefs and Values:
59
- - What fundamental beliefs shape their worldview?
60
- - What causes or issues do they care about?
61
-
62
- 2. Cognitive Patterns:
63
- - How do they process information?
64
- - What decision-making patterns are visible?
65
-
66
- 3. Emotional Tendencies:
67
- - What triggers emotional responses?
68
- - How do they express emotions?
69
-
70
- 4. Social Interaction Style:
71
- - How do they engage with others?
72
- - What relationship patterns emerge?
73
-
74
- 5. Knowledge Areas:
75
- - What topics do they discuss with expertise?
76
- - What experiences do they draw from?
77
-
78
- 6. Communication Style:
79
- - Vocabulary preferences
80
- - Rhetorical patterns
81
- - Humor style
82
-
83
- 7. Behavioral Patterns:
84
- - Daily routines mentioned
85
- - Regular activities
86
- - Habits and preferences
87
-
88
  Tweets for analysis:
89
  {json.dumps(all_tweets[:30], indent=2)}
90
  """
91
 
92
  response = self.groq_client.chat.completions.create(
93
  messages=[
94
- {
95
- "role": "system",
96
- "content": "You are an expert psychologist specializing in personality analysis through written communication."
97
- },
98
- {
99
- "role": "user",
100
- "content": analysis_prompt
101
- }
102
  ],
103
  model="mixtral-8x7b-32768",
104
  temperature=0.1,
105
  )
106
-
107
  self.personality_profile = response.choices[0].message.content
108
  return self.personality_profile
109
 
110
- def analyze_topics(self, n_topics=5):
111
- """Extract and identify different topics the author has tweeted about"""
112
- all_tweets = [tweet['content'] for tweet in self.tweets]
113
- vectorizer = TfidfVectorizer(stop_words='english')
114
- tfidf_matrix = vectorizer.fit_transform(all_tweets)
115
- nmf_model = NMF(n_components=n_topics, random_state=1)
116
- nmf_model.fit(tfidf_matrix)
117
- topics = []
118
- for topic_idx, topic in enumerate(nmf_model.components_):
119
- topic_words = [vectorizer.get_feature_names_out()[i] for i in topic.argsort()[:-n_topics - 1:-1]]
120
- topics.append(" ".join(topic_words))
121
- return topics
 
122
 
123
  def generate_tweet(self, context=""):
124
  """Generate a new tweet based on personality profile and optional context"""
@@ -130,8 +95,8 @@ class TweetDatasetProcessor:
130
  "Provide advice to followers."
131
  ]
132
 
133
- # Include historical topics in the context
134
- historical_topics = self.analyze_topics()
135
  additional_contexts.extend(historical_topics)
136
 
137
  # Randomly choose an additional context to diversify tweets
@@ -143,30 +108,17 @@ class TweetDatasetProcessor:
143
  Current context or topic (if any):
144
  {context}
145
  Additionally, consider this specific context:
146
- {random_context}
147
- Generate a tweet that this person would write right now. Consider:
148
- 1. Their core beliefs and values
149
- 2. Their typical emotional expression
150
- 3. Their communication style and vocabulary
151
- 4. Their knowledge areas and experiences
152
- 5. Current context (if provided)
153
- The tweet should feel indistinguishable from their authentic tweets.
154
- """
155
 
156
  response = self.groq_client.chat.completions.create(
157
  messages=[
158
- {
159
- "role": "system",
160
- "content": "You are an expert in replicating individual writing and thinking patterns."
161
- },
162
- {
163
- "role": "user",
164
- "content": generation_prompt
165
- }
166
  ],
167
  model="mixtral-8x7b-32768",
168
- temperature=0.8, # Increase temperature for more diversity
169
- max_tokens=150, # Adjust max tokens for more substantial tweets
170
  )
171
-
172
  return response.choices[0].message.content
 
 
5
  import groq
6
  import json
7
  from datetime import datetime
8
+ from sklearn.decomposition import NMF
9
+ from sklearn.feature_extraction.text import TfidfVectorizer
10
 
11
  class TweetDatasetProcessor:
12
  def __init__(self):
 
41
 
42
  def _extract_timestamp(self, text):
43
  """Extract timestamp if present in tweet"""
44
+ return None # Implement timestamp extraction logic if needed
 
45
 
46
  def _extract_mentions(self, text):
47
  """Extract mentioned users from tweet"""
 
55
  """Comprehensive personality analysis"""
56
  all_tweets = [tweet['content'] for tweet in self.tweets]
57
  analysis_prompt = f"""Perform a deep psychological analysis of the author based on these tweets. Analyze:
58
+ Core beliefs, emotional tendencies, cognitive patterns, etc.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
59
  Tweets for analysis:
60
  {json.dumps(all_tweets[:30], indent=2)}
61
  """
62
 
63
  response = self.groq_client.chat.completions.create(
64
  messages=[
65
+ {"role": "system", "content": "You are an expert psychologist."},
66
+ {"role": "user", "content": analysis_prompt},
 
 
 
 
 
 
67
  ],
68
  model="mixtral-8x7b-32768",
69
  temperature=0.1,
70
  )
 
71
  self.personality_profile = response.choices[0].message.content
72
  return self.personality_profile
73
 
74
+ def analyze_topics(self, n_topics=5):
75
+ """Extract and identify different topics the author has tweeted about"""
76
+ all_tweets = [tweet['content'] for tweet in self.tweets]
77
+ vectorizer = TfidfVectorizer(stop_words='english')
78
+ tfidf_matrix = vectorizer.fit_transform(all_tweets)
79
+ nmf_model = NMF(n_components=n_topics, random_state=1)
80
+ nmf_model.fit(tfidf_matrix)
81
+
82
+ topics = []
83
+ for topic_idx, topic in enumerate(nmf_model.components_):
84
+ topic_words = [vectorizer.get_feature_names_out()[i] for i in topic.argsort()[:-n_topics - 1:-1]]
85
+ topics.append(" ".join(topic_words))
86
+ return topics
87
 
88
  def generate_tweet(self, context=""):
89
  """Generate a new tweet based on personality profile and optional context"""
 
95
  "Provide advice to followers."
96
  ]
97
 
98
+ # Include historical topics in the context
99
+ historical_topics = self.analyze_topics()
100
  additional_contexts.extend(historical_topics)
101
 
102
  # Randomly choose an additional context to diversify tweets
 
108
  Current context or topic (if any):
109
  {context}
110
  Additionally, consider this specific context:
111
+ {random_context}
112
+ Generate a tweet that this person would write right now."""
 
 
 
 
 
 
 
113
 
114
  response = self.groq_client.chat.completions.create(
115
  messages=[
116
+ {"role": "system", "content": "You are an expert in replicating writing patterns."},
117
+ {"role": "user", "content": generation_prompt},
 
 
 
 
 
 
118
  ],
119
  model="mixtral-8x7b-32768",
120
+ temperature=0.8,
121
+ max_tokens=150,
122
  )
 
123
  return response.choices[0].message.content
124
+