Spaces:

Manasa1
/

Jack_Clone

Sleeping

App Files Community

Manasa1 committed on Nov 15, 2024

Commit

391392f

verified ·

1 Parent(s): 0fd270a

Update tweet_analyzer.py

Browse files

Files changed (1) hide show

tweet_analyzer.py +28 -76

tweet_analyzer.py CHANGED Viewed

@@ -5,6 +5,8 @@ from dotenv import load_dotenv
 import groq
 import json
 from datetime import datetime
 class TweetDatasetProcessor:
     def __init__(self):
@@ -39,8 +41,7 @@ class TweetDatasetProcessor:
     def _extract_timestamp(self, text):
         """Extract timestamp if present in tweet"""
-        # Implement timestamp extraction logic
-        return None
     def _extract_mentions(self, text):
         """Extract mentioned users from tweet"""
@@ -54,71 +55,35 @@ class TweetDatasetProcessor:
         """Comprehensive personality analysis"""
         all_tweets = [tweet['content'] for tweet in self.tweets]
         analysis_prompt = f"""Perform a deep psychological analysis of the author based on these tweets. Analyze:
-        1. Core Beliefs and Values:
-           - What fundamental beliefs shape their worldview?
-           - What causes or issues do they care about?
-        2. Cognitive Patterns:
-           - How do they process information?
-           - What decision-making patterns are visible?
-        3. Emotional Tendencies:
-           - What triggers emotional responses?
-           - How do they express emotions?
-        4. Social Interaction Style:
-           - How do they engage with others?
-           - What relationship patterns emerge?
-        5. Knowledge Areas:
-           - What topics do they discuss with expertise?
-           - What experiences do they draw from?
-        6. Communication Style:
-           - Vocabulary preferences
-           - Rhetorical patterns
-           - Humor style
-        7. Behavioral Patterns:
-           - Daily routines mentioned
-           - Regular activities
-           - Habits and preferences
         Tweets for analysis:
         {json.dumps(all_tweets[:30], indent=2)}
         """
         response = self.groq_client.chat.completions.create(
             messages=[
-                {
-                    "role": "system",
-                    "content": "You are an expert psychologist specializing in personality analysis through written communication."
-                },
-                {
-                    "role": "user",
-                    "content": analysis_prompt
-                }
             ],
             model="mixtral-8x7b-32768",
             temperature=0.1,
         )
         self.personality_profile = response.choices[0].message.content
         return self.personality_profile
-        def analyze_topics(self, n_topics=5):
-            """Extract and identify different topics the author has tweeted about"""
-            all_tweets = [tweet['content'] for tweet in self.tweets]
-            vectorizer = TfidfVectorizer(stop_words='english')
-            tfidf_matrix = vectorizer.fit_transform(all_tweets)
-            nmf_model = NMF(n_components=n_topics, random_state=1)
-            nmf_model.fit(tfidf_matrix)
-            topics = []
-            for topic_idx, topic in enumerate(nmf_model.components_):
-               topic_words = [vectorizer.get_feature_names_out()[i] for i in topic.argsort()[:-n_topics - 1:-1]]
-               topics.append(" ".join(topic_words))
-            return topics
     def generate_tweet(self, context=""):
         """Generate a new tweet based on personality profile and optional context"""
@@ -130,8 +95,8 @@ class TweetDatasetProcessor:
             "Provide advice to followers."
         ]
-        # Include historical topics in the context
-        historical_topics = self.analyze_topics()
         additional_contexts.extend(historical_topics)
         # Randomly choose an additional context to diversify tweets
@@ -143,30 +108,17 @@ class TweetDatasetProcessor:
         Current context or topic (if any):
         {context}
         Additionally, consider this specific context:
-       {random_context}
-       Generate a tweet that this person would write right now. Consider:
-       1. Their core beliefs and values
-       2. Their typical emotional expression
-       3. Their communication style and vocabulary
-       4. Their knowledge areas and experiences
-       5. Current context (if provided)
-       The tweet should feel indistinguishable from their authentic tweets.
-        """
         response = self.groq_client.chat.completions.create(
             messages=[
-                {
-                    "role": "system",
-                    "content": "You are an expert in replicating individual writing and thinking patterns."
-                },
-                {
-                    "role": "user",
-                    "content": generation_prompt
-                }
             ],
             model="mixtral-8x7b-32768",
-            temperature=0.8,  # Increase temperature for more diversity
-            max_tokens=150,   # Adjust max tokens for more substantial tweets
         )
         return response.choices[0].message.content

 import groq
 import json
 from datetime import datetime
+from sklearn.decomposition import NMF
+from sklearn.feature_extraction.text import TfidfVectorizer
 class TweetDatasetProcessor:
     def __init__(self):
     def _extract_timestamp(self, text):
         """Extract timestamp if present in tweet"""
+        return None  # Implement timestamp extraction logic if needed
     def _extract_mentions(self, text):
         """Extract mentioned users from tweet"""
         """Comprehensive personality analysis"""
         all_tweets = [tweet['content'] for tweet in self.tweets]
         analysis_prompt = f"""Perform a deep psychological analysis of the author based on these tweets. Analyze:
+        Core beliefs, emotional tendencies, cognitive patterns, etc.
         Tweets for analysis:
         {json.dumps(all_tweets[:30], indent=2)}
         """
         response = self.groq_client.chat.completions.create(
             messages=[
+                {"role": "system", "content": "You are an expert psychologist."},
+                {"role": "user", "content": analysis_prompt},
             ],
             model="mixtral-8x7b-32768",
             temperature=0.1,
         )
         self.personality_profile = response.choices[0].message.content
         return self.personality_profile
+    def analyze_topics(self, n_topics=5):
+        """Extract and identify different topics the author has tweeted about"""
+        all_tweets = [tweet['content'] for tweet in self.tweets]
+        vectorizer = TfidfVectorizer(stop_words='english')
+        tfidf_matrix = vectorizer.fit_transform(all_tweets)
+        nmf_model = NMF(n_components=n_topics, random_state=1)
+        nmf_model.fit(tfidf_matrix)
+        topics = []
+        for topic_idx, topic in enumerate(nmf_model.components_):
+            topic_words = [vectorizer.get_feature_names_out()[i] for i in topic.argsort()[:-n_topics - 1:-1]]
+            topics.append(" ".join(topic_words))
+        return topics
     def generate_tweet(self, context=""):
         """Generate a new tweet based on personality profile and optional context"""
             "Provide advice to followers."
         ]
+        # Include historical topics in the context
+        historical_topics = self.analyze_topics()
         additional_contexts.extend(historical_topics)
         # Randomly choose an additional context to diversify tweets
         Current context or topic (if any):
         {context}
         Additionally, consider this specific context:
+        {random_context}
+        Generate a tweet that this person would write right now."""
         response = self.groq_client.chat.completions.create(
             messages=[
+                {"role": "system", "content": "You are an expert in replicating writing patterns."},
+                {"role": "user", "content": generation_prompt},
             ],
             model="mixtral-8x7b-32768",
+            temperature=0.8,
+            max_tokens=150,
         )
         return response.choices[0].message.content