Update tweet_analyzer.py

tweet_analyzer.py  CHANGED  (+7 -6)
@@ -7,6 +7,7 @@ import json
 from datetime import datetime
 from sklearn.decomposition import NMF
 from sklearn.feature_extraction.text import TfidfVectorizer
+import random
 
 class TweetDatasetProcessor:
     def __init__(self):
@@ -83,6 +84,9 @@ class TweetDatasetProcessor:
         for topic_idx, topic in enumerate(nmf_model.components_):
             topic_words = [vectorizer.get_feature_names_out()[i] for i in topic.argsort()[:-n_topics - 1:-1]]
             topics.append(" ".join(topic_words))
+
+        # Filter out very similar topics (optional)
+        topics = list(set(topics))  # Remove duplicates
         return topics
 
     def generate_tweet(self, context=""):
@@ -100,7 +104,6 @@ class TweetDatasetProcessor:
         additional_contexts.extend(historical_topics)
 
         # Randomly select multiple contexts to increase diversity
-        import random
         selected_contexts = random.sample(additional_contexts, min(3, len(additional_contexts)))
 
         generation_prompt = f"""Based on this personality profile:
@@ -114,18 +117,16 @@ class TweetDatasetProcessor:
 2. Incorporates insights from multiple topics when possible.
 3. Uses a natural communication style and vocabulary.
 4. Includes relevant mentions or hashtags if applicable.
-The tweet should feel diverse and authentic."""
-
+The tweet should feel diverse and authentic, touching on a variety of topics."""
+
         response = self.groq_client.chat.completions.create(
             messages=[
                 {"role": "system", "content": "You are an expert in replicating writing and thinking patterns."},
                 {"role": "user", "content": generation_prompt},
            ],
             model="mixtral-8x7b-32768",
-            temperature=0
+            temperature=1.0,  # Increased temperature for more diversity
             max_tokens=150,
         )
 
         return response.choices[0].message.content
-
-
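For context, a minimal self-contained sketch of the topic-extraction path this commit touches, including the new de-duplication step. The sample corpus, the n_topics value, and everything outside the diffed lines are illustrative assumptions, not the Space's actual data.

# Sketch of the NMF topic-extraction logic changed above; corpus and n_topics
# are illustrative assumptions.
from sklearn.decomposition import NMF
from sklearn.feature_extraction.text import TfidfVectorizer

docs = [
    "open source models keep getting better every month",
    "training open models is getting cheaper",
    "weekend hike photos from the coastal trail",
    "new boots held up well on the muddy trail",
]
n_topics = 2

vectorizer = TfidfVectorizer(stop_words="english")
tfidf = vectorizer.fit_transform(docs)

nmf_model = NMF(n_components=n_topics, random_state=0)
nmf_model.fit(tfidf)

topics = []
for topic_idx, topic in enumerate(nmf_model.components_):
    # Highest-weighted terms for this topic (same slicing as the script)
    topic_words = [vectorizer.get_feature_names_out()[i]
                   for i in topic.argsort()[:-n_topics - 1:-1]]
    topics.append(" ".join(topic_words))

# New in this commit: drop exact-duplicate topic strings before returning
topics = list(set(topics))
print(topics)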
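And a sketch of how the generation call behaves after the temperature change. It assumes the Groq Python SDK with a GROQ_API_KEY in the environment, and uses a simplified prompt in place of the Space's full personality-profile prompt; neither the client setup nor the full prompt appears in this diff.

# Sketch of the generation call after this commit; client setup, context list,
# and prompt wording are assumptions for illustration only.
import os
import random
from groq import Groq

client = Groq(api_key=os.environ["GROQ_API_KEY"])

additional_contexts = ["open source ai", "hiking trails", "model training"]
selected_contexts = random.sample(additional_contexts, min(3, len(additional_contexts)))

generation_prompt = "Write one tweet that draws on these topics: " + ", ".join(selected_contexts)

response = client.chat.completions.create(
    messages=[
        {"role": "system", "content": "You are an expert in replicating writing and thinking patterns."},
        {"role": "user", "content": generation_prompt},
    ],
    model="mixtral-8x7b-32768",
    temperature=1.0,  # raised from 0 in this commit for more varied output
    max_tokens=150,
)
print(response.choices[0].message.content)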