Manasa1 committed on
Commit
7367aa3
·
verified ·
1 Parent(s): dac9332

Update tweet_analyzer.py

Browse files
Files changed (1) hide show
  1. tweet_analyzer.py +7 -6
tweet_analyzer.py CHANGED
@@ -7,6 +7,7 @@ import json
7
  from datetime import datetime
8
  from sklearn.decomposition import NMF
9
  from sklearn.feature_extraction.text import TfidfVectorizer
 
10
 
11
  class TweetDatasetProcessor:
12
  def __init__(self):
@@ -83,6 +84,9 @@ class TweetDatasetProcessor:
83
  for topic_idx, topic in enumerate(nmf_model.components_):
84
  topic_words = [vectorizer.get_feature_names_out()[i] for i in topic.argsort()[:-n_topics - 1:-1]]
85
  topics.append(" ".join(topic_words))
 
 
 
86
  return topics
87
 
88
  def generate_tweet(self, context=""):
@@ -100,7 +104,6 @@ class TweetDatasetProcessor:
100
  additional_contexts.extend(historical_topics)
101
 
102
  # Randomly select multiple contexts to increase diversity
103
- import random
104
  selected_contexts = random.sample(additional_contexts, min(3, len(additional_contexts)))
105
 
106
  generation_prompt = f"""Based on this personality profile:
@@ -114,18 +117,16 @@ class TweetDatasetProcessor:
114
  2. Incorporates insights from multiple topics when possible.
115
  3. Uses a natural communication style and vocabulary.
116
  4. Includes relevant mentions or hashtags if applicable.
117
- The tweet should feel diverse and authentic."""
118
-
119
  response = self.groq_client.chat.completions.create(
120
  messages=[
121
  {"role": "system", "content": "You are an expert in replicating writing and thinking patterns."},
122
  {"role": "user", "content": generation_prompt},
123
  ],
124
  model="mixtral-8x7b-32768",
125
- temperature=0.8, # Adjust for balanced diversity and coherence
126
  max_tokens=150,
127
  )
128
 
129
  return response.choices[0].message.content
130
-
131
-
 
7
  from datetime import datetime
8
  from sklearn.decomposition import NMF
9
  from sklearn.feature_extraction.text import TfidfVectorizer
10
+ import random
11
 
12
  class TweetDatasetProcessor:
13
  def __init__(self):
 
84
  for topic_idx, topic in enumerate(nmf_model.components_):
85
  topic_words = [vectorizer.get_feature_names_out()[i] for i in topic.argsort()[:-n_topics - 1:-1]]
86
  topics.append(" ".join(topic_words))
87
+
88
+ # Filter out very similar topics (optional)
89
+ topics = list(set(topics)) # Remove duplicates
90
  return topics
91
 
92
  def generate_tweet(self, context=""):
 
104
  additional_contexts.extend(historical_topics)
105
 
106
  # Randomly select multiple contexts to increase diversity
 
107
  selected_contexts = random.sample(additional_contexts, min(3, len(additional_contexts)))
108
 
109
  generation_prompt = f"""Based on this personality profile:
 
117
  2. Incorporates insights from multiple topics when possible.
118
  3. Uses a natural communication style and vocabulary.
119
  4. Includes relevant mentions or hashtags if applicable.
120
+ The tweet should feel diverse and authentic, touching on a variety of topics."""
121
+
122
  response = self.groq_client.chat.completions.create(
123
  messages=[
124
  {"role": "system", "content": "You are an expert in replicating writing and thinking patterns."},
125
  {"role": "user", "content": generation_prompt},
126
  ],
127
  model="mixtral-8x7b-32768",
128
+ temperature=1.0, # Increased temperature for more diversity
129
  max_tokens=150,
130
  )
131
 
132
  return response.choices[0].message.content