Update tweet_analyzer.py

tweet_analyzer.py  CHANGED  (+7 -6)
@@ -7,6 +7,7 @@ import json
 from datetime import datetime
 from sklearn.decomposition import NMF
 from sklearn.feature_extraction.text import TfidfVectorizer
+import random
 
 class TweetDatasetProcessor:
     def __init__(self):
@@ -83,6 +84,9 @@ class TweetDatasetProcessor:
         for topic_idx, topic in enumerate(nmf_model.components_):
             topic_words = [vectorizer.get_feature_names_out()[i] for i in topic.argsort()[:-n_topics - 1:-1]]
             topics.append(" ".join(topic_words))
+
+        # Filter out very similar topics (optional)
+        topics = list(set(topics))  # Remove duplicates
         return topics
 
     def generate_tweet(self, context=""):
@@ -100,7 +104,6 @@ class TweetDatasetProcessor:
         additional_contexts.extend(historical_topics)
 
         # Randomly select multiple contexts to increase diversity
-        import random
         selected_contexts = random.sample(additional_contexts, min(3, len(additional_contexts)))
 
         generation_prompt = f"""Based on this personality profile:
@@ -114,18 +117,16 @@ class TweetDatasetProcessor:
 2. Incorporates insights from multiple topics when possible.
 3. Uses a natural communication style and vocabulary.
 4. Includes relevant mentions or hashtags if applicable.
-The tweet should feel diverse and authentic."""
-
+The tweet should feel diverse and authentic, touching on a variety of topics."""
+
         response = self.groq_client.chat.completions.create(
             messages=[
                 {"role": "system", "content": "You are an expert in replicating writing and thinking patterns."},
                 {"role": "user", "content": generation_prompt},
            ],
             model="mixtral-8x7b-32768",
-            temperature=0
+            temperature=1.0,  # Increased temperature for more diversity
             max_tokens=150,
         )
 
         return response.choices[0].message.content
-
-
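For context, a minimal self-contained sketch of the topic-extraction path this commit touches, including the new de-duplication step. The sample corpus, the n_topics value, and everything outside the diffed lines are illustrative assumptions, not the Space's actual data.

# Sketch of the NMF topic-extraction logic changed above; corpus and n_topics
# are illustrative assumptions.
from sklearn.decomposition import NMF
from sklearn.feature_extraction.text import TfidfVectorizer

docs = [
    "open source models keep getting better every month",
    "training open models is getting cheaper",
    "weekend hike photos from the coastal trail",
    "new boots held up well on the muddy trail",
]
n_topics = 2

vectorizer = TfidfVectorizer(stop_words="english")
tfidf = vectorizer.fit_transform(docs)

nmf_model = NMF(n_components=n_topics, random_state=0)
nmf_model.fit(tfidf)

topics = []
for topic_idx, topic in enumerate(nmf_model.components_):
    # Highest-weighted terms for this topic (same slicing as the script)
    topic_words = [vectorizer.get_feature_names_out()[i]
                   for i in topic.argsort()[:-n_topics - 1:-1]]
    topics.append(" ".join(topic_words))

# New in this commit: drop exact-duplicate topic strings before returning
topics = list(set(topics))
print(topics)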
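And a sketch of how the generation call behaves after the temperature change. It assumes the Groq Python SDK with a GROQ_API_KEY in the environment, and uses a simplified prompt in place of the Space's full personality-profile prompt; neither the client setup nor the full prompt appears in this diff.

# Sketch of the generation call after this commit; client setup, context list,
# and prompt wording are assumptions for illustration only.
import os
import random
from groq import Groq

client = Groq(api_key=os.environ["GROQ_API_KEY"])

additional_contexts = ["open source ai", "hiking trails", "model training"]
selected_contexts = random.sample(additional_contexts, min(3, len(additional_contexts)))

generation_prompt = "Write one tweet that draws on these topics: " + ", ".join(selected_contexts)

response = client.chat.completions.create(
    messages=[
        {"role": "system", "content": "You are an expert in replicating writing and thinking patterns."},
        {"role": "user", "content": generation_prompt},
    ],
    model="mixtral-8x7b-32768",
    temperature=1.0,  # raised from 0 in this commit for more varied output
    max_tokens=150,
)
print(response.choices[0].message.content)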