# Jack_Clone / tweet_analyzer.py
# Manasa1's picture
# Update tweet_analyzer.py
# 2317b49 verified
# raw
# history blame
# 4.89 kB
import os
from PyPDF2 import PdfReader
import pandas as pd
from dotenv import load_dotenv
import groq
import json
from datetime import datetime
class TweetDatasetProcessor:
    """Build a tweet dataset from PDF text and imitate its author via Groq.

    Typical flow: ``extract_text_from_pdf`` -> ``process_pdf_content`` ->
    ``analyze_personality`` -> ``generate_tweet``.

    Attributes:
        groq_client: Groq API client created in ``__init__``.
        model: Chat model id sent with every completion request.
        tweets: List of tweet dicts accumulated by ``process_pdf_content``.
        personality_profile: Empty dict until ``analyze_personality`` runs,
            after which it holds the analysis text returned by the model.
    """

    # Chat model used for both analysis and generation.
    # NOTE(review): confirm this model id is still served by Groq before
    # deploying; pass ``model=...`` to the constructor to override it.
    DEFAULT_MODEL = "mixtral-8x7b-32768"

    # Keys of every tweet dict, and therefore the DataFrame/CSV column order.
    TWEET_COLUMNS = ("content", "timestamp", "mentions", "hashtags")

    # Punctuation trimmed from the end of a mention/hashtag token, so that
    # "@bob," is reported as "@bob".
    _TRAILING_PUNCTUATION = ".,;:!?"

    def __init__(self, model=None):
        """Create the Groq client and empty state.

        Args:
            model: Optional chat model id; defaults to ``DEFAULT_MODEL``.
        """
        # Load .env first so the API key lookup below can see it.
        load_dotenv()
        self.groq_client = groq.Groq(api_key=os.getenv('Groq_api'))
        self.model = model or self.DEFAULT_MODEL
        self.tweets = []
        self.personality_profile = {}

    def extract_text_from_pdf(self, pdf_path):
        """Extract text content from PDF file.

        Args:
            pdf_path: Path (or file object) accepted by ``PdfReader``.

        Returns:
            The text of all pages, one page per newline-separated chunk.
        """
        reader = PdfReader(pdf_path)
        # ``or ""`` guards against a page yielding no text; joining with a
        # newline keeps the last line of one page from being glued onto the
        # first line of the next (callers split the result line by line).
        return "\n".join(page.extract_text() or "" for page in reader.pages)

    def process_pdf_content(self, text, output_path='processed_tweets.csv'):
        """Process PDF content and extract tweets with metadata.

        Every non-blank line of ``text`` is treated as one tweet and appended
        to ``self.tweets`` (repeated calls accumulate).

        Args:
            text: Raw text, one tweet per line. ``None`` is treated as empty.
            output_path: Where to write the CSV snapshot of all accumulated
                tweets. Pass ``None`` to skip writing.

        Returns:
            A ``pandas.DataFrame`` of all accumulated tweets with the columns
            listed in ``TWEET_COLUMNS`` (present even when there are no rows).
        """
        for raw_line in (text or "").split('\n'):
            content = raw_line.strip()
            if not content:
                continue
            self.tweets.append({
                'content': content,
                # Fall back to "now" when the line carries no timestamp.
                'timestamp': self._extract_timestamp(raw_line) or datetime.now(),
                'mentions': self._extract_mentions(raw_line),
                'hashtags': self._extract_hashtags(raw_line),
            })

        # Explicit columns keep the schema stable for an empty dataset.
        df = pd.DataFrame(self.tweets, columns=list(self.TWEET_COLUMNS))
        if output_path is not None:
            df.to_csv(output_path, index=False)
        return df

    def _extract_timestamp(self, text):
        """Extract timestamp if present in tweet.

        Placeholder: no extraction logic exists yet, so this always returns
        ``None`` and callers fall back to the current time.
        """
        # TODO: implement timestamp extraction logic.
        return None

    def _extract_mentions(self, text):
        """Extract mentioned users from tweet.

        Returns:
            Whitespace-separated tokens starting with ``@``, with trailing
            punctuation removed; a bare ``@`` is not reported.
        """
        candidates = (
            word.rstrip(self._TRAILING_PUNCTUATION)
            for word in text.split()
            if word.startswith('@')
        )
        return [token for token in candidates if len(token) > 1]

    def _extract_hashtags(self, text):
        """Extract hashtags from tweet.

        Returns:
            Whitespace-separated tokens starting with ``#``, with trailing
            punctuation removed; a bare ``#`` is not reported.
        """
        candidates = (
            word.rstrip(self._TRAILING_PUNCTUATION)
            for word in text.split()
            if word.startswith('#')
        )
        return [token for token in candidates if len(token) > 1]

    def _chat_completion(self, system_prompt, user_prompt, temperature):
        """Send one system+user exchange to Groq and return the reply text."""
        response = self.groq_client.chat.completions.create(
            messages=[
                {
                    "role": "system",
                    "content": system_prompt
                },
                {
                    "role": "user",
                    "content": user_prompt
                }
            ],
            model=self.model,
            temperature=temperature,
        )
        return response.choices[0].message.content

    def analyze_personality(self, max_tweets=30):
        """Comprehensive personality analysis.

        Sends the first ``max_tweets`` accumulated tweets to the chat model
        and stores the reply in ``self.personality_profile``.

        Args:
            max_tweets: Maximum number of tweets included in the prompt.

        Returns:
            The analysis text returned by the model.
        """
        all_tweets = [tweet['content'] for tweet in self.tweets]
        # ensure_ascii=False keeps emoji and non-English text readable in the
        # prompt instead of turning them into \uXXXX escape sequences.
        tweets_json = json.dumps(all_tweets[:max_tweets], indent=2, ensure_ascii=False)
        # The prompt text is kept flush-left so that no source indentation
        # leaks into what is sent to the model.
        analysis_prompt = f"""Perform a deep psychological analysis of the author based on these tweets. Analyze:
1. Core Beliefs and Values:
- What fundamental beliefs shape their worldview?
- What causes or issues do they care about?
2. Cognitive Patterns:
- How do they process information?
- What decision-making patterns are visible?
3. Emotional Tendencies:
- What triggers emotional responses?
- How do they express emotions?
4. Social Interaction Style:
- How do they engage with others?
- What relationship patterns emerge?
5. Knowledge Areas:
- What topics do they discuss with expertise?
- What experiences do they draw from?
6. Communication Style:
- Vocabulary preferences
- Rhetorical patterns
- Humor style
7. Behavioral Patterns:
- Daily routines mentioned
- Regular activities
- Habits and preferences
Tweets for analysis:
{tweets_json}
"""
        self.personality_profile = self._chat_completion(
            "You are an expert psychologist specializing in personality analysis through written communication.",
            analysis_prompt,
            temperature=0.1,
        )
        return self.personality_profile

    def generate_tweet(self, context=""):
        """Generate a new tweet based on personality profile and optional context.

        Args:
            context: Optional topic or situation the tweet should address.

        Returns:
            The tweet text returned by the model. The prompt embeds whatever
            ``self.personality_profile`` currently holds, so run
            ``analyze_personality`` first for a meaningful result.
        """
        # Flush-left for the same reason as the analysis prompt.
        generation_prompt = f"""Based on this personality profile:
{self.personality_profile}
Current context or topic (if any):
{context}
Generate a tweet that this person would write right now. Consider:
1. Their core beliefs and values
2. Their typical emotional expression
3. Their communication style and vocabulary
4. Their knowledge areas and experiences
5. Current context (if provided)
The tweet should feel indistinguishable from their authentic tweets.
"""
        return self._chat_completion(
            "You are an expert in replicating individual writing and thinking patterns.",
            generation_prompt,
            temperature=0.7,
        )