Spaces:

Manasa1
/

Jack_Clone

Sleeping

App Files Files Community

Jack_Clone / tweet_analyzer.py

Manasa1

Update tweet_analyzer.py

2317b49 verified 9 months ago

raw

history blame

4.89 kB

	import os
	from PyPDF2 import PdfReader
	import pandas as pd
	from dotenv import load_dotenv
	import groq
	import json
	from datetime import datetime

	class TweetDatasetProcessor:
	    """Turn a PDF of tweets into a dataset and imitate its author via Groq.

	    Typical flow: ``extract_text_from_pdf`` -> ``process_pdf_content`` ->
	    ``analyze_personality`` -> ``generate_tweet``.

	    Attributes:
	        groq_client: Groq API client built from the ``Groq_api`` env variable.
	        tweets: List of dicts with keys ``content``, ``timestamp``,
	            ``mentions`` and ``hashtags``; grows on every
	            ``process_pdf_content`` call.
	        personality_profile: Starts as an empty dict; replaced by the
	            model's text answer once ``analyze_personality`` has run.
	    """

	    # Model id used for every chat completion.
	    # NOTE(review): confirm this id is still served by Groq; override the
	    # attribute (class- or instance-level) to switch models.
	    MODEL = "mixtral-8x7b-32768"

	    _ANALYSIS_SYSTEM_PROMPT = (
	        "You are an expert psychologist specializing in personality "
	        "analysis through written communication."
	    )
	    _GENERATION_SYSTEM_PROMPT = (
	        "You are an expert in replicating individual writing and "
	        "thinking patterns."
	    )

	    # Fixed part of the analysis prompt; the tweet sample is appended to it.
	    _ANALYSIS_PROMPT_LINES = (
	        "Perform a deep psychological analysis of the author based on "
	        "these tweets. Analyze:",
	        "",
	        "1. Core Beliefs and Values:",
	        "- What fundamental beliefs shape their worldview?",
	        "- What causes or issues do they care about?",
	        "",
	        "2. Cognitive Patterns:",
	        "- How do they process information?",
	        "- What decision-making patterns are visible?",
	        "",
	        "3. Emotional Tendencies:",
	        "- What triggers emotional responses?",
	        "- How do they express emotions?",
	        "",
	        "4. Social Interaction Style:",
	        "- How do they engage with others?",
	        "- What relationship patterns emerge?",
	        "",
	        "5. Knowledge Areas:",
	        "- What topics do they discuss with expertise?",
	        "- What experiences do they draw from?",
	        "",
	        "6. Communication Style:",
	        "- Vocabulary preferences",
	        "- Rhetorical patterns",
	        "- Humor style",
	        "",
	        "7. Behavioral Patterns:",
	        "- Daily routines mentioned",
	        "- Regular activities",
	        "- Habits and preferences",
	        "",
	        "Tweets for analysis:",
	    )

	    # Fixed tail of the generation prompt (follows profile and context).
	    _GENERATION_GUIDANCE_LINES = (
	        "Generate a tweet that this person would write right now. Consider:",
	        "1. Their core beliefs and values",
	        "2. Their typical emotional expression",
	        "3. Their communication style and vocabulary",
	        "4. Their knowledge areas and experiences",
	        "5. Current context (if provided)",
	        "",
	        "The tweet should feel indistinguishable from their authentic tweets.",
	    )

	    def __init__(self):
	        """Load ``.env`` settings and create the Groq client."""
	        load_dotenv()
	        self.groq_client = groq.Groq(api_key=os.getenv('Groq_api'))
	        self.tweets = []
	        # Kept as a dict for backward compatibility; analyze_personality
	        # overwrites it with a string.
	        self.personality_profile = {}

	    def extract_text_from_pdf(self, pdf_path):
	        """Return the concatenated text of every page in the PDF.

	        Args:
	            pdf_path: Path (or file object) accepted by ``PdfReader``.

	        Returns:
	            The page texts joined without a separator.
	        """
	        reader = PdfReader(pdf_path)
	        # ``or ""`` keeps a page that yields no text (None/empty) from
	        # breaking the concatenation.
	        return "".join(page.extract_text() or "" for page in reader.pages)

	    def process_pdf_content(self, text, output_path='processed_tweets.csv'):
	        """Split text into one tweet per non-blank line and store the result.

	        Each tweet is appended to ``self.tweets`` and the full list is
	        returned as a DataFrame, which is also written to CSV.

	        Args:
	            text: Raw text, one tweet per line.
	            output_path: CSV destination; pass ``None`` to skip writing.

	        Returns:
	            ``pandas.DataFrame`` built from all tweets collected so far.
	        """
	        for line in text.split('\n'):
	            content = line.strip()
	            if not content:
	                continue
	            self.tweets.append({
	                'content': content,
	                # Single lookup; fall back to "now" when nothing is found.
	                'timestamp': self._extract_timestamp(line) or datetime.now(),
	                'mentions': self._extract_mentions(line),
	                'hashtags': self._extract_hashtags(line),
	            })

	        df = pd.DataFrame(self.tweets)
	        if output_path is not None:
	            df.to_csv(output_path, index=False)
	        return df

	    def _extract_timestamp(self, text):
	        """Return the tweet's timestamp, or ``None`` when none is found.

	        Placeholder: no extraction logic is implemented yet, so this always
	        returns ``None`` and callers fall back to the current time.
	        """
	        # TODO: implement timestamp extraction logic.
	        return None

	    def _extract_mentions(self, text):
	        """Return whitespace-separated tokens of ``text`` starting with '@'."""
	        return [word for word in text.split() if word.startswith('@')]

	    def _extract_hashtags(self, text):
	        """Return whitespace-separated tokens of ``text`` starting with '#'."""
	        return [word for word in text.split() if word.startswith('#')]

	    def _build_analysis_prompt(self, tweets, max_tweets=30):
	        """Build the user prompt for the personality analysis.

	        Args:
	            tweets: List of tweet texts.
	            max_tweets: How many tweets from the start of the list to embed.

	        Returns:
	            The prompt text ending with the JSON-encoded tweet sample.
	        """
	        # ensure_ascii=False keeps emoji and accented characters readable
	        # for the model instead of turning them into \uXXXX escapes.
	        sample = json.dumps(tweets[:max_tweets], indent=2, ensure_ascii=False)
	        header = "\n".join(self._ANALYSIS_PROMPT_LINES)
	        return f"{header}\n{sample}\n"

	    def _build_generation_prompt(self, context=""):
	        """Build the user prompt for tweet generation from the stored profile."""
	        guidance = "\n".join(self._GENERATION_GUIDANCE_LINES)
	        return (
	            "Based on this personality profile:\n"
	            f"{self.personality_profile}\n"
	            "\n"
	            "Current context or topic (if any):\n"
	            f"{context}\n"
	            "\n"
	            f"{guidance}\n"
	        )

	    def _chat(self, system_prompt, user_prompt, temperature):
	        """Send one system+user exchange to Groq and return the reply text."""
	        response = self.groq_client.chat.completions.create(
	            messages=[
	                {"role": "system", "content": system_prompt},
	                {"role": "user", "content": user_prompt},
	            ],
	            model=self.MODEL,
	            temperature=temperature,
	        )
	        return response.choices[0].message.content

	    def analyze_personality(self, max_tweets=30):
	        """Ask the model for a personality analysis of the collected tweets.

	        Args:
	            max_tweets: Number of tweets (from the start of ``self.tweets``)
	                included in the prompt.

	        Returns:
	            The model's analysis text, also stored in
	            ``self.personality_profile``.
	        """
	        all_tweets = [tweet['content'] for tweet in self.tweets]
	        self.personality_profile = self._chat(
	            self._ANALYSIS_SYSTEM_PROMPT,
	            self._build_analysis_prompt(all_tweets, max_tweets),
	            temperature=0.1,
	        )
	        return self.personality_profile

	    def generate_tweet(self, context=""):
	        """Generate a new tweet from the stored profile and optional context.

	        Args:
	            context: Optional topic or situation the tweet should react to.

	        Returns:
	            The model's reply text.
	        """
	        return self._chat(
	            self._GENERATION_SYSTEM_PROMPT,
	            self._build_generation_prompt(context),
	            temperature=0.7,
	        )