# Hugging Face Space: Manasa1/Jack_Clone (status: Sleeping)
# File size: 4,889 bytes

import json
import os
import re
from datetime import datetime

import groq
import pandas as pd
from dotenv import load_dotenv
from PyPDF2 import PdfReader

class TweetDatasetProcessor:
    """Turn a PDF of tweets into a dataset and imitate its author via Groq.

    Intended flow: ``extract_text_from_pdf`` -> ``process_pdf_content`` ->
    ``analyze_personality`` -> ``generate_tweet``.
    """

    # Chat model used when the caller does not pick one.
    # NOTE(review): confirm this model id is still served by Groq.
    DEFAULT_MODEL = "mixtral-8x7b-32768"

    # Keys of every record stored in ``self.tweets`` (and CSV column order).
    _TWEET_COLUMNS = ('content', 'timestamp', 'mentions', 'hashtags')

    # "@name" / "#tag" tokens. The look-behind rejects matches glued to a
    # preceding word character (e-mail addresses, "C#") or to a repeated
    # marker, so trailing punctuation such as "@bob," is not captured.
    _MENTION_RE = re.compile(r'(?<![\w@])@\w+')
    _HASHTAG_RE = re.compile(r'(?<![\w#&])#\w+')

    def __init__(self, model=None):
        """Load environment variables and create the Groq client.

        Args:
            model: Groq chat model id used for every completion; falls back
                to ``DEFAULT_MODEL`` when omitted.
        """
        load_dotenv()
        # The API key is read from the ``Groq_api`` environment variable.
        self.groq_client = groq.Groq(api_key=os.getenv('Groq_api'))
        self.model = model or self.DEFAULT_MODEL
        # List of dicts with the keys in ``_TWEET_COLUMNS``.
        self.tweets = []
        # Starts as an empty dict; ``analyze_personality`` replaces it with
        # the model's free-text analysis (a string).
        self.personality_profile = {}

    def extract_text_from_pdf(self, pdf_path):
        """Return the concatenated text of every page in the PDF.

        Args:
            pdf_path: Path (or file object) accepted by ``PdfReader``.

        Returns:
            The page texts joined back to back, in page order.
        """
        reader = PdfReader(pdf_path)
        # Defensive: a page with no extractable text contributes "" instead
        # of breaking the concatenation if the extractor yields None.
        return "".join(page.extract_text() or "" for page in reader.pages)

    def process_pdf_content(self, text, output_path='processed_tweets.csv'):
        """Split *text* into tweets (one per non-blank line) and record them.

        Each tweet is appended to ``self.tweets``, so repeated calls
        accumulate records.

        Args:
            text: Raw text, one tweet per line.
            output_path: Where to write the CSV snapshot of all accumulated
                tweets. Pass ``None`` to skip writing.

        Returns:
            A ``pandas.DataFrame`` of all accumulated tweets with the columns
            ``content``, ``timestamp``, ``mentions`` and ``hashtags``.
        """
        for raw_line in text.split('\n'):
            content = raw_line.strip()
            if not content:
                continue
            self.tweets.append({
                'content': content,
                # Fall back to "now" when the tweet carries no timestamp.
                'timestamp': self._extract_timestamp(raw_line) or datetime.now(),
                'mentions': self._extract_mentions(raw_line),
                'hashtags': self._extract_hashtags(raw_line),
            })

        df = pd.DataFrame(self.tweets)
        if df.empty:
            # Keep the schema stable even when nothing was parsed.
            df = pd.DataFrame(columns=list(self._TWEET_COLUMNS))
        if output_path is not None:
            df.to_csv(output_path, index=False)
        return df

    def _extract_timestamp(self, text):
        """Return the timestamp embedded in *text*, or ``None``.

        Extraction is not implemented yet, so this always returns ``None``
        and callers substitute their own fallback.
        """
        # TODO: implement timestamp extraction logic.
        return None

    def _extract_mentions(self, text):
        """Return the ``@user`` mentions found in *text*, in order."""
        return self._MENTION_RE.findall(text)

    def _extract_hashtags(self, text):
        """Return the ``#tag`` hashtags found in *text*, in order."""
        return self._HASHTAG_RE.findall(text)

    def _chat(self, system_prompt, user_prompt, temperature):
        """Send one system+user exchange to Groq and return the reply text."""
        response = self.groq_client.chat.completions.create(
            messages=[
                {
                    "role": "system",
                    "content": system_prompt
                },
                {
                    "role": "user",
                    "content": user_prompt
                }
            ],
            model=self.model,
            temperature=temperature,
        )
        return response.choices[0].message.content

    def _build_analysis_prompt(self, tweets):
        """Return the personality-analysis prompt embedding *tweets* as JSON."""
        # ensure_ascii=False keeps emoji and accented text readable for the
        # model instead of turning them into \uXXXX escapes.
        tweets_json = json.dumps(tweets, indent=2, ensure_ascii=False)
        return f"""Perform a deep psychological analysis of the author based on these tweets. Analyze:

        1. Core Beliefs and Values:
           - What fundamental beliefs shape their worldview?
           - What causes or issues do they care about?

        2. Cognitive Patterns:
           - How do they process information?
           - What decision-making patterns are visible?

        3. Emotional Tendencies:
           - What triggers emotional responses?
           - How do they express emotions?

        4. Social Interaction Style:
           - How do they engage with others?
           - What relationship patterns emerge?

        5. Knowledge Areas:
           - What topics do they discuss with expertise?
           - What experiences do they draw from?

        6. Communication Style:
           - Vocabulary preferences
           - Rhetorical patterns
           - Humor style

        7. Behavioral Patterns:
           - Daily routines mentioned
           - Regular activities
           - Habits and preferences

        Tweets for analysis:
        {tweets_json}
        """

    def analyze_personality(self, max_tweets=30):
        """Ask the model for a personality analysis of the stored tweets.

        Args:
            max_tweets: Number of tweets (from the start of ``self.tweets``)
                included in the prompt; ``None`` sends all of them.

        Returns:
            The model's analysis text, also stored in
            ``self.personality_profile``.
        """
        sample = [tweet['content'] for tweet in self.tweets][:max_tweets]
        self.personality_profile = self._chat(
            "You are an expert psychologist specializing in personality analysis through written communication.",
            self._build_analysis_prompt(sample),
            temperature=0.1,
        )
        return self.personality_profile

    def generate_tweet(self, context=""):
        """Generate a new tweet in the author's voice.

        Uses whatever is currently in ``self.personality_profile``; call
        ``analyze_personality`` first to populate it.

        Args:
            context: Optional topic or situation the tweet should address.

        Returns:
            The generated tweet text.
        """
        generation_prompt = f"""Based on this personality profile:
        {self.personality_profile}

        Current context or topic (if any):
        {context}

        Generate a tweet that this person would write right now. Consider:
        1. Their core beliefs and values
        2. Their typical emotional expression
        3. Their communication style and vocabulary
        4. Their knowledge areas and experiences
        5. Current context (if provided)

        The tweet should feel indistinguishable from their authentic tweets.
        """
        return self._chat(
            "You are an expert in replicating individual writing and thinking patterns.",
            generation_prompt,
            temperature=0.7,
        )