File size: 4,889 Bytes
559513f
 
 
 
 
 
 
 
 
 
 
2317b49
559513f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
import json
import os
import re
from datetime import datetime

import groq
import pandas as pd
from dotenv import load_dotenv
from PyPDF2 import PdfReader

class TweetDatasetProcessor:
    """Build a tweet dataset from a PDF, profile its author and imitate them.

    Typical flow: ``extract_text_from_pdf`` -> ``process_pdf_content`` ->
    ``analyze_personality`` -> ``generate_tweet``.

    Attributes:
        groq_client: Groq API client created in ``__init__``.
        tweets: List of tweet dicts accumulated by ``process_pdf_content``.
        personality_profile: Empty dict until ``analyze_personality`` runs,
            after which it holds the model's textual analysis.
    """

    # Chat model used for every Groq request; kept in one place so it can be
    # overridden on the class or on an instance.
    MODEL = "mixtral-8x7b-32768"

    # Number of leading tweets embedded in the personality-analysis prompt.
    MAX_PROMPT_TWEETS = 30

    # A mention/hashtag is the marker followed by word characters. The
    # look-behind rejects markers glued to a preceding word (e.g. the '@' in
    # an e-mail address, or the '#' in "C#").
    _MENTION_RE = re.compile(r"(?<![\w@])@\w+")
    _HASHTAG_RE = re.compile(r"(?<![\w#])#\w+")

    def __init__(self):
        """Load environment variables and create the Groq client.

        The API key is read from the ``Groq_api`` environment variable
        (after ``load_dotenv()`` has been applied).
        """
        load_dotenv()
        self.groq_client = groq.Groq(api_key=os.getenv('Groq_api'))
        self.tweets = []
        # NOTE: starts as an empty dict but is replaced by a string in
        # analyze_personality(); treat it as "falsy until analysed".
        self.personality_profile = {}

    def extract_text_from_pdf(self, pdf_path):
        """Return the text of every page in the PDF, one page after another.

        Pages are joined with a newline so that the last line of one page is
        not fused with the first line of the next (lines are later treated
        as individual tweets).

        Args:
            pdf_path: Path (or file object) accepted by ``PdfReader``.

        Returns:
            The concatenated page text as a single string.
        """
        reader = PdfReader(pdf_path)
        # `or ""` guards against a page for which no text could be extracted.
        return "\n".join(page.extract_text() or "" for page in reader.pages)

    def process_pdf_content(self, text, output_path='processed_tweets.csv'):
        """Split text into tweets, collect metadata and export the dataset.

        Every non-blank line of ``text`` is treated as one tweet and appended
        to ``self.tweets`` (which therefore accumulates across calls).

        Args:
            text: Raw text, one tweet per line.
            output_path: CSV file to write the dataset to. Pass ``None`` to
                skip writing. Defaults to ``'processed_tweets.csv'``.

        Returns:
            A ``pandas.DataFrame`` built from all tweets collected so far,
            with columns ``content``, ``timestamp``, ``mentions`` and
            ``hashtags``.
        """
        for line in text.split('\n'):
            content = line.strip()
            if not content:
                continue
            # Fall back to the processing time when the tweet carries no
            # recognisable timestamp.
            timestamp = self._extract_timestamp(line) or datetime.now()
            self.tweets.append({
                'content': content,
                'timestamp': timestamp,
                'mentions': self._extract_mentions(line),
                'hashtags': self._extract_hashtags(line),
            })

        df = pd.DataFrame(self.tweets)
        if output_path is not None:
            df.to_csv(output_path, index=False)
        return df

    def _extract_timestamp(self, text):
        """Return the timestamp found in ``text``, or ``None``.

        Extraction is not implemented yet, so this always returns ``None``.
        """
        # TODO: implement timestamp extraction logic.
        return None

    def _extract_mentions(self, text):
        """Return the ``@user`` mentions in ``text`` in order of appearance.

        Surrounding punctuation is not included, and a bare ``@`` or an
        ``@`` inside a word (such as an e-mail address) is ignored.
        """
        return self._MENTION_RE.findall(text)

    def _extract_hashtags(self, text):
        """Return the ``#hashtag`` tokens in ``text`` in order of appearance.

        Surrounding punctuation is not included, and a bare ``#`` or a
        ``#`` inside a word is ignored.
        """
        return self._HASHTAG_RE.findall(text)

    def _chat(self, system_prompt, user_prompt, temperature):
        """Send one system+user exchange to Groq and return the reply text."""
        response = self.groq_client.chat.completions.create(
            messages=[
                {
                    "role": "system",
                    "content": system_prompt
                },
                {
                    "role": "user",
                    "content": user_prompt
                }
            ],
            model=self.MODEL,
            temperature=temperature,
        )
        return response.choices[0].message.content

    def analyze_personality(self):
        """Ask the model for a personality analysis of the tweets' author.

        Only the first ``MAX_PROMPT_TWEETS`` tweets are included in the
        prompt. The reply is stored in ``self.personality_profile``.

        Returns:
            The model's analysis as text.
        """
        all_tweets = [tweet['content'] for tweet in self.tweets]
        analysis_prompt = f"""Perform a deep psychological analysis of the author based on these tweets. Analyze:

        1. Core Beliefs and Values:
           - What fundamental beliefs shape their worldview?
           - What causes or issues do they care about?

        2. Cognitive Patterns:
           - How do they process information?
           - What decision-making patterns are visible?

        3. Emotional Tendencies:
           - What triggers emotional responses?
           - How do they express emotions?

        4. Social Interaction Style:
           - How do they engage with others?
           - What relationship patterns emerge?

        5. Knowledge Areas:
           - What topics do they discuss with expertise?
           - What experiences do they draw from?

        6. Communication Style:
           - Vocabulary preferences
           - Rhetorical patterns
           - Humor style

        7. Behavioral Patterns:
           - Daily routines mentioned
           - Regular activities
           - Habits and preferences

        Tweets for analysis:
        {json.dumps(all_tweets[:self.MAX_PROMPT_TWEETS], indent=2)}
        """

        # Low temperature: the analysis should be stable between runs.
        self.personality_profile = self._chat(
            "You are an expert psychologist specializing in personality analysis through written communication.",
            analysis_prompt,
            0.1,
        )
        return self.personality_profile

    def generate_tweet(self, context=""):
        """Generate a tweet in the author's voice.

        Uses whatever is currently stored in ``self.personality_profile``;
        call ``analyze_personality`` first to populate it.

        Args:
            context: Optional topic or situation the tweet should address.

        Returns:
            The generated tweet text.
        """
        generation_prompt = f"""Based on this personality profile:
        {self.personality_profile}

        Current context or topic (if any):
        {context}

        Generate a tweet that this person would write right now. Consider:
        1. Their core beliefs and values
        2. Their typical emotional expression
        3. Their communication style and vocabulary
        4. Their knowledge areas and experiences
        5. Current context (if provided)

        The tweet should feel indistinguishable from their authentic tweets.
        """

        # Higher temperature than the analysis to allow varied wording.
        return self._chat(
            "You are an expert in replicating individual writing and thinking patterns.",
            generation_prompt,
            0.7,
        )