Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -22,10 +22,12 @@ class TweetDatasetProcessor:
|
|
22 |
|
23 |
@staticmethod
|
24 |
def _process_line(line):
|
25 |
-
"""Process a single line."""
|
26 |
line = line.strip()
|
27 |
if not line or line.startswith('http'): # Skip empty lines and URLs
|
28 |
return None
|
|
|
|
|
29 |
return {
|
30 |
'content': line,
|
31 |
'timestamp': datetime.now(),
|
@@ -34,7 +36,7 @@ class TweetDatasetProcessor:
|
|
34 |
}
|
35 |
|
36 |
def extract_text_from_pdf(self):
|
37 |
-
"""Extract text content from PDF file."""
|
38 |
reader = PdfReader(self.pdf_path)
|
39 |
text = ""
|
40 |
for page in reader.pages:
|
@@ -42,7 +44,7 @@ class TweetDatasetProcessor:
|
|
42 |
return text
|
43 |
|
44 |
def process_pdf_content(self, text):
|
45 |
-
"""Process PDF content and clean extracted tweets."""
|
46 |
if not text.strip():
|
47 |
raise ValueError("The provided PDF appears to be empty.")
|
48 |
|
@@ -56,16 +58,16 @@ class TweetDatasetProcessor:
|
|
56 |
return self.tweets
|
57 |
|
58 |
def analyze_personality(self, max_tweets=50):
|
59 |
-
"""Comprehensive personality analysis using a limited subset of tweets."""
|
60 |
if not self.tweets:
|
61 |
raise ValueError("No tweets available for personality analysis.")
|
62 |
|
63 |
all_tweets = [tweet['content'] for tweet in self.tweets][:max_tweets]
|
64 |
analysis_prompt = f"""Perform a deep psychological analysis of the author based on these tweets:
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
|
70 |
input_ids = self.tokenizer.encode(analysis_prompt, return_tensors='pt')
|
71 |
output = self.model.generate(input_ids, max_length=500, num_return_sequences=1, temperature=0.7)
|
@@ -75,7 +77,7 @@ class TweetDatasetProcessor:
|
|
75 |
return self.personality_profile
|
76 |
|
77 |
def generate_tweet(self, context="", sample_size=3):
|
78 |
-
"""Generate a new tweet by sampling random tweets and avoiding repetition."""
|
79 |
if not self.tweets:
|
80 |
return "Error: No tweets available for generation."
|
81 |
|
@@ -96,20 +98,16 @@ class TweetDatasetProcessor:
|
|
96 |
|
97 |
# Construct the prompt
|
98 |
prompt = f"""Based on this personality profile:
|
99 |
-
|
100 |
-
|
101 |
-
|
102 |
-
|
103 |
-
|
104 |
-
|
105 |
-
|
106 |
-
|
107 |
-
|
108 |
-
input_ids =
|
109 |
-
attention_mask = inputs['attention_mask']
|
110 |
-
|
111 |
-
pad_token_id = self.tokenizer.eos_token_id
|
112 |
-
output = self.model.generate(input_ids, attention_mask=attention_mask, max_length=500, num_return_sequences=1, temperature=1.0, pad_token_id=pad_token_id,do_sample = True)
|
113 |
generated_tweet = self.tokenizer.decode(output[0], skip_special_tokens=True).strip()
|
114 |
|
115 |
return generated_tweet
|
@@ -124,22 +122,19 @@ def gradio_interface():
|
|
124 |
|
125 |
text = processor.extract_text_from_pdf()
|
126 |
tweets = processor.process_pdf_content(text)
|
127 |
-
|
128 |
generated_tweet = processor.generate_tweet(context="AI-powered tweet generation", sample_size=3)
|
129 |
|
130 |
-
return
|
131 |
|
132 |
# Gradio app setup
|
133 |
iface = gr.Interface(
|
134 |
fn=gradio_interface,
|
135 |
inputs=[],
|
136 |
-
outputs=
|
137 |
-
|
138 |
-
|
139 |
-
|
140 |
-
live=True,
|
141 |
-
title="AI Personality and Tweet Generation",
|
142 |
-
description="Automatically analyze personality and generate tweets based on a provided PDF of tweets."
|
143 |
)
|
144 |
|
145 |
# Launch the app
|
|
|
22 |
|
23 |
@staticmethod
|
24 |
def _process_line(line):
|
25 |
+
"""Process a single line."""
|
26 |
line = line.strip()
|
27 |
if not line or line.startswith('http'): # Skip empty lines and URLs
|
28 |
return None
|
29 |
+
# Clean any unwanted characters and fix broken words
|
30 |
+
line = line.replace('\u201c', '"').replace('\u201d', '"') # Replacing smart quotes
|
31 |
return {
|
32 |
'content': line,
|
33 |
'timestamp': datetime.now(),
|
|
|
36 |
}
|
37 |
|
38 |
def extract_text_from_pdf(self):
|
39 |
+
"""Extract text content from PDF file."""
|
40 |
reader = PdfReader(self.pdf_path)
|
41 |
text = ""
|
42 |
for page in reader.pages:
|
|
|
44 |
return text
|
45 |
|
46 |
def process_pdf_content(self, text):
|
47 |
+
"""Process PDF content and clean extracted tweets."""
|
48 |
if not text.strip():
|
49 |
raise ValueError("The provided PDF appears to be empty.")
|
50 |
|
|
|
58 |
return self.tweets
|
59 |
|
60 |
def analyze_personality(self, max_tweets=50):
|
61 |
+
"""Comprehensive personality analysis using a limited subset of tweets."""
|
62 |
if not self.tweets:
|
63 |
raise ValueError("No tweets available for personality analysis.")
|
64 |
|
65 |
all_tweets = [tweet['content'] for tweet in self.tweets][:max_tweets]
|
66 |
analysis_prompt = f"""Perform a deep psychological analysis of the author based on these tweets:
|
67 |
+
Core beliefs, emotional tendencies, cognitive patterns, etc.
|
68 |
+
Tweets for analysis:
|
69 |
+
{json.dumps(all_tweets, indent=2)}
|
70 |
+
"""
|
71 |
|
72 |
input_ids = self.tokenizer.encode(analysis_prompt, return_tensors='pt')
|
73 |
output = self.model.generate(input_ids, max_length=500, num_return_sequences=1, temperature=0.7)
|
|
|
77 |
return self.personality_profile
|
78 |
|
79 |
def generate_tweet(self, context="", sample_size=3):
|
80 |
+
"""Generate a new tweet by sampling random tweets and avoiding repetition."""
|
81 |
if not self.tweets:
|
82 |
return "Error: No tweets available for generation."
|
83 |
|
|
|
98 |
|
99 |
# Construct the prompt
|
100 |
prompt = f"""Based on this personality profile:
|
101 |
+
{personality_profile_excerpt}
|
102 |
+
Current context or topic (if any):
|
103 |
+
{context}
|
104 |
+
Tweets for context:
|
105 |
+
{', '.join(sampled_contents)}
|
106 |
+
**Only generate the tweet. Do not include analysis, explanation, or any other content.**
|
107 |
+
"""
|
108 |
+
|
109 |
+
input_ids = self.tokenizer.encode(prompt, return_tensors='pt', max_length=1024, truncation=True)
|
110 |
+
output = self.model.generate(input_ids, max_length=500, num_return_sequences=1, temperature=1.0)
|
|
|
|
|
|
|
|
|
111 |
generated_tweet = self.tokenizer.decode(output[0], skip_special_tokens=True).strip()
|
112 |
|
113 |
return generated_tweet
|
|
|
122 |
|
123 |
text = processor.extract_text_from_pdf()
|
124 |
tweets = processor.process_pdf_content(text)
|
125 |
+
processor.analyze_personality(max_tweets=50) # Analyze personality, but don't return the result
|
126 |
generated_tweet = processor.generate_tweet(context="AI-powered tweet generation", sample_size=3)
|
127 |
|
128 |
+
return generated_tweet # Only return the generated tweet
|
129 |
|
130 |
# Gradio app setup
|
131 |
iface = gr.Interface(
|
132 |
fn=gradio_interface,
|
133 |
inputs=[],
|
134 |
+
outputs=gr.Textbox(label="Generated Tweet"), # Only output the generated tweet
|
135 |
+
live=False, # Set to False to generate only when user clicks the button
|
136 |
+
title="AI Tweet Generation",
|
137 |
+
description="Generate tweets based on the personality profile and tweets from a PDF document."
|
|
|
|
|
|
|
138 |
)
|
139 |
|
140 |
# Launch the app
|