Manasa1 committed on
Commit
ac5fd0d
·
verified ·
1 Parent(s): b92a169

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +27 -32
app.py CHANGED
@@ -22,10 +22,12 @@ class TweetDatasetProcessor:
22
 
23
  @staticmethod
24
  def _process_line(line):
25
- """Process a single line."""
26
  line = line.strip()
27
  if not line or line.startswith('http'): # Skip empty lines and URLs
28
  return None
 
 
29
  return {
30
  'content': line,
31
  'timestamp': datetime.now(),
@@ -34,7 +36,7 @@ class TweetDatasetProcessor:
34
  }
35
 
36
  def extract_text_from_pdf(self):
37
- """Extract text content from PDF file."""
38
  reader = PdfReader(self.pdf_path)
39
  text = ""
40
  for page in reader.pages:
@@ -42,7 +44,7 @@ class TweetDatasetProcessor:
42
  return text
43
 
44
  def process_pdf_content(self, text):
45
- """Process PDF content and clean extracted tweets."""
46
  if not text.strip():
47
  raise ValueError("The provided PDF appears to be empty.")
48
 
@@ -56,16 +58,16 @@ class TweetDatasetProcessor:
56
  return self.tweets
57
 
58
  def analyze_personality(self, max_tweets=50):
59
- """Comprehensive personality analysis using a limited subset of tweets."""
60
  if not self.tweets:
61
  raise ValueError("No tweets available for personality analysis.")
62
 
63
  all_tweets = [tweet['content'] for tweet in self.tweets][:max_tweets]
64
  analysis_prompt = f"""Perform a deep psychological analysis of the author based on these tweets:
65
- Core beliefs, emotional tendencies, cognitive patterns, etc.
66
- Tweets for analysis:
67
- {json.dumps(all_tweets, indent=2)}
68
- """
69
 
70
  input_ids = self.tokenizer.encode(analysis_prompt, return_tensors='pt')
71
  output = self.model.generate(input_ids, max_length=500, num_return_sequences=1, temperature=0.7)
@@ -75,7 +77,7 @@ class TweetDatasetProcessor:
75
  return self.personality_profile
76
 
77
  def generate_tweet(self, context="", sample_size=3):
78
- """Generate a new tweet by sampling random tweets and avoiding repetition."""
79
  if not self.tweets:
80
  return "Error: No tweets available for generation."
81
 
@@ -96,20 +98,16 @@ class TweetDatasetProcessor:
96
 
97
  # Construct the prompt
98
  prompt = f"""Based on this personality profile:
99
- {personality_profile_excerpt}
100
- Current context or topic (if any):
101
- {context}
102
- Tweets for context:
103
- {', '.join(sampled_contents)}
104
- **Only generate the tweet. Do not include analysis, explanation, or any other content.**
105
- """
106
-
107
- inputs = self.tokenizer(prompt, return_tensors='pt', truncation=True, padding=True)
108
- input_ids = inputs['input_ids']
109
- attention_mask = inputs['attention_mask']
110
-
111
- pad_token_id = self.tokenizer.eos_token_id
112
- output = self.model.generate(input_ids, attention_mask=attention_mask, max_length=500, num_return_sequences=1, temperature=1.0, pad_token_id=pad_token_id,do_sample = True)
113
  generated_tweet = self.tokenizer.decode(output[0], skip_special_tokens=True).strip()
114
 
115
  return generated_tweet
@@ -124,22 +122,19 @@ def gradio_interface():
124
 
125
  text = processor.extract_text_from_pdf()
126
  tweets = processor.process_pdf_content(text)
127
- personality_analysis = processor.analyze_personality(max_tweets=50)
128
  generated_tweet = processor.generate_tweet(context="AI-powered tweet generation", sample_size=3)
129
 
130
- return personality_analysis, generated_tweet
131
 
132
  # Gradio app setup
133
  iface = gr.Interface(
134
  fn=gradio_interface,
135
  inputs=[],
136
- outputs=[
137
- gr.Textbox(label="Personality Analysis"),
138
- gr.Textbox(label="Generated Tweet")
139
- ],
140
- live=True,
141
- title="AI Personality and Tweet Generation",
142
- description="Automatically analyze personality and generate tweets based on a provided PDF of tweets."
143
  )
144
 
145
  # Launch the app
 
22
 
23
  @staticmethod
24
  def _process_line(line):
25
+ """Process a single line."""
26
  line = line.strip()
27
  if not line or line.startswith('http'): # Skip empty lines and URLs
28
  return None
29
+ # Clean any unwanted characters and fix broken words
30
+ line = line.replace('\u201c', '"').replace('\u201d', '"') # Replacing smart quotes
31
  return {
32
  'content': line,
33
  'timestamp': datetime.now(),
 
36
  }
37
 
38
  def extract_text_from_pdf(self):
39
+ """Extract text content from PDF file."""
40
  reader = PdfReader(self.pdf_path)
41
  text = ""
42
  for page in reader.pages:
 
44
  return text
45
 
46
  def process_pdf_content(self, text):
47
+ """Process PDF content and clean extracted tweets."""
48
  if not text.strip():
49
  raise ValueError("The provided PDF appears to be empty.")
50
 
 
58
  return self.tweets
59
 
60
  def analyze_personality(self, max_tweets=50):
61
+ """Comprehensive personality analysis using a limited subset of tweets."""
62
  if not self.tweets:
63
  raise ValueError("No tweets available for personality analysis.")
64
 
65
  all_tweets = [tweet['content'] for tweet in self.tweets][:max_tweets]
66
  analysis_prompt = f"""Perform a deep psychological analysis of the author based on these tweets:
67
+ Core beliefs, emotional tendencies, cognitive patterns, etc.
68
+ Tweets for analysis:
69
+ {json.dumps(all_tweets, indent=2)}
70
+ """
71
 
72
  input_ids = self.tokenizer.encode(analysis_prompt, return_tensors='pt')
73
  output = self.model.generate(input_ids, max_length=500, num_return_sequences=1, temperature=0.7)
 
77
  return self.personality_profile
78
 
79
  def generate_tweet(self, context="", sample_size=3):
80
+ """Generate a new tweet by sampling random tweets and avoiding repetition."""
81
  if not self.tweets:
82
  return "Error: No tweets available for generation."
83
 
 
98
 
99
  # Construct the prompt
100
  prompt = f"""Based on this personality profile:
101
+ {personality_profile_excerpt}
102
+ Current context or topic (if any):
103
+ {context}
104
+ Tweets for context:
105
+ {', '.join(sampled_contents)}
106
+ **Only generate the tweet. Do not include analysis, explanation, or any other content.**
107
+ """
108
+
109
+ input_ids = self.tokenizer.encode(prompt, return_tensors='pt', max_length=1024, truncation=True)
110
+ output = self.model.generate(input_ids, max_length=500, num_return_sequences=1, temperature=1.0)
 
 
 
 
111
  generated_tweet = self.tokenizer.decode(output[0], skip_special_tokens=True).strip()
112
 
113
  return generated_tweet
 
122
 
123
  text = processor.extract_text_from_pdf()
124
  tweets = processor.process_pdf_content(text)
125
+ processor.analyze_personality(max_tweets=50) # Analyze personality, but don't return the result
126
  generated_tweet = processor.generate_tweet(context="AI-powered tweet generation", sample_size=3)
127
 
128
+ return generated_tweet # Only return the generated tweet
129
 
130
  # Gradio app setup
131
  iface = gr.Interface(
132
  fn=gradio_interface,
133
  inputs=[],
134
+ outputs=gr.Textbox(label="Generated Tweet"), # Only output the generated tweet
135
+ live=False, # Set to False to generate only when user clicks the button
136
+ title="AI Tweet Generation",
137
+ description="Generate tweets based on the personality profile and tweets from a PDF document."
 
 
 
138
  )
139
 
140
  # Launch the app