Manasa1 committed on
Commit
c2c3e4f
·
verified ·
1 Parent(s): fd73a47

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +141 -46
app.py CHANGED
@@ -1,48 +1,143 @@
1
  import gradio as gr
2
- from transformers import AutoTokenizer, AutoModelForCausalLM
3
-
4
# Identifier of the pre-trained (or fine-tuned) checkpoint to serve.
# Replace with the fine-tuned model name.
model_name = "Manasa1/GPT_Finetuned_tweets"

# Tokenizer and model are module-level objects: they are created once when
# this file is imported and reused by every call to generate_tweet().
tokenizer = AutoTokenizer.from_pretrained(
    model_name,
)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
)
8
-
9
- # Function to generate tweets
10
def generate_tweet(input_text):
    """Generate tweet text for a topic using the module-level model.

    The topic is tokenized (truncated to at most 512 tokens), handed to
    ``model.generate`` with top-k / top-p sampling enabled, and the first
    returned sequence is decoded with special tokens removed.

    Args:
        input_text: Topic or idea supplied by the user.

    Returns:
        The decoded sequence with leading and trailing whitespace stripped.
    """
    encoded = tokenizer(
        input_text,
        return_tensors="pt",
        max_length=512,
        truncation=True,
        padding=True,
    )

    # Sampling configuration, kept in one place so it is easy to tune.
    generation_options = {
        "attention_mask": encoded['attention_mask'],
        "max_length": 150,          # Limit to 150 tokens for brevity
        "num_return_sequences": 1,
        "top_p": 0.9,               # Narrow focus to ensure more concise results
        "top_k": 40,                # Focus on high-probability words
        "do_sample": True,
        "pad_token_id": tokenizer.pad_token_id,
    }
    sequences = model.generate(encoded['input_ids'], **generation_options)

    # Only one sequence is requested, so decode the first (and only) row.
    decoded = tokenizer.decode(sequences[0], skip_special_tokens=True)
    return decoded.strip()
26
-
27
- # Gradio interface
28
def main():
    """Build the Gradio Blocks UI for the tweet generator.

    The layout is a Markdown header, a row with the topic input and the
    read-only output box, and a button that calls ``generate_tweet`` with the
    input box's value and writes the result to the output box.

    Returns:
        The ``gr.Blocks`` object; the caller is responsible for launching it.
    """
    header_markdown = (
        "\n"
        "# Tweet Generator\n"
        "Enter a topic or idea, and the AI will craft a concise, engaging, and impactful tweet inspired by innovative thought leadership.\n"
    )

    with gr.Blocks() as interface:
        gr.Markdown(header_markdown)

        with gr.Row():
            topic_box = gr.Textbox(label="Enter your idea or topic:")
            tweet_box = gr.Textbox(label="Generated Tweet:", interactive=False)

        trigger = gr.Button("Generate Tweet")

        # Wire the button: one input component in, one output component out.
        trigger.click(generate_tweet, inputs=[topic_box], outputs=[tweet_box])

    return interface


# Run Gradio app
if __name__ == "__main__":
    app = main()
    app.launch(share=True)
 
1
  import gradio as gr
2
+ from transformers import AutoModelForCausalLM, AutoTokenizer
3
+ import random
4
+ from datetime import datetime
5
+ from PyPDF2 import PdfReader
6
+ import json
7
+ from dotenv import load_dotenv
8
+
9
+ load_dotenv()
10
+
11
class TweetDatasetProcessor:
    """Turn a PDF of tweets into a personality profile and new tweets.

    Typical flow: ``extract_text_from_pdf`` -> ``process_pdf_content`` ->
    ``analyze_personality`` -> ``generate_tweet``. The fine-tuned causal
    language model and its tokenizer are loaded once in ``__init__``.
    """

    # Budgets for *newly generated* tokens. ``generate(max_length=...)`` counts
    # the prompt as well, so a long prompt would leave no room for output;
    # ``max_new_tokens`` avoids that.
    ANALYSIS_MAX_NEW_TOKENS = 256
    TWEET_MAX_NEW_TOKENS = 64
    # Number of profile characters copied into the tweet prompt.
    PROFILE_EXCERPT_CHARS = 400

    def __init__(self, fine_tuned_model_name, pdf_path):
        """Load the model/tokenizer and initialise empty state.

        Args:
            fine_tuned_model_name: Name or path passed to ``from_pretrained``.
            pdf_path: Location of the PDF that contains the tweets.
        """
        self.tweets = []
        # Starts as an empty string (not a dict): the profile is text and is
        # interpolated into prompts, where ``{}`` would show up literally.
        self.personality_profile = ""
        self.vectorizer = None  # No need for vectorizer here since we're not clustering
        self.used_tweets = set()  # Track used tweets to avoid repetition
        self.pdf_path = pdf_path

        # Load fine-tuned model and tokenizer
        self.model = AutoModelForCausalLM.from_pretrained(fine_tuned_model_name)
        self.tokenizer = AutoTokenizer.from_pretrained(fine_tuned_model_name)

    @staticmethod
    def _process_line(line):
        """Convert one raw text line into a tweet record.

        Args:
            line: A single line of extracted PDF text.

        Returns:
            ``None`` for blank lines and lines starting with ``http``;
            otherwise a dict with ``content``, ``timestamp`` (time of
            processing, not of the tweet), ``mentions`` and ``hashtags``.
        """
        line = line.strip()
        if not line or line.startswith('http'):  # Skip empty lines and URLs
            return None

        words = line.split()
        return {
            'content': line,
            'timestamp': datetime.now(),
            'mentions': [word for word in words if word.startswith('@')],
            'hashtags': [word for word in words if word.startswith('#')],
        }

    def extract_text_from_pdf(self):
        """Extract the text of every page of ``self.pdf_path``.

        Pages are joined with a newline so that the last line of one page and
        the first line of the next are not fused into a single tweet. A page
        that yields no text contributes an empty string.

        Returns:
            The concatenated page text.
        """
        reader = PdfReader(self.pdf_path)
        return "\n".join(page.extract_text() or "" for page in reader.pages)

    def process_pdf_content(self, text):
        """Split extracted text into tweet records and store them.

        Args:
            text: Raw text, one tweet per line.

        Returns:
            The list of tweet records (also stored in ``self.tweets``).

        Raises:
            ValueError: If ``text`` is empty/whitespace, or if no line
                survives filtering.
        """
        if not text or not text.strip():
            raise ValueError("The provided PDF appears to be empty.")

        records = (self._process_line(line) for line in text.split('\n'))
        self.tweets = [record for record in records if record]

        if not self.tweets:
            raise ValueError("No tweets were extracted from the PDF. Ensure the content is properly formatted.")

        return self.tweets

    def _generate(self, prompt, max_new_tokens, temperature):
        """Run the model on ``prompt`` and return only the newly generated text."""
        tokenizer_kwargs = {'return_tensors': 'pt'}
        # When the model advertises a context size, keep the prompt short
        # enough that prompt + new tokens still fit.
        context_window = getattr(self.model.config, 'max_position_embeddings', None)
        if context_window and context_window > max_new_tokens:
            tokenizer_kwargs.update(
                truncation=True,
                max_length=context_window - max_new_tokens,
            )
        encoded = self.tokenizer(prompt, **tokenizer_kwargs)
        input_ids = encoded['input_ids']

        # Some causal-LM tokenizers define no pad token; fall back to EOS.
        pad_token_id = self.tokenizer.pad_token_id
        if pad_token_id is None:
            pad_token_id = self.tokenizer.eos_token_id

        output = self.model.generate(
            input_ids,
            attention_mask=encoded['attention_mask'],
            max_new_tokens=max_new_tokens,
            num_return_sequences=1,
            do_sample=True,  # temperature has no effect without sampling
            temperature=temperature,
            pad_token_id=pad_token_id,
        )

        # A causal LM returns prompt + continuation; drop the prompt tokens so
        # callers receive only the model's own text.
        continuation = output[0][input_ids.shape[-1]:]
        return self.tokenizer.decode(continuation, skip_special_tokens=True).strip()

    def analyze_personality(self, max_tweets=50):
        """Generate a personality analysis from the first ``max_tweets`` tweets.

        Args:
            max_tweets: Maximum number of stored tweets placed in the prompt.

        Returns:
            The generated analysis text (also stored in
            ``self.personality_profile``).

        Raises:
            ValueError: If no tweets have been loaded.
        """
        if not self.tweets:
            raise ValueError("No tweets available for personality analysis.")

        contents = [tweet['content'] for tweet in self.tweets[:max_tweets]]
        analysis_prompt = "\n".join([
            "Perform a deep psychological analysis of the author based on these tweets:",
            "Core beliefs, emotional tendencies, cognitive patterns, etc.",
            "Tweets for analysis:",
            # ensure_ascii=False keeps emoji / non-ASCII text readable
            # instead of turning it into \uXXXX escapes.
            json.dumps(contents, indent=2, ensure_ascii=False),
            "",
        ])

        self.personality_profile = self._generate(
            analysis_prompt, self.ANALYSIS_MAX_NEW_TOKENS, temperature=0.7
        )
        return self.personality_profile

    def _sample_contents(self, sample_size):
        """Pick up to ``sample_size`` tweet texts, preferring unused ones."""
        available = [tweet for tweet in self.tweets if tweet['content'] not in self.used_tweets]
        if len(available) < sample_size:
            self.used_tweets.clear()  # Reset used tweets if all have been used
            available = self.tweets

        # Never ask random.sample for more items than exist (it would raise
        # ValueError), and treat negative sizes as zero.
        count = max(0, min(sample_size, len(available)))
        contents = [tweet['content'] for tweet in random.sample(available, count)]
        self.used_tweets.update(contents)
        return contents

    def generate_tweet(self, context="", sample_size=3):
        """Generate a new tweet from the profile and a random tweet sample.

        Args:
            context: Optional topic to steer the tweet.
            sample_size: Number of stored tweets to include as examples; if
                fewer tweets exist, all of them are used.

        Returns:
            The generated tweet text, or an error message string when no
            tweets have been loaded.
        """
        if not self.tweets:
            return "Error: No tweets available for generation."

        sampled_contents = self._sample_contents(sample_size)

        # Truncate personality profile to avoid token overflow
        profile_excerpt = str(self.personality_profile or "")[:self.PROFILE_EXCERPT_CHARS]

        prompt = "\n".join([
            "Based on this personality profile:",
            profile_excerpt,
            "Current context or topic (if any):",
            context,
            "Tweets for context:",
            ', '.join(sampled_contents),
            "**Only generate the tweet. Do not include analysis, explanation, or any other content.**",
            "",
        ])

        return self._generate(prompt, self.TWEET_MAX_NEW_TOKENS, temperature=1.0)
112
+
113
+ # Gradio Interface Function
114
def gradio_interface():
    """Produce a personality analysis and a generated tweet for the UI.

    The processor (model, tokenizer and parsed tweets) is built on the first
    call and cached on the function object, so later calls neither reload the
    model nor re-read the PDF. Reusing one processor also lets its
    ``used_tweets`` tracking carry across calls. The analysis and the tweet
    are still generated afresh on every call.

    Returns:
        A ``(personality_analysis, generated_tweet)`` tuple.

    Raises:
        ValueError: Propagated from the processor when the PDF yields no
            usable tweets.
    """
    # Path to the PDF with tweets
    pdf_path = 'path_to_your_pdf.pdf'  # Replace with your PDF file path
    fine_tuned_model_name = 'path_to_your_fine_tuned_model'  # Replace with the path to your fine-tuned model

    processor = getattr(gradio_interface, "_processor", None)
    if processor is None:
        processor = TweetDatasetProcessor(fine_tuned_model_name, pdf_path)
        processor.process_pdf_content(processor.extract_text_from_pdf())
        # Cache only after loading and parsing succeeded, so a failed first
        # attempt is retried on the next call.
        # NOTE(review): not guarded against concurrent first calls - confirm
        # whether the app can serve requests in parallel.
        gradio_interface._processor = processor

    personality_analysis = processor.analyze_personality(max_tweets=50)
    generated_tweet = processor.generate_tweet(context="AI-powered tweet generation", sample_size=3)

    return personality_analysis, generated_tweet
127
+
128
+ # Gradio app setup
129
# The two text boxes receive, in order, the (analysis, tweet) tuple returned
# by gradio_interface().
_RESULT_BOXES = [
    gr.Textbox(label="Personality Analysis"),
    gr.Textbox(label="Generated Tweet"),
]

# The interface takes no user inputs; everything comes from the PDF and model
# configured inside gradio_interface().
iface = gr.Interface(
    fn=gradio_interface,
    inputs=[],
    outputs=_RESULT_BOXES,
    live=True,
    title="AI Personality and Tweet Generation",
    description="Automatically analyze personality and generate tweets based on a provided PDF of tweets.",
)

# Launch the app only when executed as a script, not when imported.
if __name__ == "__main__":
    iface.launch()