Spaces:

Manasa1
/

tweets_clone

Sleeping

App Files Files Community

Manasa1 committed on Dec 14, 2024

Commit

ac5fd0d

verified ·

1 Parent(s): b92a169

Update app.py

Browse files

Files changed (1) hide show

app.py +27 -32

app.py CHANGED Viewed

@@ -22,10 +22,12 @@ class TweetDatasetProcessor:
     @staticmethod
     def _process_line(line):
-        """Process a single line."""
         line = line.strip()
         if not line or line.startswith('http'):  # Skip empty lines and URLs
             return None
         return {
             'content': line,
             'timestamp': datetime.now(),
@@ -34,7 +36,7 @@ class TweetDatasetProcessor:
         }
     def extract_text_from_pdf(self):
-        """Extract text content from PDF file."""
         reader = PdfReader(self.pdf_path)
         text = ""
         for page in reader.pages:
@@ -42,7 +44,7 @@ class TweetDatasetProcessor:
         return text
     def process_pdf_content(self, text):
-        """Process PDF content and clean extracted tweets."""
         if not text.strip():
             raise ValueError("The provided PDF appears to be empty.")
@@ -56,16 +58,16 @@ class TweetDatasetProcessor:
         return self.tweets
     def analyze_personality(self, max_tweets=50):
-        """Comprehensive personality analysis using a limited subset of tweets."""
         if not self.tweets:
             raise ValueError("No tweets available for personality analysis.")
         all_tweets = [tweet['content'] for tweet in self.tweets][:max_tweets]
         analysis_prompt = f"""Perform a deep psychological analysis of the author based on these tweets:
-        Core beliefs, emotional tendencies, cognitive patterns, etc.
-        Tweets for analysis:
-        {json.dumps(all_tweets, indent=2)}
-        """
         input_ids = self.tokenizer.encode(analysis_prompt, return_tensors='pt')
         output = self.model.generate(input_ids, max_length=500, num_return_sequences=1, temperature=0.7)
@@ -75,7 +77,7 @@ class TweetDatasetProcessor:
         return self.personality_profile
     def generate_tweet(self, context="", sample_size=3):
-        """Generate a new tweet by sampling random tweets and avoiding repetition."""
         if not self.tweets:
             return "Error: No tweets available for generation."
@@ -96,20 +98,16 @@ class TweetDatasetProcessor:
         # Construct the prompt
         prompt = f"""Based on this personality profile:
-        {personality_profile_excerpt}
-        Current context or topic (if any):
-        {context}
-        Tweets for context:
-        {', '.join(sampled_contents)}
-        **Only generate the tweet. Do not include analysis, explanation, or any other content.**
-        """
-        inputs = self.tokenizer(prompt, return_tensors='pt', truncation=True, padding=True)
-        input_ids = inputs['input_ids']
-        attention_mask = inputs['attention_mask']
-        pad_token_id = self.tokenizer.eos_token_id
-        output = self.model.generate(input_ids, attention_mask=attention_mask, max_length=500, num_return_sequences=1, temperature=1.0, pad_token_id=pad_token_id,do_sample = True)
         generated_tweet = self.tokenizer.decode(output[0], skip_special_tokens=True).strip()
         return generated_tweet
@@ -124,22 +122,19 @@ def gradio_interface():
     text = processor.extract_text_from_pdf()
     tweets = processor.process_pdf_content(text)
-    personality_analysis = processor.analyze_personality(max_tweets=50)
     generated_tweet = processor.generate_tweet(context="AI-powered tweet generation", sample_size=3)
-    return personality_analysis, generated_tweet
 # Gradio app setup
 iface = gr.Interface(
     fn=gradio_interface,
     inputs=[],
-    outputs=[
-        gr.Textbox(label="Personality Analysis"),
-        gr.Textbox(label="Generated Tweet")
-    ],
-    live=True,
-    title="AI Personality and Tweet Generation",
-    description="Automatically analyze personality and generate tweets based on a provided PDF of tweets."
 )
 # Launch the app

     @staticmethod
     def _process_line(line):
+        """Process a single line."""
         line = line.strip()
         if not line or line.startswith('http'):  # Skip empty lines and URLs
             return None
+        # Clean any unwanted characters and fix broken words
+        line = line.replace('\u201c', '"').replace('\u201d', '"')  # Replacing smart quotes
         return {
             'content': line,
             'timestamp': datetime.now(),
         }
     def extract_text_from_pdf(self):
+        """Extract text content from PDF file."""
         reader = PdfReader(self.pdf_path)
         text = ""
         for page in reader.pages:
         return text
     def process_pdf_content(self, text):
+        """Process PDF content and clean extracted tweets."""
         if not text.strip():
             raise ValueError("The provided PDF appears to be empty.")
         return self.tweets
     def analyze_personality(self, max_tweets=50):
+        """Comprehensive personality analysis using a limited subset of tweets."""
         if not self.tweets:
             raise ValueError("No tweets available for personality analysis.")
         all_tweets = [tweet['content'] for tweet in self.tweets][:max_tweets]
         analysis_prompt = f"""Perform a deep psychological analysis of the author based on these tweets:
+Core beliefs, emotional tendencies, cognitive patterns, etc.
+Tweets for analysis:
+{json.dumps(all_tweets, indent=2)}
+"""
         input_ids = self.tokenizer.encode(analysis_prompt, return_tensors='pt')
         output = self.model.generate(input_ids, max_length=500, num_return_sequences=1, temperature=0.7)
         return self.personality_profile
     def generate_tweet(self, context="", sample_size=3):
+        """Generate a new tweet by sampling random tweets and avoiding repetition."""
         if not self.tweets:
             return "Error: No tweets available for generation."
         # Construct the prompt
         prompt = f"""Based on this personality profile:
+{personality_profile_excerpt}
+Current context or topic (if any):
+{context}
+Tweets for context:
+{', '.join(sampled_contents)}
+**Only generate the tweet. Do not include analysis, explanation, or any other content.**
+"""
+        input_ids = self.tokenizer.encode(prompt, return_tensors='pt', max_length=1024, truncation=True)
+        output = self.model.generate(input_ids, max_length=500, num_return_sequences=1, temperature=1.0)
         generated_tweet = self.tokenizer.decode(output[0], skip_special_tokens=True).strip()
         return generated_tweet
     text = processor.extract_text_from_pdf()
     tweets = processor.process_pdf_content(text)
+    processor.analyze_personality(max_tweets=50)  # Analyze personality, but don't return the result
     generated_tweet = processor.generate_tweet(context="AI-powered tweet generation", sample_size=3)
+    return generated_tweet  # Only return the generated tweet
 # Gradio app setup
 iface = gr.Interface(
     fn=gradio_interface,
     inputs=[],
+    outputs=gr.Textbox(label="Generated Tweet"),  # Only output the generated tweet
+    live=False,  # Set to False to generate only when user clicks the button
+    title="AI Tweet Generation",
+    description="Generate tweets based on the personality profile and tweets from a PDF document."
 )
 # Launch the app