Manasa1 committed
Commit 07099e3 · verified · 1 Parent(s): b6a091c

Update app.py

Files changed (1)
  1. app.py +32 -132
app.py CHANGED
@@ -4,139 +4,39 @@ import random
  from datetime import datetime
  from PyPDF2 import PdfReader
  import json
- from dotenv import load_dotenv
-
- load_dotenv()
-
- class TweetDatasetProcessor:
-     def __init__(self, fine_tuned_model_name, pdf_path):
-         self.tweets = []
-         self.personality_profile = {}
-         self.vectorizer = None  # No need for vectorizer here since we're not clustering
-         self.used_tweets = set()  # Track used tweets to avoid repetition
-         self.pdf_path = pdf_path
-
-         # Load fine-tuned model and tokenizer
-         self.model = AutoModelForCausalLM.from_pretrained(fine_tuned_model_name)
-         self.tokenizer = AutoTokenizer.from_pretrained(fine_tuned_model_name)
-
-     @staticmethod
-     def _process_line(line):
-         """Process a single line."""
-         line = line.strip()
-         if not line or line.startswith('http'):  # Skip empty lines and URLs
-             return None
-         # Clean any unwanted characters and fix broken words
-         line = line.replace('\u201c', '"').replace('\u201d', '"')  # Replacing smart quotes
-         return {
-             'content': line,
-             'timestamp': datetime.now(),
-             'mentions': [word for word in line.split() if word.startswith('@')],
-             'hashtags': [word for word in line.split() if word.startswith('#')]
-         }
-
-     def extract_text_from_pdf(self):
-         """Extract text content from PDF file."""
-         reader = PdfReader(self.pdf_path)
-         text = ""
-         for page in reader.pages:
-             text += page.extract_text()
-         return text
-
-     def process_pdf_content(self, text):
-         """Process PDF content and clean extracted tweets."""
-         if not text.strip():
-             raise ValueError("The provided PDF appears to be empty.")
-
-         lines = text.split('\n')
-         clean_tweets = [TweetDatasetProcessor._process_line(line) for line in lines]
-         self.tweets = [tweet for tweet in clean_tweets if tweet]
-
-         if not self.tweets:
-             raise ValueError("No tweets were extracted from the PDF. Ensure the content is properly formatted.")
-
-         return self.tweets
-
-     def analyze_personality(self, max_tweets=50):
-         """Comprehensive personality analysis using a limited subset of tweets."""
-         if not self.tweets:
-             raise ValueError("No tweets available for personality analysis.")
-
-         all_tweets = [tweet['content'] for tweet in self.tweets][:max_tweets]
-         analysis_prompt = f"""Perform a deep psychological analysis of the author based on these tweets:
-         Core beliefs, emotional tendencies, cognitive patterns, etc.
-         Tweets for analysis:
-         {json.dumps(all_tweets, indent=2)}
-         """
-
-         input_ids = self.tokenizer.encode(analysis_prompt, return_tensors='pt')
-         output = self.model.generate(input_ids, max_length=500, num_return_sequences=1, temperature=0.7)
-         personality_analysis = self.tokenizer.decode(output[0], skip_special_tokens=True)
-
-         self.personality_profile = personality_analysis
-         return self.personality_profile
-
-     def generate_tweet(self, context="", sample_size=3):
-         """Generate a new tweet by sampling random tweets and avoiding repetition."""
-         if not self.tweets:
-             return "Error: No tweets available for generation."
-
-         # Randomly sample unique tweets
-         available_tweets = [tweet for tweet in self.tweets if tweet['content'] not in self.used_tweets]
-         if len(available_tweets) < sample_size:
-             self.used_tweets.clear()  # Reset used tweets if all have been used
-             available_tweets = self.tweets
-
-         sampled_tweets = random.sample(available_tweets, sample_size)
-         sampled_contents = [tweet['content'] for tweet in sampled_tweets]
-
-         # Update the used tweets tracker
-         self.used_tweets.update(sampled_contents)
-
-         # Truncate personality profile to avoid token overflow
-         personality_profile_excerpt = self.personality_profile[:400] if len(self.personality_profile) > 400 else self.personality_profile
-
-         # Construct the prompt
-         prompt = f"""Based on this personality profile:
-         {personality_profile_excerpt}
-         Current context or topic (if any):
-         {context}
-         Tweets for context:
-         {', '.join(sampled_contents)}
-         **Only generate the tweet. Do not include analysis, explanation, or any other content.**
-         """
-
-         input_ids = self.tokenizer.encode(prompt, return_tensors='pt', max_length=1024, truncation=True)
-         output = self.model.generate(input_ids, max_length=500, num_return_sequences=1, temperature=1.0)
-         generated_tweet = self.tokenizer.decode(output[0], skip_special_tokens=True).strip()
-
-         return generated_tweet
-
- # Gradio Interface Function
- def gradio_interface():
-     # Path to the PDF with tweets
-     pdf_path = 'Dataset (4).pdf'  # Replace with your PDF file path
-     fine_tuned_model_name = 'Manasa1/GPT2_Finetuned_tweets'  # Replace with the path to your fine-tuned model
-
-     processor = TweetDatasetProcessor(fine_tuned_model_name, pdf_path)
-
-     text = processor.extract_text_from_pdf()
-     tweets = processor.process_pdf_content(text)
-     processor.analyze_personality(max_tweets=50)  # Analyze personality, but don't return the result
-     generated_tweet = processor.generate_tweet(context="AI-powered tweet generation", sample_size=3)
-
-     return generated_tweet  # Only return the generated tweet
-
- # Gradio app setup
- iface = gr.Interface(
-     fn=gradio_interface,
-     inputs=[],
-     outputs=gr.Textbox(label="Generated Tweet"),  # Only output the generated tweet
-     live=False,  # Set to False to generate only when user clicks the button
-     title="AI Tweet Generation",
-     description="Generate tweets based on the personality profile and tweets from a PDF document."
+ from transformers import AutoModelForCausalLM, AutoTokenizer
+
+ # Replace 'username/your_model_name' with your Hugging Face model name
+ model_name = "username/your_model_name"
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
+ model = AutoModelForCausalLM.from_pretrained(model_name)
+
+ def generate_tweet(prompt):
+     # Tokenize the input
+     inputs = tokenizer(prompt, return_tensors="pt")
+
+     # Generate text using the model
+     outputs = model.generate(
+         inputs["input_ids"],
+         max_length=280,  # Caps total length in tokens (prompt + output), not characters; see note below
+         num_return_sequences=1,  # Number of tweets to generate
+         top_k=50,  # Sampling from top k tokens
+         top_p=0.95,  # Sampling from top p cumulative probability
+         temperature=0.7,  # Adjust creativity
+         do_sample=True,  # Enable sampling
+     )
+
+     # Decode the generated text
+     tweet = tokenizer.decode(outputs[0], skip_special_tokens=True)
+     return tweet
+
+ interface = gr.Interface(
+     fn=generate_tweet,  # The function to call
+     inputs="text",  # User input is a single text box
+     outputs="text",  # Output is text
+     title="AI Tweet Generator",
+     description="Enter a topic or a few words, and the AI will generate a creative tweet!"
  )

  # Launch the app
- if __name__ == "__main__":
-     iface.launch()
+ interface.launch()
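
A note for anyone adapting the updated app.py: in model.generate, max_length=280 caps the total number of tokens (prompt included), not characters, so a decoded tweet can still run past Twitter's 280-character limit. Below is a minimal post-processing sketch; the enforce_char_limit helper is hypothetical and not part of this commit.

# Hypothetical helper: hard-enforce the 280-character limit after decoding,
# since max_length in model.generate() counts tokens, not characters.
def enforce_char_limit(text: str, limit: int = 280) -> str:
    text = text.strip()
    if len(text) <= limit:
        return text
    # Cut back to the last whole word that fits, then mark the truncation
    return text[: limit - 1].rsplit(" ", 1)[0] + "…"

# Usage sketch, e.g. as the last line of generate_tweet:
# return enforce_char_limit(tweet)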
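
Relatedly, because max_length includes the prompt tokens, a long user prompt leaves less room for generated text. A sketch of the same call using the transformers max_new_tokens argument instead, which caps only the newly generated tokens (the value 60 is illustrative, not from the commit):

outputs = model.generate(
    inputs["input_ids"],
    max_new_tokens=60,  # cap on generated tokens only; prompt length no longer counts
    num_return_sequences=1,
    top_k=50,
    top_p=0.95,
    temperature=0.7,
    do_sample=True,
)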