Murtaza249 committed
Commit c06e820 · verified · 1 Parent(s): 2285290

Update app.py

Files changed (1): app.py (+109 −30)
app.py CHANGED
@@ -1,6 +1,6 @@
 import streamlit as st
 import torch
-from transformers import pipeline, AutoTokenizer
+from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
 import random
 import time

@@ -11,7 +11,7 @@ st.set_page_config(
     layout="wide"
 )

-# Load the pipeline with caching
+# Load the model with caching
 @st.cache_resource
 def load_model():
     try:
@@ -22,24 +22,20 @@ def load_model():
         # Using a smaller, more efficient model that works well for question generation
         model_name = "valhalla/t5-small-e2e-qg"
         tokenizer = AutoTokenizer.from_pretrained(model_name)
+        model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

         # Set device
         device = "cuda" if torch.cuda.is_available() else "cpu"
         print(f"Using device: {device}")

-        # Load pipeline
-        qg_pipeline = pipeline(
-            "text2text-generation",
-            model=model_name,
-            tokenizer=tokenizer,
-            device=device
-        )
+        # Move model to device
+        model = model.to(device)

-        return qg_pipeline
+        return model, tokenizer, device
     except Exception as e:
         st.error(f"Error loading model: {str(e)}")
         print(f"Error details: {str(e)}")
-        return None
+        return None, None, None

 # Custom CSS
 def load_css():
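Note (editorial, not part of the diff): load_model() now returns a (model, tokenizer, device) tuple instead of a pipeline object, and @st.cache_resource memoizes that tuple, so the checkpoint is downloaded and moved to the GPU at most once per Streamlit process. A minimal sketch of how a caller can guard against the (None, None, None) failure value; the names mirror the commit and st.stop() is standard Streamlit:

    # Sketch only: consume the cached loader and bail out cleanly on failure.
    model, tokenizer, device = load_model()
    if model is None or tokenizer is None:
        st.stop()  # load_model() has already surfaced the error via st.error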
@@ -87,29 +83,64 @@ def load_css():
     """, unsafe_allow_html=True)

 # Function to generate questions from a passage
-def generate_questions(pipeline, text, num_questions=5):
+def generate_questions(model, tokenizer, device, text, num_questions=5):
     try:
-        # Make sure text is not too long
-        max_length = 1024
+        # Process text in chunks if it's too long
+        max_length = 512
+        chunks = []
+
         if len(text) > max_length:
-            text = text[:max_length]
+            # Simple chunking based on sentences
+            sentences = text.split('. ')
+            current_chunk = ""
+
+            for sentence in sentences:
+                if len(current_chunk) + len(sentence) < max_length:
+                    current_chunk += sentence + ". "
+                else:
+                    chunks.append(current_chunk)
+                    current_chunk = sentence + ". "
+
+            if current_chunk:
+                chunks.append(current_chunk)
+        else:
+            chunks = [text]

-        # Generate questions
-        result = pipeline(
-            text,
-            max_length=128,
-            num_return_sequences=num_questions,
-            clean_up_tokenization_spaces=True
-        )
+        all_generated_texts = []
+
+        # Process each chunk
+        for chunk in chunks:
+            inputs = tokenizer(chunk, return_tensors="pt", max_length=512, truncation=True)
+            inputs = {k: v.to(device) for k, v in inputs.items()}
+
+            # Generate with beam search for multiple diverse outputs
+            with torch.no_grad():
+                outputs = model.generate(
+                    inputs["input_ids"],
+                    max_length=64,
+                    num_beams=5,
+                    num_return_sequences=min(3, num_questions),  # Generate up to 3 questions per chunk
+                    temperature=1.0,
+                    diversity_penalty=1.0,
+                    num_beam_groups=5,
+                    early_stopping=True
+                )
+
+            decoded_outputs = tokenizer.batch_decode(outputs, skip_special_tokens=True)
+            all_generated_texts.extend(decoded_outputs)
+
+            # If we have enough questions, stop
+            if len(all_generated_texts) >= num_questions:
+                break
+
+        # Ensure we don't return more than num_questions
+        all_generated_texts = all_generated_texts[:num_questions]

         # Process and extract questions and answers
         questions_answers = []
-        for item in result:
-            generated_text = item["generated_text"]
-
-            # Handle different potential formats
+        for generated_text in all_generated_texts:
+            # Try to find question and answer
             if "?" in generated_text:
-                # Try to find question and answer
                 parts = generated_text.split("?", 1)
                 if len(parts) > 1:
                     question = parts[0].strip() + "?"
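Note (editorial, not part of the diff): the rewritten generate_questions() switches from the text2text-generation pipeline to a direct model.generate() call using diverse (group) beam search. Constraints that apply in transformers: num_beams must be divisible by num_beam_groups, num_return_sequences cannot exceed num_beams, and diversity_penalty only takes effect when num_beam_groups > 1; since group beam search does not sample, the temperature=1.0 argument has no effect here. A self-contained sketch of the same generation step, using the commit's checkpoint but an illustrative input sentence:

    import torch
    from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

    model_name = "valhalla/t5-small-e2e-qg"  # same checkpoint as in the commit
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

    text = "The Nile is the longest river in Africa and flows into the Mediterranean Sea."  # illustrative
    inputs = tokenizer(text, return_tensors="pt", max_length=512, truncation=True)

    with torch.no_grad():
        outputs = model.generate(
            inputs["input_ids"],
            max_length=64,
            num_beams=6,              # must be divisible by num_beam_groups
            num_beam_groups=3,
            diversity_penalty=1.0,    # only meaningful with num_beam_groups > 1
            num_return_sequences=3,   # must not exceed num_beams
            early_stopping=True,
        )

    for candidate in tokenizer.batch_decode(outputs, skip_special_tokens=True):
        print(candidate)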
@@ -129,6 +160,7 @@ def generate_questions(pipeline, text, num_questions=5):
         return questions_answers
     except Exception as e:
         st.error(f"Error generating questions: {str(e)}")
+        print(f"Detailed error: {str(e)}")
         return []

 # Function to create quiz from generated Q&A pairs
@@ -181,6 +213,48 @@ def create_quiz(questions_answers, num_options=4):

     return quiz_items

+# Alternative question generation using simpler approach
+def generate_questions_simple(text, num_questions=5):
+    try:
+        # Simple question generation for demonstration
+        # In a real app, you'd use a proper NLP model
+
+        # Extract sentences
+        sentences = text.split('.')
+        sentences = [s.strip() for s in sentences if len(s.strip()) > 20]
+
+        # Select random sentences to turn into questions
+        if len(sentences) < num_questions:
+            selected_sentences = sentences
+        else:
+            selected_sentences = random.sample(sentences, num_questions)
+
+        questions_answers = []
+
+        # Simple transformation of sentences into questions
+        for sentence in selected_sentences:
+            # Very simple question generation (not ideal but works as fallback)
+            words = sentence.split()
+            if len(words) < 5:
+                continue
+
+            # Extract key entities for answer
+            potential_answer = " ".join(words[-3:])
+
+            # Create question from beginning of sentence
+            question_words = words[:len(words)-3]
+            question = " ".join(question_words) + "?"
+
+            questions_answers.append({
+                "question": question,
+                "answer": potential_answer
+            })
+
+        return questions_answers
+    except Exception as e:
+        print(f"Error in simple question generation: {str(e)}")
+        return []
+
 # Main app
 def main():
     load_css()
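Note (editorial, not part of the diff): generate_questions_simple() is a pure string heuristic, not a model. It keeps sentences longer than 20 characters, treats the last three words of a sentence as the "answer", and turns the remaining words into the "question", so the output is closer to a fill-in-the-blank prompt than a grammatical question. A quick illustration with a made-up passage:

    passage = (
        "The mitochondrion is often called the powerhouse of the cell. "
        "Photosynthesis converts sunlight into chemical energy inside plant leaves."
    )
    for qa in generate_questions_simple(passage, num_questions=2):
        print(qa["question"], "->", qa["answer"])
    # e.g. "The mitochondrion is often called the powerhouse?" -> "of the cell"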
@@ -215,14 +289,19 @@ def main():
     if passage and len(passage) > 50:
         # Loading the model (with the cached resource)
         with st.spinner("Loading AI model..."):
-            qg_pipeline = load_model()
+            model, tokenizer, device = load_model()

-        if qg_pipeline:
+        if model and tokenizer and device:
             # Generate questions
             with st.spinner("Generating questions..."):
                 # Add a small delay for UX
                 time.sleep(1)
-                questions_answers = generate_questions(qg_pipeline, passage, num_questions)
+                questions_answers = generate_questions(model, tokenizer, device, passage, num_questions)
+
+                # If primary method fails, try fallback approach
+                if not questions_answers:
+                    st.warning("Advanced question generation failed. Using simple approach instead.")
+                    questions_answers = generate_questions_simple(passage, num_questions)

             if questions_answers:
                 # Create quiz
 