import gradio as gr import PyPDF2 import docx from openai import OpenAI import io import json import time from typing import List, Dict, Any, Optional import spaces import os import re # Global variables to store API key and document text API_KEY = "" DOCUMENT_TEXT = "" MODEL = "google/gemma-3-27b-it:free" def setup_client(api_key: str): """Initialize and test API key""" global API_KEY try: if not api_key or api_key.strip() == "": return "❌ Please enter a valid API key" # Test the API key by creating a client client = OpenAI( base_url="https://openrouter.ai/api/v1", api_key=api_key.strip(), ) # Store the API key globally API_KEY = api_key.strip() return "✅ API Key configured successfully!" except Exception as e: return f"❌ Error configuring API: {str(e)}" def create_client() -> Optional[OpenAI]: """Create OpenAI client with stored API key""" if not API_KEY: return None return OpenAI( base_url="https://openrouter.ai/api/v1", api_key=API_KEY, ) def extract_text_from_pdf(file_path: str) -> str: """Extract text from PDF file""" try: with open(file_path, 'rb') as file: pdf_reader = PyPDF2.PdfReader(file) text = "" for page_num, page in enumerate(pdf_reader.pages): try: page_text = page.extract_text() if page_text: text += page_text + "\n" except Exception as e: print(f"Error extracting text from page {page_num}: {e}") continue return text.strip() except Exception as e: return f"Error reading PDF: {str(e)}" def extract_text_from_docx(file_path: str) -> str: """Extract text from DOCX file""" try: doc = docx.Document(file_path) text = "" for paragraph in doc.paragraphs: if paragraph.text.strip(): text += paragraph.text + "\n" # Also extract text from tables for table in doc.tables: for row in table.rows: for cell in row.cells: if cell.text.strip(): text += cell.text + "\n" return text.strip() except Exception as e: return f"Error reading DOCX: {str(e)}" def process_document(file): """Process uploaded document and extract text""" global DOCUMENT_TEXT print(f"Processing file: {file}") # Debug print if file is None: DOCUMENT_TEXT = "" return "❌ No file uploaded", "❌ No document loaded" try: file_path = file.name if hasattr(file, 'name') else str(file) print(f"File path: {file_path}") # Debug print # Check if file exists if not os.path.exists(file_path): DOCUMENT_TEXT = "" return "❌ File not found", "❌ No document loaded" # Get file extension file_extension = file_path.lower().split('.')[-1] print(f"File extension: {file_extension}") # Debug print # Extract text based on file type if file_extension == 'pdf': extracted_text = extract_text_from_pdf(file_path) elif file_extension in ['docx', 'doc']: extracted_text = extract_text_from_docx(file_path) else: DOCUMENT_TEXT = "" return "❌ Unsupported file format. Please upload PDF or DOCX files.", "❌ No document loaded" print(f"Extracted text length: {len(extracted_text) if extracted_text else 0}") # Debug print # Check if extraction was successful if extracted_text.startswith("Error"): DOCUMENT_TEXT = "" return extracted_text, "❌ No document loaded" # Clean and set the global variable DOCUMENT_TEXT = extracted_text.strip() if DOCUMENT_TEXT and len(DOCUMENT_TEXT) > 10: # Minimum length check word_count = len(DOCUMENT_TEXT.split()) char_count = len(DOCUMENT_TEXT) preview = DOCUMENT_TEXT[:300] + "..." if len(DOCUMENT_TEXT) > 300 else DOCUMENT_TEXT status_msg = f"✅ Document loaded ({word_count} words, {char_count} characters)" process_msg = f"✅ Document processed successfully!\n📄 Word count: {word_count}\n📝 Character count: {char_count}\n\n📖 Preview:\n{preview}" print(f"Document processed successfully. Word count: {word_count}") # Debug print return process_msg, status_msg else: DOCUMENT_TEXT = "" return "❌ Could not extract meaningful text from the document. The document might be empty, contain only images, or be corrupted.", "❌ No document loaded" except Exception as e: DOCUMENT_TEXT = "" error_msg = f"❌ Error processing document: {str(e)}" print(f"Error: {error_msg}") # Debug print return error_msg, "❌ No document loaded" def clean_html_response(content: str) -> str: """Clean HTML response by removing markdown code blocks and fixing formatting""" if not content: return content # Remove ```html and ``` blocks content = re.sub(r'```html\s*', '', content) content = re.sub(r'```\s*$', '', content, flags=re.MULTILINE) content = re.sub(r'```', '', content) # Remove any remaining markdown code block indicators content = re.sub(r'^```.*?\n', '', content, flags=re.MULTILINE) # Clean up extra whitespace content = re.sub(r'\n\s*\n\s*\n', '\n\n', content) return content.strip() def generate_content(prompt: str, max_tokens: int = 2000) -> str: """Generate content using the AI model""" global DOCUMENT_TEXT, API_KEY print(f"Generate content called. API_KEY exists: {bool(API_KEY)}, DOCUMENT_TEXT length: {len(DOCUMENT_TEXT) if DOCUMENT_TEXT else 0}") # Debug print if not API_KEY or API_KEY.strip() == "": return "❌ Please configure your API key first" if not DOCUMENT_TEXT or len(DOCUMENT_TEXT.strip()) < 10: return "❌ Please upload and process a document first. Make sure the document contains readable text." try: client = create_client() if not client: return "❌ Failed to create API client" print("Sending request to API...") # Debug print completion = client.chat.completions.create( extra_headers={ "HTTP-Referer": "https://educational-assistant.app", "X-Title": "Educational Content Creator", }, model=MODEL, messages=[ { "role": "system", "content": "You are an expert educational content creator. Create comprehensive, engaging, and pedagogically sound educational materials based on the provided document content. Format your response using proper HTML tags for better presentation. IMPORTANT: Do not wrap your response in ```html or ``` code blocks. Return clean HTML content only." }, { "role": "user", "content": f"Document Content:\n{DOCUMENT_TEXT[:4000]}\n\n{prompt}" # Limit document content to avoid token limits } ], max_tokens=max_tokens, temperature=0.7 ) result = completion.choices[0].message.content print(f"API response received. Length: {len(result) if result else 0}") # Debug print # Clean the HTML response cleaned_result = clean_html_response(result) return cleaned_result except Exception as e: error_msg = f"❌ Error generating content: {str(e)}" print(f"API Error: {error_msg}") # Debug print return error_msg def create_download_file(content: str, filename: str) -> str: """Create downloadable HTML file""" html_template = f""" Educational Content - {filename}
{content}
""" # Save to temporary file temp_filename = f"temp_{filename}.html" with open(temp_filename, 'w', encoding='utf-8') as f: f.write(html_template) return temp_filename # Content generation functions with @spaces.GPU decorator @spaces.GPU def generate_summary(): """Generate comprehensive summary""" prompt = """Create a comprehensive summary of this document with proper HTML formatting: Use these HTML tags for structure: -

for main title -

for section headers (Executive Summary, Key Points, etc.) -

for subsections -