import gradio as gr
import spaces
import PyPDF2
import docx
import io
import os
from typing import Optional

from huggingface_hub import InferenceClient

from prompts import SYSTEM_PROMPT, PROMPTS


def extract_text_from_file(file) -> str:
    """Extract plain text from an uploaded PDF, DOCX, or TXT file.

    Args:
        file: A Gradio file object exposing a ``.name`` filesystem path,
            or ``None`` when nothing was uploaded.

    Returns:
        The extracted text, an ``"Error reading file: ..."`` message when
        extraction raised, or ``""`` when *file* is ``None`` or has an
        unsupported extension.
    """
    if file is None:
        return ""

    file_path = file.name
    # Compare the extension case-insensitively so ".PDF"/".TXT" uploads work.
    suffix = os.path.splitext(file_path)[1].lower()

    try:
        if suffix == ".pdf":
            with open(file_path, "rb") as f:
                reader = PyPDF2.PdfReader(f)
                # extract_text() may return None for image-only pages;
                # join() avoids quadratic += concatenation on large PDFs.
                return "\n".join(
                    (page.extract_text() or "") for page in reader.pages
                ) + "\n"
        elif suffix == ".docx":
            doc = docx.Document(file_path)
            return "\n".join(p.text for p in doc.paragraphs) + "\n"
        elif suffix == ".txt":
            with open(file_path, "r", encoding="utf-8") as f:
                return f.read()
    except Exception as e:
        # Surfaced to the user by process_document rather than raised,
        # so a bad file never crashes the UI callback.
        return f"Error reading file: {str(e)}"

    # Unsupported extension: return empty so the caller shows its
    # "please provide a document" message.
    return ""


@spaces.GPU
def process_document(document, operation_type, text_input):
    """Run the selected analysis on an uploaded document or pasted text.

    Uses meta-llama/Llama-3.3-70B-Instruct via the Cerebras provider on
    the HuggingFace Inference API.

    Args:
        document: Optional uploaded file (PDF/DOCX/TXT) from gr.File.
        operation_type: Key into PROMPTS selecting the analysis type
            (e.g. "Summary", "Outline").
        text_input: Fallback raw text used when no file is uploaded.

    Returns:
        The model's response text, or a human-readable error message.
    """
    if document is not None:
        text = extract_text_from_file(document)
        # Show extraction failures to the user instead of forwarding the
        # error string to the model as if it were document content.
        if text.startswith("Error reading file:"):
            return text
    else:
        # Guard against a None textbox value so .strip() cannot crash.
        text = text_input or ""

    if not text.strip():
        return "Please provide either a document or text input."

    # Unknown operation types degrade to an empty prompt rather than a KeyError.
    prompt = PROMPTS.get(operation_type, "")

    try:
        client = InferenceClient(
            "meta-llama/Llama-3.3-70B-Instruct",
            provider="cerebras",
            token=os.getenv("HF_TOKEN"),
        )

        messages = [
            {"role": "system", "content": SYSTEM_PROMPT},
            {"role": "user", "content": f"{prompt}\n\nDocument content:\n{text}"},
        ]

        response = client.chat_completion(
            messages=messages,
            max_tokens=3000,
            temperature=0.1,
            stream=False,
        )
        return response.choices[0].message.content
    except Exception as e:
        return (
            f"Error: {str(e)}\n\nPlease ensure:\n"
            "1. HF_TOKEN is set in settings\n"
            "2. You have Pro access to use Cerebras inference\n"
            "3. The Cerebras/Llama integration is enabled in your account"
        )


# ---------------------------------------------------------------------------
# Gradio UI: file/text input on the left, operation selector and output on
# the right, wired to process_document via the button click.
# ---------------------------------------------------------------------------
with gr.Blocks(title="Study Assistant", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 📚 Study Assistant - Document Analysis Tool")
    gr.Markdown("Upload a document or paste text, then select the type of analysis you want to perform.")
    gr.Markdown("*Powered by Meta Llama-3.3-70B via Cerebras on HuggingFace*")

    with gr.Row():
        with gr.Column():
            document = gr.File(
                label="Upload Document",
                file_types=[".pdf", ".docx", ".txt"],
                file_count="single",
            )
            text_input = gr.Textbox(
                label="Or paste text directly",
                lines=5,
                placeholder="Paste your text here if you don't want to upload a file...",
            )
        with gr.Column():
            operation_type = gr.Dropdown(
                choices=["Summary", "Outline", "Analysis", "Study Guide", "Table", "Questions"],
                label="Select Operation",
                value="Summary",
            )

    process_btn = gr.Button("🚀 Process Document", variant="primary", size="lg")

    output = gr.Textbox(
        label="Output",
        lines=20,
        show_copy_button=True,
    )

    gr.Markdown("---")
    gr.Markdown("### Tips:")
    gr.Markdown("- Supported formats: PDF, DOCX, TXT")
    gr.Markdown("- Maximum file size: 200MB")
    gr.Markdown("- Text can be pasted directly if you don't have a file")
    gr.Markdown("- Uses HuggingFace Pro account with Cerebras access")

    process_btn.click(
        fn=process_document,
        inputs=[document, operation_type, text_input],
        outputs=output,
        show_progress=True,
    )

if __name__ == "__main__":
    demo.launch()