"""Study Assistant — HuggingFace Space for document analysis (summary, outline,
study guide, etc.) powered by Llama-3.3-70B via the Cerebras provider."""
# Standard library
import io
import os
from typing import Optional

# Third-party
import docx
import gradio as gr
import PyPDF2
import spaces  # required on HF Spaces for hardware allocation side effects
from huggingface_hub import InferenceClient

# Local
from prompts import SYSTEM_PROMPT, PROMPTS
def extract_text_from_file(file) -> str:
    """Extract plain text from an uploaded PDF, DOCX, or TXT file.

    Args:
        file: Gradio file object (anything exposing a ``.name`` filesystem
            path), or ``None`` when nothing was uploaded.

    Returns:
        The extracted text; ``""`` when ``file`` is ``None`` or the extension
        is unsupported; an ``"Error reading file: ..."`` message on failure.
    """
    if file is None:
        return ""

    file_path = file.name
    # Compare extensions case-insensitively so ".PDF"/".TXT" etc. also work.
    lower_path = file_path.lower()
    try:
        if lower_path.endswith('.pdf'):
            with open(file_path, 'rb') as f:
                reader = PyPDF2.PdfReader(f)
                # extract_text() may return None/"" for image-only pages;
                # coalesce to "" so concatenation never crashes.
                return "".join(
                    (page.extract_text() or "") + "\n" for page in reader.pages
                )
        if lower_path.endswith('.docx'):
            doc = docx.Document(file_path)
            return "".join(paragraph.text + "\n" for paragraph in doc.paragraphs)
        if lower_path.endswith('.txt'):
            with open(file_path, 'r', encoding='utf-8') as f:
                return f.read()
    except Exception as e:
        # Surface the failure to the UI as text instead of crashing the app.
        return f"Error reading file: {str(e)}"
    # Unsupported extension: mirror the original "no text extracted" result.
    return ""
def process_document(document, operation_type, text_input):
    """Run the selected analysis on an uploaded document or pasted text.

    Args:
        document: Uploaded file object from the Gradio File input, or ``None``.
        operation_type: Dropdown choice used to look up the task prompt in
            ``PROMPTS`` (unknown values fall back to an empty prompt).
        text_input: Text pasted directly into the textbox; used only when no
            file was uploaded.

    Returns:
        The model's response text, or a human-readable error message.
    """
    # Prefer the uploaded file; fall back to pasted text.
    if document is not None:
        text = extract_text_from_file(document)
    else:
        text = text_input

    # Guard against None (Gradio can pass None) as well as whitespace-only input.
    if not text or not text.strip():
        return "Please provide either a document or text input."

    prompt = PROMPTS.get(operation_type, "")

    try:
        # Route the request through the Cerebras provider on HuggingFace.
        client = InferenceClient(
            "meta-llama/Llama-3.3-70B-Instruct",
            provider="cerebras",
            token=os.getenv("HF_TOKEN"),
        )
        messages = [
            {"role": "system", "content": SYSTEM_PROMPT},
            {"role": "user", "content": f"{prompt}\n\nDocument content:\n{text}"},
        ]
        # Low temperature keeps the analysis output focused and repeatable.
        response = client.chat_completion(
            messages=messages,
            max_tokens=3000,
            temperature=0.1,
            stream=False,
        )
        return response.choices[0].message.content
    except Exception as e:
        return (
            f"Error: {str(e)}\n\nPlease ensure:\n"
            "1. HF_TOKEN is set in settings\n"
            "2. You have Pro access to use Cerebras inference\n"
            "3. The Cerebras/Llama integration is enabled in your account"
        )
# Create the Gradio interface: inputs (file upload / pasted text) on the left,
# operation selector, trigger button and model output on the right.
with gr.Blocks(title="Study Assistant", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# π Study Assistant - Document Analysis Tool")
    gr.Markdown("Upload a document or paste text, then select the type of analysis you want to perform.")
    gr.Markdown("*Powered by Meta Llama-3.3-70B via Cerebras on HuggingFace*")

    with gr.Row():
        with gr.Column():
            document = gr.File(
                label="Upload Document",
                file_types=[".pdf", ".docx", ".txt"],
                file_count="single",
            )
            text_input = gr.Textbox(
                label="Or paste text directly",
                lines=5,
                placeholder="Paste your text here if you don't want to upload a file...",
            )
        with gr.Column():
            operation_type = gr.Dropdown(
                choices=["Summary", "Outline", "Analysis", "Study Guide", "Table", "Questions"],
                label="Select Operation",
                value="Summary",
            )
            process_btn = gr.Button("π Process Document", variant="primary", size="lg")
            output = gr.Textbox(
                label="Output",
                lines=20,
                show_copy_button=True,
            )

    gr.Markdown("---")
    gr.Markdown("### Tips:")
    gr.Markdown("- Supported formats: PDF, DOCX, TXT")
    gr.Markdown("- Maximum file size: 200MB")
    gr.Markdown("- Text can be pasted directly if you don't have a file")
    gr.Markdown("- Uses HuggingFace Pro account with Cerebras access")

    # Wire the button to the processing function.
    process_btn.click(
        fn=process_document,
        inputs=[document, operation_type, text_input],
        outputs=output,
        show_progress=True,
    )

if __name__ == "__main__":
    demo.launch()