# app.py — Study Assistant (Gradio Space)
# Author: SlouchyBuffalo — revision e5b3ba7 ("Update app.py"), 4.26 kB
import gradio as gr
import spaces
import PyPDF2
import docx
import io
import os
from typing import Optional
from huggingface_hub import InferenceClient
from prompts import SYSTEM_PROMPT, PROMPTS
def extract_text_from_file(file) -> str:
    """Extract plain text from an uploaded file.

    Supports ``.pdf`` (via PyPDF2), ``.docx`` (via python-docx) and ``.txt``.
    Extensions are matched case-insensitively, so ``REPORT.PDF`` also works.

    Args:
        file: An uploaded-file object exposing a ``name`` attribute holding
            the on-disk path (Gradio's ``gr.File`` value), or ``None``.

    Returns:
        The extracted text; ``""`` when ``file`` is ``None`` or the extension
        is unsupported; an ``"Error reading file: ..."`` message when
        extraction raises.
    """
    if file is None:
        return ""
    file_path = file.name
    # Normalize the extension once so the branches below are case-insensitive.
    ext = os.path.splitext(file_path)[1].lower()
    parts = []
    try:
        if ext == '.pdf':
            with open(file_path, 'rb') as f:
                reader = PyPDF2.PdfReader(f)
                for page in reader.pages:
                    # extract_text() may return None (e.g. image-only pages);
                    # coerce to "" so concatenation cannot raise TypeError.
                    parts.append((page.extract_text() or "") + "\n")
        elif ext == '.docx':
            doc = docx.Document(file_path)
            for paragraph in doc.paragraphs:
                parts.append(paragraph.text + "\n")
        elif ext == '.txt':
            with open(file_path, 'r', encoding='utf-8') as f:
                return f.read()
    except Exception as e:
        return f"Error reading file: {str(e)}"
    # Join once instead of repeated string += (avoids quadratic behavior).
    return "".join(parts)
@spaces.GPU
def process_document(document, operation_type, text_input):
    """Run the selected analysis on a document or pasted text.

    Uses Meta Llama-3.3-70B via the Cerebras provider on HuggingFace.

    Args:
        document: Uploaded file (Gradio ``gr.File`` value) or ``None``.
        operation_type: Key into ``PROMPTS`` selecting the analysis type
            (e.g. "Summary", "Outline"); unknown keys fall back to "".
        text_input: Text pasted directly into the textbox; may be ``None``
            or empty when a file is used instead.

    Returns:
        The model's response text, or a human-readable error message
        (no exception escapes this function).
    """
    # Prefer the uploaded file; otherwise fall back to the pasted text.
    if document is not None:
        text = extract_text_from_file(document)
    else:
        # text_input can be None if the textbox was never touched —
        # coerce to "" so .strip() below cannot raise AttributeError.
        text = text_input or ""
    if not text.strip():
        return "Please provide either a document or text input."
    # Unknown operation types degrade to an empty task prompt rather than failing.
    prompt = PROMPTS.get(operation_type, "")
    try:
        # Route the request through the Cerebras provider; requires HF_TOKEN.
        client = InferenceClient(
            "meta-llama/Llama-3.3-70B-Instruct",
            provider="cerebras",
            token=os.getenv("HF_TOKEN"),
        )
        messages = [
            {"role": "system", "content": SYSTEM_PROMPT},
            {"role": "user", "content": f"{prompt}\n\nDocument content:\n{text}"}
        ]
        # Low temperature: study aids should be deterministic and faithful.
        response = client.chat_completion(
            messages=messages,
            max_tokens=3000,
            temperature=0.1,
            stream=False
        )
        return response.choices[0].message.content
    except Exception as e:
        # Surface setup problems (missing token, no Pro access) to the UI
        # instead of crashing the Gradio handler.
        return f"Error: {str(e)}\n\nPlease ensure:\n1. HF_TOKEN is set in settings\n2. You have Pro access to use Cerebras inference\n3. The Cerebras/Llama integration is enabled in your account"
# Create the Gradio interface
# Layout: two-column input row (file upload + pasted text on the left,
# operation selector on the right), then a process button, output box,
# and a tips section. Built at import time; launched under __main__.
with gr.Blocks(title="Study Assistant", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 📚 Study Assistant - Document Analysis Tool")
    gr.Markdown("Upload a document or paste text, then select the type of analysis you want to perform.")
    gr.Markdown("*Powered by Meta Llama-3.3-70B via Cerebras on HuggingFace*")
    with gr.Row():
        with gr.Column():
            # File input restricted to the formats extract_text_from_file handles.
            document = gr.File(
                label="Upload Document",
                file_types=[".pdf", ".docx", ".txt"],
                file_count="single"
            )
            # Alternative to uploading: paste text directly.
            text_input = gr.Textbox(
                label="Or paste text directly",
                lines=5,
                placeholder="Paste your text here if you don't want to upload a file..."
            )
        with gr.Column():
            # Choices must match the keys defined in PROMPTS (prompts.py).
            operation_type = gr.Dropdown(
                choices=["Summary", "Outline", "Analysis", "Study Guide", "Table", "Questions"],
                label="Select Operation",
                value="Summary"
            )
    process_btn = gr.Button("🚀 Process Document", variant="primary", size="lg")
    # Model output; copy button for easy export of the generated study aid.
    output = gr.Textbox(
        label="Output",
        lines=20,
        show_copy_button=True
    )
    gr.Markdown("---")
    gr.Markdown("### Tips:")
    gr.Markdown("- Supported formats: PDF, DOCX, TXT")
    gr.Markdown("- Maximum file size: 200MB")
    gr.Markdown("- Text can be pasted directly if you don't have a file")
    gr.Markdown("- Uses HuggingFace Pro account with Cerebras access")
    # Wire the button to the GPU-decorated handler; inputs map positionally
    # to process_document(document, operation_type, text_input).
    process_btn.click(
        fn=process_document,
        inputs=[document, operation_type, text_input],
        outputs=output,
        show_progress=True
    )
# Standard script entry guard: only launch the server when run directly.
if __name__ == "__main__":
    demo.launch()