"""Study Assistant — Gradio Space for document analysis with Llama via Cerebras."""
import gradio as gr
import spaces
import PyPDF2
import docx
import io
import os
from typing import Optional
from huggingface_hub import InferenceClient
from prompts import SYSTEM_PROMPT, PROMPTS
def extract_text_from_file(file) -> str:
    """Extract plain text from an uploaded file.

    Supports PDF (.pdf), Word (.docx) and plain-text (.txt) files,
    dispatching on the file extension (case-insensitive).

    Args:
        file: An uploaded-file object exposing a ``.name`` attribute holding
            the on-disk path (e.g. a Gradio ``File`` value), or ``None``.

    Returns:
        The extracted text; ``""`` for ``None`` input or an unsupported
        extension; an ``"Error reading file: ..."`` message on failure.
    """
    if file is None:
        return ""

    file_path = file.name
    # Lower-case the suffix so ".PDF" / ".TXT" uploads are handled too.
    suffix = os.path.splitext(file_path)[1].lower()
    parts = []  # accumulate chunks; joined once at the end (avoids quadratic +=)
    try:
        if suffix == '.pdf':
            with open(file_path, 'rb') as f:
                reader = PyPDF2.PdfReader(f)
                for page in reader.pages:
                    # extract_text() may return None for image-only pages.
                    parts.append((page.extract_text() or "") + "\n")
        elif suffix == '.docx':
            doc = docx.Document(file_path)
            for paragraph in doc.paragraphs:
                parts.append(paragraph.text + "\n")
        elif suffix == '.txt':
            with open(file_path, 'r', encoding='utf-8') as f:
                return f.read()
    except Exception as e:
        return f"Error reading file: {str(e)}"
    return "".join(parts)
@spaces.GPU
def process_document(document, operation_type, text_input):
    """Run the selected analysis on a document via Cerebras-hosted Llama.

    Args:
        document: Uploaded file (Gradio ``File`` value) or ``None``.
        operation_type: Key into ``PROMPTS`` selecting the analysis task.
        text_input: Fallback raw text used when no file is uploaded; may be
            ``None`` when the textbox was never touched.

    Returns:
        The model's response text, or a human-readable error message.
    """
    # Prefer the uploaded file; fall back to pasted text. Guard against
    # text_input being None so .strip() below cannot raise.
    if document is not None:
        text = extract_text_from_file(document)
    else:
        text = text_input or ""

    if not text.strip():
        return "Please provide either a document or text input."

    # Unknown operation names fall back to an empty task prompt.
    prompt = PROMPTS.get(operation_type, "")

    try:
        client = InferenceClient(
            "meta-llama/Llama-3.3-70B-Instruct",
            provider="cerebras",
            token=os.getenv("HF_TOKEN"),
        )
        messages = [
            {"role": "system", "content": SYSTEM_PROMPT},
            {"role": "user", "content": f"{prompt}\n\nDocument content:\n{text}"}
        ]
        # Non-streaming completion; low temperature for stable, factual output.
        response = client.chat_completion(
            messages=messages,
            max_tokens=3000,
            temperature=0.1,
            stream=False
        )
        return response.choices[0].message.content
    except Exception as e:
        return f"Error: {str(e)}\n\nPlease ensure:\n1. HF_TOKEN is set in settings\n2. You have Pro access to use Cerebras inference\n3. The Cerebras/Llama integration is enabled in your account"
# Create the Gradio interface.
# Layout: two columns (file upload / pasted text on the left, operation
# selector on the right), a process button, and a copyable output textbox.
with gr.Blocks(title="Study Assistant", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# π Study Assistant - Document Analysis Tool")
    gr.Markdown("Upload a document or paste text, then select the type of analysis you want to perform.")
    gr.Markdown("*Powered by Meta Llama-3.3-70B via Cerebras on HuggingFace*")
    with gr.Row():
        with gr.Column():
            # File input restricted to the formats extract_text_from_file handles.
            document = gr.File(
                label="Upload Document",
                file_types=[".pdf", ".docx", ".txt"],
                file_count="single"
            )
            # Alternative raw-text input, used when no file is uploaded.
            text_input = gr.Textbox(
                label="Or paste text directly",
                lines=5,
                placeholder="Paste your text here if you don't want to upload a file..."
            )
        with gr.Column():
            # Choices must match keys in PROMPTS (see process_document).
            operation_type = gr.Dropdown(
                choices=["Summary", "Outline", "Analysis", "Study Guide", "Table", "Questions"],
                label="Select Operation",
                value="Summary"
            )
    process_btn = gr.Button("π Process Document", variant="primary", size="lg")
    output = gr.Textbox(
        label="Output",
        lines=20,
        show_copy_button=True
    )
    gr.Markdown("---")
    gr.Markdown("### Tips:")
    gr.Markdown("- Supported formats: PDF, DOCX, TXT")
    gr.Markdown("- Maximum file size: 200MB")
    gr.Markdown("- Text can be pasted directly if you don't have a file")
    gr.Markdown("- Uses HuggingFace Pro account with Cerebras access")
    # Wire the button to the processing function; inputs map positionally
    # to process_document(document, operation_type, text_input).
    process_btn.click(
        fn=process_document,
        inputs=[document, operation_type, text_input],
        outputs=output,
        show_progress=True
    )
if __name__ == "__main__":
    # Launch the Gradio app when run as a script (removed stray trailing
    # "|" artifact that made this line a syntax error).
    demo.launch()