# app.py — Study Assistant (Gradio Space)
# Author: SlouchyBuffalo — revision e5b3ba7 ("Update app.py"), 4.26 kB
import gradio as gr
import spaces
import PyPDF2
import docx
import io
import os
from typing import Optional
from huggingface_hub import InferenceClient
from prompts import SYSTEM_PROMPT, PROMPTS
def extract_text_from_file(file) -> str:
    """Extract plain text from an uploaded file.

    Supports ``.pdf`` (via PyPDF2), ``.docx`` (via python-docx) and ``.txt``.
    Extensions are matched case-insensitively, so ``REPORT.PDF`` also works.

    Args:
        file: An uploaded-file object exposing a ``name`` attribute holding
            the on-disk path (Gradio's ``gr.File`` value), or ``None``.

    Returns:
        The extracted text; ``""`` when ``file`` is ``None`` or the extension
        is unsupported; an ``"Error reading file: ..."`` message when
        extraction raises.
    """
    if file is None:
        return ""
    file_path = file.name
    # Normalize the extension once so the branches below are case-insensitive.
    ext = os.path.splitext(file_path)[1].lower()
    parts = []
    try:
        if ext == '.pdf':
            with open(file_path, 'rb') as f:
                reader = PyPDF2.PdfReader(f)
                for page in reader.pages:
                    # extract_text() may return None (e.g. image-only pages);
                    # coerce to "" so concatenation cannot raise TypeError.
                    parts.append((page.extract_text() or "") + "\n")
        elif ext == '.docx':
            doc = docx.Document(file_path)
            for paragraph in doc.paragraphs:
                parts.append(paragraph.text + "\n")
        elif ext == '.txt':
            with open(file_path, 'r', encoding='utf-8') as f:
                return f.read()
    except Exception as e:
        return f"Error reading file: {str(e)}"
    # Join once instead of repeated string += (avoids quadratic behavior).
    return "".join(parts)
@spaces.GPU
def process_document(document, operation_type, text_input):
    """Run the selected analysis on a document or pasted text.

    Uses Meta Llama-3.3-70B via the Cerebras provider on HuggingFace.

    Args:
        document: Uploaded file (Gradio ``gr.File`` value) or ``None``.
        operation_type: Key into ``PROMPTS`` selecting the analysis type
            (e.g. "Summary", "Outline"); unknown keys fall back to "".
        text_input: Text pasted directly into the textbox; may be ``None``
            or empty when a file is used instead.

    Returns:
        The model's response text, or a human-readable error message
        (no exception escapes this function).
    """
    # Prefer the uploaded file; otherwise fall back to the pasted text.
    if document is not None:
        text = extract_text_from_file(document)
    else:
        # text_input can be None if the textbox was never touched —
        # coerce to "" so .strip() below cannot raise AttributeError.
        text = text_input or ""
    if not text.strip():
        return "Please provide either a document or text input."
    # Unknown operation types degrade to an empty task prompt rather than failing.
    prompt = PROMPTS.get(operation_type, "")
    try:
        # Route the request through the Cerebras provider; requires HF_TOKEN.
        client = InferenceClient(
            "meta-llama/Llama-3.3-70B-Instruct",
            provider="cerebras",
            token=os.getenv("HF_TOKEN"),
        )
        messages = [
            {"role": "system", "content": SYSTEM_PROMPT},
            {"role": "user", "content": f"{prompt}\n\nDocument content:\n{text}"}
        ]
        # Low temperature: study aids should be deterministic and faithful.
        response = client.chat_completion(
            messages=messages,
            max_tokens=3000,
            temperature=0.1,
            stream=False
        )
        return response.choices[0].message.content
    except Exception as e:
        # Surface setup problems (missing token, no Pro access) to the UI
        # instead of crashing the Gradio handler.
        return f"Error: {str(e)}\n\nPlease ensure:\n1. HF_TOKEN is set in settings\n2. You have Pro access to use Cerebras inference\n3. The Cerebras/Llama integration is enabled in your account"
# Create the Gradio interface
# Layout: two-column input row (file upload + pasted text on the left,
# operation selector on the right), then a process button, output box,
# and a tips section. Built at import time; launched under __main__.
with gr.Blocks(title="Study Assistant", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 📚 Study Assistant - Document Analysis Tool")
    gr.Markdown("Upload a document or paste text, then select the type of analysis you want to perform.")
    gr.Markdown("*Powered by Meta Llama-3.3-70B via Cerebras on HuggingFace*")
    with gr.Row():
        with gr.Column():
            # File input restricted to the formats extract_text_from_file handles.
            document = gr.File(
                label="Upload Document",
                file_types=[".pdf", ".docx", ".txt"],
                file_count="single"
            )
            # Alternative to uploading: paste text directly.
            text_input = gr.Textbox(
                label="Or paste text directly",
                lines=5,
                placeholder="Paste your text here if you don't want to upload a file..."
            )
        with gr.Column():
            # Choices must match the keys defined in PROMPTS (prompts.py).
            operation_type = gr.Dropdown(
                choices=["Summary", "Outline", "Analysis", "Study Guide", "Table", "Questions"],
                label="Select Operation",
                value="Summary"
            )
    process_btn = gr.Button("🚀 Process Document", variant="primary", size="lg")
    # Model output; copy button for easy export of the generated study aid.
    output = gr.Textbox(
        label="Output",
        lines=20,
        show_copy_button=True
    )
    gr.Markdown("---")
    gr.Markdown("### Tips:")
    gr.Markdown("- Supported formats: PDF, DOCX, TXT")
    gr.Markdown("- Maximum file size: 200MB")
    gr.Markdown("- Text can be pasted directly if you don't have a file")
    gr.Markdown("- Uses HuggingFace Pro account with Cerebras access")
    # Wire the button to the GPU-decorated handler; inputs map positionally
    # to process_document(document, operation_type, text_input).
    process_btn.click(
        fn=process_document,
        inputs=[document, operation_type, text_input],
        outputs=output,
        show_progress=True
    )
# Standard script entry guard: only launch the server when run directly.
if __name__ == "__main__":
    demo.launch()