import gradio as gr
import spaces
import PyPDF2
import docx
import io
import os
from huggingface_hub import InferenceClient
from prompts import SYSTEM_PROMPT, PROMPTS
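# prompts.py is a separate module in this repo (not shown here). It is assumed to
# define SYSTEM_PROMPT (a str) and PROMPTS (a dict mapping the dropdown operation
# names below to instruction strings), roughly along these lines:
#
#   SYSTEM_PROMPT = "You are a study assistant that analyzes documents."
#   PROMPTS = {
#       "Summary": "Summarize the key points of the following document.",
#       "Outline": "Create a structured outline of the following document.",
#       "Analysis": "Analyze the main arguments and evidence in the following document.",
#       "Study Guide": "Create a study guide for the following document.",
#       "Table": "Organize the key information in the following document as a table.",
#       "Questions": "Write practice questions based on the following document.",
#   }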

def extract_text_from_file(file) -> str:
    """Extract text from uploaded files"""
    if file is None:
        return ""
    
    file_path = file.name
    text = ""
    
    try:
        if file_path.lower().endswith('.pdf'):
            with open(file_path, 'rb') as f:
                reader = PyPDF2.PdfReader(f)
                for page in reader.pages:
                    text += page.extract_text() + "\n"
        
        elif file_path.lower().endswith('.docx'):
            doc = docx.Document(file_path)
            for paragraph in doc.paragraphs:
                text += paragraph.text + "\n"
        
        elif file_path.lower().endswith('.txt'):
            with open(file_path, 'r', encoding='utf-8') as f:
                text = f.read()
        
        else:
            return "Unsupported file type. Please upload a PDF, DOCX, or TXT file."
    except Exception as e:
        return f"Error reading file: {str(e)}"
    
    return text

@spaces.GPU
def process_document(document, operation_type, text_input):
    """Main processing function using Cerebras Llama through HuggingFace"""
    
    # Extract text from file or use text input
    if document is not None:
        text = extract_text_from_file(document)
    else:
        text = text_input
    
    if not text.strip():
        return "Please provide either a document or text input."
    
    # Get the appropriate prompt
    prompt = PROMPTS.get(operation_type, "")
    
    # Create the client with Cerebras provider
    try:
        client = InferenceClient(
            "meta-llama/Llama-3.3-70B-Instruct",
            provider="cerebras",
            token=os.getenv("HF_TOKEN"),
        )
        
        # Create conversation messages
        messages = [
            {"role": "system", "content": SYSTEM_PROMPT},
            {"role": "user", "content": f"{prompt}\n\nDocument content:\n{text}"}
        ]
        
        # Generate response using chat completion
        response = client.chat_completion(
            messages=messages,
            max_tokens=3000,
            temperature=0.1,
            stream=False
        )
        
        return response.choices[0].message.content
        
    except Exception as e:
        return f"Error: {str(e)}\n\nPlease ensure:\n1. HF_TOKEN is set in settings\n2. You have Pro access to use Cerebras inference\n3. The Cerebras/Llama integration is enabled in your account"

# Create the Gradio interface
with gr.Blocks(title="Study Assistant", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# πŸ“š Study Assistant - Document Analysis Tool")
    gr.Markdown("Upload a document or paste text, then select the type of analysis you want to perform.")
    gr.Markdown("*Powered by Meta Llama-3.3-70B via Cerebras on HuggingFace*")
    
    with gr.Row():
        with gr.Column():
            document = gr.File(
                label="Upload Document",
                file_types=[".pdf", ".docx", ".txt"],
                file_count="single"
            )
            text_input = gr.Textbox(
                label="Or paste text directly",
                lines=5,
                placeholder="Paste your text here if you don't want to upload a file..."
            )
            
        with gr.Column():
            operation_type = gr.Dropdown(
                choices=["Summary", "Outline", "Analysis", "Study Guide", "Table", "Questions"],
                label="Select Operation",
                value="Summary"
            )
            process_btn = gr.Button("πŸš€ Process Document", variant="primary", size="lg")
    
    output = gr.Textbox(
        label="Output",
        lines=20,
        show_copy_button=True
    )
    
    gr.Markdown("---")
    gr.Markdown("### Tips:")
    gr.Markdown("- Supported formats: PDF, DOCX, TXT")
    gr.Markdown("- Maximum file size: 200MB")
    gr.Markdown("- Text can be pasted directly if you don't have a file")
    gr.Markdown("- Uses HuggingFace Pro account with Cerebras access")
    
    process_btn.click(
        fn=process_document,
        inputs=[document, operation_type, text_input],
        outputs=output,
        show_progress="full"
    )

if __name__ == "__main__":
    demo.launch()
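
# Deployment notes (assumed setup):
# - requirements.txt for this Space is assumed to include: gradio, spaces, PyPDF2,
#   python-docx, huggingface_hub (package names inferred from the imports above).
# - HF_TOKEN must be set as a secret in the Space settings, as referenced by
#   os.getenv("HF_TOKEN") and the error message in process_document.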