File size: 2,869 Bytes
03836f6
d881f5c
df690e6
d405ed8
c0be42f
d405ed8
587fb3d
c0be42f
 
 
d405ed8
c0be42f
62f0ba5
c0be42f
d405ed8
 
c0be42f
 
df690e6
c0be42f
d881f5c
c0be42f
d881f5c
 
 
 
 
 
d405ed8
 
 
d881f5c
 
 
d405ed8
d881f5c
 
 
 
 
 
c0be42f
d881f5c
c0be42f
d881f5c
 
 
 
df690e6
c0be42f
 
 
62f0ba5
c0be42f
 
 
d881f5c
 
 
c0be42f
d881f5c
 
c0be42f
d881f5c
d405ed8
d881f5c
 
 
c0be42f
 
d881f5c
df690e6
d881f5c
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
import os
import json
import torch
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from ppt_parser import transfer_to_structure

# Hugging Face access token, read from the HF_TOKEN environment variable.
# Stays None when the variable is unset (optional if the repo is public
# and reachable unauthenticated).
hf_token = os.environ.get("HF_TOKEN")
model_id = "meta-llama/Llama-3.1-8B-Instruct"

# Load the tokenizer and the model. Half precision is used when CUDA is
# available, full precision otherwise.
tokenizer = AutoTokenizer.from_pretrained(model_id, token=hf_token)
_model_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    token=hf_token,
    torch_dtype=_model_dtype,
    device_map="auto",
)

# Text-generation pipeline shared by every request; each call may produce
# up to 512 new tokens.
llama_pipe = pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=512,
)

# Global storage: text of the most recently parsed deck. Written by
# handle_pptx_upload and read by ask_llama.
extracted_text = ""

def _iter_shape_text(shape):
    """Yield the paragraph texts of one parsed shape, descending into groups.

    A shape whose ``"type"`` is ``"text"`` contributes one string per
    ``paragraph_*`` entry (an empty string when the entry has no usable
    ``"text"``). A shape whose ``"type"`` is ``"group"`` contributes the
    texts of every shape in its ``"group_content"``, at any nesting depth.
    Anything else, including non-dict values, contributes nothing.
    """
    if not isinstance(shape, dict):
        return

    shape_type = shape.get("type")
    if shape_type == "group":
        members = shape.get("group_content")
        if isinstance(members, dict):
            for member in members.values():
                yield from _iter_shape_text(member)
    elif shape_type == "text":
        for key, paragraph in shape.items():
            if not (isinstance(key, str) and key.startswith("paragraph_")):
                continue
            if not isinstance(paragraph, dict):
                # Malformed paragraph entry: skip it instead of crashing.
                continue
            content = paragraph.get("text")
            yield "" if content is None else str(content)


def extract_text_from_pptx_json(parsed_json: dict) -> str:
    """Collect all paragraph text from a parsed presentation structure.

    Args:
        parsed_json: Mapping of slides, where each slide maps to shape dicts.
            Text shapes (``"type": "text"``) hold ``paragraph_*`` entries
            with a ``"text"`` field; group shapes (``"type": "group"``)
            hold further shapes under ``"group_content"``.

    Returns:
        The paragraph texts in traversal order, one per line, with leading
        and trailing whitespace stripped. Empty string when nothing is found.

    Non-dict slides, shapes and paragraphs are ignored rather than raising,
    and groups nested inside groups are traversed as well.
    """
    if not isinstance(parsed_json, dict):
        return ""

    lines = []
    for slide in parsed_json.values():
        if not isinstance(slide, dict):
            continue
        for shape in slide.values():
            lines.extend(_iter_shape_text(shape))
    return "\n".join(lines).strip()

def handle_pptx_upload(pptx_file):
    """Parse an uploaded PPTX and cache its slide text for later questions.

    Args:
        pptx_file: The value delivered by the Gradio file input: either an
            object exposing the file path as ``.name`` or a plain path
            string. ``None`` when nothing has been uploaded.

    Returns:
        The extracted slide text, or a short message when no file was
        provided or the slides contain no readable text.

    Side effects:
        Overwrites the module-level ``extracted_text``. It is cleared when
        no file is supplied so stale context is not reused.
    """
    global extracted_text

    if pptx_file is None:
        # The button was clicked without an upload; avoid None.name crashing.
        extracted_text = ""
        return "Please upload a PPTX file first."

    # Accept both file-like objects (path in .name) and bare path strings.
    tmp_path = getattr(pptx_file, "name", pptx_file)

    # transfer_to_structure returns a JSON string plus a second value that
    # is not used here. "images" is presumably the directory for extracted
    # images -- confirm against ppt_parser.
    parsed_json_str, _ = transfer_to_structure(tmp_path, "images")
    parsed_json = json.loads(parsed_json_str)
    extracted_text = extract_text_from_pptx_json(parsed_json)
    return extracted_text or "No readable text found in slides."

def ask_llama(question):
    """Answer a question about the uploaded slides using the LLaMA pipeline.

    Args:
        question: The user's question as plain text.

    Returns:
        The model's answer with surrounding whitespace stripped, or a short
        instruction when no slide text has been extracted yet or the
        question is blank.
    """
    if not extracted_text:
        return "Please upload a PPTX file first."
    if not question or not question.strip():
        return "Please enter a question."

    # Pass chat messages instead of a hand-written prompt so the pipeline
    # applies the model's own chat template. The previous
    # <|user|>/<|end|>/<|assistant|> markers are not Llama 3.1 tokens.
    messages = [
        {
            "role": "user",
            "content": f"Context:\n{extracted_text}\n\nQuestion: {question}",
        }
    ]

    # return_full_text=False yields only the newly generated text, so the
    # prompt no longer has to be removed with a fragile str.replace.
    outputs = llama_pipe(messages, return_full_text=False)
    generated = outputs[0]["generated_text"]
    if isinstance(generated, list):
        # Some versions return the whole conversation; take the last turn.
        generated = generated[-1]["content"]
    return generated.strip()

# ✅ Gradio UI
# Components are declared top to bottom in display order: upload and
# extraction first, then the question/answer section.
with gr.Blocks() as demo:
    gr.Markdown("## 🧠 Study Assistant with LLaMA 3.1 8B")

    # Upload and text extraction.
    pptx_input = gr.File(label="📂 Upload PPTX File", file_types=[".pptx"])
    extract_btn = gr.Button("📜 Extract Slide Text")

    # Read-only box showing what was extracted from the slides.
    extracted_output = gr.Textbox(label="📄 Slide Text", lines=10, interactive=False)
    extract_btn.click(handle_pptx_upload, inputs=[pptx_input], outputs=[extracted_output])

    # Question answering over the extracted text.
    question = gr.Textbox(label="❓ Ask a Question")
    ask_btn = gr.Button("💬 Ask LLaMA")
    ai_answer = gr.Textbox(label="🤖 LLaMA Answer", lines=6)

    ask_btn.click(ask_llama, inputs=[question], outputs=[ai_answer])

# Start the web app only when the file is run as a script.
if __name__ == "__main__":
    demo.launch()