g0th committed on
Commit
587fb3d
·
verified ·
1 Parent(s): c9c596e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +48 -77
app.py CHANGED
@@ -1,77 +1,48 @@
1
- import gradio as gr
2
- import google.generativeai as genai
3
- from transformers import pipeline
4
- import json
5
- from ppt_parser import transfer_to_structure
6
-
7
# ✅ Configuration & model setup.
# NOTE(review): the API key was hard-coded in source; read it from the
# environment so the secret never lands in version control. The old
# placeholder remains as the default for backward compatibility.
import os

GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY", "your_google_api_key_here")
genai.configure(api_key=GOOGLE_API_KEY)

# ✅ Load models once at import time: a BART summarizer (local) and a
# Gemini client (remote) for free-form Q&A.
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
gemini_model = genai.GenerativeModel("models/gemini-1.5-flash")

# ✅ Module-level cache of the text extracted from the last uploaded deck;
# written by handle_pptx_upload, read by summarize_text/clarify_concept.
extracted_text = ""
17
-
18
def extract_text_from_pptx_json(parsed_json: dict) -> str:
    """Flatten the parser's slide→shape JSON into plain text.

    Walks every slide and every shape, descending one level into
    ``group`` shapes, and collects the ``"text"`` field of every
    ``paragraph_*`` entry found on a ``text`` shape.

    Args:
        parsed_json: dict keyed by slide, each slide a dict of shapes,
            as produced by ``ppt_parser.transfer_to_structure``.

    Returns:
        All paragraph texts, one per line, stripped of surrounding
        whitespace. Empty string when no text is found.
    """

    def _paragraph_texts(shape: dict):
        # Paragraph entries sit beside the 'type' key; only keys that
        # start with "paragraph_" carry text.
        for key, para in shape.items():
            if key.startswith("paragraph_"):
                yield para.get("text", "")

    # Collect into a list and join once — the original built the result
    # with repeated `+=`, which is quadratic on large decks.
    lines = []
    for slide in parsed_json.values():
        for shape in slide.values():
            if shape.get("type") == "group":
                # One level of nesting: group_content holds child shapes.
                for child in shape.get("group_content", {}).values():
                    if child.get("type") == "text":
                        lines.extend(_paragraph_texts(child))
            elif shape.get("type") == "text":
                lines.extend(_paragraph_texts(shape))
    # Trailing "\n" per paragraph + final strip matches the original output.
    return "".join(line + "\n" for line in lines).strip()
34
-
35
def handle_pptx_upload(pptx_file):
    """Parse an uploaded .pptx and cache its text in the module global."""
    global extracted_text
    parsed_json_str, _images = transfer_to_structure(pptx_file.name, "images")
    extracted_text = extract_text_from_pptx_json(json.loads(parsed_json_str))
    if extracted_text:
        return extracted_text
    return "No readable text found in slides."
42
-
43
def summarize_text():
    """Summarize the cached slide text with the BART summarizer."""
    global extracted_text
    if not extracted_text:
        return "Please upload and extract text from a PPTX file first."
    result = summarizer(
        extracted_text, max_length=200, min_length=50, do_sample=False
    )
    return result[0]["summary_text"]
49
-
50
def clarify_concept(question):
    """Answer *question* with Gemini, grounding it in the cached slide text."""
    global extracted_text
    if not extracted_text:
        return "Please upload and extract text from a PPTX file first."
    prompt = f"Context:\n{extracted_text}\n\nQuestion: {question}"
    response = gemini_model.generate_content(prompt)
    if response:
        return response.text
    return "No response from Gemini."
57
-
58
# Gradio UI. NOTE(review): the original registered two independent
# .click handlers on the same button, so summarization was not
# guaranteed to see the freshly extracted text; chaining with .then()
# runs the summary strictly after extraction completes.
with gr.Blocks() as demo:
    gr.Markdown("## 🧠 AI-Powered Study Assistant for PowerPoint Lectures")

    pptx_input = gr.File(label="📂 Upload PPTX File", file_types=[".pptx"])
    extract_btn = gr.Button("📜 Extract & Summarize")

    extracted_output = gr.Textbox(label="📄 Extracted Text", lines=10, interactive=False)
    summary_output = gr.Textbox(label="📝 Summary", interactive=False)

    # Extract first, then summarize the cached text.
    extract_btn.click(
        handle_pptx_upload, inputs=[pptx_input], outputs=[extracted_output]
    ).then(summarize_text, outputs=[summary_output])

    question = gr.Textbox(label="❓ Ask a Question")
    ask_btn = gr.Button("💬 Ask Gemini")
    ai_answer = gr.Textbox(label="🤖 Gemini Answer", lines=4)

    ask_btn.click(clarify_concept, inputs=[question], outputs=[ai_answer])

if __name__ == "__main__":
    demo.launch()
 
1
+ import streamlit as st
2
+ from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
3
+ import PyPDF2
4
+ import torch
5
+
6
# App chrome — st.set_page_config must be the first Streamlit call.
st.set_page_config(page_title="Perplexity Clone (Gemma)", layout="wide")
st.title("📚 Perplexity-Style AI Study Assistant using Gemma")
8
+
9
# Load the Gemma model and tokenizer once per process; st.cache_resource
# keys the cache on the arguments, so each model id is loaded only once.
@st.cache_resource
def load_model(model_name: str = "google/gemma-7b-it"):
    """Build a text-generation pipeline for *model_name*.

    The default preserves the originally hard-coded Gemma 7B IT
    checkpoint; passing another causal-LM repo id generalizes the app
    to other models without touching this function.

    Returns:
        A transformers text-generation pipeline capped at 512 new tokens.
    """
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        torch_dtype=torch.float16,  # halves memory; fine for inference
        device_map="auto",          # spread across available devices
    )
    return pipeline(
        "text-generation", model=model, tokenizer=tokenizer, max_new_tokens=512
    )
20
+
21
# Instantiate the (cached) generation pipeline at import time.
textgen = load_model()
22
+
23
# Extract text from uploaded PDF
def extract_text_from_pdf(file):
    """Concatenate the extractable text of every page of *file*.

    Args:
        file: a binary file-like object containing a PDF (e.g. the
            object returned by st.file_uploader).

    Returns:
        Page texts joined by newlines, stripped. Empty string for a
        PDF with no extractable text.
    """
    reader = PyPDF2.PdfReader(file)
    pages = []
    for page in reader.pages:
        # extract_text() may yield None/"" on image-only or malformed
        # pages; the original crashed with TypeError on `None + "\n"`.
        pages.append(page.extract_text() or "")
    return "\n".join(pages).strip()
30
+
31
# UI Layout
query = st.text_input("Ask a question or type a query:")

uploaded_file = st.file_uploader("Or upload a PDF to analyze its content:", type=["pdf"])

context = ""
if uploaded_file:
    context = extract_text_from_pdf(uploaded_file)
    st.text_area("Extracted Content", context, height=200)

if st.button("Generate Answer"):
    if not query and not context:
        # Guard: nothing to send to the model.
        st.warning("Please enter a question or upload a PDF first.")
    else:
        with st.spinner("Generating with Gemma..."):
            prompt = query
            if context:
                prompt = f"Context:\n{context}\n\nQuestion: {query}"
            # A text-generation pipeline echoes the prompt inside
            # generated_text by default; show only the completion so the
            # user doesn't get the (possibly huge) context echoed back.
            output = textgen(prompt)[0]["generated_text"]
            answer = output[len(prompt):].strip() or output
        st.success("Answer:")
        st.write(answer)