Tulika2000 committed on
Commit
ee04366
·
verified ·
1 Parent(s): 9d59476

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +75 -0
app.py ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ """app.ipynb
3
+
4
+ Automatically generated by Colab.
5
+
6
+ Original file is located at
7
+ https://colab.research.google.com/drive/1XblbxoRxB4XOHixjGij789FPD9KjKdhi
8
+ """
9
+
10
+ import os
11
+ import pdfplumber
12
+ import gradio as gr
13
+ from langchain_groq.chat_models import ChatGroq
14
+
15
# Read the Groq API key from the environment (for example a Hugging Face
# Space secret) so it never has to be hard-coded in the source.
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
if not GROQ_API_KEY:
    # Fail fast at startup instead of on the first summarization request.
    raise ValueError("GROQ_API_KEY is not set. Add it in Hugging Face Secrets.")

# Initialize the chat model (Mixtral 8x7B, accessed through Groq). The key
# validated above is passed explicitly so the client uses exactly that value.
# NOTE(review): confirm that this model id is still offered by Groq.
llm = ChatGroq(model_name="mixtral-8x7b-32768", groq_api_key=GROQ_API_KEY)
22
+
23
def extract_text_from_pdf(pdf_file):
    """Extract the text of every page of a PDF using pdfplumber.

    Args:
        pdf_file: Value handed to ``pdfplumber.open`` (the caller in this
            file passes a filesystem path).

    Returns:
        The text of all pages that produced any, joined with newlines and
        stripped of leading/trailing whitespace. An empty string when no
        page yielded text.
    """
    with pdfplumber.open(pdf_file) as pdf:
        # Pages are read while the PDF is still open.
        page_texts = [page.extract_text() for page in pdf.pages]

    # Skip pages without extractable text and separate the rest with a
    # newline so the last word of one page does not fuse with the first
    # word of the next.
    return "\n".join(chunk for chunk in page_texts if chunk).strip()
30
+
31
def summarize_text(text, length, style, max_chars=10000):
    """Summarize text with the module-level LLM, with adjustable length and style.

    Args:
        text: Document text to summarize.
        length: Desired summary length label (e.g. "Short"); it is
            lower-cased and inserted into the prompt.
        style: Desired summary style label (e.g. "Bullets"); it is
            lower-cased and inserted into the prompt.
        max_chars: Maximum number of leading characters of ``text`` that
            are sent to the model. Defaults to 10,000.

    Returns:
        The model's reply with surrounding whitespace removed.
    """
    # Limit the input for efficiency. This note is a code comment on purpose:
    # it must not be part of the prompt text sent to the model.
    document = text[:max_chars]

    prompt = (
        f"Read the following document and summarize it in {style.lower()} format.\n"
        f"Keep the summary {length.lower()}.\n"
        "Follow this step-by-step approach:\n"
        "1. Identify key sections.\n"
        "2. Extract essential points.\n"
        "3. Remove unnecessary details.\n"
        "4. Ensure factual accuracy without adding extra information.\n"
        "\n"
        "Document:\n"
        f"{document}\n"
    )

    # invoke() is used instead of the deprecated predict(); it returns a
    # message object whose text is in ``.content``.
    response = llm.invoke(prompt)
    return response.content.strip()
49
+
50
def process_pdf(file, length, style):
    """Extract the text of an uploaded PDF and summarize it with the LLM.

    Args:
        file: The upload from the Gradio file input. Either a filesystem
            path or an object whose ``name`` attribute holds the path;
            falsy when nothing was uploaded.
        length: Summary length label forwarded to ``summarize_text``.
        style: Summary style label forwarded to ``summarize_text``.

    Returns:
        The summary, or a short user-facing message when no file was
        uploaded or no text could be extracted.
    """
    if not file:
        return "No file uploaded."

    # Depending on the Gradio version/configuration the upload arrives either
    # as a plain path or as a temp-file wrapper exposing the path via ``.name``.
    pdf_path = file if isinstance(file, (str, os.PathLike)) else file.name

    text = extract_text_from_pdf(pdf_path)
    if not text:
        return "Could not extract text from the PDF. Try another file."

    return summarize_text(text, length, style)
60
+
61
# Build the Gradio UI. The three input components are listed in the order of
# process_pdf's parameters: (file, length, style).
_pdf_input = gr.File(label="Upload a PDF")
_length_input = gr.Radio(
    ["Short", "Medium", "Long"],
    label="Summary Length",
    value="Medium",
)
_style_input = gr.Radio(
    ["Bullets", "Key Takeaways", "Concise Paragraph"],
    label="Summary Style",
    value="Key Takeaways",
)

interface = gr.Interface(
    fn=process_pdf,
    inputs=[_pdf_input, _length_input, _style_input],
    outputs="text",
    title="📄 Advanced PDF Summarizer",
    description="Upload a PDF file and get a structured summary with customization options.",
)

# Start serving the app.
interface.launch()