Tulika2000 committed on
Commit
02d81ba
·
verified ·
1 Parent(s): f0cc0d1

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +88 -0
app.py ADDED
@@ -0,0 +1,88 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ """app.ipynb
3
+
4
+ Automatically generated by Colab.
5
+
6
+ Original file is located at
7
+ https://colab.research.google.com/drive/1XblbxoRxB4XOHixjGij789FPD9KjKdhi
8
+ """
9
+
10
+ import os
11
+ import PyPDF2
12
+ import gradio as gr
13
+ from langchain_groq.chat_models import ChatGroq
14
+
15
# Read the Groq API key from the environment (e.g. a Hugging Face Space
# secret) instead of hard-coding it in the source.
GROQ_API_KEY = os.getenv("GROQ_API_KEY")

# Fail fast at import time so a missing secret is reported clearly rather
# than surfacing later as an opaque authentication error.
if not GROQ_API_KEY:
    raise ValueError("GROQ_API_KEY is not set. Add it in Hugging Face Secrets.")

# Model id served by Groq. Defaults to Mixtral-8x7B (32k context) but can be
# overridden with the GROQ_MODEL environment variable without a code change.
# NOTE(review): hosted model ids get retired over time -- confirm the default
# is still available on Groq.
GROQ_MODEL = os.getenv("GROQ_MODEL") or "mixtral-8x7b-32768"

# Shared chat-model client used by the summarization code in this file.
# NOTE(review): the key is not passed explicitly; presumably ChatGroq picks up
# GROQ_API_KEY from the environment itself -- confirm against langchain_groq.
llm = ChatGroq(model_name=GROQ_MODEL)
24
+
25
def extract_text_from_pdf(pdf_file):
    """Extract the text of every page of a PDF.

    Args:
        pdf_file: Whatever ``PyPDF2.PdfReader`` accepts as its source; it is
            passed through unchanged.

    Returns:
        The text of all pages that yielded any text, each followed by a
        newline, concatenated in page order. An empty string when no page
        produced text.
    """
    reader = PyPDF2.PdfReader(pdf_file)
    page_texts = (page.extract_text() for page in reader.pages)
    # Pages whose extraction returns None/"" are skipped. A single join avoids
    # repeatedly re-copying the accumulated string on long documents.
    return "".join(page_text + "\n" for page_text in page_texts if page_text)
34
+
35
def summarize_text(text, length='Medium', style='Concise Paragraph', max_chars=10000):
    """Summarize text with the module-level chat model using a stepwise prompt.

    The prompt asks the model to work through three steps (topics, key facts,
    structured summary) and then appends one length instruction and one style
    instruction.

    Args:
        text: Document text to summarize. ``None`` is treated as empty.
        length: One of ``'Short'``, ``'Medium'``, ``'Long'``. Any other value
            (including ``None``) falls back to ``'Medium'``.
        style: One of ``'Bulleted List'``, ``'Key Takeaways'``,
            ``'Concise Paragraph'``. Any other value falls back to
            ``'Concise Paragraph'``.
        max_chars: Only the first ``max_chars`` characters of ``text`` are
            sent to the model, to bound the prompt size.

    Returns:
        The model's reply.
    """
    # Instruction appended to the prompt for each supported summary length.
    length_map = {
        'Short': 'Summarize in 3-4 lines.',
        'Medium': 'Summarize in 6-8 lines.',
        'Long': 'Provide a detailed summary in multiple paragraphs.'
    }

    # Instruction appended to the prompt for each supported summary style.
    style_map = {
        'Bulleted List': 'Format the summary as a list of key points.',
        'Key Takeaways': 'Extract the most important insights as key takeaways.',
        'Concise Paragraph': 'Write the summary as a structured paragraph.'
    }

    # Fall back to the documented defaults instead of raising a bare KeyError
    # when the caller passes an unknown or missing option.
    length_instruction = length_map.get(length, length_map['Medium'])
    style_instruction = style_map.get(style, style_map['Concise Paragraph'])

    document = (text or "")[:max_chars]

    prompt = f"""
    Step 1: Identify the main topics covered in the document.
    Step 2: Extract key facts, arguments, and conclusions.
    Step 3: Generate a structured summary based on extracted information.
    {length_instruction} {style_instruction}

    Document:
    {document}
    """

    # `predict` is deprecated on LangChain models; `invoke` is the supported
    # entry point. Chat models return a message object whose text is in
    # `.content`; a plain string reply is passed through unchanged.
    response = llm.invoke(prompt)
    return getattr(response, "content", response)
64
+
65
def process_pdf(file, length, style):
    """Extract the text of an uploaded PDF and summarize it.

    Args:
        file: The uploaded file as delivered by the UI, or ``None`` when
            nothing was uploaded. A wrapper object exposing a string ``name``
            attribute is reduced to that path; anything else is handed to the
            extractor unchanged.
        length: Summary length option forwarded to ``summarize_text``.
        style: Summary style option forwarded to ``summarize_text``.

    Returns:
        The summary text, or a short message when there is no file or no
        extractable text.
    """
    if file is None:
        return "No file uploaded."

    # NOTE(review): depending on the Gradio version the upload presumably
    # arrives either as a path string or as a temp-file wrapper with `.name`;
    # normalising to the path covers both -- confirm against the pinned version.
    name = getattr(file, "name", None)
    source = name if isinstance(name, str) else file

    text = extract_text_from_pdf(source)

    # Scanned/image-only PDFs yield no text; asking the model to summarize an
    # empty document would only produce an invented summary.
    if not text or not text.strip():
        return "No extractable text found in the PDF."

    return summarize_text(text, length, style)
73
+
74
# Input widgets, in the positional order process_pdf(file, length, style) expects.
pdf_upload = gr.File(label="Upload a PDF")
length_choice = gr.Radio(
    ["Short", "Medium", "Long"],
    value="Medium",
    label="Summary Length",
)
style_choice = gr.Radio(
    ["Bulleted List", "Key Takeaways", "Concise Paragraph"],
    value="Concise Paragraph",
    label="Summary Style",
)

# Wire the widgets to process_pdf; its return value is shown as plain text.
interface = gr.Interface(
    fn=process_pdf,
    inputs=[pdf_upload, length_choice, style_choice],
    outputs="text",
    title="📄 AI-Powered PDF Summarizer",
    description="Upload a PDF file and customize the summary format and length.",
)

# Start the web app.
interface.launch()