Tulika2000 committed on
Commit
ce13f7e
·
verified ·
1 Parent(s): 94b1232

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +71 -0
app.py ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ """app.ipynb
3
+
4
+ Automatically generated by Colab.
5
+
6
+ Original file is located at
7
+ https://colab.research.google.com/drive/1XblbxoRxB4XOHixjGij789FPD9KjKdhi
8
+ """
9
+
10
+ import os
11
+ import PyPDF2
12
+ import gradio as gr
13
+ from langchain_groq.chat_models import ChatGroq
14
+
15
# Read the Groq credential from the environment instead of hard-coding it.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")

# Fail fast at startup, with an actionable message, when the secret is absent
# or empty.
if not GROQ_API_KEY:
    raise ValueError("GROQ_API_KEY is not set. Add it in Hugging Face Secrets.")

# Chat model used for summarization (Groq-hosted Mixtral 8x7B).
llm = ChatGroq(model_name="mixtral-8x7b-32768")
24
+
25
def extract_text_from_pdf(pdf_file):
    """Return the concatenated text of every page of a PDF.

    Args:
        pdf_file: The PDF source, passed straight to ``PyPDF2.PdfReader``.

    Returns:
        The text of each page that produced any, each followed by a
        newline. An empty string when no page produced text.
    """
    pages = PyPDF2.PdfReader(pdf_file).pages
    extracted = (page.extract_text() for page in pages)
    # Pages whose extraction result is falsy (None or "") are skipped.
    return "".join(f"{chunk}\n" for chunk in extracted if chunk)
34
+
35
def summarize_text(text, max_chars=10000):
    """Ask the module-level ``llm`` for a summary of *text*.

    Args:
        text: Document text to summarize.
        max_chars: Only the first ``max_chars`` characters of *text* are
            placed in the prompt, to bound the request size.

    Returns:
        The text of the model's reply.
    """
    prompt = f"Summarize the following document:\n\n{text[:max_chars]}"
    # ``predict`` is deprecated on LangChain chat models; ``invoke`` is the
    # supported entry point and returns a message whose ``content`` holds
    # the reply text.
    return llm.invoke(prompt).content
40
+
41
def process_pdf(file):
    """Extract the text of an uploaded PDF and return its summary.

    Args:
        file: The upload supplied by the Gradio ``File`` input, or ``None``
            when nothing was uploaded.

    Returns:
        The summary produced by ``summarize_text``, or a short message when
        there is no upload or no text could be extracted.
    """
    if file is None:
        return "No file uploaded."

    # Reuse the shared extractor rather than duplicating its page loop.
    text = extract_text_from_pdf(file)

    # A PDF with no extractable text (e.g. image-only pages) would otherwise
    # send an empty document to the model; report it instead.
    if not text.strip():
        return "No extractable text found in the PDF."

    # summarize_text caps the prompt size itself, so no truncation is needed
    # here.
    return summarize_text(text)
60
+
61
# Build the Gradio UI: one PDF upload in, plain summary text out.
pdf_upload = gr.File(label="Upload a PDF")
interface = gr.Interface(
    fn=process_pdf,
    inputs=pdf_upload,
    outputs="text",
    title="📄 PDF Summarizer",
    description="Upload a PDF file and get a summary",
)

# Start serving the app.
interface.launch()