Tulika2000 committed on
Commit
8ba0424
·
verified ·
1 Parent(s): 3a6c147

Delete app (1).py

Browse files
Files changed (1) hide show
  1. app (1).py +0 -84
app (1).py DELETED
@@ -1,84 +0,0 @@
1
- # -*- coding: utf-8 -*-
2
- """app.ipynb
3
-
4
- Automatically generated by Colab.
5
-
6
- Original file is located at
7
- https://colab.research.google.com/drive/1XblbxoRxB4XOHixjGij789FPD9KjKdhi
8
- """
9
-
10
- import os
11
- import pdfplumber
12
- import gradio as gr
13
- from langchain_groq.chat_models import ChatGroq
14
-
15
# Groq credentials are read from the process environment; startup is aborted
# early when the variable is missing or empty.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
if GROQ_API_KEY is None or GROQ_API_KEY == "":
    raise ValueError("GROQ_API_KEY is not set. Add it in Hugging Face Secrets.")

# Shared chat model used by the summarizer (llama-3.3-70b-versatile).
# NOTE(review): the key is not passed explicitly here; presumably ChatGroq
# picks it up from the environment itself -- confirm.
llm = ChatGroq(model_name="llama-3.3-70b-versatile")
22
-
23
def extract_text_from_pdf(pdf_file) -> str:
    """Extract the text of every page of a text-based PDF.

    Args:
        pdf_file: Whatever ``pdfplumber.open`` accepts; the caller in this
            file passes a filesystem path.

    Returns:
        The stripped text of each page that yielded text, joined by a blank
        line. Failures are reported in-band instead of being raised:

        * a string starting with ``"Error extracting text:"`` when opening
          or parsing the PDF raised an exception;
        * a string starting with ``"⚠️"`` when no page produced any text.
    """
    page_texts = []
    try:
        with pdfplumber.open(pdf_file) as pdf:
            for page in pdf.pages:
                page_text = page.extract_text()
                if page_text:
                    page_texts.append(page_text.strip())
    except Exception as e:
        # Deliberately broad: any failure becomes a message the UI can show
        # directly rather than a traceback.
        return f"Error extracting text: {e}"

    # Join once instead of growing a string page by page.
    text = "\n\n".join(page_texts).strip()
    if not text:
        return "⚠️ No readable text found. This might be a scanned or image-based PDF."

    return text
39
-
40
def summarize_text(text: str, length: str, style: str, *, max_chars: int = 10000) -> str:
    """Summarize *text* with the module-level ``llm`` chat model.

    Args:
        text: Document text to summarize.
        length: Desired summary length label (e.g. "Short"); lower-cased
            before being placed in the prompt.
        style: Desired summary format label (e.g. "Bullets"); lower-cased
            before being placed in the prompt.
        max_chars: Only the first ``max_chars`` characters of *text* are sent
            to the model, to keep the request small.

    Returns:
        The model's reply with surrounding whitespace stripped.
    """
    # Truncate outside the prompt template so no explanatory comment ends up
    # inside the text that is sent to the model.
    excerpt = text[:max_chars]
    prompt = (
        f"Read the following document and summarize it in {style.lower()} format.\n"
        f"Keep the summary {length.lower()}.\n"
        "Follow this structured reasoning:\n"
        "1. Identify key sections & main topics.\n"
        "2. Extract essential points from each section.\n"
        "3. Remove redundant information.\n"
        "4. Ensure accuracy without hallucination.\n"
        "\n"
        "Document:\n"
        f"{excerpt}"
    )
    # NOTE(review): `predict` is deprecated in newer LangChain releases in
    # favour of `invoke(...).content` -- confirm against the pinned version.
    response = llm.predict(prompt)
    return response.strip()
58
-
59
def process_pdf(file, length, style):
    """Extract the text of an uploaded PDF and return its summary.

    Args:
        file: The upload handed over by the Gradio file input: either an
            object exposing the temp-file path as ``.name`` or a plain path
            string. Falsy when nothing was uploaded.
        length: Summary length label forwarded to ``summarize_text``.
        style: Summary style label forwarded to ``summarize_text``.

    Returns:
        The summary text, or a user-facing warning/error message when no
        file was uploaded or text extraction failed.
    """
    if not file:
        return "⚠️ No file uploaded. Please upload a PDF."

    # Accept both upload shapes: use `.name` when present, otherwise treat
    # the value itself as the path.
    pdf_path = getattr(file, "name", file)

    text = extract_text_from_pdf(pdf_path)
    # Match the exact prefixes produced by extract_text_from_pdf so that a
    # document whose own text merely begins with "Error" is still summarized.
    if text.startswith(("⚠️", "Error extracting text:")):
        return text  # Return error messages directly

    return summarize_text(text, length, style)
69
-
70
# Create Gradio Interface: one file upload plus two radio selectors feeding
# process_pdf, with the result shown as plain text.
interface = gr.Interface(
    fn=process_pdf,
    inputs=[
        gr.File(label="📄 Upload a PDF"),
        gr.Radio(["Short", "Medium", "Long"], label="📏 Summary Length", value="Medium"),
        gr.Radio(["Bullets", "Key Takeaways", "Concise Paragraph"], label="📌 Summary Style", value="Key Takeaways"),
    ],
    outputs="text",
    title="📄 PDF Summarizer (Text-Based PDFs Only)",
    description="Upload a PDF file (text-based only) and get a structured summary. Not for scanned/image PDFs.",
)

# Run the app only when executed as a script, so importing this module
# (e.g. from tests or tooling) does not start a web server.
if __name__ == "__main__":
    interface.launch()