Izza-shahzad-13 committed on
Commit
ce93c96
·
verified ·
1 Parent(s): 7707abb

Delete FYP RAG/summerization-app/app.py

Browse files
Files changed (1) hide show
  1. FYP RAG/summerization-app/app.py +0 -446
FYP RAG/summerization-app/app.py DELETED
@@ -1,446 +0,0 @@
1
- from flask import Flask, render_template, request, send_file,abort,send_from_directory
2
- import torch
3
- from werkzeug.security import generate_password_hash, check_password_hash
4
- from flask import Flask, render_template, request, redirect, url_for, jsonify, session
5
-
6
- from flask import Flask, request, jsonify
7
- from pymongo import MongoClient
8
- import pickle
9
- from pymongo import MongoClient
10
- from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
11
- from PyPDF2 import PdfReader
12
- from docx import Document
13
- import re
14
- import seaborn as sns
15
- import matplotlib.pyplot as plt
16
- import os
17
- import string
18
- import requests
19
- from bs4 import BeautifulSoup
20
- from flask import jsonify
21
- from flask import Flask, render_template, request, jsonify
22
- from sentence_transformers import SentenceTransformer
23
- import faiss
24
- import fitz
25
- import groq
26
- import PyPDF2
27
- import numpy as np
28
- from flask import Flask, render_template, request, redirect, url_for, flash, session
29
- from flask_argon2 import Argon2
30
- from pymongo import MongoClient
31
- import os
32
- import pdfplumber
33
- from groq import Groq
34
- import logging
35
- logging.getLogger("pdfminer").setLevel(logging.ERROR)
36
-
37
-
38
-
39
app = Flask(__name__)

# SECURITY: the Groq API key must never be committed to source control.
# It is read from the environment instead; any key that was previously
# hard-coded here should be considered leaked and be rotated.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
if not GROQ_API_KEY:
    raise RuntimeError("GROQ_API_KEY environment variable is not set.")
cli = Groq(api_key=GROQ_API_KEY)

# Load embedding model globally so it is created once per process
embedding_model = SentenceTransformer('all-MiniLM-L6-v2')

# Load preprocessed data
# NOTE(security): pickle.load can execute arbitrary code while unpickling.
# Only ever ship a legal_data.pkl that was produced by a trusted pipeline.
with open("data//legal_data.pkl", "rb") as f:
    legal_data = pickle.load(f)

# Load FAISS indices: maps law name -> (faiss index, structured sections).
# A law whose index fails to load is reported and skipped, so the app still
# starts and the /ask route answers with an error for that law only.
faiss_indices = {}
for law in legal_data:
    try:
        index_path = f"data/{law.replace(' ', '_')}_faiss.index"
        index = faiss.read_index(index_path)
        faiss_indices[law] = (index, legal_data[law])
    except Exception as e:
        print(f"Error loading FAISS index for {law}: {str(e)}")
60
-
61
- # Helper to match section
62
def get_exact_section(section_number, structured_data):
    """Return the first entry whose "section_id" equals *section_number*.

    Both values are compared after stripping surrounding whitespace.
    Returns None when no entry matches.
    """
    return next(
        (
            entry
            for entry in structured_data
            if entry["section_id"].strip() == section_number.strip()
        ),
        None,
    )
67
-
68
- # Helper to find relevant section via similarity
69
def find_relevant_section(query, model, index, structured_data, top_k=3):
    """Return up to *top_k* entries of *structured_data* nearest to *query*.

    The query is embedded with ``model.encode`` and looked up with
    ``index.search``; the returned positions are used to index
    *structured_data*.

    FAISS pads its result with -1 when fewer than *top_k* neighbours exist.
    A bare ``i < len(...)`` check lets -1 through and silently returns the
    LAST section, so positions are validated on both sides.
    """
    # FAISS expects float32 input; asarray is a no-op when it already is.
    query_embedding = np.asarray(model.encode([query]), dtype="float32")
    _distances, indices = index.search(query_embedding, top_k)
    return [
        structured_data[i]
        for i in indices[0]
        if 0 <= i < len(structured_data)
    ]
73
-
74
- # Generate answer using Groq
75
def generate_response_with_groq(prompt, section_number, book_name, context):
    """Ask the Groq chat model to explain a section and return its reply.

    The user message is "According to Section <n> of <book>, <prompt>"
    followed by the section text as context. On any exception the error is
    printed and a fixed error string is returned instead.
    """
    full_prompt = f"According to Section {section_number} of {book_name}, {prompt}"
    try:
        system_message = {
            "role": "system",
            "content": "You are a legal assistant providing detailed and comprehensive legal explanations based on Pakistani law. Always provide at least 5-6 sentences per response.",
        }
        user_message = {
            "role": "user",
            "content": f"{full_prompt}\n\nContext: {context}",
        }
        completion = cli.chat.completions.create(
            model="llama3-8b-8192",
            messages=[system_message, user_message],
            max_tokens=1000,
        )
        answer = completion.choices[0].message.content
        return answer.strip()
    except Exception as e:
        print(f"Error with Groq API: {str(e)}")
        return "Error communicating with Groq API."
96
-
97
-
98
-
99
- # Main route
100
@app.route('/ask', methods=['POST'])
def ask():
    """Answer a "What is Section No. X of <book>?" question.

    Expects a JSON object with a "query" string. Returns a JSON object with
    a "response" string and one of:
      200 - answer generated,
      400 - body is not a JSON object, query is empty, or format is wrong,
      404 - book not recognised or no relevant section found,
      500 - FAISS index missing for the book, or an unexpected error.
    """
    try:
        # Ensure the request is JSON
        if not request.is_json:
            return jsonify({'response': 'Request must be JSON'}), 400

        # silent=True: malformed JSON yields None instead of raising, so it
        # is reported as a client error (400) rather than a server error.
        data = request.get_json(silent=True)
        query = data.get('query', '') if isinstance(data, dict) else ''
        if not isinstance(query, str) or not query.strip():
            return jsonify({'response': 'Please enter a valid question.'}), 400
        query = query.strip()

        # Extract section number and book name using regex
        pattern = r"what\s+is\s+section\s+no\.?\s*(\d+[A-Z]?(?:\(\d+\))?)\s+of\s+(.*)"
        match = re.search(pattern, query, re.IGNORECASE)
        if not match:
            return jsonify({'response': 'Please ask your question in this format: "What is Section No. 302 of Pakistan Penal Code?"'}), 400

        section_number = match.group(1).strip()
        # The suggested question format ends with "?", which the greedy
        # "(.*)" captures; drop trailing sentence punctuation so the book
        # name can actually match a known law.
        book_name = match.group(2).strip().rstrip('?.!').strip()

        # An empty name would be a substring of every law, so reject it.
        matched_book = None
        if book_name:
            wanted = book_name.lower()
            matched_book = next(
                (law for law in legal_data if wanted in law.lower()),
                None,
            )

        if not matched_book:
            return jsonify({'response': 'Book name not recognized. Please try again with a valid book name.'}), 404

        index, structured_data = faiss_indices.get(matched_book, (None, None))
        if index is None or structured_data is None:
            return jsonify({'response': 'Error loading FAISS index for the selected law.'}), 500

        # Prefer the exact section; fall back to the nearest one by embedding.
        exact_section = get_exact_section(section_number, structured_data)
        if exact_section:
            response = generate_response_with_groq(query, section_number, matched_book, exact_section['content'])
            return jsonify({'response': response}), 200

        relevant = find_relevant_section(query, embedding_model, index, structured_data)
        if relevant:
            response = generate_response_with_groq(query, relevant[0]['section_id'], matched_book, relevant[0]['content'])
            return jsonify({'response': response}), 200

        return jsonify({'response': 'No relevant section found.'}), 404

    except Exception as e:
        print(f"Server error at /ask: {str(e)}")
        return jsonify({'response': 'An internal error occurred. Please try again later.'}), 500
151
-
152
-
153
-
154
- # Flask route
155
-
156
-
157
- # Load the fine-tuned Legal LED model
158
# Model identifier passed to from_pretrained for the fine-tuned Legal LED
# summarizer. Model and tokenizer are loaded once at import time and shared
# by generate_summary.
MODEL_NAME = "Izza-shahzad-13/legal-LED-final"
model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME)
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
161
-
162
- # Function to generate summary
163
def generate_summary(text):
    """Summarize *text* with the module-level tokenizer and model.

    The input is prefixed with "summarize: " and truncated to 1024 tokens;
    the decoded summary is generated with 4-beam search, between 40 and
    800 tokens long.
    """
    prompt = "summarize: " + text
    input_ids = tokenizer.encode(
        prompt,
        return_tensors="pt",
        max_length=1024,
        truncation=True,
    )
    generated = model.generate(
        input_ids,
        max_length=800,
        min_length=40,
        length_penalty=2.0,
        num_beams=4,
        early_stopping=True,
    )
    best_sequence = generated[0]
    return tokenizer.decode(best_sequence, skip_special_tokens=True)
167
-
168
- # Function to calculate sentence importance scores
169
def calculate_sentence_importance(summary):
    """Split *summary* into sentences and score each by relative length.

    Returns ``(sentences, normalized_scores)`` where each score is the
    sentence length divided by the longest sentence length (so the longest
    sentence scores 1.0). The two lists always have the same length and
    contain at least one element.
    """
    sentences = summary.split(". ")

    # Splitting on ". " leaves the final full stop attached to the last
    # sentence; highlight_summary appends "." to every sentence, so keeping
    # it would render a doubled period.
    if sentences[-1].endswith("."):
        sentences[-1] = sentences[-1][:-1]

    scores = [len(sentence) for sentence in sentences]  # Score based on sentence length
    # An empty summary yields a single empty sentence with score 0; fall
    # back to 1 to avoid dividing by zero.
    max_score = max(scores) or 1
    normalized_scores = [score / max_score for score in scores]
    return sentences, normalized_scores
175
-
176
- # Function to generate heatmap
177
def generate_heatmap(scores):
    """Render *scores* as a one-row heatmap saved to static/heatmap.png.

    NOTE(review): the output path is fixed, so concurrent requests overwrite
    each other's image; a per-request file name would be needed to fix that.
    """
    # savefig fails if the target directory is missing.
    os.makedirs("static", exist_ok=True)

    fig = plt.figure(figsize=(10, 2))
    try:
        sns.heatmap([scores], annot=True, cmap="coolwarm", xticklabels=False, yticklabels=False, cbar=True)
        plt.title("Sentence Importance Heatmap")
        plt.savefig("static/heatmap.png")  # Save heatmap image
    finally:
        # Always release the figure, even when plotting or saving raises;
        # otherwise figures accumulate in a long-running server process.
        plt.close(fig)
183
-
184
- # Function to highlight sentences in the summary
185
def highlight_summary(sentences, scores):
    """Return HTML in which each sentence is a <span> coloured by its score.

    Each score is mapped through seaborn's "coolwarm" colormap to an RGB
    background colour. Every sentence is followed by "." and a space.
    """
    # Local import: the sentences originate from user-supplied documents,
    # so they must be escaped before being embedded in markup.
    from html import escape

    cmap = sns.color_palette("coolwarm", as_cmap=True)
    spans = []

    for sentence, score in zip(sentences, scores):
        color = cmap(score)
        rgb_color = f"rgb({int(color[0]*255)}, {int(color[1]*255)}, {int(color[2]*255)})"
        # quote=False: the text lands in element content, not an attribute.
        safe_sentence = escape(sentence, quote=False)
        spans.append(f'<span style="background-color:{rgb_color};padding:2px;">{safe_sentence}.</span> ')

    return "".join(spans)
195
-
196
- # Function to highlight legal terms
197
# Patterns for legal references, most specific first. They are combined into
# one alternation so the text is scanned exactly once: a span of text is
# wrapped by at most one highlight and inserted markup is never re-matched.
_KEYWORD_PATTERNS = (
    # Example: Constitutional Petition No. 123/2024
    r'\b(?:[A-Za-z\s]*Petition\sNo\.\s\d+/\d{4})\b',
    # Example: Pemra Ordinance, 2002
    r'\b(?:[A-Z][a-z\s]+Ordinance(?:,\s\d{4})?)\b',
    # Example: Control of Narcotic Substances Act, 1997
    r'\b(?:[A-Za-z\s]+(?:\sAct(?:\s\d{4})?)),\s*(?:\d{4})\b',
    # Example: Article 10-A
    r'\bArticle\s\d{1,3}(?:-[A-Z])?\b',
    # Example: Section 302(b), Section 9(c), Section 144-A.
    # Ends with (?!\w) rather than \b: a \b can never match between ")" and
    # a following space, which used to drop the "(b)" part.
    r'\bSection\s\d{1,3}(?:-[A-Z])?(?:\([a-zA-Z0-9]+\))?(?!\w)',
    # Examples: 15/07/2015, March 5, 2021, 2023
    r'\b(?:\d{1,2}[-/]\d{1,2}[-/]\d{2,4}|\d{4}|(?:January|February|March|April|May|June|July|August|September|October|November|December)\s\d{1,2},?\s\d{4})\b',
    # Runs of capitalised words, e.g. Justice Ali Raza
    r'\b[A-Z][a-z]+(?:\s[A-Z][a-z]+)*\b',
)
_KEYWORD_RE = re.compile('|'.join(f'(?:{p})' for p in _KEYWORD_PATTERNS))


def highlight_keywords(text):
    """Wrap legal references in *text* with <span class="highlight">.

    Recognised references: petitions, ordinances, acts with a year,
    articles, sections, dates and capitalised names. The matched text is
    kept unchanged inside the span; text without matches is returned as is.
    """
    return _KEYWORD_RE.sub(
        lambda match: f'<span class="highlight">{match.group(0)}</span>',
        text,
    )
220
-
221
- # Function to read uploaded files
222
def read_file(file):
    """Extract plain text from an uploaded .txt, .pdf or .docx file.

    *file* must expose ``filename`` and be readable (``read()`` for text
    files; passed directly to ``PdfReader`` / ``Document`` otherwise).
    Returns the extracted text, or None for any other extension.
    """
    # Match the extension case-insensitively so "REPORT.PDF" is accepted.
    filename = file.filename.lower()

    if filename.endswith(".txt"):
        # Replace undecodable bytes instead of failing the whole upload on
        # a file that is not valid UTF-8.
        return file.read().decode("utf-8", errors="replace")

    if filename.endswith(".pdf"):
        pdf_reader = PdfReader(file)
        # extract_text() can return None for pages without a text layer.
        return " ".join(page.extract_text() or "" for page in pdf_reader.pages)

    if filename.endswith(".docx"):
        doc = Document(file)
        return " ".join(paragraph.text for paragraph in doc.paragraphs)

    return None
232
-
233
- # Function to fetch text from a URL
234
def fetch_text_from_url(url):
    """Download *url* and return its text, or None when unavailable.

    HTML pages yield the text of their <p> elements joined by spaces;
    text/plain responses are returned as is. Any other content type, a
    non-http(s) URL, or any error results in None.

    NOTE(security): the URL comes from the user and is fetched server-side;
    this still allows requests to internal hosts (SSRF). Restrict target
    hosts at the network level if that matters for the deployment.
    """
    # Local import keeps this block self-contained.
    from urllib.parse import urlparse

    try:
        # Only fetch web URLs; reject file://, ftp:// and malformed input.
        parsed = urlparse(url)
        if parsed.scheme not in ("http", "https") or not parsed.netloc:
            return None

        # Without a timeout a stalled server would block the request forever.
        response = requests.get(url, timeout=10)
        response.raise_for_status()

        # Check content type
        content_type = response.headers.get("Content-Type", "")
        if "text/html" in content_type:  # If it's a webpage
            soup = BeautifulSoup(response.text, "html.parser")
            paragraphs = soup.find_all("p")  # Extract paragraph text
            return " ".join(p.get_text() for p in paragraphs)

        if "text/plain" in content_type:  # If it's a plain text file
            return response.text

        return None
    except Exception as e:
        print("Error fetching URL:", e)
        return None
254
-
255
@app.route("/", methods=["GET", "POST"])
def index():
    """Summarization page: show the form, or summarize the submitted input.

    On POST the text is taken from the uploaded file, else the pasted text,
    else the given URL. When text is available it is summarized, a heatmap
    is generated, the plain summary is written to summary.txt and the
    highlighted summary is rendered. Otherwise the empty page is rendered.
    """
    empty_page = dict(document_text=None, summary=None, heatmap_url=None)

    if request.method != "POST":
        return render_template("mainscreen.html", **empty_page)

    upload = request.files.get("file")
    pasted_text = request.form.get("pasteText", "").strip()
    url = request.form.get("url", "").strip()

    # Source priority: uploaded file, then pasted text, then URL.
    document_text = None
    if upload and upload.filename:
        document_text = read_file(upload)
    elif pasted_text:
        document_text = pasted_text
    elif url:
        document_text = fetch_text_from_url(url)

    if not document_text:
        return render_template("mainscreen.html", **empty_page)

    summary = generate_summary(document_text)
    sentences, scores = calculate_sentence_importance(summary)

    generate_heatmap(scores)

    highlighted_summary = highlight_keywords(highlight_summary(sentences, scores))

    # Persist the plain summary so /download_summary can serve it.
    with open("summary.txt", "w", encoding="utf-8") as out:
        out.write(summary)

    return render_template(
        "mainscreen.html",
        document_text=document_text,
        summary=highlighted_summary,
        heatmap_url="static/heatmap.png",
    )
289
-
290
@app.route("/download_summary")
def download_summary():
    """Send summary.txt from the working directory as a download (404 if absent)."""
    file_path = os.path.join(os.getcwd(), "summary.txt")

    if os.path.exists(file_path):
        return send_file(
            file_path,
            as_attachment=True,
            download_name="summary.txt",
            mimetype="text/plain",
        )

    return abort(404, description="File not found")
298
-
299
-
300
- # Homepage
301
@app.route("/home")
def home():
    """Render the homepage template."""
    return render_template("homepage.html")


@app.route("/about")
def about():
    """Render the about page template."""
    return render_template("aboutpage.html")


@app.route("/summarization")
def summarization():
    """Render the summarization screen template (the same one "/" uses)."""
    return render_template("mainscreen.html")
310
-
311
-
312
@app.route('/lawbooks/<filename>')
def serve_pdf(filename):
    """Serve the requested file from the static/lawbooks directory."""
    lawbooks_dir = 'static/lawbooks'
    return send_from_directory(lawbooks_dir, filename)
315
-
316
-
317
-
318
-
319
- # MongoDB connection
320
# SECURITY: the MongoDB connection string contains credentials and must not
# live in source control. It is read from the environment instead; any
# credentials that were previously committed should be rotated.
MONGODB_URI = os.environ.get("MONGODB_URI")
if not MONGODB_URI:
    raise RuntimeError("MONGODB_URI environment variable is not set.")

client = MongoClient(MONGODB_URI)
db = client['chatbotDB']
users = db['users']
323
-
324
-
325
-
326
@app.route('/signup', methods=['GET'])
def signup():
    """Render the signup form; the form data is handled by /api/signup."""
    signup_template = 'signuppage.html'
    return render_template(signup_template)
329
-
330
@app.route('/api/signup', methods=['POST'])
def api_signup():
    """Create a user from a JSON body.

    Reads "firstName", "lastName", "email" and "password". Returns 201 on
    success, or 400 when the body is not a JSON object, email/password are
    missing, or the email is already registered.
    """
    # silent=True returns None for a missing/malformed body instead of
    # raising, so the client gets a clear 400 rather than a 500.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({'message': 'Request body must be a JSON object!'}), 400

    first_name = data.get('firstName')
    last_name = data.get('lastName')
    email = data.get('email')
    password = data.get('password')

    # generate_password_hash needs a string; reject missing credentials.
    if not isinstance(email, str) or not email.strip():
        return jsonify({'message': 'Email and password are required!'}), 400
    if not isinstance(password, str) or not password:
        return jsonify({'message': 'Email and password are required!'}), 400

    # Check if the user already exists (before paying for the hash)
    if users.find_one({'email': email}):
        return jsonify({'message': 'Email already exists!'}), 400

    # Hash the password for security before storing it in the database
    hashed_pw = generate_password_hash(password)

    # Insert the user data into MongoDB
    users.insert_one({
        'first_name': first_name,
        'last_name': last_name,
        'email': email,
        'password': hashed_pw
    })

    # Return a success response
    return jsonify({'message': 'Signup successful!'}), 201
356
-
357
- # Success page or login page
358
@app.route('/login', methods=['GET', 'POST'])
def login():
    """Show the login form (GET) or check JSON credentials (POST).

    POST reads "email" and "password" from a JSON body and returns 200 when
    they match a stored user, 401 for an unknown email or wrong password,
    and 400 when the body or either field is missing.
    """
    if request.method == 'POST':
        # silent=True: a missing/malformed body becomes None, not a 500.
        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            return jsonify({'message': 'Request body must be a JSON object!'}), 400

        email = data.get('email')
        password = data.get('password')

        # check_password_hash needs a string password.
        if not isinstance(email, str) or not isinstance(password, str) or not email or not password:
            return jsonify({'message': 'Email and password are required!'}), 400

        # Log login attempt
        print(f"Login attempt - Email: {email}")

        # Check if the user exists
        user = users.find_one({'email': email})
        if not user:
            print(f"Login failed - Email '{email}' not found.")
            return jsonify({'message': 'Invalid email or password!'}), 401

        # Check if the password is correct (compare hashed passwords)
        if not check_password_hash(user['password'], password):
            print(f"Login failed - Incorrect password for email '{email}'.")
            return jsonify({'message': 'Invalid email or password!'}), 401

        # Log successful login
        print(f"Login successful - Email: {email}")
        return jsonify({'message': 'Login successful!'}), 200

    # Handle GET request - show the login form
    return render_template('loginpage.html')
386
-
387
-
388
@app.route('/reset-password', methods=['GET', 'POST'])
def reset_password():
    """Show the reset form (GET) or set a new password from form data (POST).

    POST reads "email", "newPassword" and "confirmPassword". Returns 200 on
    success, 400 when a field is missing or the passwords differ, and 404
    when no user has that email.

    NOTE(security): this endpoint changes a password knowing only the
    email address; there is no proof of ownership (emailed token, old
    password, or logged-in session). That must be added before this is
    exposed publicly.
    """
    if request.method == 'POST':
        email = request.form.get('email')
        new_password = request.form.get('newPassword')
        confirm_password = request.form.get('confirmPassword')

        # Reject missing or empty fields; an empty password must never be stored.
        if not email or not new_password or not confirm_password:
            return jsonify({'message': 'All fields are required!'}), 400

        # Check if passwords match
        if new_password != confirm_password:
            return jsonify({'message': 'Passwords do not match!'}), 400

        # Check if user exists
        user = users.find_one({'email': email})
        if not user:
            return jsonify({'message': 'User not found!'}), 404

        # Hash the new password
        hashed_pw = generate_password_hash(new_password)

        # Update the user's password in the database
        users.update_one({'email': email}, {'$set': {'password': hashed_pw}})
        return jsonify({'message': 'Password updated successfully!'}), 200

    return render_template('forgetpasswordpage.html')
412
-
413
-
414
contacts_collection = db["contacts"]


@app.route('/contact', methods=['GET', 'POST'])
def contact():
    """Show the contact form (GET) or store a submitted message (POST).

    POST reads "name", "email" and "message" from the form, returns 400 if
    any is empty, otherwise inserts them into the contacts collection and
    returns a thank-you message with status 200.
    """
    if request.method != 'POST':
        return render_template('contactpage.html')

    form = request.form
    name = form.get('name')
    email = form.get('email')
    message = form.get('message')

    print(f"Name: {name}, Email: {email}, Message: {message}")  # Debug

    if not (name and email and message):
        return jsonify({'message': 'All fields are required!'}), 400

    contacts_collection.insert_one({
        'name': name,
        'email': email,
        'message': message,
    })

    reply = {
        'message': f'Thank you, {name}! Your message has been sent successfully.',
        'status': 'success',
    }
    return jsonify(reply), 200
438
-
439
-
440
-
441
-
442
-
443
-
444
-
445
if __name__ == "__main__":
    # SECURITY: debug=True enables the Werkzeug interactive debugger, which
    # allows code execution by anyone who can reach the server. Since the
    # app listens on all interfaces, debug stays off unless explicitly
    # requested via FLASK_DEBUG=1 (or "true"/"yes").
    debug_enabled = os.environ.get("FLASK_DEBUG", "").lower() in ("1", "true", "yes")
    app.run(host="0.0.0.0", port=7860, debug=debug_enabled)