Izza-shahzad-13 committed on
Commit
d9b6f2c
·
verified ·
1 Parent(s): 4617fc4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +446 -0
app.py CHANGED
@@ -0,0 +1,446 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from flask import Flask, render_template, request, send_file,abort,send_from_directory
2
+ import torch
3
+ from werkzeug.security import generate_password_hash, check_password_hash
4
+ from flask import Flask, render_template, request, redirect, url_for, jsonify, session
5
+
6
+ from flask import Flask, request, jsonify
7
+ from pymongo import MongoClient
8
+ import pickle
9
+ from pymongo import MongoClient
10
+ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
11
+ from PyPDF2 import PdfReader
12
+ from docx import Document
13
+ import re
14
+ import seaborn as sns
15
+ import matplotlib.pyplot as plt
16
+ import os
17
+ import string
18
+ import requests
19
+ from bs4 import BeautifulSoup
20
+ from flask import jsonify
21
+ from flask import Flask, render_template, request, jsonify
22
+ from sentence_transformers import SentenceTransformer
23
+ import faiss
24
+ import fitz
25
+ import groq
26
+ import PyPDF2
27
+ import numpy as np
28
+ from flask import Flask, render_template, request, redirect, url_for, flash, session
29
+ from flask_argon2 import Argon2
30
+ from pymongo import MongoClient
31
+ import os
32
+ import pdfplumber
33
+ from groq import Groq
34
+ import logging
35
+ logging.getLogger("pdfminer").setLevel(logging.ERROR)
36
+
37
+
38
+
39
# Flask application object; every route in this module registers against it.
app = Flask(__name__)

# SECURITY: the Groq API key was previously hard-coded on this line, which
# exposes the credential to anyone who can read the repository. It is now read
# from the GROQ_API_KEY environment variable. The key that was committed must
# be treated as compromised and rotated.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
cli = Groq(api_key=GROQ_API_KEY)
43
+
44
# Sentence-embedding model, created once at import time and passed to
# find_relevant_section by the /ask route.
embedding_model = SentenceTransformer('all-MiniLM-L6-v2')

# Pre-processed legal data. It is iterated by law name below and indexed by
# that same name to fetch the law's structured sections.
# NOTE(review): pickle.load can execute arbitrary code from the file; this is
# only acceptable while the pickle is a trusted, locally produced artifact.
with open("data//legal_data.pkl", "rb") as f:
    legal_data = pickle.load(f)

# Maps law name -> (FAISS index, structured sections for that law).
# A law whose index cannot be loaded is reported on stdout and left out of the
# mapping, so one bad index file does not stop the application from starting.
faiss_indices = {}
for law in legal_data:
    try:
        # Index files are named after the law with spaces replaced by "_".
        index_path = f"data/{law.replace(' ', '_')}_faiss.index"
        index = faiss.read_index(index_path)
        faiss_indices[law] = (index, legal_data[law])
    except Exception as e:
        print(f"Error loading FAISS index for {law}: {str(e)}")
60
+
61
+ # Helper to match section
62
def get_exact_section(section_number, structured_data):
    """Return the first section whose ``section_id`` equals *section_number*.

    Both values are compared after stripping surrounding whitespace.

    Args:
        section_number: Section identifier to look for, as a string.
        structured_data: Iterable of dicts that each carry a ``section_id``
            string.

    Returns:
        The first matching dict, or ``None`` when nothing matches.
    """
    matches = (
        entry
        for entry in structured_data
        if entry["section_id"].strip() == section_number.strip()
    )
    return next(matches, None)
67
+
68
+ # Helper to find relevant section via similarity
69
def find_relevant_section(query, model, index, structured_data, top_k=3):
    """Return the sections most similar to *query* according to *index*.

    Args:
        query: Free-text question to embed.
        model: Object with an ``encode(list_of_texts)`` method that returns one
            embedding per text.
        index: Object with a ``search(embeddings, k)`` method that returns
            ``(distances, ids)``, each with one row per query.
        structured_data: Sequence of sections; the ids returned by *index* are
            used as positions in this sequence.
        top_k: Number of neighbours to request from the index.

    Returns:
        The sections at the returned positions, in the order the index
        reported them. Positions outside ``structured_data`` are skipped.
    """
    # FAISS only accepts float32 input, so normalise the dtype explicitly.
    query_embedding = np.asarray(model.encode([query]), dtype="float32")
    _distances, neighbor_ids = index.search(query_embedding, top_k)

    # FAISS pads the result with -1 when it finds fewer than top_k neighbours.
    # The lower bound is required: without it, -1 would pass the length check
    # and silently select the *last* section through negative indexing.
    section_count = len(structured_data)
    return [
        structured_data[int(position)]
        for position in neighbor_ids[0]
        if 0 <= position < section_count
    ]
73
+
74
+ # Generate answer using Groq
75
def generate_response_with_groq(prompt, section_number, book_name, context):
    """Ask the Groq chat model to explain a section of a law.

    Args:
        prompt: The user's question.
        section_number: Section identifier quoted in the request to the model.
        book_name: Name of the law quoted in the request to the model.
        context: Text of the section, appended to the request as context.

    Returns:
        The model's reply with surrounding whitespace removed, or the fixed
        string "Error communicating with Groq API." when the call fails for
        any reason (the error itself is printed to stdout).
    """
    full_prompt = f"According to Section {section_number} of {book_name}, {prompt}"
    try:
        system_message = {
            "role": "system",
            "content": "You are a legal assistant providing detailed and comprehensive legal explanations based on Pakistani law. Always provide at least 5-6 sentences per response.",
        }
        user_message = {
            "role": "user",
            "content": f"{full_prompt}\n\nContext: {context}",
        }
        completion = cli.chat.completions.create(
            model="llama3-8b-8192",
            messages=[system_message, user_message],
            max_tokens=1000,
        )
        answer = completion.choices[0].message.content
        return answer.strip()
    except Exception as e:
        print(f"Error with Groq API: {str(e)}")
        return "Error communicating with Groq API."
96
+
97
+
98
+
99
+ # Main route
100
@app.route('/ask', methods=['POST'])
def ask():
    """Answer a "What is Section No. <n> of <law>?" question sent as JSON.

    Reads the ``query`` field of the JSON body, extracts the section number
    and the law name with a regular expression, and asks Groq to explain the
    section. The stored section with that exact id is used as context; when
    there is none, the closest section from the law's FAISS index is used.

    Returns:
        A ``(json, status)`` pair whose JSON has a single ``response`` key:
        200 with the generated answer; 400 for a non-JSON body, an empty
        query or an unrecognised question format; 404 for an unknown law or
        when no relevant section is found; 500 when the law has no loaded
        index or an unexpected error occurs.
    """
    try:
        if not request.is_json:
            return jsonify({'response': 'Request must be JSON'}), 400

        # silent=True turns malformed JSON into None instead of an exception,
        # and a body that is not a JSON object has no "query" field to read.
        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            return jsonify({'response': 'Request must be JSON'}), 400

        # `or ''` covers an explicit null; str() covers non-string values.
        query = str(data.get('query') or '').strip()
        if not query:
            return jsonify({'response': 'Please enter a valid question.'}), 400

        # Extract section number and book name using regex
        pattern = r"what\s+is\s+section\s+no\.?\s*(\d+[A-Z]?(?:\(\d+\))?)\s+of\s+(.*)"
        match = re.search(pattern, query, re.IGNORECASE)
        if not match:
            return jsonify({'response': 'Please ask your question in this format: "What is Section No. 302 of Pakistan Penal Code?"'}), 400

        section_number = match.group(1).strip()
        # The greedy (.*) also captures trailing punctuation, so the format
        # advertised above ("... Pakistan Penal Code?") used to yield the book
        # name "Pakistan Penal Code?", which never matched any law.
        book_name = match.group(2).strip().rstrip('?.! \t')

        # An empty name would be a substring of every law, so reject it here.
        matched_book = None
        if book_name:
            wanted = book_name.lower()
            matched_book = next((law for law in legal_data if wanted in law.lower()), None)

        if not matched_book:
            return jsonify({'response': 'Book name not recognized. Please try again with a valid book name.'}), 404

        index, structured_data = faiss_indices.get(matched_book, (None, None))
        if index is None or structured_data is None:
            return jsonify({'response': 'Error loading FAISS index for the selected law.'}), 500

        exact_section = get_exact_section(section_number, structured_data)
        if exact_section:
            response = generate_response_with_groq(query, section_number, matched_book, exact_section['content'])
            return jsonify({'response': response}), 200

        # No section with that exact id: fall back to semantic similarity.
        relevant = find_relevant_section(query, embedding_model, index, structured_data)
        if not relevant:
            return jsonify({'response': 'No relevant section found.'}), 404

        response = generate_response_with_groq(query, relevant[0]['section_id'], matched_book, relevant[0]['content'])
        return jsonify({'response': response}), 200

    except Exception as e:
        print(f"Server error at /ask: {str(e)}")
        return jsonify({'response': 'An internal error occurred. Please try again later.'}), 500
151
+
152
+
153
+
154
+ # Flask route
155
+
156
+
157
# Identifier of the fine-tuned Legal LED summarisation model. The tokenizer
# and the seq2seq model are both loaded from it once, at import time, and are
# used by generate_summary.
MODEL_NAME = "Izza-shahzad-13/legal-LED-final"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME)
161
+
162
+ # Function to generate summary
163
def generate_summary(text):
    """Summarise *text* with the module-level seq2seq model.

    The input is prefixed with "summarize: " and truncated to 1024 tokens.
    Generation uses 4-beam search with early stopping and produces between 40
    and 800 tokens.

    Args:
        text: Document text to summarise.

    Returns:
        The decoded first generated sequence, without special tokens.
    """
    input_ids = tokenizer.encode(
        "summarize: " + text,
        return_tensors="pt",
        max_length=1024,
        truncation=True,
    )
    generated = model.generate(
        input_ids,
        max_length=800,
        min_length=40,
        length_penalty=2.0,
        num_beams=4,
        early_stopping=True,
    )
    best_sequence = generated[0]
    return tokenizer.decode(best_sequence, skip_special_tokens=True)
167
+
168
+ # Function to calculate sentence importance scores
169
def calculate_sentence_importance(summary):
    """Split *summary* into sentences and score each one by relative length.

    Sentences are obtained by splitting on ". ". A sentence's score is its
    character count divided by the character count of the longest sentence,
    so the longest sentence scores 1.0.

    Args:
        summary: Summary text.

    Returns:
        A ``(sentences, scores)`` pair of equally long lists.
    """
    sentences = summary.split(". ")
    lengths = [len(sentence) for sentence in sentences]
    # str.split never returns an empty list, so an empty summary yields [""]
    # with a maximum length of 0. Falling back to 1 avoids the
    # ZeroDivisionError that this case used to raise.
    longest = max(lengths, default=0) or 1
    normalized_scores = [length / longest for length in lengths]
    return sentences, normalized_scores
175
+
176
+ # Function to generate heatmap
177
def generate_heatmap(scores):
    """Render *scores* as a one-row heatmap and save it to static/heatmap.png.

    Args:
        scores: Sequence of numeric sentence scores, drawn left to right.

    NOTE(review): the output path is fixed, so concurrent requests overwrite
    each other's image.
    """
    figure = plt.figure(figsize=(10, 2))
    try:
        sns.heatmap([scores], annot=True, cmap="coolwarm", xticklabels=False, yticklabels=False, cbar=True)
        plt.title("Sentence Importance Heatmap")
        plt.savefig("static/heatmap.png")  # Save heatmap image
    finally:
        # pyplot keeps every open figure alive until it is closed. Closing in
        # `finally` stops a failed render from leaking a figure in this
        # long-running server process.
        plt.close(figure)
183
+
184
+ # Function to highlight sentences in the summary
185
def highlight_summary(sentences, scores):
    """Wrap each sentence in a ``<span>`` coloured by its importance score.

    The background colour is taken from seaborn's "coolwarm" colormap at the
    sentence's score.

    Args:
        sentences: Sentence strings, as plain text.
        scores: One score per sentence; extra items on either side are
            ignored, as with ``zip``.

    Returns:
        An HTML fragment with one span per sentence, each followed by a space.
    """
    import html  # local import: only this function needs it

    cmap = sns.color_palette("coolwarm", as_cmap=True)
    spans = []
    for sentence, score in zip(sentences, scores):
        red, green, blue = (int(channel * 255) for channel in cmap(score)[:3])
        rgb_color = f"rgb({red}, {green}, {blue})"

        # The sentence comes from model output over a user-supplied document
        # and is inserted into markup, so neutralise &, < and > first.
        safe_text = html.escape(sentence, quote=False)
        # Splitting on ". " removes the full stop from every sentence except
        # the last; only add one where it is missing, so the final sentence
        # does not end in "..".
        if not safe_text.endswith("."):
            safe_text += "."

        spans.append(f'<span style="background-color:{rgb_color};padding:2px;">{safe_text}</span> ')

    return "".join(spans)
195
+
196
+ # Function to highlight legal terms
197
# Patterns for legal references, most specific first. They are combined into
# one alternation so that the text is scanned exactly once.
#
# The original dict literal repeated the keys 'act_with_year', 'article',
# 'section' and 'date', so only the later definition of each ever took effect;
# those effective definitions are the ones kept here. The multi-word 'person'
# pattern is omitted because the single-word variant below already matches
# everything it matched.
# NOTE(review): the last pattern highlights every capitalised word and the
# petition/act patterns can swallow the words preceding them; confirm whether
# that breadth is intended.
_HIGHLIGHT_PATTERNS = (
    # e.g. Constitutional Petition No. 123/2024
    r'\b(?:[A-Za-z\s]*Petition\sNo\.\s\d+/\d{4})\b',
    # e.g. Control of Narcotic Substances Act, 1997
    r'\b([A-Za-z\s]+(?:\sAct(?:\s\d{4})?)),\s*(\d{4})\b',
    # e.g. Hudood Ordinance, 1979
    r'\b([A-Z][a-z\s]+Ordinance(?:,\s\d{4})?)\b',
    # e.g. Article 10-A
    r'\b(Article\s\d{1,3}(-[A-Z])?)\b',
    # e.g. Section 302
    r'\b(Section\s\d{1,3}(\([a-zA-Z0-9]+\))?)\b',
    # e.g. 15/07/2015, March 5, 2021, 2023
    r'\b(?:\d{1,2}[-/]\d{1,2}[-/]\d{2,4}|\d{4}|\b(?:January|February|March|April|May|June|July|August|September|October|November|December)\s\d{1,2},?\s\d{4})\b',
    # e.g. Justice Ali Raza (also any single capitalised word)
    r'\b([A-Z][a-z]+(?:\s[A-Z][a-z]+)*)\b',
)
_HIGHLIGHT_RE = re.compile("|".join(f"(?:{pattern})" for pattern in _HIGHLIGHT_PATTERNS))


def highlight_keywords(text):
    """Wrap legal references found in *text* in ``<span class="highlight">``.

    All patterns are applied in a single pass, so matches never overlap and a
    span inserted for one pattern is never re-scanned by another. Previously
    each pattern ran over the output of the one before it, which nested spans
    inside spans (for example "Section" was wrapped again inside an already
    highlighted "Section 302") and broke references such as petition numbers
    once their year had been wrapped.

    Args:
        text: Plain text or an HTML fragment.

    Returns:
        *text* with every match wrapped in a highlight span.
    """
    return _HIGHLIGHT_RE.sub(
        lambda match: f'<span class="highlight">{match.group(0)}</span>',
        text,
    )
220
+
221
+ # Function to read uploaded files
222
def read_file(file):
    """Extract the text of an uploaded .txt, .pdf or .docx file.

    Args:
        file: Uploaded file object with a ``filename`` attribute; it must also
            be readable by the parser chosen for its extension.

    Returns:
        The extracted text, or ``None`` when the extension is not supported.
    """
    # Compare case-insensitively so that "REPORT.PDF" is accepted as well.
    filename = (file.filename or "").lower()

    if filename.endswith(".txt"):
        # Replace undecodable bytes rather than failing the whole request on
        # a file that is not valid UTF-8.
        return file.read().decode("utf-8", errors="replace")

    if filename.endswith(".pdf"):
        pdf_reader = PdfReader(file)
        # extract_text() may yield None for a page without a text layer, and
        # str.join would raise TypeError on it.
        return " ".join(page.extract_text() or "" for page in pdf_reader.pages)

    if filename.endswith(".docx"):
        doc = Document(file)
        return " ".join(paragraph.text for paragraph in doc.paragraphs)

    return None
232
+
233
+ # Function to fetch text from a URL
234
def fetch_text_from_url(url):
    """Download *url* and return its readable text.

    For an HTML response the text of all ``<p>`` elements is joined with
    spaces; a plain-text response is returned as is.

    Args:
        url: Address to fetch.

    Returns:
        The extracted text, or ``None`` when the content type is neither HTML
        nor plain text or when anything goes wrong (the error is printed).

    NOTE(review): the URL comes straight from the submitted form, so this
    server can be made to request arbitrary hosts, including internal ones.
    Consider restricting the allowed destinations.
    """
    try:
        # Without a timeout a slow or unresponsive host would block this
        # worker indefinitely.
        response = requests.get(url, timeout=10)
        response.raise_for_status()

        content_type = response.headers.get("Content-Type", "")
        if "text/html" in content_type:  # a web page
            soup = BeautifulSoup(response.text, "html.parser")
            return " ".join(p.get_text() for p in soup.find_all("p"))

        if "text/plain" in content_type:  # a plain text file
            return response.text

        return None
    except Exception as e:
        print("Error fetching URL:", e)
        return None
254
+
255
@app.route("/", methods=["GET", "POST"])
def index():
    """Render the summarisation screen and, on POST, summarise a document.

    The document is taken from the first available source, in this order: an
    uploaded file, the pasted text, then a URL. When text is obtained it is
    summarised, a heatmap image is generated, the plain summary is written to
    summary.txt, and the page is rendered with the highlighted summary. In
    every other case the empty page is rendered.
    """
    if request.method != "POST":
        return render_template("mainscreen.html", document_text=None, summary=None, heatmap_url=None)

    upload = request.files.get("file")
    pasted = request.form.get("pasteText", "").strip()
    link = request.form.get("url", "").strip()

    if upload and upload.filename:
        document_text = read_file(upload)
    elif pasted:
        document_text = pasted
    elif link:
        document_text = fetch_text_from_url(link)
    else:
        document_text = None

    # Nothing usable was submitted (or extraction failed): show the empty page.
    if not document_text:
        return render_template("mainscreen.html", document_text=None, summary=None, heatmap_url=None)

    summary = generate_summary(document_text)
    sentences, scores = calculate_sentence_importance(summary)
    generate_heatmap(scores)

    # Colour the sentences first, then mark legal keywords in the result.
    highlighted_summary = highlight_keywords(highlight_summary(sentences, scores))

    # Keep the plain summary on disk for the /download_summary route.
    with open("summary.txt", "w", encoding="utf-8") as summary_file:
        summary_file.write(summary)

    return render_template("mainscreen.html", document_text=document_text, summary=highlighted_summary, heatmap_url="static/heatmap.png")
289
+
290
@app.route("/download_summary")
def download_summary():
    """Send summary.txt from the current working directory as a download.

    Responds with 404 ("File not found") when the file does not exist.
    """
    summary_path = os.path.join(os.getcwd(), "summary.txt")

    if os.path.exists(summary_path):
        return send_file(summary_path, as_attachment=True, download_name="summary.txt", mimetype="text/plain")

    return abort(404, description="File not found")
298
+
299
+
300
+ # Homepage
301
@app.route("/home")
def home():
    """Render the home page template."""
    page = render_template("homepage.html")
    return page
304
@app.route("/about")
def about():
    """Render the about page template."""
    page = render_template("aboutpage.html")
    return page
307
@app.route("/summarization")
def summarization():
    """Render the summarisation screen without any document loaded."""
    page = render_template("mainscreen.html")
    return page
310
+
311
+
312
@app.route('/lawbooks/<filename>')
def serve_pdf(filename):
    """Serve the file named *filename* from the static/lawbooks directory."""
    lawbooks_dir = 'static/lawbooks'
    return send_from_directory(lawbooks_dir, filename)
315
+
316
+
317
+
318
+
319
# MongoDB connection.
# SECURITY: the connection string, including the database user's password, was
# previously written directly in this file. It is now read from the
# MONGODB_URI environment variable; the committed password must be treated as
# compromised and changed.
MONGODB_URI = os.environ.get("MONGODB_URI")
if not MONGODB_URI:
    # Fail with a clear message instead of letting MongoClient fall back to
    # its default host.
    raise RuntimeError("Set the MONGODB_URI environment variable to the MongoDB connection string.")

client = MongoClient(MONGODB_URI)
db = client['chatbotDB']
# Collection holding registered user accounts.
users = db['users']
323
+
324
+
325
+
326
@app.route('/signup', methods=['GET'])
def signup():
    """Render the sign-up form."""
    page = render_template('signuppage.html')
    return page
329
+
330
@app.route('/api/signup', methods=['POST'])
def api_signup():
    """Create a user account from a JSON body.

    Expected fields: ``firstName``, ``lastName``, ``email``, ``password``.

    Returns:
        A ``(json, status)`` pair with a ``message`` key: 201 on success, 400
        when the body is not a JSON object, when email or password is missing
        or not a string, or when the email is already registered.
    """
    # silent=True yields None for a missing or malformed body instead of
    # raising, so the problem can be reported as a normal JSON error.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({'message': 'Request body must be a JSON object.'}), 400

    first_name = data.get('firstName')
    last_name = data.get('lastName')
    email = data.get('email')
    password = data.get('password')

    # Requiring plain strings prevents hashing None (which raised and caused
    # a 500) and keeps a JSON object such as {"$ne": null} from reaching the
    # MongoDB query below as an operator.
    if not isinstance(email, str) or not isinstance(password, str) or not email.strip() or not password:
        return jsonify({'message': 'Email and password are required!'}), 400

    # Check for an existing account before doing the deliberately slow hash.
    # NOTE(review): two simultaneous requests can both pass this check; a
    # unique index on "email" is needed to rule out duplicates entirely.
    if users.find_one({'email': email}):
        return jsonify({'message': 'Email already exists!'}), 400

    # Store only the hash, never the plain password.
    hashed_pw = generate_password_hash(password)

    users.insert_one({
        'first_name': first_name,
        'last_name': last_name,
        'email': email,
        'password': hashed_pw
    })

    return jsonify({'message': 'Signup successful!'}), 201
356
+
357
+ # Success page or login page
358
@app.route('/login', methods=['GET', 'POST'])
def login():
    """Show the login form (GET) or check submitted credentials (POST).

    A POST must carry a JSON object with ``email`` and ``password``.

    Returns:
        For GET, the rendered login page. For POST, a ``(json, status)`` pair
        with a ``message`` key: 200 on success, 400 for a malformed request,
        401 when the email is unknown or the password is wrong. Both 401
        cases use the same message, so the response does not reveal which
        emails are registered.
    """
    if request.method != 'POST':
        return render_template('loginpage.html')

    # silent=True yields None for a missing or malformed body instead of
    # raising, so the problem can be reported as a normal JSON error.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({'message': 'Request body must be a JSON object.'}), 400

    email = data.get('email')
    password = data.get('password')

    # Requiring plain strings prevents a 500 from check_password_hash on a
    # missing password and keeps a JSON object such as {"$ne": null} from
    # reaching the MongoDB query below as an operator.
    if not isinstance(email, str) or not isinstance(password, str) or not email or not password:
        return jsonify({'message': 'Email and password are required!'}), 400

    # Log login attempt
    print(f"Login attempt - Email: {email}")

    user = users.find_one({'email': email})
    if not user:
        print(f"Login failed - Email '{email}' not found.")
        return jsonify({'message': 'Invalid email or password!'}), 401

    # Compare the submitted password with the stored hash.
    if not check_password_hash(user['password'], password):
        print(f"Login failed - Incorrect password for email '{email}'.")
        return jsonify({'message': 'Invalid email or password!'}), 401

    print(f"Login successful - Email: {email}")
    return jsonify({'message': 'Login successful!'}), 200
386
+
387
+
388
@app.route('/reset-password', methods=['GET', 'POST'])
def reset_password():
    """Show the reset form (GET) or set a new password for an email (POST).

    A POST must carry the form fields ``email``, ``newPassword`` and
    ``confirmPassword``.

    Returns:
        For GET, the rendered reset page. For POST, a ``(json, status)`` pair
        with a ``message`` key: 200 on success, 400 when a field is missing
        or empty or the two passwords differ, 404 when no user has that email.

    SECURITY(review): this endpoint changes the password of whichever account
    matches the submitted email, without proving that the caller owns it.
    Anyone who knows a user's email can take over that account. A proper fix
    (for example a single-use token sent by email) needs infrastructure that
    is outside this function.
    """
    if request.method != 'POST':
        return render_template('forgetpasswordpage.html')

    email = request.form.get('email', '').strip()
    new_password = request.form.get('newPassword', '')
    confirm_password = request.form.get('confirmPassword', '')

    # Two empty passwords are equal, so the match check alone used to let an
    # account be set to an empty password.
    if not email or not new_password or not confirm_password:
        return jsonify({'message': 'All fields are required!'}), 400

    if new_password != confirm_password:
        return jsonify({'message': 'Passwords do not match!'}), 400

    user = users.find_one({'email': email})
    if not user:
        return jsonify({'message': 'User not found!'}), 404

    # Store only the hash of the new password.
    hashed_pw = generate_password_hash(new_password)
    users.update_one({'email': email}, {'$set': {'password': hashed_pw}})
    return jsonify({'message': 'Password updated successfully!'}), 200
412
+
413
+
414
# Collection that stores messages submitted through the contact form.
contacts_collection = db["contacts"]


@app.route('/contact', methods=['GET', 'POST'])
def contact():
    """Show the contact form (GET) or store a submitted message (POST).

    A POST reads the form fields ``name``, ``email`` and ``message``. All
    three are required; when one is missing or empty the response is 400.
    Otherwise the message is inserted into the contacts collection and a
    thank-you JSON response is returned with status 200.
    """
    if request.method != 'POST':
        return render_template('contactpage.html')

    form = request.form
    name = form.get('name')
    email = form.get('email')
    message = form.get('message')

    print(f"Name: {name}, Email: {email}, Message: {message}")  # Debug

    if not (name and email and message):
        return jsonify({'message': 'All fields are required!'}), 400

    contacts_collection.insert_one({'name': name, 'email': email, 'message': message})

    payload = {
        'message': f'Thank you, {name}! Your message has been sent successfully.',
        'status': 'success',
    }
    return jsonify(payload), 200
438
+
439
+
440
+
441
+
442
+
443
+
444
+
445
if __name__ == "__main__":
    # SECURITY: debug mode enables the Werkzeug interactive debugger, which
    # lets anyone who can reach the server run arbitrary Python. Because the
    # app listens on all interfaces, debug is now off unless FLASK_DEBUG is
    # explicitly set to a true value.
    debug_enabled = os.environ.get("FLASK_DEBUG", "").strip().lower() in {"1", "true", "yes", "on"}
    app.run(host="0.0.0.0", port=7860, debug=debug_enabled)