Spaces:

Izza-shahzad-13
/

lawsumm

Sleeping

App Files Files Community

Izza-shahzad-13 commited on Jun 17

Commit

ce93c96

verified ·

1 Parent(s): 7707abb

Delete FYP RAG/summerization-app/app.py

Browse files

Files changed (1) hide show

FYP RAG/summerization-app/app.py +0 -446

FYP RAG/summerization-app/app.py DELETED Viewed

@@ -1,446 +0,0 @@
-from flask import Flask, render_template, request, send_file,abort,send_from_directory
-import torch
-from werkzeug.security import generate_password_hash, check_password_hash
-from flask import Flask, render_template, request, redirect, url_for, jsonify, session
-from flask import Flask, request, jsonify
-from pymongo import MongoClient
-import pickle
-from pymongo import MongoClient
-from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
-from PyPDF2 import PdfReader
-from docx import Document
-import re
-import seaborn as sns
-import matplotlib.pyplot as plt
-import os
-import string
-import requests
-from bs4 import BeautifulSoup
-from flask import jsonify
-from flask import Flask, render_template, request, jsonify
-from sentence_transformers import SentenceTransformer
-import faiss
-import fitz
-import groq
-import PyPDF2
-import numpy as np
-from flask import Flask, render_template, request, redirect, url_for, flash, session
-from flask_argon2 import Argon2
-from pymongo import MongoClient
-import os
-import pdfplumber
-from groq import Groq
-import logging
-logging.getLogger("pdfminer").setLevel(logging.ERROR)
-app = Flask(__name__)
-GROQ_API_KEY = "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"
-cli = Groq(api_key=GROQ_API_KEY)
-# Load embedding model globally
-embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
-# Load preprocessed data
-with open("data//legal_data.pkl", "rb") as f:
-    legal_data = pickle.load(f)
-# Load FAISS indices
-faiss_indices = {}
-for law in legal_data:
-    try:
-        index_path = f"data/{law.replace(' ', '_')}_faiss.index"
-        index = faiss.read_index(index_path)
-        faiss_indices[law] = (index, legal_data[law])
-    except Exception as e:
-        print(f"Error loading FAISS index for {law}: {str(e)}")
-# Helper to match section
-def get_exact_section(section_number, structured_data):
-    for section in structured_data:
-        if section["section_id"].strip() == section_number.strip():
-            return section
-    return None
-# Helper to find relevant section via similarity
-def find_relevant_section(query, model, index, structured_data, top_k=3):
-    query_embedding = model.encode([query])
-    distances, indices = index.search(np.array(query_embedding), top_k)
-    return [structured_data[i] for i in indices[0] if i < len(structured_data)]
-# Generate answer using Groq
-def generate_response_with_groq(prompt, section_number, book_name, context):
-    full_prompt = f"According to Section {section_number} of {book_name}, {prompt}"
-    try:
-        response = cli.chat.completions.create(
-            model="llama3-8b-8192",
-            messages=[
-                {
-                    "role": "system",
-                    "content": "You are a legal assistant providing detailed and comprehensive legal explanations based on Pakistani law. Always provide at least 5-6 sentences per response."
-                },
-                {
-                    "role": "user",
-                    "content": f"{full_prompt}\n\nContext: {context}"
-                }
-            ],
-            max_tokens=1000
-        )
-        return response.choices[0].message.content.strip()
-    except Exception as e:
-        print(f"Error with Groq API: {str(e)}")
-        return "Error communicating with Groq API."
-# Main route
-@app.route('/ask', methods=['POST'])
-def ask():
-    try:
-        # Ensure the request is JSON
-        if not request.is_json:
-            return jsonify({'response': 'Request must be JSON'}), 400
-        data = request.get_json()
-        query = data.get('query', '').strip()
-        if not query:
-            return jsonify({'response': 'Please enter a valid question.'}), 400
-        # Extract section number and book name using regex
-        pattern = r"what\s+is\s+section\s+no\.?\s*(\d+[A-Z]?(?:\(\d+\))?)\s+of\s+(.*)"
-        match = re.search(pattern, query, re.IGNORECASE)
-        if match:
-            section_number = match.group(1).strip()
-            book_name = match.group(2).strip()
-            matched_book = None
-            for law in legal_data:
-                if book_name.lower() in law.lower():
-                    matched_book = law
-                    break
-            if not matched_book:
-                return jsonify({'response': 'Book name not recognized. Please try again with a valid book name.'}), 404
-            index, structured_data = faiss_indices.get(matched_book, (None, None))
-            if index is None or structured_data is None:
-                return jsonify({'response': 'Error loading FAISS index for the selected law.'}), 500
-            exact_section = get_exact_section(section_number, structured_data)
-            if exact_section:
-                response = generate_response_with_groq(query, section_number, matched_book, exact_section['content'])
-                return jsonify({'response': response}), 200
-            else:
-                relevant = find_relevant_section(query, embedding_model, index, structured_data)
-                if relevant:
-                    response = generate_response_with_groq(query, relevant[0]['section_id'], matched_book, relevant[0]['content'])
-                    return jsonify({'response': response}), 200
-                else:
-                    return jsonify({'response': 'No relevant section found.'}), 404
-        else:
-            return jsonify({'response': 'Please ask your question in this format: "What is Section No. 302 of Pakistan Penal Code?"'}), 400
-    except Exception as e:
-        print(f"Server error at /ask: {str(e)}")
-        return jsonify({'response': 'An internal error occurred. Please try again later.'}), 500
-# Flask route
-# Load the fine-tuned Legal LED model
-MODEL_NAME = "Izza-shahzad-13/legal-LED-final"
-tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
-model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME)
-# Function to generate summary
-def generate_summary(text):
-    inputs = tokenizer.encode("summarize: " + text, return_tensors="pt", max_length=1024, truncation=True)
-    outputs = model.generate(inputs, max_length=800, min_length=40, length_penalty=2.0, num_beams=4, early_stopping=True)
-    return tokenizer.decode(outputs[0], skip_special_tokens=True)
-# Function to calculate sentence importance scores
-def calculate_sentence_importance(summary):
-    sentences = summary.split(". ")
-    scores = [len(sentence) for sentence in sentences]  # Score based on sentence length
-    max_score = max(scores) if scores else 1
-    normalized_scores = [score / max_score for score in scores]
-    return sentences, normalized_scores
-# Function to generate heatmap
-def generate_heatmap(scores):
-    plt.figure(figsize=(10, 2))
-    sns.heatmap([scores], annot=True, cmap="coolwarm", xticklabels=False, yticklabels=False, cbar=True)
-    plt.title("Sentence Importance Heatmap")
-    plt.savefig("static/heatmap.png")  # Save heatmap image
-    plt.close()
-# Function to highlight sentences in the summary
-def highlight_summary(sentences, scores):
-    cmap = sns.color_palette("coolwarm", as_cmap=True)
-    highlighted_summary = ""
-    for sentence, score in zip(sentences, scores):
-        color = cmap(score)
-        rgb_color = f"rgb({int(color[0]*255)}, {int(color[1]*255)}, {int(color[2]*255)})"
-        highlighted_summary += f'<span style="background-color:{rgb_color};padding:2px;">{sentence}.</span> '
-    return highlighted_summary
-# Function to highlight legal terms
-def highlight_keywords(text):
-    patterns = {
-        'act_with_year': r'\b([A-Za-z\s]+(?:\sAct(?:\s[\d]{4})?))\s*,\s*(\d{4})\b',
-        'article': r'\bArticle\s\d{1,3}(-[A-Z])?\b',
-        'section': r'\bSection\s\d{1,3}[-A-Za-z]?\(?[a-zA-Z]?\)?\b',
-        'date': r'\b(?:[A-Za-z]+)\s\d{4}\b|\b\d{1,2}[-/]\d{1,2}[-/]\d{2,4}\b',
-        'persons': r'\b([A-Z][a-z]+(?:\s[A-Z][a-z]+)*)\b',
-        'ordinance': r'\b([A-Z][a-z\s]+Ordinance(?:,\s\d{4})?)\b',  # Example: PEMRA Ordinance, 2002
-        'petition': r'\b(?:[A-Za-z\s]*Petition\sNo\.\s\d+/\d{4})\b',  # Example: Constitutional Petition No. 123/2024
-        'act_with_year': r'\b([A-Za-z\s]+(?:\sAct(?:\s\d{4})?)),\s*(\d{4})\b',  # Example: Control of Narcotic Substances Act, 1997
-        'article': r'\b(Article\s\d{1,3}(-[A-Z])?)\b',  # Example: Article 10-A
-        'section': r'\b(Section\s\d{1,3}(\([a-zA-Z0-9]+\))?)\b',  # Example: Section 302(b), Section 9(c), Section 144-A
-        'date': r'\b(?:\d{1,2}[-/]\d{1,2}[-/]\d{2,4}|\d{4}|\b(?:January|February|March|April|May|June|July|August|September|October|November|December)\s\d{1,2},?\s\d{4})\b',
-        # Examples: 15/07/2015, July 2015, March 5, 2021, 2023
-        'person': r'\b([A-Z][a-z]+(?:\s[A-Z][a-z]+)+)\b'  # Example: Justice Ali Raza
-    }
-    highlighted_text = text
-    for pattern in patterns.values():
-        highlighted_text = re.sub(pattern, lambda match: f'<span class="highlight">{match.group(0)}</span>', highlighted_text)
-    return highlighted_text
-# Function to read uploaded files
-def read_file(file):
-    if file.filename.endswith(".txt"):
-        return file.read().decode("utf-8")
-    elif file.filename.endswith(".pdf"):
-        pdf_reader = PdfReader(file)
-        return " ".join(page.extract_text() for page in pdf_reader.pages)
-    elif file.filename.endswith(".docx"):
-        doc = Document(file)
-        return " ".join(paragraph.text for paragraph in doc.paragraphs)
-    return None
-# Function to fetch text from a URL
-def fetch_text_from_url(url):
-    try:
-        response = requests.get(url)
-        response.raise_for_status()
-        # Check content type
-        content_type = response.headers.get("Content-Type", "")
-        if "text/html" in content_type:  # If it's a webpage
-            soup = BeautifulSoup(response.text, "html.parser")
-            paragraphs = soup.find_all("p")  # Extract paragraph text
-            return " ".join([p.get_text() for p in paragraphs])
-        elif "text/plain" in content_type:  # If it's a plain text file
-            return response.text
-        else:
-            return None
-    except Exception as e:
-        print("Error fetching URL:", e)
-        return None
-@app.route("/", methods=["GET", "POST"])
-def index():
-    document_text = None
-    summary = None
-    heatmap_url = None
-    if request.method == "POST":
-        file = request.files.get("file")
-        pasted_text = request.form.get("pasteText", "").strip()
-        url = request.form.get("url", "").strip()
-        if file and file.filename:
-            document_text = read_file(file)
-        elif pasted_text:
-            document_text = pasted_text
-        elif url:
-            document_text = fetch_text_from_url(url)
-        if document_text:
-            summary = generate_summary(document_text)
-            sentences, scores = calculate_sentence_importance(summary)
-            generate_heatmap(scores)
-            highlighted_summary = highlight_summary(sentences, scores)
-            highlighted_summary = highlight_keywords(highlighted_summary)
-            # Save the summary to a text file
-            with open("summary.txt", "w", encoding="utf-8") as f:
-                f.write(summary)
-            return render_template("mainscreen.html", document_text=document_text, summary=highlighted_summary, heatmap_url="static/heatmap.png")
-    return render_template("mainscreen.html", document_text=None, summary=None, heatmap_url=None)
-@app.route("/download_summary")
-def download_summary():
-    file_path = os.path.join(os.getcwd(), "summary.txt")
-    if not os.path.exists(file_path):
-        return abort(404, description="File not found")
-    return send_file(file_path, as_attachment=True, download_name="summary.txt", mimetype="text/plain")
-  # Homepage
-@app.route("/home")
-def home():
-     return render_template("homepage.html")
-@app.route("/about")
-def about():
-    return render_template("aboutpage.html")
-@app.route("/summarization")
-def summarization():
-    return render_template("mainscreen.html")  # Login Page
-@app.route('/lawbooks/<filename>')
-def serve_pdf(filename):
-    return send_from_directory('static/lawbooks', filename)
-# MongoDB connection
-client = MongoClient('mongodb+srv://law:[email protected]/?retryWrites=true&w=majority&appName=law')
-db = client['chatbotDB']
-users = db['users']
-@app.route('/signup', methods=['GET'])
-def signup():
-    return render_template('signuppage.html')  # Render the HTML form
-@app.route('/api/signup', methods=['POST'])
-def api_signup():
-    # Get JSON data from the request
-    data = request.get_json()
-    first_name = data.get('firstName')
-    last_name = data.get('lastName')
-    email = data.get('email')
-    password = data.get('password')
-    # Hash the password for security before storing it in the database
-    hashed_pw = generate_password_hash(password)
-    # Check if the user already exists
-    if users.find_one({'email': email}):
-        return jsonify({'message': 'Email already exists!'}), 400
-    # Insert the user data into MongoDB
-    users.insert_one({
-        'first_name': first_name,
-        'last_name': last_name,
-        'email': email,
-        'password': hashed_pw
-    })
-    # Return a success response
-    return jsonify({'message': 'Signup successful!'}), 201
-# Success page or login page
-@app.route('/login', methods=['GET', 'POST'])
-def login():
-    if request.method == 'POST':
-        # Handle POST request for login
-        data = request.get_json()
-        email = data.get('email')
-        password = data.get('password')
-        # Log login attempt
-        print(f"Login attempt - Email: {email}")
-        # Check if the user exists
-        user = users.find_one({'email': email})
-        if not user:
-            print(f"Login failed - Email '{email}' not found.")
-            return jsonify({'message': 'Invalid email or password!'}), 401
-        # Check if the password is correct (compare hashed passwords)
-        if not check_password_hash(user['password'], password):
-            print(f"Login failed - Incorrect password for email '{email}'.")
-            return jsonify({'message': 'Invalid email or password!'}), 401
-        # Log successful login
-        print(f"Login successful - Email: {email}")
-        return jsonify({'message': 'Login successful!'}), 200
-    # Handle GET request - Show login form (if needed)
-    return render_template('loginpage.html')  # This would be the login form page (replace with your template)
-@app.route('/reset-password', methods=['GET', 'POST'])
-def reset_password():
-    if request.method == 'POST':
-        email = request.form['email']
-        new_password = request.form['newPassword']
-        confirm_password = request.form['confirmPassword']
-        # Check if passwords match
-        if new_password != confirm_password:
-            return jsonify({'message': 'Passwords do not match!'}), 400
-        # Check if user exists
-        user = users.find_one({'email': email})
-        if not user:
-            return jsonify({'message': 'User not found!'}), 404
-        # Hash the new password
-        hashed_pw = generate_password_hash(new_password)
-        # Update the user's password in the database
-        users.update_one({'email': email}, {'$set': {'password': hashed_pw}})
-        return jsonify({'message': 'Password updated successfully!'}), 200
-    return render_template('forgetpasswordpage.html')
-contacts_collection = db["contacts"]
-@app.route('/contact', methods=['GET', 'POST'])
-def contact():
-    if request.method == 'POST':
-        name = request.form.get('name')
-        email = request.form.get('email')
-        message = request.form.get('message')
-        print(f"Name: {name}, Email: {email}, Message: {message}")  # Debug
-        if not name or not email or not message:
-            return jsonify({'message': 'All fields are required!'}), 400
-        contact_data = {
-            'name': name,
-            'email': email,
-            'message': message
-        }
-        contacts_collection.insert_one(contact_data)
-        return jsonify({'message': f'Thank you, {name}! Your message has been sent successfully.',
-            'status': 'success'}), 200
-    return render_template('contactpage.html')
-if __name__ == "__main__":
-    app.run(host="0.0.0.0", port=7860, debug=True)