import gradio as gr
import torch
from transformers import T5ForConditionalGeneration, T5Tokenizer

# Load tokenizer and model from the local folder (the directory the app runs in).
model_path = "./"
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
tokenizer = T5Tokenizer.from_pretrained(model_path)
model = T5ForConditionalGeneration.from_pretrained(model_path).to(device)
model.eval()

# Static instruction block appended after the schema / user query / date info.
# Kept as a plain (non-f) string so the '%...%' and '$1' placeholders need no escaping.
_PROMPT_REQUIREMENTS = """IMPORTANT REQUIREMENTS:
- Always filter by user_id = $1 for security
- The current year is 2025. You are working in this year!
- CRITICAL: Use ONLY the dates provided in the input parameters. Do NOT infer or change dates on your own!
- If date range is provided, use DATE(created_at) BETWEEN 'startDate' AND 'endDate'
- If single date is provided, use DATE(created_at) = 'YYYY-MM-DD'
- NEVER use hardcoded years like 2024 - always use the provided dates exactly as given
- Use proper SQL syntax
- CRITICAL: Generate ONLY simple SQL statements - NO WITH clauses, NO CTEs, NO complex subqueries
- Use direct SELECT, INSERT, UPDATE, DELETE statements only
- Keep queries simple and straightforward
- For INSERT statements, use the RETURNING clause
- For INSERT statements with specific date:
  * Use INSERT INTO expenses (user_id, amount, category, note, created_at) VALUES ($1, amount_value, 'category_name', 'note_text', 'YYYY-MM-DD HH:MM:SS') RETURNING *
  * If date is provided, use that specific date for created_at instead of NOW()
  * Format the date as 'YYYY-MM-DD 00:00:00' for the specific date
- For SELECT statements:
  * Use SIMPLE SELECT statements - NO WITH clauses, NO CTEs, NO complex subqueries
  * If category is provided: SELECT * FROM expenses WHERE user_id = $1 AND category = 'exact_category_name'
  * If keywords are provided: SELECT * FROM expenses WHERE user_id = $1 AND LOWER(note) LIKE '%exact_keyword%'
  * If single date is provided: SELECT * FROM expenses WHERE user_id = $1 AND DATE(created_at) = 'YYYY-MM-DD'
  * If date range is provided: SELECT * FROM expenses WHERE user_id = $1 AND DATE(created_at) BETWEEN 'startDate' AND 'endDate'
  * If both category and keywords: SELECT * FROM expenses WHERE user_id = $1 AND category = 'exact_category_name' AND LOWER(note) LIKE '%exact_keyword%'
  * If both category and single date: SELECT * FROM expenses WHERE user_id = $1 AND category = 'exact_category_name' AND DATE(created_at) = 'YYYY-MM-DD'
  * If both category and date range: SELECT * FROM expenses WHERE user_id = $1 AND category = 'exact_category_name' AND DATE(created_at) BETWEEN 'startDate' AND 'endDate'
  * If both keywords and single date: SELECT * FROM expenses WHERE user_id = $1 AND LOWER(note) LIKE '%exact_keyword%' AND DATE(created_at) = 'YYYY-MM-DD'
  * If both keywords and date range: SELECT * FROM expenses WHERE user_id = $1 AND LOWER(note) LIKE '%exact_keyword%' AND DATE(created_at) BETWEEN 'startDate' AND 'endDate'
  * If all three (category, keywords, date): SELECT * FROM expenses WHERE user_id = $1 AND category = 'exact_category_name' AND LOWER(note) LIKE '%exact_keyword%' AND DATE(created_at) = 'YYYY-MM-DD'
  * If all three (category, keywords, date range): SELECT * FROM expenses WHERE user_id = $1 AND category = 'exact_category_name' AND LOWER(note) LIKE '%exact_keyword%' AND DATE(created_at) BETWEEN 'startDate' AND 'endDate'
  * If none provided: SELECT * FROM expenses WHERE user_id = $1
  * Return all relevant columns
  * CRITICAL: Use ONLY simple SELECT statements, NO WITH clauses, NO CTEs
  * CRITICAL: For date filtering, use DATE(created_at) = 'YYYY-MM-DD' for single dates, DATE(created_at) BETWEEN 'startDate' AND 'endDate' for date ranges
  * CRITICAL: Use the EXACT dates provided in the input parameters. Do NOT infer or override dates from the user query wording.
  * EXAMPLE: If date range is '2025-01-13' to '2025-01-19', use DATE(created_at) BETWEEN '2025-01-13' AND '2025-01-19'
- For UPDATE statements:
  * Use UPDATE expenses SET amount = new_amount WHERE user_id = $1 AND LOWER(note) LIKE '%keyword%'
  * If category is provided, also add AND category = 'category_name'
  * Use RETURNING clause to return the updated record
- For DELETE statements:
  * If category is provided: DELETE FROM expenses WHERE user_id = $1 AND category = 'exact_category_name'
  * If keywords are provided: DELETE FROM expenses WHERE user_id = $1 AND LOWER(note) LIKE '%exact_keyword%'
  * If both category and keywords: DELETE FROM expenses WHERE user_id = $1 AND category = 'exact_category_name' AND LOWER(note) LIKE '%exact_keyword%'
  * If no category and no keywords (general delete): DELETE FROM expenses WHERE user_id = $1
  * Use RETURNING clause to return the deleted records
- Handle the user_id parameter safely
- If the query mentions a specific month (like "June"), filter by that month using EXTRACT(MONTH FROM created_at)
- If the query mentions a specific year, filter by that year using EXTRACT(YEAR FROM created_at)
- CRITICAL: If a specific date is provided (like "26th june 2025"), use exact date filtering: DATE(created_at) = 'YYYY-MM-DD'
- For date filtering, use proper PostgreSQL date functions
- Use EXACT category names from the list above
- For keyword searches in UPDATE/DELETE, use LOWER(note) LIKE '%exact_keyword%' pattern
- For category searches, use category = 'exact_category_name'
- When both category and keywords are provided for SELECT, prioritize category filtering
- CRITICAL: If the intent is UPDATE, generate an UPDATE query, NOT a SELECT query
- CRITICAL: If the intent is DELETE, generate a DELETE query, NOT a SELECT query
- NEVER use placeholder text like '%keyword%' or 'category_name' - use the actual values provided

CORRECT SQL EXAMPLES:
- SELECT * FROM expenses WHERE user_id = $1 AND DATE(created_at) = '2025-06-26';
- SELECT SUM(amount) FROM expenses WHERE user_id = $1 AND DATE(created_at) = '2025-06-26';
- SELECT * FROM expenses WHERE user_id = $1 AND DATE(created_at) BETWEEN '2025-01-13' AND '2025-01-19';
- SELECT SUM(amount) FROM expenses WHERE user_id = $1 AND DATE(created_at) BETWEEN '2025-01-13' AND '2025-01-19';
- SELECT * FROM expenses WHERE user_id = $1 AND category = 'Food & Dining';
- SELECT * FROM expenses WHERE user_id = $1 AND LOWER(note) LIKE '%lunch%';
- INSERT INTO expenses (user_id, amount, category, note, created_at) VALUES ($1, 500.00, 'Health & Fitness', 'Spent 500 rupees yesterday on medicines', '2025-01-19 00:00:00') RETURNING *;
- EXAMPLE: If date range is '2025-01-13' to '2025-01-19', use DATE(created_at) BETWEEN '2025-01-13' AND '2025-01-19'

INCORRECT SQL EXAMPLES (DO NOT USE):
- WITH filtered_date AS (SELECT '2025-06-24' AS target_date) SELECT * FROM expenses WHERE user_id = $1 AND DATE(created_at) = (SELECT target_date FROM filtered_date);
- WITH filtered_expenses AS (SELECT * FROM expenses WHERE user_id = $1) SELECT * FROM filtered_expenses;

SQL Query:"""


def _build_prompt(schema: str, user_query: str, date_info: str = "") -> str:
    """Assemble the full model prompt from the schema, the user query and optional dates."""
    header = (
        "Given the following database schema and requirements, "
        "generate a PostgreSQL query:\n\n"
        f"{schema}\n\n"
        f'User Query: "{user_query}"\n'
    )
    # The instructions tell the model to use only the dates supplied as input,
    # so the date string has to be part of the prompt. It is added only when
    # non-blank, so calls without a date produce the same prompt as before.
    date_text = (date_info or "").strip()
    if date_text:
        header += f'Date Info: "{date_text}"\n'
    return f"{header}\n{_PROMPT_REQUIREMENTS}"


def generate_sql(schema: str, user_query: str, date_info: str = "") -> str:
    """Generate a SQL query string for a natural-language request.

    Args:
        schema: Database schema text inserted verbatim into the prompt.
        user_query: The user's natural-language request.
        date_info: Optional date or date range text (the UI suggests
            "2025-06-12" or "2025-06-01 to 2025-06-07"). Blank or None
            means no date line is added to the prompt.

    Returns:
        The decoded model output with special tokens stripped.
    """
    prompt = _build_prompt(schema, user_query, date_info)

    # NOTE(review): truncation=True clips the prompt to the tokenizer's maximum
    # length. The instruction block is long, so confirm the tail of the prompt
    # (examples and "SQL Query:") actually reaches the model.
    inputs = tokenizer(
        prompt, return_tensors="pt", truncation=True, padding=True
    ).to(device)

    # Inference only: skip gradient tracking.
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_length=256,
            # Decoding is started from the tokenizer's pad token.
            decoder_start_token_id=tokenizer.convert_tokens_to_ids(
                tokenizer.pad_token
            ),
        )

    generated_sql = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return generated_sql


# Gradio UI: three text inputs mapped positionally onto generate_sql's parameters.
iface = gr.Interface(
    fn=generate_sql,
    inputs=[
        gr.Textbox(label="Database Schema", lines=12, placeholder="CREATE TABLE ..."),
        gr.Textbox(label="User Query", placeholder="How much did I spend on food last week?"),
        gr.Textbox(label="Date Info (optional)", placeholder="2025-06-12 or 2025-06-01 to 2025-06-07"),
    ],
    outputs=gr.Textbox(label="Generated SQL Query"),
    title="HISAB AI - Natural Language to SQL",
    description="Enter your schema, user query and date (optional). Model will output SQL query.",
)

iface.launch()