# Gradio front end for a T5 model that turns a schema plus a natural-language
# request into a PostgreSQL query.

import gradio as gr
import torch
from transformers import T5ForConditionalGeneration, T5Tokenizer

# Tokenizer and model weights are loaded from the current working directory.
model_path = "./"

# Run on the GPU when torch reports one, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

tokenizer = T5Tokenizer.from_pretrained(model_path)
model = T5ForConditionalGeneration.from_pretrained(model_path).to(device)
model.eval()  # evaluation mode: the model is only used for generation below


def generate_sql(schema: str, user_query: str, date_info: str = "") -> str:
    """Generate a PostgreSQL query for *user_query* against *schema*.

    Args:
        schema: Database schema text that is embedded verbatim in the prompt.
        user_query: The natural-language request from the user.
        date_info: Optional date or date range (for example ``2025-06-12`` or
            ``2025-06-01 to 2025-06-07``). When non-empty it is added to the
            prompt so the model can follow the "use only the provided dates"
            requirements; when empty the prompt is left without a date line.

    Returns:
        The decoded model output with special tokens stripped.
    """
    prompt = _build_prompt(schema, user_query, date_info)

    # NOTE(review): truncation=True clips the input to the tokenizer's maximum
    # length. This prompt is long, so confirm for this checkpoint that the
    # tail (including the final "SQL Query:" cue) is not being cut off.
    inputs = tokenizer(
        prompt, return_tensors="pt", truncation=True, padding=True
    ).to(device)

    # Gradients are never needed for generation.
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_length=256,
            # Decoding is started explicitly from the pad token.
            decoder_start_token_id=tokenizer.pad_token_id,
        )

    return tokenizer.decode(outputs[0], skip_special_tokens=True)


def _build_prompt(schema, user_query, date_info=""):
    """Return the full instruction prompt, including the date line if given."""
    dates = (date_info or "").strip()
    # With no dates this collapses to the single blank line that separates
    # the user query from the requirements list.
    date_block = f"\nDate Info: {dates}\n" if dates else ""

    return f"""Given the following database schema and requirements, generate a PostgreSQL query:

{schema}

User Query: "{user_query}"
{date_block}
IMPORTANT REQUIREMENTS:
- Always filter by user_id = $1 for security
- The current year is 2025. You are working in this year!
- CRITICAL: Use ONLY the dates provided in the input parameters. Do NOT infer or change dates on your own!
- If date range is provided, use DATE(created_at) BETWEEN 'startDate' AND 'endDate'
- If single date is provided, use DATE(created_at) = 'YYYY-MM-DD'
- NEVER use hardcoded years like 2024 - always use the provided dates exactly as given
- Use proper SQL syntax
- CRITICAL: Generate ONLY simple SQL statements - NO WITH clauses, NO CTEs, NO complex subqueries
- Use direct SELECT, INSERT, UPDATE, DELETE statements only
- Keep queries simple and straightforward
- For INSERT statements, use the RETURNING clause
- For INSERT statements with specific date:
* Use INSERT INTO expenses (user_id, amount, category, note, created_at) VALUES ($1, amount_value, 'category_name', 'note_text', 'YYYY-MM-DD HH:MM:SS') RETURNING *
* If date is provided, use that specific date for created_at instead of NOW()
* Format the date as 'YYYY-MM-DD 00:00:00' for the specific date
- For SELECT statements:
* Use SIMPLE SELECT statements - NO WITH clauses, NO CTEs, NO complex subqueries
* If category is provided: SELECT * FROM expenses WHERE user_id = $1 AND category = 'exact_category_name'
* If keywords are provided: SELECT * FROM expenses WHERE user_id = $1 AND LOWER(note) LIKE '%exact_keyword%'
* If single date is provided: SELECT * FROM expenses WHERE user_id = $1 AND DATE(created_at) = 'YYYY-MM-DD'
* If date range is provided: SELECT * FROM expenses WHERE user_id = $1 AND DATE(created_at) BETWEEN 'startDate' AND 'endDate'
* If both category and keywords: SELECT * FROM expenses WHERE user_id = $1 AND category = 'exact_category_name' AND LOWER(note) LIKE '%exact_keyword%'
* If both category and single date: SELECT * FROM expenses WHERE user_id = $1 AND category = 'exact_category_name' AND DATE(created_at) = 'YYYY-MM-DD'
* If both category and date range: SELECT * FROM expenses WHERE user_id = $1 AND category = 'exact_category_name' AND DATE(created_at) BETWEEN 'startDate' AND 'endDate'
* If both keywords and single date: SELECT * FROM expenses WHERE user_id = $1 AND LOWER(note) LIKE '%exact_keyword%' AND DATE(created_at) = 'YYYY-MM-DD'
* If both keywords and date range: SELECT * FROM expenses WHERE user_id = $1 AND LOWER(note) LIKE '%exact_keyword%' AND DATE(created_at) BETWEEN 'startDate' AND 'endDate'
* If all three (category, keywords, date): SELECT * FROM expenses WHERE user_id = $1 AND category = 'exact_category_name' AND LOWER(note) LIKE '%exact_keyword%' AND DATE(created_at) = 'YYYY-MM-DD'
* If all three (category, keywords, date range): SELECT * FROM expenses WHERE user_id = $1 AND category = 'exact_category_name' AND LOWER(note) LIKE '%exact_keyword%' AND DATE(created_at) BETWEEN 'startDate' AND 'endDate'
* If none provided: SELECT * FROM expenses WHERE user_id = $1
* Return all relevant columns
* CRITICAL: Use ONLY simple SELECT statements, NO WITH clauses, NO CTEs
* CRITICAL: For date filtering, use DATE(created_at) = 'YYYY-MM-DD' for single dates, DATE(created_at) BETWEEN 'startDate' AND 'endDate' for date ranges
* CRITICAL: Use the EXACT dates provided in the input parameters. Do NOT infer or override dates from the user query wording.
* EXAMPLE: If date range is '2025-01-13' to '2025-01-19', use DATE(created_at) BETWEEN '2025-01-13' AND '2025-01-19'
- For UPDATE statements:
* Use UPDATE expenses SET amount = new_amount WHERE user_id = $1 AND LOWER(note) LIKE '%keyword%'
* If category is provided, also add AND category = 'category_name'
* Use RETURNING clause to return the updated record
- For DELETE statements:
* If category is provided: DELETE FROM expenses WHERE user_id = $1 AND category = 'exact_category_name'
* If keywords are provided: DELETE FROM expenses WHERE user_id = $1 AND LOWER(note) LIKE '%exact_keyword%'
* If both category and keywords: DELETE FROM expenses WHERE user_id = $1 AND category = 'exact_category_name' AND LOWER(note) LIKE '%exact_keyword%'
* If no category and no keywords (general delete): DELETE FROM expenses WHERE user_id = $1
* Use RETURNING clause to return the deleted records
- Handle the user_id parameter safely
- If the query mentions a specific month (like "June"), filter by that month using EXTRACT(MONTH FROM created_at)
- If the query mentions a specific year, filter by that year using EXTRACT(YEAR FROM created_at)
- CRITICAL: If a specific date is provided (like "26th june 2025"), use exact date filtering: DATE(created_at) = 'YYYY-MM-DD'
- For date filtering, use proper PostgreSQL date functions
- Use EXACT category names from the list above
- For keyword searches in UPDATE/DELETE, use LOWER(note) LIKE '%exact_keyword%' pattern
- For category searches, use category = 'exact_category_name'
- When both category and keywords are provided for SELECT, prioritize category filtering
- CRITICAL: If the intent is UPDATE, generate an UPDATE query, NOT a SELECT query
- CRITICAL: If the intent is DELETE, generate a DELETE query, NOT a SELECT query
- NEVER use placeholder text like '%keyword%' or 'category_name' - use the actual values provided

CORRECT SQL EXAMPLES:
- SELECT * FROM expenses WHERE user_id = $1 AND DATE(created_at) = '2025-06-26';
- SELECT SUM(amount) FROM expenses WHERE user_id = $1 AND DATE(created_at) = '2025-06-26';
- SELECT * FROM expenses WHERE user_id = $1 AND DATE(created_at) BETWEEN '2025-01-13' AND '2025-01-19';
- SELECT SUM(amount) FROM expenses WHERE user_id = $1 AND DATE(created_at) BETWEEN '2025-01-13' AND '2025-01-19';
- SELECT * FROM expenses WHERE user_id = $1 AND category = 'Food & Dining';
- SELECT * FROM expenses WHERE user_id = $1 AND LOWER(note) LIKE '%lunch%';
- INSERT INTO expenses (user_id, amount, category, note, created_at) VALUES ($1, 500.00, 'Health & Fitness', 'Spent 500 rupees yesterday on medicines', '2025-01-19 00:00:00') RETURNING *;
- EXAMPLE: If date range is '2025-01-13' to '2025-01-19', use DATE(created_at) BETWEEN '2025-01-13' AND '2025-01-19'

INCORRECT SQL EXAMPLES (DO NOT USE):
- WITH filtered_date AS (SELECT '2025-06-24' AS target_date) SELECT * FROM expenses WHERE user_id = $1 AND DATE(created_at) = (SELECT target_date FROM filtered_date);
- WITH filtered_expenses AS (SELECT * FROM expenses WHERE user_id = $1) SELECT * FROM filtered_expenses;


SQL Query:"""


# Gradio UI: the three text boxes are passed positionally to generate_sql as
# (schema, user_query, date_info); its return value fills the output box.
iface = gr.Interface(
    fn=generate_sql,
    inputs=[
        gr.Textbox(label="Database Schema", lines=12, placeholder="CREATE TABLE ..."),
        gr.Textbox(label="User Query", placeholder="How much did I spend on food last week?"),
        gr.Textbox(label="Date Info (optional)", placeholder="2025-06-12 or 2025-06-01 to 2025-06-07"),
    ],
    outputs=gr.Textbox(label="Generated SQL Query"),
    title="HISAB AI - Natural Language to SQL",
    description="Enter your schema, user query and date (optional). Model will output SQL query.",
)

# The app is launched as soon as this module is executed.
iface.launch()