# t5_sql / app.py
# Sid26Roy's picture
# Update app.py
# bbb0afc verified
import gradio as gr
import torch
from transformers import T5ForConditionalGeneration, T5Tokenizer
# Tokenizer and model weights are read from the current working directory.
model_path = "./"

# Run on the GPU when CUDA is available, otherwise stay on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

tokenizer = T5Tokenizer.from_pretrained(model_path)

model = T5ForConditionalGeneration.from_pretrained(model_path)
model = model.to(device)
# Inference only: switch the module to evaluation mode.
model.eval()
def generate_sql(schema, user_query, date_info=""):
    """Generate a PostgreSQL query for *user_query* with the loaded T5 model.

    Args:
        schema: Database schema text placed at the top of the prompt.
        user_query: Natural-language request from the user.
        date_info: Optional date or date range text. When non-empty it is
            added to the prompt directly after the user query; when empty
            (or None) the prompt is the same as it would be without this
            parameter.

    Returns:
        The decoded model output (special tokens removed) as a string.
    """
    # Previously `date_info` was accepted but never reached the model, even
    # though the prompt instructs it to use only the provided dates.
    date_text = (date_info or "").strip()
    date_line = f'Date Info: "{date_text}"\n' if date_text else ""

    # NOTE(review): the date line sits next to the user query, near the start
    # of the prompt. `truncation=True` below may cut a long prompt — confirm
    # the tokenizer's maximum length and which side it truncates.
    prompt = f"""Given the following database schema and requirements, generate a PostgreSQL query:
{schema}
User Query: "{user_query}"
{date_line}IMPORTANT REQUIREMENTS:
- Always filter by user_id = $1 for security
- The current year is 2025. You are working in this year!
- CRITICAL: Use ONLY the dates provided in the input parameters. Do NOT infer or change dates on your own!
- If date range is provided, use DATE(created_at) BETWEEN 'startDate' AND 'endDate'
- If single date is provided, use DATE(created_at) = 'YYYY-MM-DD'
- NEVER use hardcoded years like 2024 - always use the provided dates exactly as given
- Use proper SQL syntax
- CRITICAL: Generate ONLY simple SQL statements - NO WITH clauses, NO CTEs, NO complex subqueries
- Use direct SELECT, INSERT, UPDATE, DELETE statements only
- Keep queries simple and straightforward
- For INSERT statements, use the RETURNING clause
- For INSERT statements with specific date:
* Use INSERT INTO expenses (user_id, amount, category, note, created_at) VALUES ($1, amount_value, 'category_name', 'note_text', 'YYYY-MM-DD HH:MM:SS') RETURNING *
* If date is provided, use that specific date for created_at instead of NOW()
* Format the date as 'YYYY-MM-DD 00:00:00' for the specific date
- For SELECT statements:
* Use SIMPLE SELECT statements - NO WITH clauses, NO CTEs, NO complex subqueries
* If category is provided: SELECT * FROM expenses WHERE user_id = $1 AND category = 'exact_category_name'
* If keywords are provided: SELECT * FROM expenses WHERE user_id = $1 AND LOWER(note) LIKE '%exact_keyword%'
* If single date is provided: SELECT * FROM expenses WHERE user_id = $1 AND DATE(created_at) = 'YYYY-MM-DD'
* If date range is provided: SELECT * FROM expenses WHERE user_id = $1 AND DATE(created_at) BETWEEN 'startDate' AND 'endDate'
* If both category and keywords: SELECT * FROM expenses WHERE user_id = $1 AND category = 'exact_category_name' AND LOWER(note) LIKE '%exact_keyword%'
* If both category and single date: SELECT * FROM expenses WHERE user_id = $1 AND category = 'exact_category_name' AND DATE(created_at) = 'YYYY-MM-DD'
* If both category and date range: SELECT * FROM expenses WHERE user_id = $1 AND category = 'exact_category_name' AND DATE(created_at) BETWEEN 'startDate' AND 'endDate'
* If both keywords and single date: SELECT * FROM expenses WHERE user_id = $1 AND LOWER(note) LIKE '%exact_keyword%' AND DATE(created_at) = 'YYYY-MM-DD'
* If both keywords and date range: SELECT * FROM expenses WHERE user_id = $1 AND LOWER(note) LIKE '%exact_keyword%' AND DATE(created_at) BETWEEN 'startDate' AND 'endDate'
* If all three (category, keywords, date): SELECT * FROM expenses WHERE user_id = $1 AND category = 'exact_category_name' AND LOWER(note) LIKE '%exact_keyword%' AND DATE(created_at) = 'YYYY-MM-DD'
* If all three (category, keywords, date range): SELECT * FROM expenses WHERE user_id = $1 AND category = 'exact_category_name' AND LOWER(note) LIKE '%exact_keyword%' AND DATE(created_at) BETWEEN 'startDate' AND 'endDate'
* If none provided: SELECT * FROM expenses WHERE user_id = $1
* Return all relevant columns
* CRITICAL: Use ONLY simple SELECT statements, NO WITH clauses, NO CTEs
* CRITICAL: For date filtering, use DATE(created_at) = 'YYYY-MM-DD' for single dates, DATE(created_at) BETWEEN 'startDate' AND 'endDate' for date ranges
* CRITICAL: Use the EXACT dates provided in the input parameters. Do NOT infer or override dates from the user query wording.
* EXAMPLE: If date range is '2025-01-13' to '2025-01-19', use DATE(created_at) BETWEEN '2025-01-13' AND '2025-01-19'
- For UPDATE statements:
* Use UPDATE expenses SET amount = new_amount WHERE user_id = $1 AND LOWER(note) LIKE '%keyword%'
* If category is provided, also add AND category = 'category_name'
* Use RETURNING clause to return the updated record
- For DELETE statements:
* If category is provided: DELETE FROM expenses WHERE user_id = $1 AND category = 'exact_category_name'
* If keywords are provided: DELETE FROM expenses WHERE user_id = $1 AND LOWER(note) LIKE '%exact_keyword%'
* If both category and keywords: DELETE FROM expenses WHERE user_id = $1 AND category = 'exact_category_name' AND LOWER(note) LIKE '%exact_keyword%'
* If no category and no keywords (general delete): DELETE FROM expenses WHERE user_id = $1
* Use RETURNING clause to return the deleted records
- Handle the user_id parameter safely
- If the query mentions a specific month (like "June"), filter by that month using EXTRACT(MONTH FROM created_at)
- If the query mentions a specific year, filter by that year using EXTRACT(YEAR FROM created_at)
- CRITICAL: If a specific date is provided (like "26th june 2025"), use exact date filtering: DATE(created_at) = 'YYYY-MM-DD'
- For date filtering, use proper PostgreSQL date functions
- Use EXACT category names from the list above
- For keyword searches in UPDATE/DELETE, use LOWER(note) LIKE '%exact_keyword%' pattern
- For category searches, use category = 'exact_category_name'
- When both category and keywords are provided for SELECT, prioritize category filtering
- CRITICAL: If the intent is UPDATE, generate an UPDATE query, NOT a SELECT query
- CRITICAL: If the intent is DELETE, generate a DELETE query, NOT a SELECT query
- NEVER use placeholder text like '%keyword%' or 'category_name' - use the actual values provided
CORRECT SQL EXAMPLES:
- SELECT * FROM expenses WHERE user_id = $1 AND DATE(created_at) = '2025-06-26';
- SELECT SUM(amount) FROM expenses WHERE user_id = $1 AND DATE(created_at) = '2025-06-26';
- SELECT * FROM expenses WHERE user_id = $1 AND DATE(created_at) BETWEEN '2025-01-13' AND '2025-01-19';
- SELECT SUM(amount) FROM expenses WHERE user_id = $1 AND DATE(created_at) BETWEEN '2025-01-13' AND '2025-01-19';
- SELECT * FROM expenses WHERE user_id = $1 AND category = 'Food & Dining';
- SELECT * FROM expenses WHERE user_id = $1 AND LOWER(note) LIKE '%lunch%';
- INSERT INTO expenses (user_id, amount, category, note, created_at) VALUES ($1, 500.00, 'Health & Fitness', 'Spent 500 rupees yesterday on medicines', '2025-01-19 00:00:00') RETURNING *;
- EXAMPLE: If date range is '2025-01-13' to '2025-01-19', use DATE(created_at) BETWEEN '2025-01-13' AND '2025-01-19'
INCORRECT SQL EXAMPLES (DO NOT USE):
- WITH filtered_date AS (SELECT '2025-06-24' AS target_date) SELECT * FROM expenses WHERE user_id = $1 AND DATE(created_at) = (SELECT target_date FROM filtered_date);
- WITH filtered_expenses AS (SELECT * FROM expenses WHERE user_id = $1) SELECT * FROM filtered_expenses;
SQL Query:"""

    # Tokenize the prompt and move the resulting tensors to the model's device.
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True, padding=True).to(device)

    # Inference only: no gradients are needed for generation.
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_length=256,
            # Start decoding from the tokenizer's pad token id.
            decoder_start_token_id=tokenizer.convert_tokens_to_ids(tokenizer.pad_token),
        )

    # Only the first generated sequence is decoded and returned.
    return tokenizer.decode(outputs[0], skip_special_tokens=True)
# Gradio UI: three text inputs are passed to generate_sql in order
# (schema, user query, date info); its return value fills the output box.
schema_box = gr.Textbox(label="Database Schema", lines=12, placeholder="CREATE TABLE ...")
query_box = gr.Textbox(label="User Query", placeholder="How much did I spend on food last week?")
date_box = gr.Textbox(label="Date Info (optional)", placeholder="2025-06-12 or 2025-06-01 to 2025-06-07")
sql_box = gr.Textbox(label="Generated SQL Query")

iface = gr.Interface(
    fn=generate_sql,
    inputs=[schema_box, query_box, date_box],
    outputs=sql_box,
    title="HISAB AI - Natural Language to SQL",
    description="Enter your schema, user query and date (optional). Model will output SQL query.",
)

# Start the web app.
iface.launch()