import gradio as gr
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from datasets import load_dataset

# Load tokenizer and model
tokenizer = AutoTokenizer.from_pretrained("hrshtsharma2012/NL2SQL-Picard-final")
model = AutoModelForSeq2SeqLM.from_pretrained("hrshtsharma2012/NL2SQL-Picard-final")

# Load a small slice of the Spider dataset for example questions
spider_dataset = load_dataset("spider", split="train[:5]")

def generate_sql(query):
    # Tokenize the natural-language question and generate the SQL translation
    inputs = tokenizer(query, return_tensors="pt", padding=True)
    outputs = model.generate(**inputs, max_length=512)
    sql_query = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return sql_query

# Use example questions from the Spider dataset (one input value per example)
example_questions = [[example["question"]] for example in spider_dataset]

# Create a Gradio interface
interface = gr.Interface(
    fn=generate_sql,
    inputs=gr.Textbox(lines=2, placeholder="Enter your natural language query here..."),
    outputs="text",
    examples=example_questions,
    title="NL to SQL with Picard",
    description=(
        "This model converts natural language queries into SQL using the Spider dataset. "
        "Try one of the example questions or enter your own!"
    ),
)

# Launch the app
if __name__ == "__main__":
    interface.launch()
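
# Quick sanity check (a minimal sketch): generate_sql can be called directly,
# bypassing the Gradio UI, once the model weights have downloaded. The sample
# question below is only illustrative; the exact SQL produced depends on the model.
# Uncomment to try:
#   print(generate_sql("How many singers do we have?"))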