import gradio as gr
from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
import json

# Load the FLAN-T5 tokenizer and model, then wrap them in a text2text-generation pipeline.
model_name = "google/flan-t5-xl"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
generator = pipeline("text2text-generation", model=model, tokenizer=tokenizer)


def generate_json(prompt):
    # Explicit instruction so the model returns only a valid JSON object with the required keys.
    instruction = (
        "Return only a valid JSON object, with no additional text. "
        "The JSON object must contain exactly the keys: "
        '"title", "author", "tags". '
        "For the following prompt, generate this JSON object. "
        f"Prompt: {prompt}"
    )
    result = generator(instruction, max_length=512, do_sample=False)
    generated_text = result[0]["generated_text"].strip()

    # Debug: print the raw output to inspect the format.
    print(f"Raw Model Output: {generated_text}")

    # Try to parse and pretty-print the output; fall back to the raw text plus the parse error.
    try:
        parsed = json.loads(generated_text)
        formatted_json = json.dumps(parsed, indent=2)
    except Exception as e:
        formatted_json = f"Raw Output:\n{generated_text}\n\nError parsing JSON: {e}"
    return formatted_json


# Gradio UI: a prompt textbox in, the generated (or raw) JSON out.
demo = gr.Interface(
    fn=generate_json,
    inputs=gr.Textbox(lines=4, label="Enter Prompt"),
    outputs=gr.Textbox(lines=20, label="Generated JSON"),
    title="Lightweight JSON Generator",
    description="Enter a prompt describing the structure or content you want in JSON format.",
)

demo.queue()
demo.launch(show_error=True)