File size: 3,511 Bytes
016b5dd
e97aebb
a558492
016b5dd
 
 
59921cd
016b5dd
 
 
 
 
 
 
 
a558492
016b5dd
 
 
 
 
 
e5964e8
016b5dd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e97aebb
016b5dd
 
 
 
 
 
 
 
e97aebb
016b5dd
ac06315
016b5dd
 
 
 
 
 
 
 
ac06315
016b5dd
 
 
 
 
 
ac06315
016b5dd
 
 
 
 
 
 
 
 
a558492
016b5dd
 
 
 
e97aebb
016b5dd
 
 
 
f77ae83
016b5dd
 
f77ae83
016b5dd
 
e97aebb
 
016b5dd
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
import os
import gradio as gr
import torch
from transformers import M2M100ForConditionalGeneration, M2M100Tokenizer, pipeline
from diffusers import StableDiffusionPipeline
from PIL import Image

# Tamil→English translation: load the M2M100 model together with its tokenizer.
# The two globals are published as a pair: either both are usable objects or,
# if anything in the loading sequence raises, both stay None.
_M2M100_CHECKPOINT = "facebook/m2m100_418M"
translator = tokenizer = None
try:
    _loaded_model = M2M100ForConditionalGeneration.from_pretrained(_M2M100_CHECKPOINT)
    _loaded_tokenizer = M2M100Tokenizer.from_pretrained(_M2M100_CHECKPOINT)
    # Source language is fixed to Tamil ("ta") for every request.
    _loaded_tokenizer.src_lang = "ta"
except Exception as load_error:
    print(f"Error loading M2M100 model: {load_error}")
else:
    translator, tokenizer = _loaded_model, _loaded_tokenizer

# GPT-2 text-generation pipeline. The None fallback is assigned first so that
# a failed load only has to be reported, not undone.
text_generator = None
try:
    text_generator = pipeline("text-generation", model="gpt2")
except Exception as load_error:
    print(f"Error loading GPT-2 model: {load_error}")

# Load Stable Diffusion pipeline
# The Hub access token is optional and is read from either environment
# variable; when neither is set, None is passed through.
hf_token = os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACE_TOKEN")
device = "cuda" if torch.cuda.is_available() else "cpu"
try:
    pipe = StableDiffusionPipeline.from_pretrained(
        "runwayml/stable-diffusion-v1-5",
        # Half precision only on GPU; CPU falls back to float32.
        torch_dtype=torch.float16 if device == "cuda" else torch.float32,
        # `token` is the documented name of the auth argument; the older
        # `use_auth_token` spelling is deprecated in current diffusers
        # releases (NOTE(review): confirm against the pinned diffusers version).
        token=hf_token,
    )
    pipe = pipe.to(device)
    # Optionally enable efficient attention slicing if on GPU to save memory
    if device == "cuda":
        pipe.enable_attention_slicing()
except Exception as e:
    # Keep the app importable; `pipe` is None when the pipeline is unavailable.
    print(f"Error loading Stable Diffusion pipeline: {e}")
    pipe = None

def tamil_to_image(tamil_text):
    """
    Turn Tamil text into an image via translation, GPT-2 and Stable Diffusion.

    Steps: translate the Tamil input to English with M2M100, let GPT-2
    continue the translation to form a prompt, then render that prompt with
    the Stable Diffusion pipeline.

    Args:
        tamil_text: Tamil input text. None, empty, or whitespace-only input
            is rejected with an error message.

    Returns:
        A tuple ``(image, info_text)``. On success ``image`` is the first
        image produced by the pipeline and ``info_text`` lists the
        translation and the prompt. On any failure ``image`` is None and
        ``info_text`` describes the error; exceptions from the individual
        stages are caught and reported rather than raised.
    """
    text = (tamil_text or "").strip()
    if not text:
        return None, "Error: Please enter Tamil text as input."

    # The module-level loaders leave these globals as None when loading
    # failed. Fail fast with a readable message instead of surfacing a
    # "'NoneType' object has no attribute ..." error from deep in a stage.
    if translator is None or tokenizer is None:
        return None, "Error: Translation model is not available (it failed to load at startup)."
    if text_generator is None:
        return None, "Error: GPT-2 text generator is not available (it failed to load at startup)."
    if pipe is None:
        return None, "Error: Stable Diffusion pipeline is not available (it failed to load at startup)."

    # Translation (Tamil -> English)
    try:
        tokenizer.src_lang = "ta"
        encoded = tokenizer(text, return_tensors="pt")
        generated_tokens = translator.generate(
            **encoded, forced_bos_token_id=tokenizer.get_lang_id("en")
        )
        translation = tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)[0]
    except Exception as e:
        return None, f"Translation error: {e}"

    # Text generation with GPT-2
    try:
        # `max_new_tokens` bounds only the continuation. The previous
        # `max_length=50` also counted the prompt tokens, so a long
        # translation left no room for new text and could make generation fail.
        gen = text_generator(translation, max_new_tokens=40, num_return_sequences=1)
        first = gen[0] if isinstance(gen, list) else gen
        prompt = first["generated_text"]
    except Exception as e:
        return None, f"Text generation error: {e}"

    # Image generation with Stable Diffusion
    try:
        # Fewer denoising steps on CPU to keep the run time tolerable.
        steps = 50 if device == "cuda" else 25
        image = pipe(prompt, num_inference_steps=steps).images[0]
    except Exception as e:
        return None, f"Image generation error: {e}"

    info = f"Translated → English: {translation}\nGPT-2 Prompt: {prompt}"
    return image, info

# Gradio UI: one Tamil text box in; the generated image and a text summary out.
# Components are created in the same order as before, then wired together.
_tamil_input = gr.Textbox(label="Tamil Input", placeholder="Enter Tamil text here", type="text")
_image_output = gr.Image(type="pil", label="Generated Image")
_info_output = gr.Textbox(label="Output Info", type="text")

iface = gr.Interface(
    fn=tamil_to_image,
    inputs=_tamil_input,
    outputs=[_image_output, _info_output],
    title="Tamil Text-to-Image Generator",
    description="Enter Tamil text; this demo translates it to English, generates a story prompt with GPT-2, then creates an image with Stable Diffusion.",
)

# Start serving as soon as the module is executed (this is what runs on
# startup in Spaces).
iface.launch()