File size: 2,434 Bytes
625a47c
1c69011
625a47c
1c69011
 
 
 
 
 
 
edac42b
0724936
a68e5a8
1c69011
 
 
625a47c
a68e5a8
 
 
 
625a47c
a68e5a8
1c69011
 
 
625a47c
a68e5a8
edac42b
1c69011
 
 
edac42b
1c69011
 
a68e5a8
1c69011
a68e5a8
 
 
 
1c69011
a68e5a8
 
1c69011
 
a68e5a8
 
1c69011
 
 
 
 
 
a68e5a8
1c69011
 
 
 
a68e5a8
1c69011
 
 
edac42b
 
1c69011
edac42b
 
a68e5a8
 
edac42b
 
1c69011
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
import os
import torch
import gradio as gr
from huggingface_hub import login
from transformers import (
    AutoTokenizer,
    AutoModelForSeq2SeqLM,
    GPT2LMHeadModel,
    GPT2Tokenizer
)
from diffusers import StableDiffusionPipeline

# Authenticate via token so gated/private Hub models can be downloaded.
# If the env var is unset we proceed unauthenticated (public models only).
hf_token = os.getenv("HUGGINGFACE_TOKEN")
if hf_token:
    login(token=hf_token)

# 📚 Tamil ↔ English Translation (Multilingual M2M100 model)
# Loaded once at import time; reused by every Gradio request.
trans_checkpoint = "Hemanth-thunder/english-tamil-mt"
trans_tokenizer = AutoTokenizer.from_pretrained(trans_checkpoint)
trans_model = AutoModelForSeq2SeqLM.from_pretrained(trans_checkpoint)

# 🧠 GPT-2 English Text Generation
gpt_tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
gpt_model = GPT2LMHeadModel.from_pretrained("gpt2")
gpt_model.eval()  # inference mode: disables dropout for deterministic-ish outputs

# 🎨 Stable Diffusion
# fp16 on GPU halves memory; fp32 is required on CPU (fp16 CPU ops are unsupported).
# NOTE(review): `use_auth_token` is deprecated in recent diffusers in favor of
# `token=` — confirm against the pinned diffusers version before changing.
device = "cuda" if torch.cuda.is_available() else "cpu"
sd_pipe = StableDiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5",
    use_auth_token=hf_token,
    torch_dtype=torch.float16 if device == "cuda" else torch.float32
).to(device)

# Pipeline: Tamil → English → GPT-2 → Image
def tam_to_image_pipeline(tamil_text):
    """Translate Tamil text to English, expand it with GPT-2, and render an image.

    Parameters
    ----------
    tamil_text : str
        Raw Tamil input from the Gradio textbox.

    Returns
    -------
    tuple
        (english_text, generated_text, image):
        the M2M100 translation, the GPT-2-expanded description, and the
        Stable Diffusion image (PIL.Image) generated from that description.
    """
    # Translate Tamil → English. no_grad avoids building an autograd graph
    # during inference (consistent with the GPT-2 generation below).
    inputs = trans_tokenizer(tamil_text, return_tensors="pt", truncation=True)
    with torch.no_grad():
        translated_ids = trans_model.generate(**inputs, max_length=128)
    english_text = trans_tokenizer.decode(translated_ids[0], skip_special_tokens=True)

    # Generate an additional English description via GPT-2. Calling the
    # tokenizer (instead of .encode) also yields an attention_mask; passing it
    # avoids the transformers warning about unreliable generation when
    # pad_token_id is set to eos_token_id.
    gpt_inputs = gpt_tokenizer(english_text, return_tensors="pt")
    with torch.no_grad():
        gpt_output = gpt_model.generate(
            gpt_inputs["input_ids"],
            attention_mask=gpt_inputs["attention_mask"],
            max_length=60,  # total length incl. prompt, as in the original tuning
            num_return_sequences=1,
            no_repeat_ngram_size=2,
            pad_token_id=gpt_tokenizer.eos_token_id
        )
    generated_text = gpt_tokenizer.decode(gpt_output[0], skip_special_tokens=True)

    # Generate image from the expanded description (the pipeline truncates
    # prompts beyond CLIP's token limit internally).
    image = sd_pipe(generated_text).images[0]

    return english_text, generated_text, image

# Gradio UI: single Tamil textbox in, (translation, description, image) out.
tamil_input = gr.Textbox(label="Enter Tamil Text")
pipeline_outputs = [
    gr.Textbox(label="Translated English Text"),
    gr.Textbox(label="Generated Description"),
    gr.Image(label="Generated Image"),
]

interface = gr.Interface(
    fn=tam_to_image_pipeline,
    inputs=tamil_input,
    outputs=pipeline_outputs,
    title="Tamil → Image Generator",
    description="📘 Tamil to English (M2M100) → GPT‑2 → Image via Stable Diffusion",
)

interface.launch()