File size: 2,390 Bytes
625a47c
1c69011
625a47c
1c69011
 
 
 
 
 
 
edac42b
0724936
1c69011
 
 
 
625a47c
1c69011
 
 
625a47c
1c69011
 
 
 
625a47c
1c69011
edac42b
1c69011
 
 
edac42b
1c69011
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
edac42b
 
1c69011
edac42b
 
1c69011
 
edac42b
 
1c69011
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
import os
import torch
import gradio as gr
from huggingface_hub import login
from transformers import (
    AutoTokenizer,
    AutoModelForSeq2SeqLM,
    GPT2LMHeadModel,
    GPT2Tokenizer
)
from diffusers import StableDiffusionPipeline

# Authenticate with Hugging Face Token
# Read the token from the environment so it is never hard-coded; login() is
# skipped entirely when the variable is unset (public models still work).
hf_token = os.getenv("HUGGINGFACE_TOKEN")
if hf_token:
    login(token=hf_token)

# Load Tamil to English Translation Model
# NOTE: these downloads happen at import time; first run may take a while.
trans_tokenizer = AutoTokenizer.from_pretrained("nandhinivaradharajan14/tam-eng-translator")
trans_model = AutoModelForSeq2SeqLM.from_pretrained("nandhinivaradharajan14/tam-eng-translator")

# Load GPT-2 for English Text Generation
gpt_tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
gpt_model = GPT2LMHeadModel.from_pretrained("gpt2")
gpt_model.eval()  # inference-only: disable dropout etc.

# Load Stable Diffusion.
# Use fp16 on GPU (halves VRAM) and fp32 on CPU (fp16 is unsupported/slow there).
device = "cuda" if torch.cuda.is_available() else "cpu"
sd_pipe = StableDiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5",
    # `use_auth_token` is deprecated in diffusers; `token` is the current name.
    token=hf_token,
    torch_dtype=torch.float16 if device == "cuda" else torch.float32,
).to(device)

# Main function
def tam_to_image_pipeline(tamil_text):
    # 1. Tamil to English Translation
    inputs = trans_tokenizer(tamil_text, return_tensors="pt")
    translated = trans_model.generate(**inputs)
    english_text = trans_tokenizer.decode(translated[0], skip_special_tokens=True)

    # 2. Generate Descriptive Text using GPT-2
    gpt_input = gpt_tokenizer.encode(english_text, return_tensors="pt")
    with torch.no_grad():
        gpt_output = gpt_model.generate(
            gpt_input,
            max_length=50,
            num_return_sequences=1,
            no_repeat_ngram_size=2,
            pad_token_id=gpt_tokenizer.eos_token_id
        )
    generated_text = gpt_tokenizer.decode(gpt_output[0], skip_special_tokens=True)

    # 3. Generate Image using Stable Diffusion
    image = sd_pipe(generated_text).images[0]

    return english_text, generated_text, image

# Gradio Interface
# One Tamil textbox in; three outputs out, matching the tuple returned by
# tam_to_image_pipeline: (translation, GPT-2 description, generated image).
interface = gr.Interface(
    fn=tam_to_image_pipeline,
    inputs=gr.Textbox(label="Enter Tamil Text"),
    outputs=[
        gr.Textbox(label="Translated English Text"),
        gr.Textbox(label="Generated Description"),
        gr.Image(label="Generated Image")
    ],
    title="Tamil to Image Generator",
    description="πŸ”€ Tamil β†’ English β†’ GPT-2 Description β†’ 🎨 Stable Diffusion Image Generator"
)

# Launch app
# launch() blocks and serves the UI; this runs on import (no __main__ guard),
# which is the normal pattern for Hugging Face Spaces apps.
interface.launch()