# Tamil-to-Image Gradio app: Tamil text -> English translation -> GPT-2 expansion -> Stable Diffusion image.
import os
import torch
import gradio as gr
from huggingface_hub import login
from transformers import (
AutoTokenizer,
AutoModelForSeq2SeqLM,
GPT2LMHeadModel,
GPT2Tokenizer
)
from diffusers import StableDiffusionPipeline
# Authenticate against the Hugging Face Hub if a token is present in the
# environment; `hf_token` is also reused below when loading Stable Diffusion.
hf_token = os.environ.get("HUGGINGFACE_TOKEN")
if hf_token:
    login(token=hf_token)
# Tamil -> English translation model.
# NOTE(review): the checkpoint name suggests a dedicated English-Tamil MT model;
# the original comment called it "M2M100" — confirm which architecture this is.
trans_checkpoint = "Hemanth-thunder/english-tamil-mt"
trans_tokenizer = AutoTokenizer.from_pretrained(trans_checkpoint)
trans_model = AutoModelForSeq2SeqLM.from_pretrained(trans_checkpoint)
# GPT-2 for English text generation (eval mode: inference only, no dropout).
gpt_tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
gpt_model = GPT2LMHeadModel.from_pretrained("gpt2")
gpt_model.eval()
# Stable Diffusion pipeline; fp16 on GPU, fp32 on CPU.
# NOTE(review): `use_auth_token` is deprecated in recent diffusers releases
# in favor of `token` — verify against the pinned diffusers version.
device = "cuda" if torch.cuda.is_available() else "cpu"
sd_pipe = StableDiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5",
    use_auth_token=hf_token,
    torch_dtype=torch.float16 if device == "cuda" else torch.float32
).to(device)
# Full pipeline: Tamil text -> English translation -> GPT-2 expansion -> image.
def tam_to_image_pipeline(tamil_text):
    """Translate Tamil input to English, expand it with GPT-2, render an image.

    Returns a ``(english_text, generated_text, image)`` tuple consumed by the
    Gradio interface below.
    """
    # Step 1: Tamil -> English with the seq2seq translation model.
    encoded = trans_tokenizer(tamil_text, return_tensors="pt", truncation=True)
    translated = trans_model.generate(**encoded, max_length=128)
    english_text = trans_tokenizer.decode(translated[0], skip_special_tokens=True)

    # Step 2: expand the translation into a longer description with GPT-2.
    prompt_ids = gpt_tokenizer.encode(english_text, return_tensors="pt")
    with torch.no_grad():
        description_ids = gpt_model.generate(
            prompt_ids,
            max_length=60,
            num_return_sequences=1,
            no_repeat_ngram_size=2,
            pad_token_id=gpt_tokenizer.eos_token_id,
        )
    generated_text = gpt_tokenizer.decode(description_ids[0], skip_special_tokens=True)

    # Step 3: render the expanded description with Stable Diffusion.
    image = sd_pipe(generated_text).images[0]
    return english_text, generated_text, image
# Gradio UI wiring the pipeline to a textbox input and three outputs.
# Fix: the title/description strings were mojibake-garbled ("β" for arrows);
# restored as readable user-facing text.
interface = gr.Interface(
    fn=tam_to_image_pipeline,
    inputs=gr.Textbox(label="Enter Tamil Text"),
    outputs=[
        gr.Textbox(label="Translated English Text"),
        gr.Textbox(label="Generated Description"),
        gr.Image(label="Generated Image")
    ],
    title="Tamil → Image Generator",
    description="Tamil to English (M2M100) → GPT-2 → Image via Stable Diffusion"
)
interface.launch()