import os

import gradio as gr
import torch
from transformers import M2M100ForConditionalGeneration, M2M100Tokenizer, pipeline
from diffusers import StableDiffusionPipeline

# Load the Tamil→English translation model and tokenizer.
try:
    translator = M2M100ForConditionalGeneration.from_pretrained("facebook/m2m100_418M")
    tokenizer = M2M100Tokenizer.from_pretrained("facebook/m2m100_418M")
    tokenizer.src_lang = "ta"
except Exception as e:
    print(f"Error loading M2M100 model: {e}")
    translator = tokenizer = None

# Load the GPT-2 text-generation pipeline.
try:
    text_generator = pipeline("text-generation", model="gpt2")
except Exception as e:
    print(f"Error loading GPT-2 model: {e}")
    text_generator = None

# Load the Stable Diffusion pipeline.
hf_token = os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACE_TOKEN")
device = "cuda" if torch.cuda.is_available() else "cpu"
try:
    pipe = StableDiffusionPipeline.from_pretrained(
        "runwayml/stable-diffusion-v1-5",
        torch_dtype=torch.float16 if device == "cuda" else torch.float32,
        use_auth_token=hf_token,
    )
    pipe = pipe.to(device)
    # Attention slicing trades a little speed for lower VRAM usage on GPU.
    if device == "cuda":
        pipe.enable_attention_slicing()
except Exception as e:
    print(f"Error loading Stable Diffusion pipeline: {e}")
    pipe = None


def tamil_to_image(tamil_text):
    """
    Translate Tamil text to English, expand it with GPT-2,
    and produce an image with Stable Diffusion.
    Returns (PIL.Image, info_text).
    """
    if not tamil_text or not tamil_text.strip():
        return None, "Error: Please enter Tamil text as input."

    # Bail out early if any model failed to load at startup.
    if translator is None or tokenizer is None:
        return None, "Error: Translation model is unavailable."
    if text_generator is None:
        return None, "Error: Text-generation model is unavailable."
    if pipe is None:
        return None, "Error: Stable Diffusion pipeline is unavailable."

    # Translation (Tamil → English).
    try:
        tokenizer.src_lang = "ta"
        encoded = tokenizer(tamil_text, return_tensors="pt")
        generated_tokens = translator.generate(
            **encoded,
            forced_bos_token_id=tokenizer.get_lang_id("en"),
        )
        translation = tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)[0]
    except Exception as e:
        return None, f"Translation error: {e}"

    # Prompt expansion with GPT-2. The pipeline returns a list of dicts
    # for a single string input.
    try:
        gen = text_generator(translation, max_length=50, num_return_sequences=1)
        gen_text = gen[0]["generated_text"]
    except Exception as e:
        return None, f"Text generation error: {e}"

    # Image generation with Stable Diffusion, using the generated text as the prompt.
    try:
        prompt = gen_text
        # Fewer inference steps on CPU keep latency tolerable.
        steps = 50 if device == "cuda" else 25
        image = pipe(prompt, num_inference_steps=steps).images[0]
    except Exception as e:
        return None, f"Image generation error: {e}"

    info = f"Translated → English: {translation}\nGPT-2 Prompt: {prompt}"
    return image, info


# Build the Gradio interface.
iface = gr.Interface(
    fn=tamil_to_image,
    inputs=gr.Textbox(label="Tamil Input", placeholder="Enter Tamil text here"),
    outputs=[
        gr.Image(type="pil", label="Generated Image"),
        gr.Textbox(label="Output Info"),
    ],
    title="Tamil Text-to-Image Generator",
    description=(
        "Enter Tamil text; this demo translates it to English, generates a "
        "story prompt with GPT-2, then creates an image with Stable Diffusion."
    ),
)

# Launch the app (on Spaces this runs at startup).
iface.launch()
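
# --- Optional smoke test (illustrative sketch, not part of the Spaces app) ---
# Uncomment to exercise the full pipeline once without the Gradio UI.
# The sample Tamil sentence is an assumption for demonstration purposes
# ("a beautiful sunset"). Note: place this *before* iface.launch(), since
# launch() blocks the process on Spaces.
#
# image, info = tamil_to_image("ஒரு அழகான சூரிய அஸ்தமனம்")
# print(info)
# if image is not None:
#     image.save("smoke_test.png")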