# Install necessary libraries if not already installed
# !pip install transformers diffusers torch torchvision accelerate huggingface_hub matplotlib Pillow requests
import requests
import torch
import matplotlib.pyplot as plt
from PIL import Image
from huggingface_hub import login
from transformers import MarianTokenizer, MarianMTModel
from transformers import BlipProcessor, BlipForConditionalGeneration
# Step 1: Login to Hugging Face
login("your_huggingface_token_here") # 🔐 Replace this with your actual token
# Step 2: Tamil to English Translation
def translate_tamil_to_english(text):
    model_name = "Helsinki-NLP/opus-mt-ta-en"
    tokenizer = MarianTokenizer.from_pretrained(model_name)
    model = MarianMTModel.from_pretrained(model_name)
    inputs = tokenizer(text, return_tensors="pt", padding=True)
    translated = model.generate(**inputs)
    english_text = tokenizer.decode(translated[0], skip_special_tokens=True)
    return english_text
# Step 3: Fetch a sample image (placeholder, since BLIP only does captioning)
# A text-to-image model such as Stable Diffusion could generate one from the
# translated text instead; see the optional sketch below.
def get_sample_image():
    img_url = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/coco_sample.png"
    img = Image.open(requests.get(img_url, stream=True).raw).convert('RGB')
    return img
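
# Optional: a minimal text-to-image sketch using diffusers' StableDiffusionPipeline.
# Assumptions (not part of the original script): the "runwayml/stable-diffusion-v1-5"
# checkpoint is accessible and, ideally, a CUDA GPU is available; on CPU it will be slow.
def generate_image_from_text(prompt):
    from diffusers import StableDiffusionPipeline
    device = "cuda" if torch.cuda.is_available() else "cpu"
    pipe = StableDiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5")
    pipe = pipe.to(device)
    # The pipeline returns a list of PIL images; take the first one
    image = pipe(prompt).images[0]
    return image
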
# Step 4: Describe the Image
def describe_image(image):
    processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
    model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")
    inputs = processor(images=image, return_tensors="pt")
    out = model.generate(**inputs)
    caption = processor.decode(out[0], skip_special_tokens=True)
    return caption
# === MAIN ===
if __name__ == "__main__":
    # Step A: Input Tamil text ("A small house is near the seashore")
    tamil_text = "ஒரு சிறிய வீடு கடற்கரைக்கு அருகிலுள்ளது"
    print("Tamil Input:", tamil_text)
    # Step B: Translate to English
    english_translation = translate_tamil_to_english(tamil_text)
    print("Translated English:", english_translation)
    # Step C: Get sample image (placeholder, since text-to-image is not wired in yet)
    image = get_sample_image()
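    # Optional alternative (assumes the hypothetical generate_image_from_text sketch
    # above and a suitable GPU): create the image from the translated text instead.
    # image = generate_image_from_text(english_translation)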
    # Step D: Describe the image
    caption = describe_image(image)
    print("Image Caption:", caption)
    # Optional: Display the image
    plt.imshow(image)
    plt.title(caption)
    plt.axis("off")
    plt.show()