# Install necessary libraries if not already installed
# !pip install transformers diffusers torch torchvision accelerate huggingface_hub matplotlib Pillow

import requests
import torch
import matplotlib.pyplot as plt
from PIL import Image
from huggingface_hub import login
from transformers import (
    BlipForConditionalGeneration,
    BlipProcessor,
    MarianMTModel,
    MarianTokenizer,
)

# Step 1: Login to Hugging Face
login("your_huggingface_token_here")  # 🔐 Replace this with your actual token

# Step 2: Tamil to English Translation
def translate_tamil_to_english(text):
    model_name = "Helsinki-NLP/opus-mt-ta-en"
    tokenizer = MarianTokenizer.from_pretrained(model_name)
    model = MarianMTModel.from_pretrained(model_name)

    inputs = tokenizer(text, return_tensors="pt", padding=True)
    translated = model.generate(**inputs)
    english_text = tokenizer.decode(translated[0], skip_special_tokens=True)
    return english_text

# Step 3: Fetch a placeholder image (BLIP only captions images; it does not generate them)
# For real text-to-image generation you could use a model such as Stable Diffusion (see the sketch below)
def get_sample_image():
    img_url = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/coco_sample.png"
    img = Image.open(requests.get(img_url, stream=True).raw).convert('RGB')
    return img
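
# Optional, hedged sketch of real text-to-image generation via diffusers (already
# listed in the install line above). The checkpoint id below is an assumption;
# substitute any Stable Diffusion checkpoint you have access to.
def generate_image_from_text(prompt):
    from diffusers import StableDiffusionPipeline  # imported lazily; the model is large

    pipe = StableDiffusionPipeline.from_pretrained(
        "stable-diffusion-v1-5/stable-diffusion-v1-5",  # assumed checkpoint id
        torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
    )
    pipe = pipe.to("cuda" if torch.cuda.is_available() else "cpu")
    image = pipe(prompt).images[0]  # PIL.Image, same type as get_sample_image() returns
    return image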

# Step 4: Describe the Image
def describe_image(image):
    processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
    model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")

    inputs = processor(images=image, return_tensors="pt")
    out = model.generate(**inputs)
    caption = processor.decode(out[0], skip_special_tokens=True)
    return caption

# === MAIN ===
if __name__ == "__main__":

    # Step A: Input Tamil text
    tamil_text = "ஒரு சிறிய வீடு கடற்கரைக்கு அருகிலுள்ளது"  # "A small house is near the seashore"
    print("Tamil Input:", tamil_text)

    # Step B: Translate to English
    english_translation = translate_tamil_to_english(tamil_text)
    print("Translated English:", english_translation)

    # Step C: Get sample image (placeholder for now, since we aren't using text-to-image yet)
    image = get_sample_image()

    # Step D: Describe the image
    caption = describe_image(image)
    print("Image Caption:", caption)

    # Optional: Display the image
    plt.imshow(image)
    plt.title(caption)
    plt.axis("off")
    plt.show()
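
    # Hedged end-to-end variant (assumes the generate_image_from_text sketch above):
    # translate Tamil to English, generate an image from the English prompt, then
    # caption the generated image as a sanity check.
    #   image = generate_image_from_text(english_translation)
    #   caption = describe_image(image)
    #   print("Caption of generated image:", caption)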