Ujeshhh committed on
Commit 97e1f50 · verified · 1 Parent(s): 2747657

Update app.py

Files changed (1)
  1. app.py +39 -50
app.py CHANGED
@@ -1,54 +1,43 @@
- from transformers import BlipProcessor, BlipForConditionalGeneration
- from transformers import MarianMTModel, MarianTokenizer
  import gradio as gr

- # Load BLIP model for image captioning
- processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
- model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")
-
- # Load MarianMT model for translation (English to Tamil)
- translation_model_name = "Helsinki-NLP/opus-mt-en-ta"
- translator_model = MarianMTModel.from_pretrained(translation_model_name)
- translator_tokenizer = MarianTokenizer.from_pretrained(translation_model_name)
-
- import numpy as np
- from PIL import Image
- import torch

  def generate_caption(image):
-     # Ensure the image is in PIL format (Gradio should handle this, but let's explicitly ensure it)
-     if isinstance(image, Image.Image) is False:
-         image = Image.open(image)
-
-     # Resize the image to the expected size (e.g., 384x384)
-     image = image.resize((384, 384))
-
-     # Convert image to numpy array with float32 dtype
-     image_array = np.array(image).astype(np.float32)
-
-     # Normalize the image (if needed)
-     image_array /= 255.0  # Normalize to [0, 1]
-
-     # Convert the numpy array to a tensor, explicitly specifying dtype as float32
-     image_tensor = torch.tensor(image_array, dtype=torch.float32).permute(2, 0, 1).unsqueeze(0)  # [1, C, H, W]
-
-     # Generate caption from image
-     inputs = processor(images=image_tensor, return_tensors="pt", padding=True)  # Added padding=True
-     out = model.generate(**inputs)
-     caption = processor.decode(out[0], skip_special_tokens=True)
-
-     # Translate caption to Tamil
-     translated = translator_tokenizer(caption, return_tensors="pt", padding=True)
-     translated_text = translator_model.generate(**translated)
-     translation = translator_tokenizer.decode(translated_text[0], skip_special_tokens=True)
-
-     return caption, translation
-
- # Gradio interface
- interface = gr.Interface(fn=generate_caption,
-                          inputs=gr.Image(type="pil"),
-                          outputs=[gr.Textbox(label="Caption in English"),
-                                   gr.Textbox(label="Caption in Tamil")])
-
- # Launch the Gradio app
- interface.launch()
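For reference, here is a minimal sketch of the BLIP + MarianMT pipeline the removed code implements, assuming the same checkpoints. Passing the PIL image straight to BlipProcessor lets it handle resizing, rescaling, and normalization itself, so the manual numpy/torch preprocessing above is not needed; the helper name caption_and_translate and max_new_tokens=50 are illustrative choices, not part of the original file.

from transformers import BlipProcessor, BlipForConditionalGeneration
from transformers import MarianMTModel, MarianTokenizer
from PIL import Image

processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")

translator_tokenizer = MarianTokenizer.from_pretrained("Helsinki-NLP/opus-mt-en-ta")
translator_model = MarianMTModel.from_pretrained("Helsinki-NLP/opus-mt-en-ta")

def caption_and_translate(image: Image.Image):
    # The processor turns the raw PIL image into pixel_values (resize, rescale, normalize).
    inputs = processor(images=image, return_tensors="pt")
    out = model.generate(**inputs, max_new_tokens=50)  # generation cap is an illustrative choice
    caption = processor.decode(out[0], skip_special_tokens=True)

    # Translate the English caption to Tamil with the MarianMT checkpoint.
    batch = translator_tokenizer([caption], return_tensors="pt", padding=True)
    translated = translator_model.generate(**batch)
    tamil = translator_tokenizer.decode(translated[0], skip_special_tokens=True)
    return caption, tamil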
 
+ import openai
  import gradio as gr
+ import os
+ import base64

+ # Set your OpenAI API Key
+ openai.api_key = "YOUR_OPENAI_API_KEY"  # Replace with your API key

  def generate_caption(image):
+     """Generate a caption for the uploaded image using OpenAI's GPT-4 Vision API."""
+     # Read the uploaded file and base64-encode it so it can be sent as a data URL.
+     with open(image, "rb") as img_file:
+         base64_image = base64.b64encode(img_file.read()).decode("utf-8")
+
+     response = openai.ChatCompletion.create(
+         model="gpt-4-vision-preview",
+         messages=[
+             {
+                 "role": "system",
+                 "content": "You are an AI assistant that describes images accurately."
+             },
+             {
+                 "role": "user",
+                 "content": [
+                     {"type": "text", "text": "Describe the contents of this image in detail."},
+                     {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"}},
+                 ],
+             },
+         ],
+         max_tokens=100
+     )
+
+     caption = response["choices"][0]["message"]["content"]
+     return caption
+
+ # Gradio UI
+ iface = gr.Interface(
+     fn=generate_caption,
+     inputs=gr.Image(type="filepath"),
+     outputs="text",
+     title="Image Captioning App",
+     description="Upload an image, and the AI will generate a descriptive caption."
+ )
+
+ # Run the app
+ if __name__ == "__main__":
+     iface.launch()
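The added code targets the legacy openai<1.0 Python SDK (openai.ChatCompletion and dict-style responses). Below is a minimal sketch of the same captioning call against the current SDK (openai>=1.0); the model name "gpt-4o", the OPENAI_API_KEY environment variable, and the helper name generate_caption_v1 are assumptions for illustration, not part of this commit.

import base64
from openai import OpenAI

client = OpenAI()  # assumption: reads OPENAI_API_KEY from the environment

def generate_caption_v1(image_path):
    # Base64-encode the image so it can be sent inline as a data URL.
    with open(image_path, "rb") as img_file:
        b64 = base64.b64encode(img_file.read()).decode("utf-8")

    response = client.chat.completions.create(
        model="gpt-4o",  # assumption: any vision-capable model available to your key
        messages=[
            {"role": "system", "content": "You are an AI assistant that describes images accurately."},
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": "Describe the contents of this image in detail."},
                    {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{b64}"}},
                ],
            },
        ],
        max_tokens=100,
    )
    return response.choices[0].message.content

The Gradio wiring is unchanged either way: gr.Interface(fn=generate_caption_v1, inputs=gr.Image(type="filepath"), outputs="text") passes the uploaded file path to the function and displays the returned caption.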