josephtran04 committed
Commit 91efc56 · verified · 1 Parent(s): ae7ad61

Upload 3 files

Files changed (3)
  1. LICENSE +21 -0
  2. app.py +41 -0
  3. requirements.txt +6 -0
LICENSE ADDED
@@ -0,0 +1,21 @@
+ MIT License
+
+ Copyright (c) 2025 Trần Minh Phát
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in all
+ copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
app.py ADDED
@@ -0,0 +1,41 @@
+ # This script creates a simple web application using Gradio to generate answers for Visual Question Answering (VQA) with the BLIP model from Hugging Face's Transformers library.
+ # Import the necessary libraries
+ import gradio as gr
+ import numpy as np
+ from PIL import Image
+ from transformers import BlipProcessor, BlipForQuestionAnswering
+
+ # Load the BLIP processor and model
+ processor = BlipProcessor.from_pretrained("Salesforce/blip-vqa-base")
+ model = BlipForQuestionAnswering.from_pretrained("Salesforce/blip-vqa-base")
+
+ # Define the function for Visual Question Answering
+ def VQA(input_image: np.ndarray, question: str) -> str:
+     # Convert the numpy array to a PIL Image in RGB mode
+     raw_image = Image.fromarray(input_image).convert('RGB')
+
+     # Prepare the inputs for the model
+     inputs = processor(raw_image, question, return_tensors="pt")
+
+     # Generate the answer tokens with the model
+     outputs = model.generate(**inputs, max_length=100)
+
+     # Decode the generated tokens to text
+     answer = processor.decode(outputs[0], skip_special_tokens=True)
+
+     return answer
+
+ # Create the Gradio interface
+ iface = gr.Interface(
+     fn=VQA,
+     inputs=[
+         gr.Image(label="Input image:"),
+         gr.Textbox(label="Question:", placeholder="Type your question here...")
+     ],
+     outputs="text",
+     title="Visual Question Answering",
+     description="This is a simple web app for VQA using the BLIP model from Salesforce.\nUpload the image file:"
+ )
+
+ # Launch the Gradio app
+ iface.launch()
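
For a quick local smoke test, the VQA function can also be called directly, bypassing the Gradio UI. A minimal sketch, assuming an image file named demo.jpg (a hypothetical filename, not part of this commit) sits next to app.py:

# Minimal sketch: query BLIP directly through VQA(), skipping the UI.
# "demo.jpg" is a hypothetical example file, not part of this commit.
import numpy as np
from PIL import Image

img = np.array(Image.open("demo.jpg"))  # same ndarray Gradio would pass to VQA
print(VQA(img, "What is in the picture?"))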
requirements.txt ADDED
@@ -0,0 +1,6 @@
+ langchain==0.1.11
+ gradio==5.23.2
+ transformers==4.38.2
+ bs4==0.0.2
+ requests==2.31.0
+ torch==2.2.1
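
To reproduce the app locally, the pinned dependencies can be installed with pip install -r requirements.txt, after which python app.py launches the Gradio interface on the default local port.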